summaryrefslogtreecommitdiff
path: root/tools/testing
diff options
context:
space:
mode:
Diffstat (limited to 'tools/testing')
-rw-r--r--tools/testing/cxl/test/cxl.c4
-rw-r--r--tools/testing/cxl/test/mem.c89
-rw-r--r--tools/testing/kunit/qemu_configs/riscv.py2
-rw-r--r--tools/testing/memblock/tests/basic_api.c314
-rw-r--r--tools/testing/memblock/tests/common.c8
-rw-r--r--tools/testing/memblock/tests/common.h4
-rw-r--r--tools/testing/nvdimm/test/iomap.c1
-rw-r--r--tools/testing/nvdimm/test/ndtest.c8
-rw-r--r--tools/testing/nvdimm/test/ndtest.h31
-rw-r--r--tools/testing/nvdimm/test/nfit.c1
-rw-r--r--tools/testing/radix-tree/Makefile4
-rw-r--r--tools/testing/radix-tree/bitmap.c23
-rw-r--r--tools/testing/radix-tree/idr-test.c1
-rw-r--r--tools/testing/radix-tree/linux/kernel.h2
-rw-r--r--tools/testing/radix-tree/maple.c1
-rw-r--r--tools/testing/radix-tree/xarray.c1
-rw-r--r--tools/testing/selftests/Makefile18
-rw-r--r--tools/testing/selftests/alsa/Makefile2
-rw-r--r--tools/testing/selftests/alsa/conf.c2
-rw-r--r--tools/testing/selftests/alsa/mixer-test.c143
-rw-r--r--tools/testing/selftests/alsa/pcm-test.c70
-rw-r--r--tools/testing/selftests/arm64/abi/ptrace.c4
-rw-r--r--tools/testing/selftests/arm64/abi/tpidr2.c2
-rw-r--r--tools/testing/selftests/arm64/fp/.gitignore1
-rw-r--r--tools/testing/selftests/arm64/fp/Makefile1
-rw-r--r--tools/testing/selftests/arm64/fp/fp-stress.c26
-rw-r--r--tools/testing/selftests/arm64/fp/kernel-test.c324
-rw-r--r--tools/testing/selftests/arm64/tags/Makefile1
-rwxr-xr-xtools/testing/selftests/arm64/tags/run_tags_test.sh12
-rw-r--r--tools/testing/selftests/arm64/tags/tags_test.c12
-rw-r--r--tools/testing/selftests/bpf/.gitignore1
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.aarch644
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.s390x3
-rw-r--r--tools/testing/selftests/bpf/Makefile67
-rw-r--r--tools/testing/selftests/bpf/bench.c39
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_bpf_crypto.c185
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_local_storage_create.c2
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_trigger.c433
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_trigger.sh22
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_uprobes.sh2
-rw-r--r--tools/testing/selftests/bpf/bpf_arena_common.h2
-rw-r--r--tools/testing/selftests/bpf/bpf_arena_list.h4
-rw-r--r--tools/testing/selftests/bpf/bpf_experimental.h101
-rw-r--r--tools/testing/selftests/bpf/bpf_kfuncs.h3
-rw-r--r--tools/testing/selftests/bpf/bpf_tcp_helpers.h241
-rw-r--r--tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c4
-rw-r--r--tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c460
-rw-r--r--tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h38
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.c5
-rw-r--r--tools/testing/selftests/bpf/config24
-rw-r--r--tools/testing/selftests/bpf/network_helpers.c341
-rw-r--r--tools/testing/selftests/bpf/network_helpers.h35
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arena_atomics.c204
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_cookie.c116
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_nf.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c362
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_distill.c552
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_dump.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_field_iter.c161
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c26
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cls_redirect.c38
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cpumask.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/crypto_sanity.c197
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c10
-rw-r--r--tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c34
-rw-r--r--tools/testing/selftests/bpf/prog_tests/empty_skb.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_sleep.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_stress.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fib_lookup.c132
-rw-r--r--tools/testing/selftests/bpf/prog_tests/find_vma.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/for_each.c62
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c16
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kfunc_call.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kfunc_param_nullable.c11
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c322
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ksyms.c30
-rw-r--r--tools/testing/selftests/bpf/prog_tests/linked_list.c12
-rw-r--r--tools/testing/selftests/bpf/prog_tests/module_attach.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/mptcp.c13
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c214
-rw-r--r--tools/testing/selftests/bpf/prog_tests/perf_skip.c137
-rw-r--r--tools/testing/selftests/bpf/prog_tests/preempt_lock.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/rbtree.c47
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ringbuf.c121
-rw-r--r--tools/testing/selftests/bpf/prog_tests/send_signal.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sk_assign.c55
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sk_lookup.c82
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sock_addr.c2353
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_basic.c171
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_listen.c123
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt.c65
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c64
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_links.c61
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_netkit.c94
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_redirect.c5
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tcp_rtt.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c216
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_tunnel.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer_lockup.c91
-rw-r--r--tools/testing/selftests/bpf/prog_tests/trace_printk.c36
-rw-r--r--tools/testing/selftests/bpf/prog_tests/trace_vprintk.c36
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tracing_struct.c44
-rw-r--r--tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c134
-rw-r--r--tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c385
-rw-r--r--tools/testing/selftests/bpf/prog_tests/uretprobe_stack.c186
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verifier.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verifier_kfunc_prog_types.c11
-rw-r--r--tools/testing/selftests/bpf/prog_tests/wq.c40
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c168
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_metadata.c19
-rw-r--r--tools/testing/selftests/bpf/progs/arena_atomics.c247
-rw-r--r--tools/testing/selftests/bpf/progs/arena_htab.c17
-rw-r--r--tools/testing/selftests/bpf/progs/arena_list.c3
-rw-r--r--tools/testing/selftests/bpf/progs/bench_local_storage_create.c5
-rw-r--r--tools/testing/selftests/bpf/progs/bind4_prog.c24
-rw-r--r--tools/testing/selftests/bpf/progs/bind6_prog.c24
-rw-r--r--tools/testing/selftests/bpf/progs/bind_prog.h19
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_cc_cubic.c189
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_cubic.c74
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_dctcp.c98
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_dctcp_release.c10
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c6
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c6
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_misc.h15
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_tcp_nogpl.c8
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_tracing_net.h52
-rw-r--r--tools/testing/selftests/bpf/progs/btf_dump_test_case_multidim.c4
-rw-r--r--tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c4
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h2
-rw-r--r--tools/testing/selftests/bpf/progs/connect4_prog.c12
-rw-r--r--tools/testing/selftests/bpf/progs/connect6_prog.c6
-rw-r--r--tools/testing/selftests/bpf/progs/connect_unix_prog.c6
-rw-r--r--tools/testing/selftests/bpf/progs/cpumask_common.h2
-rw-r--r--tools/testing/selftests/bpf/progs/cpumask_failure.c3
-rw-r--r--tools/testing/selftests/bpf/progs/cpumask_success.c171
-rw-r--r--tools/testing/selftests/bpf/progs/crypto_basic.c68
-rw-r--r--tools/testing/selftests/bpf/progs/crypto_bench.c107
-rw-r--r--tools/testing/selftests/bpf/progs/crypto_common.h66
-rw-r--r--tools/testing/selftests/bpf/progs/crypto_sanity.c161
-rw-r--r--tools/testing/selftests/bpf/progs/dummy_st_ops_success.c15
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_fail.c42
-rw-r--r--tools/testing/selftests/bpf/progs/fib_lookup.c2
-rw-r--r--tools/testing/selftests/bpf/progs/for_each_multi_maps.c49
-rw-r--r--tools/testing/selftests/bpf/progs/get_func_ip_test.c7
-rw-r--r--tools/testing/selftests/bpf/progs/getpeername4_prog.c24
-rw-r--r--tools/testing/selftests/bpf/progs/getpeername6_prog.c31
-rw-r--r--tools/testing/selftests/bpf/progs/getsockname4_prog.c24
-rw-r--r--tools/testing/selftests/bpf/progs/getsockname6_prog.c31
-rw-r--r--tools/testing/selftests/bpf/progs/ip_check_defrag.c10
-rw-r--r--tools/testing/selftests/bpf/progs/iters.c4
-rw-r--r--tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c4
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_test.c37
-rw-r--r--tools/testing/selftests/bpf/progs/kprobe_multi_session.c78
-rw-r--r--tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c58
-rw-r--r--tools/testing/selftests/bpf/progs/linked_list.c47
-rw-r--r--tools/testing/selftests/bpf/progs/local_storage.c20
-rw-r--r--tools/testing/selftests/bpf/progs/lsm_cgroup.c8
-rw-r--r--tools/testing/selftests/bpf/progs/map_kptr.c10
-rw-r--r--tools/testing/selftests/bpf/progs/map_percpu_stats.c2
-rw-r--r--tools/testing/selftests/bpf/progs/mptcp_sock.c4
-rw-r--r--tools/testing/selftests/bpf/progs/mptcpify.c4
-rw-r--r--tools/testing/selftests/bpf/progs/nested_trust_common.h2
-rw-r--r--tools/testing/selftests/bpf/progs/nested_trust_failure.c8
-rw-r--r--tools/testing/selftests/bpf/progs/nested_trust_success.c8
-rw-r--r--tools/testing/selftests/bpf/progs/netif_receive_skb.c5
-rw-r--r--tools/testing/selftests/bpf/progs/preempt_lock.c132
-rw-r--r--tools/testing/selftests/bpf/progs/profiler.inc.h5
-rw-r--r--tools/testing/selftests/bpf/progs/rbtree.c77
-rw-r--r--tools/testing/selftests/bpf/progs/rbtree_fail.c2
-rw-r--r--tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c4
-rw-r--r--tools/testing/selftests/bpf/progs/sendmsg4_prog.c6
-rw-r--r--tools/testing/selftests/bpf/progs/sendmsg6_prog.c57
-rw-r--r--tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c6
-rw-r--r--tools/testing/selftests/bpf/progs/setget_sockopt.c5
-rw-r--r--tools/testing/selftests/bpf/progs/skb_pkt_end.c13
-rw-r--r--tools/testing/selftests/bpf/progs/sock_addr_kern.c65
-rw-r--r--tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c16
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_detach.c10
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_forgotten_cb.c19
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_module.c36
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_nulled_out_cb.c22
-rw-r--r--tools/testing/selftests/bpf/progs/task_kfunc_common.h2
-rw-r--r--tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c12
-rw-r--r--tools/testing/selftests/bpf/progs/tcp_ca_kfunc.c121
-rw-r--r--tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c2
-rw-r--r--tools/testing/selftests/bpf/progs/tcp_ca_update.c18
-rw-r--r--tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c20
-rw-r--r--tools/testing/selftests/bpf/progs/tcp_rtt.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_access_variable_array.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_bpf_cookie.c16
-rw-r--r--tools/testing/selftests/bpf/progs/test_bpf_ma.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_bpf_nf.c109
-rw-r--r--tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c1
-rw-r--r--tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c16
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func10.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c43
-rw-r--r--tools/testing/selftests/bpf/progs/test_lwt_redirect.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_module_attach.c23
-rw-r--r--tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c31
-rw-r--r--tools/testing/selftests/bpf/progs/test_perf_skip.c15
-rw-r--r--tools/testing/selftests/bpf/progs/test_ringbuf_n.c47
-rw-r--r--tools/testing/selftests/bpf/progs/test_ringbuf_write.c46
-rw-r--r--tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c27
-rw-r--r--tools/testing/selftests/bpf/progs/test_sock_fields.c5
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_kern.h20
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c17
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_sysctl_loop1.c5
-rw-r--r--tools/testing/selftests/bpf/progs/test_sysctl_loop2.c5
-rw-r--r--tools/testing/selftests/bpf/progs/test_sysctl_prog.c5
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_dtime.c39
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_link.c35
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c1
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h2
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c13
-rw-r--r--tools/testing/selftests/bpf/progs/test_tunnel_kern.c47
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_noinline.c27
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_vlan.c2
-rw-r--r--tools/testing/selftests/bpf/progs/timer.c3
-rw-r--r--tools/testing/selftests/bpf/progs/timer_failure.c2
-rw-r--r--tools/testing/selftests/bpf/progs/timer_lockup.c87
-rw-r--r--tools/testing/selftests/bpf/progs/timer_mim.c2
-rw-r--r--tools/testing/selftests/bpf/progs/timer_mim_reject.c2
-rw-r--r--tools/testing/selftests/bpf/progs/tracing_struct.c54
-rw-r--r--tools/testing/selftests/bpf/progs/tracing_struct_many_args.c95
-rw-r--r--tools/testing/selftests/bpf/progs/trigger_bench.c107
-rw-r--r--tools/testing/selftests/bpf/progs/uprobe_multi.c50
-rw-r--r--tools/testing/selftests/bpf/progs/uprobe_syscall.c15
-rw-r--r--tools/testing/selftests/bpf/progs/uprobe_syscall_executed.c17
-rw-r--r--tools/testing/selftests/bpf/progs/uretprobe_stack.c96
-rw-r--r--tools/testing/selftests/bpf/progs/user_ringbuf_fail.c22
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_arena.c1
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_arena_large.c1
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bits_iter.c153
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bounds.c63
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_global_subprogs.c7
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_helper_restricted.c8
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c391
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_kfunc_prog_types.c122
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_movsx.c63
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c6
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_or_jmp32_k.c41
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_sock_addr.c331
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_sockmap_mutate.c187
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_subprog_precision.c91
-rw-r--r--tools/testing/selftests/bpf/progs/wq.c189
-rw-r--r--tools/testing/selftests/bpf/progs/wq_failures.c144
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_flowtable.c148
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c1
-rw-r--r--tools/testing/selftests/bpf/progs/xfrm_info.c1
-rw-r--r--tools/testing/selftests/bpf/test_cpp.cpp5
-rw-r--r--tools/testing/selftests/bpf/test_loader.c115
-rw-r--r--tools/testing/selftests/bpf/test_progs.h9
-rw-r--r--tools/testing/selftests/bpf/test_sock_addr.c1434
-rwxr-xr-xtools/testing/selftests/bpf/test_sock_addr.sh58
-rw-r--r--tools/testing/selftests/bpf/test_sockmap.c149
-rwxr-xr-xtools/testing/selftests/bpf/test_tc_tunnel.sh13
-rw-r--r--tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c134
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c5
-rw-r--r--tools/testing/selftests/bpf/testing_helpers.c16
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.c122
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.h9
-rw-r--r--tools/testing/selftests/bpf/uprobe_multi.c2
-rw-r--r--tools/testing/selftests/bpf/verifier/calls.c15
-rw-r--r--tools/testing/selftests/bpf/verifier/precise.c22
-rw-r--r--tools/testing/selftests/bpf/veristat.c5
-rw-r--r--tools/testing/selftests/bpf/xdp_hw_metadata.c16
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.c143
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.h14
-rw-r--r--tools/testing/selftests/breakpoints/step_after_suspend_test.c1
-rw-r--r--tools/testing/selftests/cachestat/test_cachestat.c1
-rw-r--r--tools/testing/selftests/capabilities/test_execve.c12
-rw-r--r--tools/testing/selftests/capabilities/validate_cap.c7
-rw-r--r--tools/testing/selftests/cgroup/.gitignore11
-rw-r--r--tools/testing/selftests/cgroup/Makefile27
-rw-r--r--tools/testing/selftests/cgroup/cgroup_util.c8
-rw-r--r--tools/testing/selftests/cgroup/cgroup_util.h4
-rw-r--r--tools/testing/selftests/cgroup/config1
-rw-r--r--tools/testing/selftests/cgroup/test_core.c7
-rw-r--r--tools/testing/selftests/cgroup/test_cpu.c6
-rw-r--r--tools/testing/selftests/cgroup/test_cpuset.c2
-rwxr-xr-xtools/testing/selftests/cgroup/test_cpuset_prs.sh75
-rwxr-xr-xtools/testing/selftests/cgroup/test_cpuset_v1_hp.sh46
-rw-r--r--tools/testing/selftests/cgroup/test_freezer.c2
-rw-r--r--tools/testing/selftests/cgroup/test_hugetlb_memcg.c2
-rw-r--r--tools/testing/selftests/cgroup/test_kill.c2
-rw-r--r--tools/testing/selftests/cgroup/test_kmem.c6
-rw-r--r--tools/testing/selftests/cgroup/test_memcontrol.c6
-rw-r--r--tools/testing/selftests/cgroup/test_pids.c178
-rw-r--r--tools/testing/selftests/cgroup/test_zswap.c138
-rw-r--r--tools/testing/selftests/clone3/clone3.c7
-rw-r--r--tools/testing/selftests/clone3/clone3_clear_sighand.c2
-rw-r--r--tools/testing/selftests/clone3/clone3_set_tid.c121
-rw-r--r--tools/testing/selftests/core/close_range_test.c55
-rwxr-xr-xtools/testing/selftests/cpufreq/cpufreq.sh3
-rwxr-xr-xtools/testing/selftests/cpufreq/main.sh47
-rwxr-xr-xtools/testing/selftests/cpufreq/module.sh6
-rw-r--r--tools/testing/selftests/damon/Makefile16
-rw-r--r--tools/testing/selftests/damon/_damon_sysfs.py244
-rw-r--r--tools/testing/selftests/damon/access_memory.c2
-rw-r--r--tools/testing/selftests/damon/access_memory_even.c42
-rw-r--r--tools/testing/selftests/damon/damon_nr_regions.py145
-rw-r--r--tools/testing/selftests/damon/damos_quota_goal.py77
-rw-r--r--tools/testing/selftests/damon/damos_tried_regions.py65
-rw-r--r--tools/testing/selftests/devices/Makefile4
-rw-r--r--tools/testing/selftests/devices/error_logs/Makefile3
-rwxr-xr-xtools/testing/selftests/devices/error_logs/test_device_error_logs.py85
-rw-r--r--tools/testing/selftests/devices/probe/Makefile4
-rw-r--r--tools/testing/selftests/devices/probe/boards/Dell Inc.,XPS 13 9300.yaml (renamed from tools/testing/selftests/devices/boards/Dell Inc.,XPS 13 9300.yaml)0
-rw-r--r--tools/testing/selftests/devices/probe/boards/google,spherion.yaml (renamed from tools/testing/selftests/devices/boards/google,spherion.yaml)4
-rwxr-xr-xtools/testing/selftests/devices/probe/test_discoverable_devices.py (renamed from tools/testing/selftests/devices/test_discoverable_devices.py)44
-rw-r--r--tools/testing/selftests/dma/dma_map_benchmark.c1
-rw-r--r--tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c251
-rw-r--r--tools/testing/selftests/drivers/dma-buf/udmabuf.c214
-rw-r--r--tools/testing/selftests/drivers/net/Makefile11
-rw-r--r--tools/testing/selftests/drivers/net/README.rst136
-rw-r--r--tools/testing/selftests/drivers/net/config2
-rw-r--r--tools/testing/selftests/drivers/net/hw/Makefile29
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/csum.py122
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/devlink_port_split.py (renamed from tools/testing/selftests/net/devlink_port_split.py)0
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/ethtool.sh (renamed from tools/testing/selftests/net/forwarding/ethtool.sh)20
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/ethtool_extended_state.sh (renamed from tools/testing/selftests/net/forwarding/ethtool_extended_state.sh)5
-rw-r--r--tools/testing/selftests/drivers/net/hw/ethtool_lib.sh (renamed from tools/testing/selftests/net/forwarding/ethtool_lib.sh)0
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/ethtool_mm.sh (renamed from tools/testing/selftests/net/forwarding/ethtool_mm.sh)3
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/ethtool_rmon.sh (renamed from tools/testing/selftests/net/forwarding/ethtool_rmon.sh)8
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/hw_stats_l3.sh (renamed from tools/testing/selftests/net/forwarding/hw_stats_l3.sh)20
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/hw_stats_l3_gre.sh (renamed from tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh)8
-rw-r--r--tools/testing/selftests/drivers/net/hw/lib/py/__init__.py16
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/loopback.sh (renamed from tools/testing/selftests/net/forwarding/loopback.sh)5
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/pp_alloc_fail.py129
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_ctx.py553
-rw-r--r--tools/testing/selftests/drivers/net/hw/settings1
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/__init__.py19
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/env.py241
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/load.py58
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/remote.py15
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/remote_netns.py21
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/remote_ssh.py39
-rwxr-xr-xtools/testing/selftests/drivers/net/microchip/ksz9477_qos.sh668
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh14
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh71
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh18
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh1
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh55
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh1
-rwxr-xr-xtools/testing/selftests/drivers/net/ping.py51
-rwxr-xr-xtools/testing/selftests/drivers/net/queues.py66
-rwxr-xr-xtools/testing/selftests/drivers/net/stats.py144
-rw-r--r--tools/testing/selftests/drivers/net/virtio_net/Makefile15
-rwxr-xr-xtools/testing/selftests/drivers/net/virtio_net/basic_features.sh131
-rw-r--r--tools/testing/selftests/drivers/net/virtio_net/config8
-rw-r--r--tools/testing/selftests/drivers/net/virtio_net/virtio_net_common.sh99
-rw-r--r--tools/testing/selftests/drivers/platform/x86/intel/ifs/Makefile6
-rwxr-xr-xtools/testing/selftests/drivers/platform/x86/intel/ifs/test_ifs.sh494
-rw-r--r--tools/testing/selftests/exec/Makefile20
-rw-r--r--tools/testing/selftests/exec/load_address.c67
-rw-r--r--tools/testing/selftests/exec/recursion-depth.c10
-rw-r--r--tools/testing/selftests/fchmodat2/Makefile11
-rw-r--r--tools/testing/selftests/filesystems/binderfs/Makefile2
-rw-r--r--tools/testing/selftests/filesystems/eventfd/eventfd_test.c136
-rw-r--r--tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c1
-rw-r--r--tools/testing/selftests/filesystems/statmount/Makefile2
-rw-r--r--tools/testing/selftests/filesystems/statmount/statmount.h46
-rw-r--r--tools/testing/selftests/filesystems/statmount/statmount_test.c157
-rw-r--r--tools/testing/selftests/filesystems/statmount/statmount_test_ns.c364
-rw-r--r--tools/testing/selftests/ftrace/config26
-rwxr-xr-xtools/testing/selftests/ftrace/ftracetest8
-rwxr-xr-xtools/testing/selftests/ftrace/ftracetest-ktap2
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/add_remove_btfarg.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/fprobe_args_vfs.tc41
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/fprobe_entry_arg.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc20
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi.tc103
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc29
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc40
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc3
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_entry_arg.tc2
-rw-r--r--tools/testing/selftests/futex/Makefile2
-rw-r--r--tools/testing/selftests/futex/functional/Makefile2
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue_pi.c2
-rw-r--r--tools/testing/selftests/hid/config.common1
-rw-r--r--tools/testing/selftests/hid/hid_bpf.c564
-rw-r--r--tools/testing/selftests/hid/progs/hid.c438
-rw-r--r--tools/testing/selftests/hid/progs/hid_bpf_helpers.h52
-rw-r--r--tools/testing/selftests/hid/tests/base.py92
-rw-r--r--tools/testing/selftests/hid/tests/base_device.py421
-rw-r--r--tools/testing/selftests/hid/tests/base_gamepad.py238
-rw-r--r--tools/testing/selftests/hid/tests/test_gamepad.py457
-rw-r--r--tools/testing/selftests/hid/tests/test_tablet.py723
-rw-r--r--tools/testing/selftests/intel_pstate/Makefile2
-rw-r--r--tools/testing/selftests/iommu/Makefile2
-rw-r--r--tools/testing/selftests/iommu/iommufd.c86
-rw-r--r--tools/testing/selftests/iommu/iommufd_fail_nth.c2
-rw-r--r--tools/testing/selftests/iommu/iommufd_utils.h92
-rw-r--r--tools/testing/selftests/ipc/msgque.c11
-rw-r--r--tools/testing/selftests/kcmp/kcmp_test.c2
-rw-r--r--tools/testing/selftests/kselftest.h57
-rw-r--r--tools/testing/selftests/kselftest/ksft.py (renamed from tools/testing/selftests/devices/ksft.py)0
-rw-r--r--tools/testing/selftests/kselftest/ktap_helpers.sh4
-rwxr-xr-xtools/testing/selftests/kselftest_deps.sh1
-rw-r--r--tools/testing/selftests/kselftest_harness.h151
-rw-r--r--tools/testing/selftests/kvm/Makefile12
-rw-r--r--tools/testing/selftests/kvm/aarch64/arch_timer.c11
-rw-r--r--tools/testing/selftests/kvm/aarch64/page_fault_test.c5
-rw-r--r--tools/testing/selftests/kvm/aarch64/psci_test.c4
-rw-r--r--tools/testing/selftests/kvm/aarch64/set_id_regs.c140
-rw-r--r--tools/testing/selftests/kvm/aarch64/vgic_init.c1
-rw-r--r--tools/testing/selftests/kvm/aarch64/vgic_irq.c15
-rw-r--r--tools/testing/selftests/kvm/aarch64/vgic_lpi_stress.c410
-rw-r--r--tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c6
-rw-r--r--tools/testing/selftests/kvm/arch_timer.c4
-rw-r--r--tools/testing/selftests/kvm/demand_paging_test.c94
-rw-r--r--tools/testing/selftests/kvm/dirty_log_perf_test.c15
-rw-r--r--tools/testing/selftests/kvm/dirty_log_test.c26
-rw-r--r--tools/testing/selftests/kvm/guest_memfd_test.c4
-rw-r--r--tools/testing/selftests/kvm/guest_print_test.c1
-rw-r--r--tools/testing/selftests/kvm/hardware_disable_test.c3
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/gic.h21
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/gic_v3.h586
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/gic_v3_its.h19
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/processor.h21
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/ucall.h2
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/vgic.h5
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h1111
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util_base.h1135
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util_types.h20
-rw-r--r--tools/testing/selftests/kvm/include/memstress.h1
-rw-r--r--tools/testing/selftests/kvm/include/riscv/processor.h49
-rw-r--r--tools/testing/selftests/kvm/include/riscv/sbi.h141
-rw-r--r--tools/testing/selftests/kvm/include/riscv/ucall.h1
-rw-r--r--tools/testing/selftests/kvm/include/s390x/ucall.h2
-rw-r--r--tools/testing/selftests/kvm/include/test_util.h19
-rw-r--r--tools/testing/selftests/kvm/include/userfaultfd_util.h19
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/apic.h8
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h28
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/processor.h30
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/sev.h19
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/ucall.h2
-rw-r--r--tools/testing/selftests/kvm/kvm_binary_stats_test.c2
-rw-r--r--tools/testing/selftests/kvm/kvm_create_max_vcpus.c2
-rw-r--r--tools/testing/selftests/kvm/kvm_page_table_test.c4
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/gic.c18
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/gic_private.h4
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/gic_v3.c99
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/gic_v3_its.c248
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/processor.c2
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/vgic.c38
-rw-r--r--tools/testing/selftests/kvm/lib/assert.c3
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c26
-rw-r--r--tools/testing/selftests/kvm/lib/memstress.c13
-rw-r--r--tools/testing/selftests/kvm/lib/riscv/processor.c13
-rw-r--r--tools/testing/selftests/kvm/lib/riscv/ucall.c1
-rw-r--r--tools/testing/selftests/kvm/lib/test_util.c2
-rw-r--r--tools/testing/selftests/kvm/lib/ucall_common.c5
-rw-r--r--tools/testing/selftests/kvm/lib/userfaultfd_util.c156
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/processor.c342
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/sev.c45
-rw-r--r--tools/testing/selftests/kvm/max_guest_memory_test.c2
-rw-r--r--tools/testing/selftests/kvm/memslot_modification_stress_test.c9
-rw-r--r--tools/testing/selftests/kvm/pre_fault_memory_test.c146
-rw-r--r--tools/testing/selftests/kvm/riscv/arch_timer.c6
-rw-r--r--tools/testing/selftests/kvm/riscv/ebreak_test.c83
-rw-r--r--tools/testing/selftests/kvm/riscv/get-reg-list.c32
-rw-r--r--tools/testing/selftests/kvm/riscv/sbi_pmu_test.c682
-rw-r--r--tools/testing/selftests/kvm/rseq_test.c48
-rw-r--r--tools/testing/selftests/kvm/s390x/cmma_test.c3
-rw-r--r--tools/testing/selftests/kvm/s390x/memop.c1
-rw-r--r--tools/testing/selftests/kvm/s390x/shared_zeropage_test.c111
-rw-r--r--tools/testing/selftests/kvm/s390x/sync_regs_test.c2
-rw-r--r--tools/testing/selftests/kvm/s390x/tprot.c1
-rw-r--r--tools/testing/selftests/kvm/set_memory_region_test.c21
-rw-r--r--tools/testing/selftests/kvm/steal_time.c53
-rw-r--r--tools/testing/selftests/kvm/x86_64/amx_test.c4
-rw-r--r--tools/testing/selftests/kvm/x86_64/apic_bus_clock_test.c194
-rw-r--r--tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c1
-rw-r--r--tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c5
-rw-r--r--tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c3
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_features.c6
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_ipi.c5
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c1
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/kvm_pv_test.c3
-rw-r--r--tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c22
-rw-r--r--tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c4
-rw-r--r--tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c3
-rwxr-xr-xtools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh13
-rw-r--r--tools/testing/selftests/kvm/x86_64/platform_info_test.c61
-rw-r--r--tools/testing/selftests/kvm/x86_64/pmu_counters_test.c52
-rw-r--r--tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c44
-rw-r--r--tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c1
-rw-r--r--tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c17
-rw-r--r--tools/testing/selftests/kvm/x86_64/set_sregs_test.c1
-rw-r--r--tools/testing/selftests/kvm/x86_64/sev_init2_tests.c152
-rw-r--r--tools/testing/selftests/kvm/x86_64/sev_smoke_test.c96
-rw-r--r--tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c6
-rw-r--r--tools/testing/selftests/kvm/x86_64/smm_test.c1
-rw-r--r--tools/testing/selftests/kvm/x86_64/state_test.c1
-rw-r--r--tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c3
-rw-r--r--tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c5
-rw-r--r--tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c5
-rw-r--r--tools/testing/selftests/kvm/x86_64/sync_regs_test.c2
-rw-r--r--tools/testing/selftests/kvm/x86_64/ucna_injection_test.c7
-rw-r--r--tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c15
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c3
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c3
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c4
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c1
-rw-r--r--tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c4
-rw-r--r--tools/testing/selftests/kvm/x86_64/xapic_state_test.c1
-rw-r--r--tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c3
-rw-r--r--tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c59
-rw-r--r--tools/testing/selftests/kvm/x86_64/xss_msr_test.c2
-rw-r--r--tools/testing/selftests/landlock/base_test.c76
-rw-r--r--tools/testing/selftests/landlock/config1
-rw-r--r--tools/testing/selftests/landlock/fs_test.c615
-rw-r--r--tools/testing/selftests/lib.mk49
-rwxr-xr-xtools/testing/selftests/livepatch/test-livepatch.sh138
-rwxr-xr-xtools/testing/selftests/livepatch/test-syscall.sh5
-rwxr-xr-xtools/testing/selftests/livepatch/test-sysfs.sh48
-rw-r--r--tools/testing/selftests/lkdtm/tests.txt1
-rw-r--r--tools/testing/selftests/membarrier/membarrier_test_multi_thread.c2
-rw-r--r--tools/testing/selftests/membarrier/membarrier_test_single_thread.c2
-rw-r--r--tools/testing/selftests/memfd/fuse_test.c2
-rw-r--r--tools/testing/selftests/memfd/memfd_test.c2
-rw-r--r--tools/testing/selftests/mm/.gitignore4
-rw-r--r--tools/testing/selftests/mm/Makefile14
-rw-r--r--tools/testing/selftests/mm/compaction_test.c91
-rw-r--r--tools/testing/selftests/mm/cow.c108
-rw-r--r--tools/testing/selftests/mm/droppable.c53
-rw-r--r--tools/testing/selftests/mm/gup_longterm.c18
-rw-r--r--tools/testing/selftests/mm/gup_test.c5
-rw-r--r--tools/testing/selftests/mm/hugepage-mremap.c2
-rw-r--r--tools/testing/selftests/mm/hugetlb-soft-offline.c228
-rw-r--r--tools/testing/selftests/mm/hugetlb_dio.c117
-rw-r--r--tools/testing/selftests/mm/hugetlb_madv_vs_map.c16
-rw-r--r--tools/testing/selftests/mm/ksm_functional_tests.c181
-rw-r--r--tools/testing/selftests/mm/madv_populate.c2
-rw-r--r--tools/testing/selftests/mm/map_fixed_noreplace.c24
-rw-r--r--tools/testing/selftests/mm/mdwe_test.c1
-rw-r--r--tools/testing/selftests/mm/memfd_secret.c65
-rw-r--r--tools/testing/selftests/mm/mkdirty.c10
-rw-r--r--tools/testing/selftests/mm/mlock2-tests.c15
-rw-r--r--tools/testing/selftests/mm/mlock2.h1
-rw-r--r--tools/testing/selftests/mm/mremap_test.c206
-rw-r--r--tools/testing/selftests/mm/mseal_helpers.h41
-rw-r--r--tools/testing/selftests/mm/mseal_test.c1847
-rw-r--r--tools/testing/selftests/mm/pagemap_ioctl.c10
-rw-r--r--tools/testing/selftests/mm/protection_keys.c2
-rwxr-xr-xtools/testing/selftests/mm/run_vmtests.sh20
-rw-r--r--tools/testing/selftests/mm/seal_elf.c148
-rw-r--r--tools/testing/selftests/mm/soft-dirty.c2
-rw-r--r--tools/testing/selftests/mm/split_huge_page_test.c3
-rw-r--r--tools/testing/selftests/mm/thuge-gen.c15
-rw-r--r--tools/testing/selftests/mm/uffd-common.c4
-rw-r--r--tools/testing/selftests/mm/uffd-common.h1
-rw-r--r--tools/testing/selftests/mm/uffd-stress.c31
-rw-r--r--tools/testing/selftests/mm/uffd-unit-tests.c14
-rw-r--r--tools/testing/selftests/mm/va_high_addr_switch.c468
-rwxr-xr-xtools/testing/selftests/mm/va_high_addr_switch.sh4
-rw-r--r--tools/testing/selftests/mm/virtual_address_range.c78
-rw-r--r--tools/testing/selftests/mqueue/mq_perf_tests.c6
-rw-r--r--tools/testing/selftests/net/.gitignore4
-rw-r--r--tools/testing/selftests/net/Makefile63
-rw-r--r--tools/testing/selftests/net/af_unix/Makefile2
-rw-r--r--tools/testing/selftests/net/af_unix/config3
-rw-r--r--tools/testing/selftests/net/af_unix/msg_oob.c734
-rw-r--r--tools/testing/selftests/net/af_unix/scm_rights.c307
-rw-r--r--tools/testing/selftests/net/af_unix/test_unix_oob.c436
-rwxr-xr-xtools/testing/selftests/net/amt.sh22
-rwxr-xr-xtools/testing/selftests/net/arp_ndisc_untracked_subnets.sh53
-rw-r--r--tools/testing/selftests/net/bpf.mk53
-rwxr-xr-xtools/testing/selftests/net/bpf_offload.py (renamed from tools/testing/selftests/bpf/test_offload.py)142
-rw-r--r--tools/testing/selftests/net/cmsg_sender.c52
-rwxr-xr-xtools/testing/selftests/net/cmsg_time.sh7
-rw-r--r--tools/testing/selftests/net/config9
-rw-r--r--tools/testing/selftests/net/epoll_busy_poll.c320
-rwxr-xr-xtools/testing/selftests/net/fib_rule_tests.sh46
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh24
-rw-r--r--tools/testing/selftests/net/forwarding/Makefile11
-rw-r--r--tools/testing/selftests/net/forwarding/README33
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh18
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_igmp.sh6
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mld.sh6
-rw-r--r--tools/testing/selftests/net/forwarding/devlink_lib.sh2
-rw-r--r--tools/testing/selftests/net/forwarding/forwarding.config.sample53
-rw-r--r--tools/testing/selftests/net/forwarding/ipip_lib.sh1
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh525
-rwxr-xr-xtools/testing/selftests/net/forwarding/lib_sh_test.sh208
-rwxr-xr-xtools/testing/selftests/net/forwarding/local_termination.sh30
-rwxr-xr-xtools/testing/selftests/net/forwarding/min_max_mtu.sh283
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre.sh45
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bound.sh23
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh21
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh21
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh21
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh29
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_changes.sh73
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_flower.sh43
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh65
-rw-r--r--tools/testing/selftests/net/forwarding/mirror_gre_lib.sh90
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_neigh.sh39
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_nh.sh35
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_vlan.sh21
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh69
-rw-r--r--tools/testing/selftests/net/forwarding/mirror_lib.sh79
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_vlan.sh43
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_mpath_nh.sh35
-rw-r--r--tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh12
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_mpath_nh_res.sh35
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_mpath_seed.sh333
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_nh.sh14
-rw-r--r--tools/testing/selftests/net/forwarding/sch_ets_tests.sh19
-rwxr-xr-xtools/testing/selftests/net/forwarding/sch_red.sh10
-rw-r--r--tools/testing/selftests/net/forwarding/sch_tbf_core.sh2
-rw-r--r--tools/testing/selftests/net/forwarding/tc_common.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_tunnel_key.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh8
-rw-r--r--tools/testing/selftests/net/gro.c141
-rw-r--r--tools/testing/selftests/net/hsr/Makefile3
-rw-r--r--tools/testing/selftests/net/hsr/config1
-rw-r--r--tools/testing/selftests/net/hsr/hsr_common.sh84
-rwxr-xr-xtools/testing/selftests/net/hsr/hsr_ping.sh115
-rwxr-xr-xtools/testing/selftests/net/hsr/hsr_redbox.sh136
-rw-r--r--tools/testing/selftests/net/ip_local_port_range.c2
-rw-r--r--tools/testing/selftests/net/lib.sh187
-rw-r--r--tools/testing/selftests/net/lib/.gitignore2
-rw-r--r--tools/testing/selftests/net/lib/Makefile15
-rw-r--r--tools/testing/selftests/net/lib/csum.c (renamed from tools/testing/selftests/net/csum.c)18
-rw-r--r--tools/testing/selftests/net/lib/py/__init__.py8
-rw-r--r--tools/testing/selftests/net/lib/py/consts.py9
-rw-r--r--tools/testing/selftests/net/lib/py/ksft.py192
-rw-r--r--tools/testing/selftests/net/lib/py/netns.py31
-rw-r--r--tools/testing/selftests/net/lib/py/nsim.py134
-rw-r--r--tools/testing/selftests/net/lib/py/utils.py155
-rw-r--r--tools/testing/selftests/net/lib/py/ynl.py49
-rwxr-xr-xtools/testing/selftests/net/mptcp/diag.sh53
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_connect.c8
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh2
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh273
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_lib.sh198
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_sockopt.sh34
-rwxr-xr-xtools/testing/selftests/net/mptcp/pm_netlink.sh295
-rw-r--r--tools/testing/selftests/net/mptcp/pm_nl_ctl.c2
-rwxr-xr-xtools/testing/selftests/net/mptcp/simult_flows.sh26
-rwxr-xr-xtools/testing/selftests/net/mptcp/userspace_pm.sh46
-rw-r--r--tools/testing/selftests/net/msg_zerocopy.c14
-rw-r--r--tools/testing/selftests/net/nat6to4.bpf.c (renamed from tools/testing/selftests/net/nat6to4.c)0
-rw-r--r--tools/testing/selftests/net/netfilter/.gitignore (renamed from tools/testing/selftests/netfilter/.gitignore)4
-rw-r--r--tools/testing/selftests/net/netfilter/Makefile52
-rw-r--r--tools/testing/selftests/net/netfilter/audit_logread.c (renamed from tools/testing/selftests/netfilter/audit_logread.c)0
-rwxr-xr-xtools/testing/selftests/net/netfilter/br_netfilter.sh171
-rwxr-xr-xtools/testing/selftests/net/netfilter/bridge_brouter.sh122
-rw-r--r--tools/testing/selftests/net/netfilter/config89
-rw-r--r--tools/testing/selftests/net/netfilter/connect_close.c (renamed from tools/testing/selftests/netfilter/connect_close.c)0
-rw-r--r--tools/testing/selftests/net/netfilter/conntrack_dump_flush.c (renamed from tools/testing/selftests/netfilter/conntrack_dump_flush.c)10
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_icmp_related.sh (renamed from tools/testing/selftests/netfilter/conntrack_icmp_related.sh)179
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_ipip_mtu.sh (renamed from tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh)118
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_sctp_collision.sh87
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_tcp_unreplied.sh164
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_vrf.sh (renamed from tools/testing/selftests/netfilter/conntrack_vrf.sh)121
-rwxr-xr-xtools/testing/selftests/net/netfilter/ipvs.sh211
-rw-r--r--tools/testing/selftests/net/netfilter/lib.sh10
-rwxr-xr-xtools/testing/selftests/net/netfilter/nf_conntrack_packetdrill.sh71
-rwxr-xr-xtools/testing/selftests/net/netfilter/nf_nat_edemux.sh97
-rw-r--r--tools/testing/selftests/net/netfilter/nf_queue.c (renamed from tools/testing/selftests/netfilter/nf-queue.c)0
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_audit.sh (renamed from tools/testing/selftests/netfilter/nft_audit.sh)31
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_concat_range.sh (renamed from tools/testing/selftests/netfilter/nft_concat_range.sh)289
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_concat_range_perf.sh9
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_conntrack_helper.sh171
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_fib.sh234
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_flowtable.sh (renamed from tools/testing/selftests/netfilter/nft_flowtable.sh)371
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_meta.sh (renamed from tools/testing/selftests/netfilter/nft_meta.sh)4
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_nat.sh (renamed from tools/testing/selftests/netfilter/nft_nat.sh)480
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_nat_zones.sh (renamed from tools/testing/selftests/netfilter/nft_nat_zones.sh)194
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_queue.sh454
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_synproxy.sh96
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_zones_many.sh (renamed from tools/testing/selftests/netfilter/nft_zones_many.sh)97
-rwxr-xr-xtools/testing/selftests/net/netfilter/packetdrill/common.sh33
-rw-r--r--tools/testing/selftests/net/netfilter/packetdrill/conntrack_ack_loss_stall.pkt118
-rw-r--r--tools/testing/selftests/net/netfilter/packetdrill/conntrack_inexact_rst.pkt62
-rw-r--r--tools/testing/selftests/net/netfilter/packetdrill/conntrack_rst_invalid.pkt59
-rw-r--r--tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt44
-rw-r--r--tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_old.pkt51
-rw-r--r--tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_reuse.pkt34
-rwxr-xr-xtools/testing/selftests/net/netfilter/rpath.sh (renamed from tools/testing/selftests/netfilter/rpath.sh)10
-rw-r--r--tools/testing/selftests/net/netfilter/sctp_collision.c (renamed from tools/testing/selftests/netfilter/sctp_collision.c)0
-rw-r--r--tools/testing/selftests/net/netfilter/settings1
-rwxr-xr-xtools/testing/selftests/net/netfilter/xt_string.sh (renamed from tools/testing/selftests/netfilter/xt_string.sh)89
-rwxr-xr-xtools/testing/selftests/net/netns-sysctl.sh40
-rwxr-xr-xtools/testing/selftests/net/nl_netdev.py98
-rwxr-xr-xtools/testing/selftests/net/openvswitch/openvswitch.sh171
-rw-r--r--tools/testing/selftests/net/openvswitch/ovs-dpctl.py659
-rw-r--r--tools/testing/selftests/net/openvswitch/settings1
-rwxr-xr-xtools/testing/selftests/net/pmtu.sh145
-rw-r--r--tools/testing/selftests/net/sample_map_ret0.bpf.c (renamed from tools/testing/selftests/bpf/progs/sample_map_ret0.c)2
-rw-r--r--tools/testing/selftests/net/sample_ret0.bpf.c (renamed from tools/testing/selftests/bpf/progs/sample_ret0.c)3
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dx4_netfilter_test.sh335
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dx6_netfilter_test.sh340
-rw-r--r--tools/testing/selftests/net/tcp_ao/Makefile2
-rw-r--r--tools/testing/selftests/net/tcp_ao/self-connect.c18
-rwxr-xr-xtools/testing/selftests/net/test_bridge_neigh_suppress.sh14
-rwxr-xr-xtools/testing/selftests/net/udpgro.sh2
-rwxr-xr-xtools/testing/selftests/net/udpgro_bench.sh2
-rwxr-xr-xtools/testing/selftests/net/udpgro_frglist.sh8
-rwxr-xr-xtools/testing/selftests/net/udpgro_fwd.sh2
-rw-r--r--tools/testing/selftests/net/udpgso.c15
-rwxr-xr-xtools/testing/selftests/net/udpgso.sh43
-rwxr-xr-xtools/testing/selftests/net/veth.sh2
-rwxr-xr-xtools/testing/selftests/net/vrf_route_leaking.sh93
-rw-r--r--tools/testing/selftests/net/xdp_dummy.bpf.c (renamed from tools/testing/selftests/net/xdp_dummy.c)0
-rwxr-xr-xtools/testing/selftests/net/xfrm_policy.sh4
-rw-r--r--tools/testing/selftests/net/ynl.mk21
-rw-r--r--tools/testing/selftests/netfilter/Makefile21
-rwxr-xr-xtools/testing/selftests/netfilter/bridge_brouter.sh146
-rw-r--r--tools/testing/selftests/netfilter/bridge_netfilter.sh188
-rw-r--r--tools/testing/selftests/netfilter/config9
-rwxr-xr-xtools/testing/selftests/netfilter/conntrack_sctp_collision.sh89
-rwxr-xr-xtools/testing/selftests/netfilter/conntrack_tcp_unreplied.sh167
-rwxr-xr-xtools/testing/selftests/netfilter/ipvs.sh228
-rwxr-xr-xtools/testing/selftests/netfilter/nf_nat_edemux.sh127
-rwxr-xr-xtools/testing/selftests/netfilter/nft_conntrack_helper.sh197
-rwxr-xr-xtools/testing/selftests/netfilter/nft_fib.sh273
-rwxr-xr-xtools/testing/selftests/netfilter/nft_queue.sh449
-rwxr-xr-xtools/testing/selftests/netfilter/nft_synproxy.sh117
-rwxr-xr-xtools/testing/selftests/netfilter/nft_trans_stress.sh151
-rw-r--r--tools/testing/selftests/netfilter/settings1
-rw-r--r--tools/testing/selftests/nolibc/Makefile2
-rw-r--r--tools/testing/selftests/nolibc/nolibc-test.c163
-rwxr-xr-xtools/testing/selftests/nolibc/run-tests.sh9
-rw-r--r--tools/testing/selftests/openat2/Makefile14
-rw-r--r--tools/testing/selftests/openat2/openat2_test.c1
-rw-r--r--tools/testing/selftests/perf_events/.gitignore1
-rw-r--r--tools/testing/selftests/perf_events/Makefile2
-rw-r--r--tools/testing/selftests/perf_events/watermark_signal.c146
-rw-r--r--tools/testing/selftests/pidfd/config2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_fdinfo_test.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_open_test.c4
-rw-r--r--tools/testing/selftests/pidfd/pidfd_poll_test.c2
-rw-r--r--tools/testing/selftests/pidfd/pidfd_setns_test.c260
-rw-r--r--tools/testing/selftests/pidfd/pidfd_test.c2
-rwxr-xr-xtools/testing/selftests/power_supply/test_power_supply_properties.sh2
-rw-r--r--tools/testing/selftests/powerpc/Makefile11
-rw-r--r--tools/testing/selftests/powerpc/alignment/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/Makefile5
-rw-r--r--tools/testing/selftests/powerpc/cache_shape/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/copyloops/Makefile21
-rw-r--r--tools/testing/selftests/powerpc/dexcr/.gitignore2
-rw-r--r--tools/testing/selftests/powerpc/dexcr/Makefile9
-rw-r--r--tools/testing/selftests/powerpc/dexcr/chdexcr.c112
-rw-r--r--tools/testing/selftests/powerpc/dexcr/dexcr.c40
-rw-r--r--tools/testing/selftests/powerpc/dexcr/dexcr.h57
-rw-r--r--tools/testing/selftests/powerpc/dexcr/dexcr_test.c215
-rw-r--r--tools/testing/selftests/powerpc/dexcr/hashchk_test.c8
-rw-r--r--tools/testing/selftests/powerpc/dexcr/lsdexcr.c103
-rw-r--r--tools/testing/selftests/powerpc/dscr/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/eeh/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/flags.mk9
-rw-r--r--tools/testing/selftests/powerpc/math/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/mce/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/mm/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/nx-gzip/Makefile5
-rw-r--r--tools/testing/selftests/powerpc/papr_attributes/Makefile3
-rw-r--r--tools/testing/selftests/powerpc/papr_sysparm/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/papr_vpd/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/pmu/Makefile44
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/Makefile21
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile5
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile5
-rw-r--r--tools/testing/selftests/powerpc/primitives/Makefile5
-rw-r--r--tools/testing/selftests/powerpc/ptrace/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/security/Makefile5
-rw-r--r--tools/testing/selftests/powerpc/signal/Makefile4
-rw-r--r--tools/testing/selftests/powerpc/stringloops/Makefile11
-rw-r--r--tools/testing/selftests/powerpc/switch_endian/Makefile5
-rw-r--r--tools/testing/selftests/powerpc/syscalls/Makefile5
-rw-r--r--tools/testing/selftests/powerpc/tm/Makefile1
-rw-r--r--tools/testing/selftests/powerpc/vphn/Makefile5
-rw-r--r--tools/testing/selftests/proc/.gitignore2
-rw-r--r--tools/testing/selftests/proc/Makefile4
-rw-r--r--tools/testing/selftests/proc/proc-2-is-kthread.c53
-rw-r--r--tools/testing/selftests/proc/proc-empty-vm.c3
-rw-r--r--tools/testing/selftests/proc/proc-pid-vm.c86
-rw-r--r--tools/testing/selftests/proc/proc-self-isnt-kthread.c37
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/torture.sh6
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE095
-rw-r--r--tools/testing/selftests/resctrl/Makefile6
-rw-r--r--tools/testing/selftests/resctrl/cache.c10
-rw-r--r--tools/testing/selftests/resctrl/cat_test.c45
-rw-r--r--tools/testing/selftests/resctrl/cmt_test.c30
-rw-r--r--tools/testing/selftests/resctrl/mba_test.c36
-rw-r--r--tools/testing/selftests/resctrl/mbm_test.c36
-rw-r--r--tools/testing/selftests/resctrl/resctrl.h58
-rw-r--r--tools/testing/selftests/resctrl/resctrl_tests.c26
-rw-r--r--tools/testing/selftests/resctrl/resctrl_val.c379
-rw-r--r--tools/testing/selftests/resctrl/resctrlfs.c67
-rw-r--r--tools/testing/selftests/ring-buffer/.gitignore1
-rw-r--r--tools/testing/selftests/ring-buffer/Makefile7
-rw-r--r--tools/testing/selftests/ring-buffer/config2
-rw-r--r--tools/testing/selftests/ring-buffer/map_test.c294
-rw-r--r--tools/testing/selftests/riscv/Makefile2
-rw-r--r--tools/testing/selftests/riscv/hwprobe/.gitignore2
-rw-r--r--tools/testing/selftests/riscv/mm/Makefile2
-rw-r--r--tools/testing/selftests/riscv/sigreturn/.gitignore1
-rw-r--r--tools/testing/selftests/riscv/sigreturn/Makefile12
-rw-r--r--tools/testing/selftests/riscv/sigreturn/sigreturn.c82
-rw-r--r--tools/testing/selftests/riscv/vector/vstate_prctl.c6
-rw-r--r--tools/testing/selftests/sched/cs_prctl_test.c10
-rw-r--r--tools/testing/selftests/seccomp/seccomp_benchmark.c6
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c133
-rw-r--r--tools/testing/selftests/sgx/Makefile2
-rw-r--r--tools/testing/selftests/sigaltstack/current_stack_pointer.h2
-rw-r--r--tools/testing/selftests/sync/sync_test.c3
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json44
-rw-r--r--tools/testing/selftests/timens/exec.c6
-rw-r--r--tools/testing/selftests/timens/timer.c2
-rw-r--r--tools/testing/selftests/timens/timerfd.c2
-rw-r--r--tools/testing/selftests/timens/vfork_exec.c4
-rw-r--r--tools/testing/selftests/timers/adjtick.c4
-rw-r--r--tools/testing/selftests/timers/alarmtimer-suspend.c4
-rw-r--r--tools/testing/selftests/timers/change_skew.c4
-rw-r--r--tools/testing/selftests/timers/freq-step.c4
-rw-r--r--tools/testing/selftests/timers/leap-a-day.c10
-rw-r--r--tools/testing/selftests/timers/leapcrash.c4
-rw-r--r--tools/testing/selftests/timers/mqueue-lat.c4
-rw-r--r--tools/testing/selftests/timers/posix_timers.c12
-rw-r--r--tools/testing/selftests/timers/raw_skew.c6
-rw-r--r--tools/testing/selftests/timers/rtcpie.c3
-rw-r--r--tools/testing/selftests/timers/set-2038.c4
-rw-r--r--tools/testing/selftests/timers/set-tai.c4
-rw-r--r--tools/testing/selftests/timers/set-timer-lat.c4
-rw-r--r--tools/testing/selftests/timers/set-tz.c4
-rw-r--r--tools/testing/selftests/timers/skew_consistency.c4
-rw-r--r--tools/testing/selftests/timers/threadtest.c2
-rw-r--r--tools/testing/selftests/timers/valid-adjtimex.c6
-rw-r--r--tools/testing/selftests/tmpfs/Makefile1
-rw-r--r--tools/testing/selftests/tty/tty_tstamp_update.c48
-rwxr-xr-xtools/testing/selftests/turbostat/added_perf_counters.py178
-rwxr-xr-xtools/testing/selftests/turbostat/smi_aperf_mperf.py157
-rw-r--r--tools/testing/selftests/user_events/ftrace_test.c8
-rw-r--r--tools/testing/selftests/vDSO/.gitignore2
-rw-r--r--tools/testing/selftests/vDSO/Makefile47
-rw-r--r--tools/testing/selftests/vDSO/parse_vdso.c16
-rw-r--r--tools/testing/selftests/vDSO/vdso_config.h6
-rw-r--r--tools/testing/selftests/vDSO/vdso_standalone_test_x86.c18
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_chacha.c43
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_getcpu.c16
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_getrandom.c288
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_gettimeofday.c26
-rw-r--r--tools/testing/selftests/wireguard/qemu/Makefile8
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/riscv32.config2
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/riscv64.config2
-rw-r--r--tools/testing/selftests/wireguard/qemu/kernel.config1
-rw-r--r--tools/testing/selftests/x86/Makefile31
-rw-r--r--tools/testing/selftests/x86/amx.c43
-rw-r--r--tools/testing/selftests/x86/clang_helpers_32.S11
-rw-r--r--tools/testing/selftests/x86/clang_helpers_64.S28
-rw-r--r--tools/testing/selftests/x86/fsgsbase.c6
-rw-r--r--tools/testing/selftests/x86/fsgsbase_restore.c11
-rw-r--r--tools/testing/selftests/x86/lam.c4
-rw-r--r--tools/testing/selftests/x86/sigreturn.c2
-rw-r--r--tools/testing/selftests/x86/syscall_arg_fault.c1
-rw-r--r--tools/testing/selftests/x86/sysret_rip.c20
-rw-r--r--tools/testing/selftests/x86/test_FISTTP.c8
-rw-r--r--tools/testing/selftests/x86/test_mremap_vdso.c43
-rw-r--r--tools/testing/selftests/x86/test_shadow_stack.c212
-rw-r--r--tools/testing/selftests/x86/test_vsyscall.c511
-rw-r--r--tools/testing/selftests/x86/vdso_restorer.c2
-rw-r--r--tools/testing/vsock/Makefile13
885 files changed, 46680 insertions, 13674 deletions
diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index 3482248aa344..90d5afd52dd0 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -630,11 +630,15 @@ static struct cxl_hdm *mock_cxl_setup_hdm(struct cxl_port *port,
struct cxl_endpoint_dvsec_info *info)
{
struct cxl_hdm *cxlhdm = devm_kzalloc(&port->dev, sizeof(*cxlhdm), GFP_KERNEL);
+ struct device *dev = &port->dev;
if (!cxlhdm)
return ERR_PTR(-ENOMEM);
cxlhdm->port = port;
+ cxlhdm->interleave_mask = ~0U;
+ cxlhdm->iw_cap_mask = ~0UL;
+ dev_set_drvdata(dev, cxlhdm);
return cxlhdm;
}
diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
index 35ee41e435ab..129f179b0ac5 100644
--- a/tools/testing/cxl/test/mem.c
+++ b/tools/testing/cxl/test/mem.c
@@ -3,6 +3,7 @@
#include <linux/platform_device.h>
#include <linux/mod_devicetable.h>
+#include <linux/vmalloc.h>
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/sizes.h>
@@ -127,7 +128,7 @@ static struct {
#define CXL_TEST_EVENT_CNT_MAX 15
/* Set a number of events to return at a time for simulation. */
-#define CXL_TEST_EVENT_CNT 3
+#define CXL_TEST_EVENT_RET_MAX 4
struct mock_event_log {
u16 clear_idx;
@@ -222,6 +223,12 @@ static void mes_add_event(struct mock_event_store *mes,
log->nr_events++;
}
+/*
+ * Vary the number of events returned to simulate events occuring while the
+ * logs are being read.
+ */
+static int ret_limit = 0;
+
static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
{
struct cxl_get_event_payload *pl;
@@ -233,14 +240,18 @@ static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
if (cmd->size_in != sizeof(log_type))
return -EINVAL;
- if (cmd->size_out < struct_size(pl, records, CXL_TEST_EVENT_CNT))
+ ret_limit = (ret_limit + 1) % CXL_TEST_EVENT_RET_MAX;
+ if (!ret_limit)
+ ret_limit = 1;
+
+ if (cmd->size_out < struct_size(pl, records, ret_limit))
return -EINVAL;
log_type = *((u8 *)cmd->payload_in);
if (log_type >= CXL_EVENT_TYPE_MAX)
return -EINVAL;
- memset(cmd->payload_out, 0, cmd->size_out);
+ memset(cmd->payload_out, 0, struct_size(pl, records, 0));
log = event_find_log(dev, log_type);
if (!log || event_log_empty(log))
@@ -248,7 +259,7 @@ static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
pl = cmd->payload_out;
- for (i = 0; i < CXL_TEST_EVENT_CNT && !event_log_empty(log); i++) {
+ for (i = 0; i < ret_limit && !event_log_empty(log); i++) {
memcpy(&pl->records[i], event_get_current(log),
sizeof(pl->records[i]));
pl->records[i].event.generic.hdr.handle =
@@ -256,6 +267,7 @@ static int mock_get_event(struct device *dev, struct cxl_mbox_cmd *cmd)
log->cur_idx++;
}
+ cmd->size_out = struct_size(pl, records, i);
pl->record_count = cpu_to_le16(i);
if (!event_log_empty(log))
pl->flags |= CXL_GET_EVENT_FLAG_MORE_RECORDS;
@@ -373,19 +385,21 @@ struct cxl_test_gen_media {
struct cxl_test_gen_media gen_media = {
.id = CXL_EVENT_GEN_MEDIA_UUID,
.rec = {
- .hdr = {
- .length = sizeof(struct cxl_test_gen_media),
- .flags[0] = CXL_EVENT_RECORD_FLAG_PERMANENT,
- /* .handle = Set dynamically */
- .related_handle = cpu_to_le16(0),
+ .media_hdr = {
+ .hdr = {
+ .length = sizeof(struct cxl_test_gen_media),
+ .flags[0] = CXL_EVENT_RECORD_FLAG_PERMANENT,
+ /* .handle = Set dynamically */
+ .related_handle = cpu_to_le16(0),
+ },
+ .phys_addr = cpu_to_le64(0x2000),
+ .descriptor = CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT,
+ .type = CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR,
+ .transaction_type = CXL_GMER_TRANS_HOST_WRITE,
+ /* .validity_flags = <set below> */
+ .channel = 1,
+ .rank = 30,
},
- .phys_addr = cpu_to_le64(0x2000),
- .descriptor = CXL_GMER_EVT_DESC_UNCORECTABLE_EVENT,
- .type = CXL_GMER_MEM_EVT_TYPE_DATA_PATH_ERROR,
- .transaction_type = CXL_GMER_TRANS_HOST_WRITE,
- /* .validity_flags = <set below> */
- .channel = 1,
- .rank = 30
},
};
@@ -397,18 +411,20 @@ struct cxl_test_dram {
struct cxl_test_dram dram = {
.id = CXL_EVENT_DRAM_UUID,
.rec = {
- .hdr = {
- .length = sizeof(struct cxl_test_dram),
- .flags[0] = CXL_EVENT_RECORD_FLAG_PERF_DEGRADED,
- /* .handle = Set dynamically */
- .related_handle = cpu_to_le16(0),
+ .media_hdr = {
+ .hdr = {
+ .length = sizeof(struct cxl_test_dram),
+ .flags[0] = CXL_EVENT_RECORD_FLAG_PERF_DEGRADED,
+ /* .handle = Set dynamically */
+ .related_handle = cpu_to_le16(0),
+ },
+ .phys_addr = cpu_to_le64(0x8000),
+ .descriptor = CXL_GMER_EVT_DESC_THRESHOLD_EVENT,
+ .type = CXL_GMER_MEM_EVT_TYPE_INV_ADDR,
+ .transaction_type = CXL_GMER_TRANS_INTERNAL_MEDIA_SCRUB,
+ /* .validity_flags = <set below> */
+ .channel = 1,
},
- .phys_addr = cpu_to_le64(0x8000),
- .descriptor = CXL_GMER_EVT_DESC_THRESHOLD_EVENT,
- .type = CXL_GMER_MEM_EVT_TYPE_INV_ADDR,
- .transaction_type = CXL_GMER_TRANS_INTERNAL_MEDIA_SCRUB,
- /* .validity_flags = <set below> */
- .channel = 1,
.bank_group = 5,
.bank = 2,
.column = {0xDE, 0xAD},
@@ -462,11 +478,11 @@ static int mock_set_timestamp(struct cxl_dev_state *cxlds,
static void cxl_mock_add_event_logs(struct mock_event_store *mes)
{
put_unaligned_le16(CXL_GMER_VALID_CHANNEL | CXL_GMER_VALID_RANK,
- &gen_media.rec.validity_flags);
+ &gen_media.rec.media_hdr.validity_flags);
put_unaligned_le16(CXL_DER_VALID_CHANNEL | CXL_DER_VALID_BANK_GROUP |
CXL_DER_VALID_BANK | CXL_DER_VALID_COLUMN,
- &dram.rec.validity_flags);
+ &dram.rec.media_hdr.validity_flags);
mes_add_event(mes, CXL_EVENT_TYPE_INFO, &maint_needed);
mes_add_event(mes, CXL_EVENT_TYPE_INFO,
@@ -1119,27 +1135,28 @@ static bool mock_poison_dev_max_injected(struct cxl_dev_state *cxlds)
return (count >= poison_inject_dev_max);
}
-static bool mock_poison_add(struct cxl_dev_state *cxlds, u64 dpa)
+static int mock_poison_add(struct cxl_dev_state *cxlds, u64 dpa)
{
+ /* Return EBUSY to match the CXL driver handling */
if (mock_poison_dev_max_injected(cxlds)) {
dev_dbg(cxlds->dev,
"Device poison injection limit has been reached: %d\n",
- MOCK_INJECT_DEV_MAX);
- return false;
+ poison_inject_dev_max);
+ return -EBUSY;
}
for (int i = 0; i < MOCK_INJECT_TEST_MAX; i++) {
if (!mock_poison_list[i].cxlds) {
mock_poison_list[i].cxlds = cxlds;
mock_poison_list[i].dpa = dpa;
- return true;
+ return 0;
}
}
dev_dbg(cxlds->dev,
"Mock test poison injection limit has been reached: %d\n",
MOCK_INJECT_TEST_MAX);
- return false;
+ return -ENXIO;
}
static bool mock_poison_found(struct cxl_dev_state *cxlds, u64 dpa)
@@ -1163,10 +1180,8 @@ static int mock_inject_poison(struct cxl_dev_state *cxlds,
dev_dbg(cxlds->dev, "DPA: 0x%llx already poisoned\n", dpa);
return 0;
}
- if (!mock_poison_add(cxlds, dpa))
- return -ENXIO;
- return 0;
+ return mock_poison_add(cxlds, dpa);
}
static bool mock_poison_del(struct cxl_dev_state *cxlds, u64 dpa)
diff --git a/tools/testing/kunit/qemu_configs/riscv.py b/tools/testing/kunit/qemu_configs/riscv.py
index 12a1d525978a..c87758030ff7 100644
--- a/tools/testing/kunit/qemu_configs/riscv.py
+++ b/tools/testing/kunit/qemu_configs/riscv.py
@@ -13,7 +13,7 @@ if not os.path.isfile(OPENSBI_PATH):
QEMU_ARCH = QemuArchParams(linux_arch='riscv',
kconfig='''
-CONFIG_SOC_VIRT=y
+CONFIG_ARCH_VIRT=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_OF_PLATFORM=y
diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c
index 57bf2688edfd..67503089e6a0 100644
--- a/tools/testing/memblock/tests/basic_api.c
+++ b/tools/testing/memblock/tests/basic_api.c
@@ -15,12 +15,12 @@ static int memblock_initialization_check(void)
PREFIX_PUSH();
ASSERT_NE(memblock.memory.regions, NULL);
- ASSERT_EQ(memblock.memory.cnt, 1);
+ ASSERT_EQ(memblock.memory.cnt, 0);
ASSERT_EQ(memblock.memory.max, EXPECTED_MEMBLOCK_REGIONS);
ASSERT_EQ(strcmp(memblock.memory.name, "memory"), 0);
ASSERT_NE(memblock.reserved.regions, NULL);
- ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.cnt, 0);
ASSERT_EQ(memblock.memory.max, EXPECTED_MEMBLOCK_REGIONS);
ASSERT_EQ(strcmp(memblock.reserved.name, "reserved"), 0);
@@ -982,6 +982,262 @@ static int memblock_reserve_many_check(void)
return 0;
}
+
+/*
+ * A test that trying to reserve the 129th memory block at all locations.
+ * Expect to trigger memblock_double_array() to double the
+ * memblock.memory.max, find a new valid memory as reserved.regions.
+ *
+ * 0 1 2 128
+ * +-------+ +-------+ +-------+ +-------+
+ * | 32K | | 32K | | 32K | ... | 32K |
+ * +-------+-------+-------+-------+-------+ +-------+
+ * |<-32K->| |<-32K->|
+ *
+ */
+/* Keep the gap so these memory region will not be merged. */
+#define MEMORY_BASE(idx) (SZ_128K + (MEM_SIZE * 2) * (idx))
+static int memblock_reserve_all_locations_check(void)
+{
+ int i, skip;
+ void *orig_region;
+ struct region r = {
+ .base = SZ_16K,
+ .size = SZ_16K,
+ };
+ phys_addr_t new_reserved_regions_size;
+
+ PREFIX_PUSH();
+
+ /* Reserve the 129th memory block for all possible positions*/
+ for (skip = 0; skip < INIT_MEMBLOCK_REGIONS + 1; skip++) {
+ reset_memblock_regions();
+ memblock_allow_resize();
+
+ /* Add a valid memory region used by double_array(). */
+ dummy_physical_memory_init();
+ memblock_add(dummy_physical_memory_base(), MEM_SIZE);
+
+ for (i = 0; i < INIT_MEMBLOCK_REGIONS + 1; i++) {
+ if (i == skip)
+ continue;
+
+ /* Reserve some fakes memory region to fulfill the memblock. */
+ memblock_reserve(MEMORY_BASE(i), MEM_SIZE);
+
+ if (i < skip) {
+ ASSERT_EQ(memblock.reserved.cnt, i + 1);
+ ASSERT_EQ(memblock.reserved.total_size, (i + 1) * MEM_SIZE);
+ } else {
+ ASSERT_EQ(memblock.reserved.cnt, i);
+ ASSERT_EQ(memblock.reserved.total_size, i * MEM_SIZE);
+ }
+ }
+
+ orig_region = memblock.reserved.regions;
+
+ /* This reserve the 129 memory_region, and makes it double array. */
+ memblock_reserve(MEMORY_BASE(skip), MEM_SIZE);
+
+ /*
+ * This is the memory region size used by the doubled reserved.regions,
+ * and it has been reserved due to it has been used. The size is used to
+ * calculate the total_size that the memblock.reserved have now.
+ */
+ new_reserved_regions_size = PAGE_ALIGN((INIT_MEMBLOCK_REGIONS * 2) *
+ sizeof(struct memblock_region));
+ /*
+ * The double_array() will find a free memory region as the new
+ * reserved.regions, and the used memory region will be reserved, so
+ * there will be one more region exist in the reserved memblock. And the
+ * one more reserved region's size is new_reserved_regions_size.
+ */
+ ASSERT_EQ(memblock.reserved.cnt, INIT_MEMBLOCK_REGIONS + 2);
+ ASSERT_EQ(memblock.reserved.total_size, (INIT_MEMBLOCK_REGIONS + 1) * MEM_SIZE +
+ new_reserved_regions_size);
+ ASSERT_EQ(memblock.reserved.max, INIT_MEMBLOCK_REGIONS * 2);
+
+ /*
+ * Now memblock_double_array() works fine. Let's check after the
+ * double_array(), the memblock_reserve() still works as normal.
+ */
+ memblock_reserve(r.base, r.size);
+ ASSERT_EQ(memblock.reserved.regions[0].base, r.base);
+ ASSERT_EQ(memblock.reserved.regions[0].size, r.size);
+
+ ASSERT_EQ(memblock.reserved.cnt, INIT_MEMBLOCK_REGIONS + 3);
+ ASSERT_EQ(memblock.reserved.total_size, (INIT_MEMBLOCK_REGIONS + 1) * MEM_SIZE +
+ new_reserved_regions_size +
+ r.size);
+ ASSERT_EQ(memblock.reserved.max, INIT_MEMBLOCK_REGIONS * 2);
+
+ dummy_physical_memory_cleanup();
+
+ /*
+ * The current reserved.regions is occupying a range of memory that
+ * allocated from dummy_physical_memory_init(). After free the memory,
+ * we must not use it. So restore the origin memory region to make sure
+ * the tests can run as normal and not affected by the double array.
+ */
+ memblock.reserved.regions = orig_region;
+ memblock.reserved.cnt = INIT_MEMBLOCK_RESERVED_REGIONS;
+ }
+
+ test_pass_pop();
+
+ return 0;
+}
+
+/*
+ * A test that trying to reserve the 129th memory block at all locations.
+ * Expect to trigger memblock_double_array() to double the
+ * memblock.memory.max, find a new valid memory as reserved.regions. And make
+ * sure it doesn't conflict with the range we want to reserve.
+ *
+ * For example, we have 128 regions in reserved and now want to reserve
+ * the skipped one. Since reserved is full, memblock_double_array() would find
+ * an available range in memory for the new array. We intended to put two
+ * ranges in memory with one is the exact range of the skipped one. Before
+ * commit 48c3b583bbdd ("mm/memblock: fix overlapping allocation when doubling
+ * reserved array"), the new array would sits in the skipped range which is a
+ * conflict. The expected new array should be allocated from memory.regions[0].
+ *
+ * 0 1
+ * memory +-------+ +-------+
+ * | 32K | | 32K |
+ * +-------+ ------+-------+-------+-------+
+ * |<-32K->|<-32K->|<-32K->|
+ *
+ * 0 skipped 127
+ * reserved +-------+ ......... +-------+
+ * | 32K | . 32K . ... | 32K |
+ * +-------+-------+-------+ +-------+
+ * |<-32K->|
+ * ^
+ * |
+ * |
+ * skipped one
+ */
+/* Keep the gap so these memory region will not be merged. */
+#define MEMORY_BASE_OFFSET(idx, offset) ((offset) + (MEM_SIZE * 2) * (idx))
+static int memblock_reserve_many_may_conflict_check(void)
+{
+ int i, skip;
+ void *orig_region;
+ struct region r = {
+ .base = SZ_16K,
+ .size = SZ_16K,
+ };
+ phys_addr_t new_reserved_regions_size;
+
+ /*
+ * 0 1 129
+ * +---+ +---+ +---+
+ * |32K| |32K| .. |32K|
+ * +---+ +---+ +---+
+ *
+ * Pre-allocate the range for 129 memory block + one range for double
+ * memblock.reserved.regions at idx 0.
+ */
+ dummy_physical_memory_init();
+ phys_addr_t memory_base = dummy_physical_memory_base();
+ phys_addr_t offset = PAGE_ALIGN(memory_base);
+
+ PREFIX_PUSH();
+
+ /* Reserve the 129th memory block for all possible positions*/
+ for (skip = 1; skip <= INIT_MEMBLOCK_REGIONS + 1; skip++) {
+ reset_memblock_regions();
+ memblock_allow_resize();
+
+ reset_memblock_attributes();
+ /* Add a valid memory region used by double_array(). */
+ memblock_add(MEMORY_BASE_OFFSET(0, offset), MEM_SIZE);
+ /*
+ * Add a memory region which will be reserved as 129th memory
+ * region. This is not expected to be used by double_array().
+ */
+ memblock_add(MEMORY_BASE_OFFSET(skip, offset), MEM_SIZE);
+
+ for (i = 1; i <= INIT_MEMBLOCK_REGIONS + 1; i++) {
+ if (i == skip)
+ continue;
+
+ /* Reserve some fakes memory region to fulfill the memblock. */
+ memblock_reserve(MEMORY_BASE_OFFSET(i, offset), MEM_SIZE);
+
+ if (i < skip) {
+ ASSERT_EQ(memblock.reserved.cnt, i);
+ ASSERT_EQ(memblock.reserved.total_size, i * MEM_SIZE);
+ } else {
+ ASSERT_EQ(memblock.reserved.cnt, i - 1);
+ ASSERT_EQ(memblock.reserved.total_size, (i - 1) * MEM_SIZE);
+ }
+ }
+
+ orig_region = memblock.reserved.regions;
+
+ /* This reserve the 129 memory_region, and makes it double array. */
+ memblock_reserve(MEMORY_BASE_OFFSET(skip, offset), MEM_SIZE);
+
+ /*
+ * This is the memory region size used by the doubled reserved.regions,
+ * and it has been reserved due to it has been used. The size is used to
+ * calculate the total_size that the memblock.reserved have now.
+ */
+ new_reserved_regions_size = PAGE_ALIGN((INIT_MEMBLOCK_REGIONS * 2) *
+ sizeof(struct memblock_region));
+ /*
+ * The double_array() will find a free memory region as the new
+ * reserved.regions, and the used memory region will be reserved, so
+ * there will be one more region exist in the reserved memblock. And the
+ * one more reserved region's size is new_reserved_regions_size.
+ */
+ ASSERT_EQ(memblock.reserved.cnt, INIT_MEMBLOCK_REGIONS + 2);
+ ASSERT_EQ(memblock.reserved.total_size, (INIT_MEMBLOCK_REGIONS + 1) * MEM_SIZE +
+ new_reserved_regions_size);
+ ASSERT_EQ(memblock.reserved.max, INIT_MEMBLOCK_REGIONS * 2);
+
+ /*
+ * The first reserved region is allocated for double array
+ * with the size of new_reserved_regions_size and the base to be
+ * MEMORY_BASE_OFFSET(0, offset) + SZ_32K - new_reserved_regions_size
+ */
+ ASSERT_EQ(memblock.reserved.regions[0].base + memblock.reserved.regions[0].size,
+ MEMORY_BASE_OFFSET(0, offset) + SZ_32K);
+ ASSERT_EQ(memblock.reserved.regions[0].size, new_reserved_regions_size);
+
+ /*
+ * Now memblock_double_array() works fine. Let's check after the
+ * double_array(), the memblock_reserve() still works as normal.
+ */
+ memblock_reserve(r.base, r.size);
+ ASSERT_EQ(memblock.reserved.regions[0].base, r.base);
+ ASSERT_EQ(memblock.reserved.regions[0].size, r.size);
+
+ ASSERT_EQ(memblock.reserved.cnt, INIT_MEMBLOCK_REGIONS + 3);
+ ASSERT_EQ(memblock.reserved.total_size, (INIT_MEMBLOCK_REGIONS + 1) * MEM_SIZE +
+ new_reserved_regions_size +
+ r.size);
+ ASSERT_EQ(memblock.reserved.max, INIT_MEMBLOCK_REGIONS * 2);
+
+ /*
+ * The current reserved.regions is occupying a range of memory that
+ * allocated from dummy_physical_memory_init(). After free the memory,
+ * we must not use it. So restore the origin memory region to make sure
+ * the tests can run as normal and not affected by the double array.
+ */
+ memblock.reserved.regions = orig_region;
+ memblock.reserved.cnt = INIT_MEMBLOCK_RESERVED_REGIONS;
+ }
+
+ dummy_physical_memory_cleanup();
+
+ test_pass_pop();
+
+ return 0;
+}
+
static int memblock_reserve_checks(void)
{
prefix_reset();
@@ -997,6 +1253,8 @@ static int memblock_reserve_checks(void)
memblock_reserve_between_check();
memblock_reserve_near_max_check();
memblock_reserve_many_check();
+ memblock_reserve_all_locations_check();
+ memblock_reserve_many_may_conflict_check();
prefix_pop();
@@ -1295,7 +1553,7 @@ static int memblock_remove_only_region_check(void)
ASSERT_EQ(rgn->base, 0);
ASSERT_EQ(rgn->size, 0);
- ASSERT_EQ(memblock.memory.cnt, 1);
+ ASSERT_EQ(memblock.memory.cnt, 0);
ASSERT_EQ(memblock.memory.total_size, 0);
test_pass_pop();
@@ -1723,7 +1981,7 @@ static int memblock_free_only_region_check(void)
ASSERT_EQ(rgn->base, 0);
ASSERT_EQ(rgn->size, 0);
- ASSERT_EQ(memblock.reserved.cnt, 1);
+ ASSERT_EQ(memblock.reserved.cnt, 0);
ASSERT_EQ(memblock.reserved.total_size, 0);
test_pass_pop();
@@ -2129,6 +2387,53 @@ static int memblock_trim_memory_checks(void)
return 0;
}
+static int memblock_overlaps_region_check(void)
+{
+ struct region r = {
+ .base = SZ_1G,
+ .size = SZ_4M
+ };
+
+ PREFIX_PUSH();
+
+ reset_memblock_regions();
+ memblock_add(r.base, r.size);
+
+ /* Far Away */
+ ASSERT_FALSE(memblock_overlaps_region(&memblock.memory, SZ_1M, SZ_1M));
+ ASSERT_FALSE(memblock_overlaps_region(&memblock.memory, SZ_2G, SZ_1M));
+
+ /* Neighbor */
+ ASSERT_FALSE(memblock_overlaps_region(&memblock.memory, SZ_1G - SZ_1M, SZ_1M));
+ ASSERT_FALSE(memblock_overlaps_region(&memblock.memory, SZ_1G + SZ_4M, SZ_1M));
+
+ /* Partial Overlap */
+ ASSERT_TRUE(memblock_overlaps_region(&memblock.memory, SZ_1G - SZ_1M, SZ_2M));
+ ASSERT_TRUE(memblock_overlaps_region(&memblock.memory, SZ_1G + SZ_2M, SZ_2M));
+
+ /* Totally Overlap */
+ ASSERT_TRUE(memblock_overlaps_region(&memblock.memory, SZ_1G, SZ_4M));
+ ASSERT_TRUE(memblock_overlaps_region(&memblock.memory, SZ_1G - SZ_2M, SZ_8M));
+ ASSERT_TRUE(memblock_overlaps_region(&memblock.memory, SZ_1G + SZ_1M, SZ_1M));
+
+ test_pass_pop();
+
+ return 0;
+}
+
+static int memblock_overlaps_region_checks(void)
+{
+ prefix_reset();
+ prefix_push("memblock_overlaps_region");
+ test_print("Running memblock_overlaps_region tests...\n");
+
+ memblock_overlaps_region_check();
+
+ prefix_pop();
+
+ return 0;
+}
+
int memblock_basic_checks(void)
{
memblock_initialization_check();
@@ -2138,6 +2443,7 @@ int memblock_basic_checks(void)
memblock_free_checks();
memblock_bottom_up_checks();
memblock_trim_memory_checks();
+ memblock_overlaps_region_checks();
return 0;
}
diff --git a/tools/testing/memblock/tests/common.c b/tools/testing/memblock/tests/common.c
index f43b6f414983..3250c8e5124b 100644
--- a/tools/testing/memblock/tests/common.c
+++ b/tools/testing/memblock/tests/common.c
@@ -40,13 +40,13 @@ void reset_memblock_regions(void)
{
memset(memblock.memory.regions, 0,
memblock.memory.cnt * sizeof(struct memblock_region));
- memblock.memory.cnt = 1;
+ memblock.memory.cnt = 0;
memblock.memory.max = INIT_MEMBLOCK_REGIONS;
memblock.memory.total_size = 0;
memset(memblock.reserved.regions, 0,
memblock.reserved.cnt * sizeof(struct memblock_region));
- memblock.reserved.cnt = 1;
+ memblock.reserved.cnt = 0;
memblock.reserved.max = INIT_MEMBLOCK_RESERVED_REGIONS;
memblock.reserved.total_size = 0;
}
@@ -61,7 +61,7 @@ void reset_memblock_attributes(void)
static inline void fill_memblock(void)
{
- memset(memory_block.base, 1, MEM_SIZE);
+ memset(memory_block.base, 1, PHYS_MEM_SIZE);
}
void setup_memblock(void)
@@ -103,7 +103,7 @@ void setup_numa_memblock(const unsigned int node_fracs[])
void dummy_physical_memory_init(void)
{
- memory_block.base = malloc(MEM_SIZE);
+ memory_block.base = malloc(PHYS_MEM_SIZE);
assert(memory_block.base);
fill_memblock();
}
diff --git a/tools/testing/memblock/tests/common.h b/tools/testing/memblock/tests/common.h
index b5ec59aa62d7..e1138e06c903 100644
--- a/tools/testing/memblock/tests/common.h
+++ b/tools/testing/memblock/tests/common.h
@@ -12,6 +12,7 @@
#include <../selftests/kselftest.h>
#define MEM_SIZE SZ_32K
+#define PHYS_MEM_SIZE SZ_16M
#define NUMA_NODES 8
#define INIT_MEMBLOCK_REGIONS 128
@@ -39,6 +40,9 @@ enum test_flags {
assert((_expected) == (_seen)); \
} while (0)
+#define ASSERT_TRUE(_seen) ASSERT_EQ(true, _seen)
+#define ASSERT_FALSE(_seen) ASSERT_EQ(false, _seen)
+
/**
* ASSERT_NE():
* Check the condition
diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
index ea956082e6a4..e4313726fae3 100644
--- a/tools/testing/nvdimm/test/iomap.c
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -407,4 +407,5 @@ union acpi_object * __wrap_acpi_evaluate_dsm(acpi_handle handle, const guid_t *g
}
EXPORT_SYMBOL(__wrap_acpi_evaluate_dsm);
+MODULE_DESCRIPTION("NVDIMM unit test");
MODULE_LICENSE("GPL v2");
diff --git a/tools/testing/nvdimm/test/ndtest.c b/tools/testing/nvdimm/test/ndtest.c
index b8419f460368..892e990c034a 100644
--- a/tools/testing/nvdimm/test/ndtest.c
+++ b/tools/testing/nvdimm/test/ndtest.c
@@ -13,6 +13,8 @@
#include <nd-core.h>
#include <linux/printk.h>
#include <linux/seq_buf.h>
+#include <linux/papr_scm.h>
+#include <uapi/linux/papr_pdsm.h>
#include "../watermark.h"
#include "nfit_test.h"
@@ -830,12 +832,11 @@ static int ndtest_bus_register(struct ndtest_priv *p)
return 0;
}
-static int ndtest_remove(struct platform_device *pdev)
+static void ndtest_remove(struct platform_device *pdev)
{
struct ndtest_priv *p = to_ndtest_priv(&pdev->dev);
nvdimm_bus_unregister(p->bus);
- return 0;
}
static int ndtest_probe(struct platform_device *pdev)
@@ -882,7 +883,7 @@ static const struct platform_device_id ndtest_id[] = {
static struct platform_driver ndtest_driver = {
.probe = ndtest_probe,
- .remove = ndtest_remove,
+ .remove_new = ndtest_remove,
.driver = {
.name = KBUILD_MODNAME,
},
@@ -986,5 +987,6 @@ static __exit void ndtest_exit(void)
module_init(ndtest_init);
module_exit(ndtest_exit);
+MODULE_DESCRIPTION("Test non-NFIT devices");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("IBM Corporation");
diff --git a/tools/testing/nvdimm/test/ndtest.h b/tools/testing/nvdimm/test/ndtest.h
index 2c54c9cbb90c..8f27ad6f7319 100644
--- a/tools/testing/nvdimm/test/ndtest.h
+++ b/tools/testing/nvdimm/test/ndtest.h
@@ -5,37 +5,6 @@
#include <linux/platform_device.h>
#include <linux/libnvdimm.h>
-/* SCM device is unable to persist memory contents */
-#define PAPR_PMEM_UNARMED (1ULL << (63 - 0))
-/* SCM device failed to persist memory contents */
-#define PAPR_PMEM_SHUTDOWN_DIRTY (1ULL << (63 - 1))
-/* SCM device contents are not persisted from previous IPL */
-#define PAPR_PMEM_EMPTY (1ULL << (63 - 3))
-#define PAPR_PMEM_HEALTH_CRITICAL (1ULL << (63 - 4))
-/* SCM device will be garded off next IPL due to failure */
-#define PAPR_PMEM_HEALTH_FATAL (1ULL << (63 - 5))
-/* SCM contents cannot persist due to current platform health status */
-#define PAPR_PMEM_HEALTH_UNHEALTHY (1ULL << (63 - 6))
-
-/* Bits status indicators for health bitmap indicating unarmed dimm */
-#define PAPR_PMEM_UNARMED_MASK (PAPR_PMEM_UNARMED | \
- PAPR_PMEM_HEALTH_UNHEALTHY)
-
-#define PAPR_PMEM_SAVE_FAILED (1ULL << (63 - 10))
-
-/* Bits status indicators for health bitmap indicating unflushed dimm */
-#define PAPR_PMEM_BAD_SHUTDOWN_MASK (PAPR_PMEM_SHUTDOWN_DIRTY)
-
-/* Bits status indicators for health bitmap indicating unrestored dimm */
-#define PAPR_PMEM_BAD_RESTORE_MASK (PAPR_PMEM_EMPTY)
-
-/* Bit status indicators for smart event notification */
-#define PAPR_PMEM_SMART_EVENT_MASK (PAPR_PMEM_HEALTH_CRITICAL | \
- PAPR_PMEM_HEALTH_FATAL | \
- PAPR_PMEM_HEALTH_UNHEALTHY)
-
-#define PAPR_PMEM_SAVE_MASK (PAPR_PMEM_SAVE_FAILED)
-
struct ndtest_config;
struct ndtest_priv {
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index a61df347a33d..cfd4378e2129 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -3382,5 +3382,6 @@ static __exit void nfit_test_exit(void)
module_init(nfit_test_init);
module_exit(nfit_test_exit);
+MODULE_DESCRIPTION("Test ACPI NFIT devices");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Intel Corporation");
diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index 7527f738b4a1..d1acd7d58850 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -5,8 +5,8 @@ CFLAGS += -I. -I../../include -I../../../lib -g -Og -Wall \
LDFLAGS += -fsanitize=address -fsanitize=undefined
LDLIBS+= -lpthread -lurcu
TARGETS = main idr-test multiorder xarray maple
-CORE_OFILES := xarray.o radix-tree.o idr.o linux.o test.o find_bit.o bitmap.o \
- slab.o maple.o
+LIBS := slab.o find_bit.o bitmap.o hweight.o vsprintf.o
+CORE_OFILES := xarray.o radix-tree.o idr.o linux.o test.o maple.o $(LIBS)
OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \
regression4.o tag_check.o multiorder.o idr-test.o iteration_check.o \
iteration_check_2.o benchmark.o
diff --git a/tools/testing/radix-tree/bitmap.c b/tools/testing/radix-tree/bitmap.c
deleted file mode 100644
index 66ec4a24a203..000000000000
--- a/tools/testing/radix-tree/bitmap.c
+++ /dev/null
@@ -1,23 +0,0 @@
-/* lib/bitmap.c pulls in at least two other files. */
-
-#include <linux/bitmap.h>
-
-void bitmap_clear(unsigned long *map, unsigned int start, int len)
-{
- unsigned long *p = map + BIT_WORD(start);
- const unsigned int size = start + len;
- int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
- unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
-
- while (len - bits_to_clear >= 0) {
- *p &= ~mask_to_clear;
- len -= bits_to_clear;
- bits_to_clear = BITS_PER_LONG;
- mask_to_clear = ~0UL;
- p++;
- }
- if (len) {
- mask_to_clear &= BITMAP_LAST_WORD_MASK(size);
- *p &= ~mask_to_clear;
- }
-}
diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
index ca24f6839d50..84b8c3c92c79 100644
--- a/tools/testing/radix-tree/idr-test.c
+++ b/tools/testing/radix-tree/idr-test.c
@@ -424,6 +424,7 @@ void idr_checks(void)
#define module_init(x)
#define module_exit(x)
#define MODULE_AUTHOR(x)
+#define MODULE_DESCRIPTION(X)
#define MODULE_LICENSE(x)
#define dump_stack() assert(0)
void ida_dump(struct ida *);
diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h
index c5c9d05f29da..c0a2bb785b92 100644
--- a/tools/testing/radix-tree/linux/kernel.h
+++ b/tools/testing/radix-tree/linux/kernel.h
@@ -18,6 +18,8 @@
#define pr_info printk
#define pr_debug printk
#define pr_cont printk
+#define schedule()
+#define PAGE_SHIFT 12
#define __acquires(x)
#define __releases(x)
diff --git a/tools/testing/radix-tree/maple.c b/tools/testing/radix-tree/maple.c
index f1caf4bcf937..cd1cf05503b4 100644
--- a/tools/testing/radix-tree/maple.c
+++ b/tools/testing/radix-tree/maple.c
@@ -19,6 +19,7 @@
#define module_init(x)
#define module_exit(x)
#define MODULE_AUTHOR(x)
+#define MODULE_DESCRIPTION(X)
#define MODULE_LICENSE(x)
#define dump_stack() assert(0)
diff --git a/tools/testing/radix-tree/xarray.c b/tools/testing/radix-tree/xarray.c
index f20e12cbbfd4..d0e53bff1eb6 100644
--- a/tools/testing/radix-tree/xarray.c
+++ b/tools/testing/radix-tree/xarray.c
@@ -10,6 +10,7 @@
#define module_init(x)
#define module_exit(x)
#define MODULE_AUTHOR(x)
+#define MODULE_DESCRIPTION(X)
#define MODULE_LICENSE(x)
#define dump_stack() assert(0)
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index e1504833654d..bc8fe9e8f7f2 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -13,12 +13,16 @@ TARGETS += core
TARGETS += cpufreq
TARGETS += cpu-hotplug
TARGETS += damon
-TARGETS += devices
+TARGETS += devices/error_logs
+TARGETS += devices/probe
TARGETS += dmabuf-heaps
TARGETS += drivers/dma-buf
TARGETS += drivers/s390x/uvdevice
+TARGETS += drivers/net
TARGETS += drivers/net/bonding
TARGETS += drivers/net/team
+TARGETS += drivers/net/virtio_net
+TARGETS += drivers/platform/x86/intel/ifs
TARGETS += dt
TARGETS += efivarfs
TARGETS += exec
@@ -63,7 +67,7 @@ TARGETS += net/hsr
TARGETS += net/mptcp
TARGETS += net/openvswitch
TARGETS += net/tcp_ao
-TARGETS += netfilter
+TARGETS += net/netfilter
TARGETS += nsfs
TARGETS += perf_events
TARGETS += pidfd
@@ -116,6 +120,13 @@ TARGETS += zram
TARGETS_HOTPLUG = cpu-hotplug
TARGETS_HOTPLUG += memory-hotplug
+# Networking tests want the net/lib target, include it automatically
+ifneq ($(filter net drivers/net drivers/net/hw,$(TARGETS)),)
+ifeq ($(filter net/lib,$(TARGETS)),)
+ INSTALL_DEP_TARGETS := net/lib
+endif
+endif
+
# User can optionally provide a TARGETS skiplist. By default we skip
# BPF since it has cutting edge build time dependencies which require
# more effort to install.
@@ -242,10 +253,11 @@ ifdef INSTALL_PATH
install -m 744 kselftest/runner.sh $(INSTALL_PATH)/kselftest/
install -m 744 kselftest/prefix.pl $(INSTALL_PATH)/kselftest/
install -m 744 kselftest/ktap_helpers.sh $(INSTALL_PATH)/kselftest/
+ install -m 744 kselftest/ksft.py $(INSTALL_PATH)/kselftest/
install -m 744 run_kselftest.sh $(INSTALL_PATH)/
rm -f $(TEST_LIST)
@ret=1; \
- for TARGET in $(TARGETS); do \
+ for TARGET in $(TARGETS) $(INSTALL_DEP_TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
$(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install \
INSTALL_PATH=$(INSTALL_PATH)/$$TARGET \
diff --git a/tools/testing/selftests/alsa/Makefile b/tools/testing/selftests/alsa/Makefile
index 5af9ba8a4645..c1ce39874e2b 100644
--- a/tools/testing/selftests/alsa/Makefile
+++ b/tools/testing/selftests/alsa/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
#
-CFLAGS += $(shell pkg-config --cflags alsa)
+CFLAGS += $(shell pkg-config --cflags alsa) $(KHDR_INCLUDES)
LDLIBS += $(shell pkg-config --libs alsa)
ifeq ($(LDLIBS),)
LDLIBS += -lasound
diff --git a/tools/testing/selftests/alsa/conf.c b/tools/testing/selftests/alsa/conf.c
index 89e3656a042d..e2b3a5810f47 100644
--- a/tools/testing/selftests/alsa/conf.c
+++ b/tools/testing/selftests/alsa/conf.c
@@ -105,7 +105,7 @@ static struct card_cfg_data *conf_data_by_card(int card, bool msg)
return NULL;
}
-static int dump_config_tree(snd_config_t *top)
+static void dump_config_tree(snd_config_t *top)
{
snd_output_t *out;
int err;
diff --git a/tools/testing/selftests/alsa/mixer-test.c b/tools/testing/selftests/alsa/mixer-test.c
index 1c04e5f638a0..2a4b2662035e 100644
--- a/tools/testing/selftests/alsa/mixer-test.c
+++ b/tools/testing/selftests/alsa/mixer-test.c
@@ -33,6 +33,8 @@
struct card_data {
snd_ctl_t *handle;
int card;
+ snd_ctl_card_info_t *info;
+ const char *card_name;
struct pollfd pollfd;
int num_ctls;
snd_ctl_elem_list_t *ctls;
@@ -91,8 +93,26 @@ static void find_controls(void)
err = snd_card_get_longname(card, &card_longname);
if (err != 0)
card_longname = "Unknown";
- ksft_print_msg("Card %d - %s (%s)\n", card,
- card_name, card_longname);
+
+ err = snd_ctl_card_info_malloc(&card_data->info);
+ if (err != 0)
+ ksft_exit_fail_msg("Failed to allocate card info: %d\n",
+ err);
+
+ err = snd_ctl_card_info(card_data->handle, card_data->info);
+ if (err == 0) {
+ card_data->card_name = snd_ctl_card_info_get_id(card_data->info);
+ if (!card_data->card_name)
+ ksft_print_msg("Failed to get card ID\n");
+ } else {
+ ksft_print_msg("Failed to get card info: %d\n", err);
+ }
+
+ if (!card_data->card_name)
+ card_data->card_name = "Unknown";
+
+ ksft_print_msg("Card %d/%s - %s (%s)\n", card,
+ card_data->card_name, card_name, card_longname);
/* Count controls */
snd_ctl_elem_list_malloc(&card_data->ctls);
@@ -389,16 +409,16 @@ static void test_ctl_get_value(struct ctl_data *ctl)
/* If the control is turned off let's be polite */
if (snd_ctl_elem_info_is_inactive(ctl->info)) {
ksft_print_msg("%s is inactive\n", ctl->name);
- ksft_test_result_skip("get_value.%d.%d\n",
- ctl->card->card, ctl->elem);
+ ksft_test_result_skip("get_value.%s.%d\n",
+ ctl->card->card_name, ctl->elem);
return;
}
/* Can't test reading on an unreadable control */
if (!snd_ctl_elem_info_is_readable(ctl->info)) {
ksft_print_msg("%s is not readable\n", ctl->name);
- ksft_test_result_skip("get_value.%d.%d\n",
- ctl->card->card, ctl->elem);
+ ksft_test_result_skip("get_value.%s.%d\n",
+ ctl->card->card_name, ctl->elem);
return;
}
@@ -413,8 +433,8 @@ static void test_ctl_get_value(struct ctl_data *ctl)
err = -EINVAL;
out:
- ksft_test_result(err >= 0, "get_value.%d.%d\n",
- ctl->card->card, ctl->elem);
+ ksft_test_result(err >= 0, "get_value.%s.%d\n",
+ ctl->card->card_name, ctl->elem);
}
static bool strend(const char *haystack, const char *needle)
@@ -431,7 +451,7 @@ static void test_ctl_name(struct ctl_data *ctl)
{
bool name_ok = true;
- ksft_print_msg("%d.%d %s\n", ctl->card->card, ctl->elem,
+ ksft_print_msg("%s.%d %s\n", ctl->card->card_name, ctl->elem,
ctl->name);
/* Only boolean controls should end in Switch */
@@ -453,8 +473,8 @@ static void test_ctl_name(struct ctl_data *ctl)
}
}
- ksft_test_result(name_ok, "name.%d.%d\n",
- ctl->card->card, ctl->elem);
+ ksft_test_result(name_ok, "name.%s.%d\n",
+ ctl->card->card_name, ctl->elem);
}
static void show_values(struct ctl_data *ctl, snd_ctl_elem_value_t *orig_val,
@@ -626,28 +646,41 @@ static int write_and_verify(struct ctl_data *ctl,
}
/*
+ * We can't verify any specific value for volatile controls
+ * but we should still check that whatever we read is a valid
+ * vale for the control.
+ */
+ if (snd_ctl_elem_info_is_volatile(ctl->info)) {
+ if (!ctl_value_valid(ctl, read_val)) {
+ ksft_print_msg("Volatile control %s has invalid value\n",
+ ctl->name);
+ return -EINVAL;
+ }
+
+ return 0;
+ }
+
+ /*
* Check for an event if the value changed, or confirm that
* there was none if it didn't. We rely on the kernel
* generating the notification before it returns from the
* write, this is currently true, should that ever change this
* will most likely break and need updating.
*/
- if (!snd_ctl_elem_info_is_volatile(ctl->info)) {
- err = wait_for_event(ctl, 0);
- if (snd_ctl_elem_value_compare(initial_val, read_val)) {
- if (err < 1) {
- ksft_print_msg("No event generated for %s\n",
- ctl->name);
- show_values(ctl, initial_val, read_val);
- ctl->event_missing++;
- }
- } else {
- if (err != 0) {
- ksft_print_msg("Spurious event generated for %s\n",
- ctl->name);
- show_values(ctl, initial_val, read_val);
- ctl->event_spurious++;
- }
+ err = wait_for_event(ctl, 0);
+ if (snd_ctl_elem_value_compare(initial_val, read_val)) {
+ if (err < 1) {
+ ksft_print_msg("No event generated for %s\n",
+ ctl->name);
+ show_values(ctl, initial_val, read_val);
+ ctl->event_missing++;
+ }
+ } else {
+ if (err != 0) {
+ ksft_print_msg("Spurious event generated for %s\n",
+ ctl->name);
+ show_values(ctl, initial_val, read_val);
+ ctl->event_spurious++;
}
}
@@ -682,30 +715,30 @@ static void test_ctl_write_default(struct ctl_data *ctl)
/* If the control is turned off let's be polite */
if (snd_ctl_elem_info_is_inactive(ctl->info)) {
ksft_print_msg("%s is inactive\n", ctl->name);
- ksft_test_result_skip("write_default.%d.%d\n",
- ctl->card->card, ctl->elem);
+ ksft_test_result_skip("write_default.%s.%d\n",
+ ctl->card->card_name, ctl->elem);
return;
}
if (!snd_ctl_elem_info_is_writable(ctl->info)) {
ksft_print_msg("%s is not writeable\n", ctl->name);
- ksft_test_result_skip("write_default.%d.%d\n",
- ctl->card->card, ctl->elem);
+ ksft_test_result_skip("write_default.%s.%d\n",
+ ctl->card->card_name, ctl->elem);
return;
}
/* No idea what the default was for unreadable controls */
if (!snd_ctl_elem_info_is_readable(ctl->info)) {
ksft_print_msg("%s couldn't read default\n", ctl->name);
- ksft_test_result_skip("write_default.%d.%d\n",
- ctl->card->card, ctl->elem);
+ ksft_test_result_skip("write_default.%s.%d\n",
+ ctl->card->card_name, ctl->elem);
return;
}
err = write_and_verify(ctl, ctl->def_val, NULL);
- ksft_test_result(err >= 0, "write_default.%d.%d\n",
- ctl->card->card, ctl->elem);
+ ksft_test_result(err >= 0, "write_default.%s.%d\n",
+ ctl->card->card_name, ctl->elem);
}
static bool test_ctl_write_valid_boolean(struct ctl_data *ctl)
@@ -815,15 +848,15 @@ static void test_ctl_write_valid(struct ctl_data *ctl)
/* If the control is turned off let's be polite */
if (snd_ctl_elem_info_is_inactive(ctl->info)) {
ksft_print_msg("%s is inactive\n", ctl->name);
- ksft_test_result_skip("write_valid.%d.%d\n",
- ctl->card->card, ctl->elem);
+ ksft_test_result_skip("write_valid.%s.%d\n",
+ ctl->card->card_name, ctl->elem);
return;
}
if (!snd_ctl_elem_info_is_writable(ctl->info)) {
ksft_print_msg("%s is not writeable\n", ctl->name);
- ksft_test_result_skip("write_valid.%d.%d\n",
- ctl->card->card, ctl->elem);
+ ksft_test_result_skip("write_valid.%s.%d\n",
+ ctl->card->card_name, ctl->elem);
return;
}
@@ -846,16 +879,16 @@ static void test_ctl_write_valid(struct ctl_data *ctl)
default:
/* No tests for this yet */
- ksft_test_result_skip("write_valid.%d.%d\n",
- ctl->card->card, ctl->elem);
+ ksft_test_result_skip("write_valid.%s.%d\n",
+ ctl->card->card_name, ctl->elem);
return;
}
/* Restore the default value to minimise disruption */
write_and_verify(ctl, ctl->def_val, NULL);
- ksft_test_result(pass, "write_valid.%d.%d\n",
- ctl->card->card, ctl->elem);
+ ksft_test_result(pass, "write_valid.%s.%d\n",
+ ctl->card->card_name, ctl->elem);
}
static bool test_ctl_write_invalid_value(struct ctl_data *ctl,
@@ -1027,15 +1060,15 @@ static void test_ctl_write_invalid(struct ctl_data *ctl)
/* If the control is turned off let's be polite */
if (snd_ctl_elem_info_is_inactive(ctl->info)) {
ksft_print_msg("%s is inactive\n", ctl->name);
- ksft_test_result_skip("write_invalid.%d.%d\n",
- ctl->card->card, ctl->elem);
+ ksft_test_result_skip("write_invalid.%s.%d\n",
+ ctl->card->card_name, ctl->elem);
return;
}
if (!snd_ctl_elem_info_is_writable(ctl->info)) {
ksft_print_msg("%s is not writeable\n", ctl->name);
- ksft_test_result_skip("write_invalid.%d.%d\n",
- ctl->card->card, ctl->elem);
+ ksft_test_result_skip("write_invalid.%s.%d\n",
+ ctl->card->card_name, ctl->elem);
return;
}
@@ -1058,28 +1091,28 @@ static void test_ctl_write_invalid(struct ctl_data *ctl)
default:
/* No tests for this yet */
- ksft_test_result_skip("write_invalid.%d.%d\n",
- ctl->card->card, ctl->elem);
+ ksft_test_result_skip("write_invalid.%s.%d\n",
+ ctl->card->card_name, ctl->elem);
return;
}
/* Restore the default value to minimise disruption */
write_and_verify(ctl, ctl->def_val, NULL);
- ksft_test_result(pass, "write_invalid.%d.%d\n",
- ctl->card->card, ctl->elem);
+ ksft_test_result(pass, "write_invalid.%s.%d\n",
+ ctl->card->card_name, ctl->elem);
}
static void test_ctl_event_missing(struct ctl_data *ctl)
{
- ksft_test_result(!ctl->event_missing, "event_missing.%d.%d\n",
- ctl->card->card, ctl->elem);
+ ksft_test_result(!ctl->event_missing, "event_missing.%s.%d\n",
+ ctl->card->card_name, ctl->elem);
}
static void test_ctl_event_spurious(struct ctl_data *ctl)
{
- ksft_test_result(!ctl->event_spurious, "event_spurious.%d.%d\n",
- ctl->card->card, ctl->elem);
+ ksft_test_result(!ctl->event_spurious, "event_spurious.%s.%d\n",
+ ctl->card->card_name, ctl->elem);
}
int main(void)
diff --git a/tools/testing/selftests/alsa/pcm-test.c b/tools/testing/selftests/alsa/pcm-test.c
index de664dedb541..dbd7c222ce93 100644
--- a/tools/testing/selftests/alsa/pcm-test.c
+++ b/tools/testing/selftests/alsa/pcm-test.c
@@ -24,6 +24,8 @@ typedef struct timespec timestamp_t;
struct card_data {
int card;
+ snd_ctl_card_info_t *info;
+ const char *name;
pthread_t thread;
struct card_data *next;
};
@@ -35,6 +37,7 @@ struct pcm_data {
int card;
int device;
int subdevice;
+ const char *card_name;
snd_pcm_stream_t stream;
snd_config_t *pcm_config;
struct pcm_data *next;
@@ -167,6 +170,10 @@ static void find_pcms(void)
config = get_alsalib_config();
while (card >= 0) {
+ card_data = calloc(1, sizeof(*card_data));
+ if (!card_data)
+ ksft_exit_fail_msg("Out of memory\n");
+
sprintf(name, "hw:%d", card);
err = snd_ctl_open_lconf(&handle, name, 0, config);
@@ -182,14 +189,29 @@ static void find_pcms(void)
err = snd_card_get_longname(card, &card_longname);
if (err != 0)
card_longname = "Unknown";
- ksft_print_msg("Card %d - %s (%s)\n", card,
- card_name, card_longname);
+
+ err = snd_ctl_card_info_malloc(&card_data->info);
+ if (err != 0)
+ ksft_exit_fail_msg("Failed to allocate card info: %d\n",
+ err);
+
+ err = snd_ctl_card_info(handle, card_data->info);
+ if (err == 0) {
+ card_data->name = snd_ctl_card_info_get_id(card_data->info);
+ if (!card_data->name)
+ ksft_print_msg("Failed to get card ID\n");
+ } else {
+ ksft_print_msg("Failed to get card info: %d\n", err);
+ }
+
+ if (!card_data->name)
+ card_data->name = "Unknown";
+
+ ksft_print_msg("Card %d/%s - %s (%s)\n", card,
+ card_data->name, card_name, card_longname);
card_config = conf_by_card(card);
- card_data = calloc(1, sizeof(*card_data));
- if (!card_data)
- ksft_exit_fail_msg("Out of memory\n");
card_data->card = card;
card_data->next = card_list;
card_list = card_data;
@@ -218,6 +240,10 @@ static void find_pcms(void)
if (err < 0)
ksft_exit_fail_msg("snd_ctl_pcm_info: %d:%d:%d\n",
dev, 0, stream);
+
+ ksft_print_msg("%s.0 - %s\n", card_data->name,
+ snd_pcm_info_get_id(pcm_info));
+
count = snd_pcm_info_get_subdevices_count(pcm_info);
for (subdev = 0; subdev < count; subdev++) {
sprintf(key, "pcm.%d.%d.%s", dev, subdev, snd_pcm_stream_name(stream));
@@ -232,6 +258,7 @@ static void find_pcms(void)
pcm_data->card = card;
pcm_data->device = dev;
pcm_data->subdevice = subdev;
+ pcm_data->card_name = card_data->name;
pcm_data->stream = stream;
pcm_data->pcm_config = conf_get_subtree(card_config, key, NULL);
pcm_data->next = pcm_list;
@@ -294,9 +321,9 @@ static void test_pcm_time(struct pcm_data *data, enum test_class class,
desc = conf_get_string(pcm_cfg, "description", NULL, NULL);
if (desc)
- ksft_print_msg("%s.%s.%d.%d.%d.%s - %s\n",
+ ksft_print_msg("%s.%s.%s.%d.%d.%s - %s\n",
test_class_name, test_name,
- data->card, data->device, data->subdevice,
+ data->card_name, data->device, data->subdevice,
snd_pcm_stream_name(data->stream),
desc);
@@ -352,9 +379,9 @@ __format:
old_format = format;
format = snd_pcm_format_value(alt_formats[i]);
if (format != SND_PCM_FORMAT_UNKNOWN) {
- ksft_print_msg("%s.%d.%d.%d.%s.%s format %s -> %s\n",
+ ksft_print_msg("%s.%s.%d.%d.%s.%s format %s -> %s\n",
test_name,
- data->card, data->device, data->subdevice,
+ data->card_name, data->device, data->subdevice,
snd_pcm_stream_name(data->stream),
snd_pcm_access_name(access),
snd_pcm_format_name(old_format),
@@ -383,7 +410,7 @@ __format:
goto __close;
}
if (rrate != rate) {
- snprintf(msg, sizeof(msg), "rate mismatch %ld != %d", rate, rrate);
+ snprintf(msg, sizeof(msg), "rate mismatch %ld != %u", rate, rrate);
goto __close;
}
rperiod_size = period_size;
@@ -430,9 +457,9 @@ __format:
goto __close;
}
- ksft_print_msg("%s.%s.%d.%d.%d.%s hw_params.%s.%s.%ld.%ld.%ld.%ld sw_params.%ld\n",
+ ksft_print_msg("%s.%s.%s.%d.%d.%s hw_params.%s.%s.%ld.%ld.%ld.%ld sw_params.%ld\n",
test_class_name, test_name,
- data->card, data->device, data->subdevice,
+ data->card_name, data->device, data->subdevice,
snd_pcm_stream_name(data->stream),
snd_pcm_access_name(access),
snd_pcm_format_name(format),
@@ -491,9 +518,10 @@ __close:
* Anything specified as specific to this system
* should always be supported.
*/
- ksft_test_result(!skip, "%s.%s.%d.%d.%d.%s.params\n",
+ ksft_test_result(!skip, "%s.%s.%s.%d.%d.%s.params\n",
test_class_name, test_name,
- data->card, data->device, data->subdevice,
+ data->card_name, data->device,
+ data->subdevice,
snd_pcm_stream_name(data->stream));
break;
default:
@@ -501,14 +529,16 @@ __close:
}
if (!skip)
- ksft_test_result(pass, "%s.%s.%d.%d.%d.%s\n",
+ ksft_test_result(pass, "%s.%s.%s.%d.%d.%s\n",
test_class_name, test_name,
- data->card, data->device, data->subdevice,
+ data->card_name, data->device,
+ data->subdevice,
snd_pcm_stream_name(data->stream));
else
- ksft_test_result_skip("%s.%s.%d.%d.%d.%s\n",
+ ksft_test_result_skip("%s.%s.%s.%d.%d.%s\n",
test_class_name, test_name,
- data->card, data->device, data->subdevice,
+ data->card_name, data->device,
+ data->subdevice,
snd_pcm_stream_name(data->stream));
if (msg[0])
@@ -609,8 +639,8 @@ int main(void)
conf->filename, conf->config_id);
for (pcm = pcm_missing; pcm != NULL; pcm = pcm->next) {
- ksft_test_result(false, "test.missing.%d.%d.%d.%s\n",
- pcm->card, pcm->device, pcm->subdevice,
+ ksft_test_result(false, "test.missing.%s.%d.%d.%s\n",
+ pcm->card_name, pcm->device, pcm->subdevice,
snd_pcm_stream_name(pcm->stream));
}
diff --git a/tools/testing/selftests/arm64/abi/ptrace.c b/tools/testing/selftests/arm64/abi/ptrace.c
index abe4d58d731d..e4fa507cbdd0 100644
--- a/tools/testing/selftests/arm64/abi/ptrace.c
+++ b/tools/testing/selftests/arm64/abi/ptrace.c
@@ -47,7 +47,7 @@ static void test_tpidr(pid_t child)
/* ...write a new value.. */
write_iov.iov_len = sizeof(uint64_t);
- write_val[0] = read_val[0]++;
+ write_val[0] = read_val[0] + 1;
ret = ptrace(PTRACE_SETREGSET, child, NT_ARM_TLS, &write_iov);
ksft_test_result(ret == 0, "write_tpidr_one\n");
@@ -156,7 +156,7 @@ static void test_hw_debug(pid_t child, int type, const char *type_name)
/* Zero is not currently architecturally valid */
ksft_test_result(arch, "%s_arch_set\n", type_name);
} else {
- ksft_test_result_skip("%s_arch_set\n");
+ ksft_test_result_skip("%s_arch_set\n", type_name);
}
}
diff --git a/tools/testing/selftests/arm64/abi/tpidr2.c b/tools/testing/selftests/arm64/abi/tpidr2.c
index 02ee3a91b780..285c47dd42f6 100644
--- a/tools/testing/selftests/arm64/abi/tpidr2.c
+++ b/tools/testing/selftests/arm64/abi/tpidr2.c
@@ -262,7 +262,7 @@ static int write_clone_read(void)
int main(int argc, char **argv)
{
- int ret, i;
+ int ret;
putstr("TAP version 13\n");
putstr("1..");
diff --git a/tools/testing/selftests/arm64/fp/.gitignore b/tools/testing/selftests/arm64/fp/.gitignore
index 00e52c966281..8362e7ec35ad 100644
--- a/tools/testing/selftests/arm64/fp/.gitignore
+++ b/tools/testing/selftests/arm64/fp/.gitignore
@@ -2,6 +2,7 @@ fp-pidbench
fp-ptrace
fp-stress
fpsimd-test
+kernel-test
rdvl-sme
rdvl-sve
sve-probe-vls
diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile
index 55d4f00d9e8e..d171021e4cdd 100644
--- a/tools/testing/selftests/arm64/fp/Makefile
+++ b/tools/testing/selftests/arm64/fp/Makefile
@@ -12,6 +12,7 @@ TEST_GEN_PROGS := \
vec-syscfg \
za-fork za-ptrace
TEST_GEN_PROGS_EXTENDED := fp-pidbench fpsimd-test \
+ kernel-test \
rdvl-sme rdvl-sve \
sve-test \
ssve-test \
diff --git a/tools/testing/selftests/arm64/fp/fp-stress.c b/tools/testing/selftests/arm64/fp/fp-stress.c
index dd31647b00a2..faac24bdefeb 100644
--- a/tools/testing/selftests/arm64/fp/fp-stress.c
+++ b/tools/testing/selftests/arm64/fp/fp-stress.c
@@ -319,6 +319,19 @@ static void start_fpsimd(struct child_data *child, int cpu, int copy)
ksft_print_msg("Started %s\n", child->name);
}
+static void start_kernel(struct child_data *child, int cpu, int copy)
+{
+ int ret;
+
+ ret = asprintf(&child->name, "KERNEL-%d-%d", cpu, copy);
+ if (ret == -1)
+ ksft_exit_fail_msg("asprintf() failed\n");
+
+ child_start(child, "./kernel-test");
+
+ ksft_print_msg("Started %s\n", child->name);
+}
+
static void start_sve(struct child_data *child, int vl, int cpu)
{
int ret;
@@ -438,7 +451,7 @@ int main(int argc, char **argv)
int ret;
int timeout = 10;
int cpus, i, j, c;
- int sve_vl_count, sme_vl_count, fpsimd_per_cpu;
+ int sve_vl_count, sme_vl_count;
bool all_children_started = false;
int seen_children;
int sve_vls[MAX_VLS], sme_vls[MAX_VLS];
@@ -482,12 +495,7 @@ int main(int argc, char **argv)
have_sme2 = false;
}
- /* Force context switching if we only have FPSIMD */
- if (!sve_vl_count && !sme_vl_count)
- fpsimd_per_cpu = 2;
- else
- fpsimd_per_cpu = 1;
- tests += cpus * fpsimd_per_cpu;
+ tests += cpus * 2;
ksft_print_header();
ksft_set_plan(tests);
@@ -542,8 +550,8 @@ int main(int argc, char **argv)
tests);
for (i = 0; i < cpus; i++) {
- for (j = 0; j < fpsimd_per_cpu; j++)
- start_fpsimd(&children[num_children++], i, j);
+ start_fpsimd(&children[num_children++], i, 0);
+ start_kernel(&children[num_children++], i, 0);
for (j = 0; j < sve_vl_count; j++)
start_sve(&children[num_children++], sve_vls[j], i);
diff --git a/tools/testing/selftests/arm64/fp/kernel-test.c b/tools/testing/selftests/arm64/fp/kernel-test.c
new file mode 100644
index 000000000000..e8da3b4cbd23
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/kernel-test.c
@@ -0,0 +1,324 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2024 ARM Limited.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/socket.h>
+
+#include <linux/kernel.h>
+#include <linux/if_alg.h>
+
+#define DATA_SIZE (16 * 4096)
+
+static int base, sock;
+
+static int digest_len;
+static char *ref;
+static char *digest;
+static char *alg_name;
+
+static struct iovec data_iov;
+static int zerocopy[2];
+static int sigs;
+static int iter;
+
+static void handle_exit_signal(int sig, siginfo_t *info, void *context)
+{
+ printf("Terminated by signal %d, iterations=%d, signals=%d\n",
+ sig, iter, sigs);
+ exit(0);
+}
+
+static void handle_kick_signal(int sig, siginfo_t *info, void *context)
+{
+ sigs++;
+}
+
+static char *drivers[] = {
+ "crct10dif-arm64-ce",
+ /* "crct10dif-arm64-neon", - Same priority as generic */
+ "sha1-ce",
+ "sha224-arm64",
+ "sha224-arm64-neon",
+ "sha224-ce",
+ "sha256-arm64",
+ "sha256-arm64-neon",
+ "sha256-ce",
+ "sha384-ce",
+ "sha512-ce",
+ "sha3-224-ce",
+ "sha3-256-ce",
+ "sha3-384-ce",
+ "sha3-512-ce",
+ "sm3-ce",
+ "sm3-neon",
+};
+
+static bool create_socket(void)
+{
+ FILE *proc;
+ struct sockaddr_alg addr;
+ char buf[1024];
+ char *c, *driver_name;
+ bool is_shash, match;
+ int ret, i;
+
+ ret = socket(AF_ALG, SOCK_SEQPACKET, 0);
+ if (ret < 0) {
+ if (errno == EAFNOSUPPORT) {
+ printf("AF_ALG not supported\n");
+ return false;
+ }
+
+ printf("Failed to create AF_ALG socket: %s (%d)\n",
+ strerror(errno), errno);
+ return false;
+ }
+ base = ret;
+
+ memset(&addr, 0, sizeof(addr));
+ addr.salg_family = AF_ALG;
+ strncpy((char *)addr.salg_type, "hash", sizeof(addr.salg_type));
+
+ proc = fopen("/proc/crypto", "r");
+ if (!proc) {
+ printf("Unable to open /proc/crypto\n");
+ return false;
+ }
+
+ driver_name = NULL;
+ is_shash = false;
+ match = false;
+
+ /* Look through /proc/crypto for a driver with kernel mode FP usage */
+ while (!match) {
+ c = fgets(buf, sizeof(buf), proc);
+ if (!c) {
+ if (feof(proc)) {
+ printf("Nothing found in /proc/crypto\n");
+ return false;
+ }
+ continue;
+ }
+
+ /* Algorithm descriptions are separated by a blank line */
+ if (*c == '\n') {
+ if (is_shash && driver_name) {
+ for (i = 0; i < ARRAY_SIZE(drivers); i++) {
+ if (strcmp(drivers[i],
+ driver_name) == 0) {
+ match = true;
+ }
+ }
+ }
+
+ if (!match) {
+ digest_len = 0;
+
+ free(driver_name);
+ driver_name = NULL;
+
+ free(alg_name);
+ alg_name = NULL;
+
+ is_shash = false;
+ }
+ continue;
+ }
+
+ /* Remove trailing newline */
+ c = strchr(buf, '\n');
+ if (c)
+ *c = '\0';
+
+ /* Find the field/value separator and start of the value */
+ c = strchr(buf, ':');
+ if (!c)
+ continue;
+ c += 2;
+
+ if (strncmp(buf, "digestsize", strlen("digestsize")) == 0)
+ sscanf(c, "%d", &digest_len);
+
+ if (strncmp(buf, "name", strlen("name")) == 0)
+ alg_name = strdup(c);
+
+ if (strncmp(buf, "driver", strlen("driver")) == 0)
+ driver_name = strdup(c);
+
+ if (strncmp(buf, "type", strlen("type")) == 0)
+ if (strncmp(c, "shash", strlen("shash")) == 0)
+ is_shash = true;
+ }
+
+ strncpy((char *)addr.salg_name, alg_name,
+ sizeof(addr.salg_name) - 1);
+
+ ret = bind(base, (struct sockaddr *)&addr, sizeof(addr));
+ if (ret < 0) {
+ printf("Failed to bind %s: %s (%d)\n",
+ addr.salg_name, strerror(errno), errno);
+ return false;
+ }
+
+ ret = accept(base, NULL, 0);
+ if (ret < 0) {
+ printf("Failed to accept %s: %s (%d)\n",
+ addr.salg_name, strerror(errno), errno);
+ return false;
+ }
+
+ sock = ret;
+
+ ret = pipe(zerocopy);
+ if (ret != 0) {
+ printf("Failed to create zerocopy pipe: %s (%d)\n",
+ strerror(errno), errno);
+ return false;
+ }
+
+ ref = malloc(digest_len);
+ if (!ref) {
+ printf("Failed to allocated %d byte reference\n", digest_len);
+ return false;
+ }
+
+ digest = malloc(digest_len);
+ if (!digest) {
+ printf("Failed to allocated %d byte digest\n", digest_len);
+ return false;
+ }
+
+ return true;
+}
+
+static bool compute_digest(void *buf)
+{
+ struct iovec iov;
+ int ret, wrote;
+
+ iov = data_iov;
+ while (iov.iov_len) {
+ ret = vmsplice(zerocopy[1], &iov, 1, SPLICE_F_GIFT);
+ if (ret < 0) {
+ printf("Failed to send buffer: %s (%d)\n",
+ strerror(errno), errno);
+ return false;
+ }
+
+ wrote = ret;
+ ret = splice(zerocopy[0], NULL, sock, NULL, wrote, 0);
+ if (ret < 0) {
+ printf("Failed to splice buffer: %s (%d)\n",
+ strerror(errno), errno);
+ } else if (ret != wrote) {
+ printf("Short splice: %d < %d\n", ret, wrote);
+ }
+
+ iov.iov_len -= wrote;
+ iov.iov_base += wrote;
+ }
+
+reread:
+ ret = recv(sock, buf, digest_len, 0);
+ if (ret == 0) {
+ printf("No digest returned\n");
+ return false;
+ }
+ if (ret != digest_len) {
+ if (errno == -EAGAIN)
+ goto reread;
+ printf("Failed to get digest: %s (%d)\n",
+ strerror(errno), errno);
+ return false;
+ }
+
+ return true;
+}
+
+int main(void)
+{
+ char *data;
+ struct sigaction sa;
+ int ret;
+
+ /* Ensure we have unbuffered output */
+ setvbuf(stdout, NULL, _IOLBF, 0);
+
+ /* The parent will communicate with us via signals */
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handle_exit_signal;
+ sa.sa_flags = SA_RESTART | SA_SIGINFO;
+ sigemptyset(&sa.sa_mask);
+ ret = sigaction(SIGTERM, &sa, NULL);
+ if (ret < 0)
+ printf("Failed to install SIGTERM handler: %s (%d)\n",
+ strerror(errno), errno);
+
+ sa.sa_sigaction = handle_kick_signal;
+ ret = sigaction(SIGUSR2, &sa, NULL);
+ if (ret < 0)
+ printf("Failed to install SIGUSR2 handler: %s (%d)\n",
+ strerror(errno), errno);
+
+ data = malloc(DATA_SIZE);
+ if (!data) {
+ printf("Failed to allocate data buffer\n");
+ return EXIT_FAILURE;
+ }
+ memset(data, 0, DATA_SIZE);
+
+ data_iov.iov_base = data;
+ data_iov.iov_len = DATA_SIZE;
+
+ /*
+ * If we can't create a socket assume it's a lack of system
+ * support and fall back to a basic FPSIMD test for the
+ * benefit of fp-stress.
+ */
+ if (!create_socket()) {
+ execl("./fpsimd-test", "./fpsimd-test", NULL);
+ printf("Failed to fall back to fspimd-test: %d (%s)\n",
+ errno, strerror(errno));
+ return EXIT_FAILURE;
+ }
+
+ /*
+ * Compute a reference digest we hope is repeatable, we do
+ * this at runtime partly to make it easier to play with
+ * parameters.
+ */
+ if (!compute_digest(ref)) {
+ printf("Failed to compute reference digest\n");
+ return EXIT_FAILURE;
+ }
+
+ printf("AF_ALG using %s\n", alg_name);
+
+ while (true) {
+ if (!compute_digest(digest)) {
+ printf("Failed to compute digest, iter=%d\n", iter);
+ return EXIT_FAILURE;
+ }
+
+ if (memcmp(ref, digest, digest_len) != 0) {
+ printf("Digest mismatch, iter=%d\n", iter);
+ return EXIT_FAILURE;
+ }
+
+ iter++;
+ }
+
+ return EXIT_FAILURE;
+}
diff --git a/tools/testing/selftests/arm64/tags/Makefile b/tools/testing/selftests/arm64/tags/Makefile
index 6d29cfde43a2..0a77f35295fb 100644
--- a/tools/testing/selftests/arm64/tags/Makefile
+++ b/tools/testing/selftests/arm64/tags/Makefile
@@ -2,6 +2,5 @@
CFLAGS += $(KHDR_INCLUDES)
TEST_GEN_PROGS := tags_test
-TEST_PROGS := run_tags_test.sh
include ../../lib.mk
diff --git a/tools/testing/selftests/arm64/tags/run_tags_test.sh b/tools/testing/selftests/arm64/tags/run_tags_test.sh
deleted file mode 100755
index 745f11379930..000000000000
--- a/tools/testing/selftests/arm64/tags/run_tags_test.sh
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-echo "--------------------"
-echo "running tags test"
-echo "--------------------"
-./tags_test
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
-else
- echo "[PASS]"
-fi
diff --git a/tools/testing/selftests/arm64/tags/tags_test.c b/tools/testing/selftests/arm64/tags/tags_test.c
index 5701163460ef..8ae26e496c89 100644
--- a/tools/testing/selftests/arm64/tags/tags_test.c
+++ b/tools/testing/selftests/arm64/tags/tags_test.c
@@ -6,6 +6,7 @@
#include <stdint.h>
#include <sys/prctl.h>
#include <sys/utsname.h>
+#include "../../kselftest.h"
#define SHIFT_TAG(tag) ((uint64_t)(tag) << 56)
#define SET_TAG(ptr, tag) (((uint64_t)(ptr) & ~SHIFT_TAG(0xff)) | \
@@ -16,16 +17,21 @@ int main(void)
static int tbi_enabled = 0;
unsigned long tag = 0;
struct utsname *ptr;
- int err;
+
+ ksft_print_header();
+ ksft_set_plan(1);
if (prctl(PR_SET_TAGGED_ADDR_CTRL, PR_TAGGED_ADDR_ENABLE, 0, 0, 0) == 0)
tbi_enabled = 1;
ptr = (struct utsname *)malloc(sizeof(*ptr));
+ if (!ptr)
+ ksft_exit_fail_perror("Failed to allocate utsname buffer");
+
if (tbi_enabled)
tag = 0x42;
ptr = (struct utsname *)SET_TAG(ptr, tag);
- err = uname(ptr);
+ ksft_test_result(!uname(ptr), "Syscall successful with tagged address\n");
free(ptr);
- return err;
+ ksft_finished();
}
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index f1aebabfb017..5025401323af 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -17,7 +17,6 @@ test_dev_cgroup
test_verifier_log
feature
test_sock
-test_sock_addr
urandom_read
test_sockmap
test_lirc_mode2_user
diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64
index d8ade15e2789..901349da680f 100644
--- a/tools/testing/selftests/bpf/DENYLIST.aarch64
+++ b/tools/testing/selftests/bpf/DENYLIST.aarch64
@@ -1,14 +1,12 @@
bpf_cookie/multi_kprobe_attach_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3
bpf_cookie/multi_kprobe_link_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3
-fexit_sleep # The test never returns. The remaining tests cannot start.
kprobe_multi_bench_attach # needs CONFIG_FPROBE
kprobe_multi_test # needs CONFIG_FPROBE
module_attach # prog 'kprobe_multi': failed to auto-attach: -95
fentry_test/fentry_many_args # fentry_many_args:FAIL:fentry_many_args_attach unexpected error: -524
fexit_test/fexit_many_args # fexit_many_args:FAIL:fexit_many_args_attach unexpected error: -524
+tracing_struct/struct_many_args # struct_many_args:FAIL:tracing_struct_many_args__attach unexpected error: -524
fill_link_info/kprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95
fill_link_info/kretprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95
fill_link_info/kprobe_multi_invalid_ubuff # bpf_program__attach_kprobe_multi_opts unexpected error: -95
missed/kprobe_recursion # missed_kprobe_recursion__attach unexpected error: -95 (errno 95)
-verifier_arena # JIT does not support arena
-arena_htab # JIT does not support arena
diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index f4a2f66a683d..3ebd77206f98 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -1,8 +1,5 @@
# TEMPORARY
# Alphabetical order
-exceptions # JIT does not support calling kfunc bpf_throw (exceptions)
get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace)
stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?)
verifier_iterating_callbacks
-verifier_arena # JIT does not support arena
-arena_htab # JIT does not support arena
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 3b9eb40d6343..81d4757ecd4c 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -53,6 +53,7 @@ progs/syscall.c-CFLAGS := -fno-strict-aliasing
progs/test_pkt_md_access.c-CFLAGS := -fno-strict-aliasing
progs/test_sk_lookup.c-CFLAGS := -fno-strict-aliasing
progs/timer_crash.c-CFLAGS := -fno-strict-aliasing
+progs/test_global_func9.c-CFLAGS := -fno-strict-aliasing
ifneq ($(LLVM),)
# Silence some warnings when compiled with clang
@@ -81,11 +82,24 @@ TEST_INST_SUBDIRS += bpf_gcc
# The following tests contain C code that, although technically legal,
# triggers GCC warnings that cannot be disabled: declaration of
# anonymous struct types in function parameter lists.
-progs/btf_dump_test_case_bitfields.c-CFLAGS := -Wno-error
-progs/btf_dump_test_case_namespacing.c-CFLAGS := -Wno-error
-progs/btf_dump_test_case_packing.c-CFLAGS := -Wno-error
-progs/btf_dump_test_case_padding.c-CFLAGS := -Wno-error
-progs/btf_dump_test_case_syntax.c-CFLAGS := -Wno-error
+progs/btf_dump_test_case_bitfields.c-bpf_gcc-CFLAGS := -Wno-error
+progs/btf_dump_test_case_namespacing.c-bpf_gcc-CFLAGS := -Wno-error
+progs/btf_dump_test_case_packing.c-bpf_gcc-CFLAGS := -Wno-error
+progs/btf_dump_test_case_padding.c-bpf_gcc-CFLAGS := -Wno-error
+progs/btf_dump_test_case_syntax.c-bpf_gcc-CFLAGS := -Wno-error
+
+# The following tests do type-punning, via the __imm_insn macro, from
+# `struct bpf_insn' to long and then uses the value. This triggers an
+# "is used uninitialized" warning in GCC due to strict-aliasing
+# rules.
+progs/verifier_ref_tracking.c-bpf_gcc-CFLAGS := -fno-strict-aliasing
+progs/verifier_unpriv.c-bpf_gcc-CFLAGS := -fno-strict-aliasing
+progs/verifier_cgroup_storage.c-bpf_gcc-CFLAGS := -fno-strict-aliasing
+progs/verifier_ld_ind.c-bpf_gcc-CFLAGS := -fno-strict-aliasing
+progs/verifier_map_ret_val.c-bpf_gcc-CFLAGS := -fno-strict-aliasing
+progs/verifier_spill_fill.c-bpf_gcc-CFLAGS := -fno-strict-aliasing
+progs/verifier_subprog_precision.c-bpf_gcc-CFLAGS := -fno-strict-aliasing
+progs/verifier_uninit.c-bpf_gcc-CFLAGS := -fno-strict-aliasing
endif
ifneq ($(CLANG_CPUV4),)
@@ -102,8 +116,6 @@ TEST_PROGS := test_kmod.sh \
test_xdp_redirect_multi.sh \
test_xdp_meta.sh \
test_xdp_veth.sh \
- test_offload.py \
- test_sock_addr.sh \
test_tunnel.sh \
test_lwt_seg6local.sh \
test_lirc_mode2.sh \
@@ -128,7 +140,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \
test_xdp_vlan.sh test_bpftool.py
# Compile but not part of 'make run_tests'
-TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
+TEST_GEN_PROGS_EXTENDED = test_skb_cgroup_id_user \
flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \
xskxceiver xdp_redirect_multi xdp_synproxy veristat xdp_hw_metadata \
@@ -136,18 +148,7 @@ TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
TEST_GEN_FILES += liburandom_read.so urandom_read sign-file uprobe_multi
-# Emit succinct information message describing current building step
-# $1 - generic step name (e.g., CC, LINK, etc);
-# $2 - optional "flavor" specifier; if provided, will be emitted as [flavor];
-# $3 - target (assumed to be file); only file name will be emitted;
-# $4 - optional extra arg, emitted as-is, if provided.
-ifeq ($(V),1)
-Q =
-msg =
-else
-Q = @
-msg = @printf ' %-8s%s %s%s\n' "$(1)" "$(if $(2), [$(2)])" "$(notdir $(3))" "$(if $(4), $(4))";
-MAKEFLAGS += --no-print-directory
+ifneq ($(V),1)
submake_extras := feature_display=0
endif
@@ -274,7 +275,7 @@ $(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(RUNQSLOWER_OUTPUT)
$(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \
OUTPUT=$(RUNQSLOWER_OUTPUT) VMLINUX_BTF=$(VMLINUX_BTF) \
BPFTOOL_OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \
- BPFOBJ_OUTPUT=$(BUILD_DIR)/libbpf \
+ BPFOBJ_OUTPUT=$(BUILD_DIR)/libbpf/ \
BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) \
EXTRA_CFLAGS='-g $(OPT_FLAGS) $(SAN_CFLAGS)' \
EXTRA_LDFLAGS='$(SAN_LDFLAGS)' && \
@@ -290,11 +291,11 @@ UNPRIV_HELPERS := $(OUTPUT)/unpriv_helpers.o
TRACE_HELPERS := $(OUTPUT)/trace_helpers.o
JSON_WRITER := $(OUTPUT)/json_writer.o
CAP_HELPERS := $(OUTPUT)/cap_helpers.o
+NETWORK_HELPERS := $(OUTPUT)/network_helpers.o
$(OUTPUT)/test_dev_cgroup: $(CGROUP_HELPERS) $(TESTING_HELPERS)
$(OUTPUT)/test_skb_cgroup_id_user: $(CGROUP_HELPERS) $(TESTING_HELPERS)
$(OUTPUT)/test_sock: $(CGROUP_HELPERS) $(TESTING_HELPERS)
-$(OUTPUT)/test_sock_addr: $(CGROUP_HELPERS) $(TESTING_HELPERS)
$(OUTPUT)/test_sockmap: $(CGROUP_HELPERS) $(TESTING_HELPERS)
$(OUTPUT)/test_tcpnotify_user: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(TRACE_HELPERS)
$(OUTPUT)/get_cgroup_id_user: $(CGROUP_HELPERS) $(TESTING_HELPERS)
@@ -308,6 +309,7 @@ $(OUTPUT)/flow_dissector_load: $(TESTING_HELPERS)
$(OUTPUT)/test_maps: $(TESTING_HELPERS)
$(OUTPUT)/test_verifier: $(TESTING_HELPERS) $(CAP_HELPERS) $(UNPRIV_HELPERS)
$(OUTPUT)/xsk.o: $(BPFOBJ)
+$(OUTPUT)/test_tcp_check_syncookie_user: $(NETWORK_HELPERS)
BPFTOOL ?= $(DEFAULT_BPFTOOL)
$(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \
@@ -442,7 +444,7 @@ endef
# Build BPF object using GCC
define GCC_BPF_BUILD_RULE
$(call msg,GCC-BPF,$(TRUNNER_BINARY),$2)
- $(Q)$(BPF_GCC) $3 -O2 -c $1 -o $2
+ $(Q)$(BPF_GCC) $3 -DBPF_NO_PRESERVE_ACCESS_INDEX -Wno-attributes -O2 -c $1 -o $2
endef
SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c
@@ -455,7 +457,7 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \
LSKELS := fentry_test.c fexit_test.c fexit_sleep.c atomics.c \
trace_printk.c trace_vprintk.c map_ptr_kern.c \
core_kern.c core_kern_overflow.c test_ringbuf.c \
- test_ringbuf_map_key.c
+ test_ringbuf_n.c test_ringbuf_map_key.c test_ringbuf_write.c
# Generate both light skeleton and libbpf skeleton for these
LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test.c \
@@ -481,7 +483,7 @@ LINKED_BPF_SRCS := $(patsubst %.bpf.o,%.c,$(foreach skel,$(LINKED_SKELS),$($(ske
# $eval()) and pass control to DEFINE_TEST_RUNNER_RULES.
# Parameters:
# $1 - test runner base binary name (e.g., test_progs)
-# $2 - test runner extra "flavor" (e.g., no_alu32, cpuv4, gcc-bpf, etc)
+# $2 - test runner extra "flavor" (e.g., no_alu32, cpuv4, bpf_gcc, etc)
define DEFINE_TEST_RUNNER
TRUNNER_OUTPUT := $(OUTPUT)$(if $2,/)$2
@@ -509,7 +511,7 @@ endef
# Using TRUNNER_XXX variables, provided by callers of DEFINE_TEST_RUNNER and
# set up by DEFINE_TEST_RUNNER itself, create test runner build rules with:
# $1 - test runner base binary name (e.g., test_progs)
-# $2 - test runner extra "flavor" (e.g., no_alu32, cpuv4, gcc-bpf, etc)
+# $2 - test runner extra "flavor" (e.g., no_alu32, cpuv4, bpf_gcc, etc)
define DEFINE_TEST_RUNNER_RULES
ifeq ($($(TRUNNER_OUTPUT)-dir),)
@@ -532,7 +534,8 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.bpf.o: \
| $(TRUNNER_OUTPUT) $$(BPFOBJ)
$$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \
$(TRUNNER_BPF_CFLAGS) \
- $$($$<-CFLAGS))
+ $$($$<-CFLAGS) \
+ $$($$<-$2-CFLAGS))
$(TRUNNER_BPF_SKELS): %.skel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
$$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@)
@@ -658,7 +661,7 @@ $(eval $(call DEFINE_TEST_RUNNER,test_progs,no_alu32))
# Define test_progs-cpuv4 test runner.
ifneq ($(CLANG_CPUV4),)
TRUNNER_BPF_BUILD_RULE := CLANG_CPUV4_BPF_BUILD_RULE
-TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
+TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) -DENABLE_ATOMICS_TESTS
$(eval $(call DEFINE_TEST_RUNNER,test_progs,cpuv4))
endif
@@ -695,7 +698,7 @@ $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT)
# Include find_bit.c to compile xskxceiver.
EXTRA_SRC := $(TOOLSDIR)/lib/find_bit.c
-$(OUTPUT)/xskxceiver: $(EXTRA_SRC) xskxceiver.c xskxceiver.h $(OUTPUT)/xsk.o $(OUTPUT)/xsk_xdp_progs.skel.h $(BPFOBJ) | $(OUTPUT)
+$(OUTPUT)/xskxceiver: $(EXTRA_SRC) xskxceiver.c xskxceiver.h $(OUTPUT)/network_helpers.o $(OUTPUT)/xsk.o $(OUTPUT)/xsk_xdp_progs.skel.h $(BPFOBJ) | $(OUTPUT)
$(call msg,BINARY,,$@)
$(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
@@ -710,7 +713,7 @@ $(OUTPUT)/xdp_features: xdp_features.c $(OUTPUT)/network_helpers.o $(OUTPUT)/xdp
# Make sure we are able to include and link libbpf against c++.
$(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ)
$(call msg,CXX,,$@)
- $(Q)$(CXX) $(CFLAGS) $(filter %.a %.o %.cpp,$^) $(LDLIBS) -o $@
+ $(Q)$(CXX) $(subst -D_GNU_SOURCE=,,$(CFLAGS)) $(filter %.a %.o %.cpp,$^) $(LDLIBS) -o $@
# Benchmark runner
$(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h $(BPFOBJ)
@@ -729,6 +732,7 @@ $(OUTPUT)/bench_local_storage_rcu_tasks_trace.o: $(OUTPUT)/local_storage_rcu_tas
$(OUTPUT)/bench_local_storage_create.o: $(OUTPUT)/bench_local_storage_create.skel.h
$(OUTPUT)/bench_bpf_hashmap_lookup.o: $(OUTPUT)/bpf_hashmap_lookup.skel.h
$(OUTPUT)/bench_htab_mem.o: $(OUTPUT)/htab_mem_bench.skel.h
+$(OUTPUT)/bench_bpf_crypto.o: $(OUTPUT)/crypto_bench.skel.h
$(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ)
$(OUTPUT)/bench: LDLIBS += -lm
$(OUTPUT)/bench: $(OUTPUT)/bench.o \
@@ -748,6 +752,7 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \
$(OUTPUT)/bench_bpf_hashmap_lookup.o \
$(OUTPUT)/bench_local_storage_create.o \
$(OUTPUT)/bench_htab_mem.o \
+ $(OUTPUT)/bench_bpf_crypto.o \
#
$(call msg,BINARY,,$@)
$(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@
@@ -759,7 +764,7 @@ $(OUTPUT)/veristat: $(OUTPUT)/veristat.o
$(OUTPUT)/uprobe_multi: uprobe_multi.c
$(call msg,BINARY,,$@)
- $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $^ $(LDLIBS) -o $@
+ $(Q)$(CC) $(CFLAGS) -O0 $(LDFLAGS) $^ $(LDLIBS) -o $@
EXTRA_CLEAN := $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \
prog_tests/tests.h map_tests/tests.h verifier/tests.h \
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index b2b4c391eb0a..627b74ae041b 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -280,6 +280,8 @@ extern struct argp bench_strncmp_argp;
extern struct argp bench_hashmap_lookup_argp;
extern struct argp bench_local_storage_create_argp;
extern struct argp bench_htab_mem_argp;
+extern struct argp bench_trigger_batch_argp;
+extern struct argp bench_crypto_argp;
static const struct argp_child bench_parsers[] = {
{ &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 },
@@ -292,6 +294,8 @@ static const struct argp_child bench_parsers[] = {
{ &bench_hashmap_lookup_argp, 0, "Hashmap lookup benchmark", 0 },
{ &bench_local_storage_create_argp, 0, "local-storage-create benchmark", 0 },
{ &bench_htab_mem_argp, 0, "hash map memory benchmark", 0 },
+ { &bench_trigger_batch_argp, 0, "BPF triggering benchmark", 0 },
+ { &bench_crypto_argp, 0, "bpf crypto benchmark", 0 },
{},
};
@@ -491,24 +495,31 @@ extern const struct bench bench_rename_kretprobe;
extern const struct bench bench_rename_rawtp;
extern const struct bench bench_rename_fentry;
extern const struct bench bench_rename_fexit;
-extern const struct bench bench_trig_base;
-extern const struct bench bench_trig_tp;
-extern const struct bench bench_trig_rawtp;
+
+/* pure counting benchmarks to establish theoretical lmits */
+extern const struct bench bench_trig_usermode_count;
+extern const struct bench bench_trig_syscall_count;
+extern const struct bench bench_trig_kernel_count;
+
+/* batched, staying mostly in-kernel benchmarks */
extern const struct bench bench_trig_kprobe;
extern const struct bench bench_trig_kretprobe;
extern const struct bench bench_trig_kprobe_multi;
extern const struct bench bench_trig_kretprobe_multi;
extern const struct bench bench_trig_fentry;
extern const struct bench bench_trig_fexit;
-extern const struct bench bench_trig_fentry_sleep;
extern const struct bench bench_trig_fmodret;
-extern const struct bench bench_trig_uprobe_base;
+extern const struct bench bench_trig_tp;
+extern const struct bench bench_trig_rawtp;
+
+/* uprobe/uretprobe benchmarks */
extern const struct bench bench_trig_uprobe_nop;
extern const struct bench bench_trig_uretprobe_nop;
extern const struct bench bench_trig_uprobe_push;
extern const struct bench bench_trig_uretprobe_push;
extern const struct bench bench_trig_uprobe_ret;
extern const struct bench bench_trig_uretprobe_ret;
+
extern const struct bench bench_rb_libbpf;
extern const struct bench bench_rb_custom;
extern const struct bench bench_pb_libbpf;
@@ -529,6 +540,8 @@ extern const struct bench bench_local_storage_tasks_trace;
extern const struct bench bench_bpf_hashmap_lookup;
extern const struct bench bench_local_storage_create;
extern const struct bench bench_htab_mem;
+extern const struct bench bench_crypto_encrypt;
+extern const struct bench bench_crypto_decrypt;
static const struct bench *benchs[] = {
&bench_count_global,
@@ -539,24 +552,28 @@ static const struct bench *benchs[] = {
&bench_rename_rawtp,
&bench_rename_fentry,
&bench_rename_fexit,
- &bench_trig_base,
- &bench_trig_tp,
- &bench_trig_rawtp,
+ /* pure counting benchmarks for establishing theoretical limits */
+ &bench_trig_usermode_count,
+ &bench_trig_kernel_count,
+ &bench_trig_syscall_count,
+ /* batched, staying mostly in-kernel triggers */
&bench_trig_kprobe,
&bench_trig_kretprobe,
&bench_trig_kprobe_multi,
&bench_trig_kretprobe_multi,
&bench_trig_fentry,
&bench_trig_fexit,
- &bench_trig_fentry_sleep,
&bench_trig_fmodret,
- &bench_trig_uprobe_base,
+ &bench_trig_tp,
+ &bench_trig_rawtp,
+ /* uprobes */
&bench_trig_uprobe_nop,
&bench_trig_uretprobe_nop,
&bench_trig_uprobe_push,
&bench_trig_uretprobe_push,
&bench_trig_uprobe_ret,
&bench_trig_uretprobe_ret,
+ /* ringbuf/perfbuf benchmarks */
&bench_rb_libbpf,
&bench_rb_custom,
&bench_pb_libbpf,
@@ -577,6 +594,8 @@ static const struct bench *benchs[] = {
&bench_bpf_hashmap_lookup,
&bench_local_storage_create,
&bench_htab_mem,
+ &bench_crypto_encrypt,
+ &bench_crypto_decrypt,
};
static void find_benchmark(void)
diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_crypto.c b/tools/testing/selftests/bpf/benchs/bench_bpf_crypto.c
new file mode 100644
index 000000000000..2845edaba8db
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_bpf_crypto.c
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <argp.h>
+#include "bench.h"
+#include "crypto_bench.skel.h"
+
+#define MAX_CIPHER_LEN 32
+static char *input;
+static struct crypto_ctx {
+ struct crypto_bench *skel;
+ int pfd;
+} ctx;
+
+static struct crypto_args {
+ u32 crypto_len;
+ char *crypto_cipher;
+} args = {
+ .crypto_len = 16,
+ .crypto_cipher = "ecb(aes)",
+};
+
+enum {
+ ARG_CRYPTO_LEN = 5000,
+ ARG_CRYPTO_CIPHER = 5001,
+};
+
+static const struct argp_option opts[] = {
+ { "crypto-len", ARG_CRYPTO_LEN, "CRYPTO_LEN", 0,
+ "Set the length of crypto buffer" },
+ { "crypto-cipher", ARG_CRYPTO_CIPHER, "CRYPTO_CIPHER", 0,
+ "Set the cipher to use (default:ecb(aes))" },
+ {},
+};
+
+static error_t crypto_parse_arg(int key, char *arg, struct argp_state *state)
+{
+ switch (key) {
+ case ARG_CRYPTO_LEN:
+ args.crypto_len = strtoul(arg, NULL, 10);
+ if (!args.crypto_len ||
+ args.crypto_len > sizeof(ctx.skel->bss->dst)) {
+ fprintf(stderr, "Invalid crypto buffer len (limit %zu)\n",
+ sizeof(ctx.skel->bss->dst));
+ argp_usage(state);
+ }
+ break;
+ case ARG_CRYPTO_CIPHER:
+ args.crypto_cipher = strdup(arg);
+ if (!strlen(args.crypto_cipher) ||
+ strlen(args.crypto_cipher) > MAX_CIPHER_LEN) {
+ fprintf(stderr, "Invalid crypto cipher len (limit %d)\n",
+ MAX_CIPHER_LEN);
+ argp_usage(state);
+ }
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+const struct argp bench_crypto_argp = {
+ .options = opts,
+ .parser = crypto_parse_arg,
+};
+
+static void crypto_validate(void)
+{
+ if (env.consumer_cnt != 0) {
+ fprintf(stderr, "bpf crypto benchmark doesn't support consumer!\n");
+ exit(1);
+ }
+}
+
+static void crypto_setup(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+
+ int err, pfd;
+ size_t i, sz;
+
+ sz = args.crypto_len;
+ if (!sz || sz > sizeof(ctx.skel->bss->dst)) {
+ fprintf(stderr, "invalid encrypt buffer size (source %zu, target %zu)\n",
+ sz, sizeof(ctx.skel->bss->dst));
+ exit(1);
+ }
+
+ setup_libbpf();
+
+ ctx.skel = crypto_bench__open();
+ if (!ctx.skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ snprintf(ctx.skel->bss->cipher, 128, "%s", args.crypto_cipher);
+ memcpy(ctx.skel->bss->key, "12345678testtest", 16);
+ ctx.skel->bss->key_len = 16;
+ ctx.skel->bss->authsize = 0;
+
+ srandom(time(NULL));
+ input = malloc(sz);
+ for (i = 0; i < sz - 1; i++)
+ input[i] = '1' + random() % 9;
+ input[sz - 1] = '\0';
+
+ ctx.skel->rodata->len = args.crypto_len;
+
+ err = crypto_bench__load(ctx.skel);
+ if (err) {
+ fprintf(stderr, "failed to load skeleton\n");
+ crypto_bench__destroy(ctx.skel);
+ exit(1);
+ }
+
+ pfd = bpf_program__fd(ctx.skel->progs.crypto_setup);
+ if (pfd < 0) {
+ fprintf(stderr, "failed to get fd for setup prog\n");
+ crypto_bench__destroy(ctx.skel);
+ exit(1);
+ }
+
+ err = bpf_prog_test_run_opts(pfd, &opts);
+ if (err || ctx.skel->bss->status) {
+ fprintf(stderr, "failed to run setup prog: err %d, status %d\n",
+ err, ctx.skel->bss->status);
+ crypto_bench__destroy(ctx.skel);
+ exit(1);
+ }
+}
+
+static void crypto_encrypt_setup(void)
+{
+ crypto_setup();
+ ctx.pfd = bpf_program__fd(ctx.skel->progs.crypto_encrypt);
+}
+
+static void crypto_decrypt_setup(void)
+{
+ crypto_setup();
+ ctx.pfd = bpf_program__fd(ctx.skel->progs.crypto_decrypt);
+}
+
+static void crypto_measure(struct bench_res *res)
+{
+ res->hits = atomic_swap(&ctx.skel->bss->hits, 0);
+}
+
+static void *crypto_producer(void *unused)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .repeat = 64,
+ .data_in = input,
+ .data_size_in = args.crypto_len,
+ );
+
+ while (true)
+ (void)bpf_prog_test_run_opts(ctx.pfd, &opts);
+ return NULL;
+}
+
+const struct bench bench_crypto_encrypt = {
+ .name = "crypto-encrypt",
+ .argp = &bench_crypto_argp,
+ .validate = crypto_validate,
+ .setup = crypto_encrypt_setup,
+ .producer_thread = crypto_producer,
+ .measure = crypto_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_crypto_decrypt = {
+ .name = "crypto-decrypt",
+ .argp = &bench_crypto_argp,
+ .validate = crypto_validate,
+ .setup = crypto_decrypt_setup,
+ .producer_thread = crypto_producer,
+ .measure = crypto_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c b/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c
index b36de42ee4d9..e2ff8ea1cb79 100644
--- a/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c
+++ b/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c
@@ -186,7 +186,7 @@ static void *task_producer(void *input)
for (i = 0; i < batch_sz; i++) {
if (!pthd_results[i])
- pthread_join(pthds[i], NULL);;
+ pthread_join(pthds[i], NULL);
}
}
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index ace0d1011a8e..4b05539f167d 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -1,15 +1,95 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
+#define _GNU_SOURCE
+#include <argp.h>
+#include <unistd.h>
+#include <stdint.h>
#include "bench.h"
#include "trigger_bench.skel.h"
#include "trace_helpers.h"
+#define MAX_TRIG_BATCH_ITERS 1000
+
+static struct {
+ __u32 batch_iters;
+} args = {
+ .batch_iters = 100,
+};
+
+enum {
+ ARG_TRIG_BATCH_ITERS = 7000,
+};
+
+static const struct argp_option opts[] = {
+ { "trig-batch-iters", ARG_TRIG_BATCH_ITERS, "BATCH_ITER_CNT", 0,
+ "Number of in-kernel iterations per one driver test run"},
+ {},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+ long ret;
+
+ switch (key) {
+ case ARG_TRIG_BATCH_ITERS:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > MAX_TRIG_BATCH_ITERS) {
+ fprintf(stderr, "invalid --trig-batch-iters value (should be between %d and %d)\n",
+ 1, MAX_TRIG_BATCH_ITERS);
+ argp_usage(state);
+ }
+ args.batch_iters = ret;
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+const struct argp bench_trigger_batch_argp = {
+ .options = opts,
+ .parser = parse_arg,
+};
+
+/* adjust slot shift in inc_hits() if changing */
+#define MAX_BUCKETS 256
+
+#pragma GCC diagnostic ignored "-Wattributes"
+
/* BPF triggering benchmarks */
static struct trigger_ctx {
struct trigger_bench *skel;
+ bool usermode_counters;
+ int driver_prog_fd;
} ctx;
-static struct counter base_hits;
+static struct counter base_hits[MAX_BUCKETS];
+
+static __always_inline void inc_counter(struct counter *counters)
+{
+ static __thread int tid = 0;
+ unsigned slot;
+
+ if (unlikely(tid == 0))
+ tid = syscall(SYS_gettid);
+
+ /* multiplicative hashing, it's fast */
+ slot = 2654435769U * tid;
+ slot >>= 24;
+
+ atomic_inc(&base_hits[slot].value); /* use highest byte as an index */
+}
+
+static long sum_and_reset_counters(struct counter *counters)
+{
+ int i;
+ long sum = 0;
+
+ for (i = 0; i < MAX_BUCKETS; i++)
+ sum += atomic_swap(&counters[i].value, 0);
+ return sum;
+}
static void trigger_validate(void)
{
@@ -19,41 +99,63 @@ static void trigger_validate(void)
}
}
-static void *trigger_base_producer(void *input)
+static void *trigger_producer(void *input)
{
- while (true) {
- (void)syscall(__NR_getpgid);
- atomic_inc(&base_hits.value);
+ if (ctx.usermode_counters) {
+ while (true) {
+ (void)syscall(__NR_getpgid);
+ inc_counter(base_hits);
+ }
+ } else {
+ while (true)
+ (void)syscall(__NR_getpgid);
}
return NULL;
}
-static void trigger_base_measure(struct bench_res *res)
+static void *trigger_producer_batch(void *input)
{
- res->hits = atomic_swap(&base_hits.value, 0);
-}
+ int fd = ctx.driver_prog_fd ?: bpf_program__fd(ctx.skel->progs.trigger_driver);
-static void *trigger_producer(void *input)
-{
while (true)
- (void)syscall(__NR_getpgid);
+ bpf_prog_test_run_opts(fd, NULL);
+
return NULL;
}
static void trigger_measure(struct bench_res *res)
{
- res->hits = atomic_swap(&ctx.skel->bss->hits, 0);
+ if (ctx.usermode_counters)
+ res->hits = sum_and_reset_counters(base_hits);
+ else
+ res->hits = sum_and_reset_counters(ctx.skel->bss->hits);
}
static void setup_ctx(void)
{
setup_libbpf();
- ctx.skel = trigger_bench__open_and_load();
+ ctx.skel = trigger_bench__open();
if (!ctx.skel) {
fprintf(stderr, "failed to open skeleton\n");
exit(1);
}
+
+ /* default "driver" BPF program */
+ bpf_program__set_autoload(ctx.skel->progs.trigger_driver, true);
+
+ ctx.skel->rodata->batch_iters = args.batch_iters;
+}
+
+static void load_ctx(void)
+{
+ int err;
+
+ err = trigger_bench__load(ctx.skel);
+ if (err) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
}
static void attach_bpf(struct bpf_program *prog)
@@ -67,64 +169,104 @@ static void attach_bpf(struct bpf_program *prog)
}
}
-static void trigger_tp_setup(void)
+static void trigger_syscall_count_setup(void)
{
- setup_ctx();
- attach_bpf(ctx.skel->progs.bench_trigger_tp);
+ ctx.usermode_counters = true;
}
-static void trigger_rawtp_setup(void)
+/* Batched, staying mostly in-kernel triggering setups */
+static void trigger_kernel_count_setup(void)
{
setup_ctx();
- attach_bpf(ctx.skel->progs.bench_trigger_raw_tp);
+ bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
+ bpf_program__set_autoload(ctx.skel->progs.trigger_count, true);
+ load_ctx();
+ /* override driver program */
+ ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_count);
}
static void trigger_kprobe_setup(void)
{
setup_ctx();
+ bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe, true);
+ load_ctx();
attach_bpf(ctx.skel->progs.bench_trigger_kprobe);
}
static void trigger_kretprobe_setup(void)
{
setup_ctx();
+ bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe, true);
+ load_ctx();
attach_bpf(ctx.skel->progs.bench_trigger_kretprobe);
}
static void trigger_kprobe_multi_setup(void)
{
setup_ctx();
+ bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kprobe_multi, true);
+ load_ctx();
attach_bpf(ctx.skel->progs.bench_trigger_kprobe_multi);
}
static void trigger_kretprobe_multi_setup(void)
{
setup_ctx();
+ bpf_program__set_autoload(ctx.skel->progs.bench_trigger_kretprobe_multi, true);
+ load_ctx();
attach_bpf(ctx.skel->progs.bench_trigger_kretprobe_multi);
}
static void trigger_fentry_setup(void)
{
setup_ctx();
+ bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fentry, true);
+ load_ctx();
attach_bpf(ctx.skel->progs.bench_trigger_fentry);
}
static void trigger_fexit_setup(void)
{
setup_ctx();
+ bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fexit, true);
+ load_ctx();
attach_bpf(ctx.skel->progs.bench_trigger_fexit);
}
-static void trigger_fentry_sleep_setup(void)
+static void trigger_fmodret_setup(void)
{
setup_ctx();
- attach_bpf(ctx.skel->progs.bench_trigger_fentry_sleep);
+ bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
+ bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true);
+ bpf_program__set_autoload(ctx.skel->progs.bench_trigger_fmodret, true);
+ load_ctx();
+ /* override driver program */
+ ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc);
+ attach_bpf(ctx.skel->progs.bench_trigger_fmodret);
}
-static void trigger_fmodret_setup(void)
+static void trigger_tp_setup(void)
{
setup_ctx();
- attach_bpf(ctx.skel->progs.bench_trigger_fmodret);
+ bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
+ bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true);
+ bpf_program__set_autoload(ctx.skel->progs.bench_trigger_tp, true);
+ load_ctx();
+ /* override driver program */
+ ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc);
+ attach_bpf(ctx.skel->progs.bench_trigger_tp);
+}
+
+static void trigger_rawtp_setup(void)
+{
+ setup_ctx();
+ bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
+ bpf_program__set_autoload(ctx.skel->progs.trigger_driver_kfunc, true);
+ bpf_program__set_autoload(ctx.skel->progs.bench_trigger_rawtp, true);
+ load_ctx();
+ /* override driver program */
+ ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_driver_kfunc);
+ attach_bpf(ctx.skel->progs.bench_trigger_rawtp);
}
/* make sure call is not inlined and not avoided by compiler, so __weak and
@@ -137,7 +279,7 @@ static void trigger_fmodret_setup(void)
* GCC doesn't generate stack setup preample for these functions due to them
* having no input arguments and doing nothing in the body.
*/
-__weak void uprobe_target_nop(void)
+__nocf_check __weak void uprobe_target_nop(void)
{
asm volatile ("nop");
}
@@ -146,7 +288,7 @@ __weak void opaque_noop_func(void)
{
}
-__weak int uprobe_target_push(void)
+__nocf_check __weak int uprobe_target_push(void)
{
/* overhead of function call is negligible compared to uprobe
* triggering, so this shouldn't affect benchmark results much
@@ -155,16 +297,16 @@ __weak int uprobe_target_push(void)
return 1;
}
-__weak void uprobe_target_ret(void)
+__nocf_check __weak void uprobe_target_ret(void)
{
asm volatile ("");
}
-static void *uprobe_base_producer(void *input)
+static void *uprobe_producer_count(void *input)
{
while (true) {
uprobe_target_nop();
- atomic_inc(&base_hits.value);
+ inc_counter(base_hits);
}
return NULL;
}
@@ -194,15 +336,24 @@ static void usetup(bool use_retprobe, void *target_addr)
{
size_t uprobe_offset;
struct bpf_link *link;
+ int err;
setup_libbpf();
- ctx.skel = trigger_bench__open_and_load();
+ ctx.skel = trigger_bench__open();
if (!ctx.skel) {
fprintf(stderr, "failed to open skeleton\n");
exit(1);
}
+ bpf_program__set_autoload(ctx.skel->progs.bench_trigger_uprobe, true);
+
+ err = trigger_bench__load(ctx.skel);
+ if (err) {
+ fprintf(stderr, "failed to load skeleton\n");
+ exit(1);
+ }
+
uprobe_offset = get_uprobe_offset(target_addr);
link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe,
use_retprobe,
@@ -216,204 +367,90 @@ static void usetup(bool use_retprobe, void *target_addr)
ctx.skel->links.bench_trigger_uprobe = link;
}
-static void uprobe_setup_nop(void)
+static void usermode_count_setup(void)
+{
+ ctx.usermode_counters = true;
+}
+
+static void uprobe_nop_setup(void)
{
usetup(false, &uprobe_target_nop);
}
-static void uretprobe_setup_nop(void)
+static void uretprobe_nop_setup(void)
{
usetup(true, &uprobe_target_nop);
}
-static void uprobe_setup_push(void)
+static void uprobe_push_setup(void)
{
usetup(false, &uprobe_target_push);
}
-static void uretprobe_setup_push(void)
+static void uretprobe_push_setup(void)
{
usetup(true, &uprobe_target_push);
}
-static void uprobe_setup_ret(void)
+static void uprobe_ret_setup(void)
{
usetup(false, &uprobe_target_ret);
}
-static void uretprobe_setup_ret(void)
+static void uretprobe_ret_setup(void)
{
usetup(true, &uprobe_target_ret);
}
-const struct bench bench_trig_base = {
- .name = "trig-base",
+const struct bench bench_trig_syscall_count = {
+ .name = "trig-syscall-count",
.validate = trigger_validate,
- .producer_thread = trigger_base_producer,
- .measure = trigger_base_measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_tp = {
- .name = "trig-tp",
- .validate = trigger_validate,
- .setup = trigger_tp_setup,
- .producer_thread = trigger_producer,
- .measure = trigger_measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_rawtp = {
- .name = "trig-rawtp",
- .validate = trigger_validate,
- .setup = trigger_rawtp_setup,
- .producer_thread = trigger_producer,
- .measure = trigger_measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_kprobe = {
- .name = "trig-kprobe",
- .validate = trigger_validate,
- .setup = trigger_kprobe_setup,
+ .setup = trigger_syscall_count_setup,
.producer_thread = trigger_producer,
.measure = trigger_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
};
-const struct bench bench_trig_kretprobe = {
- .name = "trig-kretprobe",
- .validate = trigger_validate,
- .setup = trigger_kretprobe_setup,
- .producer_thread = trigger_producer,
- .measure = trigger_measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_kprobe_multi = {
- .name = "trig-kprobe-multi",
- .validate = trigger_validate,
- .setup = trigger_kprobe_multi_setup,
- .producer_thread = trigger_producer,
- .measure = trigger_measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_kretprobe_multi = {
- .name = "trig-kretprobe-multi",
- .validate = trigger_validate,
- .setup = trigger_kretprobe_multi_setup,
- .producer_thread = trigger_producer,
- .measure = trigger_measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_fentry = {
- .name = "trig-fentry",
- .validate = trigger_validate,
- .setup = trigger_fentry_setup,
- .producer_thread = trigger_producer,
- .measure = trigger_measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_fexit = {
- .name = "trig-fexit",
- .validate = trigger_validate,
- .setup = trigger_fexit_setup,
- .producer_thread = trigger_producer,
- .measure = trigger_measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_fentry_sleep = {
- .name = "trig-fentry-sleep",
- .validate = trigger_validate,
- .setup = trigger_fentry_sleep_setup,
- .producer_thread = trigger_producer,
- .measure = trigger_measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_fmodret = {
- .name = "trig-fmodret",
- .validate = trigger_validate,
- .setup = trigger_fmodret_setup,
- .producer_thread = trigger_producer,
- .measure = trigger_measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_uprobe_base = {
- .name = "trig-uprobe-base",
- .setup = NULL, /* no uprobe/uretprobe is attached */
- .producer_thread = uprobe_base_producer,
- .measure = trigger_base_measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_uprobe_nop = {
- .name = "trig-uprobe-nop",
- .setup = uprobe_setup_nop,
- .producer_thread = uprobe_producer_nop,
- .measure = trigger_measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_uretprobe_nop = {
- .name = "trig-uretprobe-nop",
- .setup = uretprobe_setup_nop,
- .producer_thread = uprobe_producer_nop,
- .measure = trigger_measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_uprobe_push = {
- .name = "trig-uprobe-push",
- .setup = uprobe_setup_push,
- .producer_thread = uprobe_producer_push,
- .measure = trigger_measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_uretprobe_push = {
- .name = "trig-uretprobe-push",
- .setup = uretprobe_setup_push,
- .producer_thread = uprobe_producer_push,
- .measure = trigger_measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_uprobe_ret = {
- .name = "trig-uprobe-ret",
- .setup = uprobe_setup_ret,
- .producer_thread = uprobe_producer_ret,
- .measure = trigger_measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
-
-const struct bench bench_trig_uretprobe_ret = {
- .name = "trig-uretprobe-ret",
- .setup = uretprobe_setup_ret,
- .producer_thread = uprobe_producer_ret,
- .measure = trigger_measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
+/* batched (staying mostly in kernel) kprobe/fentry benchmarks */
+#define BENCH_TRIG_KERNEL(KIND, NAME) \
+const struct bench bench_trig_##KIND = { \
+ .name = "trig-" NAME, \
+ .setup = trigger_##KIND##_setup, \
+ .producer_thread = trigger_producer_batch, \
+ .measure = trigger_measure, \
+ .report_progress = hits_drops_report_progress, \
+ .report_final = hits_drops_report_final, \
+ .argp = &bench_trigger_batch_argp, \
+}
+
+BENCH_TRIG_KERNEL(kernel_count, "kernel-count");
+BENCH_TRIG_KERNEL(kprobe, "kprobe");
+BENCH_TRIG_KERNEL(kretprobe, "kretprobe");
+BENCH_TRIG_KERNEL(kprobe_multi, "kprobe-multi");
+BENCH_TRIG_KERNEL(kretprobe_multi, "kretprobe-multi");
+BENCH_TRIG_KERNEL(fentry, "fentry");
+BENCH_TRIG_KERNEL(fexit, "fexit");
+BENCH_TRIG_KERNEL(fmodret, "fmodret");
+BENCH_TRIG_KERNEL(tp, "tp");
+BENCH_TRIG_KERNEL(rawtp, "rawtp");
+
+/* uprobe benchmarks */
+#define BENCH_TRIG_USERMODE(KIND, PRODUCER, NAME) \
+const struct bench bench_trig_##KIND = { \
+ .name = "trig-" NAME, \
+ .validate = trigger_validate, \
+ .setup = KIND##_setup, \
+ .producer_thread = uprobe_producer_##PRODUCER, \
+ .measure = trigger_measure, \
+ .report_progress = hits_drops_report_progress, \
+ .report_final = hits_drops_report_final, \
+}
+
+BENCH_TRIG_USERMODE(usermode_count, count, "usermode-count");
+BENCH_TRIG_USERMODE(uprobe_nop, nop, "uprobe-nop");
+BENCH_TRIG_USERMODE(uprobe_push, push, "uprobe-push");
+BENCH_TRIG_USERMODE(uprobe_ret, ret, "uprobe-ret");
+BENCH_TRIG_USERMODE(uretprobe_nop, nop, "uretprobe-nop");
+BENCH_TRIG_USERMODE(uretprobe_push, push, "uretprobe-push");
+BENCH_TRIG_USERMODE(uretprobe_ret, ret, "uretprobe-ret");
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
index 78e83f243294..a690f5a68b6b 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
@@ -2,8 +2,22 @@
set -eufo pipefail
-for i in base tp rawtp kprobe fentry fmodret
-do
- summary=$(sudo ./bench -w2 -d5 -a trig-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
- printf "%-10s: %s\n" $i "$summary"
+def_tests=( \
+ usermode-count kernel-count syscall-count \
+ fentry fexit fmodret \
+ rawtp tp \
+ kprobe kprobe-multi \
+ kretprobe kretprobe-multi \
+)
+
+tests=("$@")
+if [ ${#tests[@]} -eq 0 ]; then
+ tests=("${def_tests[@]}")
+fi
+
+p=${PROD_CNT:-1}
+
+for t in "${tests[@]}"; do
+ summary=$(sudo ./bench -w2 -d5 -a -p$p trig-$t | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
+ printf "%-15s: %s\n" $t "$summary"
done
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
index 9bdcc74e03a4..af169f831f2f 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
@@ -2,7 +2,7 @@
set -eufo pipefail
-for i in base {uprobe,uretprobe}-{nop,push,ret}
+for i in usermode-count syscall-count {uprobe,uretprobe}-{nop,push,ret}
do
summary=$(sudo ./bench -w2 -d5 -a trig-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
printf "%-15s: %s\n" $i "$summary"
diff --git a/tools/testing/selftests/bpf/bpf_arena_common.h b/tools/testing/selftests/bpf/bpf_arena_common.h
index 567491f3e1b5..68a51dcc0669 100644
--- a/tools/testing/selftests/bpf/bpf_arena_common.h
+++ b/tools/testing/selftests/bpf/bpf_arena_common.h
@@ -34,10 +34,12 @@
#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) && !defined(BPF_ARENA_FORCE_ASM)
#define __arena __attribute__((address_space(1)))
+#define __arena_global __attribute__((address_space(1)))
#define cast_kern(ptr) /* nop for bpf prog. emitted by LLVM */
#define cast_user(ptr) /* nop for bpf prog. emitted by LLVM */
#else
#define __arena
+#define __arena_global SEC(".addr_space.1")
#define cast_kern(ptr) bpf_addr_space_cast(ptr, 0, 1)
#define cast_user(ptr) bpf_addr_space_cast(ptr, 1, 0)
#endif
diff --git a/tools/testing/selftests/bpf/bpf_arena_list.h b/tools/testing/selftests/bpf/bpf_arena_list.h
index b99b9f408eff..85dbc3ea4da5 100644
--- a/tools/testing/selftests/bpf/bpf_arena_list.h
+++ b/tools/testing/selftests/bpf/bpf_arena_list.h
@@ -29,6 +29,7 @@ static inline void *bpf_iter_num_new(struct bpf_iter_num *it, int i, int j) { re
static inline void bpf_iter_num_destroy(struct bpf_iter_num *it) {}
static inline bool bpf_iter_num_next(struct bpf_iter_num *it) { return true; }
#define cond_break ({})
+#define can_loop true
#endif
/* Safely walk link list elements. Deletion of elements is allowed. */
@@ -36,8 +37,7 @@ static inline bool bpf_iter_num_next(struct bpf_iter_num *it) { return true; }
for (void * ___tmp = (pos = list_entry_safe((head)->first, \
typeof(*(pos)), member), \
(void *)0); \
- pos && ({ ___tmp = (void *)pos->member.next; 1; }); \
- cond_break, \
+ pos && ({ ___tmp = (void *)pos->member.next; 1; }) && can_loop; \
pos = list_entry_safe((void __arena *)___tmp, typeof(*(pos)), member))
static inline void list_add_head(arena_list_node_t *n, arena_list_head_t *h)
diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
index a5b9df38c162..828556cdc2f0 100644
--- a/tools/testing/selftests/bpf/bpf_experimental.h
+++ b/tools/testing/selftests/bpf/bpf_experimental.h
@@ -163,7 +163,7 @@ struct bpf_iter_task_vma;
extern int bpf_iter_task_vma_new(struct bpf_iter_task_vma *it,
struct task_struct *task,
- unsigned long addr) __ksym;
+ __u64 addr) __ksym;
extern struct vm_area_struct *bpf_iter_task_vma_next(struct bpf_iter_task_vma *it) __ksym;
extern void bpf_iter_task_vma_destroy(struct bpf_iter_task_vma *it) __ksym;
@@ -326,9 +326,49 @@ l_true: \
})
#endif
+/*
+ * Note that cond_break can only be portably used in the body of a breakable
+ * construct, whereas can_loop can be used anywhere.
+ */
+#ifdef __BPF_FEATURE_MAY_GOTO
+#define can_loop \
+ ({ __label__ l_break, l_continue; \
+ bool ret = true; \
+ asm volatile goto("may_goto %l[l_break]" \
+ :::: l_break); \
+ goto l_continue; \
+ l_break: ret = false; \
+ l_continue:; \
+ ret; \
+ })
+
+#define cond_break \
+ ({ __label__ l_break, l_continue; \
+ asm volatile goto("may_goto %l[l_break]" \
+ :::: l_break); \
+ goto l_continue; \
+ l_break: break; \
+ l_continue:; \
+ })
+#else
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define can_loop \
+ ({ __label__ l_break, l_continue; \
+ bool ret = true; \
+ asm volatile goto("1:.byte 0xe5; \
+ .byte 0; \
+ .long ((%l[l_break] - 1b - 8) / 8) & 0xffff; \
+ .short 0" \
+ :::: l_break); \
+ goto l_continue; \
+ l_break: ret = false; \
+ l_continue:; \
+ ret; \
+ })
+
#define cond_break \
({ __label__ l_break, l_continue; \
- asm volatile goto("1:.byte 0xe5; \
+ asm volatile goto("1:.byte 0xe5; \
.byte 0; \
.long ((%l[l_break] - 1b - 8) / 8) & 0xffff; \
.short 0" \
@@ -337,6 +377,34 @@ l_true: \
l_break: break; \
l_continue:; \
})
+#else
+#define can_loop \
+ ({ __label__ l_break, l_continue; \
+ bool ret = true; \
+ asm volatile goto("1:.byte 0xe5; \
+ .byte 0; \
+ .long (((%l[l_break] - 1b - 8) / 8) & 0xffff) << 16; \
+ .short 0" \
+ :::: l_break); \
+ goto l_continue; \
+ l_break: ret = false; \
+ l_continue:; \
+ ret; \
+ })
+
+#define cond_break \
+ ({ __label__ l_break, l_continue; \
+ asm volatile goto("1:.byte 0xe5; \
+ .byte 0; \
+ .long (((%l[l_break] - 1b - 8) / 8) & 0xffff) << 16; \
+ .short 0" \
+ :::: l_break); \
+ goto l_continue; \
+ l_break: break; \
+ l_continue:; \
+ })
+#endif
+#endif
#ifndef bpf_nop_mov
#define bpf_nop_mov(var) \
@@ -386,6 +454,28 @@ l_true: \
, [as]"i"((dst_as << 16) | src_as));
#endif
+void bpf_preempt_disable(void) __weak __ksym;
+void bpf_preempt_enable(void) __weak __ksym;
+
+typedef struct {
+} __bpf_preempt_t;
+
+static inline __bpf_preempt_t __bpf_preempt_constructor(void)
+{
+ __bpf_preempt_t ret = {};
+
+ bpf_preempt_disable();
+ return ret;
+}
+static inline void __bpf_preempt_destructor(__bpf_preempt_t *t)
+{
+ bpf_preempt_enable();
+}
+#define bpf_guard_preempt() \
+ __bpf_preempt_t ___bpf_apply(preempt, __COUNTER__) \
+ __attribute__((__unused__, __cleanup__(__bpf_preempt_destructor))) = \
+ __bpf_preempt_constructor()
+
/* Description
* Assert that a conditional expression is true.
* Returns
@@ -459,4 +549,11 @@ extern int bpf_iter_css_new(struct bpf_iter_css *it,
extern struct cgroup_subsys_state *bpf_iter_css_next(struct bpf_iter_css *it) __weak __ksym;
extern void bpf_iter_css_destroy(struct bpf_iter_css *it) __weak __ksym;
+extern int bpf_wq_init(struct bpf_wq *wq, void *p__map, unsigned int flags) __weak __ksym;
+extern int bpf_wq_start(struct bpf_wq *wq, unsigned int flags) __weak __ksym;
+extern int bpf_wq_set_callback_impl(struct bpf_wq *wq,
+ int (callback_fn)(void *map, int *key, void *value),
+ unsigned int flags__k, void *aux__ign) __ksym;
+#define bpf_wq_set_callback(timer, cb, flags) \
+ bpf_wq_set_callback_impl(timer, cb, flags, NULL)
#endif
diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h
index 14ebe7d9e1a3..3b6675ab4086 100644
--- a/tools/testing/selftests/bpf/bpf_kfuncs.h
+++ b/tools/testing/selftests/bpf/bpf_kfuncs.h
@@ -75,4 +75,7 @@ extern void bpf_key_put(struct bpf_key *key) __ksym;
extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_ptr,
struct bpf_dynptr *sig_ptr,
struct bpf_key *trusted_keyring) __ksym;
+
+extern bool bpf_session_is_return(void) __ksym __weak;
+extern __u64 *bpf_session_cookie(void) __ksym __weak;
#endif
diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
deleted file mode 100644
index 82a7c9de95f9..000000000000
--- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h
+++ /dev/null
@@ -1,241 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __BPF_TCP_HELPERS_H
-#define __BPF_TCP_HELPERS_H
-
-#include <stdbool.h>
-#include <linux/types.h>
-#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_core_read.h>
-#include <bpf/bpf_tracing.h>
-
-#define BPF_STRUCT_OPS(name, args...) \
-SEC("struct_ops/"#name) \
-BPF_PROG(name, args)
-
-#ifndef SOL_TCP
-#define SOL_TCP 6
-#endif
-
-#ifndef TCP_CA_NAME_MAX
-#define TCP_CA_NAME_MAX 16
-#endif
-
-#define tcp_jiffies32 ((__u32)bpf_jiffies64())
-
-struct sock_common {
- unsigned char skc_state;
- __u16 skc_num;
-} __attribute__((preserve_access_index));
-
-enum sk_pacing {
- SK_PACING_NONE = 0,
- SK_PACING_NEEDED = 1,
- SK_PACING_FQ = 2,
-};
-
-struct sock {
- struct sock_common __sk_common;
-#define sk_state __sk_common.skc_state
- unsigned long sk_pacing_rate;
- __u32 sk_pacing_status; /* see enum sk_pacing */
-} __attribute__((preserve_access_index));
-
-struct inet_sock {
- struct sock sk;
-} __attribute__((preserve_access_index));
-
-struct inet_connection_sock {
- struct inet_sock icsk_inet;
- __u8 icsk_ca_state:6,
- icsk_ca_setsockopt:1,
- icsk_ca_dst_locked:1;
- struct {
- __u8 pending;
- } icsk_ack;
- __u64 icsk_ca_priv[104 / sizeof(__u64)];
-} __attribute__((preserve_access_index));
-
-struct request_sock {
- struct sock_common __req_common;
-} __attribute__((preserve_access_index));
-
-struct tcp_sock {
- struct inet_connection_sock inet_conn;
-
- __u32 rcv_nxt;
- __u32 snd_nxt;
- __u32 snd_una;
- __u32 window_clamp;
- __u8 ecn_flags;
- __u32 delivered;
- __u32 delivered_ce;
- __u32 snd_cwnd;
- __u32 snd_cwnd_cnt;
- __u32 snd_cwnd_clamp;
- __u32 snd_ssthresh;
- __u8 syn_data:1, /* SYN includes data */
- syn_fastopen:1, /* SYN includes Fast Open option */
- syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */
- syn_fastopen_ch:1, /* Active TFO re-enabling probe */
- syn_data_acked:1,/* data in SYN is acked by SYN-ACK */
- save_syn:1, /* Save headers of SYN packet */
- is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */
- syn_smc:1; /* SYN includes SMC */
- __u32 max_packets_out;
- __u32 lsndtime;
- __u32 prior_cwnd;
- __u64 tcp_mstamp; /* most recent packet received/sent */
- bool is_mptcp;
-} __attribute__((preserve_access_index));
-
-static __always_inline struct inet_connection_sock *inet_csk(const struct sock *sk)
-{
- return (struct inet_connection_sock *)sk;
-}
-
-static __always_inline void *inet_csk_ca(const struct sock *sk)
-{
- return (void *)inet_csk(sk)->icsk_ca_priv;
-}
-
-static __always_inline struct tcp_sock *tcp_sk(const struct sock *sk)
-{
- return (struct tcp_sock *)sk;
-}
-
-static __always_inline bool before(__u32 seq1, __u32 seq2)
-{
- return (__s32)(seq1-seq2) < 0;
-}
-#define after(seq2, seq1) before(seq1, seq2)
-
-#define TCP_ECN_OK 1
-#define TCP_ECN_QUEUE_CWR 2
-#define TCP_ECN_DEMAND_CWR 4
-#define TCP_ECN_SEEN 8
-
-enum inet_csk_ack_state_t {
- ICSK_ACK_SCHED = 1,
- ICSK_ACK_TIMER = 2,
- ICSK_ACK_PUSHED = 4,
- ICSK_ACK_PUSHED2 = 8,
- ICSK_ACK_NOW = 16 /* Send the next ACK immediately (once) */
-};
-
-enum tcp_ca_event {
- CA_EVENT_TX_START = 0,
- CA_EVENT_CWND_RESTART = 1,
- CA_EVENT_COMPLETE_CWR = 2,
- CA_EVENT_LOSS = 3,
- CA_EVENT_ECN_NO_CE = 4,
- CA_EVENT_ECN_IS_CE = 5,
-};
-
-struct ack_sample {
- __u32 pkts_acked;
- __s32 rtt_us;
- __u32 in_flight;
-} __attribute__((preserve_access_index));
-
-struct rate_sample {
- __u64 prior_mstamp; /* starting timestamp for interval */
- __u32 prior_delivered; /* tp->delivered at "prior_mstamp" */
- __s32 delivered; /* number of packets delivered over interval */
- long interval_us; /* time for tp->delivered to incr "delivered" */
- __u32 snd_interval_us; /* snd interval for delivered packets */
- __u32 rcv_interval_us; /* rcv interval for delivered packets */
- long rtt_us; /* RTT of last (S)ACKed packet (or -1) */
- int losses; /* number of packets marked lost upon ACK */
- __u32 acked_sacked; /* number of packets newly (S)ACKed upon ACK */
- __u32 prior_in_flight; /* in flight before this ACK */
- bool is_app_limited; /* is sample from packet with bubble in pipe? */
- bool is_retrans; /* is sample from retransmission? */
- bool is_ack_delayed; /* is this (likely) a delayed ACK? */
-} __attribute__((preserve_access_index));
-
-#define TCP_CA_NAME_MAX 16
-#define TCP_CONG_NEEDS_ECN 0x2
-
-struct tcp_congestion_ops {
- char name[TCP_CA_NAME_MAX];
- __u32 flags;
-
- /* initialize private data (optional) */
- void (*init)(struct sock *sk);
- /* cleanup private data (optional) */
- void (*release)(struct sock *sk);
-
- /* return slow start threshold (required) */
- __u32 (*ssthresh)(struct sock *sk);
- /* do new cwnd calculation (required) */
- void (*cong_avoid)(struct sock *sk, __u32 ack, __u32 acked);
- /* call before changing ca_state (optional) */
- void (*set_state)(struct sock *sk, __u8 new_state);
- /* call when cwnd event occurs (optional) */
- void (*cwnd_event)(struct sock *sk, enum tcp_ca_event ev);
- /* call when ack arrives (optional) */
- void (*in_ack_event)(struct sock *sk, __u32 flags);
- /* new value of cwnd after loss (required) */
- __u32 (*undo_cwnd)(struct sock *sk);
- /* hook for packet ack accounting (optional) */
- void (*pkts_acked)(struct sock *sk, const struct ack_sample *sample);
- /* override sysctl_tcp_min_tso_segs */
- __u32 (*min_tso_segs)(struct sock *sk);
- /* returns the multiplier used in tcp_sndbuf_expand (optional) */
- __u32 (*sndbuf_expand)(struct sock *sk);
- /* call when packets are delivered to update cwnd and pacing rate,
- * after all the ca_state processing. (optional)
- */
- void (*cong_control)(struct sock *sk, const struct rate_sample *rs);
- void *owner;
-};
-
-#define min(a, b) ((a) < (b) ? (a) : (b))
-#define max(a, b) ((a) > (b) ? (a) : (b))
-#define min_not_zero(x, y) ({ \
- typeof(x) __x = (x); \
- typeof(y) __y = (y); \
- __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
-
-static __always_inline bool tcp_in_slow_start(const struct tcp_sock *tp)
-{
- return tp->snd_cwnd < tp->snd_ssthresh;
-}
-
-static __always_inline bool tcp_is_cwnd_limited(const struct sock *sk)
-{
- const struct tcp_sock *tp = tcp_sk(sk);
-
- /* If in slow start, ensure cwnd grows to twice what was ACKed. */
- if (tcp_in_slow_start(tp))
- return tp->snd_cwnd < 2 * tp->max_packets_out;
-
- return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited);
-}
-
-static __always_inline bool tcp_cc_eq(const char *a, const char *b)
-{
- int i;
-
- for (i = 0; i < TCP_CA_NAME_MAX; i++) {
- if (a[i] != b[i])
- return false;
- if (!a[i])
- break;
- }
-
- return true;
-}
-
-extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym;
-extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym;
-
-struct mptcp_sock {
- struct inet_connection_sock sk;
-
- __u32 token;
- struct sock *first;
- char ca_name[TCP_CA_NAME_MAX];
-} __attribute__((preserve_access_index));
-
-#endif
diff --git a/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c b/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c
index b1dd889d5d7d..948eb3962732 100644
--- a/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c
+++ b/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c
@@ -22,12 +22,12 @@ static int dummy_init_member(const struct btf_type *t,
return 0;
}
-static int dummy_reg(void *kdata)
+static int dummy_reg(void *kdata, struct bpf_link *link)
{
return 0;
}
-static void dummy_unreg(void *kdata)
+static void dummy_unreg(void *kdata, struct bpf_link *link)
{
}
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
index edcd26106557..fd28c1157bd3 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -10,18 +10,31 @@
#include <linux/percpu-defs.h>
#include <linux/sysfs.h>
#include <linux/tracepoint.h>
+#include <linux/net.h>
+#include <linux/socket.h>
+#include <linux/nsproxy.h>
+#include <linux/inet.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/un.h>
+#include <net/sock.h>
+#include <linux/namei.h>
#include "bpf_testmod.h"
#include "bpf_testmod_kfunc.h"
#define CREATE_TRACE_POINTS
#include "bpf_testmod-events.h"
+#define CONNECT_TIMEOUT_SEC 1
+
typedef int (*func_proto_typedef)(long);
typedef int (*func_proto_typedef_nested1)(func_proto_typedef);
typedef int (*func_proto_typedef_nested2)(func_proto_typedef_nested1);
DEFINE_PER_CPU(int, bpf_testmod_ksym_percpu) = 123;
long bpf_testmod_test_struct_arg_result;
+static DEFINE_MUTEX(sock_lock);
+static struct socket *sock;
struct bpf_testmod_struct_arg_1 {
int a;
@@ -41,6 +54,13 @@ struct bpf_testmod_struct_arg_4 {
int b;
};
+struct bpf_testmod_struct_arg_5 {
+ char a;
+ short b;
+ int c;
+ long d;
+};
+
__bpf_hook_start();
noinline int
@@ -99,6 +119,15 @@ bpf_testmod_test_struct_arg_8(u64 a, void *b, short c, int d, void *e,
}
noinline int
+bpf_testmod_test_struct_arg_9(u64 a, void *b, short c, int d, void *e, char f,
+ short g, struct bpf_testmod_struct_arg_5 h, long i)
+{
+ bpf_testmod_test_struct_arg_result = a + (long)b + c + d + (long)e +
+ f + g + h.a + h.b + h.c + h.d + i;
+ return bpf_testmod_test_struct_arg_result;
+}
+
+noinline int
bpf_testmod_test_arg_ptr_to_struct(struct bpf_testmod_struct_arg_1 *a) {
bpf_testmod_test_struct_arg_result = a->a;
return bpf_testmod_test_struct_arg_result;
@@ -142,6 +171,42 @@ __bpf_kfunc void bpf_kfunc_common_test(void)
{
}
+__bpf_kfunc void bpf_kfunc_dynptr_test(struct bpf_dynptr *ptr,
+ struct bpf_dynptr *ptr__nullable)
+{
+}
+
+__bpf_kfunc struct bpf_testmod_ctx *
+bpf_testmod_ctx_create(int *err)
+{
+ struct bpf_testmod_ctx *ctx;
+
+ ctx = kzalloc(sizeof(*ctx), GFP_ATOMIC);
+ if (!ctx) {
+ *err = -ENOMEM;
+ return NULL;
+ }
+ refcount_set(&ctx->usage, 1);
+
+ return ctx;
+}
+
+static void testmod_free_cb(struct rcu_head *head)
+{
+ struct bpf_testmod_ctx *ctx;
+
+ ctx = container_of(head, struct bpf_testmod_ctx, rcu);
+ kfree(ctx);
+}
+
+__bpf_kfunc void bpf_testmod_ctx_release(struct bpf_testmod_ctx *ctx)
+{
+ if (!ctx)
+ return;
+ if (refcount_dec_and_test(&ctx->usage))
+ call_rcu(&ctx->rcu, testmod_free_cb);
+}
+
struct bpf_testmod_btf_type_tag_1 {
int a;
};
@@ -257,6 +322,7 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj,
struct bpf_testmod_struct_arg_2 struct_arg2 = {2, 3};
struct bpf_testmod_struct_arg_3 *struct_arg3;
struct bpf_testmod_struct_arg_4 struct_arg4 = {21, 22};
+ struct bpf_testmod_struct_arg_5 struct_arg5 = {23, 24, 25, 26};
int i = 1;
while (bpf_testmod_return_ptr(i))
@@ -271,6 +337,8 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj,
(void *)20, struct_arg4);
(void)bpf_testmod_test_struct_arg_8(16, (void *)17, 18, 19,
(void *)20, struct_arg4, 23);
+ (void)bpf_testmod_test_struct_arg_9(16, (void *)17, 18, 19, (void *)20,
+ 21, 22, struct_arg5, 27);
(void)bpf_testmod_test_arg_ptr_to_struct(&struct_arg1_2);
@@ -346,13 +414,133 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = {
.write = bpf_testmod_test_write,
};
+/* bpf_testmod_uprobe sysfs attribute is so far enabled for x86_64 only,
+ * please see test_uretprobe_regs_change test
+ */
+#ifdef __x86_64__
+
+static int
+uprobe_ret_handler(struct uprobe_consumer *self, unsigned long func,
+ struct pt_regs *regs)
+
+{
+ regs->ax = 0x12345678deadbeef;
+ regs->cx = 0x87654321feebdaed;
+ regs->r11 = (u64) -1;
+ return true;
+}
+
+struct testmod_uprobe {
+ struct path path;
+ loff_t offset;
+ struct uprobe_consumer consumer;
+};
+
+static DEFINE_MUTEX(testmod_uprobe_mutex);
+
+static struct testmod_uprobe uprobe = {
+ .consumer.ret_handler = uprobe_ret_handler,
+};
+
+static int testmod_register_uprobe(loff_t offset)
+{
+ int err = -EBUSY;
+
+ if (uprobe.offset)
+ return -EBUSY;
+
+ mutex_lock(&testmod_uprobe_mutex);
+
+ if (uprobe.offset)
+ goto out;
+
+ err = kern_path("/proc/self/exe", LOOKUP_FOLLOW, &uprobe.path);
+ if (err)
+ goto out;
+
+ err = uprobe_register_refctr(d_real_inode(uprobe.path.dentry),
+ offset, 0, &uprobe.consumer);
+ if (err)
+ path_put(&uprobe.path);
+ else
+ uprobe.offset = offset;
+
+out:
+ mutex_unlock(&testmod_uprobe_mutex);
+ return err;
+}
+
+static void testmod_unregister_uprobe(void)
+{
+ mutex_lock(&testmod_uprobe_mutex);
+
+ if (uprobe.offset) {
+ uprobe_unregister(d_real_inode(uprobe.path.dentry),
+ uprobe.offset, &uprobe.consumer);
+ uprobe.offset = 0;
+ }
+
+ mutex_unlock(&testmod_uprobe_mutex);
+}
+
+static ssize_t
+bpf_testmod_uprobe_write(struct file *file, struct kobject *kobj,
+ struct bin_attribute *bin_attr,
+ char *buf, loff_t off, size_t len)
+{
+ unsigned long offset = 0;
+ int err = 0;
+
+ if (kstrtoul(buf, 0, &offset))
+ return -EINVAL;
+
+ if (offset)
+ err = testmod_register_uprobe(offset);
+ else
+ testmod_unregister_uprobe();
+
+ return err ?: strlen(buf);
+}
+
+static struct bin_attribute bin_attr_bpf_testmod_uprobe_file __ro_after_init = {
+ .attr = { .name = "bpf_testmod_uprobe", .mode = 0666, },
+ .write = bpf_testmod_uprobe_write,
+};
+
+static int register_bpf_testmod_uprobe(void)
+{
+ return sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_uprobe_file);
+}
+
+static void unregister_bpf_testmod_uprobe(void)
+{
+ testmod_unregister_uprobe();
+ sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_uprobe_file);
+}
+
+#else
+static int register_bpf_testmod_uprobe(void)
+{
+ return 0;
+}
+
+static void unregister_bpf_testmod_uprobe(void) { }
+#endif
+
BTF_KFUNCS_START(bpf_testmod_common_kfunc_ids)
BTF_ID_FLAGS(func, bpf_iter_testmod_seq_new, KF_ITER_NEW)
BTF_ID_FLAGS(func, bpf_iter_testmod_seq_next, KF_ITER_NEXT | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_iter_testmod_seq_destroy, KF_ITER_DESTROY)
BTF_ID_FLAGS(func, bpf_kfunc_common_test)
+BTF_ID_FLAGS(func, bpf_kfunc_dynptr_test)
+BTF_ID_FLAGS(func, bpf_testmod_ctx_create, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_testmod_ctx_release, KF_RELEASE)
BTF_KFUNCS_END(bpf_testmod_common_kfunc_ids)
+BTF_ID_LIST(bpf_testmod_dtor_ids)
+BTF_ID(struct, bpf_testmod_ctx)
+BTF_ID(func, bpf_testmod_ctx_release)
+
static const struct btf_kfunc_id_set bpf_testmod_common_kfunc_set = {
.owner = THIS_MODULE,
.set = &bpf_testmod_common_kfunc_ids,
@@ -497,6 +685,241 @@ __bpf_kfunc static u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused
return arg;
}
+__bpf_kfunc void bpf_kfunc_call_test_sleepable(void)
+{
+}
+
+__bpf_kfunc int bpf_kfunc_init_sock(struct init_sock_args *args)
+{
+ int proto;
+ int err;
+
+ mutex_lock(&sock_lock);
+
+ if (sock) {
+ pr_err("%s called without releasing old sock", __func__);
+ err = -EPERM;
+ goto out;
+ }
+
+ switch (args->af) {
+ case AF_INET:
+ case AF_INET6:
+ proto = args->type == SOCK_STREAM ? IPPROTO_TCP : IPPROTO_UDP;
+ break;
+ case AF_UNIX:
+ proto = PF_UNIX;
+ break;
+ default:
+ pr_err("invalid address family %d\n", args->af);
+ err = -EINVAL;
+ goto out;
+ }
+
+ err = sock_create_kern(current->nsproxy->net_ns, args->af, args->type,
+ proto, &sock);
+
+ if (!err)
+ /* Set timeout for call to kernel_connect() to prevent it from hanging,
+ * and consider the connection attempt failed if it returns
+ * -EINPROGRESS.
+ */
+ sock->sk->sk_sndtimeo = CONNECT_TIMEOUT_SEC * HZ;
+out:
+ mutex_unlock(&sock_lock);
+
+ return err;
+}
+
+__bpf_kfunc void bpf_kfunc_close_sock(void)
+{
+ mutex_lock(&sock_lock);
+
+ if (sock) {
+ sock_release(sock);
+ sock = NULL;
+ }
+
+ mutex_unlock(&sock_lock);
+}
+
+__bpf_kfunc int bpf_kfunc_call_kernel_connect(struct addr_args *args)
+{
+ int err;
+
+ if (args->addrlen > sizeof(args->addr))
+ return -EINVAL;
+
+ mutex_lock(&sock_lock);
+
+ if (!sock) {
+ pr_err("%s called without initializing sock", __func__);
+ err = -EPERM;
+ goto out;
+ }
+
+ err = kernel_connect(sock, (struct sockaddr *)&args->addr,
+ args->addrlen, 0);
+out:
+ mutex_unlock(&sock_lock);
+
+ return err;
+}
+
+__bpf_kfunc int bpf_kfunc_call_kernel_bind(struct addr_args *args)
+{
+ int err;
+
+ if (args->addrlen > sizeof(args->addr))
+ return -EINVAL;
+
+ mutex_lock(&sock_lock);
+
+ if (!sock) {
+ pr_err("%s called without initializing sock", __func__);
+ err = -EPERM;
+ goto out;
+ }
+
+ err = kernel_bind(sock, (struct sockaddr *)&args->addr, args->addrlen);
+out:
+ mutex_unlock(&sock_lock);
+
+ return err;
+}
+
+__bpf_kfunc int bpf_kfunc_call_kernel_listen(void)
+{
+ int err;
+
+ mutex_lock(&sock_lock);
+
+ if (!sock) {
+ pr_err("%s called without initializing sock", __func__);
+ err = -EPERM;
+ goto out;
+ }
+
+ err = kernel_listen(sock, 128);
+out:
+ mutex_unlock(&sock_lock);
+
+ return err;
+}
+
+__bpf_kfunc int bpf_kfunc_call_kernel_sendmsg(struct sendmsg_args *args)
+{
+ struct msghdr msg = {
+ .msg_name = &args->addr.addr,
+ .msg_namelen = args->addr.addrlen,
+ };
+ struct kvec iov;
+ int err;
+
+ if (args->addr.addrlen > sizeof(args->addr.addr) ||
+ args->msglen > sizeof(args->msg))
+ return -EINVAL;
+
+ iov.iov_base = args->msg;
+ iov.iov_len = args->msglen;
+
+ mutex_lock(&sock_lock);
+
+ if (!sock) {
+ pr_err("%s called without initializing sock", __func__);
+ err = -EPERM;
+ goto out;
+ }
+
+ err = kernel_sendmsg(sock, &msg, &iov, 1, args->msglen);
+ args->addr.addrlen = msg.msg_namelen;
+out:
+ mutex_unlock(&sock_lock);
+
+ return err;
+}
+
+__bpf_kfunc int bpf_kfunc_call_sock_sendmsg(struct sendmsg_args *args)
+{
+ struct msghdr msg = {
+ .msg_name = &args->addr.addr,
+ .msg_namelen = args->addr.addrlen,
+ };
+ struct kvec iov;
+ int err;
+
+ if (args->addr.addrlen > sizeof(args->addr.addr) ||
+ args->msglen > sizeof(args->msg))
+ return -EINVAL;
+
+ iov.iov_base = args->msg;
+ iov.iov_len = args->msglen;
+
+ iov_iter_kvec(&msg.msg_iter, ITER_SOURCE, &iov, 1, args->msglen);
+ mutex_lock(&sock_lock);
+
+ if (!sock) {
+ pr_err("%s called without initializing sock", __func__);
+ err = -EPERM;
+ goto out;
+ }
+
+ err = sock_sendmsg(sock, &msg);
+ args->addr.addrlen = msg.msg_namelen;
+out:
+ mutex_unlock(&sock_lock);
+
+ return err;
+}
+
+__bpf_kfunc int bpf_kfunc_call_kernel_getsockname(struct addr_args *args)
+{
+ int err;
+
+ mutex_lock(&sock_lock);
+
+ if (!sock) {
+ pr_err("%s called without initializing sock", __func__);
+ err = -EPERM;
+ goto out;
+ }
+
+ err = kernel_getsockname(sock, (struct sockaddr *)&args->addr);
+ if (err < 0)
+ goto out;
+
+ args->addrlen = err;
+ err = 0;
+out:
+ mutex_unlock(&sock_lock);
+
+ return err;
+}
+
+__bpf_kfunc int bpf_kfunc_call_kernel_getpeername(struct addr_args *args)
+{
+ int err;
+
+ mutex_lock(&sock_lock);
+
+ if (!sock) {
+ pr_err("%s called without initializing sock", __func__);
+ err = -EPERM;
+ goto out;
+ }
+
+ err = kernel_getpeername(sock, (struct sockaddr *)&args->addr);
+ if (err < 0)
+ goto out;
+
+ args->addrlen = err;
+ err = 0;
+out:
+ mutex_unlock(&sock_lock);
+
+ return err;
+}
+
BTF_KFUNCS_START(bpf_testmod_check_kfunc_ids)
BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc)
BTF_ID_FLAGS(func, bpf_kfunc_call_test1)
@@ -523,6 +946,16 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS | KF_RCU)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_offset)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_sleepable, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_kfunc_init_sock, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_kfunc_close_sock, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_connect, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_bind, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_listen, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_sendmsg, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_sock_sendmsg, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_getsockname, KF_SLEEPABLE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_kernel_getpeername, KF_SLEEPABLE)
BTF_KFUNCS_END(bpf_testmod_check_kfunc_ids)
static int bpf_testmod_ops_init(struct btf *btf)
@@ -563,7 +996,7 @@ static const struct bpf_verifier_ops bpf_testmod_verifier_ops = {
.is_valid_access = bpf_testmod_ops_is_valid_access,
};
-static int bpf_dummy_reg(void *kdata)
+static int bpf_dummy_reg(void *kdata, struct bpf_link *link)
{
struct bpf_testmod_ops *ops = kdata;
@@ -578,7 +1011,7 @@ static int bpf_dummy_reg(void *kdata)
return 0;
}
-static void bpf_dummy_unreg(void *kdata)
+static void bpf_dummy_unreg(void *kdata, struct bpf_link *link)
{
}
@@ -614,7 +1047,7 @@ struct bpf_struct_ops bpf_bpf_testmod_ops = {
.owner = THIS_MODULE,
};
-static int bpf_dummy_reg2(void *kdata)
+static int bpf_dummy_reg2(void *kdata, struct bpf_link *link)
{
struct bpf_testmod_ops2 *ops = kdata;
@@ -641,6 +1074,12 @@ extern int bpf_fentry_test1(int a);
static int bpf_testmod_init(void)
{
+ const struct btf_id_dtor_kfunc bpf_testmod_dtors[] = {
+ {
+ .btf_id = bpf_testmod_dtor_ids[0],
+ .kfunc_btf_id = bpf_testmod_dtor_ids[1]
+ },
+ };
int ret;
ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &bpf_testmod_common_kfunc_set);
@@ -649,11 +1088,22 @@ static int bpf_testmod_init(void)
ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &bpf_testmod_kfunc_set);
ret = ret ?: register_bpf_struct_ops(&bpf_bpf_testmod_ops, bpf_testmod_ops);
ret = ret ?: register_bpf_struct_ops(&bpf_testmod_ops2, bpf_testmod_ops2);
+ ret = ret ?: register_btf_id_dtor_kfuncs(bpf_testmod_dtors,
+ ARRAY_SIZE(bpf_testmod_dtors),
+ THIS_MODULE);
if (ret < 0)
return ret;
if (bpf_fentry_test1(0) < 0)
return -EINVAL;
- return sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
+ sock = NULL;
+ mutex_init(&sock_lock);
+ ret = sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
+ if (ret < 0)
+ return ret;
+ ret = register_bpf_testmod_uprobe();
+ if (ret < 0)
+ return ret;
+ return 0;
}
static void bpf_testmod_exit(void)
@@ -666,7 +1116,9 @@ static void bpf_testmod_exit(void)
while (refcount_read(&prog_test_struct.cnt) > 1)
msleep(20);
+ bpf_kfunc_close_sock();
sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
+ unregister_bpf_testmod_uprobe();
}
module_init(bpf_testmod_init);
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h
index 7c664dd61059..e587a79f2239 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h
@@ -64,6 +64,27 @@ struct prog_test_fail3 {
char arr2[];
};
+struct init_sock_args {
+ int af;
+ int type;
+};
+
+struct addr_args {
+ char addr[sizeof(struct __kernel_sockaddr_storage)];
+ int addrlen;
+};
+
+struct sendmsg_args {
+ struct addr_args addr;
+ char msg[10];
+ int msglen;
+};
+
+struct bpf_testmod_ctx {
+ struct callback_head rcu;
+ refcount_t usage;
+};
+
struct prog_test_ref_kfunc *
bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) __ksym;
void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;
@@ -96,6 +117,7 @@ void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) __ksym;
void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym;
void bpf_kfunc_call_test_destructive(void) __ksym;
+void bpf_kfunc_call_test_sleepable(void) __ksym;
void bpf_kfunc_call_test_offset(struct prog_test_ref_kfunc *p);
struct prog_test_member *bpf_kfunc_call_memb_acquire(void);
@@ -106,4 +128,20 @@ void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p);
void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len);
void bpf_kfunc_common_test(void) __ksym;
+
+int bpf_kfunc_init_sock(struct init_sock_args *args) __ksym;
+void bpf_kfunc_close_sock(void) __ksym;
+int bpf_kfunc_call_kernel_connect(struct addr_args *args) __ksym;
+int bpf_kfunc_call_kernel_bind(struct addr_args *args) __ksym;
+int bpf_kfunc_call_kernel_listen(void) __ksym;
+int bpf_kfunc_call_kernel_sendmsg(struct sendmsg_args *args) __ksym;
+int bpf_kfunc_call_sock_sendmsg(struct sendmsg_args *args) __ksym;
+int bpf_kfunc_call_kernel_getsockname(struct addr_args *args) __ksym;
+int bpf_kfunc_call_kernel_getpeername(struct addr_args *args) __ksym;
+
+void bpf_kfunc_dynptr_test(struct bpf_dynptr *ptr, struct bpf_dynptr *ptr__nullable) __ksym;
+
+struct bpf_testmod_ctx *bpf_testmod_ctx_create(int *err) __ksym;
+void bpf_testmod_ctx_release(struct bpf_testmod_ctx *ctx) __ksym;
+
#endif /* _BPF_TESTMOD_KFUNC_H */
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index 19be9c63d5e8..23bb9a9e6a7d 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -429,7 +429,7 @@ int create_and_get_cgroup(const char *relative_path)
* which is an invalid cgroup id.
* If there is a failure, it prints the error to stderr.
*/
-unsigned long long get_cgroup_id_from_path(const char *cgroup_workdir)
+static unsigned long long get_cgroup_id_from_path(const char *cgroup_workdir)
{
int dirfd, err, flags, mount_id, fhsize;
union {
@@ -508,6 +508,9 @@ int cgroup_setup_and_join(const char *path) {
/**
* setup_classid_environment() - Setup the cgroupv1 net_cls environment
*
+ * This function should only be called in a custom mount namespace, e.g.
+ * created by running setup_cgroup_environment.
+ *
* After calling this function, cleanup_classid_environment should be called
* once testing is complete.
*
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 01f241ea2c67..4ca84c8d9116 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -13,7 +13,12 @@ CONFIG_BPF_SYSCALL=y
CONFIG_CGROUP_BPF=y
CONFIG_CRYPTO_HMAC=y
CONFIG_CRYPTO_SHA256=y
+CONFIG_CRYPTO_USER_API=y
CONFIG_CRYPTO_USER_API_HASH=y
+CONFIG_CRYPTO_USER_API_SKCIPHER=y
+CONFIG_CRYPTO_SKCIPHER=y
+CONFIG_CRYPTO_ECB=y
+CONFIG_CRYPTO_AES=y
CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_INFO_BTF=y
CONFIG_DEBUG_INFO_DWARF4=y
@@ -53,9 +58,12 @@ CONFIG_MPLS=y
CONFIG_MPLS_IPTUNNEL=y
CONFIG_MPLS_ROUTING=y
CONFIG_MPTCP=y
+CONFIG_NET_ACT_SKBMOD=y
+CONFIG_NET_CLS=y
CONFIG_NET_CLS_ACT=y
CONFIG_NET_CLS_BPF=y
CONFIG_NET_CLS_FLOWER=y
+CONFIG_NET_CLS_MATCHALL=y
CONFIG_NET_FOU=y
CONFIG_NET_FOU_IP_TUNNELS=y
CONFIG_NET_IPGRE=y
@@ -75,8 +83,22 @@ CONFIG_NETFILTER_XT_TARGET_CT=y
CONFIG_NETKIT=y
CONFIG_NF_CONNTRACK=y
CONFIG_NF_CONNTRACK_MARK=y
+CONFIG_NF_CONNTRACK_ZONES=y
CONFIG_NF_DEFRAG_IPV4=y
CONFIG_NF_DEFRAG_IPV6=y
+CONFIG_NF_TABLES=y
+CONFIG_NF_TABLES_INET=y
+CONFIG_NF_TABLES_NETDEV=y
+CONFIG_NF_TABLES_IPV4=y
+CONFIG_NF_TABLES_IPV6=y
+CONFIG_NETFILTER_INGRESS=y
+CONFIG_NF_FLOW_TABLE=y
+CONFIG_NF_FLOW_TABLE_INET=y
+CONFIG_NETFILTER_NETLINK=y
+CONFIG_NFT_FLOW_OFFLOAD=y
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IP6_NF_IPTABLES=y
+CONFIG_IP6_NF_FILTER=y
CONFIG_NF_NAT=y
CONFIG_RC_CORE=y
CONFIG_SECURITY=y
@@ -88,3 +110,5 @@ CONFIG_VSOCKETS=y
CONFIG_VXLAN=y
CONFIG_XDP_SOCKETS=y
CONFIG_XFRM_INTERFACE=y
+CONFIG_TCP_CONG_DCTCP=y
+CONFIG_TCP_CONG_BBR=y
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index 6db27a9088e9..e0cba4178e41 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -52,6 +52,8 @@ struct ipv6_packet pkt_v6 = {
.tcp.doff = 5,
};
+static const struct network_helper_opts default_opts;
+
int settimeo(int fd, int timeout_ms)
{
struct timeval timeout = { .tv_sec = 3 };
@@ -78,24 +80,23 @@ int settimeo(int fd, int timeout_ms)
#define save_errno_close(fd) ({ int __save = errno; close(fd); errno = __save; })
-static int __start_server(int type, int protocol, const struct sockaddr *addr,
- socklen_t addrlen, int timeout_ms, bool reuseport)
+static int __start_server(int type, const struct sockaddr *addr, socklen_t addrlen,
+ const struct network_helper_opts *opts)
{
- int on = 1;
int fd;
- fd = socket(addr->sa_family, type, protocol);
+ fd = socket(addr->sa_family, type, opts->proto);
if (fd < 0) {
log_err("Failed to create server socket");
return -1;
}
- if (settimeo(fd, timeout_ms))
+ if (settimeo(fd, opts->timeout_ms))
goto error_close;
- if (reuseport &&
- setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on))) {
- log_err("Failed to set SO_REUSEPORT");
+ if (opts->post_socket_cb &&
+ opts->post_socket_cb(fd, opts->cb_opts)) {
+ log_err("Failed to call post_socket_cb");
goto error_close;
}
@@ -105,7 +106,7 @@ static int __start_server(int type, int protocol, const struct sockaddr *addr,
}
if (type == SOCK_STREAM) {
- if (listen(fd, 1) < 0) {
+ if (listen(fd, opts->backlog ? MAX(opts->backlog, 0) : 1) < 0) {
log_err("Failed to listed on socket");
goto error_close;
}
@@ -118,35 +119,45 @@ error_close:
return -1;
}
-static int start_server_proto(int family, int type, int protocol,
- const char *addr_str, __u16 port, int timeout_ms)
+int start_server_str(int family, int type, const char *addr_str, __u16 port,
+ const struct network_helper_opts *opts)
{
struct sockaddr_storage addr;
socklen_t addrlen;
+ if (!opts)
+ opts = &default_opts;
+
if (make_sockaddr(family, addr_str, port, &addr, &addrlen))
return -1;
- return __start_server(type, protocol, (struct sockaddr *)&addr,
- addrlen, timeout_ms, false);
+ return __start_server(type, (struct sockaddr *)&addr, addrlen, opts);
}
int start_server(int family, int type, const char *addr_str, __u16 port,
int timeout_ms)
{
- return start_server_proto(family, type, 0, addr_str, port, timeout_ms);
+ struct network_helper_opts opts = {
+ .timeout_ms = timeout_ms,
+ };
+
+ return start_server_str(family, type, addr_str, port, &opts);
}
-int start_mptcp_server(int family, const char *addr_str, __u16 port,
- int timeout_ms)
+static int reuseport_cb(int fd, void *opts)
{
- return start_server_proto(family, SOCK_STREAM, IPPROTO_MPTCP, addr_str,
- port, timeout_ms);
+ int on = 1;
+
+ return setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on));
}
int *start_reuseport_server(int family, int type, const char *addr_str,
__u16 port, int timeout_ms, unsigned int nr_listens)
{
+ struct network_helper_opts opts = {
+ .timeout_ms = timeout_ms,
+ .post_socket_cb = reuseport_cb,
+ };
struct sockaddr_storage addr;
unsigned int nr_fds = 0;
socklen_t addrlen;
@@ -162,8 +173,7 @@ int *start_reuseport_server(int family, int type, const char *addr_str,
if (!fds)
return NULL;
- fds[0] = __start_server(type, 0, (struct sockaddr *)&addr, addrlen,
- timeout_ms, true);
+ fds[0] = __start_server(type, (struct sockaddr *)&addr, addrlen, &opts);
if (fds[0] == -1)
goto close_fds;
nr_fds = 1;
@@ -172,8 +182,7 @@ int *start_reuseport_server(int family, int type, const char *addr_str,
goto close_fds;
for (; nr_fds < nr_listens; nr_fds++) {
- fds[nr_fds] = __start_server(type, 0, (struct sockaddr *)&addr,
- addrlen, timeout_ms, true);
+ fds[nr_fds] = __start_server(type, (struct sockaddr *)&addr, addrlen, &opts);
if (fds[nr_fds] == -1)
goto close_fds;
}
@@ -185,6 +194,15 @@ close_fds:
return NULL;
}
+int start_server_addr(int type, const struct sockaddr_storage *addr, socklen_t len,
+ const struct network_helper_opts *opts)
+{
+ if (!opts)
+ opts = &default_opts;
+
+ return __start_server(type, (struct sockaddr *)addr, len, opts);
+}
+
void free_fds(int *fds, unsigned int nr_close_fds)
{
if (fds) {
@@ -231,6 +249,34 @@ error_close:
return -1;
}
+int client_socket(int family, int type,
+ const struct network_helper_opts *opts)
+{
+ int fd;
+
+ if (!opts)
+ opts = &default_opts;
+
+ fd = socket(family, type, opts->proto);
+ if (fd < 0) {
+ log_err("Failed to create client socket");
+ return -1;
+ }
+
+ if (settimeo(fd, opts->timeout_ms))
+ goto error_close;
+
+ if (opts->post_socket_cb &&
+ opts->post_socket_cb(fd, opts->cb_opts))
+ goto error_close;
+
+ return fd;
+
+error_close:
+ save_errno_close(fd);
+ return -1;
+}
+
static int connect_fd_to_addr(int fd,
const struct sockaddr_storage *addr,
socklen_t addrlen, const bool must_fail)
@@ -258,17 +304,21 @@ static int connect_fd_to_addr(int fd,
return 0;
}
-int connect_to_addr(const struct sockaddr_storage *addr, socklen_t addrlen, int type)
+int connect_to_addr(int type, const struct sockaddr_storage *addr, socklen_t addrlen,
+ const struct network_helper_opts *opts)
{
int fd;
- fd = socket(addr->ss_family, type, 0);
+ if (!opts)
+ opts = &default_opts;
+
+ fd = client_socket(addr->ss_family, type, opts);
if (fd < 0) {
log_err("Failed to create client socket");
return -1;
}
- if (connect_fd_to_addr(fd, addr, addrlen, false))
+ if (connect_fd_to_addr(fd, addr, addrlen, opts->must_fail))
goto error_close;
return fd;
@@ -278,68 +328,21 @@ error_close:
return -1;
}
-static const struct network_helper_opts default_opts;
-
-int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts)
+int connect_to_fd_opts(int server_fd, int type, const struct network_helper_opts *opts)
{
struct sockaddr_storage addr;
- struct sockaddr_in *addr_in;
- socklen_t addrlen, optlen;
- int fd, type, protocol;
+ socklen_t addrlen;
if (!opts)
opts = &default_opts;
- optlen = sizeof(type);
-
- if (opts->type) {
- type = opts->type;
- } else {
- if (getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &type, &optlen)) {
- log_err("getsockopt(SOL_TYPE)");
- return -1;
- }
- }
-
- if (opts->proto) {
- protocol = opts->proto;
- } else {
- if (getsockopt(server_fd, SOL_SOCKET, SO_PROTOCOL, &protocol, &optlen)) {
- log_err("getsockopt(SOL_PROTOCOL)");
- return -1;
- }
- }
-
addrlen = sizeof(addr);
if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) {
log_err("Failed to get server addr");
return -1;
}
- addr_in = (struct sockaddr_in *)&addr;
- fd = socket(addr_in->sin_family, type, protocol);
- if (fd < 0) {
- log_err("Failed to create client socket");
- return -1;
- }
-
- if (settimeo(fd, opts->timeout_ms))
- goto error_close;
-
- if (opts->cc && opts->cc[0] &&
- setsockopt(fd, SOL_TCP, TCP_CONGESTION, opts->cc,
- strlen(opts->cc) + 1))
- goto error_close;
-
- if (!opts->noconnect)
- if (connect_fd_to_addr(fd, &addr, addrlen, opts->must_fail))
- goto error_close;
-
- return fd;
-
-error_close:
- save_errno_close(fd);
- return -1;
+ return connect_to_addr(type, &addr, addrlen, opts);
}
int connect_to_fd(int server_fd, int timeout_ms)
@@ -347,8 +350,23 @@ int connect_to_fd(int server_fd, int timeout_ms)
struct network_helper_opts opts = {
.timeout_ms = timeout_ms,
};
+ int type, protocol;
+ socklen_t optlen;
+
+ optlen = sizeof(type);
+ if (getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &type, &optlen)) {
+ log_err("getsockopt(SOL_TYPE)");
+ return -1;
+ }
- return connect_to_fd_opts(server_fd, &opts);
+ optlen = sizeof(protocol);
+ if (getsockopt(server_fd, SOL_SOCKET, SO_PROTOCOL, &protocol, &optlen)) {
+ log_err("getsockopt(SOL_PROTOCOL)");
+ return -1;
+ }
+ opts.proto = protocol;
+
+ return connect_to_fd_opts(server_fd, type, &opts);
}
int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms)
@@ -442,25 +460,35 @@ struct nstoken *open_netns(const char *name)
struct nstoken *token;
token = calloc(1, sizeof(struct nstoken));
- if (!ASSERT_OK_PTR(token, "malloc token"))
+ if (!token) {
+ log_err("Failed to malloc token");
return NULL;
+ }
token->orig_netns_fd = open("/proc/self/ns/net", O_RDONLY);
- if (!ASSERT_GE(token->orig_netns_fd, 0, "open /proc/self/ns/net"))
+ if (token->orig_netns_fd == -1) {
+ log_err("Failed to open(/proc/self/ns/net)");
goto fail;
+ }
snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name);
nsfd = open(nspath, O_RDONLY | O_CLOEXEC);
- if (!ASSERT_GE(nsfd, 0, "open netns fd"))
+ if (nsfd == -1) {
+ log_err("Failed to open(%s)", nspath);
goto fail;
+ }
err = setns(nsfd, CLONE_NEWNET);
close(nsfd);
- if (!ASSERT_OK(err, "setns"))
+ if (err) {
+ log_err("Failed to setns(nsfd)");
goto fail;
+ }
return token;
fail:
+ if (token->orig_netns_fd != -1)
+ close(token->orig_netns_fd);
free(token);
return NULL;
}
@@ -470,7 +498,8 @@ void close_netns(struct nstoken *token)
if (!token)
return;
- ASSERT_OK(setns(token->orig_netns_fd, CLONE_NEWNET), "setns");
+ if (setns(token->orig_netns_fd, CLONE_NEWNET))
+ log_err("Failed to setns(orig_netns_fd)");
close(token->orig_netns_fd);
free(token);
}
@@ -497,3 +526,153 @@ int get_socket_local_port(int sock_fd)
return -1;
}
+
+int get_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param)
+{
+ struct ifreq ifr = {0};
+ int sockfd, err;
+
+ sockfd = socket(AF_INET, SOCK_DGRAM, 0);
+ if (sockfd < 0)
+ return -errno;
+
+ memcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
+
+ ring_param->cmd = ETHTOOL_GRINGPARAM;
+ ifr.ifr_data = (char *)ring_param;
+
+ if (ioctl(sockfd, SIOCETHTOOL, &ifr) < 0) {
+ err = errno;
+ close(sockfd);
+ return -err;
+ }
+
+ close(sockfd);
+ return 0;
+}
+
+int set_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param)
+{
+ struct ifreq ifr = {0};
+ int sockfd, err;
+
+ sockfd = socket(AF_INET, SOCK_DGRAM, 0);
+ if (sockfd < 0)
+ return -errno;
+
+ memcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
+
+ ring_param->cmd = ETHTOOL_SRINGPARAM;
+ ifr.ifr_data = (char *)ring_param;
+
+ if (ioctl(sockfd, SIOCETHTOOL, &ifr) < 0) {
+ err = errno;
+ close(sockfd);
+ return -err;
+ }
+
+ close(sockfd);
+ return 0;
+}
+
+struct send_recv_arg {
+ int fd;
+ uint32_t bytes;
+ int stop;
+};
+
+static void *send_recv_server(void *arg)
+{
+ struct send_recv_arg *a = (struct send_recv_arg *)arg;
+ ssize_t nr_sent = 0, bytes = 0;
+ char batch[1500];
+ int err = 0, fd;
+
+ fd = accept(a->fd, NULL, NULL);
+ while (fd == -1) {
+ if (errno == EINTR)
+ continue;
+ err = -errno;
+ goto done;
+ }
+
+ if (settimeo(fd, 0)) {
+ err = -errno;
+ goto done;
+ }
+
+ while (bytes < a->bytes && !READ_ONCE(a->stop)) {
+ nr_sent = send(fd, &batch,
+ MIN(a->bytes - bytes, sizeof(batch)), 0);
+ if (nr_sent == -1 && errno == EINTR)
+ continue;
+ if (nr_sent == -1) {
+ err = -errno;
+ break;
+ }
+ bytes += nr_sent;
+ }
+
+ if (bytes != a->bytes) {
+ log_err("send %zd expected %u", bytes, a->bytes);
+ if (!err)
+ err = bytes > a->bytes ? -E2BIG : -EINTR;
+ }
+
+done:
+ if (fd >= 0)
+ close(fd);
+ if (err) {
+ WRITE_ONCE(a->stop, 1);
+ return ERR_PTR(err);
+ }
+ return NULL;
+}
+
+int send_recv_data(int lfd, int fd, uint32_t total_bytes)
+{
+ ssize_t nr_recv = 0, bytes = 0;
+ struct send_recv_arg arg = {
+ .fd = lfd,
+ .bytes = total_bytes,
+ .stop = 0,
+ };
+ pthread_t srv_thread;
+ void *thread_ret;
+ char batch[1500];
+ int err = 0;
+
+ err = pthread_create(&srv_thread, NULL, send_recv_server, (void *)&arg);
+ if (err) {
+ log_err("Failed to pthread_create");
+ return err;
+ }
+
+ /* recv total_bytes */
+ while (bytes < total_bytes && !READ_ONCE(arg.stop)) {
+ nr_recv = recv(fd, &batch,
+ MIN(total_bytes - bytes, sizeof(batch)), 0);
+ if (nr_recv == -1 && errno == EINTR)
+ continue;
+ if (nr_recv == -1) {
+ err = -errno;
+ break;
+ }
+ bytes += nr_recv;
+ }
+
+ if (bytes != total_bytes) {
+ log_err("recv %zd expected %u", bytes, total_bytes);
+ if (!err)
+ err = bytes > total_bytes ? -E2BIG : -EINTR;
+ }
+
+ WRITE_ONCE(arg.stop, 1);
+ pthread_join(srv_thread, &thread_ret);
+ if (IS_ERR(thread_ret)) {
+ log_err("Failed in thread_ret %ld", PTR_ERR(thread_ret));
+ err = err ? : PTR_ERR(thread_ret);
+ }
+
+ return err;
+}
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index 94b9be24e39b..aac5b94d6379 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -9,8 +9,12 @@ typedef __u16 __sum16;
#include <linux/if_packet.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
+#include <linux/ethtool.h>
+#include <linux/sockios.h>
+#include <linux/err.h>
#include <netinet/tcp.h>
#include <bpf/bpf_endian.h>
+#include <net/if.h>
#define MAGIC_VAL 0x1234
#define NUM_ITER 100000
@@ -18,12 +22,21 @@ typedef __u16 __sum16;
#define MAGIC_BYTES 123
struct network_helper_opts {
- const char *cc;
int timeout_ms;
bool must_fail;
- bool noconnect;
- int type;
int proto;
+ /* +ve: Passed to listen() as-is.
+ * 0: Default when the test does not set
+ * a particular value during the struct init.
+ * It is changed to 1 before passing to listen().
+ * Most tests only have one on-going connection.
+ * -ve: It is changed to 0 before passing to listen().
+ * It is useful to force syncookie without
+ * changing the "tcp_syncookies" sysctl from 1 to 2.
+ */
+ int backlog;
+ int (*post_socket_cb)(int fd, void *opts);
+ void *cb_opts;
};
/* ipv4 test vector */
@@ -43,17 +56,22 @@ struct ipv6_packet {
extern struct ipv6_packet pkt_v6;
int settimeo(int fd, int timeout_ms);
+int start_server_str(int family, int type, const char *addr_str, __u16 port,
+ const struct network_helper_opts *opts);
int start_server(int family, int type, const char *addr, __u16 port,
int timeout_ms);
-int start_mptcp_server(int family, const char *addr, __u16 port,
- int timeout_ms);
int *start_reuseport_server(int family, int type, const char *addr_str,
__u16 port, int timeout_ms,
unsigned int nr_listens);
+int start_server_addr(int type, const struct sockaddr_storage *addr, socklen_t len,
+ const struct network_helper_opts *opts);
void free_fds(int *fds, unsigned int nr_close_fds);
-int connect_to_addr(const struct sockaddr_storage *addr, socklen_t len, int type);
+int client_socket(int family, int type,
+ const struct network_helper_opts *opts);
+int connect_to_addr(int type, const struct sockaddr_storage *addr, socklen_t len,
+ const struct network_helper_opts *opts);
int connect_to_fd(int server_fd, int timeout_ms);
-int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts);
+int connect_to_fd_opts(int server_fd, int type, const struct network_helper_opts *opts);
int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms);
int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
int timeout_ms);
@@ -61,6 +79,8 @@ int make_sockaddr(int family, const char *addr_str, __u16 port,
struct sockaddr_storage *addr, socklen_t *len);
char *ping_command(int family);
int get_socket_local_port(int sock_fd);
+int get_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param);
+int set_hw_ring_size(char *ifname, struct ethtool_ringparam *ring_param);
struct nstoken;
/**
@@ -71,6 +91,7 @@ struct nstoken;
*/
struct nstoken *open_netns(const char *name);
void close_netns(struct nstoken *token);
+int send_recv_data(int lfd, int fd, uint32_t total_bytes);
static __u16 csum_fold(__u32 csum)
{
diff --git a/tools/testing/selftests/bpf/prog_tests/arena_atomics.c b/tools/testing/selftests/bpf/prog_tests/arena_atomics.c
new file mode 100644
index 000000000000..26e7c06c6cb4
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/arena_atomics.c
@@ -0,0 +1,204 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include "arena_atomics.skel.h"
+
+static void test_add(struct arena_atomics *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, prog_fd;
+
+ /* No need to attach it, just run it directly */
+ prog_fd = bpf_program__fd(skel->progs.add);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->arena->add64_value, 3, "add64_value");
+ ASSERT_EQ(skel->arena->add64_result, 1, "add64_result");
+
+ ASSERT_EQ(skel->arena->add32_value, 3, "add32_value");
+ ASSERT_EQ(skel->arena->add32_result, 1, "add32_result");
+
+ ASSERT_EQ(skel->arena->add_stack_value_copy, 3, "add_stack_value");
+ ASSERT_EQ(skel->arena->add_stack_result, 1, "add_stack_result");
+
+ ASSERT_EQ(skel->arena->add_noreturn_value, 3, "add_noreturn_value");
+}
+
+static void test_sub(struct arena_atomics *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, prog_fd;
+
+ /* No need to attach it, just run it directly */
+ prog_fd = bpf_program__fd(skel->progs.sub);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->arena->sub64_value, -1, "sub64_value");
+ ASSERT_EQ(skel->arena->sub64_result, 1, "sub64_result");
+
+ ASSERT_EQ(skel->arena->sub32_value, -1, "sub32_value");
+ ASSERT_EQ(skel->arena->sub32_result, 1, "sub32_result");
+
+ ASSERT_EQ(skel->arena->sub_stack_value_copy, -1, "sub_stack_value");
+ ASSERT_EQ(skel->arena->sub_stack_result, 1, "sub_stack_result");
+
+ ASSERT_EQ(skel->arena->sub_noreturn_value, -1, "sub_noreturn_value");
+}
+
+static void test_and(struct arena_atomics *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, prog_fd;
+
+ /* No need to attach it, just run it directly */
+ prog_fd = bpf_program__fd(skel->progs.and);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->arena->and64_value, 0x010ull << 32, "and64_value");
+ ASSERT_EQ(skel->arena->and32_value, 0x010, "and32_value");
+}
+
+static void test_or(struct arena_atomics *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, prog_fd;
+
+ /* No need to attach it, just run it directly */
+ prog_fd = bpf_program__fd(skel->progs.or);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->arena->or64_value, 0x111ull << 32, "or64_value");
+ ASSERT_EQ(skel->arena->or32_value, 0x111, "or32_value");
+}
+
+static void test_xor(struct arena_atomics *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, prog_fd;
+
+ /* No need to attach it, just run it directly */
+ prog_fd = bpf_program__fd(skel->progs.xor);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->arena->xor64_value, 0x101ull << 32, "xor64_value");
+ ASSERT_EQ(skel->arena->xor32_value, 0x101, "xor32_value");
+}
+
+static void test_cmpxchg(struct arena_atomics *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, prog_fd;
+
+ /* No need to attach it, just run it directly */
+ prog_fd = bpf_program__fd(skel->progs.cmpxchg);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->arena->cmpxchg64_value, 2, "cmpxchg64_value");
+ ASSERT_EQ(skel->arena->cmpxchg64_result_fail, 1, "cmpxchg_result_fail");
+ ASSERT_EQ(skel->arena->cmpxchg64_result_succeed, 1, "cmpxchg_result_succeed");
+
+ ASSERT_EQ(skel->arena->cmpxchg32_value, 2, "lcmpxchg32_value");
+ ASSERT_EQ(skel->arena->cmpxchg32_result_fail, 1, "cmpxchg_result_fail");
+ ASSERT_EQ(skel->arena->cmpxchg32_result_succeed, 1, "cmpxchg_result_succeed");
+}
+
+static void test_xchg(struct arena_atomics *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, prog_fd;
+
+ /* No need to attach it, just run it directly */
+ prog_fd = bpf_program__fd(skel->progs.xchg);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->arena->xchg64_value, 2, "xchg64_value");
+ ASSERT_EQ(skel->arena->xchg64_result, 1, "xchg64_result");
+
+ ASSERT_EQ(skel->arena->xchg32_value, 2, "xchg32_value");
+ ASSERT_EQ(skel->arena->xchg32_result, 1, "xchg32_result");
+}
+
+static void test_uaf(struct arena_atomics *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, prog_fd;
+
+ /* No need to attach it, just run it directly */
+ prog_fd = bpf_program__fd(skel->progs.uaf);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->arena->uaf_recovery_fails, 0, "uaf_recovery_fails");
+}
+
+void test_arena_atomics(void)
+{
+ struct arena_atomics *skel;
+ int err;
+
+ skel = arena_atomics__open();
+ if (!ASSERT_OK_PTR(skel, "arena atomics skeleton open"))
+ return;
+
+ if (skel->data->skip_tests) {
+ printf("%s:SKIP:no ENABLE_ATOMICS_TESTS or no addr_space_cast support in clang",
+ __func__);
+ test__skip();
+ goto cleanup;
+ }
+ err = arena_atomics__load(skel);
+ if (!ASSERT_OK(err, "arena atomics skeleton load"))
+ return;
+ skel->bss->pid = getpid();
+
+ if (test__start_subtest("add"))
+ test_add(skel);
+ if (test__start_subtest("sub"))
+ test_sub(skel);
+ if (test__start_subtest("and"))
+ test_and(skel);
+ if (test__start_subtest("or"))
+ test_or(skel);
+ if (test__start_subtest("xor"))
+ test_xor(skel);
+ if (test__start_subtest("cmpxchg"))
+ test_cmpxchg(skel);
+ if (test__start_subtest("xchg"))
+ test_xchg(skel);
+ if (test__start_subtest("uaf"))
+ test_uaf(skel);
+
+cleanup:
+ arena_atomics__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
index 1454cebc262b..070c52c312e5 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
@@ -451,7 +451,7 @@ static void pe_subtest(struct test_bpf_cookie *skel)
attr.type = PERF_TYPE_SOFTWARE;
attr.config = PERF_COUNT_SW_CPU_CLOCK;
attr.freq = 1;
- attr.sample_freq = 1000;
+ attr.sample_freq = 10000;
pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
if (!ASSERT_GE(pfd, 0, "perf_fd"))
goto cleanup;
@@ -573,6 +573,115 @@ cleanup:
close(lsm_fd);
}
+static void tp_btf_subtest(struct test_bpf_cookie *skel)
+{
+ __u64 cookie;
+ int prog_fd, link_fd = -1;
+ struct bpf_link *link = NULL;
+ LIBBPF_OPTS(bpf_link_create_opts, link_opts);
+ LIBBPF_OPTS(bpf_raw_tp_opts, raw_tp_opts);
+ LIBBPF_OPTS(bpf_trace_opts, trace_opts);
+
+ /* There are three different ways to attach tp_btf (BTF-aware raw
+ * tracepoint) programs. Let's test all of them.
+ */
+ prog_fd = bpf_program__fd(skel->progs.handle_tp_btf);
+
+ /* low-level BPF_RAW_TRACEPOINT_OPEN command wrapper */
+ skel->bss->tp_btf_res = 0;
+
+ raw_tp_opts.cookie = cookie = 0x11000000000000L;
+ link_fd = bpf_raw_tracepoint_open_opts(prog_fd, &raw_tp_opts);
+ if (!ASSERT_GE(link_fd, 0, "bpf_raw_tracepoint_open_opts"))
+ goto cleanup;
+
+ usleep(1); /* trigger */
+ close(link_fd); /* detach */
+ link_fd = -1;
+
+ ASSERT_EQ(skel->bss->tp_btf_res, cookie, "raw_tp_open_res");
+
+ /* low-level generic bpf_link_create() API */
+ skel->bss->tp_btf_res = 0;
+
+ link_opts.tracing.cookie = cookie = 0x22000000000000L;
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_RAW_TP, &link_opts);
+ if (!ASSERT_GE(link_fd, 0, "bpf_link_create"))
+ goto cleanup;
+
+ usleep(1); /* trigger */
+ close(link_fd); /* detach */
+ link_fd = -1;
+
+ ASSERT_EQ(skel->bss->tp_btf_res, cookie, "link_create_res");
+
+ /* high-level bpf_link-based bpf_program__attach_trace_opts() API */
+ skel->bss->tp_btf_res = 0;
+
+ trace_opts.cookie = cookie = 0x33000000000000L;
+ link = bpf_program__attach_trace_opts(skel->progs.handle_tp_btf, &trace_opts);
+ if (!ASSERT_OK_PTR(link, "attach_trace_opts"))
+ goto cleanup;
+
+ usleep(1); /* trigger */
+ bpf_link__destroy(link); /* detach */
+ link = NULL;
+
+ ASSERT_EQ(skel->bss->tp_btf_res, cookie, "attach_trace_opts_res");
+
+cleanup:
+ if (link_fd >= 0)
+ close(link_fd);
+ bpf_link__destroy(link);
+}
+
+static void raw_tp_subtest(struct test_bpf_cookie *skel)
+{
+ __u64 cookie;
+ int prog_fd, link_fd = -1;
+ struct bpf_link *link = NULL;
+ LIBBPF_OPTS(bpf_raw_tp_opts, raw_tp_opts);
+ LIBBPF_OPTS(bpf_raw_tracepoint_opts, opts);
+
+ /* There are two different ways to attach raw_tp programs */
+ prog_fd = bpf_program__fd(skel->progs.handle_raw_tp);
+
+ /* low-level BPF_RAW_TRACEPOINT_OPEN command wrapper */
+ skel->bss->raw_tp_res = 0;
+
+ raw_tp_opts.tp_name = "sys_enter";
+ raw_tp_opts.cookie = cookie = 0x55000000000000L;
+ link_fd = bpf_raw_tracepoint_open_opts(prog_fd, &raw_tp_opts);
+ if (!ASSERT_GE(link_fd, 0, "bpf_raw_tracepoint_open_opts"))
+ goto cleanup;
+
+ usleep(1); /* trigger */
+ close(link_fd); /* detach */
+ link_fd = -1;
+
+ ASSERT_EQ(skel->bss->raw_tp_res, cookie, "raw_tp_open_res");
+
+ /* high-level bpf_link-based bpf_program__attach_raw_tracepoint_opts() API */
+ skel->bss->raw_tp_res = 0;
+
+ opts.cookie = cookie = 0x66000000000000L;
+ link = bpf_program__attach_raw_tracepoint_opts(skel->progs.handle_raw_tp,
+ "sys_enter", &opts);
+ if (!ASSERT_OK_PTR(link, "attach_raw_tp_opts"))
+ goto cleanup;
+
+ usleep(1); /* trigger */
+ bpf_link__destroy(link); /* detach */
+ link = NULL;
+
+ ASSERT_EQ(skel->bss->raw_tp_res, cookie, "attach_raw_tp_opts_res");
+
+cleanup:
+ if (link_fd >= 0)
+ close(link_fd);
+ bpf_link__destroy(link);
+}
+
void test_bpf_cookie(void)
{
struct test_bpf_cookie *skel;
@@ -601,6 +710,9 @@ void test_bpf_cookie(void)
tracing_subtest(skel);
if (test__start_subtest("lsm"))
lsm_subtest(skel);
-
+ if (test__start_subtest("tp_btf"))
+ tp_btf_subtest(skel);
+ if (test__start_subtest("raw_tp"))
+ raw_tp_subtest(skel);
test_bpf_cookie__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
index b30ff6b3b81a..a4a1f93878d4 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
@@ -104,6 +104,7 @@ static void test_bpf_nf_ct(int mode)
ASSERT_EQ(skel->bss->test_einval_bpf_tuple, -EINVAL, "Test EINVAL for NULL bpf_tuple");
ASSERT_EQ(skel->bss->test_einval_reserved, -EINVAL, "Test EINVAL for reserved not set to 0");
+ ASSERT_EQ(skel->bss->test_einval_reserved_new, -EINVAL, "Test EINVAL for reserved in new struct not set to 0");
ASSERT_EQ(skel->bss->test_einval_netns_id, -EINVAL, "Test EINVAL for netns_id < -1");
ASSERT_EQ(skel->bss->test_einval_len_opts, -EINVAL, "Test EINVAL for len__opts != NF_BPF_CT_OPTS_SZ");
ASSERT_EQ(skel->bss->test_eproto_l4proto, -EPROTO, "Test EPROTO for l4proto != TCP or UDP");
@@ -122,6 +123,12 @@ static void test_bpf_nf_ct(int mode)
ASSERT_EQ(skel->bss->test_exist_lookup_mark, 43, "Test existing connection lookup ctmark");
ASSERT_EQ(skel->data->test_snat_addr, 0, "Test for source natting");
ASSERT_EQ(skel->data->test_dnat_addr, 0, "Test for destination natting");
+ ASSERT_EQ(skel->data->test_ct_zone_id_alloc_entry, 0, "Test for alloc new entry in specified ct zone");
+ ASSERT_EQ(skel->data->test_ct_zone_id_insert_entry, 0, "Test for insert new entry in specified ct zone");
+ ASSERT_EQ(skel->data->test_ct_zone_id_succ_lookup, 0, "Test for successful lookup in specified ct_zone");
+ ASSERT_EQ(skel->bss->test_ct_zone_dir_enoent_lookup, -ENOENT, "Test ENOENT for lookup with wrong ct zone dir");
+ ASSERT_EQ(skel->bss->test_ct_zone_id_enoent_lookup, -ENOENT, "Test ENOENT for lookup in wrong ct zone");
+
end:
if (client_fd != -1)
close(client_fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
index a88e6e07e4f5..63422f4f3896 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
@@ -13,6 +13,8 @@
#include "tcp_ca_write_sk_pacing.skel.h"
#include "tcp_ca_incompl_cong_ops.skel.h"
#include "tcp_ca_unsupp_cong_op.skel.h"
+#include "tcp_ca_kfunc.skel.h"
+#include "bpf_cc_cubic.skel.h"
#ifndef ENOTSUPP
#define ENOTSUPP 524
@@ -20,7 +22,11 @@
static const unsigned int total_bytes = 10 * 1024 * 1024;
static int expected_stg = 0xeB9F;
-static int stop;
+
+struct cb_opts {
+ const char *cc;
+ int map_fd;
+};
static int settcpca(int fd, const char *tcp_ca)
{
@@ -33,141 +39,66 @@ static int settcpca(int fd, const char *tcp_ca)
return 0;
}
-static void *server(void *arg)
+static bool start_test(char *addr_str,
+ const struct network_helper_opts *srv_opts,
+ const struct network_helper_opts *cli_opts,
+ int *srv_fd, int *cli_fd)
{
- int lfd = (int)(long)arg, err = 0, fd;
- ssize_t nr_sent = 0, bytes = 0;
- char batch[1500];
-
- fd = accept(lfd, NULL, NULL);
- while (fd == -1) {
- if (errno == EINTR)
- continue;
- err = -errno;
- goto done;
- }
+ *srv_fd = start_server_str(AF_INET6, SOCK_STREAM, addr_str, 0, srv_opts);
+ if (!ASSERT_NEQ(*srv_fd, -1, "start_server_str"))
+ goto err;
- if (settimeo(fd, 0)) {
- err = -errno;
- goto done;
- }
-
- while (bytes < total_bytes && !READ_ONCE(stop)) {
- nr_sent = send(fd, &batch,
- MIN(total_bytes - bytes, sizeof(batch)), 0);
- if (nr_sent == -1 && errno == EINTR)
- continue;
- if (nr_sent == -1) {
- err = -errno;
- break;
- }
- bytes += nr_sent;
- }
+ /* connect to server */
+ *cli_fd = connect_to_fd_opts(*srv_fd, SOCK_STREAM, cli_opts);
+ if (!ASSERT_NEQ(*cli_fd, -1, "connect_to_fd_opts"))
+ goto err;
- ASSERT_EQ(bytes, total_bytes, "send");
+ return true;
-done:
- if (fd >= 0)
- close(fd);
- if (err) {
- WRITE_ONCE(stop, 1);
- return ERR_PTR(err);
+err:
+ if (*srv_fd != -1) {
+ close(*srv_fd);
+ *srv_fd = -1;
+ }
+ if (*cli_fd != -1) {
+ close(*cli_fd);
+ *cli_fd = -1;
}
- return NULL;
+ return false;
}
-static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map)
+static void do_test(const struct network_helper_opts *opts)
{
- struct sockaddr_in6 sa6 = {};
- ssize_t nr_recv = 0, bytes = 0;
int lfd = -1, fd = -1;
- pthread_t srv_thread;
- socklen_t addrlen = sizeof(sa6);
- void *thread_ret;
- char batch[1500];
- int err;
-
- WRITE_ONCE(stop, 0);
-
- lfd = socket(AF_INET6, SOCK_STREAM, 0);
- if (!ASSERT_NEQ(lfd, -1, "socket"))
- return;
-
- fd = socket(AF_INET6, SOCK_STREAM, 0);
- if (!ASSERT_NEQ(fd, -1, "socket")) {
- close(lfd);
- return;
- }
- if (settcpca(lfd, tcp_ca) || settcpca(fd, tcp_ca) ||
- settimeo(lfd, 0) || settimeo(fd, 0))
+ if (!start_test(NULL, opts, opts, &lfd, &fd))
goto done;
- /* bind, listen and start server thread to accept */
- sa6.sin6_family = AF_INET6;
- sa6.sin6_addr = in6addr_loopback;
- err = bind(lfd, (struct sockaddr *)&sa6, addrlen);
- if (!ASSERT_NEQ(err, -1, "bind"))
- goto done;
-
- err = getsockname(lfd, (struct sockaddr *)&sa6, &addrlen);
- if (!ASSERT_NEQ(err, -1, "getsockname"))
- goto done;
-
- err = listen(lfd, 1);
- if (!ASSERT_NEQ(err, -1, "listen"))
- goto done;
-
- if (sk_stg_map) {
- err = bpf_map_update_elem(bpf_map__fd(sk_stg_map), &fd,
- &expected_stg, BPF_NOEXIST);
- if (!ASSERT_OK(err, "bpf_map_update_elem(sk_stg_map)"))
- goto done;
- }
-
- /* connect to server */
- err = connect(fd, (struct sockaddr *)&sa6, addrlen);
- if (!ASSERT_NEQ(err, -1, "connect"))
- goto done;
-
- if (sk_stg_map) {
- int tmp_stg;
-
- err = bpf_map_lookup_elem(bpf_map__fd(sk_stg_map), &fd,
- &tmp_stg);
- if (!ASSERT_ERR(err, "bpf_map_lookup_elem(sk_stg_map)") ||
- !ASSERT_EQ(errno, ENOENT, "bpf_map_lookup_elem(sk_stg_map)"))
- goto done;
- }
-
- err = pthread_create(&srv_thread, NULL, server, (void *)(long)lfd);
- if (!ASSERT_OK(err, "pthread_create"))
- goto done;
+ ASSERT_OK(send_recv_data(lfd, fd, total_bytes), "send_recv_data");
- /* recv total_bytes */
- while (bytes < total_bytes && !READ_ONCE(stop)) {
- nr_recv = recv(fd, &batch,
- MIN(total_bytes - bytes, sizeof(batch)), 0);
- if (nr_recv == -1 && errno == EINTR)
- continue;
- if (nr_recv == -1)
- break;
- bytes += nr_recv;
- }
-
- ASSERT_EQ(bytes, total_bytes, "recv");
+done:
+ if (lfd != -1)
+ close(lfd);
+ if (fd != -1)
+ close(fd);
+}
- WRITE_ONCE(stop, 1);
- pthread_join(srv_thread, &thread_ret);
- ASSERT_OK(IS_ERR(thread_ret), "thread_ret");
+static int cc_cb(int fd, void *opts)
+{
+ struct cb_opts *cb_opts = (struct cb_opts *)opts;
-done:
- close(lfd);
- close(fd);
+ return settcpca(fd, cb_opts->cc);
}
static void test_cubic(void)
{
+ struct cb_opts cb_opts = {
+ .cc = "bpf_cubic",
+ };
+ struct network_helper_opts opts = {
+ .post_socket_cb = cc_cb,
+ .cb_opts = &cb_opts,
+ };
struct bpf_cubic *cubic_skel;
struct bpf_link *link;
@@ -181,7 +112,7 @@ static void test_cubic(void)
return;
}
- do_test("bpf_cubic", NULL);
+ do_test(&opts);
ASSERT_EQ(cubic_skel->bss->bpf_cubic_acked_called, 1, "pkts_acked called");
@@ -189,8 +120,37 @@ static void test_cubic(void)
bpf_cubic__destroy(cubic_skel);
}
+static int stg_post_socket_cb(int fd, void *opts)
+{
+ struct cb_opts *cb_opts = (struct cb_opts *)opts;
+ int err;
+
+ err = settcpca(fd, cb_opts->cc);
+ if (err)
+ return err;
+
+ err = bpf_map_update_elem(cb_opts->map_fd, &fd,
+ &expected_stg, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(sk_stg_map)"))
+ return err;
+
+ return 0;
+}
+
static void test_dctcp(void)
{
+ struct cb_opts cb_opts = {
+ .cc = "bpf_dctcp",
+ };
+ struct network_helper_opts opts = {
+ .post_socket_cb = cc_cb,
+ .cb_opts = &cb_opts,
+ };
+ struct network_helper_opts cli_opts = {
+ .post_socket_cb = stg_post_socket_cb,
+ .cb_opts = &cb_opts,
+ };
+ int lfd = -1, fd = -1, tmp_stg, err;
struct bpf_dctcp *dctcp_skel;
struct bpf_link *link;
@@ -204,11 +164,58 @@ static void test_dctcp(void)
return;
}
- do_test("bpf_dctcp", dctcp_skel->maps.sk_stg_map);
+ cb_opts.map_fd = bpf_map__fd(dctcp_skel->maps.sk_stg_map);
+ if (!start_test(NULL, &opts, &cli_opts, &lfd, &fd))
+ goto done;
+
+ err = bpf_map_lookup_elem(cb_opts.map_fd, &fd, &tmp_stg);
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem(sk_stg_map)") ||
+ !ASSERT_EQ(errno, ENOENT, "bpf_map_lookup_elem(sk_stg_map)"))
+ goto done;
+
+ ASSERT_OK(send_recv_data(lfd, fd, total_bytes), "send_recv_data");
ASSERT_EQ(dctcp_skel->bss->stg_result, expected_stg, "stg_result");
+done:
bpf_link__destroy(link);
bpf_dctcp__destroy(dctcp_skel);
+ if (lfd != -1)
+ close(lfd);
+ if (fd != -1)
+ close(fd);
+}
+
+static void test_dctcp_autoattach_map(void)
+{
+ struct cb_opts cb_opts = {
+ .cc = "bpf_dctcp",
+ };
+ struct network_helper_opts opts = {
+ .post_socket_cb = cc_cb,
+ .cb_opts = &cb_opts,
+ };
+ struct bpf_dctcp *dctcp_skel;
+ struct bpf_link *link;
+
+ dctcp_skel = bpf_dctcp__open_and_load();
+ if (!ASSERT_OK_PTR(dctcp_skel, "bpf_dctcp__open_and_load"))
+ return;
+
+ bpf_map__set_autoattach(dctcp_skel->maps.dctcp, true);
+ bpf_map__set_autoattach(dctcp_skel->maps.dctcp_nouse, false);
+
+ if (!ASSERT_OK(bpf_dctcp__attach(dctcp_skel), "bpf_dctcp__attach"))
+ goto destroy;
+
+ /* struct_ops is auto-attached */
+ link = dctcp_skel->links.dctcp;
+ if (!ASSERT_OK_PTR(link, "link"))
+ goto destroy;
+
+ do_test(&opts);
+
+destroy:
+ bpf_dctcp__destroy(dctcp_skel);
}
static char *err_str;
@@ -256,11 +263,22 @@ static void test_invalid_license(void)
static void test_dctcp_fallback(void)
{
int err, lfd = -1, cli_fd = -1, srv_fd = -1;
- struct network_helper_opts opts = {
- .cc = "cubic",
- };
struct bpf_dctcp *dctcp_skel;
struct bpf_link *link = NULL;
+ struct cb_opts dctcp = {
+ .cc = "bpf_dctcp",
+ };
+ struct network_helper_opts srv_opts = {
+ .post_socket_cb = cc_cb,
+ .cb_opts = &dctcp,
+ };
+ struct cb_opts cubic = {
+ .cc = "cubic",
+ };
+ struct network_helper_opts cli_opts = {
+ .post_socket_cb = cc_cb,
+ .cb_opts = &cubic,
+ };
char srv_cc[16];
socklen_t cc_len = sizeof(srv_cc);
@@ -275,13 +293,7 @@ static void test_dctcp_fallback(void)
if (!ASSERT_OK_PTR(link, "dctcp link"))
goto done;
- lfd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
- if (!ASSERT_GE(lfd, 0, "lfd") ||
- !ASSERT_OK(settcpca(lfd, "bpf_dctcp"), "lfd=>bpf_dctcp"))
- goto done;
-
- cli_fd = connect_to_fd_opts(lfd, &opts);
- if (!ASSERT_GE(cli_fd, 0, "cli_fd"))
+ if (!start_test("::1", &srv_opts, &cli_opts, &lfd, &cli_fd))
goto done;
srv_fd = accept(lfd, NULL, 0);
@@ -315,7 +327,7 @@ static void test_rel_setsockopt(void)
struct bpf_dctcp_release *rel_skel;
libbpf_print_fn_t old_print_fn;
- err_str = "unknown func bpf_setsockopt";
+ err_str = "program of this type cannot use helper bpf_setsockopt";
found = false;
old_print_fn = libbpf_set_print(libbpf_debug_print);
@@ -382,6 +394,13 @@ static void test_unsupp_cong_op(void)
static void test_update_ca(void)
{
+ struct cb_opts cb_opts = {
+ .cc = "tcp_ca_update",
+ };
+ struct network_helper_opts opts = {
+ .post_socket_cb = cc_cb,
+ .cb_opts = &cb_opts,
+ };
struct tcp_ca_update *skel;
struct bpf_link *link;
int saved_ca1_cnt;
@@ -392,25 +411,34 @@ static void test_update_ca(void)
return;
link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
- ASSERT_OK_PTR(link, "attach_struct_ops");
+ if (!ASSERT_OK_PTR(link, "attach_struct_ops"))
+ goto out;
- do_test("tcp_ca_update", NULL);
+ do_test(&opts);
saved_ca1_cnt = skel->bss->ca1_cnt;
ASSERT_GT(saved_ca1_cnt, 0, "ca1_ca1_cnt");
err = bpf_link__update_map(link, skel->maps.ca_update_2);
ASSERT_OK(err, "update_map");
- do_test("tcp_ca_update", NULL);
+ do_test(&opts);
ASSERT_EQ(skel->bss->ca1_cnt, saved_ca1_cnt, "ca2_ca1_cnt");
ASSERT_GT(skel->bss->ca2_cnt, 0, "ca2_ca2_cnt");
bpf_link__destroy(link);
+out:
tcp_ca_update__destroy(skel);
}
static void test_update_wrong(void)
{
+ struct cb_opts cb_opts = {
+ .cc = "tcp_ca_update",
+ };
+ struct network_helper_opts opts = {
+ .post_socket_cb = cc_cb,
+ .cb_opts = &cb_opts,
+ };
struct tcp_ca_update *skel;
struct bpf_link *link;
int saved_ca1_cnt;
@@ -421,24 +449,33 @@ static void test_update_wrong(void)
return;
link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
- ASSERT_OK_PTR(link, "attach_struct_ops");
+ if (!ASSERT_OK_PTR(link, "attach_struct_ops"))
+ goto out;
- do_test("tcp_ca_update", NULL);
+ do_test(&opts);
saved_ca1_cnt = skel->bss->ca1_cnt;
ASSERT_GT(saved_ca1_cnt, 0, "ca1_ca1_cnt");
err = bpf_link__update_map(link, skel->maps.ca_wrong);
ASSERT_ERR(err, "update_map");
- do_test("tcp_ca_update", NULL);
+ do_test(&opts);
ASSERT_GT(skel->bss->ca1_cnt, saved_ca1_cnt, "ca2_ca1_cnt");
bpf_link__destroy(link);
+out:
tcp_ca_update__destroy(skel);
}
static void test_mixed_links(void)
{
+ struct cb_opts cb_opts = {
+ .cc = "tcp_ca_update",
+ };
+ struct network_helper_opts opts = {
+ .post_socket_cb = cc_cb,
+ .cb_opts = &cb_opts,
+ };
struct tcp_ca_update *skel;
struct bpf_link *link, *link_nl;
int err;
@@ -448,12 +485,13 @@ static void test_mixed_links(void)
return;
link_nl = bpf_map__attach_struct_ops(skel->maps.ca_no_link);
- ASSERT_OK_PTR(link_nl, "attach_struct_ops_nl");
+ if (!ASSERT_OK_PTR(link_nl, "attach_struct_ops_nl"))
+ goto out;
link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
ASSERT_OK_PTR(link, "attach_struct_ops");
- do_test("tcp_ca_update", NULL);
+ do_test(&opts);
ASSERT_GT(skel->bss->ca1_cnt, 0, "ca1_ca1_cnt");
err = bpf_link__update_map(link, skel->maps.ca_no_link);
@@ -461,6 +499,7 @@ static void test_mixed_links(void)
bpf_link__destroy(link);
bpf_link__destroy(link_nl);
+out:
tcp_ca_update__destroy(skel);
}
@@ -503,7 +542,8 @@ static void test_link_replace(void)
bpf_link__destroy(link);
link = bpf_map__attach_struct_ops(skel->maps.ca_update_2);
- ASSERT_OK_PTR(link, "attach_struct_ops_2nd");
+ if (!ASSERT_OK_PTR(link, "attach_struct_ops_2nd"))
+ goto out;
/* BPF_F_REPLACE with a wrong old map Fd. It should fail!
*
@@ -526,9 +566,47 @@ static void test_link_replace(void)
bpf_link__destroy(link);
+out:
tcp_ca_update__destroy(skel);
}
+static void test_tcp_ca_kfunc(void)
+{
+ struct tcp_ca_kfunc *skel;
+
+ skel = tcp_ca_kfunc__open_and_load();
+ ASSERT_OK_PTR(skel, "tcp_ca_kfunc__open_and_load");
+ tcp_ca_kfunc__destroy(skel);
+}
+
+static void test_cc_cubic(void)
+{
+ struct cb_opts cb_opts = {
+ .cc = "bpf_cc_cubic",
+ };
+ struct network_helper_opts opts = {
+ .post_socket_cb = cc_cb,
+ .cb_opts = &cb_opts,
+ };
+ struct bpf_cc_cubic *cc_cubic_skel;
+ struct bpf_link *link;
+
+ cc_cubic_skel = bpf_cc_cubic__open_and_load();
+ if (!ASSERT_OK_PTR(cc_cubic_skel, "bpf_cc_cubic__open_and_load"))
+ return;
+
+ link = bpf_map__attach_struct_ops(cc_cubic_skel->maps.cc_cubic);
+ if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) {
+ bpf_cc_cubic__destroy(cc_cubic_skel);
+ return;
+ }
+
+ do_test(&opts);
+
+ bpf_link__destroy(link);
+ bpf_cc_cubic__destroy(cc_cubic_skel);
+}
+
void test_bpf_tcp_ca(void)
{
if (test__start_subtest("dctcp"))
@@ -557,4 +635,10 @@ void test_bpf_tcp_ca(void)
test_multi_links();
if (test__start_subtest("link_replace"))
test_link_replace();
+ if (test__start_subtest("tcp_ca_kfunc"))
+ test_tcp_ca_kfunc();
+ if (test__start_subtest("cc_cubic"))
+ test_cc_cubic();
+ if (test__start_subtest("dctcp_autoattach_map"))
+ test_dctcp_autoattach_map();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
index 4c6ada5b270b..73f669014b69 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
@@ -45,12 +45,6 @@ err_out:
return err;
}
-struct scale_test_def {
- const char *file;
- enum bpf_prog_type attach_type;
- bool fails;
-};
-
static void scale_test(const char *file,
enum bpf_prog_type attach_type,
bool should_fail)
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_distill.c b/tools/testing/selftests/bpf/prog_tests/btf_distill.c
new file mode 100644
index 000000000000..bfbe795823a2
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_distill.c
@@ -0,0 +1,552 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024, Oracle and/or its affiliates. */
+
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include "btf_helpers.h"
+
+/* Fabricate base, split BTF with references to base types needed; then create
+ * split BTF with distilled base BTF and ensure expectations are met:
+ * - only referenced base types from split BTF are present
+ * - struct/union/enum are represented as empty unless anonymous, when they
+ * are represented in full in split BTF
+ */
+static void test_distilled_base(void)
+{
+ struct btf *btf1 = NULL, *btf2 = NULL, *btf3 = NULL, *btf4 = NULL;
+
+ btf1 = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf1, "empty_main_btf"))
+ return;
+
+ btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [1] int */
+ btf__add_ptr(btf1, 1); /* [2] ptr to int */
+ btf__add_struct(btf1, "s1", 8); /* [3] struct s1 { */
+ btf__add_field(btf1, "f1", 2, 0, 0); /* int *f1; */
+ /* } */
+ btf__add_struct(btf1, "", 12); /* [4] struct { */
+ btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */
+ btf__add_field(btf1, "f2", 3, 32, 0); /* struct s1 f2; */
+ /* } */
+ btf__add_int(btf1, "unsigned int", 4, 0); /* [5] unsigned int */
+ btf__add_union(btf1, "u1", 12); /* [6] union u1 { */
+ btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */
+ btf__add_field(btf1, "f2", 2, 0, 0); /* int *f2; */
+ /* } */
+ btf__add_union(btf1, "", 4); /* [7] union { */
+ btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */
+ /* } */
+ btf__add_enum(btf1, "e1", 4); /* [8] enum e1 { */
+ btf__add_enum_value(btf1, "v1", 1); /* v1 = 1; */
+ /* } */
+ btf__add_enum(btf1, "", 4); /* [9] enum { */
+ btf__add_enum_value(btf1, "av1", 2); /* av1 = 2; */
+ /* } */
+ btf__add_enum64(btf1, "e641", 8, true); /* [10] enum64 { */
+ btf__add_enum64_value(btf1, "v1", 1024); /* v1 = 1024; */
+ /* } */
+ btf__add_enum64(btf1, "", 8, true); /* [11] enum64 { */
+ btf__add_enum64_value(btf1, "v1", 1025); /* v1 = 1025; */
+ /* } */
+ btf__add_struct(btf1, "unneeded", 4); /* [12] struct unneeded { */
+ btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */
+ /* } */
+ btf__add_struct(btf1, "embedded", 4); /* [13] struct embedded { */
+ btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */
+ /* } */
+ btf__add_func_proto(btf1, 1); /* [14] int (*)(int *p1); */
+ btf__add_func_param(btf1, "p1", 1);
+
+ btf__add_array(btf1, 1, 1, 3); /* [15] int [3]; */
+
+ btf__add_struct(btf1, "from_proto", 4); /* [16] struct from_proto { */
+ btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */
+ /* } */
+ btf__add_union(btf1, "u1", 4); /* [17] union u1 { */
+ btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */
+ /* } */
+ VALIDATE_RAW_BTF(
+ btf1,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] PTR '(anon)' type_id=1",
+ "[3] STRUCT 's1' size=8 vlen=1\n"
+ "\t'f1' type_id=2 bits_offset=0",
+ "[4] STRUCT '(anon)' size=12 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=3 bits_offset=32",
+ "[5] INT 'unsigned int' size=4 bits_offset=0 nr_bits=32 encoding=(none)",
+ "[6] UNION 'u1' size=12 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=2 bits_offset=0",
+ "[7] UNION '(anon)' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[8] ENUM 'e1' encoding=UNSIGNED size=4 vlen=1\n"
+ "\t'v1' val=1",
+ "[9] ENUM '(anon)' encoding=UNSIGNED size=4 vlen=1\n"
+ "\t'av1' val=2",
+ "[10] ENUM64 'e641' encoding=SIGNED size=8 vlen=1\n"
+ "\t'v1' val=1024",
+ "[11] ENUM64 '(anon)' encoding=SIGNED size=8 vlen=1\n"
+ "\t'v1' val=1025",
+ "[12] STRUCT 'unneeded' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[13] STRUCT 'embedded' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[14] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n"
+ "\t'p1' type_id=1",
+ "[15] ARRAY '(anon)' type_id=1 index_type_id=1 nr_elems=3",
+ "[16] STRUCT 'from_proto' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[17] UNION 'u1' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0");
+
+ btf2 = btf__new_empty_split(btf1);
+ if (!ASSERT_OK_PTR(btf2, "empty_split_btf"))
+ goto cleanup;
+
+ btf__add_ptr(btf2, 3); /* [18] ptr to struct s1 */
+ /* add ptr to struct anon */
+ btf__add_ptr(btf2, 4); /* [19] ptr to struct (anon) */
+ btf__add_const(btf2, 6); /* [20] const union u1 */
+ btf__add_restrict(btf2, 7); /* [21] restrict union (anon) */
+ btf__add_volatile(btf2, 8); /* [22] volatile enum e1 */
+ btf__add_typedef(btf2, "et", 9); /* [23] typedef enum (anon) */
+ btf__add_const(btf2, 10); /* [24] const enum64 e641 */
+ btf__add_ptr(btf2, 11); /* [25] restrict enum64 (anon) */
+ btf__add_struct(btf2, "with_embedded", 4); /* [26] struct with_embedded { */
+ btf__add_field(btf2, "f1", 13, 0, 0); /* struct embedded f1; */
+ /* } */
+ btf__add_func(btf2, "fn", BTF_FUNC_STATIC, 14); /* [27] int fn(int p1); */
+ btf__add_typedef(btf2, "arraytype", 15); /* [28] typedef int[3] foo; */
+ btf__add_func_proto(btf2, 1); /* [29] int (*)(struct from proto p1); */
+ btf__add_func_param(btf2, "p1", 16);
+
+ VALIDATE_RAW_BTF(
+ btf2,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] PTR '(anon)' type_id=1",
+ "[3] STRUCT 's1' size=8 vlen=1\n"
+ "\t'f1' type_id=2 bits_offset=0",
+ "[4] STRUCT '(anon)' size=12 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=3 bits_offset=32",
+ "[5] INT 'unsigned int' size=4 bits_offset=0 nr_bits=32 encoding=(none)",
+ "[6] UNION 'u1' size=12 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=2 bits_offset=0",
+ "[7] UNION '(anon)' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[8] ENUM 'e1' encoding=UNSIGNED size=4 vlen=1\n"
+ "\t'v1' val=1",
+ "[9] ENUM '(anon)' encoding=UNSIGNED size=4 vlen=1\n"
+ "\t'av1' val=2",
+ "[10] ENUM64 'e641' encoding=SIGNED size=8 vlen=1\n"
+ "\t'v1' val=1024",
+ "[11] ENUM64 '(anon)' encoding=SIGNED size=8 vlen=1\n"
+ "\t'v1' val=1025",
+ "[12] STRUCT 'unneeded' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[13] STRUCT 'embedded' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[14] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n"
+ "\t'p1' type_id=1",
+ "[15] ARRAY '(anon)' type_id=1 index_type_id=1 nr_elems=3",
+ "[16] STRUCT 'from_proto' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[17] UNION 'u1' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[18] PTR '(anon)' type_id=3",
+ "[19] PTR '(anon)' type_id=4",
+ "[20] CONST '(anon)' type_id=6",
+ "[21] RESTRICT '(anon)' type_id=7",
+ "[22] VOLATILE '(anon)' type_id=8",
+ "[23] TYPEDEF 'et' type_id=9",
+ "[24] CONST '(anon)' type_id=10",
+ "[25] PTR '(anon)' type_id=11",
+ "[26] STRUCT 'with_embedded' size=4 vlen=1\n"
+ "\t'f1' type_id=13 bits_offset=0",
+ "[27] FUNC 'fn' type_id=14 linkage=static",
+ "[28] TYPEDEF 'arraytype' type_id=15",
+ "[29] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n"
+ "\t'p1' type_id=16");
+
+ if (!ASSERT_EQ(0, btf__distill_base(btf2, &btf3, &btf4),
+ "distilled_base") ||
+ !ASSERT_OK_PTR(btf3, "distilled_base") ||
+ !ASSERT_OK_PTR(btf4, "distilled_split") ||
+ !ASSERT_EQ(8, btf__type_cnt(btf3), "distilled_base_type_cnt"))
+ goto cleanup;
+
+ VALIDATE_RAW_BTF(
+ btf4,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] STRUCT 's1' size=8 vlen=0",
+ "[3] UNION 'u1' size=12 vlen=0",
+ "[4] ENUM 'e1' encoding=UNSIGNED size=4 vlen=0",
+ "[5] ENUM 'e641' encoding=UNSIGNED size=8 vlen=0",
+ "[6] STRUCT 'embedded' size=4 vlen=0",
+ "[7] STRUCT 'from_proto' size=4 vlen=0",
+ /* split BTF; these types should match split BTF above from 17-28, with
+ * updated type id references
+ */
+ "[8] PTR '(anon)' type_id=2",
+ "[9] PTR '(anon)' type_id=20",
+ "[10] CONST '(anon)' type_id=3",
+ "[11] RESTRICT '(anon)' type_id=21",
+ "[12] VOLATILE '(anon)' type_id=4",
+ "[13] TYPEDEF 'et' type_id=22",
+ "[14] CONST '(anon)' type_id=5",
+ "[15] PTR '(anon)' type_id=23",
+ "[16] STRUCT 'with_embedded' size=4 vlen=1\n"
+ "\t'f1' type_id=6 bits_offset=0",
+ "[17] FUNC 'fn' type_id=24 linkage=static",
+ "[18] TYPEDEF 'arraytype' type_id=25",
+ "[19] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n"
+ "\t'p1' type_id=7",
+ /* split BTF types added from original base BTF below */
+ "[20] STRUCT '(anon)' size=12 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=2 bits_offset=32",
+ "[21] UNION '(anon)' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[22] ENUM '(anon)' encoding=UNSIGNED size=4 vlen=1\n"
+ "\t'av1' val=2",
+ "[23] ENUM64 '(anon)' encoding=SIGNED size=8 vlen=1\n"
+ "\t'v1' val=1025",
+ "[24] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n"
+ "\t'p1' type_id=1",
+ "[25] ARRAY '(anon)' type_id=1 index_type_id=1 nr_elems=3");
+
+ if (!ASSERT_EQ(btf__relocate(btf4, btf1), 0, "relocate_split"))
+ goto cleanup;
+
+ VALIDATE_RAW_BTF(
+ btf4,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] PTR '(anon)' type_id=1",
+ "[3] STRUCT 's1' size=8 vlen=1\n"
+ "\t'f1' type_id=2 bits_offset=0",
+ "[4] STRUCT '(anon)' size=12 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=3 bits_offset=32",
+ "[5] INT 'unsigned int' size=4 bits_offset=0 nr_bits=32 encoding=(none)",
+ "[6] UNION 'u1' size=12 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=2 bits_offset=0",
+ "[7] UNION '(anon)' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[8] ENUM 'e1' encoding=UNSIGNED size=4 vlen=1\n"
+ "\t'v1' val=1",
+ "[9] ENUM '(anon)' encoding=UNSIGNED size=4 vlen=1\n"
+ "\t'av1' val=2",
+ "[10] ENUM64 'e641' encoding=SIGNED size=8 vlen=1\n"
+ "\t'v1' val=1024",
+ "[11] ENUM64 '(anon)' encoding=SIGNED size=8 vlen=1\n"
+ "\t'v1' val=1025",
+ "[12] STRUCT 'unneeded' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[13] STRUCT 'embedded' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[14] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n"
+ "\t'p1' type_id=1",
+ "[15] ARRAY '(anon)' type_id=1 index_type_id=1 nr_elems=3",
+ "[16] STRUCT 'from_proto' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[17] UNION 'u1' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[18] PTR '(anon)' type_id=3",
+ "[19] PTR '(anon)' type_id=30",
+ "[20] CONST '(anon)' type_id=6",
+ "[21] RESTRICT '(anon)' type_id=31",
+ "[22] VOLATILE '(anon)' type_id=8",
+ "[23] TYPEDEF 'et' type_id=32",
+ "[24] CONST '(anon)' type_id=10",
+ "[25] PTR '(anon)' type_id=33",
+ "[26] STRUCT 'with_embedded' size=4 vlen=1\n"
+ "\t'f1' type_id=13 bits_offset=0",
+ "[27] FUNC 'fn' type_id=34 linkage=static",
+ "[28] TYPEDEF 'arraytype' type_id=35",
+ "[29] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n"
+ "\t'p1' type_id=16",
+ /* below here are (duplicate) anon base types added by distill
+ * process to split BTF.
+ */
+ "[30] STRUCT '(anon)' size=12 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=3 bits_offset=32",
+ "[31] UNION '(anon)' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[32] ENUM '(anon)' encoding=UNSIGNED size=4 vlen=1\n"
+ "\t'av1' val=2",
+ "[33] ENUM64 '(anon)' encoding=SIGNED size=8 vlen=1\n"
+ "\t'v1' val=1025",
+ "[34] FUNC_PROTO '(anon)' ret_type_id=1 vlen=1\n"
+ "\t'p1' type_id=1",
+ "[35] ARRAY '(anon)' type_id=1 index_type_id=1 nr_elems=3");
+
+cleanup:
+ btf__free(btf4);
+ btf__free(btf3);
+ btf__free(btf2);
+ btf__free(btf1);
+}
+
+/* ensure we can cope with multiple types with the same name in
+ * distilled base BTF. In this case because sizes are different,
+ * we can still disambiguate them.
+ */
+static void test_distilled_base_multi(void)
+{
+ struct btf *btf1 = NULL, *btf2 = NULL, *btf3 = NULL, *btf4 = NULL;
+
+ btf1 = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf1, "empty_main_btf"))
+ return;
+ btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [1] int */
+ btf__add_int(btf1, "int", 8, BTF_INT_SIGNED); /* [2] int */
+ VALIDATE_RAW_BTF(
+ btf1,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] INT 'int' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED");
+ btf2 = btf__new_empty_split(btf1);
+ if (!ASSERT_OK_PTR(btf2, "empty_split_btf"))
+ goto cleanup;
+ btf__add_ptr(btf2, 1);
+ btf__add_const(btf2, 2);
+ VALIDATE_RAW_BTF(
+ btf2,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] INT 'int' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED",
+ "[3] PTR '(anon)' type_id=1",
+ "[4] CONST '(anon)' type_id=2");
+ if (!ASSERT_EQ(0, btf__distill_base(btf2, &btf3, &btf4),
+ "distilled_base") ||
+ !ASSERT_OK_PTR(btf3, "distilled_base") ||
+ !ASSERT_OK_PTR(btf4, "distilled_split") ||
+ !ASSERT_EQ(3, btf__type_cnt(btf3), "distilled_base_type_cnt"))
+ goto cleanup;
+ VALIDATE_RAW_BTF(
+ btf3,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] INT 'int' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED");
+ if (!ASSERT_EQ(btf__relocate(btf4, btf1), 0, "relocate_split"))
+ goto cleanup;
+
+ VALIDATE_RAW_BTF(
+ btf4,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] INT 'int' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED",
+ "[3] PTR '(anon)' type_id=1",
+ "[4] CONST '(anon)' type_id=2");
+
+cleanup:
+ btf__free(btf4);
+ btf__free(btf3);
+ btf__free(btf2);
+ btf__free(btf1);
+}
+
+/* If a needed type is not present in the base BTF we wish to relocate
+ * with, btf__relocate() should error our.
+ */
+static void test_distilled_base_missing_err(void)
+{
+ struct btf *btf1 = NULL, *btf2 = NULL, *btf3 = NULL, *btf4 = NULL, *btf5 = NULL;
+
+ btf1 = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf1, "empty_main_btf"))
+ return;
+ btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [1] int */
+ btf__add_int(btf1, "int", 8, BTF_INT_SIGNED); /* [2] int */
+ VALIDATE_RAW_BTF(
+ btf1,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] INT 'int' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED");
+ btf2 = btf__new_empty_split(btf1);
+ if (!ASSERT_OK_PTR(btf2, "empty_split_btf"))
+ goto cleanup;
+ btf__add_ptr(btf2, 1);
+ btf__add_const(btf2, 2);
+ VALIDATE_RAW_BTF(
+ btf2,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] INT 'int' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED",
+ "[3] PTR '(anon)' type_id=1",
+ "[4] CONST '(anon)' type_id=2");
+ if (!ASSERT_EQ(0, btf__distill_base(btf2, &btf3, &btf4),
+ "distilled_base") ||
+ !ASSERT_OK_PTR(btf3, "distilled_base") ||
+ !ASSERT_OK_PTR(btf4, "distilled_split") ||
+ !ASSERT_EQ(3, btf__type_cnt(btf3), "distilled_base_type_cnt"))
+ goto cleanup;
+ VALIDATE_RAW_BTF(
+ btf3,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] INT 'int' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED");
+ btf5 = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf5, "empty_reloc_btf"))
+ return;
+ btf__add_int(btf5, "int", 4, BTF_INT_SIGNED); /* [1] int */
+ VALIDATE_RAW_BTF(
+ btf5,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED");
+ ASSERT_EQ(btf__relocate(btf4, btf5), -EINVAL, "relocate_split");
+
+cleanup:
+ btf__free(btf5);
+ btf__free(btf4);
+ btf__free(btf3);
+ btf__free(btf2);
+ btf__free(btf1);
+}
+
+/* With 2 types of same size in distilled base BTF, relocation should
+ * fail as we have no means to choose between them.
+ */
+static void test_distilled_base_multi_err(void)
+{
+ struct btf *btf1 = NULL, *btf2 = NULL, *btf3 = NULL, *btf4 = NULL;
+
+ btf1 = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf1, "empty_main_btf"))
+ return;
+ btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [1] int */
+ btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [2] int */
+ VALIDATE_RAW_BTF(
+ btf1,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED");
+ btf2 = btf__new_empty_split(btf1);
+ if (!ASSERT_OK_PTR(btf2, "empty_split_btf"))
+ goto cleanup;
+ btf__add_ptr(btf2, 1);
+ btf__add_const(btf2, 2);
+ VALIDATE_RAW_BTF(
+ btf2,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[3] PTR '(anon)' type_id=1",
+ "[4] CONST '(anon)' type_id=2");
+ if (!ASSERT_EQ(0, btf__distill_base(btf2, &btf3, &btf4),
+ "distilled_base") ||
+ !ASSERT_OK_PTR(btf3, "distilled_base") ||
+ !ASSERT_OK_PTR(btf4, "distilled_split") ||
+ !ASSERT_EQ(3, btf__type_cnt(btf3), "distilled_base_type_cnt"))
+ goto cleanup;
+ VALIDATE_RAW_BTF(
+ btf3,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED");
+ ASSERT_EQ(btf__relocate(btf4, btf1), -EINVAL, "relocate_split");
+cleanup:
+ btf__free(btf4);
+ btf__free(btf3);
+ btf__free(btf2);
+ btf__free(btf1);
+}
+
+/* With 2 types of same size in base BTF, relocation should
+ * fail as we have no means to choose between them.
+ */
+static void test_distilled_base_multi_err2(void)
+{
+ struct btf *btf1 = NULL, *btf2 = NULL, *btf3 = NULL, *btf4 = NULL, *btf5 = NULL;
+
+ btf1 = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf1, "empty_main_btf"))
+ return;
+ btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [1] int */
+ VALIDATE_RAW_BTF(
+ btf1,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED");
+ btf2 = btf__new_empty_split(btf1);
+ if (!ASSERT_OK_PTR(btf2, "empty_split_btf"))
+ goto cleanup;
+ btf__add_ptr(btf2, 1);
+ VALIDATE_RAW_BTF(
+ btf2,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] PTR '(anon)' type_id=1");
+ if (!ASSERT_EQ(0, btf__distill_base(btf2, &btf3, &btf4),
+ "distilled_base") ||
+ !ASSERT_OK_PTR(btf3, "distilled_base") ||
+ !ASSERT_OK_PTR(btf4, "distilled_split") ||
+ !ASSERT_EQ(2, btf__type_cnt(btf3), "distilled_base_type_cnt"))
+ goto cleanup;
+ VALIDATE_RAW_BTF(
+ btf3,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED");
+ btf5 = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf5, "empty_reloc_btf"))
+ return;
+ btf__add_int(btf5, "int", 4, BTF_INT_SIGNED); /* [1] int */
+ btf__add_int(btf5, "int", 4, BTF_INT_SIGNED); /* [2] int */
+ VALIDATE_RAW_BTF(
+ btf5,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED");
+ ASSERT_EQ(btf__relocate(btf4, btf5), -EINVAL, "relocate_split");
+cleanup:
+ btf__free(btf5);
+ btf__free(btf4);
+ btf__free(btf3);
+ btf__free(btf2);
+ btf__free(btf1);
+}
+
+/* create split reference BTF from vmlinux + split BTF with a few type references;
+ * ensure the resultant split reference BTF is as expected, containing only types
+ * needed to disambiguate references from split BTF.
+ */
+static void test_distilled_base_vmlinux(void)
+{
+ struct btf *split_btf = NULL, *vmlinux_btf = btf__load_vmlinux_btf();
+ struct btf *split_dist = NULL, *base_dist = NULL;
+ __s32 int_id, myint_id;
+
+ if (!ASSERT_OK_PTR(vmlinux_btf, "load_vmlinux"))
+ return;
+ int_id = btf__find_by_name_kind(vmlinux_btf, "int", BTF_KIND_INT);
+ if (!ASSERT_GT(int_id, 0, "find_int"))
+ goto cleanup;
+ split_btf = btf__new_empty_split(vmlinux_btf);
+ if (!ASSERT_OK_PTR(split_btf, "new_split"))
+ goto cleanup;
+ myint_id = btf__add_typedef(split_btf, "myint", int_id);
+ btf__add_ptr(split_btf, myint_id);
+
+ if (!ASSERT_EQ(btf__distill_base(split_btf, &base_dist, &split_dist), 0,
+ "distill_vmlinux_base"))
+ goto cleanup;
+
+ if (!ASSERT_OK_PTR(split_dist, "split_distilled") ||
+ !ASSERT_OK_PTR(base_dist, "base_dist"))
+ goto cleanup;
+ VALIDATE_RAW_BTF(
+ split_dist,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] TYPEDEF 'myint' type_id=1",
+ "[3] PTR '(anon)' type_id=2");
+
+cleanup:
+ btf__free(split_dist);
+ btf__free(base_dist);
+ btf__free(split_btf);
+ btf__free(vmlinux_btf);
+}
+
+void test_btf_distill(void)
+{
+ if (test__start_subtest("distilled_base"))
+ test_distilled_base();
+ if (test__start_subtest("distilled_base_multi"))
+ test_distilled_base_multi();
+ if (test__start_subtest("distilled_base_missing_err"))
+ test_distilled_base_missing_err();
+ if (test__start_subtest("distilled_base_multi_err"))
+ test_distilled_base_multi_err();
+ if (test__start_subtest("distilled_base_multi_err2"))
+ test_distilled_base_multi_err2();
+ if (test__start_subtest("distilled_base_vmlinux"))
+ test_distilled_base_vmlinux();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index e9ea38aa8248..09a8e6f9b379 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -653,7 +653,7 @@ static void test_btf_dump_struct_data(struct btf *btf, struct btf_dump *d,
cmpstr =
"(struct file_operations){\n"
" .owner = (struct module *)0xffffffffffffffff,\n"
-" .llseek = (loff_t (*)(struct file *, loff_t, int))0xffffffffffffffff,";
+" .fop_flags = (fop_flags_t)4294967295,";
ASSERT_STRNEQ(str, cmpstr, strlen(cmpstr), "file_operations");
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_field_iter.c b/tools/testing/selftests/bpf/prog_tests/btf_field_iter.c
new file mode 100644
index 000000000000..32159d3eb281
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_field_iter.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024, Oracle and/or its affiliates. */
+
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include "btf_helpers.h"
+#include "bpf/libbpf_internal.h"
+
+struct field_data {
+ __u32 ids[5];
+ const char *strs[5];
+} fields[] = {
+ { .ids = {}, .strs = {} },
+ { .ids = {}, .strs = { "int" } },
+ { .ids = {}, .strs = { "int64" } },
+ { .ids = { 1 }, .strs = { "" } },
+ { .ids = { 2, 1 }, .strs = { "" } },
+ { .ids = { 3, 1 }, .strs = { "s1", "f1", "f2" } },
+ { .ids = { 1, 5 }, .strs = { "u1", "f1", "f2" } },
+ { .ids = {}, .strs = { "e1", "v1", "v2" } },
+ { .ids = {}, .strs = { "fw1" } },
+ { .ids = { 1 }, .strs = { "t" } },
+ { .ids = { 2 }, .strs = { "" } },
+ { .ids = { 1 }, .strs = { "" } },
+ { .ids = { 3 }, .strs = { "" } },
+ { .ids = { 1, 1, 3 }, .strs = { "", "p1", "p2" } },
+ { .ids = { 13 }, .strs = { "func" } },
+ { .ids = { 1 }, .strs = { "var1" } },
+ { .ids = { 3 }, .strs = { "var2" } },
+ { .ids = {}, .strs = { "float" } },
+ { .ids = { 11 }, .strs = { "decltag" } },
+ { .ids = { 6 }, .strs = { "typetag" } },
+ { .ids = {}, .strs = { "e64", "eval1", "eval2", "eval3" } },
+ { .ids = { 15, 16 }, .strs = { "datasec1" } }
+
+};
+
+/* Fabricate BTF with various types and check BTF field iteration finds types,
+ * strings expected.
+ */
+void test_btf_field_iter(void)
+{
+ struct btf *btf = NULL;
+ int id;
+
+ btf = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf, "empty_btf"))
+ return;
+
+ btf__add_int(btf, "int", 4, BTF_INT_SIGNED); /* [1] int */
+ btf__add_int(btf, "int64", 8, BTF_INT_SIGNED); /* [2] int64 */
+ btf__add_ptr(btf, 1); /* [3] int * */
+ btf__add_array(btf, 1, 2, 3); /* [4] int64[3] */
+ btf__add_struct(btf, "s1", 12); /* [5] struct s1 { */
+ btf__add_field(btf, "f1", 3, 0, 0); /* int *f1; */
+ btf__add_field(btf, "f2", 1, 0, 0); /* int f2; */
+ /* } */
+ btf__add_union(btf, "u1", 12); /* [6] union u1 { */
+ btf__add_field(btf, "f1", 1, 0, 0); /* int f1; */
+ btf__add_field(btf, "f2", 5, 0, 0); /* struct s1 f2; */
+ /* } */
+ btf__add_enum(btf, "e1", 4); /* [7] enum e1 { */
+ btf__add_enum_value(btf, "v1", 1); /* v1 = 1; */
+ btf__add_enum_value(btf, "v2", 2); /* v2 = 2; */
+ /* } */
+
+ btf__add_fwd(btf, "fw1", BTF_FWD_STRUCT); /* [8] struct fw1; */
+ btf__add_typedef(btf, "t", 1); /* [9] typedef int t; */
+ btf__add_volatile(btf, 2); /* [10] volatile int64; */
+ btf__add_const(btf, 1); /* [11] const int; */
+ btf__add_restrict(btf, 3); /* [12] restrict int *; */
+ btf__add_func_proto(btf, 1); /* [13] int (*)(int p1, int *p2); */
+ btf__add_func_param(btf, "p1", 1);
+ btf__add_func_param(btf, "p2", 3);
+
+ btf__add_func(btf, "func", BTF_FUNC_GLOBAL, 13);/* [14] int func(int p1, int *p2); */
+ btf__add_var(btf, "var1", BTF_VAR_STATIC, 1); /* [15] static int var1; */
+ btf__add_var(btf, "var2", BTF_VAR_STATIC, 3); /* [16] static int *var2; */
+ btf__add_float(btf, "float", 4); /* [17] float; */
+ btf__add_decl_tag(btf, "decltag", 11, -1); /* [18] decltag const int; */
+ btf__add_type_tag(btf, "typetag", 6); /* [19] typetag union u1; */
+ btf__add_enum64(btf, "e64", 8, true); /* [20] enum { */
+ btf__add_enum64_value(btf, "eval1", 1000); /* eval1 = 1000, */
+ btf__add_enum64_value(btf, "eval2", 2000); /* eval2 = 2000, */
+ btf__add_enum64_value(btf, "eval3", 3000); /* eval3 = 3000 */
+ /* } */
+ btf__add_datasec(btf, "datasec1", 12); /* [21] datasec datasec1 */
+ btf__add_datasec_var_info(btf, 15, 0, 4);
+ btf__add_datasec_var_info(btf, 16, 4, 8);
+
+ VALIDATE_RAW_BTF(
+ btf,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] INT 'int64' size=8 bits_offset=0 nr_bits=64 encoding=SIGNED",
+ "[3] PTR '(anon)' type_id=1",
+ "[4] ARRAY '(anon)' type_id=2 index_type_id=1 nr_elems=3",
+ "[5] STRUCT 's1' size=12 vlen=2\n"
+ "\t'f1' type_id=3 bits_offset=0\n"
+ "\t'f2' type_id=1 bits_offset=0",
+ "[6] UNION 'u1' size=12 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=5 bits_offset=0",
+ "[7] ENUM 'e1' encoding=UNSIGNED size=4 vlen=2\n"
+ "\t'v1' val=1\n"
+ "\t'v2' val=2",
+ "[8] FWD 'fw1' fwd_kind=struct",
+ "[9] TYPEDEF 't' type_id=1",
+ "[10] VOLATILE '(anon)' type_id=2",
+ "[11] CONST '(anon)' type_id=1",
+ "[12] RESTRICT '(anon)' type_id=3",
+ "[13] FUNC_PROTO '(anon)' ret_type_id=1 vlen=2\n"
+ "\t'p1' type_id=1\n"
+ "\t'p2' type_id=3",
+ "[14] FUNC 'func' type_id=13 linkage=global",
+ "[15] VAR 'var1' type_id=1, linkage=static",
+ "[16] VAR 'var2' type_id=3, linkage=static",
+ "[17] FLOAT 'float' size=4",
+ "[18] DECL_TAG 'decltag' type_id=11 component_idx=-1",
+ "[19] TYPE_TAG 'typetag' type_id=6",
+ "[20] ENUM64 'e64' encoding=SIGNED size=8 vlen=3\n"
+ "\t'eval1' val=1000\n"
+ "\t'eval2' val=2000\n"
+ "\t'eval3' val=3000",
+ "[21] DATASEC 'datasec1' size=12 vlen=2\n"
+ "\ttype_id=15 offset=0 size=4\n"
+ "\ttype_id=16 offset=4 size=8");
+
+ for (id = 1; id < btf__type_cnt(btf); id++) {
+ struct btf_type *t = btf_type_by_id(btf, id);
+ struct btf_field_iter it_strs, it_ids;
+ int str_idx = 0, id_idx = 0;
+ __u32 *next_str, *next_id;
+
+ if (!ASSERT_OK_PTR(t, "btf_type_by_id"))
+ break;
+ if (!ASSERT_OK(btf_field_iter_init(&it_strs, t, BTF_FIELD_ITER_STRS),
+ "iter_init_strs"))
+ break;
+ if (!ASSERT_OK(btf_field_iter_init(&it_ids, t, BTF_FIELD_ITER_IDS),
+ "iter_init_ids"))
+ break;
+ while ((next_str = btf_field_iter_next(&it_strs))) {
+ const char *str = btf__str_by_offset(btf, *next_str);
+
+ if (!ASSERT_OK(strcmp(fields[id].strs[str_idx], str), "field_str_match"))
+ break;
+ str_idx++;
+ }
+ /* ensure no more strings are expected */
+ ASSERT_EQ(fields[id].strs[str_idx], NULL, "field_str_cnt");
+
+ while ((next_id = btf_field_iter_next(&it_ids))) {
+ if (!ASSERT_EQ(*next_id, fields[id].ids[id_idx], "field_id_match"))
+ break;
+ id_idx++;
+ }
+ /* ensure no more ids are expected */
+ ASSERT_EQ(fields[id].ids[id_idx], 0, "field_id_cnt");
+ }
+ btf__free(btf);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c
index a8b53b8736f0..f66ceccd7029 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c
@@ -25,7 +25,7 @@ static void test_lookup_update(void)
int map1_fd, map2_fd, map3_fd, map4_fd, map5_fd, map1_id, map2_id;
int outer_arr_fd, outer_hash_fd, outer_arr_dyn_fd;
struct test_btf_map_in_map *skel;
- int err, key = 0, val, i, fd;
+ int err, key = 0, val, i;
skel = test_btf_map_in_map__open_and_load();
if (CHECK(!skel, "skel_open", "failed to open&load skeleton\n"))
@@ -102,30 +102,6 @@ static void test_lookup_update(void)
CHECK(map1_id == 0, "map1_id", "failed to get ID 1\n");
CHECK(map2_id == 0, "map2_id", "failed to get ID 2\n");
- test_btf_map_in_map__destroy(skel);
- skel = NULL;
-
- /* we need to either wait for or force synchronize_rcu(), before
- * checking for "still exists" condition, otherwise map could still be
- * resolvable by ID, causing false positives.
- *
- * Older kernels (5.8 and earlier) freed map only after two
- * synchronize_rcu()s, so trigger two, to be entirely sure.
- */
- CHECK(kern_sync_rcu(), "sync_rcu", "failed\n");
- CHECK(kern_sync_rcu(), "sync_rcu", "failed\n");
-
- fd = bpf_map_get_fd_by_id(map1_id);
- if (CHECK(fd >= 0, "map1_leak", "inner_map1 leaked!\n")) {
- close(fd);
- goto cleanup;
- }
- fd = bpf_map_get_fd_by_id(map2_id);
- if (CHECK(fd >= 0, "map2_leak", "inner_map2 leaked!\n")) {
- close(fd);
- goto cleanup;
- }
-
cleanup:
test_btf_map_in_map__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c b/tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c
index 74d6d7546f40..25332e596750 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c
@@ -87,9 +87,12 @@ void test_cgroup1_hierarchy(void)
goto destroy;
/* Setup cgroup1 hierarchy */
+ err = setup_cgroup_environment();
+ if (!ASSERT_OK(err, "setup_cgroup_environment"))
+ goto destroy;
err = setup_classid_environment();
if (!ASSERT_OK(err, "setup_classid_environment"))
- goto destroy;
+ goto cleanup_cgroup;
err = join_classid();
if (!ASSERT_OK(err, "join_cgroup1"))
@@ -153,6 +156,8 @@ void test_cgroup1_hierarchy(void)
cleanup:
cleanup_classid_environment();
+cleanup_cgroup:
+ cleanup_cgroup_environment();
destroy:
test_cgroup1_hierarchy__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
index addf720428f7..9709c8db7275 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
@@ -32,7 +32,7 @@ static int run_test(int cgroup_fd, int server_fd, bool classid)
goto out;
}
- fd = connect_to_fd_opts(server_fd, &opts);
+ fd = connect_to_fd_opts(server_fd, SOCK_STREAM, &opts);
if (fd < 0)
err = -1;
else
@@ -52,7 +52,7 @@ void test_cgroup_v1v2(void)
server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0);
if (!ASSERT_GE(server_fd, 0, "server_fd"))
return;
- client_fd = connect_to_fd_opts(server_fd, &opts);
+ client_fd = connect_to_fd_opts(server_fd, SOCK_STREAM, &opts);
if (!ASSERT_GE(client_fd, 0, "client_fd")) {
close(server_fd);
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
index 2a55f717fc07..34b59f6baca1 100644
--- a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
@@ -10,6 +10,7 @@
#include <netinet/tcp.h>
#include <test_progs.h>
+#include "network_helpers.h"
#include "progs/test_cls_redirect.h"
#include "test_cls_redirect.skel.h"
@@ -35,39 +36,6 @@ struct tuple {
struct addr_port dst;
};
-static int start_server(const struct sockaddr *addr, socklen_t len, int type)
-{
- int fd = socket(addr->sa_family, type, 0);
- if (CHECK_FAIL(fd == -1))
- return -1;
- if (CHECK_FAIL(bind(fd, addr, len) == -1))
- goto err;
- if (type == SOCK_STREAM && CHECK_FAIL(listen(fd, 128) == -1))
- goto err;
-
- return fd;
-
-err:
- close(fd);
- return -1;
-}
-
-static int connect_to_server(const struct sockaddr *addr, socklen_t len,
- int type)
-{
- int fd = socket(addr->sa_family, type, 0);
- if (CHECK_FAIL(fd == -1))
- return -1;
- if (CHECK_FAIL(connect(fd, addr, len)))
- goto err;
-
- return fd;
-
-err:
- close(fd);
- return -1;
-}
-
static bool fill_addr_port(const struct sockaddr *sa, struct addr_port *ap)
{
const struct sockaddr_in6 *in6;
@@ -98,14 +66,14 @@ static bool set_up_conn(const struct sockaddr *addr, socklen_t len, int type,
socklen_t slen = sizeof(ss);
struct sockaddr *sa = (struct sockaddr *)&ss;
- *server = start_server(addr, len, type);
+ *server = start_server_addr(type, (struct sockaddr_storage *)addr, len, NULL);
if (*server < 0)
return false;
if (CHECK_FAIL(getsockname(*server, sa, &slen)))
goto close_server;
- *conn = connect_to_server(sa, slen, type);
+ *conn = connect_to_addr(type, (struct sockaddr_storage *)sa, slen, NULL);
if (*conn < 0)
goto close_server;
diff --git a/tools/testing/selftests/bpf/prog_tests/cpumask.c b/tools/testing/selftests/bpf/prog_tests/cpumask.c
index ecf89df78109..2570bd4b0cb2 100644
--- a/tools/testing/selftests/bpf/prog_tests/cpumask.c
+++ b/tools/testing/selftests/bpf/prog_tests/cpumask.c
@@ -18,6 +18,11 @@ static const char * const cpumask_success_testcases[] = {
"test_insert_leave",
"test_insert_remove_release",
"test_global_mask_rcu",
+ "test_global_mask_array_one_rcu",
+ "test_global_mask_array_rcu",
+ "test_global_mask_array_l2_rcu",
+ "test_global_mask_nested_rcu",
+ "test_global_mask_nested_deep_rcu",
"test_cpumask_weight",
};
diff --git a/tools/testing/selftests/bpf/prog_tests/crypto_sanity.c b/tools/testing/selftests/bpf/prog_tests/crypto_sanity.c
new file mode 100644
index 000000000000..b1a3a49a822a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/crypto_sanity.c
@@ -0,0 +1,197 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <net/if.h>
+#include <linux/in6.h>
+#include <linux/if_alg.h>
+
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "crypto_sanity.skel.h"
+#include "crypto_basic.skel.h"
+
+#define NS_TEST "crypto_sanity_ns"
+#define IPV6_IFACE_ADDR "face::1"
+static const unsigned char crypto_key[] = "testtest12345678";
+static const char plain_text[] = "stringtoencrypt0";
+static int opfd = -1, tfmfd = -1;
+static const char algo[] = "ecb(aes)";
+static int init_afalg(void)
+{
+ struct sockaddr_alg sa = {
+ .salg_family = AF_ALG,
+ .salg_type = "skcipher",
+ .salg_name = "ecb(aes)"
+ };
+
+ tfmfd = socket(AF_ALG, SOCK_SEQPACKET, 0);
+ if (tfmfd == -1)
+ return errno;
+ if (bind(tfmfd, (struct sockaddr *)&sa, sizeof(sa)) == -1)
+ return errno;
+ if (setsockopt(tfmfd, SOL_ALG, ALG_SET_KEY, crypto_key, 16) == -1)
+ return errno;
+ opfd = accept(tfmfd, NULL, 0);
+ if (opfd == -1)
+ return errno;
+ return 0;
+}
+
+static void deinit_afalg(void)
+{
+ if (tfmfd != -1)
+ close(tfmfd);
+ if (opfd != -1)
+ close(opfd);
+}
+
+static void do_crypt_afalg(const void *src, void *dst, int size, bool encrypt)
+{
+ struct msghdr msg = {};
+ struct cmsghdr *cmsg;
+ char cbuf[CMSG_SPACE(4)] = {0};
+ struct iovec iov;
+
+ msg.msg_control = cbuf;
+ msg.msg_controllen = sizeof(cbuf);
+
+ cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_ALG;
+ cmsg->cmsg_type = ALG_SET_OP;
+ cmsg->cmsg_len = CMSG_LEN(4);
+ *(__u32 *)CMSG_DATA(cmsg) = encrypt ? ALG_OP_ENCRYPT : ALG_OP_DECRYPT;
+
+ iov.iov_base = (char *)src;
+ iov.iov_len = size;
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ sendmsg(opfd, &msg, 0);
+ read(opfd, dst, size);
+}
+
+void test_crypto_basic(void)
+{
+ RUN_TESTS(crypto_basic);
+}
+
+void test_crypto_sanity(void)
+{
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_hook, .attach_point = BPF_TC_EGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, tc_attach_enc);
+ LIBBPF_OPTS(bpf_tc_opts, tc_attach_dec);
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ struct nstoken *nstoken = NULL;
+ struct crypto_sanity *skel;
+ char afalg_plain[16] = {0};
+ char afalg_dst[16] = {0};
+ struct sockaddr_in6 addr;
+ int sockfd, err, pfd;
+ socklen_t addrlen;
+ u16 udp_test_port;
+
+ skel = crypto_sanity__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel open"))
+ return;
+
+ SYS(fail, "ip netns add %s", NS_TEST);
+ SYS(fail, "ip -net %s -6 addr add %s/128 dev lo nodad", NS_TEST, IPV6_IFACE_ADDR);
+ SYS(fail, "ip -net %s link set dev lo up", NS_TEST);
+
+ nstoken = open_netns(NS_TEST);
+ if (!ASSERT_OK_PTR(nstoken, "open_netns"))
+ goto fail;
+
+ err = init_afalg();
+ if (!ASSERT_OK(err, "AF_ALG init fail"))
+ goto fail;
+
+ qdisc_hook.ifindex = if_nametoindex("lo");
+ if (!ASSERT_GT(qdisc_hook.ifindex, 0, "if_nametoindex lo"))
+ goto fail;
+
+ skel->bss->key_len = 16;
+ skel->bss->authsize = 0;
+ udp_test_port = skel->data->udp_test_port;
+ memcpy(skel->bss->key, crypto_key, sizeof(crypto_key));
+ snprintf(skel->bss->algo, 128, "%s", algo);
+ pfd = bpf_program__fd(skel->progs.skb_crypto_setup);
+ if (!ASSERT_GT(pfd, 0, "skb_crypto_setup fd"))
+ goto fail;
+
+ err = bpf_prog_test_run_opts(pfd, &opts);
+ if (!ASSERT_OK(err, "skb_crypto_setup") ||
+ !ASSERT_OK(opts.retval, "skb_crypto_setup retval"))
+ goto fail;
+
+ if (!ASSERT_OK(skel->bss->status, "skb_crypto_setup status"))
+ goto fail;
+
+ err = bpf_tc_hook_create(&qdisc_hook);
+ if (!ASSERT_OK(err, "create qdisc hook"))
+ goto fail;
+
+ addrlen = sizeof(addr);
+ err = make_sockaddr(AF_INET6, IPV6_IFACE_ADDR, udp_test_port,
+ (void *)&addr, &addrlen);
+ if (!ASSERT_OK(err, "make_sockaddr"))
+ goto fail;
+
+ tc_attach_enc.prog_fd = bpf_program__fd(skel->progs.encrypt_sanity);
+ err = bpf_tc_attach(&qdisc_hook, &tc_attach_enc);
+ if (!ASSERT_OK(err, "attach encrypt filter"))
+ goto fail;
+
+ sockfd = socket(AF_INET6, SOCK_DGRAM, 0);
+ if (!ASSERT_NEQ(sockfd, -1, "encrypt socket"))
+ goto fail;
+ err = sendto(sockfd, plain_text, sizeof(plain_text), 0, (void *)&addr, addrlen);
+ close(sockfd);
+ if (!ASSERT_EQ(err, sizeof(plain_text), "encrypt send"))
+ goto fail;
+
+ do_crypt_afalg(plain_text, afalg_dst, sizeof(afalg_dst), true);
+
+ if (!ASSERT_OK(skel->bss->status, "encrypt status"))
+ goto fail;
+ if (!ASSERT_STRNEQ(skel->bss->dst, afalg_dst, sizeof(afalg_dst), "encrypt AF_ALG"))
+ goto fail;
+
+ tc_attach_enc.flags = tc_attach_enc.prog_fd = tc_attach_enc.prog_id = 0;
+ err = bpf_tc_detach(&qdisc_hook, &tc_attach_enc);
+ if (!ASSERT_OK(err, "bpf_tc_detach encrypt"))
+ goto fail;
+
+ tc_attach_dec.prog_fd = bpf_program__fd(skel->progs.decrypt_sanity);
+ err = bpf_tc_attach(&qdisc_hook, &tc_attach_dec);
+ if (!ASSERT_OK(err, "attach decrypt filter"))
+ goto fail;
+
+ sockfd = socket(AF_INET6, SOCK_DGRAM, 0);
+ if (!ASSERT_NEQ(sockfd, -1, "decrypt socket"))
+ goto fail;
+ err = sendto(sockfd, afalg_dst, sizeof(afalg_dst), 0, (void *)&addr, addrlen);
+ close(sockfd);
+ if (!ASSERT_EQ(err, sizeof(afalg_dst), "decrypt send"))
+ goto fail;
+
+ do_crypt_afalg(afalg_dst, afalg_plain, sizeof(afalg_plain), false);
+
+ if (!ASSERT_OK(skel->bss->status, "decrypt status"))
+ goto fail;
+ if (!ASSERT_STRNEQ(skel->bss->dst, afalg_plain, sizeof(afalg_plain), "decrypt AF_ALG"))
+ goto fail;
+
+ tc_attach_dec.flags = tc_attach_dec.prog_fd = tc_attach_dec.prog_id = 0;
+ err = bpf_tc_detach(&qdisc_hook, &tc_attach_dec);
+ ASSERT_OK(err, "bpf_tc_detach decrypt");
+
+fail:
+ close_netns(nstoken);
+ deinit_afalg();
+ SYS_NOFAIL("ip netns del " NS_TEST " &> /dev/null");
+ crypto_sanity__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c
index 3b7c57fe55a5..08b6391f2f56 100644
--- a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c
+++ b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c
@@ -69,15 +69,17 @@ static struct test_case test_cases[] = {
{
N(SCHED_CLS, struct __sk_buff, tstamp),
.read = "r11 = *(u8 *)($ctx + sk_buff::__mono_tc_offset);"
- "w11 &= 3;"
- "if w11 != 0x3 goto pc+2;"
+ "if w11 & 0x4 goto pc+1;"
+ "goto pc+4;"
+ "if w11 & 0x3 goto pc+1;"
+ "goto pc+2;"
"$dst = 0;"
"goto pc+1;"
"$dst = *(u64 *)($ctx + sk_buff::tstamp);",
.write = "r11 = *(u8 *)($ctx + sk_buff::__mono_tc_offset);"
- "if w11 & 0x2 goto pc+1;"
+ "if w11 & 0x4 goto pc+1;"
"goto pc+2;"
- "w11 &= -2;"
+ "w11 &= -4;"
"*(u8 *)($ctx + sk_buff::__mono_tc_offset) = r11;"
"*(u64 *)($ctx + sk_buff::tstamp) = $src;",
},
diff --git a/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c b/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c
index f43fcb13d2c4..d3d94596ab79 100644
--- a/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c
+++ b/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c
@@ -98,7 +98,8 @@ done:
static void test_dummy_multiple_args(void)
{
- __u64 args[5] = {0, -100, 0x8a5f, 'c', 0x1234567887654321ULL};
+ struct bpf_dummy_ops_state st = { 7 };
+ __u64 args[5] = {(__u64)&st, -100, 0x8a5f, 'c', 0x1234567887654321ULL};
LIBBPF_OPTS(bpf_test_run_opts, attr,
.ctx_in = args,
.ctx_size_in = sizeof(args),
@@ -115,6 +116,7 @@ static void test_dummy_multiple_args(void)
fd = bpf_program__fd(skel->progs.test_2);
err = bpf_prog_test_run_opts(fd, &attr);
ASSERT_OK(err, "test_run");
+ args[0] = 7;
for (i = 0; i < ARRAY_SIZE(args); i++) {
snprintf(name, sizeof(name), "arg %zu", i);
ASSERT_EQ(skel->bss->test_2_args[i], args[i], name);
@@ -125,7 +127,8 @@ static void test_dummy_multiple_args(void)
static void test_dummy_sleepable(void)
{
- __u64 args[1] = {0};
+ struct bpf_dummy_ops_state st;
+ __u64 args[1] = {(__u64)&st};
LIBBPF_OPTS(bpf_test_run_opts, attr,
.ctx_in = args,
.ctx_size_in = sizeof(args),
@@ -144,6 +147,31 @@ static void test_dummy_sleepable(void)
dummy_st_ops_success__destroy(skel);
}
+/* dummy_st_ops.test_sleepable() parameter is not marked as nullable,
+ * thus bpf_prog_test_run_opts() below should be rejected as it tries
+ * to pass NULL for this parameter.
+ */
+static void test_dummy_sleepable_reject_null(void)
+{
+ __u64 args[1] = {0};
+ LIBBPF_OPTS(bpf_test_run_opts, attr,
+ .ctx_in = args,
+ .ctx_size_in = sizeof(args),
+ );
+ struct dummy_st_ops_success *skel;
+ int fd, err;
+
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load"))
+ return;
+
+ fd = bpf_program__fd(skel->progs.test_sleepable);
+ err = bpf_prog_test_run_opts(fd, &attr);
+ ASSERT_EQ(err, -EINVAL, "test_run");
+
+ dummy_st_ops_success__destroy(skel);
+}
+
void test_dummy_st_ops(void)
{
if (test__start_subtest("dummy_st_ops_attach"))
@@ -156,6 +184,8 @@ void test_dummy_st_ops(void)
test_dummy_multiple_args();
if (test__start_subtest("dummy_sleepable"))
test_dummy_sleepable();
+ if (test__start_subtest("dummy_sleepable_reject_null"))
+ test_dummy_sleepable_reject_null();
RUN_TESTS(dummy_st_ops_fail);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/empty_skb.c b/tools/testing/selftests/bpf/prog_tests/empty_skb.c
index 261228eb68e8..438583e1f2d1 100644
--- a/tools/testing/selftests/bpf/prog_tests/empty_skb.c
+++ b/tools/testing/selftests/bpf/prog_tests/empty_skb.c
@@ -94,6 +94,8 @@ void test_empty_skb(void)
SYS(out, "ip netns add empty_skb");
tok = open_netns("empty_skb");
+ if (!ASSERT_OK_PTR(tok, "setns"))
+ goto out;
SYS(out, "ip link add veth0 type veth peer veth1");
SYS(out, "ip link set dev veth0 up");
SYS(out, "ip link set dev veth1 up");
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c
index f949647dbbc2..552a0875ca6d 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c
@@ -21,13 +21,13 @@ static int do_sleep(void *skel)
}
#define STACK_SIZE (1024 * 1024)
-static char child_stack[STACK_SIZE];
void test_fexit_sleep(void)
{
struct fexit_sleep_lskel *fexit_skel = NULL;
int wstatus, duration = 0;
pid_t cpid;
+ char *child_stack = NULL;
int err, fexit_cnt;
fexit_skel = fexit_sleep_lskel__open_and_load();
@@ -38,6 +38,11 @@ void test_fexit_sleep(void)
if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err))
goto cleanup;
+ child_stack = mmap(NULL, STACK_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE |
+ MAP_ANONYMOUS | MAP_STACK, -1, 0);
+ if (!ASSERT_NEQ(child_stack, MAP_FAILED, "mmap"))
+ goto cleanup;
+
cpid = clone(do_sleep, child_stack + STACK_SIZE, CLONE_FILES | SIGCHLD, fexit_skel);
if (CHECK(cpid == -1, "clone", "%s\n", strerror(errno)))
goto cleanup;
@@ -78,5 +83,6 @@ void test_fexit_sleep(void)
goto cleanup;
cleanup:
+ munmap(child_stack, STACK_SIZE);
fexit_sleep_lskel__destroy(fexit_skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
index 596536def43d..49b1ffc9af1f 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
@@ -50,9 +50,9 @@ void serial_test_fexit_stress(void)
out:
for (i = 0; i < bpf_max_tramp_links; i++) {
- if (link_fd[i])
+ if (link_fd[i] > 0)
close(link_fd[i]);
- if (fexit_fd[i])
+ if (fexit_fd[i] > 0)
close(fexit_fd[i]);
}
free(fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
index 3379df2d4cf2..bd7658958004 100644
--- a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
@@ -26,6 +26,17 @@
#define IPV6_TBID_ADDR "fd00::FFFF"
#define IPV6_TBID_NET "fd00::"
#define IPV6_TBID_DST "fd00::2"
+#define MARK_NO_POLICY 33
+#define MARK 42
+#define MARK_TABLE "200"
+#define IPV4_REMOTE_DST "1.2.3.4"
+#define IPV4_LOCAL "10.4.0.3"
+#define IPV4_GW1 "10.4.0.1"
+#define IPV4_GW2 "10.4.0.2"
+#define IPV6_REMOTE_DST "be:ef::b0:10"
+#define IPV6_LOCAL "fd01::3"
+#define IPV6_GW1 "fd01::1"
+#define IPV6_GW2 "fd01::2"
#define DMAC "11:11:11:11:11:11"
#define DMAC_INIT { 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, }
#define DMAC2 "01:01:01:01:01:01"
@@ -36,9 +47,11 @@ struct fib_lookup_test {
const char *daddr;
int expected_ret;
const char *expected_src;
+ const char *expected_dst;
int lookup_flags;
__u32 tbid;
__u8 dmac[6];
+ __u32 mark;
};
static const struct fib_lookup_test tests[] = {
@@ -90,10 +103,47 @@ static const struct fib_lookup_test tests[] = {
.daddr = IPV6_ADDR_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
.expected_src = IPV6_IFACE_ADDR_SEC,
.lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, },
+ /* policy routing */
+ { .desc = "IPv4 policy routing, default",
+ .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_dst = IPV4_GW1,
+ .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH, },
+ { .desc = "IPv4 policy routing, mark doesn't point to a policy",
+ .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_dst = IPV4_GW1,
+ .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
+ .mark = MARK_NO_POLICY, },
+ { .desc = "IPv4 policy routing, mark points to a policy",
+ .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_dst = IPV4_GW2,
+ .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
+ .mark = MARK, },
+ { .desc = "IPv4 policy routing, mark points to a policy, but no flag",
+ .daddr = IPV4_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_dst = IPV4_GW1,
+ .lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH,
+ .mark = MARK, },
+ { .desc = "IPv6 policy routing, default",
+ .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_dst = IPV6_GW1,
+ .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH, },
+ { .desc = "IPv6 policy routing, mark doesn't point to a policy",
+ .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_dst = IPV6_GW1,
+ .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
+ .mark = MARK_NO_POLICY, },
+ { .desc = "IPv6 policy routing, mark points to a policy",
+ .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_dst = IPV6_GW2,
+ .lookup_flags = BPF_FIB_LOOKUP_MARK | BPF_FIB_LOOKUP_SKIP_NEIGH,
+ .mark = MARK, },
+ { .desc = "IPv6 policy routing, mark points to a policy, but no flag",
+ .daddr = IPV6_REMOTE_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_dst = IPV6_GW1,
+ .lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH,
+ .mark = MARK, },
};
-static int ifindex;
-
static int setup_netns(void)
{
int err;
@@ -144,12 +194,24 @@ static int setup_netns(void)
if (!ASSERT_OK(err, "write_sysctl(net.ipv6.conf.veth1.forwarding)"))
goto fail;
+ /* Setup for policy routing tests */
+ SYS(fail, "ip addr add %s/24 dev veth1", IPV4_LOCAL);
+ SYS(fail, "ip addr add %s/64 dev veth1 nodad", IPV6_LOCAL);
+ SYS(fail, "ip route add %s/32 via %s", IPV4_REMOTE_DST, IPV4_GW1);
+ SYS(fail, "ip route add %s/32 via %s table %s", IPV4_REMOTE_DST, IPV4_GW2, MARK_TABLE);
+ SYS(fail, "ip -6 route add %s/128 via %s", IPV6_REMOTE_DST, IPV6_GW1);
+ SYS(fail, "ip -6 route add %s/128 via %s table %s", IPV6_REMOTE_DST, IPV6_GW2, MARK_TABLE);
+ SYS(fail, "ip rule add prio 2 fwmark %d lookup %s", MARK, MARK_TABLE);
+ SYS(fail, "ip -6 rule add prio 2 fwmark %d lookup %s", MARK, MARK_TABLE);
+
return 0;
fail:
return -1;
}
-static int set_lookup_params(struct bpf_fib_lookup *params, const struct fib_lookup_test *test)
+static int set_lookup_params(struct bpf_fib_lookup *params,
+ const struct fib_lookup_test *test,
+ int ifindex)
{
int ret;
@@ -158,6 +220,7 @@ static int set_lookup_params(struct bpf_fib_lookup *params, const struct fib_loo
params->l4_protocol = IPPROTO_TCP;
params->ifindex = ifindex;
params->tbid = test->tbid;
+ params->mark = test->mark;
if (inet_pton(AF_INET6, test->daddr, params->ipv6_dst) == 1) {
params->family = AF_INET6;
@@ -190,40 +253,45 @@ static void mac_str(char *b, const __u8 *mac)
mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
}
-static void assert_src_ip(struct bpf_fib_lookup *fib_params, const char *expected_src)
+static void assert_ip_address(int family, void *addr, const char *expected_str)
{
+ char str[INET6_ADDRSTRLEN];
+ u8 expected_addr[16];
+ int addr_len = 0;
int ret;
- __u32 src6[4];
- __be32 src4;
- switch (fib_params->family) {
+ switch (family) {
case AF_INET6:
- ret = inet_pton(AF_INET6, expected_src, src6);
- ASSERT_EQ(ret, 1, "inet_pton(expected_src)");
-
- ret = memcmp(src6, fib_params->ipv6_src, sizeof(fib_params->ipv6_src));
- if (!ASSERT_EQ(ret, 0, "fib_lookup ipv6 src")) {
- char str_src6[64];
-
- inet_ntop(AF_INET6, fib_params->ipv6_src, str_src6,
- sizeof(str_src6));
- printf("ipv6 expected %s actual %s ", expected_src,
- str_src6);
- }
-
+ ret = inet_pton(AF_INET6, expected_str, expected_addr);
+ ASSERT_EQ(ret, 1, "inet_pton(AF_INET6, expected_str)");
+ addr_len = 16;
break;
case AF_INET:
- ret = inet_pton(AF_INET, expected_src, &src4);
- ASSERT_EQ(ret, 1, "inet_pton(expected_src)");
-
- ASSERT_EQ(fib_params->ipv4_src, src4, "fib_lookup ipv4 src");
-
+ ret = inet_pton(AF_INET, expected_str, expected_addr);
+ ASSERT_EQ(ret, 1, "inet_pton(AF_INET, expected_str)");
+ addr_len = 4;
break;
default:
- PRINT_FAIL("invalid addr family: %d", fib_params->family);
+ PRINT_FAIL("invalid address family: %d", family);
+ break;
+ }
+
+ if (memcmp(addr, expected_addr, addr_len)) {
+ inet_ntop(family, addr, str, sizeof(str));
+ PRINT_FAIL("expected %s actual %s ", expected_str, str);
}
}
+static void assert_src_ip(struct bpf_fib_lookup *params, const char *expected)
+{
+ assert_ip_address(params->family, params->ipv6_src, expected);
+}
+
+static void assert_dst_ip(struct bpf_fib_lookup *params, const char *expected)
+{
+ assert_ip_address(params->family, params->ipv6_dst, expected);
+}
+
void test_fib_lookup(void)
{
struct bpf_fib_lookup *fib_params;
@@ -256,15 +324,18 @@ void test_fib_lookup(void)
if (setup_netns())
goto fail;
- ifindex = if_nametoindex("veth1");
- skb.ifindex = ifindex;
+ skb.ifindex = if_nametoindex("veth1");
+ if (!ASSERT_NEQ(skb.ifindex, 0, "if_nametoindex(veth1)"))
+ goto fail;
+
fib_params = &skel->bss->fib_params;
for (i = 0; i < ARRAY_SIZE(tests); i++) {
printf("Testing %s ", tests[i].desc);
- if (set_lookup_params(fib_params, &tests[i]))
+ if (set_lookup_params(fib_params, &tests[i], skb.ifindex))
continue;
+
skel->bss->fib_lookup_ret = -1;
skel->bss->lookup_flags = tests[i].lookup_flags;
@@ -278,6 +349,9 @@ void test_fib_lookup(void)
if (tests[i].expected_src)
assert_src_ip(fib_params, tests[i].expected_src);
+ if (tests[i].expected_dst)
+ assert_dst_ip(fib_params, tests[i].expected_dst);
+
ret = memcmp(tests[i].dmac, fib_params->dmac, sizeof(tests[i].dmac));
if (!ASSERT_EQ(ret, 0, "dmac not match")) {
char expected[18], actual[18];
diff --git a/tools/testing/selftests/bpf/prog_tests/find_vma.c b/tools/testing/selftests/bpf/prog_tests/find_vma.c
index 5165b38f0e59..f7619e0ade10 100644
--- a/tools/testing/selftests/bpf/prog_tests/find_vma.c
+++ b/tools/testing/selftests/bpf/prog_tests/find_vma.c
@@ -29,8 +29,8 @@ static int open_pe(void)
/* create perf event */
attr.size = sizeof(attr);
- attr.type = PERF_TYPE_HARDWARE;
- attr.config = PERF_COUNT_HW_CPU_CYCLES;
+ attr.type = PERF_TYPE_SOFTWARE;
+ attr.config = PERF_COUNT_SW_CPU_CLOCK;
attr.freq = 1;
attr.sample_freq = 1000;
pfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC);
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index c4773173a4e4..9e5f38739104 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -2,7 +2,6 @@
#include <test_progs.h>
#include <network_helpers.h>
#include <error.h>
-#include <linux/if.h>
#include <linux/if_tun.h>
#include <sys/uio.h>
diff --git a/tools/testing/selftests/bpf/prog_tests/for_each.c b/tools/testing/selftests/bpf/prog_tests/for_each.c
index 8963f8a549f2..09f6487f58b9 100644
--- a/tools/testing/selftests/bpf/prog_tests/for_each.c
+++ b/tools/testing/selftests/bpf/prog_tests/for_each.c
@@ -5,6 +5,7 @@
#include "for_each_hash_map_elem.skel.h"
#include "for_each_array_map_elem.skel.h"
#include "for_each_map_elem_write_key.skel.h"
+#include "for_each_multi_maps.skel.h"
static unsigned int duration;
@@ -143,6 +144,65 @@ static void test_write_map_key(void)
for_each_map_elem_write_key__destroy(skel);
}
+static void test_multi_maps(void)
+{
+ struct for_each_multi_maps *skel;
+ __u64 val, array_total, hash_total;
+ __u32 key, max_entries;
+ int i, err;
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+
+ skel = for_each_multi_maps__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "for_each_multi_maps__open_and_load"))
+ return;
+
+ array_total = 0;
+ max_entries = bpf_map__max_entries(skel->maps.arraymap);
+ for (i = 0; i < max_entries; i++) {
+ key = i;
+ val = i + 1;
+ array_total += val;
+ err = bpf_map__update_elem(skel->maps.arraymap, &key, sizeof(key),
+ &val, sizeof(val), BPF_ANY);
+ if (!ASSERT_OK(err, "array_map_update"))
+ goto out;
+ }
+
+ hash_total = 0;
+ max_entries = bpf_map__max_entries(skel->maps.hashmap);
+ for (i = 0; i < max_entries; i++) {
+ key = i + 100;
+ val = i + 1;
+ hash_total += val;
+ err = bpf_map__update_elem(skel->maps.hashmap, &key, sizeof(key),
+ &val, sizeof(val), BPF_ANY);
+ if (!ASSERT_OK(err, "hash_map_update"))
+ goto out;
+ }
+
+ skel->bss->data_output = 0;
+ skel->bss->use_array = 1;
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_pkt_access), &topts);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+ ASSERT_OK(topts.retval, "retval");
+ ASSERT_EQ(skel->bss->data_output, array_total, "array output");
+
+ skel->bss->data_output = 0;
+ skel->bss->use_array = 0;
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_pkt_access), &topts);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+ ASSERT_OK(topts.retval, "retval");
+ ASSERT_EQ(skel->bss->data_output, hash_total, "hash output");
+
+out:
+ for_each_multi_maps__destroy(skel);
+}
+
void test_for_each(void)
{
if (test__start_subtest("hash_map"))
@@ -151,4 +211,6 @@ void test_for_each(void)
test_array_map();
if (test__start_subtest("write_map_key"))
test_write_map_key();
+ if (test__start_subtest("multi_maps"))
+ test_multi_maps();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c b/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c
index 8dd2af9081f4..4ddb8a5fece8 100644
--- a/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c
+++ b/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c
@@ -88,6 +88,8 @@ static int attach(struct ip_check_defrag *skel, bool ipv6)
int err = -1;
nstoken = open_netns(NS1);
+ if (!ASSERT_OK_PTR(nstoken, "setns"))
+ goto out;
skel->links.defrag = bpf_program__attach_netfilter(skel->progs.defrag, &opts);
if (!ASSERT_OK_PTR(skel->links.defrag, "program attach"))
@@ -156,15 +158,13 @@ static int send_frags6(int client)
void test_bpf_ip_check_defrag_ok(bool ipv6)
{
+ int family = ipv6 ? AF_INET6 : AF_INET;
struct network_helper_opts rx_opts = {
.timeout_ms = 1000,
- .noconnect = true,
};
struct network_helper_opts tx_ops = {
.timeout_ms = 1000,
- .type = SOCK_RAW,
.proto = IPPROTO_RAW,
- .noconnect = true,
};
struct sockaddr_storage caddr;
struct ip_check_defrag *skel;
@@ -190,7 +190,7 @@ void test_bpf_ip_check_defrag_ok(bool ipv6)
nstoken = open_netns(NS1);
if (!ASSERT_OK_PTR(nstoken, "setns ns1"))
goto out;
- srv_fd = start_server(ipv6 ? AF_INET6 : AF_INET, SOCK_DGRAM, NULL, SERVER_PORT, 0);
+ srv_fd = start_server(family, SOCK_DGRAM, NULL, SERVER_PORT, 0);
close_netns(nstoken);
if (!ASSERT_GE(srv_fd, 0, "start_server"))
goto out;
@@ -199,18 +199,18 @@ void test_bpf_ip_check_defrag_ok(bool ipv6)
nstoken = open_netns(NS0);
if (!ASSERT_OK_PTR(nstoken, "setns ns0"))
goto out;
- client_tx_fd = connect_to_fd_opts(srv_fd, &tx_ops);
+ client_tx_fd = client_socket(family, SOCK_RAW, &tx_ops);
close_netns(nstoken);
- if (!ASSERT_GE(client_tx_fd, 0, "connect_to_fd_opts"))
+ if (!ASSERT_GE(client_tx_fd, 0, "client_socket"))
goto out;
/* Open rx socket in ns0 */
nstoken = open_netns(NS0);
if (!ASSERT_OK_PTR(nstoken, "setns ns0"))
goto out;
- client_rx_fd = connect_to_fd_opts(srv_fd, &rx_opts);
+ client_rx_fd = client_socket(family, SOCK_DGRAM, &rx_opts);
close_netns(nstoken);
- if (!ASSERT_GE(client_rx_fd, 0, "connect_to_fd_opts"))
+ if (!ASSERT_GE(client_rx_fd, 0, "client_socket"))
goto out;
/* Bind rx socket to a premeditated port */
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
index 2eb71559713c..5b743212292f 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
@@ -78,6 +78,7 @@ static struct kfunc_test_params kfunc_tests[] = {
SYSCALL_TEST(kfunc_syscall_test, 0),
SYSCALL_NULL_CTX_TEST(kfunc_syscall_test_null, 0),
TC_TEST(kfunc_call_test_static_unused_arg, 0),
+ TC_TEST(kfunc_call_ctx, 0),
};
struct syscall_test_args {
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_param_nullable.c b/tools/testing/selftests/bpf/prog_tests/kfunc_param_nullable.c
new file mode 100644
index 000000000000..c8f4dcaac7c7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_param_nullable.c
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Copyright (c) 2024 Meta Platforms, Inc */
+
+#include <test_progs.h>
+#include "test_kfunc_param_nullable.skel.h"
+
+void test_kfunc_param_nullable(void)
+{
+ RUN_TESTS(test_kfunc_param_nullable);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
index 05000810e28e..960c9323d1e0 100644
--- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
@@ -4,6 +4,8 @@
#include "trace_helpers.h"
#include "kprobe_multi_empty.skel.h"
#include "kprobe_multi_override.skel.h"
+#include "kprobe_multi_session.skel.h"
+#include "kprobe_multi_session_cookie.skel.h"
#include "bpf/libbpf_internal.h"
#include "bpf/hashmap.h"
@@ -326,6 +328,74 @@ cleanup:
kprobe_multi__destroy(skel);
}
+static void test_session_skel_api(void)
+{
+ struct kprobe_multi_session *skel = NULL;
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct bpf_link *link = NULL;
+ int i, err, prog_fd;
+
+ skel = kprobe_multi_session__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "kprobe_multi_session__open_and_load"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ err = kprobe_multi_session__attach(skel);
+ if (!ASSERT_OK(err, " kprobe_multi_session__attach"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.trigger);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
+
+ /* bpf_fentry_test1-4 trigger return probe, result is 2 */
+ for (i = 0; i < 4; i++)
+ ASSERT_EQ(skel->bss->kprobe_session_result[i], 2, "kprobe_session_result");
+
+ /* bpf_fentry_test5-8 trigger only entry probe, result is 1 */
+ for (i = 4; i < 8; i++)
+ ASSERT_EQ(skel->bss->kprobe_session_result[i], 1, "kprobe_session_result");
+
+cleanup:
+ bpf_link__destroy(link);
+ kprobe_multi_session__destroy(skel);
+}
+
+static void test_session_cookie_skel_api(void)
+{
+ struct kprobe_multi_session_cookie *skel = NULL;
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct bpf_link *link = NULL;
+ int err, prog_fd;
+
+ skel = kprobe_multi_session_cookie__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "fentry_raw_skel_load"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ err = kprobe_multi_session_cookie__attach(skel);
+ if (!ASSERT_OK(err, " kprobe_multi_wrapper__attach"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.trigger);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
+
+ ASSERT_EQ(skel->bss->test_kprobe_1_result, 1, "test_kprobe_1_result");
+ ASSERT_EQ(skel->bss->test_kprobe_2_result, 2, "test_kprobe_2_result");
+ ASSERT_EQ(skel->bss->test_kprobe_3_result, 3, "test_kprobe_3_result");
+
+cleanup:
+ bpf_link__destroy(link);
+ kprobe_multi_session_cookie__destroy(skel);
+}
+
static size_t symbol_hash(long key, void *ctx __maybe_unused)
{
return str_hash((const char *) key);
@@ -336,15 +406,80 @@ static bool symbol_equal(long key1, long key2, void *ctx __maybe_unused)
return strcmp((const char *) key1, (const char *) key2) == 0;
}
+static bool is_invalid_entry(char *buf, bool kernel)
+{
+ if (kernel && strchr(buf, '['))
+ return true;
+ if (!kernel && !strchr(buf, '['))
+ return true;
+ return false;
+}
+
+static bool skip_entry(char *name)
+{
+ /*
+ * We attach to almost all kernel functions and some of them
+ * will cause 'suspicious RCU usage' when fprobe is attached
+ * to them. Filter out the current culprits - arch_cpu_idle
+ * default_idle and rcu_* functions.
+ */
+ if (!strcmp(name, "arch_cpu_idle"))
+ return true;
+ if (!strcmp(name, "default_idle"))
+ return true;
+ if (!strncmp(name, "rcu_", 4))
+ return true;
+ if (!strcmp(name, "bpf_dispatcher_xdp_func"))
+ return true;
+ if (!strncmp(name, "__ftrace_invalid_address__",
+ sizeof("__ftrace_invalid_address__") - 1))
+ return true;
+ return false;
+}
+
+/* Do comparision by ignoring '.llvm.<hash>' suffixes. */
+static int compare_name(const char *name1, const char *name2)
+{
+ const char *res1, *res2;
+ int len1, len2;
+
+ res1 = strstr(name1, ".llvm.");
+ res2 = strstr(name2, ".llvm.");
+ len1 = res1 ? res1 - name1 : strlen(name1);
+ len2 = res2 ? res2 - name2 : strlen(name2);
+
+ if (len1 == len2)
+ return strncmp(name1, name2, len1);
+ if (len1 < len2)
+ return strncmp(name1, name2, len1) <= 0 ? -1 : 1;
+ return strncmp(name1, name2, len2) >= 0 ? 1 : -1;
+}
+
+static int load_kallsyms_compare(const void *p1, const void *p2)
+{
+ return compare_name(((const struct ksym *)p1)->name, ((const struct ksym *)p2)->name);
+}
+
+static int search_kallsyms_compare(const void *p1, const struct ksym *p2)
+{
+ return compare_name(p1, p2->name);
+}
+
static int get_syms(char ***symsp, size_t *cntp, bool kernel)
{
- size_t cap = 0, cnt = 0, i;
- char *name = NULL, **syms = NULL;
+ size_t cap = 0, cnt = 0;
+ char *name = NULL, *ksym_name, **syms = NULL;
struct hashmap *map;
+ struct ksyms *ksyms;
+ struct ksym *ks;
char buf[256];
FILE *f;
int err = 0;
+ ksyms = load_kallsyms_custom_local(load_kallsyms_compare);
+ if (!ASSERT_OK_PTR(ksyms, "load_kallsyms_custom_local"))
+ return -EINVAL;
+
/*
* The available_filter_functions contains many duplicates,
* but other than that all symbols are usable in kprobe multi
@@ -368,33 +503,23 @@ static int get_syms(char ***symsp, size_t *cntp, bool kernel)
}
while (fgets(buf, sizeof(buf), f)) {
- if (kernel && strchr(buf, '['))
- continue;
- if (!kernel && !strchr(buf, '['))
+ if (is_invalid_entry(buf, kernel))
continue;
free(name);
if (sscanf(buf, "%ms$*[^\n]\n", &name) != 1)
continue;
- /*
- * We attach to almost all kernel functions and some of them
- * will cause 'suspicious RCU usage' when fprobe is attached
- * to them. Filter out the current culprits - arch_cpu_idle
- * default_idle and rcu_* functions.
- */
- if (!strcmp(name, "arch_cpu_idle"))
- continue;
- if (!strcmp(name, "default_idle"))
- continue;
- if (!strncmp(name, "rcu_", 4))
- continue;
- if (!strcmp(name, "bpf_dispatcher_xdp_func"))
- continue;
- if (!strncmp(name, "__ftrace_invalid_address__",
- sizeof("__ftrace_invalid_address__") - 1))
+ if (skip_entry(name))
continue;
- err = hashmap__add(map, name, 0);
+ ks = search_kallsyms_custom_local(ksyms, name, search_kallsyms_compare);
+ if (!ks) {
+ err = -EINVAL;
+ goto error;
+ }
+
+ ksym_name = ks->name;
+ err = hashmap__add(map, ksym_name, 0);
if (err == -EEXIST) {
err = 0;
continue;
@@ -407,8 +532,7 @@ static int get_syms(char ***symsp, size_t *cntp, bool kernel)
if (err)
goto error;
- syms[cnt++] = name;
- name = NULL;
+ syms[cnt++] = ksym_name;
}
*symsp = syms;
@@ -418,42 +542,88 @@ error:
free(name);
fclose(f);
hashmap__free(map);
- if (err) {
- for (i = 0; i < cnt; i++)
- free(syms[i]);
+ if (err)
free(syms);
+ return err;
+}
+
+static int get_addrs(unsigned long **addrsp, size_t *cntp, bool kernel)
+{
+ unsigned long *addr, *addrs, *tmp_addrs;
+ int err = 0, max_cnt, inc_cnt;
+ char *name = NULL;
+ size_t cnt = 0;
+ char buf[256];
+ FILE *f;
+
+ if (access("/sys/kernel/tracing/trace", F_OK) == 0)
+ f = fopen("/sys/kernel/tracing/available_filter_functions_addrs", "r");
+ else
+ f = fopen("/sys/kernel/debug/tracing/available_filter_functions_addrs", "r");
+
+ if (!f)
+ return -ENOENT;
+
+ /* In my local setup, the number of entries is 50k+ so Let us initially
+ * allocate space to hold 64k entries. If 64k is not enough, incrementally
+ * increase 1k each time.
+ */
+ max_cnt = 65536;
+ inc_cnt = 1024;
+ addrs = malloc(max_cnt * sizeof(long));
+ if (addrs == NULL) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ while (fgets(buf, sizeof(buf), f)) {
+ if (is_invalid_entry(buf, kernel))
+ continue;
+
+ free(name);
+ if (sscanf(buf, "%p %ms$*[^\n]\n", &addr, &name) != 2)
+ continue;
+ if (skip_entry(name))
+ continue;
+
+ if (cnt == max_cnt) {
+ max_cnt += inc_cnt;
+ tmp_addrs = realloc(addrs, max_cnt);
+ if (!tmp_addrs) {
+ err = -ENOMEM;
+ goto error;
+ }
+ addrs = tmp_addrs;
+ }
+
+ addrs[cnt++] = (unsigned long)addr;
}
+
+ *addrsp = addrs;
+ *cntp = cnt;
+
+error:
+ free(name);
+ fclose(f);
+ if (err)
+ free(addrs);
return err;
}
-static void test_kprobe_multi_bench_attach(bool kernel)
+static void do_bench_test(struct kprobe_multi_empty *skel, struct bpf_kprobe_multi_opts *opts)
{
- LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
- struct kprobe_multi_empty *skel = NULL;
long attach_start_ns, attach_end_ns;
long detach_start_ns, detach_end_ns;
double attach_delta, detach_delta;
struct bpf_link *link = NULL;
- char **syms = NULL;
- size_t cnt = 0, i;
-
- if (!ASSERT_OK(get_syms(&syms, &cnt, kernel), "get_syms"))
- return;
-
- skel = kprobe_multi_empty__open_and_load();
- if (!ASSERT_OK_PTR(skel, "kprobe_multi_empty__open_and_load"))
- goto cleanup;
-
- opts.syms = (const char **) syms;
- opts.cnt = cnt;
attach_start_ns = get_time_ns();
link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_empty,
- NULL, &opts);
+ NULL, opts);
attach_end_ns = get_time_ns();
if (!ASSERT_OK_PTR(link, "bpf_program__attach_kprobe_multi_opts"))
- goto cleanup;
+ return;
detach_start_ns = get_time_ns();
bpf_link__destroy(link);
@@ -462,17 +632,65 @@ static void test_kprobe_multi_bench_attach(bool kernel)
attach_delta = (attach_end_ns - attach_start_ns) / 1000000000.0;
detach_delta = (detach_end_ns - detach_start_ns) / 1000000000.0;
- printf("%s: found %lu functions\n", __func__, cnt);
+ printf("%s: found %lu functions\n", __func__, opts->cnt);
printf("%s: attached in %7.3lfs\n", __func__, attach_delta);
printf("%s: detached in %7.3lfs\n", __func__, detach_delta);
+}
+
+static void test_kprobe_multi_bench_attach(bool kernel)
+{
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ struct kprobe_multi_empty *skel = NULL;
+ char **syms = NULL;
+ size_t cnt = 0;
+
+ if (!ASSERT_OK(get_syms(&syms, &cnt, kernel), "get_syms"))
+ return;
+
+ skel = kprobe_multi_empty__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "kprobe_multi_empty__open_and_load"))
+ goto cleanup;
+
+ opts.syms = (const char **) syms;
+ opts.cnt = cnt;
+
+ do_bench_test(skel, &opts);
cleanup:
kprobe_multi_empty__destroy(skel);
- if (syms) {
- for (i = 0; i < cnt; i++)
- free(syms[i]);
+ if (syms)
free(syms);
+}
+
+static void test_kprobe_multi_bench_attach_addr(bool kernel)
+{
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ struct kprobe_multi_empty *skel = NULL;
+ unsigned long *addrs = NULL;
+ size_t cnt = 0;
+ int err;
+
+ err = get_addrs(&addrs, &cnt, kernel);
+ if (err == -ENOENT) {
+ test__skip();
+ return;
}
+
+ if (!ASSERT_OK(err, "get_addrs"))
+ return;
+
+ skel = kprobe_multi_empty__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "kprobe_multi_empty__open_and_load"))
+ goto cleanup;
+
+ opts.addrs = addrs;
+ opts.cnt = cnt;
+
+ do_bench_test(skel, &opts);
+
+cleanup:
+ kprobe_multi_empty__destroy(skel);
+ free(addrs);
}
static void test_attach_override(void)
@@ -515,6 +733,10 @@ void serial_test_kprobe_multi_bench_attach(void)
test_kprobe_multi_bench_attach(true);
if (test__start_subtest("modules"))
test_kprobe_multi_bench_attach(false);
+ if (test__start_subtest("kernel"))
+ test_kprobe_multi_bench_attach_addr(true);
+ if (test__start_subtest("modules"))
+ test_kprobe_multi_bench_attach_addr(false);
}
void test_kprobe_multi_test(void)
@@ -538,4 +760,8 @@ void test_kprobe_multi_test(void)
test_attach_api_fails();
if (test__start_subtest("attach_override"))
test_attach_override();
+ if (test__start_subtest("session"))
+ test_session_skel_api();
+ if (test__start_subtest("session_cookie"))
+ test_session_cookie_skel_api();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms.c b/tools/testing/selftests/bpf/prog_tests/ksyms.c
index b295969b263b..dc7aab532fb1 100644
--- a/tools/testing/selftests/bpf/prog_tests/ksyms.c
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms.c
@@ -5,8 +5,6 @@
#include "test_ksyms.skel.h"
#include <sys/stat.h>
-static int duration;
-
void test_ksyms(void)
{
const char *btf_path = "/sys/kernel/btf/vmlinux";
@@ -18,43 +16,37 @@ void test_ksyms(void)
int err;
err = kallsyms_find("bpf_link_fops", &link_fops_addr);
- if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno))
+ if (!ASSERT_NEQ(err, -EINVAL, "bpf_link_fops: kallsyms_fopen"))
return;
- if (CHECK(err == -ENOENT, "ksym_find", "symbol 'bpf_link_fops' not found\n"))
+ if (!ASSERT_NEQ(err, -ENOENT, "bpf_link_fops: ksym_find"))
return;
err = kallsyms_find("__per_cpu_start", &per_cpu_start_addr);
- if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno))
+ if (!ASSERT_NEQ(err, -EINVAL, "__per_cpu_start: kallsyms_fopen"))
return;
- if (CHECK(err == -ENOENT, "ksym_find", "symbol 'per_cpu_start' not found\n"))
+ if (!ASSERT_NEQ(err, -ENOENT, "__per_cpu_start: ksym_find"))
return;
- if (CHECK(stat(btf_path, &st), "stat_btf", "err %d\n", errno))
+ if (!ASSERT_OK(stat(btf_path, &st), "stat_btf"))
return;
btf_size = st.st_size;
skel = test_ksyms__open_and_load();
- if (CHECK(!skel, "skel_open", "failed to open and load skeleton\n"))
+ if (!ASSERT_OK_PTR(skel, "test_ksyms__open_and_load"))
return;
err = test_ksyms__attach(skel);
- if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ if (!ASSERT_OK(err, "test_ksyms__attach"))
goto cleanup;
/* trigger tracepoint */
usleep(1);
data = skel->data;
- CHECK(data->out__bpf_link_fops != link_fops_addr, "bpf_link_fops",
- "got 0x%llx, exp 0x%llx\n",
- data->out__bpf_link_fops, link_fops_addr);
- CHECK(data->out__bpf_link_fops1 != 0, "bpf_link_fops1",
- "got %llu, exp %llu\n", data->out__bpf_link_fops1, (__u64)0);
- CHECK(data->out__btf_size != btf_size, "btf_size",
- "got %llu, exp %llu\n", data->out__btf_size, btf_size);
- CHECK(data->out__per_cpu_start != per_cpu_start_addr, "__per_cpu_start",
- "got %llu, exp %llu\n", data->out__per_cpu_start,
- per_cpu_start_addr);
+ ASSERT_EQ(data->out__bpf_link_fops, link_fops_addr, "bpf_link_fops");
+ ASSERT_EQ(data->out__bpf_link_fops1, 0, "bpf_link_fops1");
+ ASSERT_EQ(data->out__btf_size, btf_size, "btf_size");
+ ASSERT_EQ(data->out__per_cpu_start, per_cpu_start_addr, "__per_cpu_start");
cleanup:
test_ksyms__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c
index 2fb89de63bd2..77d07e0a4a55 100644
--- a/tools/testing/selftests/bpf/prog_tests/linked_list.c
+++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c
@@ -183,6 +183,18 @@ static void test_linked_list_success(int mode, bool leave_in_map)
if (!leave_in_map)
clear_fields(skel->maps.bss_A);
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_push_pop_nested), &opts);
+ ASSERT_OK(ret, "global_list_push_pop_nested");
+ ASSERT_OK(opts.retval, "global_list_push_pop_nested retval");
+ if (!leave_in_map)
+ clear_fields(skel->maps.bss_A);
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_array_push_pop), &opts);
+ ASSERT_OK(ret, "global_list_array_push_pop");
+ ASSERT_OK(opts.retval, "global_list_array_push_pop retval");
+ if (!leave_in_map)
+ clear_fields(skel->maps.bss_A);
+
if (mode == PUSH_POP)
goto end;
diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c
index f53d658ed080..6d391d95f96e 100644
--- a/tools/testing/selftests/bpf/prog_tests/module_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c
@@ -51,6 +51,10 @@ void test_module_attach(void)
0, "bpf_testmod_test_read");
ASSERT_OK(err, "set_attach_target");
+ err = bpf_program__set_attach_target(skel->progs.handle_fentry_explicit_manual,
+ 0, "bpf_testmod:bpf_testmod_test_read");
+ ASSERT_OK(err, "set_attach_target_explicit");
+
err = test_module_attach__load(skel);
if (CHECK(err, "skel_load", "failed to load skeleton\n"))
return;
@@ -70,6 +74,8 @@ void test_module_attach(void)
ASSERT_EQ(bss->tp_btf_read_sz, READ_SZ, "tp_btf");
ASSERT_EQ(bss->fentry_read_sz, READ_SZ, "fentry");
ASSERT_EQ(bss->fentry_manual_read_sz, READ_SZ, "fentry_manual");
+ ASSERT_EQ(bss->fentry_explicit_read_sz, READ_SZ, "fentry_explicit");
+ ASSERT_EQ(bss->fentry_explicit_manual_read_sz, READ_SZ, "fentry_explicit_manual");
ASSERT_EQ(bss->fexit_read_sz, READ_SZ, "fexit");
ASSERT_EQ(bss->fexit_ret, -EIO, "fexit_tet");
ASSERT_EQ(bss->fmod_ret_read_sz, READ_SZ, "fmod_ret");
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
index 8f8d792307c1..d2ca32fa3b21 100644
--- a/tools/testing/selftests/bpf/prog_tests/mptcp.c
+++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
@@ -82,6 +82,17 @@ static void cleanup_netns(struct nstoken *nstoken)
SYS_NOFAIL("ip netns del %s", NS_TEST);
}
+static int start_mptcp_server(int family, const char *addr_str, __u16 port,
+ int timeout_ms)
+{
+ struct network_helper_opts opts = {
+ .timeout_ms = timeout_ms,
+ .proto = IPPROTO_MPTCP,
+ };
+
+ return start_server_str(family, SOCK_STREAM, addr_str, port, &opts);
+}
+
static int verify_tsk(int map_fd, int client_fd)
{
int err, cfd = client_fd;
@@ -273,6 +284,8 @@ static int run_mptcpify(int cgroup_fd)
if (!ASSERT_OK_PTR(mptcpify_skel, "skel_open_load"))
return libbpf_get_error(mptcpify_skel);
+ mptcpify_skel->bss->pid = getpid();
+
err = mptcpify__attach(mptcpify_skel);
if (!ASSERT_OK(err, "skel_attach"))
goto out;
diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
index 24d493482ffc..e72d75d6baa7 100644
--- a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
+++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
@@ -12,77 +12,229 @@
#include <sys/wait.h>
#include <sys/mount.h>
#include <sys/fcntl.h>
+#include "network_helpers.h"
#define STACK_SIZE (1024 * 1024)
static char child_stack[STACK_SIZE];
-static int test_current_pid_tgid(void *args)
+static int get_pid_tgid(pid_t *pid, pid_t *tgid,
+ struct test_ns_current_pid_tgid__bss *bss)
{
- struct test_ns_current_pid_tgid__bss *bss;
- struct test_ns_current_pid_tgid *skel;
- int err = -1, duration = 0;
- pid_t tgid, pid;
struct stat st;
+ int err;
- skel = test_ns_current_pid_tgid__open_and_load();
- if (CHECK(!skel, "skel_open_load", "failed to load skeleton\n"))
- goto cleanup;
-
- pid = syscall(SYS_gettid);
- tgid = getpid();
+ *pid = syscall(SYS_gettid);
+ *tgid = getpid();
err = stat("/proc/self/ns/pid", &st);
- if (CHECK(err, "stat", "failed /proc/self/ns/pid: %d\n", err))
- goto cleanup;
+ if (!ASSERT_OK(err, "stat /proc/self/ns/pid"))
+ return err;
- bss = skel->bss;
bss->dev = st.st_dev;
bss->ino = st.st_ino;
bss->user_pid = 0;
bss->user_tgid = 0;
+ return 0;
+}
+
+static int test_current_pid_tgid_tp(void *args)
+{
+ struct test_ns_current_pid_tgid__bss *bss;
+ struct test_ns_current_pid_tgid *skel;
+ int ret = -1, err;
+ pid_t tgid, pid;
+
+ skel = test_ns_current_pid_tgid__open();
+ if (!ASSERT_OK_PTR(skel, "test_ns_current_pid_tgid__open"))
+ return ret;
+
+ bpf_program__set_autoload(skel->progs.tp_handler, true);
+
+ err = test_ns_current_pid_tgid__load(skel);
+ if (!ASSERT_OK(err, "test_ns_current_pid_tgid__load"))
+ goto cleanup;
+
+ bss = skel->bss;
+ if (get_pid_tgid(&pid, &tgid, bss))
+ goto cleanup;
err = test_ns_current_pid_tgid__attach(skel);
- if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ if (!ASSERT_OK(err, "test_ns_current_pid_tgid__attach"))
goto cleanup;
/* trigger tracepoint */
usleep(1);
- ASSERT_EQ(bss->user_pid, pid, "pid");
- ASSERT_EQ(bss->user_tgid, tgid, "tgid");
- err = 0;
+ if (!ASSERT_EQ(bss->user_pid, pid, "pid"))
+ goto cleanup;
+ if (!ASSERT_EQ(bss->user_tgid, tgid, "tgid"))
+ goto cleanup;
+ ret = 0;
+
+cleanup:
+ test_ns_current_pid_tgid__destroy(skel);
+ return ret;
+}
+
+static int test_current_pid_tgid_cgrp(void *args)
+{
+ struct test_ns_current_pid_tgid__bss *bss;
+ struct test_ns_current_pid_tgid *skel;
+ int server_fd = -1, ret = -1, err;
+ int cgroup_fd = *(int *)args;
+ pid_t tgid, pid;
+
+ skel = test_ns_current_pid_tgid__open();
+ if (!ASSERT_OK_PTR(skel, "test_ns_current_pid_tgid__open"))
+ return ret;
+
+ bpf_program__set_autoload(skel->progs.cgroup_bind4, true);
+
+ err = test_ns_current_pid_tgid__load(skel);
+ if (!ASSERT_OK(err, "test_ns_current_pid_tgid__load"))
+ goto cleanup;
+
+ bss = skel->bss;
+ if (get_pid_tgid(&pid, &tgid, bss))
+ goto cleanup;
+
+ skel->links.cgroup_bind4 = bpf_program__attach_cgroup(
+ skel->progs.cgroup_bind4, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.cgroup_bind4, "bpf_program__attach_cgroup"))
+ goto cleanup;
+
+ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0);
+ if (!ASSERT_GE(server_fd, 0, "start_server"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(bss->user_pid, pid, "pid"))
+ goto cleanup;
+ if (!ASSERT_EQ(bss->user_tgid, tgid, "tgid"))
+ goto cleanup;
+ ret = 0;
cleanup:
- test_ns_current_pid_tgid__destroy(skel);
+ if (server_fd >= 0)
+ close(server_fd);
+ test_ns_current_pid_tgid__destroy(skel);
+ return ret;
+}
+
+static int test_current_pid_tgid_sk_msg(void *args)
+{
+ int verdict, map, server_fd = -1, client_fd = -1;
+ struct test_ns_current_pid_tgid__bss *bss;
+ static const char send_msg[] = "message";
+ struct test_ns_current_pid_tgid *skel;
+ int ret = -1, err, key = 0;
+ pid_t tgid, pid;
+
+ skel = test_ns_current_pid_tgid__open();
+ if (!ASSERT_OK_PTR(skel, "test_ns_current_pid_tgid__open"))
+ return ret;
+
+ bpf_program__set_autoload(skel->progs.sk_msg, true);
+
+ err = test_ns_current_pid_tgid__load(skel);
+ if (!ASSERT_OK(err, "test_ns_current_pid_tgid__load"))
+ goto cleanup;
+
+ bss = skel->bss;
+ if (get_pid_tgid(&pid, &tgid, skel->bss))
+ goto cleanup;
+
+ verdict = bpf_program__fd(skel->progs.sk_msg);
+ map = bpf_map__fd(skel->maps.sock_map);
+ err = bpf_prog_attach(verdict, map, BPF_SK_MSG_VERDICT, 0);
+ if (!ASSERT_OK(err, "prog_attach"))
+ goto cleanup;
+
+ server_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+ if (!ASSERT_GE(server_fd, 0, "start_server"))
+ goto cleanup;
- return err;
+ client_fd = connect_to_fd(server_fd, 0);
+ if (!ASSERT_GE(client_fd, 0, "connect_to_fd"))
+ goto cleanup;
+
+ err = bpf_map_update_elem(map, &key, &client_fd, BPF_ANY);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ goto cleanup;
+
+ err = send(client_fd, send_msg, sizeof(send_msg), 0);
+ if (!ASSERT_EQ(err, sizeof(send_msg), "send(msg)"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(bss->user_pid, pid, "pid"))
+ goto cleanup;
+ if (!ASSERT_EQ(bss->user_tgid, tgid, "tgid"))
+ goto cleanup;
+ ret = 0;
+
+cleanup:
+ if (server_fd >= 0)
+ close(server_fd);
+ if (client_fd >= 0)
+ close(client_fd);
+ test_ns_current_pid_tgid__destroy(skel);
+ return ret;
}
-static void test_ns_current_pid_tgid_new_ns(void)
+static void test_ns_current_pid_tgid_new_ns(int (*fn)(void *), void *arg)
{
- int wstatus, duration = 0;
+ int wstatus;
pid_t cpid;
/* Create a process in a new namespace, this process
* will be the init process of this new namespace hence will be pid 1.
*/
- cpid = clone(test_current_pid_tgid, child_stack + STACK_SIZE,
- CLONE_NEWPID | SIGCHLD, NULL);
+ cpid = clone(fn, child_stack + STACK_SIZE,
+ CLONE_NEWPID | SIGCHLD, arg);
- if (CHECK(cpid == -1, "clone", "%s\n", strerror(errno)))
+ if (!ASSERT_NEQ(cpid, -1, "clone"))
return;
- if (CHECK(waitpid(cpid, &wstatus, 0) == -1, "waitpid", "%s\n", strerror(errno)))
+ if (!ASSERT_NEQ(waitpid(cpid, &wstatus, 0), -1, "waitpid"))
return;
- if (CHECK(WEXITSTATUS(wstatus) != 0, "newns_pidtgid", "failed"))
+ if (!ASSERT_OK(WEXITSTATUS(wstatus), "newns_pidtgid"))
return;
}
+static void test_in_netns(int (*fn)(void *), void *arg)
+{
+ struct nstoken *nstoken = NULL;
+
+ SYS(cleanup, "ip netns add ns_current_pid_tgid");
+ SYS(cleanup, "ip -net ns_current_pid_tgid link set dev lo up");
+
+ nstoken = open_netns("ns_current_pid_tgid");
+ if (!ASSERT_OK_PTR(nstoken, "open_netns"))
+ goto cleanup;
+
+ test_ns_current_pid_tgid_new_ns(fn, arg);
+
+cleanup:
+ if (nstoken)
+ close_netns(nstoken);
+ SYS_NOFAIL("ip netns del ns_current_pid_tgid");
+}
+
/* TODO: use a different tracepoint */
void serial_test_ns_current_pid_tgid(void)
{
- if (test__start_subtest("ns_current_pid_tgid_root_ns"))
- test_current_pid_tgid(NULL);
- if (test__start_subtest("ns_current_pid_tgid_new_ns"))
- test_ns_current_pid_tgid_new_ns();
+ if (test__start_subtest("root_ns_tp"))
+ test_current_pid_tgid_tp(NULL);
+ if (test__start_subtest("new_ns_tp"))
+ test_ns_current_pid_tgid_new_ns(test_current_pid_tgid_tp, NULL);
+ if (test__start_subtest("new_ns_cgrp")) {
+ int cgroup_fd = -1;
+
+ cgroup_fd = test__join_cgroup("/sock_addr");
+ if (ASSERT_GE(cgroup_fd, 0, "join_cgroup")) {
+ test_in_netns(test_current_pid_tgid_cgrp, &cgroup_fd);
+ close(cgroup_fd);
+ }
+ }
+ if (test__start_subtest("new_ns_sk_msg"))
+ test_in_netns(test_current_pid_tgid_sk_msg, NULL);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_skip.c b/tools/testing/selftests/bpf/prog_tests/perf_skip.c
new file mode 100644
index 000000000000..37d8618800e4
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/perf_skip.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <test_progs.h>
+#include "test_perf_skip.skel.h"
+#include <linux/compiler.h>
+#include <linux/hw_breakpoint.h>
+#include <sys/mman.h>
+
+#ifndef TRAP_PERF
+#define TRAP_PERF 6
+#endif
+
+int sigio_count, sigtrap_count;
+
+static void handle_sigio(int sig __always_unused)
+{
+ ++sigio_count;
+}
+
+static void handle_sigtrap(int signum __always_unused,
+ siginfo_t *info,
+ void *ucontext __always_unused)
+{
+ ASSERT_EQ(info->si_code, TRAP_PERF, "si_code");
+ ++sigtrap_count;
+}
+
+static noinline int test_function(void)
+{
+ asm volatile ("");
+ return 0;
+}
+
+void serial_test_perf_skip(void)
+{
+ struct sigaction action = {};
+ struct sigaction previous_sigtrap;
+ sighandler_t previous_sigio = SIG_ERR;
+ struct test_perf_skip *skel = NULL;
+ struct perf_event_attr attr = {};
+ int perf_fd = -1;
+ int err;
+ struct f_owner_ex owner;
+ struct bpf_link *prog_link = NULL;
+
+ action.sa_flags = SA_SIGINFO | SA_NODEFER;
+ action.sa_sigaction = handle_sigtrap;
+ sigemptyset(&action.sa_mask);
+ if (!ASSERT_OK(sigaction(SIGTRAP, &action, &previous_sigtrap), "sigaction"))
+ return;
+
+ previous_sigio = signal(SIGIO, handle_sigio);
+ if (!ASSERT_NEQ(previous_sigio, SIG_ERR, "signal"))
+ goto cleanup;
+
+ skel = test_perf_skip__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ attr.type = PERF_TYPE_BREAKPOINT;
+ attr.size = sizeof(attr);
+ attr.bp_type = HW_BREAKPOINT_X;
+ attr.bp_addr = (uintptr_t)test_function;
+ attr.bp_len = sizeof(long);
+ attr.sample_period = 1;
+ attr.sample_type = PERF_SAMPLE_IP;
+ attr.pinned = 1;
+ attr.exclude_kernel = 1;
+ attr.exclude_hv = 1;
+ attr.precise_ip = 3;
+ attr.sigtrap = 1;
+ attr.remove_on_exec = 1;
+
+ perf_fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
+ if (perf_fd < 0 && (errno == ENOENT || errno == EOPNOTSUPP)) {
+ printf("SKIP:no PERF_TYPE_BREAKPOINT/HW_BREAKPOINT_X\n");
+ test__skip();
+ goto cleanup;
+ }
+ if (!ASSERT_OK(perf_fd < 0, "perf_event_open"))
+ goto cleanup;
+
+ /* Configure the perf event to signal on sample. */
+ err = fcntl(perf_fd, F_SETFL, O_ASYNC);
+ if (!ASSERT_OK(err, "fcntl(F_SETFL, O_ASYNC)"))
+ goto cleanup;
+
+ owner.type = F_OWNER_TID;
+ owner.pid = syscall(__NR_gettid);
+ err = fcntl(perf_fd, F_SETOWN_EX, &owner);
+ if (!ASSERT_OK(err, "fcntl(F_SETOWN_EX)"))
+ goto cleanup;
+
+ /* Allow at most one sample. A sample rejected by bpf should
+ * not count against this.
+ */
+ err = ioctl(perf_fd, PERF_EVENT_IOC_REFRESH, 1);
+ if (!ASSERT_OK(err, "ioctl(PERF_EVENT_IOC_REFRESH)"))
+ goto cleanup;
+
+ prog_link = bpf_program__attach_perf_event(skel->progs.handler, perf_fd);
+ if (!ASSERT_OK_PTR(prog_link, "bpf_program__attach_perf_event"))
+ goto cleanup;
+
+ /* Configure the bpf program to suppress the sample. */
+ skel->bss->ip = (uintptr_t)test_function;
+ test_function();
+
+ ASSERT_EQ(sigio_count, 0, "sigio_count");
+ ASSERT_EQ(sigtrap_count, 0, "sigtrap_count");
+
+ /* Configure the bpf program to allow the sample. */
+ skel->bss->ip = 0;
+ test_function();
+
+ ASSERT_EQ(sigio_count, 1, "sigio_count");
+ ASSERT_EQ(sigtrap_count, 1, "sigtrap_count");
+
+ /* Test that the sample above is the only one allowed (by perf, not
+ * by bpf)
+ */
+ test_function();
+
+ ASSERT_EQ(sigio_count, 1, "sigio_count");
+ ASSERT_EQ(sigtrap_count, 1, "sigtrap_count");
+
+cleanup:
+ bpf_link__destroy(prog_link);
+ if (perf_fd >= 0)
+ close(perf_fd);
+ test_perf_skip__destroy(skel);
+
+ if (previous_sigio != SIG_ERR)
+ signal(SIGIO, previous_sigio);
+ sigaction(SIGTRAP, &previous_sigtrap, NULL);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/preempt_lock.c b/tools/testing/selftests/bpf/prog_tests/preempt_lock.c
new file mode 100644
index 000000000000..02917c672441
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/preempt_lock.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+#include <preempt_lock.skel.h>
+
+void test_preempt_lock(void)
+{
+ RUN_TESTS(preempt_lock);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/rbtree.c b/tools/testing/selftests/bpf/prog_tests/rbtree.c
index e9300c96607d..9818f06c97c5 100644
--- a/tools/testing/selftests/bpf/prog_tests/rbtree.c
+++ b/tools/testing/selftests/bpf/prog_tests/rbtree.c
@@ -31,6 +31,28 @@ static void test_rbtree_add_nodes(void)
rbtree__destroy(skel);
}
+static void test_rbtree_add_nodes_nested(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct rbtree *skel;
+ int ret;
+
+ skel = rbtree__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "rbtree__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_add_nodes_nested), &opts);
+ ASSERT_OK(ret, "rbtree_add_nodes_nested run");
+ ASSERT_OK(opts.retval, "rbtree_add_nodes_nested retval");
+ ASSERT_EQ(skel->data->less_callback_ran, 1, "rbtree_add_nodes_nested less_callback_ran");
+
+ rbtree__destroy(skel);
+}
+
static void test_rbtree_add_and_remove(void)
{
LIBBPF_OPTS(bpf_test_run_opts, opts,
@@ -53,6 +75,27 @@ static void test_rbtree_add_and_remove(void)
rbtree__destroy(skel);
}
+static void test_rbtree_add_and_remove_array(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct rbtree *skel;
+ int ret;
+
+ skel = rbtree__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "rbtree__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_add_and_remove_array), &opts);
+ ASSERT_OK(ret, "rbtree_add_and_remove_array");
+ ASSERT_OK(opts.retval, "rbtree_add_and_remove_array retval");
+
+ rbtree__destroy(skel);
+}
+
static void test_rbtree_first_and_remove(void)
{
LIBBPF_OPTS(bpf_test_run_opts, opts,
@@ -104,8 +147,12 @@ void test_rbtree_success(void)
{
if (test__start_subtest("rbtree_add_nodes"))
test_rbtree_add_nodes();
+ if (test__start_subtest("rbtree_add_nodes_nested"))
+ test_rbtree_add_nodes_nested();
if (test__start_subtest("rbtree_add_and_remove"))
test_rbtree_add_and_remove();
+ if (test__start_subtest("rbtree_add_and_remove_array"))
+ test_rbtree_add_and_remove_array();
if (test__start_subtest("rbtree_first_and_remove"))
test_rbtree_first_and_remove();
if (test__start_subtest("rbtree_api_release_aliasing"))
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
index 48c5695b7abf..da430df45aa4 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -12,8 +12,11 @@
#include <sys/sysinfo.h>
#include <linux/perf_event.h>
#include <linux/ring_buffer.h>
+
#include "test_ringbuf.lskel.h"
+#include "test_ringbuf_n.lskel.h"
#include "test_ringbuf_map_key.lskel.h"
+#include "test_ringbuf_write.lskel.h"
#define EDONE 7777
@@ -83,6 +86,58 @@ static void *poll_thread(void *input)
return (void *)(long)ring_buffer__poll(ringbuf, timeout);
}
+static void ringbuf_write_subtest(void)
+{
+ struct test_ringbuf_write_lskel *skel;
+ int page_size = getpagesize();
+ size_t *mmap_ptr;
+ int err, rb_fd;
+
+ skel = test_ringbuf_write_lskel__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->maps.ringbuf.max_entries = 0x4000;
+
+ err = test_ringbuf_write_lskel__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ rb_fd = skel->maps.ringbuf.map_fd;
+
+ mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rb_fd, 0);
+ if (!ASSERT_OK_PTR(mmap_ptr, "rw_cons_pos"))
+ goto cleanup;
+ *mmap_ptr = 0x3000;
+ ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw");
+
+ skel->bss->pid = getpid();
+
+ ringbuf = ring_buffer__new(rb_fd, process_sample, NULL, NULL);
+ if (!ASSERT_OK_PTR(ringbuf, "ringbuf_new"))
+ goto cleanup;
+
+ err = test_ringbuf_write_lskel__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup_ringbuf;
+
+ skel->bss->discarded = 0;
+ skel->bss->passed = 0;
+
+ /* trigger exactly two samples */
+ syscall(__NR_getpgid);
+ syscall(__NR_getpgid);
+
+ ASSERT_EQ(skel->bss->discarded, 2, "discarded");
+ ASSERT_EQ(skel->bss->passed, 0, "passed");
+
+ test_ringbuf_write_lskel__detach(skel);
+cleanup_ringbuf:
+ ring_buffer__free(ringbuf);
+cleanup:
+ test_ringbuf_write_lskel__destroy(skel);
+}
+
static void ringbuf_subtest(void)
{
const size_t rec_sz = BPF_RINGBUF_HDR_SZ + sizeof(struct sample);
@@ -326,6 +381,68 @@ cleanup:
test_ringbuf_lskel__destroy(skel);
}
+/*
+ * Test ring_buffer__consume_n() by producing N_TOT_SAMPLES samples in the ring
+ * buffer, via getpid(), and consuming them in chunks of N_SAMPLES.
+ */
+#define N_TOT_SAMPLES 32
+#define N_SAMPLES 4
+
+/* Sample value to verify the callback validity */
+#define SAMPLE_VALUE 42L
+
+static int process_n_sample(void *ctx, void *data, size_t len)
+{
+ struct sample *s = data;
+
+ ASSERT_EQ(s->value, SAMPLE_VALUE, "sample_value");
+
+ return 0;
+}
+
+static void ringbuf_n_subtest(void)
+{
+ struct test_ringbuf_n_lskel *skel_n;
+ int err, i;
+
+ skel_n = test_ringbuf_n_lskel__open();
+ if (!ASSERT_OK_PTR(skel_n, "test_ringbuf_n_lskel__open"))
+ return;
+
+ skel_n->maps.ringbuf.max_entries = getpagesize();
+ skel_n->bss->pid = getpid();
+
+ err = test_ringbuf_n_lskel__load(skel_n);
+ if (!ASSERT_OK(err, "test_ringbuf_n_lskel__load"))
+ goto cleanup;
+
+ ringbuf = ring_buffer__new(skel_n->maps.ringbuf.map_fd,
+ process_n_sample, NULL, NULL);
+ if (!ASSERT_OK_PTR(ringbuf, "ring_buffer__new"))
+ goto cleanup;
+
+ err = test_ringbuf_n_lskel__attach(skel_n);
+ if (!ASSERT_OK(err, "test_ringbuf_n_lskel__attach"))
+ goto cleanup_ringbuf;
+
+ /* Produce N_TOT_SAMPLES samples in the ring buffer by calling getpid() */
+ skel_n->bss->value = SAMPLE_VALUE;
+ for (i = 0; i < N_TOT_SAMPLES; i++)
+ syscall(__NR_getpgid);
+
+ /* Consume all samples from the ring buffer in batches of N_SAMPLES */
+ for (i = 0; i < N_TOT_SAMPLES; i += err) {
+ err = ring_buffer__consume_n(ringbuf, N_SAMPLES);
+ if (!ASSERT_EQ(err, N_SAMPLES, "rb_consume"))
+ goto cleanup_ringbuf;
+ }
+
+cleanup_ringbuf:
+ ring_buffer__free(ringbuf);
+cleanup:
+ test_ringbuf_n_lskel__destroy(skel_n);
+}
+
static int process_map_key_sample(void *ctx, void *data, size_t len)
{
struct sample *s;
@@ -384,6 +501,10 @@ void test_ringbuf(void)
{
if (test__start_subtest("ringbuf"))
ringbuf_subtest();
+ if (test__start_subtest("ringbuf_n"))
+ ringbuf_n_subtest();
if (test__start_subtest("ringbuf_map_key"))
ringbuf_map_key_subtest();
+ if (test__start_subtest("ringbuf_write"))
+ ringbuf_write_subtest();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c
index b15b343ebb6b..6cc69900b310 100644
--- a/tools/testing/selftests/bpf/prog_tests/send_signal.c
+++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c
@@ -156,7 +156,8 @@ static void test_send_signal_tracepoint(bool signal_thread)
static void test_send_signal_perf(bool signal_thread)
{
struct perf_event_attr attr = {
- .sample_period = 1,
+ .freq = 1,
+ .sample_freq = 1000,
.type = PERF_TYPE_SOFTWARE,
.config = PERF_COUNT_SW_CPU_CLOCK,
};
@@ -179,7 +180,7 @@ static void test_send_signal_nmi(bool signal_thread)
pmu_fd = syscall(__NR_perf_event_open, &attr, 0 /* pid */,
-1 /* cpu */, -1 /* group_fd */, 0 /* flags */);
if (pmu_fd == -1) {
- if (errno == ENOENT) {
+ if (errno == ENOENT || errno == EOPNOTSUPP) {
printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n",
__func__);
test__skip();
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_assign.c b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
index 1374b626a985..0b9bd1d6f7cc 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_assign.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
@@ -15,6 +15,7 @@
#include <unistd.h>
#include "test_progs.h"
+#include "network_helpers.h"
#define BIND_PORT 1234
#define CONNECT_PORT 4321
@@ -22,8 +23,6 @@
#define NS_SELF "/proc/self/ns/net"
#define SERVER_MAP_PATH "/sys/fs/bpf/tc/globals/server_map"
-static const struct timeval timeo_sec = { .tv_sec = 3 };
-static const size_t timeo_optlen = sizeof(timeo_sec);
static int stop, duration;
static bool
@@ -73,52 +72,6 @@ configure_stack(void)
return true;
}
-static int
-start_server(const struct sockaddr *addr, socklen_t len, int type)
-{
- int fd;
-
- fd = socket(addr->sa_family, type, 0);
- if (CHECK_FAIL(fd == -1))
- goto out;
- if (CHECK_FAIL(setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeo_sec,
- timeo_optlen)))
- goto close_out;
- if (CHECK_FAIL(bind(fd, addr, len) == -1))
- goto close_out;
- if (type == SOCK_STREAM && CHECK_FAIL(listen(fd, 128) == -1))
- goto close_out;
-
- goto out;
-close_out:
- close(fd);
- fd = -1;
-out:
- return fd;
-}
-
-static int
-connect_to_server(const struct sockaddr *addr, socklen_t len, int type)
-{
- int fd = -1;
-
- fd = socket(addr->sa_family, type, 0);
- if (CHECK_FAIL(fd == -1))
- goto out;
- if (CHECK_FAIL(setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeo_sec,
- timeo_optlen)))
- goto close_out;
- if (CHECK_FAIL(connect(fd, addr, len)))
- goto close_out;
-
- goto out;
-close_out:
- close(fd);
- fd = -1;
-out:
- return fd;
-}
-
static in_port_t
get_port(int fd)
{
@@ -161,7 +114,7 @@ run_test(int server_fd, const struct sockaddr *addr, socklen_t len, int type)
in_port_t port;
int ret = 1;
- client = connect_to_server(addr, len, type);
+ client = connect_to_addr(type, (struct sockaddr_storage *)addr, len, NULL);
if (client == -1) {
perror("Cannot connect to server");
goto out;
@@ -310,7 +263,9 @@ void test_sk_assign(void)
continue;
prepare_addr(test->addr, test->family, BIND_PORT, false);
addr = (const struct sockaddr *)test->addr;
- server = start_server(addr, test->len, test->type);
+ server = start_server_addr(test->type,
+ (const struct sockaddr_storage *)addr,
+ test->len, NULL);
if (server == -1)
goto close;
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
index 597d0467a926..ae87c00867ba 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
@@ -77,6 +77,12 @@ struct test {
bool reuseport_has_conns; /* Add a connected socket to reuseport group */
};
+struct cb_opts {
+ int family;
+ int sotype;
+ bool reuseport;
+};
+
static __u32 duration; /* for CHECK macro */
static bool is_ipv6(const char *ip)
@@ -142,19 +148,14 @@ static int make_socket(int sotype, const char *ip, int port,
return fd;
}
-static int make_server(int sotype, const char *ip, int port,
- struct bpf_program *reuseport_prog)
+static int setsockopts(int fd, void *opts)
{
- struct sockaddr_storage addr = {0};
+ struct cb_opts *co = (struct cb_opts *)opts;
const int one = 1;
- int err, fd = -1;
-
- fd = make_socket(sotype, ip, port, &addr);
- if (fd < 0)
- return -1;
+ int err = 0;
/* Enabled for UDPv6 sockets for IPv4-mapped IPv6 to work. */
- if (sotype == SOCK_DGRAM) {
+ if (co->sotype == SOCK_DGRAM) {
err = setsockopt(fd, SOL_IP, IP_RECVORIGDSTADDR, &one,
sizeof(one));
if (CHECK(err, "setsockopt(IP_RECVORIGDSTADDR)", "failed\n")) {
@@ -163,7 +164,7 @@ static int make_server(int sotype, const char *ip, int port,
}
}
- if (sotype == SOCK_DGRAM && addr.ss_family == AF_INET6) {
+ if (co->sotype == SOCK_DGRAM && co->family == AF_INET6) {
err = setsockopt(fd, SOL_IPV6, IPV6_RECVORIGDSTADDR, &one,
sizeof(one));
if (CHECK(err, "setsockopt(IPV6_RECVORIGDSTADDR)", "failed\n")) {
@@ -172,7 +173,7 @@ static int make_server(int sotype, const char *ip, int port,
}
}
- if (sotype == SOCK_STREAM) {
+ if (co->sotype == SOCK_STREAM) {
err = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one,
sizeof(one));
if (CHECK(err, "setsockopt(SO_REUSEADDR)", "failed\n")) {
@@ -181,7 +182,7 @@ static int make_server(int sotype, const char *ip, int port,
}
}
- if (reuseport_prog) {
+ if (co->reuseport) {
err = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one,
sizeof(one));
if (CHECK(err, "setsockopt(SO_REUSEPORT)", "failed\n")) {
@@ -190,19 +191,28 @@ static int make_server(int sotype, const char *ip, int port,
}
}
- err = bind(fd, (void *)&addr, inetaddr_len(&addr));
- if (CHECK(err, "bind", "failed\n")) {
- log_err("failed to bind listen socket");
- goto fail;
- }
+fail:
+ return err;
+}
- if (sotype == SOCK_STREAM) {
- err = listen(fd, SOMAXCONN);
- if (CHECK(err, "make_server", "listen")) {
- log_err("failed to listen on port %d", port);
- goto fail;
- }
- }
+static int make_server(int sotype, const char *ip, int port,
+ struct bpf_program *reuseport_prog)
+{
+ struct cb_opts cb_opts = {
+ .family = is_ipv6(ip) ? AF_INET6 : AF_INET,
+ .sotype = sotype,
+ .reuseport = reuseport_prog,
+ };
+ struct network_helper_opts opts = {
+ .backlog = SOMAXCONN,
+ .post_socket_cb = setsockopts,
+ .cb_opts = &cb_opts,
+ };
+ int err, fd;
+
+ fd = start_server_str(cb_opts.family, sotype, ip, port, &opts);
+ if (!ASSERT_OK_FD(fd, "start_server_str"))
+ return -1;
/* Late attach reuseport prog so we can have one init path */
if (reuseport_prog) {
@@ -406,18 +416,12 @@ static int udp_recv_send(int server_fd)
}
/* Reply from original destination address. */
- fd = socket(dst_addr->ss_family, SOCK_DGRAM, 0);
- if (CHECK(fd < 0, "socket", "failed\n")) {
+ fd = start_server_addr(SOCK_DGRAM, dst_addr, sizeof(*dst_addr), NULL);
+ if (!ASSERT_OK_FD(fd, "start_server_addr")) {
log_err("failed to create tx socket");
return -1;
}
- ret = bind(fd, (struct sockaddr *)dst_addr, sizeof(*dst_addr));
- if (CHECK(ret, "bind", "failed\n")) {
- log_err("failed to bind tx socket");
- goto out;
- }
-
msg.msg_control = NULL;
msg.msg_controllen = 0;
n = sendmsg(fd, &msg, 0);
@@ -629,9 +633,6 @@ static void run_lookup_prog(const struct test *t)
* BPF socket lookup.
*/
if (t->reuseport_has_conns) {
- struct sockaddr_storage addr = {};
- socklen_t len = sizeof(addr);
-
/* Add an extra socket to reuseport group */
reuse_conn_fd = make_server(t->sotype, t->listen_at.ip,
t->listen_at.port,
@@ -639,12 +640,9 @@ static void run_lookup_prog(const struct test *t)
if (reuse_conn_fd < 0)
goto close;
- /* Connect the extra socket to itself */
- err = getsockname(reuse_conn_fd, (void *)&addr, &len);
- if (CHECK(err, "getsockname", "errno %d\n", errno))
- goto close;
- err = connect(reuse_conn_fd, (void *)&addr, len);
- if (CHECK(err, "connect", "errno %d\n", errno))
+ /* Connect the extra socket to itself */
+ err = connect_fd_to_fd(reuse_conn_fd, reuse_conn_fd, 0);
+ if (!ASSERT_OK(err, "connect_fd_to_fd"))
goto close;
}
@@ -994,7 +992,7 @@ static void drop_on_reuseport(const struct test *t)
err = update_lookup_map(t->sock_map, SERVER_A, server1);
if (err)
- goto detach;
+ goto close_srv1;
/* second server on destination address we should never reach */
server2 = make_server(t->sotype, t->connect_to.ip, t->connect_to.port,
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_addr.c b/tools/testing/selftests/bpf/prog_tests/sock_addr.c
index 5fd617718991..b880c564a204 100644
--- a/tools/testing/selftests/bpf/prog_tests/sock_addr.c
+++ b/tools/testing/selftests/bpf/prog_tests/sock_addr.c
@@ -3,16 +3,56 @@
#include "test_progs.h"
+#include "sock_addr_kern.skel.h"
+#include "bind4_prog.skel.h"
+#include "bind6_prog.skel.h"
#include "connect_unix_prog.skel.h"
+#include "connect4_prog.skel.h"
+#include "connect6_prog.skel.h"
+#include "sendmsg4_prog.skel.h"
+#include "sendmsg6_prog.skel.h"
+#include "recvmsg4_prog.skel.h"
+#include "recvmsg6_prog.skel.h"
#include "sendmsg_unix_prog.skel.h"
#include "recvmsg_unix_prog.skel.h"
+#include "getsockname4_prog.skel.h"
+#include "getsockname6_prog.skel.h"
#include "getsockname_unix_prog.skel.h"
+#include "getpeername4_prog.skel.h"
+#include "getpeername6_prog.skel.h"
#include "getpeername_unix_prog.skel.h"
#include "network_helpers.h"
+#ifndef ENOTSUPP
+# define ENOTSUPP 524
+#endif
+
+#define TEST_NS "sock_addr"
+#define TEST_IF_PREFIX "test_sock_addr"
+#define TEST_IPV4 "127.0.0.4"
+#define TEST_IPV6 "::6"
+
+#define SERV4_IP "192.168.1.254"
+#define SERV4_REWRITE_IP "127.0.0.1"
+#define SRC4_IP "172.16.0.1"
+#define SRC4_REWRITE_IP TEST_IPV4
+#define SERV4_PORT 4040
+#define SERV4_REWRITE_PORT 4444
+
+#define SERV6_IP "face:b00c:1234:5678::abcd"
+#define SERV6_REWRITE_IP "::1"
+#define SERV6_V4MAPPED_IP "::ffff:192.168.0.4"
+#define SRC6_IP "::1"
+#define SRC6_REWRITE_IP TEST_IPV6
+#define WILDCARD6_IP "::"
+#define SERV6_PORT 6060
+#define SERV6_REWRITE_PORT 6666
+
#define SERVUN_ADDRESS "bpf_cgroup_unix_test"
#define SERVUN_REWRITE_ADDRESS "bpf_cgroup_unix_test_rewrite"
-#define SRCUN_ADDRESS "bpf_cgroup_unix_test_src"
+#define SRCUN_ADDRESS "bpf_cgroup_unix_test_src"
+
+#define save_errno_do(op) ({ int __save = errno; op; errno = __save; })
enum sock_addr_test_type {
SOCK_ADDR_TEST_BIND,
@@ -23,152 +63,955 @@ enum sock_addr_test_type {
SOCK_ADDR_TEST_GETPEERNAME,
};
-typedef void *(*load_fn)(int cgroup_fd);
+typedef void *(*load_fn)(int cgroup_fd,
+ enum bpf_attach_type attach_type,
+ bool expect_reject);
typedef void (*destroy_fn)(void *skel);
-struct sock_addr_test {
- enum sock_addr_test_type type;
- const char *name;
- /* BPF prog properties */
- load_fn loadfn;
- destroy_fn destroyfn;
- /* Socket properties */
- int socket_family;
- int socket_type;
- /* IP:port pairs for BPF prog to override */
- const char *requested_addr;
- unsigned short requested_port;
- const char *expected_addr;
- unsigned short expected_port;
- const char *expected_src_addr;
+static int cmp_addr(const struct sockaddr_storage *addr1, socklen_t addr1_len,
+ const struct sockaddr_storage *addr2, socklen_t addr2_len,
+ bool cmp_port);
+
+struct init_sock_args {
+ int af;
+ int type;
};
-static void *connect_unix_prog_load(int cgroup_fd)
-{
- struct connect_unix_prog *skel;
+struct addr_args {
+ char addr[sizeof(struct sockaddr_storage)];
+ int addrlen;
+};
- skel = connect_unix_prog__open_and_load();
- if (!ASSERT_OK_PTR(skel, "skel_open"))
- goto cleanup;
+struct sendmsg_args {
+ struct addr_args addr;
+ char msg[10];
+ int msglen;
+};
- skel->links.connect_unix_prog = bpf_program__attach_cgroup(
- skel->progs.connect_unix_prog, cgroup_fd);
- if (!ASSERT_OK_PTR(skel->links.connect_unix_prog, "prog_attach"))
- goto cleanup;
+static struct sock_addr_kern *skel;
- return skel;
-cleanup:
- connect_unix_prog__destroy(skel);
- return NULL;
+static int run_bpf_prog(const char *prog_name, void *ctx, int ctx_size)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct bpf_program *prog;
+ int prog_fd, err;
+
+ topts.ctx_in = ctx;
+ topts.ctx_size_in = ctx_size;
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto err;
+
+ prog_fd = bpf_program__fd(prog);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, prog_name))
+ goto err;
+
+ err = topts.retval;
+ errno = -topts.retval;
+ goto out;
+err:
+ err = -1;
+out:
+ return err;
}
-static void connect_unix_prog_destroy(void *skel)
+static int kernel_init_sock(int af, int type, int protocol)
{
- connect_unix_prog__destroy(skel);
+ struct init_sock_args args = {
+ .af = af,
+ .type = type,
+ };
+
+ return run_bpf_prog("init_sock", &args, sizeof(args));
}
-static void *sendmsg_unix_prog_load(int cgroup_fd)
+static int kernel_close_sock(int fd)
{
- struct sendmsg_unix_prog *skel;
+ return run_bpf_prog("close_sock", NULL, 0);
+}
- skel = sendmsg_unix_prog__open_and_load();
- if (!ASSERT_OK_PTR(skel, "skel_open"))
- goto cleanup;
+static int sock_addr_op(const char *name, struct sockaddr *addr,
+ socklen_t *addrlen, bool expect_change)
+{
+ struct addr_args args;
+ int err;
- skel->links.sendmsg_unix_prog = bpf_program__attach_cgroup(
- skel->progs.sendmsg_unix_prog, cgroup_fd);
- if (!ASSERT_OK_PTR(skel->links.sendmsg_unix_prog, "prog_attach"))
- goto cleanup;
+ if (addrlen)
+ args.addrlen = *addrlen;
- return skel;
-cleanup:
- sendmsg_unix_prog__destroy(skel);
- return NULL;
+ if (addr)
+ memcpy(&args.addr, addr, *addrlen);
+
+ err = run_bpf_prog(name, &args, sizeof(args));
+
+ if (!expect_change && addr)
+ if (!ASSERT_EQ(cmp_addr((struct sockaddr_storage *)addr,
+ *addrlen,
+ (struct sockaddr_storage *)&args.addr,
+ args.addrlen, 1),
+ 0, "address_param_modified"))
+ return -1;
+
+ if (addrlen)
+ *addrlen = args.addrlen;
+
+ if (addr)
+ memcpy(addr, &args.addr, *addrlen);
+
+ return err;
}
-static void sendmsg_unix_prog_destroy(void *skel)
+static int send_msg_op(const char *name, struct sockaddr *addr,
+ socklen_t addrlen, const char *msg, int msglen)
{
- sendmsg_unix_prog__destroy(skel);
+ struct sendmsg_args args;
+ int err;
+
+ memset(&args, 0, sizeof(args));
+ memcpy(&args.addr.addr, addr, addrlen);
+ args.addr.addrlen = addrlen;
+ memcpy(args.msg, msg, msglen);
+ args.msglen = msglen;
+
+ err = run_bpf_prog(name, &args, sizeof(args));
+
+ if (!ASSERT_EQ(cmp_addr((struct sockaddr_storage *)addr,
+ addrlen,
+ (struct sockaddr_storage *)&args.addr.addr,
+ args.addr.addrlen, 1),
+ 0, "address_param_modified"))
+ return -1;
+
+ return err;
}
-static void *recvmsg_unix_prog_load(int cgroup_fd)
+static int kernel_connect(struct sockaddr *addr, socklen_t addrlen)
{
- struct recvmsg_unix_prog *skel;
-
- skel = recvmsg_unix_prog__open_and_load();
- if (!ASSERT_OK_PTR(skel, "skel_open"))
- goto cleanup;
+ return sock_addr_op("kernel_connect", addr, &addrlen, false);
+}
- skel->links.recvmsg_unix_prog = bpf_program__attach_cgroup(
- skel->progs.recvmsg_unix_prog, cgroup_fd);
- if (!ASSERT_OK_PTR(skel->links.recvmsg_unix_prog, "prog_attach"))
- goto cleanup;
+static int kernel_bind(int fd, struct sockaddr *addr, socklen_t addrlen)
+{
+ return sock_addr_op("kernel_bind", addr, &addrlen, false);
+}
- return skel;
-cleanup:
- recvmsg_unix_prog__destroy(skel);
- return NULL;
+static int kernel_listen(void)
+{
+ return sock_addr_op("kernel_listen", NULL, NULL, false);
}
-static void recvmsg_unix_prog_destroy(void *skel)
+static int kernel_sendmsg(int fd, struct sockaddr *addr, socklen_t addrlen,
+ char *msg, int msglen)
{
- recvmsg_unix_prog__destroy(skel);
+ return send_msg_op("kernel_sendmsg", addr, addrlen, msg, msglen);
}
-static void *getsockname_unix_prog_load(int cgroup_fd)
+static int sock_sendmsg(int fd, struct sockaddr *addr, socklen_t addrlen,
+ char *msg, int msglen)
{
- struct getsockname_unix_prog *skel;
+ return send_msg_op("sock_sendmsg", addr, addrlen, msg, msglen);
+}
- skel = getsockname_unix_prog__open_and_load();
- if (!ASSERT_OK_PTR(skel, "skel_open"))
- goto cleanup;
+static int kernel_getsockname(int fd, struct sockaddr *addr, socklen_t *addrlen)
+{
+ return sock_addr_op("kernel_getsockname", addr, addrlen, true);
+}
- skel->links.getsockname_unix_prog = bpf_program__attach_cgroup(
- skel->progs.getsockname_unix_prog, cgroup_fd);
- if (!ASSERT_OK_PTR(skel->links.getsockname_unix_prog, "prog_attach"))
- goto cleanup;
+static int kernel_getpeername(int fd, struct sockaddr *addr, socklen_t *addrlen)
+{
+ return sock_addr_op("kernel_getpeername", addr, addrlen, true);
+}
- return skel;
-cleanup:
- getsockname_unix_prog__destroy(skel);
- return NULL;
+int kernel_connect_to_addr(int type, const struct sockaddr_storage *addr, socklen_t addrlen,
+ const struct network_helper_opts *opts)
+{
+ int err;
+
+ if (!ASSERT_OK(kernel_init_sock(addr->ss_family, type, 0),
+ "kernel_init_sock"))
+ goto err;
+
+ if (kernel_connect((struct sockaddr *)addr, addrlen) < 0)
+ goto err;
+
+ /* Test code expects a "file descriptor" on success. */
+ err = 1;
+ goto out;
+err:
+ err = -1;
+ save_errno_do(ASSERT_OK(kernel_close_sock(0), "kernel_close_sock"));
+out:
+ return err;
}
-static void getsockname_unix_prog_destroy(void *skel)
+int kernel_start_server(int family, int type, const char *addr_str, __u16 port,
+ int timeout_ms)
{
- getsockname_unix_prog__destroy(skel);
+ struct sockaddr_storage addr;
+ socklen_t addrlen;
+ int err;
+
+ if (!ASSERT_OK(kernel_init_sock(family, type, 0), "kernel_init_sock"))
+ goto err;
+
+ if (make_sockaddr(family, addr_str, port, &addr, &addrlen))
+ goto err;
+
+ if (kernel_bind(0, (struct sockaddr *)&addr, addrlen) < 0)
+ goto err;
+
+ if (type == SOCK_STREAM) {
+ if (!ASSERT_OK(kernel_listen(), "kernel_listen"))
+ goto err;
+ }
+
+ /* Test code expects a "file descriptor" on success. */
+ err = 1;
+ goto out;
+err:
+ err = -1;
+ save_errno_do(ASSERT_OK(kernel_close_sock(0), "kernel_close_sock"));
+out:
+ return err;
}
-static void *getpeername_unix_prog_load(int cgroup_fd)
+struct sock_ops {
+ int (*connect_to_addr)(int type, const struct sockaddr_storage *addr,
+ socklen_t addrlen,
+ const struct network_helper_opts *opts);
+ int (*start_server)(int family, int type, const char *addr_str,
+ __u16 port, int timeout_ms);
+ int (*socket)(int famil, int type, int protocol);
+ int (*bind)(int fd, struct sockaddr *addr, socklen_t addrlen);
+ int (*getsockname)(int fd, struct sockaddr *addr, socklen_t *addrlen);
+ int (*getpeername)(int fd, struct sockaddr *addr, socklen_t *addrlen);
+ int (*sendmsg)(int fd, struct sockaddr *addr, socklen_t addrlen,
+ char *msg, int msglen);
+ int (*close)(int fd);
+};
+
+static int user_sendmsg(int fd, struct sockaddr *addr, socklen_t addrlen,
+ char *msg, int msglen)
{
- struct getpeername_unix_prog *skel;
+ struct msghdr hdr;
+ struct iovec iov;
- skel = getpeername_unix_prog__open_and_load();
- if (!ASSERT_OK_PTR(skel, "skel_open"))
- goto cleanup;
+ memset(&iov, 0, sizeof(iov));
+ iov.iov_base = msg;
+ iov.iov_len = msglen;
- skel->links.getpeername_unix_prog = bpf_program__attach_cgroup(
- skel->progs.getpeername_unix_prog, cgroup_fd);
- if (!ASSERT_OK_PTR(skel->links.getpeername_unix_prog, "prog_attach"))
- goto cleanup;
+ memset(&hdr, 0, sizeof(hdr));
+ hdr.msg_name = (void *)addr;
+ hdr.msg_namelen = addrlen;
+ hdr.msg_iov = &iov;
+ hdr.msg_iovlen = 1;
- return skel;
-cleanup:
- getpeername_unix_prog__destroy(skel);
- return NULL;
+ return sendmsg(fd, &hdr, 0);
}
-static void getpeername_unix_prog_destroy(void *skel)
+static int user_bind(int fd, struct sockaddr *addr, socklen_t addrlen)
{
- getpeername_unix_prog__destroy(skel);
+ return bind(fd, (const struct sockaddr *)addr, addrlen);
+}
+
+struct sock_ops user_ops = {
+ .connect_to_addr = connect_to_addr,
+ .start_server = start_server,
+ .socket = socket,
+ .bind = user_bind,
+ .getsockname = getsockname,
+ .getpeername = getpeername,
+ .sendmsg = user_sendmsg,
+ .close = close,
+};
+
+struct sock_ops kern_ops_sock_sendmsg = {
+ .connect_to_addr = kernel_connect_to_addr,
+ .start_server = kernel_start_server,
+ .socket = kernel_init_sock,
+ .bind = kernel_bind,
+ .getsockname = kernel_getsockname,
+ .getpeername = kernel_getpeername,
+ .sendmsg = sock_sendmsg,
+ .close = kernel_close_sock,
+};
+
+struct sock_ops kern_ops_kernel_sendmsg = {
+ .connect_to_addr = kernel_connect_to_addr,
+ .start_server = kernel_start_server,
+ .socket = kernel_init_sock,
+ .bind = kernel_bind,
+ .getsockname = kernel_getsockname,
+ .getpeername = kernel_getpeername,
+ .sendmsg = kernel_sendmsg,
+ .close = kernel_close_sock,
+};
+
+struct sock_addr_test {
+ enum sock_addr_test_type type;
+ const char *name;
+ /* BPF prog properties */
+ load_fn loadfn;
+ destroy_fn destroyfn;
+ enum bpf_attach_type attach_type;
+ /* Socket operations */
+ struct sock_ops *ops;
+ /* Socket properties */
+ int socket_family;
+ int socket_type;
+ /* IP:port pairs for BPF prog to override */
+ const char *requested_addr;
+ unsigned short requested_port;
+ const char *expected_addr;
+ unsigned short expected_port;
+ const char *expected_src_addr;
+ /* Expected test result */
+ enum {
+ LOAD_REJECT,
+ ATTACH_REJECT,
+ SYSCALL_EPERM,
+ SYSCALL_ENOTSUPP,
+ SUCCESS,
+ } expected_result;
+};
+
+#define BPF_SKEL_FUNCS_RAW(skel_name, prog_name) \
+static void *prog_name##_load_raw(int cgroup_fd, \
+ enum bpf_attach_type attach_type, \
+ bool expect_reject) \
+{ \
+ struct skel_name *skel = skel_name##__open(); \
+ int prog_fd = -1; \
+ if (!ASSERT_OK_PTR(skel, "skel_open")) \
+ goto cleanup; \
+ if (!ASSERT_OK(skel_name##__load(skel), "load")) \
+ goto cleanup; \
+ prog_fd = bpf_program__fd(skel->progs.prog_name); \
+ if (!ASSERT_GT(prog_fd, 0, "prog_fd")) \
+ goto cleanup; \
+ if (bpf_prog_attach(prog_fd, cgroup_fd, attach_type, \
+ BPF_F_ALLOW_OVERRIDE), "bpf_prog_attach") { \
+ ASSERT_TRUE(expect_reject, "unexpected rejection"); \
+ goto cleanup; \
+ } \
+ if (!ASSERT_FALSE(expect_reject, "expected rejection")) \
+ goto cleanup; \
+cleanup: \
+ if (prog_fd > 0) \
+ bpf_prog_detach(cgroup_fd, attach_type); \
+ skel_name##__destroy(skel); \
+ return NULL; \
+} \
+static void prog_name##_destroy_raw(void *progfd) \
+{ \
+ /* No-op. *_load_raw does all cleanup. */ \
+} \
+
+#define BPF_SKEL_FUNCS(skel_name, prog_name) \
+static void *prog_name##_load(int cgroup_fd, \
+ enum bpf_attach_type attach_type, \
+ bool expect_reject) \
+{ \
+ struct skel_name *skel = skel_name##__open(); \
+ if (!ASSERT_OK_PTR(skel, "skel_open")) \
+ goto cleanup; \
+ if (!ASSERT_OK(bpf_program__set_expected_attach_type(skel->progs.prog_name, \
+ attach_type), \
+ "set_expected_attach_type")) \
+ goto cleanup; \
+ if (skel_name##__load(skel)) { \
+ ASSERT_TRUE(expect_reject, "unexpected rejection"); \
+ goto cleanup; \
+ } \
+ if (!ASSERT_FALSE(expect_reject, "expected rejection")) \
+ goto cleanup; \
+ skel->links.prog_name = bpf_program__attach_cgroup( \
+ skel->progs.prog_name, cgroup_fd); \
+ if (!ASSERT_OK_PTR(skel->links.prog_name, "prog_attach")) \
+ goto cleanup; \
+ return skel; \
+cleanup: \
+ skel_name##__destroy(skel); \
+ return NULL; \
+} \
+static void prog_name##_destroy(void *skel) \
+{ \
+ skel_name##__destroy(skel); \
}
+BPF_SKEL_FUNCS(bind4_prog, bind_v4_prog);
+BPF_SKEL_FUNCS_RAW(bind4_prog, bind_v4_prog);
+BPF_SKEL_FUNCS(bind4_prog, bind_v4_deny_prog);
+BPF_SKEL_FUNCS(bind6_prog, bind_v6_prog);
+BPF_SKEL_FUNCS_RAW(bind6_prog, bind_v6_prog);
+BPF_SKEL_FUNCS(bind6_prog, bind_v6_deny_prog);
+BPF_SKEL_FUNCS(connect4_prog, connect_v4_prog);
+BPF_SKEL_FUNCS_RAW(connect4_prog, connect_v4_prog);
+BPF_SKEL_FUNCS(connect4_prog, connect_v4_deny_prog);
+BPF_SKEL_FUNCS(connect6_prog, connect_v6_prog);
+BPF_SKEL_FUNCS_RAW(connect6_prog, connect_v6_prog);
+BPF_SKEL_FUNCS(connect6_prog, connect_v6_deny_prog);
+BPF_SKEL_FUNCS(connect_unix_prog, connect_unix_prog);
+BPF_SKEL_FUNCS_RAW(connect_unix_prog, connect_unix_prog);
+BPF_SKEL_FUNCS(connect_unix_prog, connect_unix_deny_prog);
+BPF_SKEL_FUNCS(sendmsg4_prog, sendmsg_v4_prog);
+BPF_SKEL_FUNCS_RAW(sendmsg4_prog, sendmsg_v4_prog);
+BPF_SKEL_FUNCS(sendmsg4_prog, sendmsg_v4_deny_prog);
+BPF_SKEL_FUNCS(sendmsg6_prog, sendmsg_v6_prog);
+BPF_SKEL_FUNCS_RAW(sendmsg6_prog, sendmsg_v6_prog);
+BPF_SKEL_FUNCS(sendmsg6_prog, sendmsg_v6_deny_prog);
+BPF_SKEL_FUNCS(sendmsg6_prog, sendmsg_v6_preserve_dst_prog);
+BPF_SKEL_FUNCS(sendmsg6_prog, sendmsg_v6_v4mapped_prog);
+BPF_SKEL_FUNCS(sendmsg6_prog, sendmsg_v6_wildcard_prog);
+BPF_SKEL_FUNCS(sendmsg_unix_prog, sendmsg_unix_prog);
+BPF_SKEL_FUNCS_RAW(sendmsg_unix_prog, sendmsg_unix_prog);
+BPF_SKEL_FUNCS(sendmsg_unix_prog, sendmsg_unix_deny_prog);
+BPF_SKEL_FUNCS(recvmsg4_prog, recvmsg4_prog);
+BPF_SKEL_FUNCS_RAW(recvmsg4_prog, recvmsg4_prog);
+BPF_SKEL_FUNCS(recvmsg6_prog, recvmsg6_prog);
+BPF_SKEL_FUNCS_RAW(recvmsg6_prog, recvmsg6_prog);
+BPF_SKEL_FUNCS(recvmsg_unix_prog, recvmsg_unix_prog);
+BPF_SKEL_FUNCS_RAW(recvmsg_unix_prog, recvmsg_unix_prog);
+BPF_SKEL_FUNCS(getsockname_unix_prog, getsockname_unix_prog);
+BPF_SKEL_FUNCS_RAW(getsockname_unix_prog, getsockname_unix_prog);
+BPF_SKEL_FUNCS(getsockname4_prog, getsockname_v4_prog);
+BPF_SKEL_FUNCS_RAW(getsockname4_prog, getsockname_v4_prog);
+BPF_SKEL_FUNCS(getsockname6_prog, getsockname_v6_prog);
+BPF_SKEL_FUNCS_RAW(getsockname6_prog, getsockname_v6_prog);
+BPF_SKEL_FUNCS(getpeername_unix_prog, getpeername_unix_prog);
+BPF_SKEL_FUNCS_RAW(getpeername_unix_prog, getpeername_unix_prog);
+BPF_SKEL_FUNCS(getpeername4_prog, getpeername_v4_prog);
+BPF_SKEL_FUNCS_RAW(getpeername4_prog, getpeername_v4_prog);
+BPF_SKEL_FUNCS(getpeername6_prog, getpeername_v6_prog);
+BPF_SKEL_FUNCS_RAW(getpeername6_prog, getpeername_v6_prog);
+
static struct sock_addr_test tests[] = {
+ /* bind - system calls */
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind4: bind (stream)",
+ bind_v4_prog_load,
+ bind_v4_prog_destroy,
+ BPF_CGROUP_INET4_BIND,
+ &user_ops,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind4: bind deny (stream)",
+ bind_v4_deny_prog_load,
+ bind_v4_deny_prog_destroy,
+ BPF_CGROUP_INET4_BIND,
+ &user_ops,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ NULL,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind4: bind (dgram)",
+ bind_v4_prog_load,
+ bind_v4_prog_destroy,
+ BPF_CGROUP_INET4_BIND,
+ &user_ops,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind4: bind deny (dgram)",
+ bind_v4_deny_prog_load,
+ bind_v4_deny_prog_destroy,
+ BPF_CGROUP_INET4_BIND,
+ &user_ops,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ NULL,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind4: load prog with wrong expected attach type",
+ bind_v4_prog_load,
+ bind_v4_prog_destroy,
+ BPF_CGROUP_INET6_BIND,
+ &user_ops,
+ AF_INET,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ LOAD_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind4: attach prog with wrong attach type",
+ bind_v4_prog_load_raw,
+ bind_v4_prog_destroy_raw,
+ BPF_CGROUP_INET6_BIND,
+ &user_ops,
+ AF_INET,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind6: bind (stream)",
+ bind_v6_prog_load,
+ bind_v6_prog_destroy,
+ BPF_CGROUP_INET6_BIND,
+ &user_ops,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind6: bind deny (stream)",
+ bind_v6_deny_prog_load,
+ bind_v6_deny_prog_destroy,
+ BPF_CGROUP_INET6_BIND,
+ &user_ops,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ NULL,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind6: bind (dgram)",
+ bind_v6_prog_load,
+ bind_v6_prog_destroy,
+ BPF_CGROUP_INET6_BIND,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind6: bind deny (dgram)",
+ bind_v6_deny_prog_load,
+ bind_v6_deny_prog_destroy,
+ BPF_CGROUP_INET6_BIND,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ NULL,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind6: load prog with wrong expected attach type",
+ bind_v6_prog_load,
+ bind_v6_prog_destroy,
+ BPF_CGROUP_INET4_BIND,
+ &user_ops,
+ AF_INET6,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ LOAD_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind6: attach prog with wrong attach type",
+ bind_v6_prog_load_raw,
+ bind_v6_prog_destroy_raw,
+ BPF_CGROUP_INET4_BIND,
+ &user_ops,
+ AF_INET,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+
+ /* bind - kernel calls */
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind4: kernel_bind (stream)",
+ bind_v4_prog_load,
+ bind_v4_prog_destroy,
+ BPF_CGROUP_INET4_BIND,
+ &kern_ops_sock_sendmsg,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind4: kernel_bind deny (stream)",
+ bind_v4_deny_prog_load,
+ bind_v4_deny_prog_destroy,
+ BPF_CGROUP_INET4_BIND,
+ &kern_ops_sock_sendmsg,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ NULL,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind4: kernel_bind (dgram)",
+ bind_v4_prog_load,
+ bind_v4_prog_destroy,
+ BPF_CGROUP_INET4_BIND,
+ &kern_ops_sock_sendmsg,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind4: kernel_bind deny (dgram)",
+ bind_v4_deny_prog_load,
+ bind_v4_deny_prog_destroy,
+ BPF_CGROUP_INET4_BIND,
+ &kern_ops_sock_sendmsg,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ NULL,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind6: kernel_bind (stream)",
+ bind_v6_prog_load,
+ bind_v6_prog_destroy,
+ BPF_CGROUP_INET6_BIND,
+ &kern_ops_sock_sendmsg,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind6: kernel_bind deny (stream)",
+ bind_v6_deny_prog_load,
+ bind_v6_deny_prog_destroy,
+ BPF_CGROUP_INET6_BIND,
+ &kern_ops_sock_sendmsg,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ NULL,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind6: kernel_bind (dgram)",
+ bind_v6_prog_load,
+ bind_v6_prog_destroy,
+ BPF_CGROUP_INET6_BIND,
+ &kern_ops_sock_sendmsg,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_BIND,
+ "bind6: kernel_bind deny (dgram)",
+ bind_v6_deny_prog_load,
+ bind_v6_deny_prog_destroy,
+ BPF_CGROUP_INET6_BIND,
+ &kern_ops_sock_sendmsg,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ NULL,
+ SYSCALL_EPERM,
+ },
+
+ /* connect - system calls */
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect4: connect (stream)",
+ connect_v4_prog_load,
+ connect_v4_prog_destroy,
+ BPF_CGROUP_INET4_CONNECT,
+ &user_ops,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect4: connect deny (stream)",
+ connect_v4_deny_prog_load,
+ connect_v4_deny_prog_destroy,
+ BPF_CGROUP_INET4_CONNECT,
+ &user_ops,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect4: connect (dgram)",
+ connect_v4_prog_load,
+ connect_v4_prog_destroy,
+ BPF_CGROUP_INET4_CONNECT,
+ &user_ops,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect4: connect deny (dgram)",
+ connect_v4_deny_prog_load,
+ connect_v4_deny_prog_destroy,
+ BPF_CGROUP_INET4_CONNECT,
+ &user_ops,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect4: load prog with wrong expected attach type",
+ connect_v4_prog_load,
+ connect_v4_prog_destroy,
+ BPF_CGROUP_INET6_CONNECT,
+ &user_ops,
+ AF_INET,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ LOAD_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect4: attach prog with wrong attach type",
+ connect_v4_prog_load_raw,
+ connect_v4_prog_destroy_raw,
+ BPF_CGROUP_INET6_CONNECT,
+ &user_ops,
+ AF_INET,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect6: connect (stream)",
+ connect_v6_prog_load,
+ connect_v6_prog_destroy,
+ BPF_CGROUP_INET6_CONNECT,
+ &user_ops,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect6: connect deny (stream)",
+ connect_v6_deny_prog_load,
+ connect_v6_deny_prog_destroy,
+ BPF_CGROUP_INET6_CONNECT,
+ &user_ops,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
{
SOCK_ADDR_TEST_CONNECT,
- "connect_unix",
+ "connect6: connect (dgram)",
+ connect_v6_prog_load,
+ connect_v6_prog_destroy,
+ BPF_CGROUP_INET6_CONNECT,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect6: connect deny (dgram)",
+ connect_v6_deny_prog_load,
+ connect_v6_deny_prog_destroy,
+ BPF_CGROUP_INET6_CONNECT,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect6: load prog with wrong expected attach type",
+ connect_v6_prog_load,
+ connect_v6_prog_destroy,
+ BPF_CGROUP_INET4_CONNECT,
+ &user_ops,
+ AF_INET6,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ LOAD_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect6: attach prog with wrong attach type",
+ connect_v6_prog_load_raw,
+ connect_v6_prog_destroy_raw,
+ BPF_CGROUP_INET4_CONNECT,
+ &user_ops,
+ AF_INET,
+ SOCK_STREAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect_unix: connect (stream)",
connect_unix_prog_load,
connect_unix_prog_destroy,
+ BPF_CGROUP_UNIX_CONNECT,
+ &user_ops,
AF_UNIX,
SOCK_STREAM,
SERVUN_ADDRESS,
@@ -176,12 +1019,631 @@ static struct sock_addr_test tests[] = {
SERVUN_REWRITE_ADDRESS,
0,
NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect_unix: connect deny (stream)",
+ connect_unix_deny_prog_load,
+ connect_unix_deny_prog_destroy,
+ BPF_CGROUP_UNIX_CONNECT,
+ &user_ops,
+ AF_UNIX,
+ SOCK_STREAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect_unix: attach prog with wrong attach type",
+ connect_unix_prog_load_raw,
+ connect_unix_prog_destroy_raw,
+ BPF_CGROUP_INET4_CONNECT,
+ &user_ops,
+ AF_UNIX,
+ SOCK_STREAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+
+ /* connect - kernel calls */
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect4: kernel_connect (stream)",
+ connect_v4_prog_load,
+ connect_v4_prog_destroy,
+ BPF_CGROUP_INET4_CONNECT,
+ &kern_ops_sock_sendmsg,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect4: kernel_connect deny (stream)",
+ connect_v4_deny_prog_load,
+ connect_v4_deny_prog_destroy,
+ BPF_CGROUP_INET4_CONNECT,
+ &kern_ops_sock_sendmsg,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect4: kernel_connect (dgram)",
+ connect_v4_prog_load,
+ connect_v4_prog_destroy,
+ BPF_CGROUP_INET4_CONNECT,
+ &kern_ops_sock_sendmsg,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect4: kernel_connect deny (dgram)",
+ connect_v4_deny_prog_load,
+ connect_v4_deny_prog_destroy,
+ BPF_CGROUP_INET4_CONNECT,
+ &kern_ops_sock_sendmsg,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect6: kernel_connect (stream)",
+ connect_v6_prog_load,
+ connect_v6_prog_destroy,
+ BPF_CGROUP_INET6_CONNECT,
+ &kern_ops_sock_sendmsg,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect6: kernel_connect deny (stream)",
+ connect_v6_deny_prog_load,
+ connect_v6_deny_prog_destroy,
+ BPF_CGROUP_INET6_CONNECT,
+ &kern_ops_sock_sendmsg,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect6: kernel_connect (dgram)",
+ connect_v6_prog_load,
+ connect_v6_prog_destroy,
+ BPF_CGROUP_INET6_CONNECT,
+ &kern_ops_sock_sendmsg,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect6: kernel_connect deny (dgram)",
+ connect_v6_deny_prog_load,
+ connect_v6_deny_prog_destroy,
+ BPF_CGROUP_INET6_CONNECT,
+ &kern_ops_sock_sendmsg,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect_unix: kernel_connect (dgram)",
+ connect_unix_prog_load,
+ connect_unix_prog_destroy,
+ BPF_CGROUP_UNIX_CONNECT,
+ &kern_ops_sock_sendmsg,
+ AF_UNIX,
+ SOCK_STREAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect_unix: kernel_connect deny (dgram)",
+ connect_unix_deny_prog_load,
+ connect_unix_deny_prog_destroy,
+ BPF_CGROUP_UNIX_CONNECT,
+ &kern_ops_sock_sendmsg,
+ AF_UNIX,
+ SOCK_STREAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ SYSCALL_EPERM,
+ },
+
+ /* sendmsg - system calls */
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg4: sendmsg (dgram)",
+ sendmsg_v4_prog_load,
+ sendmsg_v4_prog_destroy,
+ BPF_CGROUP_UDP4_SENDMSG,
+ &user_ops,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg4: sendmsg deny (dgram)",
+ sendmsg_v4_deny_prog_load,
+ sendmsg_v4_deny_prog_destroy,
+ BPF_CGROUP_UDP4_SENDMSG,
+ &user_ops,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg4: load prog with wrong expected attach type",
+ sendmsg_v4_prog_load,
+ sendmsg_v4_prog_destroy,
+ BPF_CGROUP_UDP6_SENDMSG,
+ &user_ops,
+ AF_INET,
+ SOCK_DGRAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ LOAD_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg4: attach prog with wrong attach type",
+ sendmsg_v4_prog_load_raw,
+ sendmsg_v4_prog_destroy_raw,
+ BPF_CGROUP_UDP6_SENDMSG,
+ &user_ops,
+ AF_INET,
+ SOCK_DGRAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg6: sendmsg (dgram)",
+ sendmsg_v6_prog_load,
+ sendmsg_v6_prog_destroy,
+ BPF_CGROUP_UDP6_SENDMSG,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg6: sendmsg [::] (BSD'ism) (dgram)",
+ sendmsg_v6_preserve_dst_prog_load,
+ sendmsg_v6_preserve_dst_prog_destroy,
+ BPF_CGROUP_UDP6_SENDMSG,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ WILDCARD6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_PORT,
+ SRC6_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg6: sendmsg deny (dgram)",
+ sendmsg_v6_deny_prog_load,
+ sendmsg_v6_deny_prog_destroy,
+ BPF_CGROUP_UDP6_SENDMSG,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg6: sendmsg IPv4-mapped IPv6 (dgram)",
+ sendmsg_v6_v4mapped_prog_load,
+ sendmsg_v6_v4mapped_prog_destroy,
+ BPF_CGROUP_UDP6_SENDMSG,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SYSCALL_ENOTSUPP,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg6: sendmsg dst IP = [::] (BSD'ism) (dgram)",
+ sendmsg_v6_wildcard_prog_load,
+ sendmsg_v6_wildcard_prog_destroy,
+ BPF_CGROUP_UDP6_SENDMSG,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg6: load prog with wrong expected attach type",
+ sendmsg_v6_prog_load,
+ sendmsg_v6_prog_destroy,
+ BPF_CGROUP_UDP4_SENDMSG,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ LOAD_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg6: attach prog with wrong attach type",
+ sendmsg_v6_prog_load_raw,
+ sendmsg_v6_prog_destroy_raw,
+ BPF_CGROUP_UDP4_SENDMSG,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ NULL,
+ 0,
+ NULL,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg_unix: sendmsg (dgram)",
+ sendmsg_unix_prog_load,
+ sendmsg_unix_prog_destroy,
+ BPF_CGROUP_UNIX_SENDMSG,
+ &user_ops,
+ AF_UNIX,
+ SOCK_DGRAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg_unix: sendmsg deny (dgram)",
+ sendmsg_unix_deny_prog_load,
+ sendmsg_unix_deny_prog_destroy,
+ BPF_CGROUP_UNIX_SENDMSG,
+ &user_ops,
+ AF_UNIX,
+ SOCK_DGRAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg_unix: attach prog with wrong attach type",
+ sendmsg_unix_prog_load_raw,
+ sendmsg_unix_prog_destroy_raw,
+ BPF_CGROUP_UDP4_SENDMSG,
+ &user_ops,
+ AF_UNIX,
+ SOCK_DGRAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+
+ /* sendmsg - kernel calls (sock_sendmsg) */
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg4: sock_sendmsg (dgram)",
+ sendmsg_v4_prog_load,
+ sendmsg_v4_prog_destroy,
+ BPF_CGROUP_UDP4_SENDMSG,
+ &kern_ops_sock_sendmsg,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg4: sock_sendmsg deny (dgram)",
+ sendmsg_v4_deny_prog_load,
+ sendmsg_v4_deny_prog_destroy,
+ BPF_CGROUP_UDP4_SENDMSG,
+ &kern_ops_sock_sendmsg,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg6: sock_sendmsg (dgram)",
+ sendmsg_v6_prog_load,
+ sendmsg_v6_prog_destroy,
+ BPF_CGROUP_UDP6_SENDMSG,
+ &kern_ops_sock_sendmsg,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg6: sock_sendmsg [::] (BSD'ism) (dgram)",
+ sendmsg_v6_preserve_dst_prog_load,
+ sendmsg_v6_preserve_dst_prog_destroy,
+ BPF_CGROUP_UDP6_SENDMSG,
+ &kern_ops_sock_sendmsg,
+ AF_INET6,
+ SOCK_DGRAM,
+ WILDCARD6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_PORT,
+ SRC6_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg6: sock_sendmsg deny (dgram)",
+ sendmsg_v6_deny_prog_load,
+ sendmsg_v6_deny_prog_destroy,
+ BPF_CGROUP_UDP6_SENDMSG,
+ &kern_ops_sock_sendmsg,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg_unix: sock_sendmsg (dgram)",
+ sendmsg_unix_prog_load,
+ sendmsg_unix_prog_destroy,
+ BPF_CGROUP_UNIX_SENDMSG,
+ &kern_ops_sock_sendmsg,
+ AF_UNIX,
+ SOCK_DGRAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg_unix: sock_sendmsg deny (dgram)",
+ sendmsg_unix_deny_prog_load,
+ sendmsg_unix_deny_prog_destroy,
+ BPF_CGROUP_UNIX_SENDMSG,
+ &kern_ops_sock_sendmsg,
+ AF_UNIX,
+ SOCK_DGRAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ SYSCALL_EPERM,
+ },
+
+ /* sendmsg - kernel calls (kernel_sendmsg) */
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg4: kernel_sendmsg (dgram)",
+ sendmsg_v4_prog_load,
+ sendmsg_v4_prog_destroy,
+ BPF_CGROUP_UDP4_SENDMSG,
+ &kern_ops_kernel_sendmsg,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg4: kernel_sendmsg deny (dgram)",
+ sendmsg_v4_deny_prog_load,
+ sendmsg_v4_deny_prog_destroy,
+ BPF_CGROUP_UDP4_SENDMSG,
+ &kern_ops_kernel_sendmsg,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_IP,
+ SERV4_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SRC4_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg6: kernel_sendmsg (dgram)",
+ sendmsg_v6_prog_load,
+ sendmsg_v6_prog_destroy,
+ BPF_CGROUP_UDP6_SENDMSG,
+ &kern_ops_kernel_sendmsg,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg6: kernel_sendmsg [::] (BSD'ism) (dgram)",
+ sendmsg_v6_preserve_dst_prog_load,
+ sendmsg_v6_preserve_dst_prog_destroy,
+ BPF_CGROUP_UDP6_SENDMSG,
+ &kern_ops_kernel_sendmsg,
+ AF_INET6,
+ SOCK_DGRAM,
+ WILDCARD6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_PORT,
+ SRC6_IP,
+ SUCCESS,
},
{
SOCK_ADDR_TEST_SENDMSG,
- "sendmsg_unix",
+ "sendmsg6: kernel_sendmsg deny (dgram)",
+ sendmsg_v6_deny_prog_load,
+ sendmsg_v6_deny_prog_destroy,
+ BPF_CGROUP_UDP6_SENDMSG,
+ &kern_ops_kernel_sendmsg,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_IP,
+ SERV6_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SRC6_REWRITE_IP,
+ SYSCALL_EPERM,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg_unix: sock_sendmsg (dgram)",
sendmsg_unix_prog_load,
sendmsg_unix_prog_destroy,
+ BPF_CGROUP_UNIX_SENDMSG,
+ &kern_ops_kernel_sendmsg,
AF_UNIX,
SOCK_DGRAM,
SERVUN_ADDRESS,
@@ -189,12 +1651,97 @@ static struct sock_addr_test tests[] = {
SERVUN_REWRITE_ADDRESS,
0,
NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg_unix: kernel_sendmsg deny (dgram)",
+ sendmsg_unix_deny_prog_load,
+ sendmsg_unix_deny_prog_destroy,
+ BPF_CGROUP_UNIX_SENDMSG,
+ &kern_ops_kernel_sendmsg,
+ AF_UNIX,
+ SOCK_DGRAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ SYSCALL_EPERM,
+ },
+
+ /* recvmsg - system calls */
+ {
+ SOCK_ADDR_TEST_RECVMSG,
+ "recvmsg4: recvfrom (dgram)",
+ recvmsg4_prog_load,
+ recvmsg4_prog_destroy,
+ BPF_CGROUP_UDP4_RECVMSG,
+ &user_ops,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_RECVMSG,
+ "recvmsg4: attach prog with wrong attach type",
+ recvmsg4_prog_load_raw,
+ recvmsg4_prog_destroy_raw,
+ BPF_CGROUP_UDP6_RECVMSG,
+ &user_ops,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_IP,
+ ATTACH_REJECT,
},
{
SOCK_ADDR_TEST_RECVMSG,
- "recvmsg_unix-dgram",
+ "recvmsg6: recvfrom (dgram)",
+ recvmsg6_prog_load,
+ recvmsg6_prog_destroy,
+ BPF_CGROUP_UDP6_RECVMSG,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_IP,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_RECVMSG,
+ "recvmsg6: attach prog with wrong attach type",
+ recvmsg6_prog_load_raw,
+ recvmsg6_prog_destroy_raw,
+ BPF_CGROUP_UDP4_RECVMSG,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_IP,
+ ATTACH_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_RECVMSG,
+ "recvmsg_unix: recvfrom (dgram)",
recvmsg_unix_prog_load,
recvmsg_unix_prog_destroy,
+ BPF_CGROUP_UNIX_RECVMSG,
+ &user_ops,
AF_UNIX,
SOCK_DGRAM,
SERVUN_REWRITE_ADDRESS,
@@ -202,12 +1749,15 @@ static struct sock_addr_test tests[] = {
SERVUN_REWRITE_ADDRESS,
0,
SERVUN_ADDRESS,
+ SUCCESS,
},
{
SOCK_ADDR_TEST_RECVMSG,
- "recvmsg_unix-stream",
+ "recvmsg_unix: recvfrom (stream)",
recvmsg_unix_prog_load,
recvmsg_unix_prog_destroy,
+ BPF_CGROUP_UNIX_RECVMSG,
+ &user_ops,
AF_UNIX,
SOCK_STREAM,
SERVUN_REWRITE_ADDRESS,
@@ -215,12 +1765,227 @@ static struct sock_addr_test tests[] = {
SERVUN_REWRITE_ADDRESS,
0,
SERVUN_ADDRESS,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_RECVMSG,
+ "recvmsg_unix: attach prog with wrong attach type",
+ recvmsg_unix_prog_load_raw,
+ recvmsg_unix_prog_destroy_raw,
+ BPF_CGROUP_UDP4_RECVMSG,
+ &user_ops,
+ AF_INET6,
+ SOCK_STREAM,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ SERVUN_ADDRESS,
+ ATTACH_REJECT,
},
+
+ /* getsockname - system calls */
{
SOCK_ADDR_TEST_GETSOCKNAME,
- "getsockname_unix",
+ "getsockname4: getsockname (stream)",
+ getsockname_v4_prog_load,
+ getsockname_v4_prog_destroy,
+ BPF_CGROUP_INET4_GETSOCKNAME,
+ &user_ops,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_IP,
+ SERV4_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ "getsockname4: getsockname (dgram)",
+ getsockname_v4_prog_load,
+ getsockname_v4_prog_destroy,
+ BPF_CGROUP_INET4_GETSOCKNAME,
+ &user_ops,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_IP,
+ SERV4_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ "getsockname4: attach prog with wrong attach type",
+ getsockname_v4_prog_load_raw,
+ getsockname_v4_prog_destroy_raw,
+ BPF_CGROUP_INET6_GETSOCKNAME,
+ &user_ops,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_IP,
+ SERV4_PORT,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ "getsockname6: getsockname (stream)",
+ getsockname_v6_prog_load,
+ getsockname_v6_prog_destroy,
+ BPF_CGROUP_INET6_GETSOCKNAME,
+ &user_ops,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_IP,
+ SERV6_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ "getsockname6: getsockname (dgram)",
+ getsockname_v6_prog_load,
+ getsockname_v6_prog_destroy,
+ BPF_CGROUP_INET6_GETSOCKNAME,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_IP,
+ SERV6_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ "getsockname6: attach prog with wrong attach type",
+ getsockname_v6_prog_load_raw,
+ getsockname_v6_prog_destroy_raw,
+ BPF_CGROUP_INET4_GETSOCKNAME,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_IP,
+ SERV6_PORT,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ "getsockname_unix: getsockname",
+ getsockname_unix_prog_load,
+ getsockname_unix_prog_destroy,
+ BPF_CGROUP_UNIX_GETSOCKNAME,
+ &user_ops,
+ AF_UNIX,
+ SOCK_STREAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ "getsockname_unix: attach prog with wrong attach type",
+ getsockname_unix_prog_load_raw,
+ getsockname_unix_prog_destroy_raw,
+ BPF_CGROUP_INET4_GETSOCKNAME,
+ &user_ops,
+ AF_UNIX,
+ SOCK_STREAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+
+ /* getsockname - kernel calls */
+ {
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ "getsockname4: kernel_getsockname (stream)",
+ getsockname_v4_prog_load,
+ getsockname_v4_prog_destroy,
+ BPF_CGROUP_INET4_GETSOCKNAME,
+ &kern_ops_kernel_sendmsg,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_IP,
+ SERV4_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ "getsockname4: kernel_getsockname (dgram)",
+ getsockname_v4_prog_load,
+ getsockname_v4_prog_destroy,
+ BPF_CGROUP_INET4_GETSOCKNAME,
+ &kern_ops_kernel_sendmsg,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_IP,
+ SERV4_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ "getsockname6: kernel_getsockname (stream)",
+ getsockname_v6_prog_load,
+ getsockname_v6_prog_destroy,
+ BPF_CGROUP_INET6_GETSOCKNAME,
+ &kern_ops_kernel_sendmsg,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_IP,
+ SERV6_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ "getsockname6: kernel_getsockname (dgram)",
+ getsockname_v6_prog_load,
+ getsockname_v6_prog_destroy,
+ BPF_CGROUP_INET6_GETSOCKNAME,
+ &kern_ops_kernel_sendmsg,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_IP,
+ SERV6_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ "getsockname_unix: kernel_getsockname",
getsockname_unix_prog_load,
getsockname_unix_prog_destroy,
+ BPF_CGROUP_UNIX_GETSOCKNAME,
+ &kern_ops_kernel_sendmsg,
AF_UNIX,
SOCK_STREAM,
SERVUN_ADDRESS,
@@ -228,12 +1993,113 @@ static struct sock_addr_test tests[] = {
SERVUN_REWRITE_ADDRESS,
0,
NULL,
+ SUCCESS,
+ },
+
+ /* getpeername - system calls */
+ {
+ SOCK_ADDR_TEST_GETPEERNAME,
+ "getpeername4: getpeername (stream)",
+ getpeername_v4_prog_load,
+ getpeername_v4_prog_destroy,
+ BPF_CGROUP_INET4_GETPEERNAME,
+ &user_ops,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_IP,
+ SERV4_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETPEERNAME,
+ "getpeername4: getpeername (dgram)",
+ getpeername_v4_prog_load,
+ getpeername_v4_prog_destroy,
+ BPF_CGROUP_INET4_GETPEERNAME,
+ &user_ops,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_IP,
+ SERV4_PORT,
+ NULL,
+ SUCCESS,
},
{
SOCK_ADDR_TEST_GETPEERNAME,
- "getpeername_unix",
+ "getpeername4: attach prog with wrong attach type",
+ getpeername_v4_prog_load_raw,
+ getpeername_v4_prog_destroy_raw,
+ BPF_CGROUP_INET6_GETSOCKNAME,
+ &user_ops,
+ AF_UNIX,
+ SOCK_DGRAM,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_IP,
+ SERV4_PORT,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_GETPEERNAME,
+ "getpeername6: getpeername (stream)",
+ getpeername_v6_prog_load,
+ getpeername_v6_prog_destroy,
+ BPF_CGROUP_INET6_GETPEERNAME,
+ &user_ops,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_IP,
+ SERV6_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETPEERNAME,
+ "getpeername6: getpeername (dgram)",
+ getpeername_v6_prog_load,
+ getpeername_v6_prog_destroy,
+ BPF_CGROUP_INET6_GETPEERNAME,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_IP,
+ SERV6_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETPEERNAME,
+ "getpeername6: attach prog with wrong attach type",
+ getpeername_v6_prog_load_raw,
+ getpeername_v6_prog_destroy_raw,
+ BPF_CGROUP_INET4_GETSOCKNAME,
+ &user_ops,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_IP,
+ SERV6_PORT,
+ NULL,
+ ATTACH_REJECT,
+ },
+ {
+ SOCK_ADDR_TEST_GETPEERNAME,
+ "getpeername_unix: getpeername",
getpeername_unix_prog_load,
getpeername_unix_prog_destroy,
+ BPF_CGROUP_UNIX_GETPEERNAME,
+ &user_ops,
AF_UNIX,
SOCK_STREAM,
SERVUN_ADDRESS,
@@ -241,6 +2107,105 @@ static struct sock_addr_test tests[] = {
SERVUN_REWRITE_ADDRESS,
0,
NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETPEERNAME,
+ "getpeername_unix: attach prog with wrong attach type",
+ getpeername_unix_prog_load_raw,
+ getpeername_unix_prog_destroy_raw,
+ BPF_CGROUP_INET4_GETSOCKNAME,
+ &user_ops,
+ AF_UNIX,
+ SOCK_STREAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ ATTACH_REJECT,
+ },
+
+ /* getpeername - kernel calls */
+ {
+ SOCK_ADDR_TEST_GETPEERNAME,
+ "getpeername4: kernel_getpeername (stream)",
+ getpeername_v4_prog_load,
+ getpeername_v4_prog_destroy,
+ BPF_CGROUP_INET4_GETPEERNAME,
+ &kern_ops_kernel_sendmsg,
+ AF_INET,
+ SOCK_STREAM,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_IP,
+ SERV4_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETPEERNAME,
+ "getpeername4: kernel_getpeername (dgram)",
+ getpeername_v4_prog_load,
+ getpeername_v4_prog_destroy,
+ BPF_CGROUP_INET4_GETPEERNAME,
+ &kern_ops_kernel_sendmsg,
+ AF_INET,
+ SOCK_DGRAM,
+ SERV4_REWRITE_IP,
+ SERV4_REWRITE_PORT,
+ SERV4_IP,
+ SERV4_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETPEERNAME,
+ "getpeername6: kernel_getpeername (stream)",
+ getpeername_v6_prog_load,
+ getpeername_v6_prog_destroy,
+ BPF_CGROUP_INET6_GETPEERNAME,
+ &kern_ops_kernel_sendmsg,
+ AF_INET6,
+ SOCK_STREAM,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_IP,
+ SERV6_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETPEERNAME,
+ "getpeername6: kernel_getpeername (dgram)",
+ getpeername_v6_prog_load,
+ getpeername_v6_prog_destroy,
+ BPF_CGROUP_INET6_GETPEERNAME,
+ &kern_ops_kernel_sendmsg,
+ AF_INET6,
+ SOCK_DGRAM,
+ SERV6_REWRITE_IP,
+ SERV6_REWRITE_PORT,
+ SERV6_IP,
+ SERV6_PORT,
+ NULL,
+ SUCCESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETPEERNAME,
+ "getpeername_unix: kernel_getpeername",
+ getpeername_unix_prog_load,
+ getpeername_unix_prog_destroy,
+ BPF_CGROUP_UNIX_GETPEERNAME,
+ &kern_ops_kernel_sendmsg,
+ AF_UNIX,
+ SOCK_STREAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ SUCCESS,
},
};
@@ -294,28 +2259,40 @@ static int cmp_sock_addr(info_fn fn, int sock1,
return cmp_addr(&addr1, len1, addr2, addr2_len, cmp_port);
}
-static int cmp_local_addr(int sock1, const struct sockaddr_storage *addr2,
- socklen_t addr2_len, bool cmp_port)
+static int load_sock_addr_kern(void)
{
- return cmp_sock_addr(getsockname, sock1, addr2, addr2_len, cmp_port);
+ int err;
+
+ skel = sock_addr_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel"))
+ goto err;
+
+ err = 0;
+ goto out;
+err:
+ err = -1;
+out:
+ return err;
}
-static int cmp_peer_addr(int sock1, const struct sockaddr_storage *addr2,
- socklen_t addr2_len, bool cmp_port)
+static void unload_sock_addr_kern(void)
{
- return cmp_sock_addr(getpeername, sock1, addr2, addr2_len, cmp_port);
+ sock_addr_kern__destroy(skel);
}
-static void test_bind(struct sock_addr_test *test)
+static int test_bind(struct sock_addr_test *test)
{
struct sockaddr_storage expected_addr;
socklen_t expected_addr_len = sizeof(struct sockaddr_storage);
int serv = -1, client = -1, err;
- serv = start_server(test->socket_family, test->socket_type,
- test->requested_addr, test->requested_port, 0);
- if (!ASSERT_GE(serv, 0, "start_server"))
- goto cleanup;
+ serv = test->ops->start_server(test->socket_family, test->socket_type,
+ test->requested_addr,
+ test->requested_port, 0);
+ if (serv < 0) {
+ err = errno;
+ goto err;
+ }
err = make_sockaddr(test->socket_family,
test->expected_addr, test->expected_port,
@@ -323,23 +2300,28 @@ static void test_bind(struct sock_addr_test *test)
if (!ASSERT_EQ(err, 0, "make_sockaddr"))
goto cleanup;
- err = cmp_local_addr(serv, &expected_addr, expected_addr_len, true);
+ err = cmp_sock_addr(test->ops->getsockname, serv, &expected_addr,
+ expected_addr_len, true);
if (!ASSERT_EQ(err, 0, "cmp_local_addr"))
goto cleanup;
/* Try to connect to server just in case */
- client = connect_to_addr(&expected_addr, expected_addr_len, test->socket_type);
+ client = connect_to_addr(test->socket_type, &expected_addr, expected_addr_len, NULL);
if (!ASSERT_GE(client, 0, "connect_to_addr"))
goto cleanup;
cleanup:
+ err = 0;
+err:
if (client != -1)
close(client);
if (serv != -1)
- close(serv);
+ test->ops->close(serv);
+
+ return err;
}
-static void test_connect(struct sock_addr_test *test)
+static int test_connect(struct sock_addr_test *test)
{
struct sockaddr_storage addr, expected_addr, expected_src_addr;
socklen_t addr_len = sizeof(struct sockaddr_storage),
@@ -357,9 +2339,12 @@ static void test_connect(struct sock_addr_test *test)
if (!ASSERT_EQ(err, 0, "make_sockaddr"))
goto cleanup;
- client = connect_to_addr(&addr, addr_len, test->socket_type);
- if (!ASSERT_GE(client, 0, "connect_to_addr"))
- goto cleanup;
+ client = test->ops->connect_to_addr(test->socket_type, &addr, addr_len,
+ NULL);
+ if (client < 0) {
+ err = errno;
+ goto err;
+ }
err = make_sockaddr(test->socket_family, test->expected_addr, test->expected_port,
&expected_addr, &expected_addr_len);
@@ -373,29 +2358,34 @@ static void test_connect(struct sock_addr_test *test)
goto cleanup;
}
- err = cmp_peer_addr(client, &expected_addr, expected_addr_len, true);
+ err = cmp_sock_addr(test->ops->getpeername, client, &expected_addr,
+ expected_addr_len, true);
if (!ASSERT_EQ(err, 0, "cmp_peer_addr"))
goto cleanup;
if (test->expected_src_addr) {
- err = cmp_local_addr(client, &expected_src_addr, expected_src_addr_len, false);
+ err = cmp_sock_addr(test->ops->getsockname, client,
+ &expected_src_addr, expected_src_addr_len,
+ false);
if (!ASSERT_EQ(err, 0, "cmp_local_addr"))
goto cleanup;
}
cleanup:
+ err = 0;
+err:
if (client != -1)
- close(client);
+ test->ops->close(client);
if (serv != -1)
close(serv);
+
+ return err;
}
-static void test_xmsg(struct sock_addr_test *test)
+static int test_xmsg(struct sock_addr_test *test)
{
struct sockaddr_storage addr, src_addr;
socklen_t addr_len = sizeof(struct sockaddr_storage),
src_addr_len = sizeof(struct sockaddr_storage);
- struct msghdr hdr;
- struct iovec iov;
char data = 'a';
int serv = -1, client = -1, err;
@@ -408,7 +2398,7 @@ static void test_xmsg(struct sock_addr_test *test)
if (!ASSERT_GE(serv, 0, "start_server"))
goto cleanup;
- client = socket(test->socket_family, test->socket_type, 0);
+ client = test->ops->socket(test->socket_family, test->socket_type, 0);
if (!ASSERT_GE(client, 0, "socket"))
goto cleanup;
@@ -418,7 +2408,8 @@ static void test_xmsg(struct sock_addr_test *test)
if (!ASSERT_EQ(err, 0, "make_sockaddr"))
goto cleanup;
- err = bind(client, (const struct sockaddr *) &src_addr, src_addr_len);
+ err = test->ops->bind(client, (struct sockaddr *)&src_addr,
+ src_addr_len);
if (!ASSERT_OK(err, "bind"))
goto cleanup;
}
@@ -429,17 +2420,13 @@ static void test_xmsg(struct sock_addr_test *test)
goto cleanup;
if (test->socket_type == SOCK_DGRAM) {
- memset(&iov, 0, sizeof(iov));
- iov.iov_base = &data;
- iov.iov_len = sizeof(data);
-
- memset(&hdr, 0, sizeof(hdr));
- hdr.msg_name = (void *)&addr;
- hdr.msg_namelen = addr_len;
- hdr.msg_iov = &iov;
- hdr.msg_iovlen = 1;
+ err = test->ops->sendmsg(client, (struct sockaddr *)&addr,
+ addr_len, &data, sizeof(data));
+ if (err < 0) {
+ err = errno;
+ goto err;
+ }
- err = sendmsg(client, &hdr, 0);
if (!ASSERT_EQ(err, sizeof(data), "sendmsg"))
goto cleanup;
} else {
@@ -489,19 +2476,23 @@ static void test_xmsg(struct sock_addr_test *test)
}
cleanup:
+ err = 0;
+err:
if (client != -1)
- close(client);
+ test->ops->close(client);
if (serv != -1)
close(serv);
+
+ return err;
}
-static void test_getsockname(struct sock_addr_test *test)
+static int test_getsockname(struct sock_addr_test *test)
{
struct sockaddr_storage expected_addr;
socklen_t expected_addr_len = sizeof(struct sockaddr_storage);
int serv = -1, err;
- serv = start_server(test->socket_family, test->socket_type,
+ serv = test->ops->start_server(test->socket_family, test->socket_type,
test->requested_addr, test->requested_port, 0);
if (!ASSERT_GE(serv, 0, "start_server"))
goto cleanup;
@@ -512,16 +2503,18 @@ static void test_getsockname(struct sock_addr_test *test)
if (!ASSERT_EQ(err, 0, "make_sockaddr"))
goto cleanup;
- err = cmp_local_addr(serv, &expected_addr, expected_addr_len, true);
+ err = cmp_sock_addr(test->ops->getsockname, serv, &expected_addr, expected_addr_len, true);
if (!ASSERT_EQ(err, 0, "cmp_local_addr"))
goto cleanup;
cleanup:
if (serv != -1)
- close(serv);
+ test->ops->close(serv);
+
+ return 0;
}
-static void test_getpeername(struct sock_addr_test *test)
+static int test_getpeername(struct sock_addr_test *test)
{
struct sockaddr_storage addr, expected_addr;
socklen_t addr_len = sizeof(struct sockaddr_storage),
@@ -538,7 +2531,8 @@ static void test_getpeername(struct sock_addr_test *test)
if (!ASSERT_EQ(err, 0, "make_sockaddr"))
goto cleanup;
- client = connect_to_addr(&addr, addr_len, test->socket_type);
+ client = test->ops->connect_to_addr(test->socket_type, &addr, addr_len,
+ NULL);
if (!ASSERT_GE(client, 0, "connect_to_addr"))
goto cleanup;
@@ -547,19 +2541,58 @@ static void test_getpeername(struct sock_addr_test *test)
if (!ASSERT_EQ(err, 0, "make_sockaddr"))
goto cleanup;
- err = cmp_peer_addr(client, &expected_addr, expected_addr_len, true);
+ err = cmp_sock_addr(test->ops->getpeername, client, &expected_addr,
+ expected_addr_len, true);
if (!ASSERT_EQ(err, 0, "cmp_peer_addr"))
goto cleanup;
cleanup:
if (client != -1)
- close(client);
+ test->ops->close(client);
if (serv != -1)
close(serv);
+
+ return 0;
+}
+
+static int setup_test_env(struct nstoken **tok)
+{
+ int err;
+
+ SYS_NOFAIL("ip netns delete %s", TEST_NS);
+ SYS(fail, "ip netns add %s", TEST_NS);
+ *tok = open_netns(TEST_NS);
+ if (!ASSERT_OK_PTR(*tok, "netns token"))
+ goto fail;
+
+ SYS(fail, "ip link add dev %s1 type veth peer name %s2", TEST_IF_PREFIX,
+ TEST_IF_PREFIX);
+ SYS(fail, "ip link set lo up");
+ SYS(fail, "ip link set %s1 up", TEST_IF_PREFIX);
+ SYS(fail, "ip link set %s2 up", TEST_IF_PREFIX);
+ SYS(fail, "ip -4 addr add %s/8 dev %s1", TEST_IPV4, TEST_IF_PREFIX);
+ SYS(fail, "ip -6 addr add %s/128 nodad dev %s1", TEST_IPV6, TEST_IF_PREFIX);
+
+ err = 0;
+ goto out;
+fail:
+ err = -1;
+ close_netns(*tok);
+ *tok = NULL;
+ SYS_NOFAIL("ip netns delete %s", TEST_NS);
+out:
+ return err;
+}
+
+static void cleanup_test_env(struct nstoken *tok)
+{
+ close_netns(tok);
+ SYS_NOFAIL("ip netns delete %s", TEST_NS);
}
void test_sock_addr(void)
{
+ struct nstoken *tok = NULL;
int cgroup_fd = -1;
void *skel;
@@ -567,13 +2600,22 @@ void test_sock_addr(void)
if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup"))
goto cleanup;
+ if (!ASSERT_OK(setup_test_env(&tok), "setup_test_env"))
+ goto cleanup;
+
+ if (!ASSERT_OK(load_sock_addr_kern(), "load_sock_addr_kern"))
+ goto cleanup;
+
for (size_t i = 0; i < ARRAY_SIZE(tests); ++i) {
struct sock_addr_test *test = &tests[i];
+ int err;
if (!test__start_subtest(test->name))
continue;
- skel = test->loadfn(cgroup_fd);
+ skel = test->loadfn(cgroup_fd, test->attach_type,
+ test->expected_result == LOAD_REJECT ||
+ test->expected_result == ATTACH_REJECT);
if (!skel)
continue;
@@ -583,30 +2625,39 @@ void test_sock_addr(void)
* the future.
*/
case SOCK_ADDR_TEST_BIND:
- test_bind(test);
+ err = test_bind(test);
break;
case SOCK_ADDR_TEST_CONNECT:
- test_connect(test);
+ err = test_connect(test);
break;
case SOCK_ADDR_TEST_SENDMSG:
case SOCK_ADDR_TEST_RECVMSG:
- test_xmsg(test);
+ err = test_xmsg(test);
break;
case SOCK_ADDR_TEST_GETSOCKNAME:
- test_getsockname(test);
+ err = test_getsockname(test);
break;
case SOCK_ADDR_TEST_GETPEERNAME:
- test_getpeername(test);
+ err = test_getpeername(test);
break;
default:
ASSERT_TRUE(false, "Unknown sock addr test type");
break;
}
+ if (test->expected_result == SYSCALL_EPERM)
+ ASSERT_EQ(err, EPERM, "socket operation returns EPERM");
+ else if (test->expected_result == SYSCALL_ENOTSUPP)
+ ASSERT_EQ(err, ENOTSUPP, "socket operation returns ENOTSUPP");
+ else if (test->expected_result == SUCCESS)
+ ASSERT_OK(err, "socket operation succeeds");
+
test->destroyfn(skel);
}
cleanup:
+ unload_sock_addr_kern();
+ cleanup_test_env(tok);
if (cgroup_fd >= 0)
close(cgroup_fd);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index 77e26ecffa9d..1337153eb0ad 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -131,6 +131,65 @@ out:
test_skmsg_load_helpers__destroy(skel);
}
+static void test_skmsg_helpers_with_link(enum bpf_map_type map_type)
+{
+ struct bpf_program *prog, *prog_clone, *prog_clone2;
+ DECLARE_LIBBPF_OPTS(bpf_link_update_opts, opts);
+ struct test_skmsg_load_helpers *skel;
+ struct bpf_link *link, *link2;
+ int err, map;
+
+ skel = test_skmsg_load_helpers__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_skmsg_load_helpers__open_and_load"))
+ return;
+
+ prog = skel->progs.prog_msg_verdict;
+ prog_clone = skel->progs.prog_msg_verdict_clone;
+ prog_clone2 = skel->progs.prog_msg_verdict_clone2;
+ map = bpf_map__fd(skel->maps.sock_map);
+
+ link = bpf_program__attach_sockmap(prog, map);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_sockmap"))
+ goto out;
+
+ /* Fail since bpf_link for the same prog has been created. */
+ err = bpf_prog_attach(bpf_program__fd(prog), map, BPF_SK_MSG_VERDICT, 0);
+ if (!ASSERT_ERR(err, "bpf_prog_attach"))
+ goto out;
+
+ /* Fail since bpf_link for the same prog type has been created. */
+ link2 = bpf_program__attach_sockmap(prog_clone, map);
+ if (!ASSERT_ERR_PTR(link2, "bpf_program__attach_sockmap")) {
+ bpf_link__detach(link2);
+ goto out;
+ }
+
+ err = bpf_link__update_program(link, prog_clone);
+ if (!ASSERT_OK(err, "bpf_link__update_program"))
+ goto out;
+
+ /* Fail since a prog with different type attempts to do update. */
+ err = bpf_link__update_program(link, skel->progs.prog_skb_verdict);
+ if (!ASSERT_ERR(err, "bpf_link__update_program"))
+ goto out;
+
+ /* Fail since the old prog does not match the one in the kernel. */
+ opts.old_prog_fd = bpf_program__fd(prog_clone2);
+ opts.flags = BPF_F_REPLACE;
+ err = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), &opts);
+ if (!ASSERT_ERR(err, "bpf_link_update"))
+ goto out;
+
+ opts.old_prog_fd = bpf_program__fd(prog_clone);
+ opts.flags = BPF_F_REPLACE;
+ err = bpf_link_update(bpf_link__fd(link), bpf_program__fd(prog), &opts);
+ if (!ASSERT_OK(err, "bpf_link_update"))
+ goto out;
+out:
+ bpf_link__detach(link);
+ test_skmsg_load_helpers__destroy(skel);
+}
+
static void test_sockmap_update(enum bpf_map_type map_type)
{
int err, prog, src;
@@ -298,6 +357,40 @@ out:
test_sockmap_skb_verdict_attach__destroy(skel);
}
+static void test_sockmap_skb_verdict_attach_with_link(void)
+{
+ struct test_sockmap_skb_verdict_attach *skel;
+ struct bpf_program *prog;
+ struct bpf_link *link;
+ int err, map;
+
+ skel = test_sockmap_skb_verdict_attach__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+ prog = skel->progs.prog_skb_verdict;
+ map = bpf_map__fd(skel->maps.sock_map);
+ link = bpf_program__attach_sockmap(prog, map);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_sockmap"))
+ goto out;
+
+ bpf_link__detach(link);
+
+ err = bpf_prog_attach(bpf_program__fd(prog), map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach"))
+ goto out;
+
+ /* Fail since attaching with the same prog/map has been done. */
+ link = bpf_program__attach_sockmap(prog, map);
+ if (!ASSERT_ERR_PTR(link, "bpf_program__attach_sockmap"))
+ bpf_link__detach(link);
+
+ err = bpf_prog_detach2(bpf_program__fd(prog), map, BPF_SK_SKB_STREAM_VERDICT);
+ if (!ASSERT_OK(err, "bpf_prog_detach2"))
+ goto out;
+out:
+ test_sockmap_skb_verdict_attach__destroy(skel);
+}
+
static __u32 query_prog_id(int prog_fd)
{
struct bpf_prog_info info = {};
@@ -475,30 +568,19 @@ out:
test_sockmap_drop_prog__destroy(drop);
}
-static void test_sockmap_skb_verdict_peek(void)
+static void test_sockmap_skb_verdict_peek_helper(int map)
{
- int err, map, verdict, s, c1, p1, zero = 0, sent, recvd, avail;
- struct test_sockmap_pass_prog *pass;
+ int err, s, c1, p1, zero = 0, sent, recvd, avail;
char snd[256] = "0123456789";
char rcv[256] = "0";
- pass = test_sockmap_pass_prog__open_and_load();
- if (!ASSERT_OK_PTR(pass, "open_and_load"))
- return;
- verdict = bpf_program__fd(pass->progs.prog_skb_verdict);
- map = bpf_map__fd(pass->maps.sock_map_rx);
-
- err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
- if (!ASSERT_OK(err, "bpf_prog_attach"))
- goto out;
-
s = socket_loopback(AF_INET, SOCK_STREAM);
if (!ASSERT_GT(s, -1, "socket_loopback(s)"))
- goto out;
+ return;
err = create_pair(s, AF_INET, SOCK_STREAM, &c1, &p1);
if (!ASSERT_OK(err, "create_pairs(s)"))
- goto out;
+ return;
err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST);
if (!ASSERT_OK(err, "bpf_map_update_elem(c1)"))
@@ -520,7 +602,58 @@ static void test_sockmap_skb_verdict_peek(void)
out_close:
close(c1);
close(p1);
+}
+
+static void test_sockmap_skb_verdict_peek(void)
+{
+ struct test_sockmap_pass_prog *pass;
+ int err, map, verdict;
+
+ pass = test_sockmap_pass_prog__open_and_load();
+ if (!ASSERT_OK_PTR(pass, "open_and_load"))
+ return;
+ verdict = bpf_program__fd(pass->progs.prog_skb_verdict);
+ map = bpf_map__fd(pass->maps.sock_map_rx);
+
+ err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach"))
+ goto out;
+
+ test_sockmap_skb_verdict_peek_helper(map);
+
+out:
+ test_sockmap_pass_prog__destroy(pass);
+}
+
+static void test_sockmap_skb_verdict_peek_with_link(void)
+{
+ struct test_sockmap_pass_prog *pass;
+ struct bpf_program *prog;
+ struct bpf_link *link;
+ int err, map;
+
+ pass = test_sockmap_pass_prog__open_and_load();
+ if (!ASSERT_OK_PTR(pass, "open_and_load"))
+ return;
+ prog = pass->progs.prog_skb_verdict;
+ map = bpf_map__fd(pass->maps.sock_map_rx);
+ link = bpf_program__attach_sockmap(prog, map);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_sockmap"))
+ goto out;
+
+ err = bpf_link__update_program(link, pass->progs.prog_skb_verdict_clone);
+ if (!ASSERT_OK(err, "bpf_link__update_program"))
+ goto out;
+
+ /* Fail since a prog with different attach type attempts to do update. */
+ err = bpf_link__update_program(link, pass->progs.prog_skb_parser);
+ if (!ASSERT_ERR(err, "bpf_link__update_program"))
+ goto out;
+
+ test_sockmap_skb_verdict_peek_helper(map);
+ ASSERT_EQ(pass->bss->clone_called, 1, "clone_called");
out:
+ bpf_link__detach(link);
test_sockmap_pass_prog__destroy(pass);
}
@@ -788,6 +921,8 @@ void test_sockmap_basic(void)
test_sockmap_skb_verdict_attach(BPF_SK_SKB_STREAM_VERDICT,
BPF_SK_SKB_VERDICT);
}
+ if (test__start_subtest("sockmap skb_verdict attach_with_link"))
+ test_sockmap_skb_verdict_attach_with_link();
if (test__start_subtest("sockmap msg_verdict progs query"))
test_sockmap_progs_query(BPF_SK_MSG_VERDICT);
if (test__start_subtest("sockmap stream_parser progs query"))
@@ -804,6 +939,8 @@ void test_sockmap_basic(void)
test_sockmap_skb_verdict_fionread(false);
if (test__start_subtest("sockmap skb_verdict msg_f_peek"))
test_sockmap_skb_verdict_peek();
+ if (test__start_subtest("sockmap skb_verdict msg_f_peek with link"))
+ test_sockmap_skb_verdict_peek_with_link();
if (test__start_subtest("sockmap unconnected af_unix"))
test_sockmap_unconnected_unix();
if (test__start_subtest("sockmap one socket to many map entries"))
@@ -812,4 +949,8 @@ void test_sockmap_basic(void)
test_sockmap_many_maps();
if (test__start_subtest("sockmap same socket replace"))
test_sockmap_same_sock();
+ if (test__start_subtest("sockmap sk_msg attach sockmap helpers with link"))
+ test_skmsg_helpers_with_link(BPF_MAP_TYPE_SOCKMAP);
+ if (test__start_subtest("sockhash sk_msg attach sockhash helpers with link"))
+ test_skmsg_helpers_with_link(BPF_MAP_TYPE_SOCKHASH);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index a92807bfcd13..9ce0e0e0b7da 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -29,6 +29,8 @@
#include "sockmap_helpers.h"
+#define NO_FLAGS 0
+
static void test_insert_invalid(struct test_sockmap_listen *skel __always_unused,
int family, int sotype, int mapfd)
{
@@ -767,6 +769,24 @@ static void test_msg_redir_to_connected(struct test_sockmap_listen *skel,
xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
}
+static void test_msg_redir_to_connected_with_link(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int family,
+ int sotype)
+{
+ int prog_msg_verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
+ int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+ int sock_map = bpf_map__fd(inner_map);
+ int link_fd;
+
+ link_fd = bpf_link_create(prog_msg_verdict, sock_map, BPF_SK_MSG_VERDICT, NULL);
+ if (!ASSERT_GE(link_fd, 0, "bpf_link_create"))
+ return;
+
+ redir_to_connected(family, sotype, sock_map, verdict_map, REDIR_EGRESS);
+
+ close(link_fd);
+}
+
static void redir_to_listening(int family, int sotype, int sock_mapfd,
int verd_mapfd, enum redir_mode mode)
{
@@ -869,6 +889,24 @@ static void test_msg_redir_to_listening(struct test_sockmap_listen *skel,
xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
}
+static void test_msg_redir_to_listening_with_link(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int family,
+ int sotype)
+{
+ struct bpf_program *verdict = skel->progs.prog_msg_verdict;
+ int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+ int sock_map = bpf_map__fd(inner_map);
+ struct bpf_link *link;
+
+ link = bpf_program__attach_sockmap(verdict, sock_map);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_sockmap"))
+ return;
+
+ redir_to_listening(family, sotype, sock_map, verdict_map, REDIR_EGRESS);
+
+ bpf_link__detach(link);
+}
+
static void redir_partial(int family, int sotype, int sock_map, int parser_map)
{
int s, c0 = -1, c1 = -1, p0 = -1, p1 = -1;
@@ -1316,7 +1354,9 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
TEST(test_skb_redir_to_listening),
TEST(test_skb_redir_partial),
TEST(test_msg_redir_to_connected),
+ TEST(test_msg_redir_to_connected_with_link),
TEST(test_msg_redir_to_listening),
+ TEST(test_msg_redir_to_listening_with_link),
};
const char *family_name, *map_name;
const struct redir_test *t;
@@ -1338,7 +1378,8 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
static void pairs_redir_to_connected(int cli0, int peer0, int cli1, int peer1,
int sock_mapfd, int nop_mapfd,
- int verd_mapfd, enum redir_mode mode)
+ int verd_mapfd, enum redir_mode mode,
+ int send_flags)
{
const char *log_prefix = redir_mode_str(mode);
unsigned int pass;
@@ -1358,12 +1399,11 @@ static void pairs_redir_to_connected(int cli0, int peer0, int cli1, int peer1,
return;
}
- n = write(cli1, "a", 1);
- if (n < 0)
- FAIL_ERRNO("%s: write", log_prefix);
- if (n == 0)
- FAIL("%s: incomplete write", log_prefix);
- if (n < 1)
+ /* Last byte is OOB data when send_flags has MSG_OOB bit set */
+ n = xsend(cli1, "ab", 2, send_flags);
+ if (n >= 0 && n < 2)
+ FAIL("%s: incomplete send", log_prefix);
+ if (n < 2)
return;
key = SK_PASS;
@@ -1378,6 +1418,25 @@ static void pairs_redir_to_connected(int cli0, int peer0, int cli1, int peer1,
FAIL_ERRNO("%s: recv_timeout", log_prefix);
if (n == 0)
FAIL("%s: incomplete recv", log_prefix);
+
+ if (send_flags & MSG_OOB) {
+ /* Check that we can't read OOB while in sockmap */
+ errno = 0;
+ n = recv(peer1, &b, 1, MSG_OOB | MSG_DONTWAIT);
+ if (n != -1 || errno != EOPNOTSUPP)
+ FAIL("%s: recv(MSG_OOB): expected EOPNOTSUPP: retval=%d errno=%d",
+ log_prefix, n, errno);
+
+ /* Remove peer1 from sockmap */
+ xbpf_map_delete_elem(sock_mapfd, &(int){ 1 });
+
+ /* Check that OOB was dropped on redirect */
+ errno = 0;
+ n = recv(peer1, &b, 1, MSG_OOB | MSG_DONTWAIT);
+ if (n != -1 || errno != EINVAL)
+ FAIL("%s: recv(MSG_OOB): expected EINVAL: retval=%d errno=%d",
+ log_prefix, n, errno);
+ }
}
static void unix_redir_to_connected(int sotype, int sock_mapfd,
@@ -1394,7 +1453,8 @@ static void unix_redir_to_connected(int sotype, int sock_mapfd,
goto close0;
c1 = sfd[0], p1 = sfd[1];
- pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, mode);
+ pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd,
+ mode, NO_FLAGS);
xclose(c1);
xclose(p1);
@@ -1684,7 +1744,8 @@ static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd,
if (err)
goto close_cli0;
- pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, mode);
+ pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd,
+ mode, NO_FLAGS);
xclose(c1);
xclose(p1);
@@ -1742,7 +1803,8 @@ static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd,
if (err)
goto close;
- pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, mode);
+ pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd,
+ mode, NO_FLAGS);
xclose(c1);
xclose(p1);
@@ -1777,10 +1839,9 @@ static void inet_unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
}
-static void unix_inet_redir_to_connected(int family, int type,
- int sock_mapfd, int nop_mapfd,
- int verd_mapfd,
- enum redir_mode mode)
+static void unix_inet_redir_to_connected(int family, int type, int sock_mapfd,
+ int nop_mapfd, int verd_mapfd,
+ enum redir_mode mode, int send_flags)
{
int c0, c1, p0, p1;
int sfd[2];
@@ -1790,19 +1851,18 @@ static void unix_inet_redir_to_connected(int family, int type,
if (err)
return;
- if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd))
+ if (socketpair(AF_UNIX, type | SOCK_NONBLOCK, 0, sfd))
goto close_cli0;
c1 = sfd[0], p1 = sfd[1];
- pairs_redir_to_connected(c0, p0, c1, p1,
- sock_mapfd, nop_mapfd, verd_mapfd, mode);
+ pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, nop_mapfd,
+ verd_mapfd, mode, send_flags);
xclose(c1);
xclose(p1);
close_cli0:
xclose(c0);
xclose(p0);
-
}
static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel,
@@ -1821,31 +1881,42 @@ static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel,
skel->bss->test_ingress = false;
unix_inet_redir_to_connected(family, SOCK_DGRAM,
sock_map, -1, verdict_map,
- REDIR_EGRESS);
+ REDIR_EGRESS, NO_FLAGS);
unix_inet_redir_to_connected(family, SOCK_DGRAM,
sock_map, -1, verdict_map,
- REDIR_EGRESS);
+ REDIR_EGRESS, NO_FLAGS);
unix_inet_redir_to_connected(family, SOCK_DGRAM,
sock_map, nop_map, verdict_map,
- REDIR_EGRESS);
+ REDIR_EGRESS, NO_FLAGS);
+ unix_inet_redir_to_connected(family, SOCK_STREAM,
+ sock_map, nop_map, verdict_map,
+ REDIR_EGRESS, NO_FLAGS);
+
+ /* MSG_OOB not supported by AF_UNIX SOCK_DGRAM */
unix_inet_redir_to_connected(family, SOCK_STREAM,
sock_map, nop_map, verdict_map,
- REDIR_EGRESS);
+ REDIR_EGRESS, MSG_OOB);
+
skel->bss->test_ingress = true;
unix_inet_redir_to_connected(family, SOCK_DGRAM,
sock_map, -1, verdict_map,
- REDIR_INGRESS);
+ REDIR_INGRESS, NO_FLAGS);
unix_inet_redir_to_connected(family, SOCK_STREAM,
sock_map, -1, verdict_map,
- REDIR_INGRESS);
+ REDIR_INGRESS, NO_FLAGS);
unix_inet_redir_to_connected(family, SOCK_DGRAM,
sock_map, nop_map, verdict_map,
- REDIR_INGRESS);
+ REDIR_INGRESS, NO_FLAGS);
+ unix_inet_redir_to_connected(family, SOCK_STREAM,
+ sock_map, nop_map, verdict_map,
+ REDIR_INGRESS, NO_FLAGS);
+
+ /* MSG_OOB not supported by AF_UNIX SOCK_DGRAM */
unix_inet_redir_to_connected(family, SOCK_STREAM,
sock_map, nop_map, verdict_map,
- REDIR_INGRESS);
+ REDIR_INGRESS, MSG_OOB);
xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt.c b/tools/testing/selftests/bpf/prog_tests/sockopt.c
index 5a4491d4edfe..eaac83a7f388 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt.c
@@ -24,6 +24,7 @@ enum sockopt_test_error {
static struct sockopt_test {
const char *descr;
const struct bpf_insn insns[64];
+ enum bpf_prog_type prog_type;
enum bpf_attach_type attach_type;
enum bpf_attach_type expected_attach_type;
@@ -928,9 +929,40 @@ static struct sockopt_test {
.error = EPERM_SETSOCKOPT,
},
+
+ /* ==================== prog_type ==================== */
+
+ {
+ .descr = "can attach only BPF_CGROUP_SETSOCKOP",
+ .insns = {
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+
+ },
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ .attach_type = BPF_CGROUP_SETSOCKOPT,
+ .expected_attach_type = 0,
+ .error = DENY_ATTACH,
+ },
+
+ {
+ .descr = "can attach only BPF_CGROUP_GETSOCKOP",
+ .insns = {
+ /* return 1 */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+
+ },
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+ .attach_type = BPF_CGROUP_GETSOCKOPT,
+ .expected_attach_type = 0,
+ .error = DENY_ATTACH,
+ },
};
static int load_prog(const struct bpf_insn *insns,
+ enum bpf_prog_type prog_type,
enum bpf_attach_type expected_attach_type)
{
LIBBPF_OPTS(bpf_prog_load_opts, opts,
@@ -947,7 +979,7 @@ static int load_prog(const struct bpf_insn *insns,
}
insns_cnt++;
- fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCKOPT, NULL, "GPL", insns, insns_cnt, &opts);
+ fd = bpf_prog_load(prog_type, NULL, "GPL", insns, insns_cnt, &opts);
if (verbose && fd < 0)
fprintf(stderr, "%s\n", bpf_log_buf);
@@ -1036,13 +1068,18 @@ static int call_getsockopt(bool use_io_uring, int fd, int level, int optname,
return getsockopt(fd, level, optname, optval, optlen);
}
-static int run_test(int cgroup_fd, struct sockopt_test *test, bool use_io_uring)
+static int run_test(int cgroup_fd, struct sockopt_test *test, bool use_io_uring,
+ bool use_link)
{
- int sock_fd, err, prog_fd;
+ int prog_type = BPF_PROG_TYPE_CGROUP_SOCKOPT;
+ int sock_fd, err, prog_fd, link_fd = -1;
void *optval = NULL;
int ret = 0;
- prog_fd = load_prog(test->insns, test->expected_attach_type);
+ if (test->prog_type)
+ prog_type = test->prog_type;
+
+ prog_fd = load_prog(test->insns, prog_type, test->expected_attach_type);
if (prog_fd < 0) {
if (test->error == DENY_LOAD)
return 0;
@@ -1051,7 +1088,12 @@ static int run_test(int cgroup_fd, struct sockopt_test *test, bool use_io_uring)
return -1;
}
- err = bpf_prog_attach(prog_fd, cgroup_fd, test->attach_type, 0);
+ if (use_link) {
+ err = bpf_link_create(prog_fd, cgroup_fd, test->attach_type, NULL);
+ link_fd = err;
+ } else {
+ err = bpf_prog_attach(prog_fd, cgroup_fd, test->attach_type, 0);
+ }
if (err < 0) {
if (test->error == DENY_ATTACH)
goto close_prog_fd;
@@ -1142,7 +1184,12 @@ free_optval:
close_sock_fd:
close(sock_fd);
detach_prog:
- bpf_prog_detach2(prog_fd, cgroup_fd, test->attach_type);
+ if (use_link) {
+ if (link_fd >= 0)
+ close(link_fd);
+ } else {
+ bpf_prog_detach2(prog_fd, cgroup_fd, test->attach_type);
+ }
close_prog_fd:
close(prog_fd);
return ret;
@@ -1160,10 +1207,12 @@ void test_sockopt(void)
if (!test__start_subtest(tests[i].descr))
continue;
- ASSERT_OK(run_test(cgroup_fd, &tests[i], false),
+ ASSERT_OK(run_test(cgroup_fd, &tests[i], false, false),
+ tests[i].descr);
+ ASSERT_OK(run_test(cgroup_fd, &tests[i], false, true),
tests[i].descr);
if (tests[i].io_uring_support)
- ASSERT_OK(run_test(cgroup_fd, &tests[i], true),
+ ASSERT_OK(run_test(cgroup_fd, &tests[i], true, false),
tests[i].descr);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
index 917f486db826..7cd8be2780ca 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
#include "cgroup_helpers.h"
+#include "network_helpers.h"
#include "sockopt_inherit.skel.h"
@@ -9,35 +10,6 @@
#define CUSTOM_INHERIT2 1
#define CUSTOM_LISTENER 2
-static int connect_to_server(int server_fd)
-{
- struct sockaddr_storage addr;
- socklen_t len = sizeof(addr);
- int fd;
-
- fd = socket(AF_INET, SOCK_STREAM, 0);
- if (fd < 0) {
- log_err("Failed to create client socket");
- return -1;
- }
-
- if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
- log_err("Failed to get server addr");
- goto out;
- }
-
- if (connect(fd, (const struct sockaddr *)&addr, len) < 0) {
- log_err("Fail to connect to server");
- goto out;
- }
-
- return fd;
-
-out:
- close(fd);
- return -1;
-}
-
static int verify_sockopt(int fd, int optname, const char *msg, char expected)
{
socklen_t optlen = 1;
@@ -98,47 +70,36 @@ static void *server_thread(void *arg)
return (void *)(long)err;
}
-static int start_server(void)
+static int custom_cb(int fd, void *opts)
{
- struct sockaddr_in addr = {
- .sin_family = AF_INET,
- .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
- };
char buf;
int err;
- int fd;
int i;
- fd = socket(AF_INET, SOCK_STREAM, 0);
- if (fd < 0) {
- log_err("Failed to create server socket");
- return -1;
- }
-
for (i = CUSTOM_INHERIT1; i <= CUSTOM_LISTENER; i++) {
buf = 0x01;
err = setsockopt(fd, SOL_CUSTOM, i, &buf, 1);
if (err) {
log_err("Failed to call setsockopt(%d)", i);
- close(fd);
return -1;
}
}
- if (bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) < 0) {
- log_err("Failed to bind socket");
- close(fd);
- return -1;
- }
-
- return fd;
+ return 0;
}
static void run_test(int cgroup_fd)
{
struct bpf_link *link_getsockopt = NULL;
struct bpf_link *link_setsockopt = NULL;
+ struct network_helper_opts opts = {
+ .post_socket_cb = custom_cb,
+ };
int server_fd = -1, client_fd;
+ struct sockaddr_in addr = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+ };
struct sockopt_inherit *obj;
void *server_err;
pthread_t tid;
@@ -160,7 +121,8 @@ static void run_test(int cgroup_fd)
if (!ASSERT_OK_PTR(link_setsockopt, "cg-attach-setsockopt"))
goto close_bpf_object;
- server_fd = start_server();
+ server_fd = start_server_addr(SOCK_STREAM, (struct sockaddr_storage *)&addr,
+ sizeof(addr), &opts);
if (!ASSERT_GE(server_fd, 0, "start_server"))
goto close_bpf_object;
@@ -173,7 +135,7 @@ static void run_test(int cgroup_fd)
pthread_cond_wait(&server_started, &server_started_mtx);
pthread_mutex_unlock(&server_started_mtx);
- client_fd = connect_to_server(server_fd);
+ client_fd = connect_to_fd(server_fd, 0);
if (!ASSERT_GE(client_fd, 0, "connect_to_server"))
goto close_server_fd;
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
index 5db9eec24b5b..0832fd787457 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
@@ -35,7 +35,7 @@ retry:
pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
0 /* cpu 0 */, -1 /* group id */,
0 /* flags */);
- if (pmu_fd < 0 && errno == ENOENT) {
+ if (pmu_fd < 0 && (errno == ENOENT || errno == EOPNOTSUPP)) {
printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n", __func__);
test__skip();
goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_links.c b/tools/testing/selftests/bpf/prog_tests/tc_links.c
index bc9841144685..1af9ec1149aa 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_links.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_links.c
@@ -9,6 +9,8 @@
#define ping_cmd "ping -q -c1 -w1 127.0.0.1 > /dev/null"
#include "test_tc_link.skel.h"
+
+#include "netlink_helpers.h"
#include "tc_helpers.h"
void serial_test_tc_links_basic(void)
@@ -1787,6 +1789,65 @@ void serial_test_tc_links_ingress(void)
test_tc_links_ingress(BPF_TCX_INGRESS, false, false);
}
+struct qdisc_req {
+ struct nlmsghdr n;
+ struct tcmsg t;
+ char buf[1024];
+};
+
+static int qdisc_replace(int ifindex, const char *kind, bool block)
+{
+ struct rtnl_handle rth = { .fd = -1 };
+ struct qdisc_req req;
+ int err;
+
+ err = rtnl_open(&rth, 0);
+ if (!ASSERT_OK(err, "open_rtnetlink"))
+ return err;
+
+ memset(&req, 0, sizeof(req));
+ req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct tcmsg));
+ req.n.nlmsg_flags = NLM_F_CREATE | NLM_F_REPLACE | NLM_F_REQUEST;
+ req.n.nlmsg_type = RTM_NEWQDISC;
+ req.t.tcm_family = AF_UNSPEC;
+ req.t.tcm_ifindex = ifindex;
+ req.t.tcm_parent = 0xfffffff1;
+
+ addattr_l(&req.n, sizeof(req), TCA_KIND, kind, strlen(kind) + 1);
+ if (block)
+ addattr32(&req.n, sizeof(req), TCA_INGRESS_BLOCK, 1);
+
+ err = rtnl_talk(&rth, &req.n, NULL);
+ ASSERT_OK(err, "talk_rtnetlink");
+ rtnl_close(&rth);
+ return err;
+}
+
+void serial_test_tc_links_dev_chain0(void)
+{
+ int err, ifindex;
+
+ ASSERT_OK(system("ip link add dev foo type veth peer name bar"), "add veth");
+ ifindex = if_nametoindex("foo");
+ ASSERT_NEQ(ifindex, 0, "non_zero_ifindex");
+ err = qdisc_replace(ifindex, "ingress", true);
+ if (!ASSERT_OK(err, "attaching ingress"))
+ goto cleanup;
+ ASSERT_OK(system("tc filter add block 1 matchall action skbmod swap mac"), "add block");
+ err = qdisc_replace(ifindex, "clsact", false);
+ if (!ASSERT_OK(err, "attaching clsact"))
+ goto cleanup;
+ /* Heuristic: kern_sync_rcu() alone does not work; a wait-time of ~5s
+ * triggered the issue without the fix reliably 100% of the time.
+ */
+ sleep(5);
+ ASSERT_OK(system("tc filter add dev foo ingress matchall action skbmod swap mac"), "add filter");
+cleanup:
+ ASSERT_OK(system("ip link del dev foo"), "del veth");
+ ASSERT_EQ(if_nametoindex("foo"), 0, "foo removed");
+ ASSERT_EQ(if_nametoindex("bar"), 0, "bar removed");
+}
+
static void test_tc_links_dev_mixed(int target)
{
LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_netkit.c b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c
index 15ee7b2fc410..b9135720024c 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_netkit.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c
@@ -73,6 +73,16 @@ static int create_netkit(int mode, int policy, int peer_policy, int *ifindex,
"up primary");
ASSERT_OK(system("ip addr add dev " netkit_name " 10.0.0.1/24"),
"addr primary");
+
+ if (mode == NETKIT_L3) {
+ ASSERT_EQ(system("ip link set dev " netkit_name
+ " addr ee:ff:bb:cc:aa:dd 2> /dev/null"), 512,
+ "set hwaddress");
+ } else {
+ ASSERT_OK(system("ip link set dev " netkit_name
+ " addr ee:ff:bb:cc:aa:dd"),
+ "set hwaddress");
+ }
if (same_netns) {
ASSERT_OK(system("ip link set dev " netkit_peer " up"),
"up peer");
@@ -89,6 +99,16 @@ static int create_netkit(int mode, int policy, int peer_policy, int *ifindex,
return err;
}
+static void move_netkit(void)
+{
+ ASSERT_OK(system("ip link set " netkit_peer " netns foo"),
+ "move peer");
+ ASSERT_OK(system("ip netns exec foo ip link set dev "
+ netkit_peer " up"), "up peer");
+ ASSERT_OK(system("ip netns exec foo ip addr add dev "
+ netkit_peer " 10.0.0.2/24"), "addr peer");
+}
+
static void destroy_netkit(void)
{
ASSERT_OK(system("ip link del dev " netkit_name), "del primary");
@@ -685,3 +705,77 @@ void serial_test_tc_netkit_neigh_links(void)
serial_test_tc_netkit_neigh_links_target(NETKIT_L2, BPF_NETKIT_PRIMARY);
serial_test_tc_netkit_neigh_links_target(NETKIT_L3, BPF_NETKIT_PRIMARY);
}
+
+static void serial_test_tc_netkit_pkt_type_mode(int mode)
+{
+ LIBBPF_OPTS(bpf_netkit_opts, optl_nk);
+ LIBBPF_OPTS(bpf_tcx_opts, optl_tcx);
+ int err, ifindex, ifindex2;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+
+ err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
+ &ifindex, true);
+ if (err)
+ return;
+
+ ifindex2 = if_nametoindex(netkit_peer);
+ ASSERT_NEQ(ifindex, ifindex2, "ifindex_1_2");
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1,
+ BPF_NETKIT_PRIMARY), 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc7,
+ BPF_TCX_INGRESS), 0, "tc7_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0);
+ assert_mprog_count_ifindex(ifindex2, BPF_TCX_INGRESS, 0);
+
+ link = bpf_program__attach_netkit(skel->progs.tc1, ifindex, &optl_nk);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 1);
+ assert_mprog_count_ifindex(ifindex2, BPF_TCX_INGRESS, 0);
+
+ link = bpf_program__attach_tcx(skel->progs.tc7, ifindex2, &optl_tcx);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc7 = link;
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 1);
+ assert_mprog_count_ifindex(ifindex2, BPF_TCX_INGRESS, 1);
+
+ move_netkit();
+
+ tc_skel_reset_all_seen(skel);
+ skel->bss->set_type = true;
+ ASSERT_EQ(send_icmp(), 0, "icmp_pkt");
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc7, true, "seen_tc7");
+
+ ASSERT_EQ(skel->bss->seen_host, true, "seen_host");
+ ASSERT_EQ(skel->bss->seen_mcast, true, "seen_mcast");
+cleanup:
+ test_tc_link__destroy(skel);
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0);
+ destroy_netkit();
+}
+
+void serial_test_tc_netkit_pkt_type(void)
+{
+ serial_test_tc_netkit_pkt_type_mode(NETKIT_L2);
+ serial_test_tc_netkit_pkt_type_mode(NETKIT_L3);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index dbe06aeaa2b2..327d51f59142 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -530,7 +530,7 @@ static int wait_netstamp_needed_key(void)
__u64 tstamp = 0;
nstoken = open_netns(NS_DST);
- if (!nstoken)
+ if (!ASSERT_OK_PTR(nstoken, "setns dst"))
return -1;
srv_fd = start_server(AF_INET6, SOCK_DGRAM, "::1", 0, 0);
@@ -890,9 +890,6 @@ static void test_udp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd)
ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
dtime_cnt_str(t, INGRESS_FWDNS_P100));
- /* non mono delivery time is not forwarded */
- ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0,
- dtime_cnt_str(t, INGRESS_FWDNS_P101));
for (i = EGRESS_FWDNS_P100; i < SET_DTIME; i++)
ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i));
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
index 8fe84da1b9b4..f2b99d95d916 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
@@ -10,6 +10,9 @@ struct tcp_rtt_storage {
__u32 delivered;
__u32 delivered_ce;
__u32 icsk_retransmits;
+
+ __u32 mrtt_us; /* args[0] */
+ __u32 srtt; /* args[1] */
};
static void send_byte(int fd)
@@ -83,6 +86,17 @@ static int verify_sk(int map_fd, int client_fd, const char *msg, __u32 invoked,
err++;
}
+ /* Precise values of mrtt and srtt are unavailable, just make sure they are nonzero */
+ if (val.mrtt_us == 0) {
+ log_err("%s: unexpected bpf_tcp_sock.args[0] (mrtt_us) %u == 0", msg, val.mrtt_us);
+ err++;
+ }
+
+ if (val.srtt == 0) {
+ log_err("%s: unexpected bpf_tcp_sock.args[1] (srtt) %u == 0", msg, val.srtt);
+ err++;
+ }
+
return err;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c b/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c
index ae93411fd582..09ca13bdf6ca 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c
@@ -11,6 +11,7 @@ static int sanity_run(struct bpf_program *prog)
.data_in = &pkt_v4,
.data_size_in = sizeof(pkt_v4),
.repeat = 1,
+ .flags = BPF_F_TEST_SKB_CHECKSUM_COMPLETE,
);
prog_fd = bpf_program__fd(prog);
diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c
index ee5372c7f2c7..bbcf12696a6b 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c
@@ -3,7 +3,12 @@
#include <test_progs.h>
#include <time.h>
+#include <sys/epoll.h>
+
#include "struct_ops_module.skel.h"
+#include "struct_ops_nulled_out_cb.skel.h"
+#include "struct_ops_forgotten_cb.skel.h"
+#include "struct_ops_detach.skel.h"
static void check_map_info(struct bpf_map_info *info)
{
@@ -66,6 +71,7 @@ static void test_struct_ops_load(void)
* auto-loading, or it will fail to load.
*/
bpf_program__set_autoload(skel->progs.test_2, false);
+ bpf_map__set_autocreate(skel->maps.testmod_zeroed, false);
err = struct_ops_module__load(skel);
if (!ASSERT_OK(err, "struct_ops_module_load"))
@@ -93,9 +99,217 @@ cleanup:
struct_ops_module__destroy(skel);
}
+static void test_struct_ops_not_zeroed(void)
+{
+ struct struct_ops_module *skel;
+ int err;
+
+ /* zeroed is 0, and zeroed_op is null */
+ skel = struct_ops_module__open();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_module_open"))
+ return;
+
+ skel->struct_ops.testmod_zeroed->zeroed = 0;
+ /* zeroed_op prog should be not loaded automatically now */
+ skel->struct_ops.testmod_zeroed->zeroed_op = NULL;
+
+ err = struct_ops_module__load(skel);
+ ASSERT_OK(err, "struct_ops_module_load");
+
+ struct_ops_module__destroy(skel);
+
+ /* zeroed is not 0 */
+ skel = struct_ops_module__open();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_module_open_not_zeroed"))
+ return;
+
+ /* libbpf should reject the testmod_zeroed since struct
+ * bpf_testmod_ops in the kernel has no "zeroed" field and the
+ * value of "zeroed" is non-zero.
+ */
+ skel->struct_ops.testmod_zeroed->zeroed = 0xdeadbeef;
+ skel->struct_ops.testmod_zeroed->zeroed_op = NULL;
+ err = struct_ops_module__load(skel);
+ ASSERT_ERR(err, "struct_ops_module_load_not_zeroed");
+
+ struct_ops_module__destroy(skel);
+
+ /* zeroed_op is not null */
+ skel = struct_ops_module__open();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_module_open_not_zeroed_op"))
+ return;
+
+ /* libbpf should reject the testmod_zeroed since the value of its
+ * "zeroed_op" is not null.
+ */
+ skel->struct_ops.testmod_zeroed->zeroed_op = skel->progs.test_3;
+ err = struct_ops_module__load(skel);
+ ASSERT_ERR(err, "struct_ops_module_load_not_zeroed_op");
+
+ struct_ops_module__destroy(skel);
+}
+
+/* The signature of an implementation might not match the signature of the
+ * function pointer prototype defined in the BPF program. This mismatch
+ * should be allowed as long as the behavior of the operator program
+ * adheres to the signature in the kernel. Libbpf should not enforce the
+ * signature; rather, let the kernel verifier handle the enforcement.
+ */
+static void test_struct_ops_incompatible(void)
+{
+ struct struct_ops_module *skel;
+ struct bpf_link *link;
+ int err;
+
+ skel = struct_ops_module__open();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_module_open"))
+ return;
+
+ bpf_map__set_autocreate(skel->maps.testmod_zeroed, false);
+
+ err = struct_ops_module__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ link = bpf_map__attach_struct_ops(skel->maps.testmod_incompatible);
+ if (ASSERT_OK_PTR(link, "attach_struct_ops"))
+ bpf_link__destroy(link);
+
+cleanup:
+ struct_ops_module__destroy(skel);
+}
+
+/* validate that it's ok to "turn off" callback that kernel supports */
+static void test_struct_ops_nulled_out_cb(void)
+{
+ struct struct_ops_nulled_out_cb *skel;
+ int err;
+
+ skel = struct_ops_nulled_out_cb__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ /* kernel knows about test_1, but we still null it out */
+ skel->struct_ops.ops->test_1 = NULL;
+
+ err = struct_ops_nulled_out_cb__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ ASSERT_FALSE(bpf_program__autoload(skel->progs.test_1_turn_off), "prog_autoload");
+ ASSERT_LT(bpf_program__fd(skel->progs.test_1_turn_off), 0, "prog_fd");
+
+cleanup:
+ struct_ops_nulled_out_cb__destroy(skel);
+}
+
+/* validate that libbpf generates reasonable error message if struct_ops is
+ * not referenced in any struct_ops map
+ */
+static void test_struct_ops_forgotten_cb(void)
+{
+ struct struct_ops_forgotten_cb *skel;
+ char *log;
+ int err;
+
+ skel = struct_ops_forgotten_cb__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ start_libbpf_log_capture();
+
+ err = struct_ops_forgotten_cb__load(skel);
+ if (!ASSERT_ERR(err, "skel_load"))
+ goto cleanup;
+
+ log = stop_libbpf_log_capture();
+ ASSERT_HAS_SUBSTR(log,
+ "prog 'test_1_forgotten': SEC(\"struct_ops\") program isn't referenced anywhere, did you forget to use it?",
+ "libbpf_log");
+ free(log);
+
+ struct_ops_forgotten_cb__destroy(skel);
+
+ /* now let's programmatically use it, we should be fine now */
+ skel = struct_ops_forgotten_cb__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->struct_ops.ops->test_1 = skel->progs.test_1_forgotten; /* not anymore */
+
+ err = struct_ops_forgotten_cb__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+cleanup:
+ struct_ops_forgotten_cb__destroy(skel);
+}
+
+/* Detach a link from a user space program */
+static void test_detach_link(void)
+{
+ struct epoll_event ev, events[2];
+ struct struct_ops_detach *skel;
+ struct bpf_link *link = NULL;
+ int fd, epollfd = -1, nfds;
+ int err;
+
+ skel = struct_ops_detach__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_detach__open_and_load"))
+ return;
+
+ link = bpf_map__attach_struct_ops(skel->maps.testmod_do_detach);
+ if (!ASSERT_OK_PTR(link, "attach_struct_ops"))
+ goto cleanup;
+
+ fd = bpf_link__fd(link);
+ if (!ASSERT_GE(fd, 0, "link_fd"))
+ goto cleanup;
+
+ epollfd = epoll_create1(0);
+ if (!ASSERT_GE(epollfd, 0, "epoll_create1"))
+ goto cleanup;
+
+ ev.events = EPOLLHUP;
+ ev.data.fd = fd;
+ err = epoll_ctl(epollfd, EPOLL_CTL_ADD, fd, &ev);
+ if (!ASSERT_OK(err, "epoll_ctl"))
+ goto cleanup;
+
+ err = bpf_link__detach(link);
+ if (!ASSERT_OK(err, "detach_link"))
+ goto cleanup;
+
+ /* Wait for EPOLLHUP */
+ nfds = epoll_wait(epollfd, events, 2, 500);
+ if (!ASSERT_EQ(nfds, 1, "epoll_wait"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(events[0].data.fd, fd, "epoll_wait_fd"))
+ goto cleanup;
+ if (!ASSERT_TRUE(events[0].events & EPOLLHUP, "events[0].events"))
+ goto cleanup;
+
+cleanup:
+ if (epollfd >= 0)
+ close(epollfd);
+ bpf_link__destroy(link);
+ struct_ops_detach__destroy(skel);
+}
+
void serial_test_struct_ops_module(void)
{
- if (test__start_subtest("test_struct_ops_load"))
+ if (test__start_subtest("struct_ops_load"))
test_struct_ops_load();
+ if (test__start_subtest("struct_ops_not_zeroed"))
+ test_struct_ops_not_zeroed();
+ if (test__start_subtest("struct_ops_incompatible"))
+ test_struct_ops_incompatible();
+ if (test__start_subtest("struct_ops_null_out_cb"))
+ test_struct_ops_nulled_out_cb();
+ if (test__start_subtest("struct_ops_forgotten_cb"))
+ test_struct_ops_forgotten_cb();
+ if (test__start_subtest("test_detach_link"))
+ test_detach_link();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
index 5f1fb0a2ea56..cec746e77cd3 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
@@ -612,6 +612,8 @@ static void test_ipip_tunnel(enum ipip_encap encap)
/* ping from at_ns0 namespace test */
nstoken = open_netns("at_ns0");
+ if (!ASSERT_OK_PTR(nstoken, "setns"))
+ goto done;
err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV1);
if (!ASSERT_OK(err, "test_ping"))
goto done;
@@ -666,6 +668,8 @@ static void test_xfrm_tunnel(void)
/* ping from at_ns0 namespace test */
nstoken = open_netns("at_ns0");
+ if (!ASSERT_OK_PTR(nstoken, "setns"))
+ goto done;
err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV1);
close_netns(nstoken);
if (!ASSERT_OK(err, "test_ping"))
diff --git a/tools/testing/selftests/bpf/prog_tests/timer_lockup.c b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c
new file mode 100644
index 000000000000..871d16cb95cf
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/timer_lockup.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <test_progs.h>
+#include <pthread.h>
+#include <network_helpers.h>
+
+#include "timer_lockup.skel.h"
+
+static long cpu;
+static int *timer1_err;
+static int *timer2_err;
+static bool skip;
+
+volatile int k = 0;
+
+static void *timer_lockup_thread(void *arg)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1000,
+ );
+ int i, prog_fd = *(int *)arg;
+ cpu_set_t cpuset;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(__sync_fetch_and_add(&cpu, 1), &cpuset);
+ ASSERT_OK(pthread_setaffinity_np(pthread_self(), sizeof(cpuset),
+ &cpuset),
+ "cpu affinity");
+
+ for (i = 0; !READ_ONCE(*timer1_err) && !READ_ONCE(*timer2_err); i++) {
+ bpf_prog_test_run_opts(prog_fd, &opts);
+ /* Skip the test if we can't reproduce the race in a reasonable
+ * amount of time.
+ */
+ if (i > 50) {
+ WRITE_ONCE(skip, true);
+ break;
+ }
+ }
+
+ return NULL;
+}
+
+void test_timer_lockup(void)
+{
+ int timer1_prog, timer2_prog;
+ struct timer_lockup *skel;
+ pthread_t thrds[2];
+ void *ret;
+
+ skel = timer_lockup__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "timer_lockup__open_and_load"))
+ return;
+
+ timer1_prog = bpf_program__fd(skel->progs.timer1_prog);
+ timer2_prog = bpf_program__fd(skel->progs.timer2_prog);
+
+ timer1_err = &skel->bss->timer1_err;
+ timer2_err = &skel->bss->timer2_err;
+
+ if (!ASSERT_OK(pthread_create(&thrds[0], NULL, timer_lockup_thread,
+ &timer1_prog),
+ "pthread_create thread1"))
+ goto out;
+ if (!ASSERT_OK(pthread_create(&thrds[1], NULL, timer_lockup_thread,
+ &timer2_prog),
+ "pthread_create thread2")) {
+ pthread_exit(&thrds[0]);
+ goto out;
+ }
+
+ pthread_join(thrds[1], &ret);
+ pthread_join(thrds[0], &ret);
+
+ if (skip) {
+ test__skip();
+ goto out;
+ }
+
+ if (*timer1_err != -EDEADLK && *timer1_err != 0)
+ ASSERT_FAIL("timer1_err bad value");
+ if (*timer2_err != -EDEADLK && *timer2_err != 0)
+ ASSERT_FAIL("timer2_err bad value");
+out:
+ timer_lockup__destroy(skel);
+ return;
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/trace_printk.c b/tools/testing/selftests/bpf/prog_tests/trace_printk.c
index 7b9124d506a5..e56e88596d64 100644
--- a/tools/testing/selftests/bpf/prog_tests/trace_printk.c
+++ b/tools/testing/selftests/bpf/prog_tests/trace_printk.c
@@ -5,18 +5,19 @@
#include "trace_printk.lskel.h"
-#define TRACEFS_PIPE "/sys/kernel/tracing/trace_pipe"
-#define DEBUGFS_PIPE "/sys/kernel/debug/tracing/trace_pipe"
#define SEARCHMSG "testing,testing"
+static void trace_pipe_cb(const char *str, void *data)
+{
+ if (strstr(str, SEARCHMSG) != NULL)
+ (*(int *)data)++;
+}
+
void serial_test_trace_printk(void)
{
struct trace_printk_lskel__bss *bss;
- int err = 0, iter = 0, found = 0;
struct trace_printk_lskel *skel;
- char *buf = NULL;
- FILE *fp = NULL;
- size_t buflen;
+ int err = 0, found = 0;
skel = trace_printk_lskel__open();
if (!ASSERT_OK_PTR(skel, "trace_printk__open"))
@@ -35,16 +36,6 @@ void serial_test_trace_printk(void)
if (!ASSERT_OK(err, "trace_printk__attach"))
goto cleanup;
- if (access(TRACEFS_PIPE, F_OK) == 0)
- fp = fopen(TRACEFS_PIPE, "r");
- else
- fp = fopen(DEBUGFS_PIPE, "r");
- if (!ASSERT_OK_PTR(fp, "fopen(TRACE_PIPE)"))
- goto cleanup;
-
- /* We do not want to wait forever if this test fails... */
- fcntl(fileno(fp), F_SETFL, O_NONBLOCK);
-
/* wait for tracepoint to trigger */
usleep(1);
trace_printk_lskel__detach(skel);
@@ -56,21 +47,12 @@ void serial_test_trace_printk(void)
goto cleanup;
/* verify our search string is in the trace buffer */
- while (getline(&buf, &buflen, fp) >= 0 || errno == EAGAIN) {
- if (strstr(buf, SEARCHMSG) != NULL)
- found++;
- if (found == bss->trace_printk_ran)
- break;
- if (++iter > 1000)
- break;
- }
+ ASSERT_OK(read_trace_pipe_iter(trace_pipe_cb, &found, 1000),
+ "read_trace_pipe_iter");
if (!ASSERT_EQ(found, bss->trace_printk_ran, "found"))
goto cleanup;
cleanup:
trace_printk_lskel__destroy(skel);
- free(buf);
- if (fp)
- fclose(fp);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c b/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c
index 44ea2fd88f4c..2af6a6f2096a 100644
--- a/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c
+++ b/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c
@@ -5,18 +5,19 @@
#include "trace_vprintk.lskel.h"
-#define TRACEFS_PIPE "/sys/kernel/tracing/trace_pipe"
-#define DEBUGFS_PIPE "/sys/kernel/debug/tracing/trace_pipe"
#define SEARCHMSG "1,2,3,4,5,6,7,8,9,10"
+static void trace_pipe_cb(const char *str, void *data)
+{
+ if (strstr(str, SEARCHMSG) != NULL)
+ (*(int *)data)++;
+}
+
void serial_test_trace_vprintk(void)
{
struct trace_vprintk_lskel__bss *bss;
- int err = 0, iter = 0, found = 0;
struct trace_vprintk_lskel *skel;
- char *buf = NULL;
- FILE *fp = NULL;
- size_t buflen;
+ int err = 0, found = 0;
skel = trace_vprintk_lskel__open_and_load();
if (!ASSERT_OK_PTR(skel, "trace_vprintk__open_and_load"))
@@ -28,16 +29,6 @@ void serial_test_trace_vprintk(void)
if (!ASSERT_OK(err, "trace_vprintk__attach"))
goto cleanup;
- if (access(TRACEFS_PIPE, F_OK) == 0)
- fp = fopen(TRACEFS_PIPE, "r");
- else
- fp = fopen(DEBUGFS_PIPE, "r");
- if (!ASSERT_OK_PTR(fp, "fopen(TRACE_PIPE)"))
- goto cleanup;
-
- /* We do not want to wait forever if this test fails... */
- fcntl(fileno(fp), F_SETFL, O_NONBLOCK);
-
/* wait for tracepoint to trigger */
usleep(1);
trace_vprintk_lskel__detach(skel);
@@ -49,14 +40,8 @@ void serial_test_trace_vprintk(void)
goto cleanup;
/* verify our search string is in the trace buffer */
- while (getline(&buf, &buflen, fp) >= 0 || errno == EAGAIN) {
- if (strstr(buf, SEARCHMSG) != NULL)
- found++;
- if (found == bss->trace_vprintk_ran)
- break;
- if (++iter > 1000)
- break;
- }
+ ASSERT_OK(read_trace_pipe_iter(trace_pipe_cb, &found, 1000),
+ "read_trace_pipe_iter");
if (!ASSERT_EQ(found, bss->trace_vprintk_ran, "found"))
goto cleanup;
@@ -66,7 +51,4 @@ void serial_test_trace_vprintk(void)
cleanup:
trace_vprintk_lskel__destroy(skel);
- free(buf);
- if (fp)
- fclose(fp);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_struct.c b/tools/testing/selftests/bpf/prog_tests/tracing_struct.c
index fe0fb0c9849a..19e68d4b3532 100644
--- a/tools/testing/selftests/bpf/prog_tests/tracing_struct.c
+++ b/tools/testing/selftests/bpf/prog_tests/tracing_struct.c
@@ -3,8 +3,9 @@
#include <test_progs.h>
#include "tracing_struct.skel.h"
+#include "tracing_struct_many_args.skel.h"
-static void test_fentry(void)
+static void test_struct_args(void)
{
struct tracing_struct *skel;
int err;
@@ -55,6 +56,25 @@ static void test_fentry(void)
ASSERT_EQ(skel->bss->t6, 1, "t6 ret");
+destroy_skel:
+ tracing_struct__destroy(skel);
+}
+
+static void test_struct_many_args(void)
+{
+ struct tracing_struct_many_args *skel;
+ int err;
+
+ skel = tracing_struct_many_args__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "tracing_struct_many_args__open_and_load"))
+ return;
+
+ err = tracing_struct_many_args__attach(skel);
+ if (!ASSERT_OK(err, "tracing_struct_many_args__attach"))
+ goto destroy_skel;
+
+ ASSERT_OK(trigger_module_test_read(256), "trigger_read");
+
ASSERT_EQ(skel->bss->t7_a, 16, "t7:a");
ASSERT_EQ(skel->bss->t7_b, 17, "t7:b");
ASSERT_EQ(skel->bss->t7_c, 18, "t7:c");
@@ -74,12 +94,28 @@ static void test_fentry(void)
ASSERT_EQ(skel->bss->t8_g, 23, "t8:g");
ASSERT_EQ(skel->bss->t8_ret, 156, "t8 ret");
- tracing_struct__detach(skel);
+ ASSERT_EQ(skel->bss->t9_a, 16, "t9:a");
+ ASSERT_EQ(skel->bss->t9_b, 17, "t9:b");
+ ASSERT_EQ(skel->bss->t9_c, 18, "t9:c");
+ ASSERT_EQ(skel->bss->t9_d, 19, "t9:d");
+ ASSERT_EQ(skel->bss->t9_e, 20, "t9:e");
+ ASSERT_EQ(skel->bss->t9_f, 21, "t9:f");
+ ASSERT_EQ(skel->bss->t9_g, 22, "t9:f");
+ ASSERT_EQ(skel->bss->t9_h_a, 23, "t9:h.a");
+ ASSERT_EQ(skel->bss->t9_h_b, 24, "t9:h.b");
+ ASSERT_EQ(skel->bss->t9_h_c, 25, "t9:h.c");
+ ASSERT_EQ(skel->bss->t9_h_d, 26, "t9:h.d");
+ ASSERT_EQ(skel->bss->t9_i, 27, "t9:i");
+ ASSERT_EQ(skel->bss->t9_ret, 258, "t9 ret");
+
destroy_skel:
- tracing_struct__destroy(skel);
+ tracing_struct_many_args__destroy(skel);
}
void test_tracing_struct(void)
{
- test_fentry();
+ if (test__start_subtest("struct_args"))
+ test_struct_args();
+ if (test__start_subtest("struct_many_args"))
+ test_struct_many_args();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c
index 8269cdee33ae..bf6ca8e3eb13 100644
--- a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c
@@ -1,12 +1,14 @@
// SPDX-License-Identifier: GPL-2.0
#include <unistd.h>
+#include <pthread.h>
#include <test_progs.h>
#include "uprobe_multi.skel.h"
#include "uprobe_multi_bench.skel.h"
#include "uprobe_multi_usdt.skel.h"
#include "bpf/libbpf_internal.h"
#include "testing_helpers.h"
+#include "../sdt.h"
static char test_data[] = "test_data";
@@ -25,9 +27,17 @@ noinline void uprobe_multi_func_3(void)
asm volatile ("");
}
+noinline void usdt_trigger(void)
+{
+ STAP_PROBE(test, pid_filter_usdt);
+}
+
struct child {
int go[2];
+ int c2p[2]; /* child -> parent channel */
int pid;
+ int tid;
+ pthread_t thread;
};
static void release_child(struct child *child)
@@ -38,6 +48,10 @@ static void release_child(struct child *child)
return;
close(child->go[1]);
close(child->go[0]);
+ if (child->thread)
+ pthread_join(child->thread, NULL);
+ close(child->c2p[0]);
+ close(child->c2p[1]);
if (child->pid > 0)
waitpid(child->pid, &child_status, 0);
}
@@ -63,7 +77,7 @@ static struct child *spawn_child(void)
if (pipe(child.go))
return NULL;
- child.pid = fork();
+ child.pid = child.tid = fork();
if (child.pid < 0) {
release_child(&child);
errno = EINVAL;
@@ -82,6 +96,7 @@ static struct child *spawn_child(void)
uprobe_multi_func_1();
uprobe_multi_func_2();
uprobe_multi_func_3();
+ usdt_trigger();
exit(errno);
}
@@ -89,6 +104,67 @@ static struct child *spawn_child(void)
return &child;
}
+static void *child_thread(void *ctx)
+{
+ struct child *child = ctx;
+ int c = 0, err;
+
+ child->tid = syscall(SYS_gettid);
+
+ /* let parent know we are ready */
+ err = write(child->c2p[1], &c, 1);
+ if (err != 1)
+ pthread_exit(&err);
+
+ /* wait for parent's kick */
+ err = read(child->go[0], &c, 1);
+ if (err != 1)
+ pthread_exit(&err);
+
+ uprobe_multi_func_1();
+ uprobe_multi_func_2();
+ uprobe_multi_func_3();
+ usdt_trigger();
+
+ err = 0;
+ pthread_exit(&err);
+}
+
+static struct child *spawn_thread(void)
+{
+ static struct child child;
+ int c, err;
+
+ /* pipe to notify child to execute the trigger functions */
+ if (pipe(child.go))
+ return NULL;
+ /* pipe to notify parent that child thread is ready */
+ if (pipe(child.c2p)) {
+ close(child.go[0]);
+ close(child.go[1]);
+ return NULL;
+ }
+
+ child.pid = getpid();
+
+ err = pthread_create(&child.thread, NULL, child_thread, &child);
+ if (err) {
+ err = -errno;
+ close(child.go[0]);
+ close(child.go[1]);
+ close(child.c2p[0]);
+ close(child.c2p[1]);
+ errno = -err;
+ return NULL;
+ }
+
+ err = read(child.c2p[0], &c, 1);
+ if (!ASSERT_EQ(err, 1, "child_thread_ready"))
+ return NULL;
+
+ return &child;
+}
+
static void uprobe_multi_test_run(struct uprobe_multi *skel, struct child *child)
{
skel->bss->uprobe_multi_func_1_addr = (__u64) uprobe_multi_func_1;
@@ -103,15 +179,23 @@ static void uprobe_multi_test_run(struct uprobe_multi *skel, struct child *child
* passed at the probe attach.
*/
skel->bss->pid = child ? 0 : getpid();
+ skel->bss->expect_pid = child ? child->pid : 0;
+
+ /* trigger all probes, if we are testing child *process*, just to make
+ * sure that PID filtering doesn't let through activations from wrong
+ * PIDs; when we test child *thread*, we don't want to do this to
+ * avoid double counting number of triggering events
+ */
+ if (!child || !child->thread) {
+ uprobe_multi_func_1();
+ uprobe_multi_func_2();
+ uprobe_multi_func_3();
+ usdt_trigger();
+ }
if (child)
kick_child(child);
- /* trigger all probes */
- uprobe_multi_func_1();
- uprobe_multi_func_2();
- uprobe_multi_func_3();
-
/*
* There are 2 entry and 2 exit probe called for each uprobe_multi_func_[123]
* function and each slepable probe (6) increments uprobe_multi_sleep_result.
@@ -126,8 +210,12 @@ static void uprobe_multi_test_run(struct uprobe_multi *skel, struct child *child
ASSERT_EQ(skel->bss->uprobe_multi_sleep_result, 6, "uprobe_multi_sleep_result");
- if (child)
+ ASSERT_FALSE(skel->bss->bad_pid_seen, "bad_pid_seen");
+
+ if (child) {
ASSERT_EQ(skel->bss->child_pid, child->pid, "uprobe_multi_child_pid");
+ ASSERT_EQ(skel->bss->child_tid, child->tid, "uprobe_multi_child_tid");
+ }
}
static void test_skel_api(void)
@@ -190,8 +278,24 @@ __test_attach_api(const char *binary, const char *pattern, struct bpf_uprobe_mul
if (!ASSERT_OK_PTR(skel->links.uprobe_extra, "bpf_program__attach_uprobe_multi"))
goto cleanup;
+ /* Attach (uprobe-backed) USDTs */
+ skel->links.usdt_pid = bpf_program__attach_usdt(skel->progs.usdt_pid, pid, binary,
+ "test", "pid_filter_usdt", NULL);
+ if (!ASSERT_OK_PTR(skel->links.usdt_pid, "attach_usdt_pid"))
+ goto cleanup;
+
+ skel->links.usdt_extra = bpf_program__attach_usdt(skel->progs.usdt_extra, -1, binary,
+ "test", "pid_filter_usdt", NULL);
+ if (!ASSERT_OK_PTR(skel->links.usdt_extra, "attach_usdt_extra"))
+ goto cleanup;
+
uprobe_multi_test_run(skel, child);
+ ASSERT_FALSE(skel->bss->bad_pid_seen_usdt, "bad_pid_seen_usdt");
+ if (child) {
+ ASSERT_EQ(skel->bss->child_pid_usdt, child->pid, "usdt_multi_child_pid");
+ ASSERT_EQ(skel->bss->child_tid_usdt, child->tid, "usdt_multi_child_tid");
+ }
cleanup:
uprobe_multi__destroy(skel);
}
@@ -210,6 +314,13 @@ test_attach_api(const char *binary, const char *pattern, struct bpf_uprobe_multi
return;
__test_attach_api(binary, pattern, opts, child);
+
+ /* pid filter (thread) */
+ child = spawn_thread();
+ if (!ASSERT_OK_PTR(child, "spawn_thread"))
+ return;
+
+ __test_attach_api(binary, pattern, opts, child);
}
static void test_attach_api_pattern(void)
@@ -397,7 +508,7 @@ static void test_attach_api_fails(void)
link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
if (!ASSERT_ERR(link_fd, "link_fd"))
goto cleanup;
- ASSERT_EQ(link_fd, -ESRCH, "pid_is_wrong");
+ ASSERT_EQ(link_fd, -EINVAL, "pid_is_wrong");
cleanup:
if (link_fd >= 0)
@@ -495,6 +606,13 @@ static void test_link_api(void)
return;
__test_link_api(child);
+
+ /* pid filter (thread) */
+ child = spawn_thread();
+ if (!ASSERT_OK_PTR(child, "spawn_thread"))
+ return;
+
+ __test_link_api(child);
}
static void test_bench_attach_uprobe(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
new file mode 100644
index 000000000000..c397336fe1ed
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe_syscall.c
@@ -0,0 +1,385 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+
+#ifdef __x86_64__
+
+#include <unistd.h>
+#include <asm/ptrace.h>
+#include <linux/compiler.h>
+#include <linux/stringify.h>
+#include <sys/wait.h>
+#include <sys/syscall.h>
+#include <sys/prctl.h>
+#include <asm/prctl.h>
+#include "uprobe_syscall.skel.h"
+#include "uprobe_syscall_executed.skel.h"
+
+__naked unsigned long uretprobe_regs_trigger(void)
+{
+ asm volatile (
+ "movq $0xdeadbeef, %rax\n"
+ "ret\n"
+ );
+}
+
+__naked void uretprobe_regs(struct pt_regs *before, struct pt_regs *after)
+{
+ asm volatile (
+ "movq %r15, 0(%rdi)\n"
+ "movq %r14, 8(%rdi)\n"
+ "movq %r13, 16(%rdi)\n"
+ "movq %r12, 24(%rdi)\n"
+ "movq %rbp, 32(%rdi)\n"
+ "movq %rbx, 40(%rdi)\n"
+ "movq %r11, 48(%rdi)\n"
+ "movq %r10, 56(%rdi)\n"
+ "movq %r9, 64(%rdi)\n"
+ "movq %r8, 72(%rdi)\n"
+ "movq %rax, 80(%rdi)\n"
+ "movq %rcx, 88(%rdi)\n"
+ "movq %rdx, 96(%rdi)\n"
+ "movq %rsi, 104(%rdi)\n"
+ "movq %rdi, 112(%rdi)\n"
+ "movq $0, 120(%rdi)\n" /* orig_rax */
+ "movq $0, 128(%rdi)\n" /* rip */
+ "movq $0, 136(%rdi)\n" /* cs */
+ "pushf\n"
+ "pop %rax\n"
+ "movq %rax, 144(%rdi)\n" /* eflags */
+ "movq %rsp, 152(%rdi)\n" /* rsp */
+ "movq $0, 160(%rdi)\n" /* ss */
+
+ /* save 2nd argument */
+ "pushq %rsi\n"
+ "call uretprobe_regs_trigger\n"
+
+ /* save return value and load 2nd argument pointer to rax */
+ "pushq %rax\n"
+ "movq 8(%rsp), %rax\n"
+
+ "movq %r15, 0(%rax)\n"
+ "movq %r14, 8(%rax)\n"
+ "movq %r13, 16(%rax)\n"
+ "movq %r12, 24(%rax)\n"
+ "movq %rbp, 32(%rax)\n"
+ "movq %rbx, 40(%rax)\n"
+ "movq %r11, 48(%rax)\n"
+ "movq %r10, 56(%rax)\n"
+ "movq %r9, 64(%rax)\n"
+ "movq %r8, 72(%rax)\n"
+ "movq %rcx, 88(%rax)\n"
+ "movq %rdx, 96(%rax)\n"
+ "movq %rsi, 104(%rax)\n"
+ "movq %rdi, 112(%rax)\n"
+ "movq $0, 120(%rax)\n" /* orig_rax */
+ "movq $0, 128(%rax)\n" /* rip */
+ "movq $0, 136(%rax)\n" /* cs */
+
+ /* restore return value and 2nd argument */
+ "pop %rax\n"
+ "pop %rsi\n"
+
+ "movq %rax, 80(%rsi)\n"
+
+ "pushf\n"
+ "pop %rax\n"
+
+ "movq %rax, 144(%rsi)\n" /* eflags */
+ "movq %rsp, 152(%rsi)\n" /* rsp */
+ "movq $0, 160(%rsi)\n" /* ss */
+ "ret\n"
+);
+}
+
+static void test_uretprobe_regs_equal(void)
+{
+ struct uprobe_syscall *skel = NULL;
+ struct pt_regs before = {}, after = {};
+ unsigned long *pb = (unsigned long *) &before;
+ unsigned long *pa = (unsigned long *) &after;
+ unsigned long *pp;
+ unsigned int i, cnt;
+ int err;
+
+ skel = uprobe_syscall__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_syscall__open_and_load"))
+ goto cleanup;
+
+ err = uprobe_syscall__attach(skel);
+ if (!ASSERT_OK(err, "uprobe_syscall__attach"))
+ goto cleanup;
+
+ uretprobe_regs(&before, &after);
+
+ pp = (unsigned long *) &skel->bss->regs;
+ cnt = sizeof(before)/sizeof(*pb);
+
+ for (i = 0; i < cnt; i++) {
+ unsigned int offset = i * sizeof(unsigned long);
+
+ /*
+ * Check register before and after uretprobe_regs_trigger call
+ * that triggers the uretprobe.
+ */
+ switch (offset) {
+ case offsetof(struct pt_regs, rax):
+ ASSERT_EQ(pa[i], 0xdeadbeef, "return value");
+ break;
+ default:
+ if (!ASSERT_EQ(pb[i], pa[i], "register before-after value check"))
+ fprintf(stdout, "failed register offset %u\n", offset);
+ }
+
+ /*
+ * Check register seen from bpf program and register after
+ * uretprobe_regs_trigger call
+ */
+ switch (offset) {
+ /*
+ * These values will be different (not set in uretprobe_regs),
+ * we don't care.
+ */
+ case offsetof(struct pt_regs, orig_rax):
+ case offsetof(struct pt_regs, rip):
+ case offsetof(struct pt_regs, cs):
+ case offsetof(struct pt_regs, rsp):
+ case offsetof(struct pt_regs, ss):
+ break;
+ default:
+ if (!ASSERT_EQ(pp[i], pa[i], "register prog-after value check"))
+ fprintf(stdout, "failed register offset %u\n", offset);
+ }
+ }
+
+cleanup:
+ uprobe_syscall__destroy(skel);
+}
+
+#define BPF_TESTMOD_UPROBE_TEST_FILE "/sys/kernel/bpf_testmod_uprobe"
+
+static int write_bpf_testmod_uprobe(unsigned long offset)
+{
+ size_t n, ret;
+ char buf[30];
+ int fd;
+
+ n = sprintf(buf, "%lu", offset);
+
+ fd = open(BPF_TESTMOD_UPROBE_TEST_FILE, O_WRONLY);
+ if (fd < 0)
+ return -errno;
+
+ ret = write(fd, buf, n);
+ close(fd);
+ return ret != n ? (int) ret : 0;
+}
+
+static void test_uretprobe_regs_change(void)
+{
+ struct pt_regs before = {}, after = {};
+ unsigned long *pb = (unsigned long *) &before;
+ unsigned long *pa = (unsigned long *) &after;
+ unsigned long cnt = sizeof(before)/sizeof(*pb);
+ unsigned int i, err, offset;
+
+ offset = get_uprobe_offset(uretprobe_regs_trigger);
+
+ err = write_bpf_testmod_uprobe(offset);
+ if (!ASSERT_OK(err, "register_uprobe"))
+ return;
+
+ uretprobe_regs(&before, &after);
+
+ err = write_bpf_testmod_uprobe(0);
+ if (!ASSERT_OK(err, "unregister_uprobe"))
+ return;
+
+ for (i = 0; i < cnt; i++) {
+ unsigned int offset = i * sizeof(unsigned long);
+
+ switch (offset) {
+ case offsetof(struct pt_regs, rax):
+ ASSERT_EQ(pa[i], 0x12345678deadbeef, "rax");
+ break;
+ case offsetof(struct pt_regs, rcx):
+ ASSERT_EQ(pa[i], 0x87654321feebdaed, "rcx");
+ break;
+ case offsetof(struct pt_regs, r11):
+ ASSERT_EQ(pa[i], (__u64) -1, "r11");
+ break;
+ default:
+ if (!ASSERT_EQ(pa[i], pb[i], "register before-after value check"))
+ fprintf(stdout, "failed register offset %u\n", offset);
+ }
+ }
+}
+
+#ifndef __NR_uretprobe
+#define __NR_uretprobe 335
+#endif
+
+__naked unsigned long uretprobe_syscall_call_1(void)
+{
+ /*
+ * Pretend we are uretprobe trampoline to trigger the return
+ * probe invocation in order to verify we get SIGILL.
+ */
+ asm volatile (
+ "pushq %rax\n"
+ "pushq %rcx\n"
+ "pushq %r11\n"
+ "movq $" __stringify(__NR_uretprobe) ", %rax\n"
+ "syscall\n"
+ "popq %r11\n"
+ "popq %rcx\n"
+ "retq\n"
+ );
+}
+
+__naked unsigned long uretprobe_syscall_call(void)
+{
+ asm volatile (
+ "call uretprobe_syscall_call_1\n"
+ "retq\n"
+ );
+}
+
+static void test_uretprobe_syscall_call(void)
+{
+ LIBBPF_OPTS(bpf_uprobe_multi_opts, opts,
+ .retprobe = true,
+ );
+ struct uprobe_syscall_executed *skel;
+ int pid, status, err, go[2], c;
+
+ if (!ASSERT_OK(pipe(go), "pipe"))
+ return;
+
+ skel = uprobe_syscall_executed__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_syscall_executed__open_and_load"))
+ goto cleanup;
+
+ pid = fork();
+ if (!ASSERT_GE(pid, 0, "fork"))
+ goto cleanup;
+
+ /* child */
+ if (pid == 0) {
+ close(go[1]);
+
+ /* wait for parent's kick */
+ err = read(go[0], &c, 1);
+ if (err != 1)
+ exit(-1);
+
+ uretprobe_syscall_call();
+ _exit(0);
+ }
+
+ skel->links.test = bpf_program__attach_uprobe_multi(skel->progs.test, pid,
+ "/proc/self/exe",
+ "uretprobe_syscall_call", &opts);
+ if (!ASSERT_OK_PTR(skel->links.test, "bpf_program__attach_uprobe_multi"))
+ goto cleanup;
+
+ /* kick the child */
+ write(go[1], &c, 1);
+ err = waitpid(pid, &status, 0);
+ ASSERT_EQ(err, pid, "waitpid");
+
+ /* verify the child got killed with SIGILL */
+ ASSERT_EQ(WIFSIGNALED(status), 1, "WIFSIGNALED");
+ ASSERT_EQ(WTERMSIG(status), SIGILL, "WTERMSIG");
+
+ /* verify the uretprobe program wasn't called */
+ ASSERT_EQ(skel->bss->executed, 0, "executed");
+
+cleanup:
+ uprobe_syscall_executed__destroy(skel);
+ close(go[1]);
+ close(go[0]);
+}
+
+/*
+ * Borrowed from tools/testing/selftests/x86/test_shadow_stack.c.
+ *
+ * For use in inline enablement of shadow stack.
+ *
+ * The program can't return from the point where shadow stack gets enabled
+ * because there will be no address on the shadow stack. So it can't use
+ * syscall() for enablement, since it is a function.
+ *
+ * Based on code from nolibc.h. Keep a copy here because this can't pull
+ * in all of nolibc.h.
+ */
+#define ARCH_PRCTL(arg1, arg2) \
+({ \
+ long _ret; \
+ register long _num asm("eax") = __NR_arch_prctl; \
+ register long _arg1 asm("rdi") = (long)(arg1); \
+ register long _arg2 asm("rsi") = (long)(arg2); \
+ \
+ asm volatile ( \
+ "syscall\n" \
+ : "=a"(_ret) \
+ : "r"(_arg1), "r"(_arg2), \
+ "0"(_num) \
+ : "rcx", "r11", "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+#ifndef ARCH_SHSTK_ENABLE
+#define ARCH_SHSTK_ENABLE 0x5001
+#define ARCH_SHSTK_DISABLE 0x5002
+#define ARCH_SHSTK_SHSTK (1ULL << 0)
+#endif
+
+static void test_uretprobe_shadow_stack(void)
+{
+ if (ARCH_PRCTL(ARCH_SHSTK_ENABLE, ARCH_SHSTK_SHSTK)) {
+ test__skip();
+ return;
+ }
+
+ /* Run all of the uretprobe tests. */
+ test_uretprobe_regs_equal();
+ test_uretprobe_regs_change();
+ test_uretprobe_syscall_call();
+
+ ARCH_PRCTL(ARCH_SHSTK_DISABLE, ARCH_SHSTK_SHSTK);
+}
+#else
+static void test_uretprobe_regs_equal(void)
+{
+ test__skip();
+}
+
+static void test_uretprobe_regs_change(void)
+{
+ test__skip();
+}
+
+static void test_uretprobe_syscall_call(void)
+{
+ test__skip();
+}
+
+static void test_uretprobe_shadow_stack(void)
+{
+ test__skip();
+}
+#endif
+
+void test_uprobe_syscall(void)
+{
+ if (test__start_subtest("uretprobe_regs_equal"))
+ test_uretprobe_regs_equal();
+ if (test__start_subtest("uretprobe_regs_change"))
+ test_uretprobe_regs_change();
+ if (test__start_subtest("uretprobe_syscall_call"))
+ test_uretprobe_syscall_call();
+ if (test__start_subtest("uretprobe_shadow_stack"))
+ test_uretprobe_shadow_stack();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/uretprobe_stack.c b/tools/testing/selftests/bpf/prog_tests/uretprobe_stack.c
new file mode 100644
index 000000000000..6deb8d560ddd
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/uretprobe_stack.c
@@ -0,0 +1,186 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include "uretprobe_stack.skel.h"
+#include "../sdt.h"
+
+/* We set up target_1() -> target_2() -> target_3() -> target_4() -> USDT()
+ * call chain, each being traced by our BPF program. On entry or return from
+ * each target_*() we are capturing user stack trace and recording it in
+ * global variable, so that user space part of the test can validate it.
+ *
+ * Note, we put each target function into a custom section to get those
+ * __start_XXX/__stop_XXX symbols, generated by linker for us, which allow us
+ * to know address range of those functions
+ */
+__attribute__((section("uprobe__target_4")))
+__weak int target_4(void)
+{
+ STAP_PROBE1(uretprobe_stack, target, 42);
+ return 42;
+}
+
+extern const void *__start_uprobe__target_4;
+extern const void *__stop_uprobe__target_4;
+
+__attribute__((section("uprobe__target_3")))
+__weak int target_3(void)
+{
+ return target_4();
+}
+
+extern const void *__start_uprobe__target_3;
+extern const void *__stop_uprobe__target_3;
+
+__attribute__((section("uprobe__target_2")))
+__weak int target_2(void)
+{
+ return target_3();
+}
+
+extern const void *__start_uprobe__target_2;
+extern const void *__stop_uprobe__target_2;
+
+__attribute__((section("uprobe__target_1")))
+__weak int target_1(int depth)
+{
+ if (depth < 1)
+ return 1 + target_1(depth + 1);
+ else
+ return target_2();
+}
+
+extern const void *__start_uprobe__target_1;
+extern const void *__stop_uprobe__target_1;
+
+extern const void *__start_uretprobe_stack_sec;
+extern const void *__stop_uretprobe_stack_sec;
+
+struct range {
+ long start;
+ long stop;
+};
+
+static struct range targets[] = {
+ {}, /* we want target_1 to map to target[1], so need 1-based indexing */
+ { (long)&__start_uprobe__target_1, (long)&__stop_uprobe__target_1 },
+ { (long)&__start_uprobe__target_2, (long)&__stop_uprobe__target_2 },
+ { (long)&__start_uprobe__target_3, (long)&__stop_uprobe__target_3 },
+ { (long)&__start_uprobe__target_4, (long)&__stop_uprobe__target_4 },
+};
+
+static struct range caller = {
+ (long)&__start_uretprobe_stack_sec,
+ (long)&__stop_uretprobe_stack_sec,
+};
+
+static void validate_stack(__u64 *ips, int stack_len, int cnt, ...)
+{
+ int i, j;
+ va_list args;
+
+ if (!ASSERT_GT(stack_len, 0, "stack_len"))
+ return;
+
+ stack_len /= 8;
+
+ /* check if we have enough entries to satisfy test expectations */
+ if (!ASSERT_GE(stack_len, cnt, "stack_len2"))
+ return;
+
+ if (env.verbosity >= VERBOSE_NORMAL) {
+ printf("caller: %#lx - %#lx\n", caller.start, caller.stop);
+ for (i = 1; i < ARRAY_SIZE(targets); i++)
+ printf("target_%d: %#lx - %#lx\n", i, targets[i].start, targets[i].stop);
+ for (i = 0; i < stack_len; i++) {
+ for (j = 1; j < ARRAY_SIZE(targets); j++) {
+ if (ips[i] >= targets[j].start && ips[i] < targets[j].stop)
+ break;
+ }
+ if (j < ARRAY_SIZE(targets)) { /* found target match */
+ printf("ENTRY #%d: %#lx (in target_%d)\n", i, (long)ips[i], j);
+ } else if (ips[i] >= caller.start && ips[i] < caller.stop) {
+ printf("ENTRY #%d: %#lx (in caller)\n", i, (long)ips[i]);
+ } else {
+ printf("ENTRY #%d: %#lx\n", i, (long)ips[i]);
+ }
+ }
+ }
+
+ va_start(args, cnt);
+
+ for (i = cnt - 1; i >= 0; i--) {
+ /* most recent entry is the deepest target function */
+ const struct range *t = va_arg(args, const struct range *);
+
+ ASSERT_GE(ips[i], t->start, "addr_start");
+ ASSERT_LT(ips[i], t->stop, "addr_stop");
+ }
+
+ va_end(args);
+}
+
+/* __weak prevents inlining */
+__attribute__((section("uretprobe_stack_sec")))
+__weak void test_uretprobe_stack(void)
+{
+ LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
+ struct uretprobe_stack *skel;
+ int err;
+
+ skel = uretprobe_stack__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ err = uretprobe_stack__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ /* trigger */
+ ASSERT_EQ(target_1(0), 42 + 1, "trigger_return");
+
+ /*
+ * Stacks captured on ENTRY uprobes
+ */
+
+ /* (uprobe 1) target_1 in stack trace*/
+ validate_stack(skel->bss->entry_stack1, skel->bss->entry1_len,
+ 2, &caller, &targets[1]);
+ /* (uprobe 1, recursed) */
+ validate_stack(skel->bss->entry_stack1_recur, skel->bss->entry1_recur_len,
+ 3, &caller, &targets[1], &targets[1]);
+ /* (uprobe 2) caller -> target_1 -> target_1 -> target_2 */
+ validate_stack(skel->bss->entry_stack2, skel->bss->entry2_len,
+ 4, &caller, &targets[1], &targets[1], &targets[2]);
+ /* (uprobe 3) */
+ validate_stack(skel->bss->entry_stack3, skel->bss->entry3_len,
+ 5, &caller, &targets[1], &targets[1], &targets[2], &targets[3]);
+ /* (uprobe 4) caller -> target_1 -> target_1 -> target_2 -> target_3 -> target_4 */
+ validate_stack(skel->bss->entry_stack4, skel->bss->entry4_len,
+ 6, &caller, &targets[1], &targets[1], &targets[2], &targets[3], &targets[4]);
+
+ /* (USDT): full caller -> target_1 -> target_1 -> target_2 (uretprobed)
+ * -> target_3 -> target_4 (uretprobes) chain
+ */
+ validate_stack(skel->bss->usdt_stack, skel->bss->usdt_len,
+ 6, &caller, &targets[1], &targets[1], &targets[2], &targets[3], &targets[4]);
+
+ /*
+ * Now stacks captured on the way out in EXIT uprobes
+ */
+
+ /* (uretprobe 4) everything up to target_4, but excluding it */
+ validate_stack(skel->bss->exit_stack4, skel->bss->exit4_len,
+ 5, &caller, &targets[1], &targets[1], &targets[2], &targets[3]);
+ /* we didn't install uretprobes on target_2 and target_3 */
+ /* (uretprobe 1, recur) first target_1 call only */
+ validate_stack(skel->bss->exit_stack1_recur, skel->bss->exit1_recur_len,
+ 2, &caller, &targets[1]);
+ /* (uretprobe 1) just a caller in the stack trace */
+ validate_stack(skel->bss->exit_stack1, skel->bss->exit1_len,
+ 1, &caller);
+
+cleanup:
+ uretprobe_stack__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
index c4f9f306646e..9dc3687bc406 100644
--- a/tools/testing/selftests/bpf/prog_tests/verifier.c
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -53,6 +53,7 @@
#include "verifier_movsx.skel.h"
#include "verifier_netfilter_ctx.skel.h"
#include "verifier_netfilter_retcode.skel.h"
+#include "verifier_or_jmp32_k.skel.h"
#include "verifier_precision.skel.h"
#include "verifier_prevent_map_lookup.skel.h"
#include "verifier_raw_stack.skel.h"
@@ -66,6 +67,8 @@
#include "verifier_sdiv.skel.h"
#include "verifier_search_pruning.skel.h"
#include "verifier_sock.skel.h"
+#include "verifier_sock_addr.skel.h"
+#include "verifier_sockmap_mutate.skel.h"
#include "verifier_spill_fill.skel.h"
#include "verifier_spin_lock.skel.h"
#include "verifier_stack_ptr.skel.h"
@@ -84,6 +87,7 @@
#include "verifier_xadd.skel.h"
#include "verifier_xdp.skel.h"
#include "verifier_xdp_direct_packet_access.skel.h"
+#include "verifier_bits_iter.skel.h"
#define MAX_ENTRIES 11
@@ -168,6 +172,7 @@ void test_verifier_meta_access(void) { RUN(verifier_meta_access); }
void test_verifier_movsx(void) { RUN(verifier_movsx); }
void test_verifier_netfilter_ctx(void) { RUN(verifier_netfilter_ctx); }
void test_verifier_netfilter_retcode(void) { RUN(verifier_netfilter_retcode); }
+void test_verifier_or_jmp32_k(void) { RUN(verifier_or_jmp32_k); }
void test_verifier_precision(void) { RUN(verifier_precision); }
void test_verifier_prevent_map_lookup(void) { RUN(verifier_prevent_map_lookup); }
void test_verifier_raw_stack(void) { RUN(verifier_raw_stack); }
@@ -181,6 +186,8 @@ void test_verifier_scalar_ids(void) { RUN(verifier_scalar_ids); }
void test_verifier_sdiv(void) { RUN(verifier_sdiv); }
void test_verifier_search_pruning(void) { RUN(verifier_search_pruning); }
void test_verifier_sock(void) { RUN(verifier_sock); }
+void test_verifier_sock_addr(void) { RUN(verifier_sock_addr); }
+void test_verifier_sockmap_mutate(void) { RUN(verifier_sockmap_mutate); }
void test_verifier_spill_fill(void) { RUN(verifier_spill_fill); }
void test_verifier_spin_lock(void) { RUN(verifier_spin_lock); }
void test_verifier_stack_ptr(void) { RUN(verifier_stack_ptr); }
@@ -198,6 +205,7 @@ void test_verifier_var_off(void) { RUN(verifier_var_off); }
void test_verifier_xadd(void) { RUN(verifier_xadd); }
void test_verifier_xdp(void) { RUN(verifier_xdp); }
void test_verifier_xdp_direct_packet_access(void) { RUN(verifier_xdp_direct_packet_access); }
+void test_verifier_bits_iter(void) { RUN(verifier_bits_iter); }
static int init_test_val_map(struct bpf_object *obj, char *map_name)
{
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier_kfunc_prog_types.c b/tools/testing/selftests/bpf/prog_tests/verifier_kfunc_prog_types.c
new file mode 100644
index 000000000000..3918ecc2ee91
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/verifier_kfunc_prog_types.c
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+
+#include "verifier_kfunc_prog_types.skel.h"
+
+void test_verifier_kfunc_prog_types(void)
+{
+ RUN_TESTS(verifier_kfunc_prog_types);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/wq.c b/tools/testing/selftests/bpf/prog_tests/wq.c
new file mode 100644
index 000000000000..99e438fe12ac
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/wq.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Benjamin Tissoires */
+#include <test_progs.h>
+#include "wq.skel.h"
+#include "wq_failures.skel.h"
+
+void serial_test_wq(void)
+{
+ struct wq *wq_skel = NULL;
+ int err, prog_fd;
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ RUN_TESTS(wq);
+
+ /* re-run the success test to check if the timer was actually executed */
+
+ wq_skel = wq__open_and_load();
+ if (!ASSERT_OK_PTR(wq_skel, "wq_skel_load"))
+ return;
+
+ err = wq__attach(wq_skel);
+ if (!ASSERT_OK(err, "wq_attach"))
+ return;
+
+ prog_fd = bpf_program__fd(wq_skel->progs.test_syscall_array_sleepable);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
+
+ usleep(50); /* 10 usecs should be enough, but give it extra */
+
+ ASSERT_EQ(wq_skel->bss->ok_sleepable, (1 << 1), "ok_sleepable");
+ wq__destroy(wq_skel);
+}
+
+void serial_test_failures_wq(void)
+{
+ RUN_TESTS(wq_failures);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
index f09505f8b038..53d6ad8c2257 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
@@ -222,7 +222,7 @@ static void test_xdp_adjust_frags_tail_grow(void)
prog = bpf_object__next_program(obj, NULL);
if (bpf_object__load(obj))
- return;
+ goto out;
prog_fd = bpf_program__fd(prog);
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
index 498d3bdaa4b0..bad0ea167be7 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
@@ -107,8 +107,8 @@ void test_xdp_do_redirect(void)
.attach_point = BPF_TC_INGRESS);
memcpy(&data[sizeof(__u64)], &pkt_udp, sizeof(pkt_udp));
- *((__u32 *)data) = 0x42; /* metadata test value */
- *((__u32 *)data + 4) = 0;
+ ((__u32 *)data)[0] = 0x42; /* metadata test value */
+ ((__u32 *)data)[1] = 0;
skel = test_xdp_do_redirect__open();
if (!ASSERT_OK_PTR(skel, "skel"))
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c b/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c
new file mode 100644
index 000000000000..e1bf141d3401
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_flowtable.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+#include <bpf/btf.h>
+#include <linux/if_link.h>
+#include <linux/udp.h>
+#include <net/if.h>
+#include <unistd.h>
+
+#include "xdp_flowtable.skel.h"
+
+#define TX_NETNS_NAME "ns0"
+#define RX_NETNS_NAME "ns1"
+
+#define TX_NAME "v0"
+#define FORWARD_NAME "v1"
+#define RX_NAME "d0"
+
+#define TX_MAC "00:00:00:00:00:01"
+#define FORWARD_MAC "00:00:00:00:00:02"
+#define RX_MAC "00:00:00:00:00:03"
+#define DST_MAC "00:00:00:00:00:04"
+
+#define TX_ADDR "10.0.0.1"
+#define FORWARD_ADDR "10.0.0.2"
+#define RX_ADDR "20.0.0.1"
+#define DST_ADDR "20.0.0.2"
+
+#define PREFIX_LEN "8"
+#define N_PACKETS 10
+#define UDP_PORT 12345
+#define UDP_PORT_STR "12345"
+
+static int send_udp_traffic(void)
+{
+ struct sockaddr_storage addr;
+ int i, sock;
+
+ if (make_sockaddr(AF_INET, DST_ADDR, UDP_PORT, &addr, NULL))
+ return -EINVAL;
+
+ sock = socket(AF_INET, SOCK_DGRAM, 0);
+ if (sock < 0)
+ return sock;
+
+ for (i = 0; i < N_PACKETS; i++) {
+ unsigned char buf[] = { 0xaa, 0xbb, 0xcc };
+ int n;
+
+ n = sendto(sock, buf, sizeof(buf), MSG_NOSIGNAL | MSG_CONFIRM,
+ (struct sockaddr *)&addr, sizeof(addr));
+ if (n != sizeof(buf)) {
+ close(sock);
+ return -EINVAL;
+ }
+
+ usleep(50000); /* 50ms */
+ }
+ close(sock);
+
+ return 0;
+}
+
+void test_xdp_flowtable(void)
+{
+ struct xdp_flowtable *skel = NULL;
+ struct nstoken *tok = NULL;
+ int iifindex, stats_fd;
+ __u32 value, key = 0;
+ struct bpf_link *link;
+
+ if (SYS_NOFAIL("nft -v")) {
+ fprintf(stdout, "Missing required nft tool\n");
+ test__skip();
+ return;
+ }
+
+ SYS(out, "ip netns add " TX_NETNS_NAME);
+ SYS(out, "ip netns add " RX_NETNS_NAME);
+
+ tok = open_netns(RX_NETNS_NAME);
+ if (!ASSERT_OK_PTR(tok, "setns"))
+ goto out;
+
+ SYS(out, "sysctl -qw net.ipv4.conf.all.forwarding=1");
+
+ SYS(out, "ip link add " TX_NAME " type veth peer " FORWARD_NAME);
+ SYS(out, "ip link set " TX_NAME " netns " TX_NETNS_NAME);
+ SYS(out, "ip link set dev " FORWARD_NAME " address " FORWARD_MAC);
+ SYS(out,
+ "ip addr add " FORWARD_ADDR "/" PREFIX_LEN " dev " FORWARD_NAME);
+ SYS(out, "ip link set dev " FORWARD_NAME " up");
+
+ SYS(out, "ip link add " RX_NAME " type dummy");
+ SYS(out, "ip link set dev " RX_NAME " address " RX_MAC);
+ SYS(out, "ip addr add " RX_ADDR "/" PREFIX_LEN " dev " RX_NAME);
+ SYS(out, "ip link set dev " RX_NAME " up");
+
+ /* configure the flowtable */
+ SYS(out, "nft add table ip filter");
+ SYS(out,
+ "nft add flowtable ip filter f { hook ingress priority 0\\; "
+ "devices = { " FORWARD_NAME ", " RX_NAME " }\\; }");
+ SYS(out,
+ "nft add chain ip filter forward "
+ "{ type filter hook forward priority 0\\; }");
+ SYS(out,
+ "nft add rule ip filter forward ip protocol udp th dport "
+ UDP_PORT_STR " flow add @f");
+
+ /* Avoid ARP calls */
+ SYS(out,
+ "ip -4 neigh add " DST_ADDR " lladdr " DST_MAC " dev " RX_NAME);
+
+ close_netns(tok);
+ tok = open_netns(TX_NETNS_NAME);
+ if (!ASSERT_OK_PTR(tok, "setns"))
+ goto out;
+
+ SYS(out, "ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME);
+ SYS(out, "ip link set dev " TX_NAME " address " TX_MAC);
+ SYS(out, "ip link set dev " TX_NAME " up");
+ SYS(out, "ip route add default via " FORWARD_ADDR);
+
+ close_netns(tok);
+ tok = open_netns(RX_NETNS_NAME);
+ if (!ASSERT_OK_PTR(tok, "setns"))
+ goto out;
+
+ iifindex = if_nametoindex(FORWARD_NAME);
+ if (!ASSERT_NEQ(iifindex, 0, "iifindex"))
+ goto out;
+
+ skel = xdp_flowtable__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel"))
+ goto out;
+
+ link = bpf_program__attach_xdp(skel->progs.xdp_flowtable_do_lookup,
+ iifindex);
+ if (!ASSERT_OK_PTR(link, "prog_attach"))
+ goto out;
+
+ close_netns(tok);
+ tok = open_netns(TX_NETNS_NAME);
+ if (!ASSERT_OK_PTR(tok, "setns"))
+ goto out;
+
+ if (!ASSERT_OK(send_udp_traffic(), "send udp"))
+ goto out;
+
+ close_netns(tok);
+ tok = open_netns(RX_NETNS_NAME);
+ if (!ASSERT_OK_PTR(tok, "setns"))
+ goto out;
+
+ stats_fd = bpf_map__fd(skel->maps.stats);
+ if (!ASSERT_OK(bpf_map_lookup_elem(stats_fd, &key, &value),
+ "bpf_map_update_elem stats"))
+ goto out;
+
+ ASSERT_GE(value, N_PACKETS - 2, "bpf_xdp_flow_lookup failed");
+out:
+ xdp_flowtable__destroy(skel);
+ if (tok)
+ close_netns(tok);
+ SYS_NOFAIL("ip netns del " TX_NETNS_NAME);
+ SYS_NOFAIL("ip netns del " RX_NETNS_NAME);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
index 05edcf32f528..c87ee2bf558c 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
@@ -68,7 +68,8 @@ static int open_xsk(int ifindex, struct xsk *xsk)
.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
.frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
- .flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG | XDP_UMEM_TX_SW_CSUM,
+ .flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG | XDP_UMEM_TX_SW_CSUM |
+ XDP_UMEM_TX_METADATA_LEN,
.tx_metadata_len = sizeof(struct xsk_tx_metadata),
};
__u32 idx;
@@ -384,6 +385,8 @@ void test_xdp_metadata(void)
SYS(out, "ip netns add " RX_NETNS_NAME);
tok = open_netns(TX_NETNS_NAME);
+ if (!ASSERT_OK_PTR(tok, "setns"))
+ goto out;
SYS(out, "ip link add numtxqueues 1 numrxqueues 1 " TX_NAME
" type veth peer " RX_NAME " numtxqueues 1 numrxqueues 1");
SYS(out, "ip link set " RX_NAME " netns " RX_NETNS_NAME);
@@ -400,6 +403,8 @@ void test_xdp_metadata(void)
SYS(out, "ip -4 neigh add " RX_ADDR " lladdr " RX_MAC " dev " TX_NAME_VLAN);
switch_ns_to_rx(&tok);
+ if (!ASSERT_OK_PTR(tok, "setns rx"))
+ goto out;
SYS(out, "ip link set dev " RX_NAME " address " RX_MAC);
SYS(out, "ip link set dev " RX_NAME " up");
@@ -449,6 +454,8 @@ void test_xdp_metadata(void)
goto out;
switch_ns_to_tx(&tok);
+ if (!ASSERT_OK_PTR(tok, "setns tx"))
+ goto out;
/* Setup separate AF_XDP for TX interface nad send packet to the RX socket. */
tx_ifindex = if_nametoindex(TX_NAME);
@@ -461,6 +468,8 @@ void test_xdp_metadata(void)
goto out;
switch_ns_to_rx(&tok);
+ if (!ASSERT_OK_PTR(tok, "setns rx"))
+ goto out;
/* Verify packet sent from AF_XDP has proper metadata. */
if (!ASSERT_GE(verify_xsk_metadata(&rx_xsk, true), 0,
@@ -468,6 +477,8 @@ void test_xdp_metadata(void)
goto out;
switch_ns_to_tx(&tok);
+ if (!ASSERT_OK_PTR(tok, "setns tx"))
+ goto out;
complete_tx(&tx_xsk);
/* Now check metadata of packet, generated with network stack */
@@ -475,6 +486,8 @@ void test_xdp_metadata(void)
goto out;
switch_ns_to_rx(&tok);
+ if (!ASSERT_OK_PTR(tok, "setns rx"))
+ goto out;
if (!ASSERT_GE(verify_xsk_metadata(&rx_xsk, false), 0,
"verify_xsk_metadata"))
@@ -498,6 +511,8 @@ void test_xdp_metadata(void)
goto out;
switch_ns_to_tx(&tok);
+ if (!ASSERT_OK_PTR(tok, "setns tx"))
+ goto out;
/* Send packet to trigger . */
if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0,
@@ -505,6 +520,8 @@ void test_xdp_metadata(void)
goto out;
switch_ns_to_rx(&tok);
+ if (!ASSERT_OK_PTR(tok, "setns rx"))
+ goto out;
while (!retries--) {
if (bpf_obj2->bss->called)
diff --git a/tools/testing/selftests/bpf/progs/arena_atomics.c b/tools/testing/selftests/bpf/progs/arena_atomics.c
new file mode 100644
index 000000000000..bb0acd79d28a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/arena_atomics.c
@@ -0,0 +1,247 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <stdbool.h>
+#include "bpf_arena_common.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, 10); /* number of pages */
+#ifdef __TARGET_ARCH_arm64
+ __ulong(map_extra, 0x1ull << 32); /* start of mmap() region */
+#else
+ __ulong(map_extra, 0x1ull << 44); /* start of mmap() region */
+#endif
+} arena SEC(".maps");
+
+#if defined(ENABLE_ATOMICS_TESTS) && defined(__BPF_FEATURE_ADDR_SPACE_CAST)
+bool skip_tests __attribute((__section__(".data"))) = false;
+#else
+bool skip_tests = true;
+#endif
+
+__u32 pid = 0;
+
+__u64 __arena_global add64_value = 1;
+__u64 __arena_global add64_result = 0;
+__u32 __arena_global add32_value = 1;
+__u32 __arena_global add32_result = 0;
+__u64 __arena_global add_stack_value_copy = 0;
+__u64 __arena_global add_stack_result = 0;
+__u64 __arena_global add_noreturn_value = 1;
+
+SEC("raw_tp/sys_enter")
+int add(const void *ctx)
+{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+#ifdef ENABLE_ATOMICS_TESTS
+ __u64 add_stack_value = 1;
+
+ add64_result = __sync_fetch_and_add(&add64_value, 2);
+ add32_result = __sync_fetch_and_add(&add32_value, 2);
+ add_stack_result = __sync_fetch_and_add(&add_stack_value, 2);
+ add_stack_value_copy = add_stack_value;
+ __sync_fetch_and_add(&add_noreturn_value, 2);
+#endif
+
+ return 0;
+}
+
+__s64 __arena_global sub64_value = 1;
+__s64 __arena_global sub64_result = 0;
+__s32 __arena_global sub32_value = 1;
+__s32 __arena_global sub32_result = 0;
+__s64 __arena_global sub_stack_value_copy = 0;
+__s64 __arena_global sub_stack_result = 0;
+__s64 __arena_global sub_noreturn_value = 1;
+
+SEC("raw_tp/sys_enter")
+int sub(const void *ctx)
+{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+#ifdef ENABLE_ATOMICS_TESTS
+ __u64 sub_stack_value = 1;
+
+ sub64_result = __sync_fetch_and_sub(&sub64_value, 2);
+ sub32_result = __sync_fetch_and_sub(&sub32_value, 2);
+ sub_stack_result = __sync_fetch_and_sub(&sub_stack_value, 2);
+ sub_stack_value_copy = sub_stack_value;
+ __sync_fetch_and_sub(&sub_noreturn_value, 2);
+#endif
+
+ return 0;
+}
+
+__u64 __arena_global and64_value = (0x110ull << 32);
+__u32 __arena_global and32_value = 0x110;
+
+SEC("raw_tp/sys_enter")
+int and(const void *ctx)
+{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+#ifdef ENABLE_ATOMICS_TESTS
+
+ __sync_fetch_and_and(&and64_value, 0x011ull << 32);
+ __sync_fetch_and_and(&and32_value, 0x011);
+#endif
+
+ return 0;
+}
+
+__u32 __arena_global or32_value = 0x110;
+__u64 __arena_global or64_value = (0x110ull << 32);
+
+SEC("raw_tp/sys_enter")
+int or(const void *ctx)
+{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+#ifdef ENABLE_ATOMICS_TESTS
+ __sync_fetch_and_or(&or64_value, 0x011ull << 32);
+ __sync_fetch_and_or(&or32_value, 0x011);
+#endif
+
+ return 0;
+}
+
+__u64 __arena_global xor64_value = (0x110ull << 32);
+__u32 __arena_global xor32_value = 0x110;
+
+SEC("raw_tp/sys_enter")
+int xor(const void *ctx)
+{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+#ifdef ENABLE_ATOMICS_TESTS
+ __sync_fetch_and_xor(&xor64_value, 0x011ull << 32);
+ __sync_fetch_and_xor(&xor32_value, 0x011);
+#endif
+
+ return 0;
+}
+
+__u32 __arena_global cmpxchg32_value = 1;
+__u32 __arena_global cmpxchg32_result_fail = 0;
+__u32 __arena_global cmpxchg32_result_succeed = 0;
+__u64 __arena_global cmpxchg64_value = 1;
+__u64 __arena_global cmpxchg64_result_fail = 0;
+__u64 __arena_global cmpxchg64_result_succeed = 0;
+
+SEC("raw_tp/sys_enter")
+int cmpxchg(const void *ctx)
+{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+#ifdef ENABLE_ATOMICS_TESTS
+ cmpxchg64_result_fail = __sync_val_compare_and_swap(&cmpxchg64_value, 0, 3);
+ cmpxchg64_result_succeed = __sync_val_compare_and_swap(&cmpxchg64_value, 1, 2);
+
+ cmpxchg32_result_fail = __sync_val_compare_and_swap(&cmpxchg32_value, 0, 3);
+ cmpxchg32_result_succeed = __sync_val_compare_and_swap(&cmpxchg32_value, 1, 2);
+#endif
+
+ return 0;
+}
+
+__u64 __arena_global xchg64_value = 1;
+__u64 __arena_global xchg64_result = 0;
+__u32 __arena_global xchg32_value = 1;
+__u32 __arena_global xchg32_result = 0;
+
+SEC("raw_tp/sys_enter")
+int xchg(const void *ctx)
+{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+#ifdef ENABLE_ATOMICS_TESTS
+ __u64 val64 = 2;
+ __u32 val32 = 2;
+
+ xchg64_result = __sync_lock_test_and_set(&xchg64_value, val64);
+ xchg32_result = __sync_lock_test_and_set(&xchg32_value, val32);
+#endif
+
+ return 0;
+}
+
+__u64 __arena_global uaf_sink;
+volatile __u64 __arena_global uaf_recovery_fails;
+
+SEC("syscall")
+int uaf(const void *ctx)
+{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+#if defined(ENABLE_ATOMICS_TESTS) && !defined(__TARGET_ARCH_arm64) && \
+ !defined(__TARGET_ARCH_x86)
+ __u32 __arena *page32;
+ __u64 __arena *page64;
+ void __arena *page;
+
+ page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ bpf_arena_free_pages(&arena, page, 1);
+ uaf_recovery_fails = 24;
+
+ page32 = (__u32 __arena *)page;
+ uaf_sink += __sync_fetch_and_add(page32, 1);
+ uaf_recovery_fails -= 1;
+ __sync_add_and_fetch(page32, 1);
+ uaf_recovery_fails -= 1;
+ uaf_sink += __sync_fetch_and_sub(page32, 1);
+ uaf_recovery_fails -= 1;
+ __sync_sub_and_fetch(page32, 1);
+ uaf_recovery_fails -= 1;
+ uaf_sink += __sync_fetch_and_and(page32, 1);
+ uaf_recovery_fails -= 1;
+ __sync_and_and_fetch(page32, 1);
+ uaf_recovery_fails -= 1;
+ uaf_sink += __sync_fetch_and_or(page32, 1);
+ uaf_recovery_fails -= 1;
+ __sync_or_and_fetch(page32, 1);
+ uaf_recovery_fails -= 1;
+ uaf_sink += __sync_fetch_and_xor(page32, 1);
+ uaf_recovery_fails -= 1;
+ __sync_xor_and_fetch(page32, 1);
+ uaf_recovery_fails -= 1;
+ uaf_sink += __sync_val_compare_and_swap(page32, 0, 1);
+ uaf_recovery_fails -= 1;
+ uaf_sink += __sync_lock_test_and_set(page32, 1);
+ uaf_recovery_fails -= 1;
+
+ page64 = (__u64 __arena *)page;
+ uaf_sink += __sync_fetch_and_add(page64, 1);
+ uaf_recovery_fails -= 1;
+ __sync_add_and_fetch(page64, 1);
+ uaf_recovery_fails -= 1;
+ uaf_sink += __sync_fetch_and_sub(page64, 1);
+ uaf_recovery_fails -= 1;
+ __sync_sub_and_fetch(page64, 1);
+ uaf_recovery_fails -= 1;
+ uaf_sink += __sync_fetch_and_and(page64, 1);
+ uaf_recovery_fails -= 1;
+ __sync_and_and_fetch(page64, 1);
+ uaf_recovery_fails -= 1;
+ uaf_sink += __sync_fetch_and_or(page64, 1);
+ uaf_recovery_fails -= 1;
+ __sync_or_and_fetch(page64, 1);
+ uaf_recovery_fails -= 1;
+ uaf_sink += __sync_fetch_and_xor(page64, 1);
+ uaf_recovery_fails -= 1;
+ __sync_xor_and_fetch(page64, 1);
+ uaf_recovery_fails -= 1;
+ uaf_sink += __sync_val_compare_and_swap(page64, 0, 1);
+ uaf_recovery_fails -= 1;
+ uaf_sink += __sync_lock_test_and_set(page64, 1);
+ uaf_recovery_fails -= 1;
+#endif
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/arena_htab.c b/tools/testing/selftests/bpf/progs/arena_htab.c
index 1e6ac187a6a0..81eaa94afeb0 100644
--- a/tools/testing/selftests/bpf/progs/arena_htab.c
+++ b/tools/testing/selftests/bpf/progs/arena_htab.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#define BPF_NO_KFUNC_PROTOTYPES
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
@@ -18,25 +19,35 @@ void __arena *htab_for_user;
bool skip = false;
int zero = 0;
+char __arena arr1[100000];
+char arr2[1000];
SEC("syscall")
int arena_htab_llvm(void *ctx)
{
#if defined(__BPF_FEATURE_ADDR_SPACE_CAST) || defined(BPF_ARENA_FORCE_ASM)
struct htab __arena *htab;
+ char __arena *arr = arr1;
__u64 i;
htab = bpf_alloc(sizeof(*htab));
cast_kern(htab);
htab_init(htab);
+ cast_kern(arr);
+
/* first run. No old elems in the table */
- for (i = zero; i < 1000; i++)
+ for (i = zero; i < 100000 && can_loop; i++) {
htab_update_elem(htab, i, i);
+ arr[i] = i;
+ }
- /* should replace all elems with new ones */
- for (i = zero; i < 1000; i++)
+ /* should replace some elems with new ones */
+ for (i = zero; i < 1000 && can_loop; i++) {
htab_update_elem(htab, i, i);
+ /* Access mem to make the verifier use bounded loop logic */
+ arr2[i] = i;
+ }
cast_user(htab);
htab_for_user = htab;
#else
diff --git a/tools/testing/selftests/bpf/progs/arena_list.c b/tools/testing/selftests/bpf/progs/arena_list.c
index c0422c58cee2..3a2ddcacbea6 100644
--- a/tools/testing/selftests/bpf/progs/arena_list.c
+++ b/tools/testing/selftests/bpf/progs/arena_list.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#define BPF_NO_KFUNC_PROTOTYPES
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
@@ -49,7 +50,7 @@ int arena_list_add(void *ctx)
list_head = &global_head;
- for (i = zero; i < cnt; cond_break, i++) {
+ for (i = zero; i < cnt && can_loop; i++) {
struct elem __arena *n = bpf_alloc(sizeof(*n));
test_val++;
diff --git a/tools/testing/selftests/bpf/progs/bench_local_storage_create.c b/tools/testing/selftests/bpf/progs/bench_local_storage_create.c
index e4bfbba6c193..c8ec0d0368e4 100644
--- a/tools/testing/selftests/bpf/progs/bench_local_storage_create.c
+++ b/tools/testing/selftests/bpf/progs/bench_local_storage_create.c
@@ -61,14 +61,15 @@ SEC("lsm.s/socket_post_create")
int BPF_PROG(socket_post_create, struct socket *sock, int family, int type,
int protocol, int kern)
{
+ struct sock *sk = sock->sk;
struct storage *stg;
__u32 pid;
pid = bpf_get_current_pid_tgid() >> 32;
- if (pid != bench_pid)
+ if (pid != bench_pid || !sk)
return 0;
- stg = bpf_sk_storage_get(&sk_storage_map, sock->sk, NULL,
+ stg = bpf_sk_storage_get(&sk_storage_map, sk, NULL,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (stg)
diff --git a/tools/testing/selftests/bpf/progs/bind4_prog.c b/tools/testing/selftests/bpf/progs/bind4_prog.c
index a487f60b73ac..b7ddf8ec4ee8 100644
--- a/tools/testing/selftests/bpf/progs/bind4_prog.c
+++ b/tools/testing/selftests/bpf/progs/bind4_prog.c
@@ -12,6 +12,8 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include "bind_prog.h"
+
#define SERV4_IP 0xc0a801feU /* 192.168.1.254 */
#define SERV4_PORT 4040
#define SERV4_REWRITE_IP 0x7f000001U /* 127.0.0.1 */
@@ -118,23 +120,23 @@ int bind_v4_prog(struct bpf_sock_addr *ctx)
// u8 narrow loads:
user_ip4 = 0;
- user_ip4 |= ((volatile __u8 *)&ctx->user_ip4)[0] << 0;
- user_ip4 |= ((volatile __u8 *)&ctx->user_ip4)[1] << 8;
- user_ip4 |= ((volatile __u8 *)&ctx->user_ip4)[2] << 16;
- user_ip4 |= ((volatile __u8 *)&ctx->user_ip4)[3] << 24;
+ user_ip4 |= load_byte(ctx->user_ip4, 0, sizeof(user_ip4));
+ user_ip4 |= load_byte(ctx->user_ip4, 1, sizeof(user_ip4));
+ user_ip4 |= load_byte(ctx->user_ip4, 2, sizeof(user_ip4));
+ user_ip4 |= load_byte(ctx->user_ip4, 3, sizeof(user_ip4));
if (ctx->user_ip4 != user_ip4)
return 0;
user_port = 0;
- user_port |= ((volatile __u8 *)&ctx->user_port)[0] << 0;
- user_port |= ((volatile __u8 *)&ctx->user_port)[1] << 8;
+ user_port |= load_byte(ctx->user_port, 0, sizeof(user_port));
+ user_port |= load_byte(ctx->user_port, 1, sizeof(user_port));
if (ctx->user_port != user_port)
return 0;
// u16 narrow loads:
user_ip4 = 0;
- user_ip4 |= ((volatile __u16 *)&ctx->user_ip4)[0] << 0;
- user_ip4 |= ((volatile __u16 *)&ctx->user_ip4)[1] << 16;
+ user_ip4 |= load_word(ctx->user_ip4, 0, sizeof(user_ip4));
+ user_ip4 |= load_word(ctx->user_ip4, 1, sizeof(user_ip4));
if (ctx->user_ip4 != user_ip4)
return 0;
@@ -156,4 +158,10 @@ int bind_v4_prog(struct bpf_sock_addr *ctx)
return 1;
}
+SEC("cgroup/bind4")
+int bind_v4_deny_prog(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bind6_prog.c b/tools/testing/selftests/bpf/progs/bind6_prog.c
index d62cd9e9cf0e..501c3fc11d35 100644
--- a/tools/testing/selftests/bpf/progs/bind6_prog.c
+++ b/tools/testing/selftests/bpf/progs/bind6_prog.c
@@ -12,6 +12,8 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include "bind_prog.h"
+
#define SERV6_IP_0 0xfaceb00c /* face:b00c:1234:5678::abcd */
#define SERV6_IP_1 0x12345678
#define SERV6_IP_2 0x00000000
@@ -129,25 +131,25 @@ int bind_v6_prog(struct bpf_sock_addr *ctx)
// u8 narrow loads:
for (i = 0; i < 4; i++) {
user_ip6 = 0;
- user_ip6 |= ((volatile __u8 *)&ctx->user_ip6[i])[0] << 0;
- user_ip6 |= ((volatile __u8 *)&ctx->user_ip6[i])[1] << 8;
- user_ip6 |= ((volatile __u8 *)&ctx->user_ip6[i])[2] << 16;
- user_ip6 |= ((volatile __u8 *)&ctx->user_ip6[i])[3] << 24;
+ user_ip6 |= load_byte(ctx->user_ip6[i], 0, sizeof(user_ip6));
+ user_ip6 |= load_byte(ctx->user_ip6[i], 1, sizeof(user_ip6));
+ user_ip6 |= load_byte(ctx->user_ip6[i], 2, sizeof(user_ip6));
+ user_ip6 |= load_byte(ctx->user_ip6[i], 3, sizeof(user_ip6));
if (ctx->user_ip6[i] != user_ip6)
return 0;
}
user_port = 0;
- user_port |= ((volatile __u8 *)&ctx->user_port)[0] << 0;
- user_port |= ((volatile __u8 *)&ctx->user_port)[1] << 8;
+ user_port |= load_byte(ctx->user_port, 0, sizeof(user_port));
+ user_port |= load_byte(ctx->user_port, 1, sizeof(user_port));
if (ctx->user_port != user_port)
return 0;
// u16 narrow loads:
for (i = 0; i < 4; i++) {
user_ip6 = 0;
- user_ip6 |= ((volatile __u16 *)&ctx->user_ip6[i])[0] << 0;
- user_ip6 |= ((volatile __u16 *)&ctx->user_ip6[i])[1] << 16;
+ user_ip6 |= load_word(ctx->user_ip6[i], 0, sizeof(user_ip6));
+ user_ip6 |= load_word(ctx->user_ip6[i], 1, sizeof(user_ip6));
if (ctx->user_ip6[i] != user_ip6)
return 0;
}
@@ -173,4 +175,10 @@ int bind_v6_prog(struct bpf_sock_addr *ctx)
return 1;
}
+SEC("cgroup/bind6")
+int bind_v6_deny_prog(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bind_prog.h b/tools/testing/selftests/bpf/progs/bind_prog.h
new file mode 100644
index 000000000000..e830caa940c3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bind_prog.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __BIND_PROG_H__
+#define __BIND_PROG_H__
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define load_byte(src, b, s) \
+ (((volatile __u8 *)&(src))[b] << 8 * b)
+#define load_word(src, w, s) \
+ (((volatile __u16 *)&(src))[w] << 16 * w)
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define load_byte(src, b, s) \
+ (((volatile __u8 *)&(src))[(b) + (sizeof(src) - (s))] << 8 * ((s) - (b) - 1))
+#define load_word(src, w, s) \
+ (((volatile __u16 *)&(src))[w] << 16 * (((s) / 2) - (w) - 1))
+#else
+# error "Fix your compiler's __BYTE_ORDER__?!"
+#endif
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c
new file mode 100644
index 000000000000..1654a530aa3d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_cc_cubic.c
@@ -0,0 +1,189 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/* Highlights:
+ * 1. The major difference between this bpf program and tcp_cubic.c
+ * is that this bpf program relies on `cong_control` rather than
+ * `cong_avoid` in the struct tcp_congestion_ops.
+ * 2. Logic such as tcp_cwnd_reduction, tcp_cong_avoid, and
+ * tcp_update_pacing_rate is bypassed when `cong_control` is
+ * defined, so moving these logic to `cong_control`.
+ * 3. WARNING: This bpf program is NOT the same as tcp_cubic.c.
+ * The main purpose is to show use cases of the arguments in
+ * `cong_control`. For simplicity's sake, it reuses tcp cubic's
+ * kernel functions.
+ */
+
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define USEC_PER_SEC 1000000UL
+#define TCP_PACING_SS_RATIO (200)
+#define TCP_PACING_CA_RATIO (120)
+#define TCP_REORDERING (12)
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#define max(a, b) ((a) > (b) ? (a) : (b))
+#define after(seq2, seq1) before(seq1, seq2)
+
+extern void cubictcp_init(struct sock *sk) __ksym;
+extern void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event) __ksym;
+extern __u32 cubictcp_recalc_ssthresh(struct sock *sk) __ksym;
+extern void cubictcp_state(struct sock *sk, __u8 new_state) __ksym;
+extern __u32 tcp_reno_undo_cwnd(struct sock *sk) __ksym;
+extern void cubictcp_acked(struct sock *sk, const struct ack_sample *sample) __ksym;
+extern void cubictcp_cong_avoid(struct sock *sk, __u32 ack, __u32 acked) __ksym;
+
+static bool before(__u32 seq1, __u32 seq2)
+{
+ return (__s32)(seq1-seq2) < 0;
+}
+
+static __u64 div64_u64(__u64 dividend, __u64 divisor)
+{
+ return dividend / divisor;
+}
+
+static void tcp_update_pacing_rate(struct sock *sk)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+ __u64 rate;
+
+ /* set sk_pacing_rate to 200 % of current rate (mss * cwnd / srtt) */
+ rate = (__u64)tp->mss_cache * ((USEC_PER_SEC / 100) << 3);
+
+ /* current rate is (cwnd * mss) / srtt
+ * In Slow Start [1], set sk_pacing_rate to 200 % the current rate.
+ * In Congestion Avoidance phase, set it to 120 % the current rate.
+ *
+ * [1] : Normal Slow Start condition is (tp->snd_cwnd < tp->snd_ssthresh)
+ * If snd_cwnd >= (tp->snd_ssthresh / 2), we are approaching
+ * end of slow start and should slow down.
+ */
+ if (tp->snd_cwnd < tp->snd_ssthresh / 2)
+ rate *= TCP_PACING_SS_RATIO;
+ else
+ rate *= TCP_PACING_CA_RATIO;
+
+ rate *= max(tp->snd_cwnd, tp->packets_out);
+
+ if (tp->srtt_us)
+ rate = div64_u64(rate, (__u64)tp->srtt_us);
+
+ sk->sk_pacing_rate = min(rate, sk->sk_max_pacing_rate);
+}
+
+static void tcp_cwnd_reduction(struct sock *sk, int newly_acked_sacked,
+ int newly_lost, int flag)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ int sndcnt = 0;
+ __u32 pkts_in_flight = tp->packets_out - (tp->sacked_out + tp->lost_out) + tp->retrans_out;
+ int delta = tp->snd_ssthresh - pkts_in_flight;
+
+ if (newly_acked_sacked <= 0 || !tp->prior_cwnd)
+ return;
+
+ __u32 prr_delivered = tp->prr_delivered + newly_acked_sacked;
+
+ if (delta < 0) {
+ __u64 dividend =
+ (__u64)tp->snd_ssthresh * prr_delivered + tp->prior_cwnd - 1;
+ sndcnt = (__u32)div64_u64(dividend, (__u64)tp->prior_cwnd) - tp->prr_out;
+ } else {
+ sndcnt = max(prr_delivered - tp->prr_out, newly_acked_sacked);
+ if (flag & FLAG_SND_UNA_ADVANCED && !newly_lost)
+ sndcnt++;
+ sndcnt = min(delta, sndcnt);
+ }
+ /* Force a fast retransmit upon entering fast recovery */
+ sndcnt = max(sndcnt, (tp->prr_out ? 0 : 1));
+ tp->snd_cwnd = pkts_in_flight + sndcnt;
+}
+
+/* Decide wheather to run the increase function of congestion control. */
+static bool tcp_may_raise_cwnd(const struct sock *sk, const int flag)
+{
+ if (tcp_sk(sk)->reordering > TCP_REORDERING)
+ return flag & FLAG_FORWARD_PROGRESS;
+
+ return flag & FLAG_DATA_ACKED;
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_cubic_init, struct sock *sk)
+{
+ cubictcp_init(sk);
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_cubic_cwnd_event, struct sock *sk, enum tcp_ca_event event)
+{
+ cubictcp_cwnd_event(sk, event);
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_cubic_cong_control, struct sock *sk, __u32 ack, int flag,
+ const struct rate_sample *rs)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (((1<<TCP_CA_CWR) | (1<<TCP_CA_Recovery)) &
+ (1 << inet_csk(sk)->icsk_ca_state)) {
+ /* Reduce cwnd if state mandates */
+ tcp_cwnd_reduction(sk, rs->acked_sacked, rs->losses, flag);
+
+ if (!before(tp->snd_una, tp->high_seq)) {
+ /* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */
+ if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH &&
+ inet_csk(sk)->icsk_ca_state == TCP_CA_CWR) {
+ tp->snd_cwnd = tp->snd_ssthresh;
+ tp->snd_cwnd_stamp = tcp_jiffies32;
+ }
+ }
+ } else if (tcp_may_raise_cwnd(sk, flag)) {
+ /* Advance cwnd if state allows */
+ cubictcp_cong_avoid(sk, ack, rs->acked_sacked);
+ tp->snd_cwnd_stamp = tcp_jiffies32;
+ }
+
+ tcp_update_pacing_rate(sk);
+}
+
+SEC("struct_ops")
+__u32 BPF_PROG(bpf_cubic_recalc_ssthresh, struct sock *sk)
+{
+ return cubictcp_recalc_ssthresh(sk);
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_cubic_state, struct sock *sk, __u8 new_state)
+{
+ cubictcp_state(sk, new_state);
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_cubic_acked, struct sock *sk, const struct ack_sample *sample)
+{
+ cubictcp_acked(sk, sample);
+}
+
+SEC("struct_ops")
+__u32 BPF_PROG(bpf_cubic_undo_cwnd, struct sock *sk)
+{
+ return tcp_reno_undo_cwnd(sk);
+}
+
+SEC(".struct_ops")
+struct tcp_congestion_ops cc_cubic = {
+ .init = (void *)bpf_cubic_init,
+ .ssthresh = (void *)bpf_cubic_recalc_ssthresh,
+ .cong_control = (void *)bpf_cubic_cong_control,
+ .set_state = (void *)bpf_cubic_state,
+ .undo_cwnd = (void *)bpf_cubic_undo_cwnd,
+ .cwnd_event = (void *)bpf_cubic_cwnd_event,
+ .pkts_acked = (void *)bpf_cubic_acked,
+ .name = "bpf_cc_cubic",
+};
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c
index c997e3e3d3fb..d665b8a15cc4 100644
--- a/tools/testing/selftests/bpf/progs/bpf_cubic.c
+++ b/tools/testing/selftests/bpf/progs/bpf_cubic.c
@@ -14,14 +14,22 @@
* "ca->ack_cnt / delta" operation.
*/
-#include <linux/bpf.h>
-#include <linux/stddef.h>
-#include <linux/tcp.h>
-#include "bpf_tcp_helpers.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#define max(a, b) ((a) > (b) ? (a) : (b))
+static bool before(__u32 seq1, __u32 seq2)
+{
+ return (__s32)(seq1-seq2) < 0;
+}
+#define after(seq2, seq1) before(seq1, seq2)
+
+extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym;
+extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym;
#define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation
* max_cwnd = snd_cwnd * beta
@@ -70,7 +78,7 @@ static const __u64 cube_factor = (__u64)(1ull << (10+3*BICTCP_HZ))
/ (bic_scale * 10);
/* BIC TCP Parameters */
-struct bictcp {
+struct bpf_bictcp {
__u32 cnt; /* increase cwnd by 1 after ACKs */
__u32 last_max_cwnd; /* last maximum snd_cwnd */
__u32 last_cwnd; /* the last snd_cwnd */
@@ -91,7 +99,7 @@ struct bictcp {
__u32 curr_rtt; /* the minimum rtt of current round */
};
-static inline void bictcp_reset(struct bictcp *ca)
+static void bictcp_reset(struct bpf_bictcp *ca)
{
ca->cnt = 0;
ca->last_max_cwnd = 0;
@@ -112,7 +120,7 @@ extern unsigned long CONFIG_HZ __kconfig;
#define USEC_PER_SEC 1000000UL
#define USEC_PER_JIFFY (USEC_PER_SEC / HZ)
-static __always_inline __u64 div64_u64(__u64 dividend, __u64 divisor)
+static __u64 div64_u64(__u64 dividend, __u64 divisor)
{
return dividend / divisor;
}
@@ -120,7 +128,7 @@ static __always_inline __u64 div64_u64(__u64 dividend, __u64 divisor)
#define div64_ul div64_u64
#define BITS_PER_U64 (sizeof(__u64) * 8)
-static __always_inline int fls64(__u64 x)
+static int fls64(__u64 x)
{
int num = BITS_PER_U64 - 1;
@@ -153,15 +161,15 @@ static __always_inline int fls64(__u64 x)
return num + 1;
}
-static __always_inline __u32 bictcp_clock_us(const struct sock *sk)
+static __u32 bictcp_clock_us(const struct sock *sk)
{
return tcp_sk(sk)->tcp_mstamp;
}
-static __always_inline void bictcp_hystart_reset(struct sock *sk)
+static void bictcp_hystart_reset(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct bictcp *ca = inet_csk_ca(sk);
+ struct bpf_bictcp *ca = inet_csk_ca(sk);
ca->round_start = ca->last_ack = bictcp_clock_us(sk);
ca->end_seq = tp->snd_nxt;
@@ -169,11 +177,10 @@ static __always_inline void bictcp_hystart_reset(struct sock *sk)
ca->sample_cnt = 0;
}
-/* "struct_ops/" prefix is a requirement */
-SEC("struct_ops/bpf_cubic_init")
+SEC("struct_ops")
void BPF_PROG(bpf_cubic_init, struct sock *sk)
{
- struct bictcp *ca = inet_csk_ca(sk);
+ struct bpf_bictcp *ca = inet_csk_ca(sk);
bictcp_reset(ca);
@@ -184,12 +191,11 @@ void BPF_PROG(bpf_cubic_init, struct sock *sk)
tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
}
-/* "struct_ops" prefix is a requirement */
-SEC("struct_ops/bpf_cubic_cwnd_event")
+SEC("struct_ops")
void BPF_PROG(bpf_cubic_cwnd_event, struct sock *sk, enum tcp_ca_event event)
{
if (event == CA_EVENT_TX_START) {
- struct bictcp *ca = inet_csk_ca(sk);
+ struct bpf_bictcp *ca = inet_csk_ca(sk);
__u32 now = tcp_jiffies32;
__s32 delta;
@@ -230,7 +236,7 @@ static const __u8 v[] = {
* Newton-Raphson iteration.
* Avg err ~= 0.195%
*/
-static __always_inline __u32 cubic_root(__u64 a)
+static __u32 cubic_root(__u64 a)
{
__u32 x, b, shift;
@@ -263,8 +269,7 @@ static __always_inline __u32 cubic_root(__u64 a)
/*
* Compute congestion window to use.
*/
-static __always_inline void bictcp_update(struct bictcp *ca, __u32 cwnd,
- __u32 acked)
+static void bictcp_update(struct bpf_bictcp *ca, __u32 cwnd, __u32 acked)
{
__u32 delta, bic_target, max_cnt;
__u64 offs, t;
@@ -377,11 +382,11 @@ tcp_friendliness:
ca->cnt = max(ca->cnt, 2U);
}
-/* Or simply use the BPF_STRUCT_OPS to avoid the SEC boiler plate. */
-void BPF_STRUCT_OPS(bpf_cubic_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
+SEC("struct_ops")
+void BPF_PROG(bpf_cubic_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct bictcp *ca = inet_csk_ca(sk);
+ struct bpf_bictcp *ca = inet_csk_ca(sk);
if (!tcp_is_cwnd_limited(sk))
return;
@@ -397,10 +402,11 @@ void BPF_STRUCT_OPS(bpf_cubic_cong_avoid, struct sock *sk, __u32 ack, __u32 acke
tcp_cong_avoid_ai(tp, ca->cnt, acked);
}
-__u32 BPF_STRUCT_OPS(bpf_cubic_recalc_ssthresh, struct sock *sk)
+SEC("struct_ops")
+__u32 BPF_PROG(bpf_cubic_recalc_ssthresh, struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
- struct bictcp *ca = inet_csk_ca(sk);
+ struct bpf_bictcp *ca = inet_csk_ca(sk);
ca->epoch_start = 0; /* end of epoch */
@@ -414,7 +420,8 @@ __u32 BPF_STRUCT_OPS(bpf_cubic_recalc_ssthresh, struct sock *sk)
return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
}
-void BPF_STRUCT_OPS(bpf_cubic_state, struct sock *sk, __u8 new_state)
+SEC("struct_ops")
+void BPF_PROG(bpf_cubic_state, struct sock *sk, __u8 new_state)
{
if (new_state == TCP_CA_Loss) {
bictcp_reset(inet_csk_ca(sk));
@@ -433,7 +440,7 @@ void BPF_STRUCT_OPS(bpf_cubic_state, struct sock *sk, __u8 new_state)
* We apply another 100% factor because @rate is doubled at this point.
* We cap the cushion to 1ms.
*/
-static __always_inline __u32 hystart_ack_delay(struct sock *sk)
+static __u32 hystart_ack_delay(struct sock *sk)
{
unsigned long rate;
@@ -444,10 +451,10 @@ static __always_inline __u32 hystart_ack_delay(struct sock *sk)
div64_ul((__u64)GSO_MAX_SIZE * 4 * USEC_PER_SEC, rate));
}
-static __always_inline void hystart_update(struct sock *sk, __u32 delay)
+static void hystart_update(struct sock *sk, __u32 delay)
{
struct tcp_sock *tp = tcp_sk(sk);
- struct bictcp *ca = inet_csk_ca(sk);
+ struct bpf_bictcp *ca = inet_csk_ca(sk);
__u32 threshold;
if (hystart_detect & HYSTART_ACK_TRAIN) {
@@ -492,11 +499,11 @@ static __always_inline void hystart_update(struct sock *sk, __u32 delay)
int bpf_cubic_acked_called = 0;
-void BPF_STRUCT_OPS(bpf_cubic_acked, struct sock *sk,
- const struct ack_sample *sample)
+SEC("struct_ops")
+void BPF_PROG(bpf_cubic_acked, struct sock *sk, const struct ack_sample *sample)
{
const struct tcp_sock *tp = tcp_sk(sk);
- struct bictcp *ca = inet_csk_ca(sk);
+ struct bpf_bictcp *ca = inet_csk_ca(sk);
__u32 delay;
bpf_cubic_acked_called = 1;
@@ -524,7 +531,8 @@ void BPF_STRUCT_OPS(bpf_cubic_acked, struct sock *sk,
extern __u32 tcp_reno_undo_cwnd(struct sock *sk) __ksym;
-__u32 BPF_STRUCT_OPS(bpf_cubic_undo_cwnd, struct sock *sk)
+SEC("struct_ops")
+__u32 BPF_PROG(bpf_cubic_undo_cwnd, struct sock *sk)
{
return tcp_reno_undo_cwnd(sk);
}
diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
index 460682759aed..02f552e7fd4d 100644
--- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c
+++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
@@ -6,15 +6,23 @@
* the kernel BPF logic.
*/
-#include <stddef.h>
-#include <linux/bpf.h>
-#include <linux/types.h>
-#include <linux/stddef.h>
-#include <linux/tcp.h>
-#include <errno.h>
+#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-#include "bpf_tcp_helpers.h"
+
+#ifndef EBUSY
+#define EBUSY 16
+#endif
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#define max(a, b) ((a) > (b) ? (a) : (b))
+#define min_not_zero(x, y) ({ \
+ typeof(x) __x = (x); \
+ typeof(y) __y = (y); \
+ __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
+static bool before(__u32 seq1, __u32 seq2)
+{
+ return (__s32)(seq1-seq2) < 0;
+}
char _license[] SEC("license") = "GPL";
@@ -35,7 +43,7 @@ struct {
#define DCTCP_MAX_ALPHA 1024U
-struct dctcp {
+struct bpf_dctcp {
__u32 old_delivered;
__u32 old_delivered_ce;
__u32 prior_rcv_nxt;
@@ -48,8 +56,7 @@ struct dctcp {
static unsigned int dctcp_shift_g = 4; /* g = 1/2^4 */
static unsigned int dctcp_alpha_on_init = DCTCP_MAX_ALPHA;
-static __always_inline void dctcp_reset(const struct tcp_sock *tp,
- struct dctcp *ca)
+static void dctcp_reset(const struct tcp_sock *tp, struct bpf_dctcp *ca)
{
ca->next_seq = tp->snd_nxt;
@@ -57,11 +64,11 @@ static __always_inline void dctcp_reset(const struct tcp_sock *tp,
ca->old_delivered_ce = tp->delivered_ce;
}
-SEC("struct_ops/dctcp_init")
-void BPF_PROG(dctcp_init, struct sock *sk)
+SEC("struct_ops")
+void BPF_PROG(bpf_dctcp_init, struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
- struct dctcp *ca = inet_csk_ca(sk);
+ struct bpf_dctcp *ca = inet_csk_ca(sk);
int *stg;
if (!(tp->ecn_flags & TCP_ECN_OK) && fallback[0]) {
@@ -70,7 +77,7 @@ void BPF_PROG(dctcp_init, struct sock *sk)
(void *)fallback, sizeof(fallback)) == -EBUSY)
ebusy_cnt++;
- /* Switch back to myself and the recurred dctcp_init()
+ /* Switch back to myself and the recurred bpf_dctcp_init()
* will get -EBUSY for all bpf_setsockopt(TCP_CONGESTION),
* except the last "cdg" one.
*/
@@ -104,21 +111,21 @@ void BPF_PROG(dctcp_init, struct sock *sk)
dctcp_reset(tp, ca);
}
-SEC("struct_ops/dctcp_ssthresh")
-__u32 BPF_PROG(dctcp_ssthresh, struct sock *sk)
+SEC("struct_ops")
+__u32 BPF_PROG(bpf_dctcp_ssthresh, struct sock *sk)
{
- struct dctcp *ca = inet_csk_ca(sk);
+ struct bpf_dctcp *ca = inet_csk_ca(sk);
struct tcp_sock *tp = tcp_sk(sk);
ca->loss_cwnd = tp->snd_cwnd;
return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->dctcp_alpha) >> 11U), 2U);
}
-SEC("struct_ops/dctcp_update_alpha")
-void BPF_PROG(dctcp_update_alpha, struct sock *sk, __u32 flags)
+SEC("struct_ops")
+void BPF_PROG(bpf_dctcp_update_alpha, struct sock *sk, __u32 flags)
{
const struct tcp_sock *tp = tcp_sk(sk);
- struct dctcp *ca = inet_csk_ca(sk);
+ struct bpf_dctcp *ca = inet_csk_ca(sk);
/* Expired RTT */
if (!before(tp->snd_una, ca->next_seq)) {
@@ -144,27 +151,27 @@ void BPF_PROG(dctcp_update_alpha, struct sock *sk, __u32 flags)
}
}
-static __always_inline void dctcp_react_to_loss(struct sock *sk)
+static void dctcp_react_to_loss(struct sock *sk)
{
- struct dctcp *ca = inet_csk_ca(sk);
+ struct bpf_dctcp *ca = inet_csk_ca(sk);
struct tcp_sock *tp = tcp_sk(sk);
ca->loss_cwnd = tp->snd_cwnd;
tp->snd_ssthresh = max(tp->snd_cwnd >> 1U, 2U);
}
-SEC("struct_ops/dctcp_state")
-void BPF_PROG(dctcp_state, struct sock *sk, __u8 new_state)
+SEC("struct_ops")
+void BPF_PROG(bpf_dctcp_state, struct sock *sk, __u8 new_state)
{
if (new_state == TCP_CA_Recovery &&
new_state != BPF_CORE_READ_BITFIELD(inet_csk(sk), icsk_ca_state))
dctcp_react_to_loss(sk);
- /* We handle RTO in dctcp_cwnd_event to ensure that we perform only
+ /* We handle RTO in bpf_dctcp_cwnd_event to ensure that we perform only
* one loss-adjustment per RTT.
*/
}
-static __always_inline void dctcp_ece_ack_cwr(struct sock *sk, __u32 ce_state)
+static void dctcp_ece_ack_cwr(struct sock *sk, __u32 ce_state)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -179,9 +186,8 @@ static __always_inline void dctcp_ece_ack_cwr(struct sock *sk, __u32 ce_state)
* S: 0 <- last pkt was non-CE
* 1 <- last pkt was CE
*/
-static __always_inline
-void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt,
- __u32 *prior_rcv_nxt, __u32 *ce_state)
+static void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt,
+ __u32 *prior_rcv_nxt, __u32 *ce_state)
{
__u32 new_ce_state = (evt == CA_EVENT_ECN_IS_CE) ? 1 : 0;
@@ -201,10 +207,10 @@ void dctcp_ece_ack_update(struct sock *sk, enum tcp_ca_event evt,
dctcp_ece_ack_cwr(sk, new_ce_state);
}
-SEC("struct_ops/dctcp_cwnd_event")
-void BPF_PROG(dctcp_cwnd_event, struct sock *sk, enum tcp_ca_event ev)
+SEC("struct_ops")
+void BPF_PROG(bpf_dctcp_cwnd_event, struct sock *sk, enum tcp_ca_event ev)
{
- struct dctcp *ca = inet_csk_ca(sk);
+ struct bpf_dctcp *ca = inet_csk_ca(sk);
switch (ev) {
case CA_EVENT_ECN_IS_CE:
@@ -220,39 +226,39 @@ void BPF_PROG(dctcp_cwnd_event, struct sock *sk, enum tcp_ca_event ev)
}
}
-SEC("struct_ops/dctcp_cwnd_undo")
-__u32 BPF_PROG(dctcp_cwnd_undo, struct sock *sk)
+SEC("struct_ops")
+__u32 BPF_PROG(bpf_dctcp_cwnd_undo, struct sock *sk)
{
- const struct dctcp *ca = inet_csk_ca(sk);
+ const struct bpf_dctcp *ca = inet_csk_ca(sk);
return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
}
extern void tcp_reno_cong_avoid(struct sock *sk, __u32 ack, __u32 acked) __ksym;
-SEC("struct_ops/dctcp_reno_cong_avoid")
-void BPF_PROG(dctcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
+SEC("struct_ops")
+void BPF_PROG(bpf_dctcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
{
tcp_reno_cong_avoid(sk, ack, acked);
}
SEC(".struct_ops")
struct tcp_congestion_ops dctcp_nouse = {
- .init = (void *)dctcp_init,
- .set_state = (void *)dctcp_state,
+ .init = (void *)bpf_dctcp_init,
+ .set_state = (void *)bpf_dctcp_state,
.flags = TCP_CONG_NEEDS_ECN,
.name = "bpf_dctcp_nouse",
};
SEC(".struct_ops")
struct tcp_congestion_ops dctcp = {
- .init = (void *)dctcp_init,
- .in_ack_event = (void *)dctcp_update_alpha,
- .cwnd_event = (void *)dctcp_cwnd_event,
- .ssthresh = (void *)dctcp_ssthresh,
- .cong_avoid = (void *)dctcp_cong_avoid,
- .undo_cwnd = (void *)dctcp_cwnd_undo,
- .set_state = (void *)dctcp_state,
+ .init = (void *)bpf_dctcp_init,
+ .in_ack_event = (void *)bpf_dctcp_update_alpha,
+ .cwnd_event = (void *)bpf_dctcp_cwnd_event,
+ .ssthresh = (void *)bpf_dctcp_ssthresh,
+ .cong_avoid = (void *)bpf_dctcp_cong_avoid,
+ .undo_cwnd = (void *)bpf_dctcp_cwnd_undo,
+ .set_state = (void *)bpf_dctcp_state,
.flags = TCP_CONG_NEEDS_ECN,
.name = "bpf_dctcp",
};
diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp_release.c b/tools/testing/selftests/bpf/progs/bpf_dctcp_release.c
index d836f7c372f0..c91763f248b2 100644
--- a/tools/testing/selftests/bpf/progs/bpf_dctcp_release.c
+++ b/tools/testing/selftests/bpf/progs/bpf_dctcp_release.c
@@ -1,19 +1,15 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
-#include <stddef.h>
-#include <linux/bpf.h>
-#include <linux/types.h>
-#include <linux/stddef.h>
-#include <linux/tcp.h>
+#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-#include "bpf_tcp_helpers.h"
char _license[] SEC("license") = "GPL";
const char cubic[] = "cubic";
-void BPF_STRUCT_OPS(dctcp_nouse_release, struct sock *sk)
+SEC("struct_ops")
+void BPF_PROG(dctcp_nouse_release, struct sock *sk)
{
bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
(void *)cubic, sizeof(cubic));
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c
index c5969ca6f26b..564835ba7d51 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c
@@ -6,12 +6,6 @@
char _license[] SEC("license") = "GPL";
-struct key_t {
- int a;
- int b;
- int c;
-};
-
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, 3);
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c
index 85fa710fad90..9f0e0705b2bf 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c
@@ -6,12 +6,6 @@
char _license[] SEC("license") = "GPL";
-struct key_t {
- int a;
- int b;
- int c;
-};
-
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__uint(max_entries, 3);
diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
index fb2f5513e29e..81097a3f15eb 100644
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -7,9 +7,9 @@
*
* The test_loader sequentially loads each program in a skeleton.
* Programs could be loaded in privileged and unprivileged modes.
- * - __success, __failure, __msg imply privileged mode;
- * - __success_unpriv, __failure_unpriv, __msg_unpriv imply
- * unprivileged mode.
+ * - __success, __failure, __msg, __regex imply privileged mode;
+ * - __success_unpriv, __failure_unpriv, __msg_unpriv, __regex_unpriv
+ * imply unprivileged mode.
* If combination of privileged and unprivileged attributes is present
* both modes are used. If none are present privileged mode is implied.
*
@@ -24,6 +24,9 @@
* Multiple __msg attributes could be specified.
* __msg_unpriv Same as __msg but for unprivileged mode.
*
+ * __regex Same as __msg, but using a regular expression.
+ * __regex_unpriv Same as __msg_unpriv but using a regular expression.
+ *
* __success Expect program load success in privileged mode.
* __success_unpriv Expect program load success in unprivileged mode.
*
@@ -59,10 +62,12 @@
* __auxiliary_unpriv Same, but load program in unprivileged mode.
*/
#define __msg(msg) __attribute__((btf_decl_tag("comment:test_expect_msg=" msg)))
+#define __regex(regex) __attribute__((btf_decl_tag("comment:test_expect_regex=" regex)))
#define __failure __attribute__((btf_decl_tag("comment:test_expect_failure")))
#define __success __attribute__((btf_decl_tag("comment:test_expect_success")))
#define __description(desc) __attribute__((btf_decl_tag("comment:test_description=" desc)))
#define __msg_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_msg_unpriv=" msg)))
+#define __regex_unpriv(regex) __attribute__((btf_decl_tag("comment:test_expect_regex_unpriv=" regex)))
#define __failure_unpriv __attribute__((btf_decl_tag("comment:test_expect_failure_unpriv")))
#define __success_unpriv __attribute__((btf_decl_tag("comment:test_expect_success_unpriv")))
#define __log_level(lvl) __attribute__((btf_decl_tag("comment:test_log_level="#lvl)))
@@ -135,4 +140,8 @@
/* make it look to compiler like value is read and written */
#define __sink(expr) asm volatile("" : "+g"(expr))
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
#endif
diff --git a/tools/testing/selftests/bpf/progs/bpf_tcp_nogpl.c b/tools/testing/selftests/bpf/progs/bpf_tcp_nogpl.c
index 2ecd833dcd41..8a7a4c1b54e8 100644
--- a/tools/testing/selftests/bpf/progs/bpf_tcp_nogpl.c
+++ b/tools/testing/selftests/bpf/progs/bpf_tcp_nogpl.c
@@ -1,14 +1,12 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/bpf.h>
-#include <linux/types.h>
-#include <bpf/bpf_helpers.h>
+#include "bpf_tracing_net.h"
#include <bpf/bpf_tracing.h>
-#include "bpf_tcp_helpers.h"
char _license[] SEC("license") = "X";
-void BPF_STRUCT_OPS(nogpltcp_init, struct sock *sk)
+SEC("struct_ops")
+void BPF_PROG(nogpltcp_init, struct sock *sk)
{
}
diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
index 7001965d1cc3..59843b430f76 100644
--- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
+++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
@@ -2,6 +2,9 @@
#ifndef __BPF_TRACING_NET_H__
#define __BPF_TRACING_NET_H__
+#include <vmlinux.h>
+#include <bpf/bpf_core_read.h>
+
#define AF_INET 2
#define AF_INET6 10
@@ -22,6 +25,7 @@
#define IP_TOS 1
+#define SOL_IPV6 41
#define IPV6_TCLASS 67
#define IPV6_AUTOFLOWLABEL 70
@@ -46,6 +50,13 @@
#define TCP_CA_NAME_MAX 16
#define TCP_NAGLE_OFF 1
+#define TCP_ECN_OK 1
+#define TCP_ECN_QUEUE_CWR 2
+#define TCP_ECN_DEMAND_CWR 4
+#define TCP_ECN_SEEN 8
+
+#define TCP_CONG_NEEDS_ECN 0x2
+
#define ICSK_TIME_RETRANS 1
#define ICSK_TIME_PROBE0 3
#define ICSK_TIME_LOSS_PROBE 5
@@ -80,6 +91,14 @@
#define TCP_INFINITE_SSTHRESH 0x7fffffff
#define TCP_PINGPONG_THRESH 3
+#define FLAG_DATA_ACKED 0x04 /* This ACK acknowledged new data. */
+#define FLAG_SYN_ACKED 0x10 /* This ACK acknowledged SYN. */
+#define FLAG_DATA_SACKED 0x20 /* New SACK. */
+#define FLAG_SND_UNA_ADVANCED \
+ 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
+#define FLAG_ACKED (FLAG_DATA_ACKED | FLAG_SYN_ACKED)
+#define FLAG_FORWARD_PROGRESS (FLAG_ACKED | FLAG_DATA_SACKED)
+
#define fib_nh_dev nh_common.nhc_dev
#define fib_nh_gw_family nh_common.nhc_gw_family
#define fib_nh_gw6 nh_common.nhc_gw.ipv6
@@ -119,4 +138,37 @@
#define tw_v6_daddr __tw_common.skc_v6_daddr
#define tw_v6_rcv_saddr __tw_common.skc_v6_rcv_saddr
+#define tcp_jiffies32 ((__u32)bpf_jiffies64())
+
+static inline struct inet_connection_sock *inet_csk(const struct sock *sk)
+{
+ return (struct inet_connection_sock *)sk;
+}
+
+static inline void *inet_csk_ca(const struct sock *sk)
+{
+ return (void *)inet_csk(sk)->icsk_ca_priv;
+}
+
+static inline struct tcp_sock *tcp_sk(const struct sock *sk)
+{
+ return (struct tcp_sock *)sk;
+}
+
+static inline bool tcp_in_slow_start(const struct tcp_sock *tp)
+{
+ return tp->snd_cwnd < tp->snd_ssthresh;
+}
+
+static inline bool tcp_is_cwnd_limited(const struct sock *sk)
+{
+ const struct tcp_sock *tp = tcp_sk(sk);
+
+ /* If in slow start, ensure cwnd grows to twice what was ACKed. */
+ if (tcp_in_slow_start(tp))
+ return tp->snd_cwnd < 2 * tp->max_packets_out;
+
+ return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited);
+}
+
#endif
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_multidim.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_multidim.c
index ba97165bdb28..a657651eba52 100644
--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_multidim.c
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_multidim.c
@@ -14,9 +14,9 @@ typedef int *ptr_arr_t[6];
typedef int *ptr_multiarr_t[7][8][9][10];
-typedef int * (*fn_ptr_arr_t[11])();
+typedef int * (*fn_ptr_arr_t[11])(void);
-typedef int * (*fn_ptr_multiarr_t[12][13])();
+typedef int * (*fn_ptr_multiarr_t[12][13])(void);
struct root_struct {
arr_t _1;
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
index ad21ee8c7e23..29d01fff32bd 100644
--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
@@ -100,7 +100,7 @@ typedef void (*printf_fn_t)(const char *, ...);
* `int -> char *` function and returns pointer to a char. Equivalent:
* typedef char * (*fn_input_t)(int);
* typedef char * (*fn_output_outer_t)(fn_input_t);
- * typedef const fn_output_outer_t (* fn_output_inner_t)();
+ * typedef const fn_output_outer_t (* fn_output_inner_t)(void);
* typedef const fn_output_inner_t fn_ptr_arr2_t[5];
*/
/* ----- START-EXPECTED-OUTPUT ----- */
@@ -127,7 +127,7 @@ typedef void (* (*signal_t)(int, void (*)(int)))(int);
typedef char * (*fn_ptr_arr1_t[10])(int **);
-typedef char * (* (* const fn_ptr_arr2_t[5])())(char * (*)(int));
+typedef char * (* (* const fn_ptr_arr2_t[5])(void))(char * (*)(int));
struct struct_w_typedefs {
int_t a;
diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h
index 22914a70db54..73ba32e9a693 100644
--- a/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h
+++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h
@@ -13,7 +13,7 @@ struct __cgrps_kfunc_map_value {
struct cgroup __kptr * cgrp;
};
-struct hash_map {
+struct {
__uint(type, BPF_MAP_TYPE_HASH);
__type(key, int);
__type(value, struct __cgrps_kfunc_map_value);
diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c
index 7ef49ec04838..9e9ebf27b878 100644
--- a/tools/testing/selftests/bpf/progs/connect4_prog.c
+++ b/tools/testing/selftests/bpf/progs/connect4_prog.c
@@ -14,8 +14,6 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
-#include "bpf_tcp_helpers.h"
-
#define SRC_REWRITE_IP4 0x7f000004U
#define DST_REWRITE_IP4 0x7f000001U
#define DST_REWRITE_PORT4 4444
@@ -32,6 +30,10 @@
#define IFNAMSIZ 16
#endif
+#ifndef SOL_TCP
+#define SOL_TCP 6
+#endif
+
__attribute__ ((noinline)) __weak
int do_bind(struct bpf_sock_addr *ctx)
{
@@ -197,4 +199,10 @@ int connect_v4_prog(struct bpf_sock_addr *ctx)
return do_bind(ctx) ? 1 : 0;
}
+SEC("cgroup/connect4")
+int connect_v4_deny_prog(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/connect6_prog.c b/tools/testing/selftests/bpf/progs/connect6_prog.c
index 40266d2c737c..e98573b00ddb 100644
--- a/tools/testing/selftests/bpf/progs/connect6_prog.c
+++ b/tools/testing/selftests/bpf/progs/connect6_prog.c
@@ -90,4 +90,10 @@ int connect_v6_prog(struct bpf_sock_addr *ctx)
return 1;
}
+SEC("cgroup/connect6")
+int connect_v6_deny_prog(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/connect_unix_prog.c b/tools/testing/selftests/bpf/progs/connect_unix_prog.c
index 2ef0e0c46d17..ba60adadb335 100644
--- a/tools/testing/selftests/bpf/progs/connect_unix_prog.c
+++ b/tools/testing/selftests/bpf/progs/connect_unix_prog.c
@@ -36,4 +36,10 @@ int connect_unix_prog(struct bpf_sock_addr *ctx)
return 1;
}
+SEC("cgroup/connect_unix")
+int connect_unix_deny_prog(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/cpumask_common.h b/tools/testing/selftests/bpf/progs/cpumask_common.h
index c705d8112a35..b979e91f55f0 100644
--- a/tools/testing/selftests/bpf/progs/cpumask_common.h
+++ b/tools/testing/selftests/bpf/progs/cpumask_common.h
@@ -9,7 +9,7 @@
int err;
-#define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8)))
+#define private(name) SEC(".bss." #name) __attribute__((aligned(8)))
private(MASK) static struct bpf_cpumask __kptr * global_mask;
struct __cpumask_map_value {
diff --git a/tools/testing/selftests/bpf/progs/cpumask_failure.c b/tools/testing/selftests/bpf/progs/cpumask_failure.c
index a9bf6ea336cf..a988d2823b52 100644
--- a/tools/testing/selftests/bpf/progs/cpumask_failure.c
+++ b/tools/testing/selftests/bpf/progs/cpumask_failure.c
@@ -61,11 +61,8 @@ SEC("tp_btf/task_newtask")
__failure __msg("bpf_cpumask_set_cpu args#1 expected pointer to STRUCT bpf_cpumask")
int BPF_PROG(test_mutate_cpumask, struct task_struct *task, u64 clone_flags)
{
- struct bpf_cpumask *cpumask;
-
/* Can't set the CPU of a non-struct bpf_cpumask. */
bpf_cpumask_set_cpu(0, (struct bpf_cpumask *)task->cpus_ptr);
- __sink(cpumask);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/cpumask_success.c b/tools/testing/selftests/bpf/progs/cpumask_success.c
index 7a1e64c6c065..fd8106831c32 100644
--- a/tools/testing/selftests/bpf/progs/cpumask_success.c
+++ b/tools/testing/selftests/bpf/progs/cpumask_success.c
@@ -12,6 +12,31 @@ char _license[] SEC("license") = "GPL";
int pid, nr_cpus;
+struct kptr_nested {
+ struct bpf_cpumask __kptr * mask;
+};
+
+struct kptr_nested_pair {
+ struct bpf_cpumask __kptr * mask_1;
+ struct bpf_cpumask __kptr * mask_2;
+};
+
+struct kptr_nested_mid {
+ int dummy;
+ struct kptr_nested m;
+};
+
+struct kptr_nested_deep {
+ struct kptr_nested_mid ptrs[2];
+ struct kptr_nested_pair ptr_pairs[3];
+};
+
+private(MASK) static struct bpf_cpumask __kptr * global_mask_array[2];
+private(MASK) static struct bpf_cpumask __kptr * global_mask_array_l2[2][1];
+private(MASK) static struct bpf_cpumask __kptr * global_mask_array_one[1];
+private(MASK) static struct kptr_nested global_mask_nested[2];
+private(MASK_DEEP) static struct kptr_nested_deep global_mask_nested_deep;
+
static bool is_test_task(void)
{
int cur_pid = bpf_get_current_pid_tgid() >> 32;
@@ -461,6 +486,152 @@ int BPF_PROG(test_global_mask_rcu, struct task_struct *task, u64 clone_flags)
}
SEC("tp_btf/task_newtask")
+int BPF_PROG(test_global_mask_array_one_rcu, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *local, *prev;
+
+ if (!is_test_task())
+ return 0;
+
+ /* Kptr arrays with one element are special cased, being treated
+ * just like a single pointer.
+ */
+
+ local = create_cpumask();
+ if (!local)
+ return 0;
+
+ prev = bpf_kptr_xchg(&global_mask_array_one[0], local);
+ if (prev) {
+ bpf_cpumask_release(prev);
+ err = 3;
+ return 0;
+ }
+
+ bpf_rcu_read_lock();
+ local = global_mask_array_one[0];
+ if (!local) {
+ err = 4;
+ bpf_rcu_read_unlock();
+ return 0;
+ }
+
+ bpf_rcu_read_unlock();
+
+ return 0;
+}
+
+static int _global_mask_array_rcu(struct bpf_cpumask **mask0,
+ struct bpf_cpumask **mask1)
+{
+ struct bpf_cpumask *local;
+
+ if (!is_test_task())
+ return 0;
+
+ /* Check if two kptrs in the array work and independently */
+
+ local = create_cpumask();
+ if (!local)
+ return 0;
+
+ bpf_rcu_read_lock();
+
+ local = bpf_kptr_xchg(mask0, local);
+ if (local) {
+ err = 1;
+ goto err_exit;
+ }
+
+ /* [<mask 0>, NULL] */
+ if (!*mask0 || *mask1) {
+ err = 2;
+ goto err_exit;
+ }
+
+ local = create_cpumask();
+ if (!local) {
+ err = 9;
+ goto err_exit;
+ }
+
+ local = bpf_kptr_xchg(mask1, local);
+ if (local) {
+ err = 10;
+ goto err_exit;
+ }
+
+ /* [<mask 0>, <mask 1>] */
+ if (!*mask0 || !*mask1 || *mask0 == *mask1) {
+ err = 11;
+ goto err_exit;
+ }
+
+err_exit:
+ if (local)
+ bpf_cpumask_release(local);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_global_mask_array_rcu, struct task_struct *task, u64 clone_flags)
+{
+ return _global_mask_array_rcu(&global_mask_array[0], &global_mask_array[1]);
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_global_mask_array_l2_rcu, struct task_struct *task, u64 clone_flags)
+{
+ return _global_mask_array_rcu(&global_mask_array_l2[0][0], &global_mask_array_l2[1][0]);
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_global_mask_nested_rcu, struct task_struct *task, u64 clone_flags)
+{
+ return _global_mask_array_rcu(&global_mask_nested[0].mask, &global_mask_nested[1].mask);
+}
+
+/* Ensure that the field->offset has been correctly advanced from one
+ * nested struct or array sub-tree to another. In the case of
+ * kptr_nested_deep, it comprises two sub-trees: ktpr_1 and kptr_2. By
+ * calling bpf_kptr_xchg() on every single kptr in both nested sub-trees,
+ * the verifier should reject the program if the field->offset of any kptr
+ * is incorrect.
+ *
+ * For instance, if we have 10 kptrs in a nested struct and a program that
+ * accesses each kptr individually with bpf_kptr_xchg(), the compiler
+ * should emit instructions to access 10 different offsets if it works
+ * correctly. If the field->offset values of any pair of them are
+ * incorrectly the same, the number of unique offsets in btf_record for
+ * this nested struct should be less than 10. The verifier should fail to
+ * discover some of the offsets emitted by the compiler.
+ *
+ * Even if the field->offset values of kptrs are not duplicated, the
+ * verifier should fail to find a btf_field for the instruction accessing a
+ * kptr if the corresponding field->offset is pointing to a random
+ * incorrect offset.
+ */
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_global_mask_nested_deep_rcu, struct task_struct *task, u64 clone_flags)
+{
+ int r, i;
+
+ r = _global_mask_array_rcu(&global_mask_nested_deep.ptrs[0].m.mask,
+ &global_mask_nested_deep.ptrs[1].m.mask);
+ if (r)
+ return r;
+
+ for (i = 0; i < 3; i++) {
+ r = _global_mask_array_rcu(&global_mask_nested_deep.ptr_pairs[i].mask_1,
+ &global_mask_nested_deep.ptr_pairs[i].mask_2);
+ if (r)
+ return r;
+ }
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
int BPF_PROG(test_cpumask_weight, struct task_struct *task, u64 clone_flags)
{
struct bpf_cpumask *local;
diff --git a/tools/testing/selftests/bpf/progs/crypto_basic.c b/tools/testing/selftests/bpf/progs/crypto_basic.c
new file mode 100644
index 000000000000..8cf7168b42d5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/crypto_basic.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_kfuncs.h"
+#include "crypto_common.h"
+
+int status;
+SEC("syscall")
+int crypto_release(void *ctx)
+{
+ struct bpf_crypto_params params = {
+ .type = "skcipher",
+ .algo = "ecb(aes)",
+ .key_len = 16,
+ };
+
+ struct bpf_crypto_ctx *cctx;
+ int err = 0;
+
+ status = 0;
+
+ cctx = bpf_crypto_ctx_create(&params, sizeof(params), &err);
+
+ if (!cctx) {
+ status = err;
+ return 0;
+ }
+
+ bpf_crypto_ctx_release(cctx);
+
+ return 0;
+}
+
+SEC("syscall")
+__failure __msg("Unreleased reference")
+int crypto_acquire(void *ctx)
+{
+ struct bpf_crypto_params params = {
+ .type = "skcipher",
+ .algo = "ecb(aes)",
+ .key_len = 16,
+ };
+ struct bpf_crypto_ctx *cctx;
+ int err = 0;
+
+ status = 0;
+
+ cctx = bpf_crypto_ctx_create(&params, sizeof(params), &err);
+
+ if (!cctx) {
+ status = err;
+ return 0;
+ }
+
+ cctx = bpf_crypto_ctx_acquire(cctx);
+ if (!cctx)
+ return -EINVAL;
+
+ bpf_crypto_ctx_release(cctx);
+
+ return 0;
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/crypto_bench.c b/tools/testing/selftests/bpf/progs/crypto_bench.c
new file mode 100644
index 000000000000..4ac956b26240
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/crypto_bench.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_kfuncs.h"
+#include "crypto_common.h"
+
+const volatile unsigned int len = 16;
+char cipher[128] = {};
+u32 key_len, authsize;
+char dst[256] = {};
+u8 key[256] = {};
+long hits = 0;
+int status;
+
+SEC("syscall")
+int crypto_setup(void *args)
+{
+ struct bpf_crypto_ctx *cctx;
+ struct bpf_crypto_params params = {
+ .type = "skcipher",
+ .key_len = key_len,
+ .authsize = authsize,
+ };
+ int err = 0;
+
+ status = 0;
+
+ if (!cipher[0] || !key_len || key_len > 256) {
+ status = -EINVAL;
+ return 0;
+ }
+
+ __builtin_memcpy(&params.algo, cipher, sizeof(cipher));
+ __builtin_memcpy(&params.key, key, sizeof(key));
+ cctx = bpf_crypto_ctx_create(&params, sizeof(params), &err);
+
+ if (!cctx) {
+ status = err;
+ return 0;
+ }
+
+ err = crypto_ctx_insert(cctx);
+ if (err && err != -EEXIST)
+ status = err;
+
+ return 0;
+}
+
+SEC("tc")
+int crypto_encrypt(struct __sk_buff *skb)
+{
+ struct __crypto_ctx_value *v;
+ struct bpf_crypto_ctx *ctx;
+ struct bpf_dynptr psrc, pdst;
+
+ v = crypto_ctx_value_lookup();
+ if (!v) {
+ status = -ENOENT;
+ return 0;
+ }
+
+ ctx = v->ctx;
+ if (!ctx) {
+ status = -ENOENT;
+ return 0;
+ }
+
+ bpf_dynptr_from_skb(skb, 0, &psrc);
+ bpf_dynptr_from_mem(dst, len, 0, &pdst);
+
+ status = bpf_crypto_encrypt(ctx, &psrc, &pdst, NULL);
+ __sync_add_and_fetch(&hits, 1);
+
+ return 0;
+}
+
+SEC("tc")
+int crypto_decrypt(struct __sk_buff *skb)
+{
+ struct bpf_dynptr psrc, pdst;
+ struct __crypto_ctx_value *v;
+ struct bpf_crypto_ctx *ctx;
+
+ v = crypto_ctx_value_lookup();
+ if (!v)
+ return -ENOENT;
+
+ ctx = v->ctx;
+ if (!ctx)
+ return -ENOENT;
+
+ bpf_dynptr_from_skb(skb, 0, &psrc);
+ bpf_dynptr_from_mem(dst, len, 0, &pdst);
+
+ status = bpf_crypto_decrypt(ctx, &psrc, &pdst, NULL);
+ __sync_add_and_fetch(&hits, 1);
+
+ return 0;
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/crypto_common.h b/tools/testing/selftests/bpf/progs/crypto_common.h
new file mode 100644
index 000000000000..57dd7a68a8c3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/crypto_common.h
@@ -0,0 +1,66 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#ifndef _CRYPTO_COMMON_H
+#define _CRYPTO_COMMON_H
+
+#include "errno.h"
+#include <stdbool.h>
+
+struct bpf_crypto_ctx *bpf_crypto_ctx_create(const struct bpf_crypto_params *params,
+ u32 params__sz, int *err) __ksym;
+struct bpf_crypto_ctx *bpf_crypto_ctx_acquire(struct bpf_crypto_ctx *ctx) __ksym;
+void bpf_crypto_ctx_release(struct bpf_crypto_ctx *ctx) __ksym;
+int bpf_crypto_encrypt(struct bpf_crypto_ctx *ctx, const struct bpf_dynptr *src,
+ const struct bpf_dynptr *dst, const struct bpf_dynptr *iv) __ksym;
+int bpf_crypto_decrypt(struct bpf_crypto_ctx *ctx, const struct bpf_dynptr *src,
+ const struct bpf_dynptr *dst, const struct bpf_dynptr *iv) __ksym;
+
+struct __crypto_ctx_value {
+ struct bpf_crypto_ctx __kptr * ctx;
+};
+
+struct array_map {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct __crypto_ctx_value);
+ __uint(max_entries, 1);
+} __crypto_ctx_map SEC(".maps");
+
+static inline struct __crypto_ctx_value *crypto_ctx_value_lookup(void)
+{
+ u32 key = 0;
+
+ return bpf_map_lookup_elem(&__crypto_ctx_map, &key);
+}
+
+static inline int crypto_ctx_insert(struct bpf_crypto_ctx *ctx)
+{
+ struct __crypto_ctx_value local, *v;
+ struct bpf_crypto_ctx *old;
+ u32 key = 0;
+ int err;
+
+ local.ctx = NULL;
+ err = bpf_map_update_elem(&__crypto_ctx_map, &key, &local, 0);
+ if (err) {
+ bpf_crypto_ctx_release(ctx);
+ return err;
+ }
+
+ v = bpf_map_lookup_elem(&__crypto_ctx_map, &key);
+ if (!v) {
+ bpf_crypto_ctx_release(ctx);
+ return -ENOENT;
+ }
+
+ old = bpf_kptr_xchg(&v->ctx, ctx);
+ if (old) {
+ bpf_crypto_ctx_release(old);
+ return -EEXIST;
+ }
+
+ return 0;
+}
+
+#endif /* _CRYPTO_COMMON_H */
diff --git a/tools/testing/selftests/bpf/progs/crypto_sanity.c b/tools/testing/selftests/bpf/progs/crypto_sanity.c
new file mode 100644
index 000000000000..645be6cddf36
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/crypto_sanity.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_kfuncs.h"
+#include "crypto_common.h"
+
+unsigned char key[256] = {};
+u16 udp_test_port = 7777;
+u32 authsize, key_len;
+char algo[128] = {};
+char dst[16] = {};
+int status;
+
+static int skb_dynptr_validate(struct __sk_buff *skb, struct bpf_dynptr *psrc)
+{
+ struct ipv6hdr ip6h;
+ struct udphdr udph;
+ u32 offset;
+
+ if (skb->protocol != __bpf_constant_htons(ETH_P_IPV6))
+ return -1;
+
+ if (bpf_skb_load_bytes(skb, ETH_HLEN, &ip6h, sizeof(ip6h)))
+ return -1;
+
+ if (ip6h.nexthdr != IPPROTO_UDP)
+ return -1;
+
+ if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(ip6h), &udph, sizeof(udph)))
+ return -1;
+
+ if (udph.dest != __bpf_htons(udp_test_port))
+ return -1;
+
+ offset = ETH_HLEN + sizeof(ip6h) + sizeof(udph);
+ if (skb->len < offset + 16)
+ return -1;
+
+ /* let's make sure that 16 bytes of payload are in the linear part of skb */
+ bpf_skb_pull_data(skb, offset + 16);
+ bpf_dynptr_from_skb(skb, 0, psrc);
+ bpf_dynptr_adjust(psrc, offset, offset + 16);
+
+ return 0;
+}
+
+SEC("syscall")
+int skb_crypto_setup(void *ctx)
+{
+ struct bpf_crypto_params params = {
+ .type = "skcipher",
+ .key_len = key_len,
+ .authsize = authsize,
+ };
+ struct bpf_crypto_ctx *cctx;
+ int err = 0;
+
+ status = 0;
+
+ if (key_len > 256) {
+ status = -EINVAL;
+ return 0;
+ }
+
+ __builtin_memcpy(&params.algo, algo, sizeof(algo));
+ __builtin_memcpy(&params.key, key, sizeof(key));
+ cctx = bpf_crypto_ctx_create(&params, sizeof(params), &err);
+
+ if (!cctx) {
+ status = err;
+ return 0;
+ }
+
+ err = crypto_ctx_insert(cctx);
+ if (err && err != -EEXIST)
+ status = err;
+
+ return 0;
+}
+
+SEC("tc")
+int decrypt_sanity(struct __sk_buff *skb)
+{
+ struct __crypto_ctx_value *v;
+ struct bpf_crypto_ctx *ctx;
+ struct bpf_dynptr psrc, pdst;
+ int err;
+
+ err = skb_dynptr_validate(skb, &psrc);
+ if (err < 0) {
+ status = err;
+ return TC_ACT_SHOT;
+ }
+
+ v = crypto_ctx_value_lookup();
+ if (!v) {
+ status = -ENOENT;
+ return TC_ACT_SHOT;
+ }
+
+ ctx = v->ctx;
+ if (!ctx) {
+ status = -ENOENT;
+ return TC_ACT_SHOT;
+ }
+
+ /* dst is a global variable to make testing part easier to check. In real
+ * production code, a percpu map should be used to store the result.
+ */
+ bpf_dynptr_from_mem(dst, sizeof(dst), 0, &pdst);
+
+ status = bpf_crypto_decrypt(ctx, &psrc, &pdst, NULL);
+
+ return TC_ACT_SHOT;
+}
+
+SEC("tc")
+int encrypt_sanity(struct __sk_buff *skb)
+{
+ struct __crypto_ctx_value *v;
+ struct bpf_crypto_ctx *ctx;
+ struct bpf_dynptr psrc, pdst;
+ int err;
+
+ status = 0;
+
+ err = skb_dynptr_validate(skb, &psrc);
+ if (err < 0) {
+ status = err;
+ return TC_ACT_SHOT;
+ }
+
+ v = crypto_ctx_value_lookup();
+ if (!v) {
+ status = -ENOENT;
+ return TC_ACT_SHOT;
+ }
+
+ ctx = v->ctx;
+ if (!ctx) {
+ status = -ENOENT;
+ return TC_ACT_SHOT;
+ }
+
+ /* dst is a global variable to make testing part easier to check. In real
+ * production code, a percpu map should be used to store the result.
+ */
+ bpf_dynptr_from_mem(dst, sizeof(dst), 0, &pdst);
+
+ status = bpf_crypto_encrypt(ctx, &psrc, &pdst, NULL);
+
+ return TC_ACT_SHOT;
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/dummy_st_ops_success.c b/tools/testing/selftests/bpf/progs/dummy_st_ops_success.c
index 1efa746c25dc..ec0c595d47af 100644
--- a/tools/testing/selftests/bpf/progs/dummy_st_ops_success.c
+++ b/tools/testing/selftests/bpf/progs/dummy_st_ops_success.c
@@ -11,8 +11,17 @@ int BPF_PROG(test_1, struct bpf_dummy_ops_state *state)
{
int ret;
- if (!state)
- return 0xf2f3f4f5;
+ /* Check that 'state' nullable status is detected correctly.
+ * If 'state' argument would be assumed non-null by verifier
+ * the code below would be deleted as dead (which it shouldn't).
+ * Hide it from the compiler behind 'asm' block to avoid
+ * unnecessary optimizations.
+ */
+ asm volatile (
+ "if %[state] != 0 goto +2;"
+ "r0 = 0xf2f3f4f5;"
+ "exit;"
+ ::[state]"p"(state));
ret = state->val;
state->val = 0x5a;
@@ -25,7 +34,7 @@ SEC("struct_ops/test_2")
int BPF_PROG(test_2, struct bpf_dummy_ops_state *state, int a1, unsigned short a2,
char a3, unsigned long a4)
{
- test_2_args[0] = (unsigned long)state;
+ test_2_args[0] = state->val;
test_2_args[1] = a1;
test_2_args[2] = a2;
test_2_args[3] = a3;
diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c
index 7ce7e827d5f0..e35bc1eac52a 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c
@@ -80,7 +80,7 @@ SEC("?raw_tp")
__failure __msg("Unreleased reference id=2")
int ringbuf_missing_release1(void *ctx)
{
- struct bpf_dynptr ptr;
+ struct bpf_dynptr ptr = {};
bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
@@ -964,7 +964,7 @@ int dynptr_invalidate_slice_reinit(void *ctx)
* mem_or_null pointers.
*/
SEC("?raw_tp")
-__failure __msg("R1 type=scalar expected=percpu_ptr_")
+__failure __regex("R[0-9]+ type=scalar expected=percpu_ptr_")
int dynptr_invalidate_slice_or_null(void *ctx)
{
struct bpf_dynptr ptr;
@@ -982,7 +982,7 @@ int dynptr_invalidate_slice_or_null(void *ctx)
/* Destruction of dynptr should also any slices obtained from it */
SEC("?raw_tp")
-__failure __msg("R7 invalid mem access 'scalar'")
+__failure __regex("R[0-9]+ invalid mem access 'scalar'")
int dynptr_invalidate_slice_failure(void *ctx)
{
struct bpf_dynptr ptr1;
@@ -1069,7 +1069,7 @@ int dynptr_read_into_slot(void *ctx)
/* bpf_dynptr_slice()s are read-only and cannot be written to */
SEC("?tc")
-__failure __msg("R0 cannot write into rdonly_mem")
+__failure __regex("R[0-9]+ cannot write into rdonly_mem")
int skb_invalid_slice_write(struct __sk_buff *skb)
{
struct bpf_dynptr ptr;
@@ -1385,7 +1385,7 @@ SEC("?raw_tp")
__failure __msg("Expected an initialized dynptr as arg #1")
int dynptr_adjust_invalid(void *ctx)
{
- struct bpf_dynptr ptr;
+ struct bpf_dynptr ptr = {};
/* this should fail */
bpf_dynptr_adjust(&ptr, 1, 2);
@@ -1398,7 +1398,7 @@ SEC("?raw_tp")
__failure __msg("Expected an initialized dynptr as arg #1")
int dynptr_is_null_invalid(void *ctx)
{
- struct bpf_dynptr ptr;
+ struct bpf_dynptr ptr = {};
/* this should fail */
bpf_dynptr_is_null(&ptr);
@@ -1411,7 +1411,7 @@ SEC("?raw_tp")
__failure __msg("Expected an initialized dynptr as arg #1")
int dynptr_is_rdonly_invalid(void *ctx)
{
- struct bpf_dynptr ptr;
+ struct bpf_dynptr ptr = {};
/* this should fail */
bpf_dynptr_is_rdonly(&ptr);
@@ -1424,7 +1424,7 @@ SEC("?raw_tp")
__failure __msg("Expected an initialized dynptr as arg #1")
int dynptr_size_invalid(void *ctx)
{
- struct bpf_dynptr ptr;
+ struct bpf_dynptr ptr = {};
/* this should fail */
bpf_dynptr_size(&ptr);
@@ -1437,7 +1437,7 @@ SEC("?raw_tp")
__failure __msg("Expected an initialized dynptr as arg #1")
int clone_invalid1(void *ctx)
{
- struct bpf_dynptr ptr1;
+ struct bpf_dynptr ptr1 = {};
struct bpf_dynptr ptr2;
/* this should fail */
@@ -1686,3 +1686,27 @@ int test_dynptr_skb_small_buff(struct __sk_buff *skb)
return !!data;
}
+
+__noinline long global_call_bpf_dynptr(const struct bpf_dynptr *dynptr)
+{
+ long ret = 0;
+ /* Avoid leaving this global function empty to avoid having the compiler
+ * optimize away the call to this global function.
+ */
+ __sink(ret);
+ return ret;
+}
+
+SEC("?raw_tp")
+__failure __msg("arg#1 expected pointer to stack or const struct bpf_dynptr")
+int test_dynptr_reg_type(void *ctx)
+{
+ struct task_struct *current = NULL;
+ /* R1 should be holding a PTR_TO_BTF_ID, so this shouldn't be a
+ * reg->type that can be passed to a function accepting a
+ * ARG_PTR_TO_DYNPTR | MEM_RDONLY. process_dynptr_func() should catch
+ * this.
+ */
+ global_call_bpf_dynptr((const struct bpf_dynptr *)current);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/fib_lookup.c b/tools/testing/selftests/bpf/progs/fib_lookup.c
index c4514dd58c62..7b5dd2214ff4 100644
--- a/tools/testing/selftests/bpf/progs/fib_lookup.c
+++ b/tools/testing/selftests/bpf/progs/fib_lookup.c
@@ -3,8 +3,8 @@
#include <linux/types.h>
#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>
-#include "bpf_tracing_net.h"
struct bpf_fib_lookup fib_params = {};
int fib_lookup_ret = 0;
diff --git a/tools/testing/selftests/bpf/progs/for_each_multi_maps.c b/tools/testing/selftests/bpf/progs/for_each_multi_maps.c
new file mode 100644
index 000000000000..ff0bed7d4459
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/for_each_multi_maps.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 3);
+ __type(key, __u32);
+ __type(value, __u64);
+} arraymap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 5);
+ __type(key, __u32);
+ __type(value, __u64);
+} hashmap SEC(".maps");
+
+struct callback_ctx {
+ int output;
+};
+
+u32 data_output = 0;
+int use_array = 0;
+
+static __u64
+check_map_elem(struct bpf_map *map, __u32 *key, __u64 *val,
+ struct callback_ctx *data)
+{
+ data->output += *val;
+ return 0;
+}
+
+SEC("tc")
+int test_pkt_access(struct __sk_buff *skb)
+{
+ struct callback_ctx data;
+
+ data.output = 0;
+ if (use_array)
+ bpf_for_each_map_elem(&arraymap, check_map_elem, &data, 0);
+ else
+ bpf_for_each_map_elem(&hashmap, check_map_elem, &data, 0);
+ data_output = data.output;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/get_func_ip_test.c b/tools/testing/selftests/bpf/progs/get_func_ip_test.c
index 8956eb78a226..2011cacdeb18 100644
--- a/tools/testing/selftests/bpf/progs/get_func_ip_test.c
+++ b/tools/testing/selftests/bpf/progs/get_func_ip_test.c
@@ -5,13 +5,12 @@
char _license[] SEC("license") = "GPL";
-extern const void bpf_fentry_test1 __ksym;
+extern int bpf_fentry_test1(int a) __ksym;
+extern int bpf_modify_return_test(int a, int *b) __ksym;
+
extern const void bpf_fentry_test2 __ksym;
extern const void bpf_fentry_test3 __ksym;
extern const void bpf_fentry_test4 __ksym;
-extern const void bpf_modify_return_test __ksym;
-extern const void bpf_fentry_test6 __ksym;
-extern const void bpf_fentry_test7 __ksym;
extern bool CONFIG_X86_KERNEL_IBT __kconfig __weak;
diff --git a/tools/testing/selftests/bpf/progs/getpeername4_prog.c b/tools/testing/selftests/bpf/progs/getpeername4_prog.c
new file mode 100644
index 000000000000..4c97208cd25d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/getpeername4_prog.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Google LLC */
+
+#include "vmlinux.h"
+
+#include <string.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_kfuncs.h"
+
+#define REWRITE_ADDRESS_IP4 0xc0a801fe // 192.168.1.254
+#define REWRITE_ADDRESS_PORT4 4040
+
+SEC("cgroup/getpeername4")
+int getpeername_v4_prog(struct bpf_sock_addr *ctx)
+{
+ ctx->user_ip4 = bpf_htonl(REWRITE_ADDRESS_IP4);
+ ctx->user_port = bpf_htons(REWRITE_ADDRESS_PORT4);
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/getpeername6_prog.c b/tools/testing/selftests/bpf/progs/getpeername6_prog.c
new file mode 100644
index 000000000000..070e4d7f636c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/getpeername6_prog.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Google LLC */
+
+#include "vmlinux.h"
+
+#include <string.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_kfuncs.h"
+
+#define REWRITE_ADDRESS_IP6_0 0xfaceb00c
+#define REWRITE_ADDRESS_IP6_1 0x12345678
+#define REWRITE_ADDRESS_IP6_2 0x00000000
+#define REWRITE_ADDRESS_IP6_3 0x0000abcd
+
+#define REWRITE_ADDRESS_PORT6 6060
+
+SEC("cgroup/getpeername6")
+int getpeername_v6_prog(struct bpf_sock_addr *ctx)
+{
+ ctx->user_ip6[0] = bpf_htonl(REWRITE_ADDRESS_IP6_0);
+ ctx->user_ip6[1] = bpf_htonl(REWRITE_ADDRESS_IP6_1);
+ ctx->user_ip6[2] = bpf_htonl(REWRITE_ADDRESS_IP6_2);
+ ctx->user_ip6[3] = bpf_htonl(REWRITE_ADDRESS_IP6_3);
+ ctx->user_port = bpf_htons(REWRITE_ADDRESS_PORT6);
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/getsockname4_prog.c b/tools/testing/selftests/bpf/progs/getsockname4_prog.c
new file mode 100644
index 000000000000..e298487c6347
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/getsockname4_prog.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Google LLC */
+
+#include "vmlinux.h"
+
+#include <string.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_kfuncs.h"
+
+#define REWRITE_ADDRESS_IP4 0xc0a801fe // 192.168.1.254
+#define REWRITE_ADDRESS_PORT4 4040
+
+SEC("cgroup/getsockname4")
+int getsockname_v4_prog(struct bpf_sock_addr *ctx)
+{
+ ctx->user_ip4 = bpf_htonl(REWRITE_ADDRESS_IP4);
+ ctx->user_port = bpf_htons(REWRITE_ADDRESS_PORT4);
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/getsockname6_prog.c b/tools/testing/selftests/bpf/progs/getsockname6_prog.c
new file mode 100644
index 000000000000..811d10cd5525
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/getsockname6_prog.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Google LLC */
+
+#include "vmlinux.h"
+
+#include <string.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_kfuncs.h"
+
+#define REWRITE_ADDRESS_IP6_0 0xfaceb00c
+#define REWRITE_ADDRESS_IP6_1 0x12345678
+#define REWRITE_ADDRESS_IP6_2 0x00000000
+#define REWRITE_ADDRESS_IP6_3 0x0000abcd
+
+#define REWRITE_ADDRESS_PORT6 6060
+
+SEC("cgroup/getsockname6")
+int getsockname_v6_prog(struct bpf_sock_addr *ctx)
+{
+ ctx->user_ip6[0] = bpf_htonl(REWRITE_ADDRESS_IP6_0);
+ ctx->user_ip6[1] = bpf_htonl(REWRITE_ADDRESS_IP6_1);
+ ctx->user_ip6[2] = bpf_htonl(REWRITE_ADDRESS_IP6_2);
+ ctx->user_ip6[3] = bpf_htonl(REWRITE_ADDRESS_IP6_3);
+ ctx->user_port = bpf_htons(REWRITE_ADDRESS_PORT6);
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/ip_check_defrag.c b/tools/testing/selftests/bpf/progs/ip_check_defrag.c
index 1c2b6c1616b0..645b2c9f7867 100644
--- a/tools/testing/selftests/bpf/progs/ip_check_defrag.c
+++ b/tools/testing/selftests/bpf/progs/ip_check_defrag.c
@@ -12,7 +12,7 @@
#define IP_OFFSET 0x1FFF
#define NEXTHDR_FRAGMENT 44
-extern int bpf_dynptr_from_skb(struct sk_buff *skb, __u64 flags,
+extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags,
struct bpf_dynptr *ptr__uninit) __ksym;
extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset,
void *buffer, uint32_t buffer__sz) __ksym;
@@ -42,7 +42,7 @@ static bool is_frag_v6(struct ipv6hdr *ip6h)
return ip6h->nexthdr == NEXTHDR_FRAGMENT;
}
-static int handle_v4(struct sk_buff *skb)
+static int handle_v4(struct __sk_buff *skb)
{
struct bpf_dynptr ptr;
u8 iph_buf[20] = {};
@@ -64,7 +64,7 @@ static int handle_v4(struct sk_buff *skb)
return NF_ACCEPT;
}
-static int handle_v6(struct sk_buff *skb)
+static int handle_v6(struct __sk_buff *skb)
{
struct bpf_dynptr ptr;
struct ipv6hdr *ip6h;
@@ -89,9 +89,9 @@ static int handle_v6(struct sk_buff *skb)
SEC("netfilter")
int defrag(struct bpf_nf_ctx *ctx)
{
- struct sk_buff *skb = ctx->skb;
+ struct __sk_buff *skb = (struct __sk_buff *)ctx->skb;
- switch (bpf_ntohs(skb->protocol)) {
+ switch (bpf_ntohs(ctx->skb->protocol)) {
case ETH_P_IP:
return handle_v4(skb);
case ETH_P_IPV6:
diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c
index 3db416606f2f..16bdc3e25591 100644
--- a/tools/testing/selftests/bpf/progs/iters.c
+++ b/tools/testing/selftests/bpf/progs/iters.c
@@ -7,8 +7,6 @@
#include "bpf_misc.h"
#include "bpf_compiler.h"
-#define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof((x)[0]))
-
static volatile int zero = 0;
int my_pid;
@@ -673,7 +671,7 @@ static __noinline void fill(struct bpf_iter_num *it, int *arr, __u32 n, int mul)
static __noinline int sum(struct bpf_iter_num *it, int *arr, __u32 n)
{
- int *t, i, sum = 0;;
+ int *t, i, sum = 0;
while ((t = bpf_iter_num_next(it))) {
i = *t;
diff --git a/tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c b/tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c
index f46965053acb..4d619bea9c75 100644
--- a/tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c
+++ b/tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c
@@ -4,6 +4,10 @@
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
+#ifndef __clang__
+#pragma GCC diagnostic ignored "-Warray-bounds"
+#endif
+
char _license[] SEC("license") = "GPL";
struct {
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test.c b/tools/testing/selftests/bpf/progs/kfunc_call_test.c
index cf68d1e48a0f..f502f755f567 100644
--- a/tools/testing/selftests/bpf/progs/kfunc_call_test.c
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_test.c
@@ -177,4 +177,41 @@ int kfunc_call_test_static_unused_arg(struct __sk_buff *skb)
return actual != expected ? -1 : 0;
}
+struct ctx_val {
+ struct bpf_testmod_ctx __kptr *ctx;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct ctx_val);
+} ctx_map SEC(".maps");
+
+SEC("tc")
+int kfunc_call_ctx(struct __sk_buff *skb)
+{
+ struct bpf_testmod_ctx *ctx;
+ int err = 0;
+
+ ctx = bpf_testmod_ctx_create(&err);
+ if (!ctx && !err)
+ err = -1;
+ if (ctx) {
+ int key = 0;
+ struct ctx_val *ctx_val = bpf_map_lookup_elem(&ctx_map, &key);
+
+ /* Transfer ctx to map to be freed via implicit dtor call
+ * on cleanup.
+ */
+ if (ctx_val)
+ ctx = bpf_kptr_xchg(&ctx_val->ctx, ctx);
+ if (ctx) {
+ bpf_testmod_ctx_release(ctx);
+ err = -1;
+ }
+ }
+ return err;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi_session.c b/tools/testing/selftests/bpf/progs/kprobe_multi_session.c
new file mode 100644
index 000000000000..bd8b7fb7061e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kprobe_multi_session.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <stdbool.h>
+#include "bpf_kfuncs.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+extern const void bpf_fentry_test1 __ksym;
+extern const void bpf_fentry_test2 __ksym;
+extern const void bpf_fentry_test3 __ksym;
+extern const void bpf_fentry_test4 __ksym;
+extern const void bpf_fentry_test5 __ksym;
+extern const void bpf_fentry_test6 __ksym;
+extern const void bpf_fentry_test7 __ksym;
+extern const void bpf_fentry_test8 __ksym;
+
+int pid = 0;
+
+__u64 kprobe_session_result[8];
+
+static int session_check(void *ctx)
+{
+ unsigned int i;
+ __u64 addr;
+ const void *kfuncs[] = {
+ &bpf_fentry_test1,
+ &bpf_fentry_test2,
+ &bpf_fentry_test3,
+ &bpf_fentry_test4,
+ &bpf_fentry_test5,
+ &bpf_fentry_test6,
+ &bpf_fentry_test7,
+ &bpf_fentry_test8,
+ };
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 1;
+
+ addr = bpf_get_func_ip(ctx);
+
+ for (i = 0; i < ARRAY_SIZE(kfuncs); i++) {
+ if (kfuncs[i] == (void *) addr) {
+ kprobe_session_result[i]++;
+ break;
+ }
+ }
+
+ /*
+ * Force probes for function bpf_fentry_test[5-8] not to
+ * install and execute the return probe
+ */
+ if (((const void *) addr == &bpf_fentry_test5) ||
+ ((const void *) addr == &bpf_fentry_test6) ||
+ ((const void *) addr == &bpf_fentry_test7) ||
+ ((const void *) addr == &bpf_fentry_test8))
+ return 1;
+
+ return 0;
+}
+
+/*
+ * No tests in here, just to trigger 'bpf_fentry_test*'
+ * through tracing test_run
+ */
+SEC("fentry/bpf_modify_return_test")
+int BPF_PROG(trigger)
+{
+ return 0;
+}
+
+SEC("kprobe.session/bpf_fentry_test*")
+int test_kprobe(struct pt_regs *ctx)
+{
+ return session_check(ctx);
+}
diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c b/tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c
new file mode 100644
index 000000000000..0835b5edf685
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kprobe_multi_session_cookie.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <stdbool.h>
+#include "bpf_kfuncs.h"
+
+char _license[] SEC("license") = "GPL";
+
+int pid = 0;
+
+__u64 test_kprobe_1_result = 0;
+__u64 test_kprobe_2_result = 0;
+__u64 test_kprobe_3_result = 0;
+
+/*
+ * No tests in here, just to trigger 'bpf_fentry_test*'
+ * through tracing test_run
+ */
+SEC("fentry/bpf_modify_return_test")
+int BPF_PROG(trigger)
+{
+ return 0;
+}
+
+static int check_cookie(__u64 val, __u64 *result)
+{
+ __u64 *cookie;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 1;
+
+ cookie = bpf_session_cookie();
+
+ if (bpf_session_is_return())
+ *result = *cookie == val ? val : 0;
+ else
+ *cookie = val;
+ return 0;
+}
+
+SEC("kprobe.session/bpf_fentry_test1")
+int test_kprobe_1(struct pt_regs *ctx)
+{
+ return check_cookie(1, &test_kprobe_1_result);
+}
+
+SEC("kprobe.session/bpf_fentry_test1")
+int test_kprobe_2(struct pt_regs *ctx)
+{
+ return check_cookie(2, &test_kprobe_2_result);
+}
+
+SEC("kprobe.session/bpf_fentry_test1")
+int test_kprobe_3(struct pt_regs *ctx)
+{
+ return check_cookie(3, &test_kprobe_3_result);
+}
diff --git a/tools/testing/selftests/bpf/progs/linked_list.c b/tools/testing/selftests/bpf/progs/linked_list.c
index 26205ca80679..421f40835acd 100644
--- a/tools/testing/selftests/bpf/progs/linked_list.c
+++ b/tools/testing/selftests/bpf/progs/linked_list.c
@@ -4,13 +4,26 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
#include "bpf_experimental.h"
-
-#ifndef ARRAY_SIZE
-#define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof((x)[0]))
-#endif
+#include "bpf_misc.h"
#include "linked_list.h"
+struct head_nested_inner {
+ struct bpf_spin_lock lock;
+ struct bpf_list_head head __contains(foo, node2);
+};
+
+struct head_nested {
+ int dummy;
+ struct head_nested_inner inner;
+};
+
+private(C) struct bpf_spin_lock glock_c;
+private(C) struct bpf_list_head ghead_array[2] __contains(foo, node2);
+private(C) struct bpf_list_head ghead_array_one[1] __contains(foo, node2);
+
+private(D) struct head_nested ghead_nested;
+
static __always_inline
int list_push_pop(struct bpf_spin_lock *lock, struct bpf_list_head *head, bool leave_in_map)
{
@@ -310,6 +323,32 @@ int global_list_push_pop(void *ctx)
}
SEC("tc")
+int global_list_push_pop_nested(void *ctx)
+{
+ return test_list_push_pop(&ghead_nested.inner.lock, &ghead_nested.inner.head);
+}
+
+SEC("tc")
+int global_list_array_push_pop(void *ctx)
+{
+ int r;
+
+ r = test_list_push_pop(&glock_c, &ghead_array[0]);
+ if (r)
+ return r;
+
+ r = test_list_push_pop(&glock_c, &ghead_array[1]);
+ if (r)
+ return r;
+
+ /* Arrays with only one element is a special case, being treated
+ * just like a bpf_list_head variable by the verifier, not an
+ * array.
+ */
+ return test_list_push_pop(&glock_c, &ghead_array_one[0]);
+}
+
+SEC("tc")
int map_list_push_pop_multiple(void *ctx)
{
struct map_value *v;
diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c
index e5e3a8b8dd07..637e75df2e14 100644
--- a/tools/testing/selftests/bpf/progs/local_storage.c
+++ b/tools/testing/selftests/bpf/progs/local_storage.c
@@ -140,11 +140,12 @@ int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address,
{
__u32 pid = bpf_get_current_pid_tgid() >> 32;
struct local_storage *storage;
+ struct sock *sk = sock->sk;
- if (pid != monitored_pid)
+ if (pid != monitored_pid || !sk)
return 0;
- storage = bpf_sk_storage_get(&sk_storage_map, sock->sk, 0, 0);
+ storage = bpf_sk_storage_get(&sk_storage_map, sk, 0, 0);
if (!storage)
return 0;
@@ -155,24 +156,24 @@ int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address,
/* This tests that we can associate multiple elements
* with the local storage.
*/
- storage = bpf_sk_storage_get(&sk_storage_map2, sock->sk, 0,
+ storage = bpf_sk_storage_get(&sk_storage_map2, sk, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (!storage)
return 0;
- if (bpf_sk_storage_delete(&sk_storage_map2, sock->sk))
+ if (bpf_sk_storage_delete(&sk_storage_map2, sk))
return 0;
- storage = bpf_sk_storage_get(&sk_storage_map2, sock->sk, 0,
+ storage = bpf_sk_storage_get(&sk_storage_map2, sk, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (!storage)
return 0;
- if (bpf_sk_storage_delete(&sk_storage_map, sock->sk))
+ if (bpf_sk_storage_delete(&sk_storage_map, sk))
return 0;
/* Ensure that the sk_storage_map is disconnected from the storage. */
- if (!sock->sk->sk_bpf_storage || sock->sk->sk_bpf_storage->smap)
+ if (!sk->sk_bpf_storage || sk->sk_bpf_storage->smap)
return 0;
sk_storage_result = 0;
@@ -185,11 +186,12 @@ int BPF_PROG(socket_post_create, struct socket *sock, int family, int type,
{
__u32 pid = bpf_get_current_pid_tgid() >> 32;
struct local_storage *storage;
+ struct sock *sk = sock->sk;
- if (pid != monitored_pid)
+ if (pid != monitored_pid || !sk)
return 0;
- storage = bpf_sk_storage_get(&sk_storage_map, sock->sk, 0,
+ storage = bpf_sk_storage_get(&sk_storage_map, sk, 0,
BPF_LOCAL_STORAGE_GET_F_CREATE);
if (!storage)
return 0;
diff --git a/tools/testing/selftests/bpf/progs/lsm_cgroup.c b/tools/testing/selftests/bpf/progs/lsm_cgroup.c
index 02c11d16b692..d7598538aa2d 100644
--- a/tools/testing/selftests/bpf/progs/lsm_cgroup.c
+++ b/tools/testing/selftests/bpf/progs/lsm_cgroup.c
@@ -103,11 +103,15 @@ static __always_inline int real_bind(struct socket *sock,
int addrlen)
{
struct sockaddr_ll sa = {};
+ struct sock *sk = sock->sk;
- if (sock->sk->__sk_common.skc_family != AF_PACKET)
+ if (!sk)
+ return 1;
+
+ if (sk->__sk_common.skc_family != AF_PACKET)
return 1;
- if (sock->sk->sk_kern_sock)
+ if (sk->sk_kern_sock)
return 1;
bpf_probe_read_kernel(&sa, sizeof(sa), address);
diff --git a/tools/testing/selftests/bpf/progs/map_kptr.c b/tools/testing/selftests/bpf/progs/map_kptr.c
index da30f0d59364..ab0ce1d01a4a 100644
--- a/tools/testing/selftests/bpf/progs/map_kptr.c
+++ b/tools/testing/selftests/bpf/progs/map_kptr.c
@@ -110,10 +110,14 @@ DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_map, array_of_array_maps);
DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, hash_map, array_of_hash_maps);
DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, hash_malloc_map, array_of_hash_malloc_maps);
DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, lru_hash_map, array_of_lru_hash_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, pcpu_array_map, array_of_pcpu_array_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, pcpu_hash_map, array_of_pcpu_hash_maps);
DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, array_map, hash_of_array_maps);
DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, hash_map, hash_of_hash_maps);
DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, hash_malloc_map, hash_of_hash_malloc_maps);
DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, lru_hash_map, hash_of_lru_hash_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, pcpu_array_map, hash_of_pcpu_array_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, pcpu_hash_map, hash_of_pcpu_hash_maps);
#define WRITE_ONCE(x, val) ((*(volatile typeof(x) *) &(x)) = (val))
@@ -204,6 +208,8 @@ int test_map_kptr(struct __sk_buff *ctx)
TEST(hash_map);
TEST(hash_malloc_map);
TEST(lru_hash_map);
+ TEST(pcpu_array_map);
+ TEST(pcpu_hash_map);
#undef TEST
return 0;
@@ -281,10 +287,14 @@ int test_map_in_map_kptr(struct __sk_buff *ctx)
TEST(array_of_hash_maps);
TEST(array_of_hash_malloc_maps);
TEST(array_of_lru_hash_maps);
+ TEST(array_of_pcpu_array_maps);
+ TEST(array_of_pcpu_hash_maps);
TEST(hash_of_array_maps);
TEST(hash_of_hash_maps);
TEST(hash_of_hash_malloc_maps);
TEST(hash_of_lru_hash_maps);
+ TEST(hash_of_pcpu_array_maps);
+ TEST(hash_of_pcpu_hash_maps);
#undef TEST
return 0;
diff --git a/tools/testing/selftests/bpf/progs/map_percpu_stats.c b/tools/testing/selftests/bpf/progs/map_percpu_stats.c
index 10b2325c1720..63245785eb69 100644
--- a/tools/testing/selftests/bpf/progs/map_percpu_stats.c
+++ b/tools/testing/selftests/bpf/progs/map_percpu_stats.c
@@ -7,7 +7,7 @@
__u32 target_id;
-__s64 bpf_map_sum_elem_count(struct bpf_map *map) __ksym;
+__s64 bpf_map_sum_elem_count(const struct bpf_map *map) __ksym;
SEC("iter/bpf_map")
int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
diff --git a/tools/testing/selftests/bpf/progs/mptcp_sock.c b/tools/testing/selftests/bpf/progs/mptcp_sock.c
index 91a0d7eff2ac..f3acb90588c7 100644
--- a/tools/testing/selftests/bpf/progs/mptcp_sock.c
+++ b/tools/testing/selftests/bpf/progs/mptcp_sock.c
@@ -2,9 +2,9 @@
/* Copyright (c) 2020, Tessares SA. */
/* Copyright (c) 2022, SUSE. */
-#include <linux/bpf.h>
+#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
-#include "bpf_tcp_helpers.h"
+#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
__u32 token = 0;
diff --git a/tools/testing/selftests/bpf/progs/mptcpify.c b/tools/testing/selftests/bpf/progs/mptcpify.c
index 53301ae8a8f7..cbdc730c3a47 100644
--- a/tools/testing/selftests/bpf/progs/mptcpify.c
+++ b/tools/testing/selftests/bpf/progs/mptcpify.c
@@ -6,10 +6,14 @@
#include "bpf_tracing_net.h"
char _license[] SEC("license") = "GPL";
+int pid;
SEC("fmod_ret/update_socket_protocol")
int BPF_PROG(mptcpify, int family, int type, int protocol)
{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return protocol;
+
if ((family == AF_INET || family == AF_INET6) &&
type == SOCK_STREAM &&
(!protocol || protocol == IPPROTO_TCP)) {
diff --git a/tools/testing/selftests/bpf/progs/nested_trust_common.h b/tools/testing/selftests/bpf/progs/nested_trust_common.h
index 83d33931136e..1784b496be2e 100644
--- a/tools/testing/selftests/bpf/progs/nested_trust_common.h
+++ b/tools/testing/selftests/bpf/progs/nested_trust_common.h
@@ -7,6 +7,6 @@
#include <stdbool.h>
bool bpf_cpumask_test_cpu(unsigned int cpu, const struct cpumask *cpumask) __ksym;
-bool bpf_cpumask_first_zero(const struct cpumask *cpumask) __ksym;
+__u32 bpf_cpumask_first_zero(const struct cpumask *cpumask) __ksym;
#endif /* _NESTED_TRUST_COMMON_H */
diff --git a/tools/testing/selftests/bpf/progs/nested_trust_failure.c b/tools/testing/selftests/bpf/progs/nested_trust_failure.c
index ea39497f11ed..3568ec450100 100644
--- a/tools/testing/selftests/bpf/progs/nested_trust_failure.c
+++ b/tools/testing/selftests/bpf/progs/nested_trust_failure.c
@@ -31,14 +31,6 @@ int BPF_PROG(test_invalid_nested_user_cpus, struct task_struct *task, u64 clone_
return 0;
}
-SEC("tp_btf/task_newtask")
-__failure __msg("R1 must have zero offset when passed to release func or trusted arg to kfunc")
-int BPF_PROG(test_invalid_nested_offset, struct task_struct *task, u64 clone_flags)
-{
- bpf_cpumask_first_zero(&task->cpus_mask);
- return 0;
-}
-
/* Although R2 is of type sk_buff but sock_common is expected, we will hit untrusted ptr first. */
SEC("tp_btf/tcp_probe")
__failure __msg("R2 type=untrusted_ptr_ expected=ptr_, trusted_ptr_, rcu_ptr_")
diff --git a/tools/testing/selftests/bpf/progs/nested_trust_success.c b/tools/testing/selftests/bpf/progs/nested_trust_success.c
index 833840bffd3b..2b66953ca82e 100644
--- a/tools/testing/selftests/bpf/progs/nested_trust_success.c
+++ b/tools/testing/selftests/bpf/progs/nested_trust_success.c
@@ -32,3 +32,11 @@ int BPF_PROG(test_skb_field, struct sock *sk, struct sk_buff *skb)
bpf_sk_storage_get(&sk_storage_map, skb->sk, 0, 0);
return 0;
}
+
+SEC("tp_btf/task_newtask")
+__success
+int BPF_PROG(test_nested_offset, struct task_struct *task, u64 clone_flags)
+{
+ bpf_cpumask_first_zero(&task->cpus_mask);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/netif_receive_skb.c b/tools/testing/selftests/bpf/progs/netif_receive_skb.c
index c0062645fc68..9e067dcbf607 100644
--- a/tools/testing/selftests/bpf/progs/netif_receive_skb.c
+++ b/tools/testing/selftests/bpf/progs/netif_receive_skb.c
@@ -5,6 +5,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
#include <errno.h>
@@ -23,10 +24,6 @@ bool skip = false;
#define BADPTR 0
#endif
-#ifndef ARRAY_SIZE
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-#endif
-
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
__uint(max_entries, 1);
diff --git a/tools/testing/selftests/bpf/progs/preempt_lock.c b/tools/testing/selftests/bpf/progs/preempt_lock.c
new file mode 100644
index 000000000000..672fc368d9c4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/preempt_lock.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+SEC("?tc")
+__failure __msg("1 bpf_preempt_enable is missing")
+int preempt_lock_missing_1(struct __sk_buff *ctx)
+{
+ bpf_preempt_disable();
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("2 bpf_preempt_enable(s) are missing")
+int preempt_lock_missing_2(struct __sk_buff *ctx)
+{
+ bpf_preempt_disable();
+ bpf_preempt_disable();
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("3 bpf_preempt_enable(s) are missing")
+int preempt_lock_missing_3(struct __sk_buff *ctx)
+{
+ bpf_preempt_disable();
+ bpf_preempt_disable();
+ bpf_preempt_disable();
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("1 bpf_preempt_enable is missing")
+int preempt_lock_missing_3_minus_2(struct __sk_buff *ctx)
+{
+ bpf_preempt_disable();
+ bpf_preempt_disable();
+ bpf_preempt_disable();
+ bpf_preempt_enable();
+ bpf_preempt_enable();
+ return 0;
+}
+
+static __noinline void preempt_disable(void)
+{
+ bpf_preempt_disable();
+}
+
+static __noinline void preempt_enable(void)
+{
+ bpf_preempt_enable();
+}
+
+SEC("?tc")
+__failure __msg("1 bpf_preempt_enable is missing")
+int preempt_lock_missing_1_subprog(struct __sk_buff *ctx)
+{
+ preempt_disable();
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("2 bpf_preempt_enable(s) are missing")
+int preempt_lock_missing_2_subprog(struct __sk_buff *ctx)
+{
+ preempt_disable();
+ preempt_disable();
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("1 bpf_preempt_enable is missing")
+int preempt_lock_missing_2_minus_1_subprog(struct __sk_buff *ctx)
+{
+ preempt_disable();
+ preempt_disable();
+ preempt_enable();
+ return 0;
+}
+
+static __noinline void preempt_balance_subprog(void)
+{
+ preempt_disable();
+ preempt_enable();
+}
+
+SEC("?tc")
+__success int preempt_balance(struct __sk_buff *ctx)
+{
+ bpf_guard_preempt();
+ return 0;
+}
+
+SEC("?tc")
+__success int preempt_balance_subprog_test(struct __sk_buff *ctx)
+{
+ preempt_balance_subprog();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("sleepable helper bpf_copy_from_user#")
+int preempt_sleepable_helper(void *ctx)
+{
+ u32 data;
+
+ bpf_preempt_disable();
+ bpf_copy_from_user(&data, sizeof(data), NULL);
+ bpf_preempt_enable();
+ return 0;
+}
+
+int __noinline preempt_global_subprog(void)
+{
+ preempt_balance_subprog();
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("global function calls are not allowed with preemption disabled")
+int preempt_global_subprog_test(struct __sk_buff *ctx)
+{
+ preempt_disable();
+ preempt_global_subprog();
+ preempt_enable();
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h
index 6957d9f2805e..8bd1ebd7d6af 100644
--- a/tools/testing/selftests/bpf/progs/profiler.inc.h
+++ b/tools/testing/selftests/bpf/progs/profiler.inc.h
@@ -9,6 +9,7 @@
#include "err.h"
#include "bpf_experimental.h"
#include "bpf_compiler.h"
+#include "bpf_misc.h"
#ifndef NULL
#define NULL 0
@@ -133,10 +134,6 @@ struct {
__uint(max_entries, 16);
} disallowed_exec_inodes SEC(".maps");
-#ifndef ARRAY_SIZE
-#define ARRAY_SIZE(arr) (int)(sizeof(arr) / sizeof(arr[0]))
-#endif
-
static INLINE bool IS_ERR(const void* ptr)
{
return IS_ERR_VALUE((unsigned long)ptr);
diff --git a/tools/testing/selftests/bpf/progs/rbtree.c b/tools/testing/selftests/bpf/progs/rbtree.c
index b09f4fffe57c..a3620c15c136 100644
--- a/tools/testing/selftests/bpf/progs/rbtree.c
+++ b/tools/testing/selftests/bpf/progs/rbtree.c
@@ -13,6 +13,15 @@ struct node_data {
struct bpf_rb_node node;
};
+struct root_nested_inner {
+ struct bpf_spin_lock glock;
+ struct bpf_rb_root root __contains(node_data, node);
+};
+
+struct root_nested {
+ struct root_nested_inner inner;
+};
+
long less_callback_ran = -1;
long removed_key = -1;
long first_data[2] = {-1, -1};
@@ -20,6 +29,9 @@ long first_data[2] = {-1, -1};
#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
private(A) struct bpf_spin_lock glock;
private(A) struct bpf_rb_root groot __contains(node_data, node);
+private(A) struct bpf_rb_root groot_array[2] __contains(node_data, node);
+private(A) struct bpf_rb_root groot_array_one[1] __contains(node_data, node);
+private(B) struct root_nested groot_nested;
static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
{
@@ -72,6 +84,12 @@ long rbtree_add_nodes(void *ctx)
}
SEC("tc")
+long rbtree_add_nodes_nested(void *ctx)
+{
+ return __add_three(&groot_nested.inner.root, &groot_nested.inner.glock);
+}
+
+SEC("tc")
long rbtree_add_and_remove(void *ctx)
{
struct bpf_rb_node *res = NULL;
@@ -110,6 +128,65 @@ err_out:
}
SEC("tc")
+long rbtree_add_and_remove_array(void *ctx)
+{
+ struct bpf_rb_node *res1 = NULL, *res2 = NULL, *res3 = NULL;
+ struct node_data *nodes[3][2] = {{NULL, NULL}, {NULL, NULL}, {NULL, NULL}};
+ struct node_data *n;
+ long k1 = -1, k2 = -1, k3 = -1;
+ int i, j;
+
+ for (i = 0; i < 3; i++) {
+ for (j = 0; j < 2; j++) {
+ nodes[i][j] = bpf_obj_new(typeof(*nodes[i][j]));
+ if (!nodes[i][j])
+ goto err_out;
+ nodes[i][j]->key = i * 2 + j;
+ }
+ }
+
+ bpf_spin_lock(&glock);
+ for (i = 0; i < 2; i++)
+ for (j = 0; j < 2; j++)
+ bpf_rbtree_add(&groot_array[i], &nodes[i][j]->node, less);
+ for (j = 0; j < 2; j++)
+ bpf_rbtree_add(&groot_array_one[0], &nodes[2][j]->node, less);
+ res1 = bpf_rbtree_remove(&groot_array[0], &nodes[0][0]->node);
+ res2 = bpf_rbtree_remove(&groot_array[1], &nodes[1][0]->node);
+ res3 = bpf_rbtree_remove(&groot_array_one[0], &nodes[2][0]->node);
+ bpf_spin_unlock(&glock);
+
+ if (res1) {
+ n = container_of(res1, struct node_data, node);
+ k1 = n->key;
+ bpf_obj_drop(n);
+ }
+ if (res2) {
+ n = container_of(res2, struct node_data, node);
+ k2 = n->key;
+ bpf_obj_drop(n);
+ }
+ if (res3) {
+ n = container_of(res3, struct node_data, node);
+ k3 = n->key;
+ bpf_obj_drop(n);
+ }
+ if (k1 != 0 || k2 != 2 || k3 != 4)
+ return 2;
+
+ return 0;
+
+err_out:
+ for (i = 0; i < 3; i++) {
+ for (j = 0; j < 2; j++) {
+ if (nodes[i][j])
+ bpf_obj_drop(nodes[i][j]);
+ }
+ }
+ return 1;
+}
+
+SEC("tc")
long rbtree_first_and_remove(void *ctx)
{
struct bpf_rb_node *res = NULL;
diff --git a/tools/testing/selftests/bpf/progs/rbtree_fail.c b/tools/testing/selftests/bpf/progs/rbtree_fail.c
index 3fecf1c6dfe5..b722a1e1ddef 100644
--- a/tools/testing/selftests/bpf/progs/rbtree_fail.c
+++ b/tools/testing/selftests/bpf/progs/rbtree_fail.c
@@ -105,7 +105,7 @@ long rbtree_api_remove_unadded_node(void *ctx)
}
SEC("?tc")
-__failure __msg("Unreleased reference id=3 alloc_insn=10")
+__failure __regex("Unreleased reference id=3 alloc_insn=[0-9]+")
long rbtree_api_remove_no_drop(void *ctx)
{
struct bpf_rb_node *res;
diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c
index 1553b9c16aa7..f8d4b7cfcd68 100644
--- a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c
@@ -32,7 +32,7 @@ static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
}
SEC("?tc")
-__failure __msg("Unreleased reference id=4 alloc_insn=21")
+__failure __regex("Unreleased reference id=4 alloc_insn=[0-9]+")
long rbtree_refcounted_node_ref_escapes(void *ctx)
{
struct node_acquire *n, *m;
@@ -73,7 +73,7 @@ long refcount_acquire_maybe_null(void *ctx)
}
SEC("?tc")
-__failure __msg("Unreleased reference id=3 alloc_insn=9")
+__failure __regex("Unreleased reference id=3 alloc_insn=[0-9]+")
long rbtree_refcounted_node_ref_escapes_owning_input(void *ctx)
{
struct node_acquire *n, *m;
diff --git a/tools/testing/selftests/bpf/progs/sendmsg4_prog.c b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c
index 351e79aef2fa..edc159598a0e 100644
--- a/tools/testing/selftests/bpf/progs/sendmsg4_prog.c
+++ b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c
@@ -49,4 +49,10 @@ int sendmsg_v4_prog(struct bpf_sock_addr *ctx)
return 1;
}
+SEC("cgroup/sendmsg4")
+int sendmsg_v4_deny_prog(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/sendmsg6_prog.c b/tools/testing/selftests/bpf/progs/sendmsg6_prog.c
index bf9b46b806f6..36a7f960799f 100644
--- a/tools/testing/selftests/bpf/progs/sendmsg6_prog.c
+++ b/tools/testing/selftests/bpf/progs/sendmsg6_prog.c
@@ -20,6 +20,11 @@
#define DST_REWRITE_IP6_2 0
#define DST_REWRITE_IP6_3 1
+#define DST_REWRITE_IP6_V4_MAPPED_0 0
+#define DST_REWRITE_IP6_V4_MAPPED_1 0
+#define DST_REWRITE_IP6_V4_MAPPED_2 0x0000FFFF
+#define DST_REWRITE_IP6_V4_MAPPED_3 0xc0a80004 // 192.168.0.4
+
#define DST_REWRITE_PORT6 6666
SEC("cgroup/sendmsg6")
@@ -59,4 +64,56 @@ int sendmsg_v6_prog(struct bpf_sock_addr *ctx)
return 1;
}
+SEC("cgroup/sendmsg6")
+int sendmsg_v6_v4mapped_prog(struct bpf_sock_addr *ctx)
+{
+ /* Rewrite source. */
+ ctx->msg_src_ip6[0] = bpf_htonl(SRC_REWRITE_IP6_0);
+ ctx->msg_src_ip6[1] = bpf_htonl(SRC_REWRITE_IP6_1);
+ ctx->msg_src_ip6[2] = bpf_htonl(SRC_REWRITE_IP6_2);
+ ctx->msg_src_ip6[3] = bpf_htonl(SRC_REWRITE_IP6_3);
+
+ /* Rewrite destination. */
+ ctx->user_ip6[0] = bpf_htonl(DST_REWRITE_IP6_V4_MAPPED_0);
+ ctx->user_ip6[1] = bpf_htonl(DST_REWRITE_IP6_V4_MAPPED_1);
+ ctx->user_ip6[2] = bpf_htonl(DST_REWRITE_IP6_V4_MAPPED_2);
+ ctx->user_ip6[3] = bpf_htonl(DST_REWRITE_IP6_V4_MAPPED_3);
+
+ ctx->user_port = bpf_htons(DST_REWRITE_PORT6);
+
+ return 1;
+}
+
+SEC("cgroup/sendmsg6")
+int sendmsg_v6_wildcard_prog(struct bpf_sock_addr *ctx)
+{
+ /* Rewrite source. */
+ ctx->msg_src_ip6[0] = bpf_htonl(SRC_REWRITE_IP6_0);
+ ctx->msg_src_ip6[1] = bpf_htonl(SRC_REWRITE_IP6_1);
+ ctx->msg_src_ip6[2] = bpf_htonl(SRC_REWRITE_IP6_2);
+ ctx->msg_src_ip6[3] = bpf_htonl(SRC_REWRITE_IP6_3);
+
+ /* Rewrite destination. */
+ ctx->user_ip6[0] = bpf_htonl(0);
+ ctx->user_ip6[1] = bpf_htonl(0);
+ ctx->user_ip6[2] = bpf_htonl(0);
+ ctx->user_ip6[3] = bpf_htonl(0);
+
+ ctx->user_port = bpf_htons(DST_REWRITE_PORT6);
+
+ return 1;
+}
+
+SEC("cgroup/sendmsg6")
+int sendmsg_v6_preserve_dst_prog(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/sendmsg6")
+int sendmsg_v6_deny_prog(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c b/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c
index d8869b03dda9..332d0eb1116f 100644
--- a/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c
+++ b/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c
@@ -36,4 +36,10 @@ int sendmsg_unix_prog(struct bpf_sock_addr *ctx)
return 1;
}
+SEC("cgroup/sendmsg_unix")
+int sendmsg_unix_deny_prog(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/setget_sockopt.c b/tools/testing/selftests/bpf/progs/setget_sockopt.c
index 7a438600ae98..60518aed1ffc 100644
--- a/tools/testing/selftests/bpf/progs/setget_sockopt.c
+++ b/tools/testing/selftests/bpf/progs/setget_sockopt.c
@@ -6,10 +6,7 @@
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-
-#ifndef ARRAY_SIZE
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-#endif
+#include "bpf_misc.h"
extern unsigned long CONFIG_HZ __kconfig;
diff --git a/tools/testing/selftests/bpf/progs/skb_pkt_end.c b/tools/testing/selftests/bpf/progs/skb_pkt_end.c
index 992b7861003a..3bb4451524a1 100644
--- a/tools/testing/selftests/bpf/progs/skb_pkt_end.c
+++ b/tools/testing/selftests/bpf/progs/skb_pkt_end.c
@@ -1,5 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
+#ifndef BPF_NO_PRESERVE_ACCESS_INDEX
#define BPF_NO_PRESERVE_ACCESS_INDEX
+#endif
#include <vmlinux.h>
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
@@ -31,6 +33,8 @@ int main_prog(struct __sk_buff *skb)
struct iphdr *ip = NULL;
struct tcphdr *tcp;
__u8 proto = 0;
+ int urg_ptr;
+ u32 offset;
if (!(ip = get_iphdr(skb)))
goto out;
@@ -46,7 +50,14 @@ int main_prog(struct __sk_buff *skb)
if (!tcp)
goto out;
- return tcp->urg_ptr;
+ urg_ptr = tcp->urg_ptr;
+
+ /* Checksum validation part */
+ proto++;
+ offset = sizeof(struct ethhdr) + offsetof(struct iphdr, protocol);
+ bpf_skb_store_bytes(skb, offset, &proto, sizeof(proto), BPF_F_RECOMPUTE_CSUM);
+
+ return urg_ptr;
out:
return -1;
}
diff --git a/tools/testing/selftests/bpf/progs/sock_addr_kern.c b/tools/testing/selftests/bpf/progs/sock_addr_kern.c
new file mode 100644
index 000000000000..8386bb15ccdc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/sock_addr_kern.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Google LLC */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
+
+SEC("syscall")
+int init_sock(struct init_sock_args *args)
+{
+ bpf_kfunc_init_sock(args);
+
+ return 0;
+}
+
+SEC("syscall")
+int close_sock(void *ctx)
+{
+ bpf_kfunc_close_sock();
+
+ return 0;
+}
+
+SEC("syscall")
+int kernel_connect(struct addr_args *args)
+{
+ return bpf_kfunc_call_kernel_connect(args);
+}
+
+SEC("syscall")
+int kernel_bind(struct addr_args *args)
+{
+ return bpf_kfunc_call_kernel_bind(args);
+}
+
+SEC("syscall")
+int kernel_listen(struct addr_args *args)
+{
+ return bpf_kfunc_call_kernel_listen();
+}
+
+SEC("syscall")
+int kernel_sendmsg(struct sendmsg_args *args)
+{
+ return bpf_kfunc_call_kernel_sendmsg(args);
+}
+
+SEC("syscall")
+int sock_sendmsg(struct sendmsg_args *args)
+{
+ return bpf_kfunc_call_sock_sendmsg(args);
+}
+
+SEC("syscall")
+int kernel_getsockname(struct addr_args *args)
+{
+ return bpf_kfunc_call_kernel_getsockname(args);
+}
+
+SEC("syscall")
+int kernel_getpeername(struct addr_args *args)
+{
+ return bpf_kfunc_call_kernel_getpeername(args);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c b/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c
index 83753b00a556..5c3614333b01 100644
--- a/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c
+++ b/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c
@@ -1,24 +1,20 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2021 Facebook */
-#include <string.h>
-#include <linux/tcp.h>
-#include <netinet/in.h>
-#include <linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-#include "bpf_tcp_helpers.h"
+#include "bpf_tracing_net.h"
char _license[] SEC("license") = "GPL";
__s32 page_size = 0;
+const char cc_reno[TCP_CA_NAME_MAX] = "reno";
+const char cc_cubic[TCP_CA_NAME_MAX] = "cubic";
+
SEC("cgroup/setsockopt")
int sockopt_qos_to_cc(struct bpf_sockopt *ctx)
{
void *optval_end = ctx->optval_end;
int *optval = ctx->optval;
char buf[TCP_CA_NAME_MAX];
- char cc_reno[TCP_CA_NAME_MAX] = "reno";
- char cc_cubic[TCP_CA_NAME_MAX] = "cubic";
if (ctx->level != SOL_IPV6 || ctx->optname != IPV6_TCLASS)
goto out;
@@ -29,11 +25,11 @@ int sockopt_qos_to_cc(struct bpf_sockopt *ctx)
if (bpf_getsockopt(ctx->sk, SOL_TCP, TCP_CONGESTION, &buf, sizeof(buf)))
return 0;
- if (!tcp_cc_eq(buf, cc_cubic))
+ if (bpf_strncmp(buf, sizeof(buf), cc_cubic))
return 0;
if (*optval == 0x2d) {
- if (bpf_setsockopt(ctx->sk, SOL_TCP, TCP_CONGESTION, &cc_reno,
+ if (bpf_setsockopt(ctx->sk, SOL_TCP, TCP_CONGESTION, (void *)&cc_reno,
sizeof(cc_reno)))
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_detach.c b/tools/testing/selftests/bpf/progs/struct_ops_detach.c
new file mode 100644
index 000000000000..56b787a89876
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_detach.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "../bpf_testmod/bpf_testmod.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_do_detach;
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_forgotten_cb.c b/tools/testing/selftests/bpf/progs/struct_ops_forgotten_cb.c
new file mode 100644
index 000000000000..3c822103bd40
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_forgotten_cb.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../bpf_testmod/bpf_testmod.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("struct_ops/test_1")
+int BPF_PROG(test_1_forgotten)
+{
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops ops = {
+ /* we forgot to reference test_1_forgotten above, oops */
+};
+
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_module.c b/tools/testing/selftests/bpf/progs/struct_ops_module.c
index 026cabfa7f1f..4c56d4a9d9f4 100644
--- a/tools/testing/selftests/bpf/progs/struct_ops_module.c
+++ b/tools/testing/selftests/bpf/progs/struct_ops_module.c
@@ -23,7 +23,7 @@ void BPF_PROG(test_2, int a, int b)
test_2_result = a + b;
}
-SEC("struct_ops/test_3")
+SEC("?struct_ops/test_3")
int BPF_PROG(test_3, int a, int b)
{
test_2_result = a + b + 3;
@@ -54,3 +54,37 @@ struct bpf_testmod_ops___v2 testmod_2 = {
.test_1 = (void *)test_1,
.test_2 = (void *)test_2_v2,
};
+
+struct bpf_testmod_ops___zeroed {
+ int (*test_1)(void);
+ void (*test_2)(int a, int b);
+ int (*test_maybe_null)(int dummy, struct task_struct *task);
+ void (*zeroed_op)(int a, int b);
+ int zeroed;
+};
+
+SEC("struct_ops/test_3")
+int BPF_PROG(zeroed_op)
+{
+ return 1;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops___zeroed testmod_zeroed = {
+ .test_1 = (void *)test_1,
+ .test_2 = (void *)test_2_v2,
+ .zeroed_op = (void *)zeroed_op,
+};
+
+struct bpf_testmod_ops___incompatible {
+ int (*test_1)(void);
+ void (*test_2)(int *a);
+ int data;
+};
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops___incompatible testmod_incompatible = {
+ .test_1 = (void *)test_1,
+ .test_2 = (void *)test_2,
+ .data = 3,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_nulled_out_cb.c b/tools/testing/selftests/bpf/progs/struct_ops_nulled_out_cb.c
new file mode 100644
index 000000000000..fa2021388485
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_nulled_out_cb.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../bpf_testmod/bpf_testmod.h"
+
+char _license[] SEC("license") = "GPL";
+
+int rand;
+int arr[1];
+
+SEC("struct_ops/test_1")
+int BPF_PROG(test_1_turn_off)
+{
+ return arr[rand]; /* potentially way out of range access */
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops ops = {
+ .test_1 = (void *)test_1_turn_off,
+};
+
diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_common.h b/tools/testing/selftests/bpf/progs/task_kfunc_common.h
index 41f2d44f49cb..6720c4b5be41 100644
--- a/tools/testing/selftests/bpf/progs/task_kfunc_common.h
+++ b/tools/testing/selftests/bpf/progs/task_kfunc_common.h
@@ -13,7 +13,7 @@ struct __tasks_kfunc_map_value {
struct task_struct __kptr * task;
};
-struct hash_map {
+struct {
__uint(type, BPF_MAP_TYPE_HASH);
__type(key, int);
__type(value, struct __tasks_kfunc_map_value);
diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c b/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c
index 7bb872fb22dd..0016c90e9c13 100644
--- a/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c
+++ b/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c
@@ -1,24 +1,18 @@
// SPDX-License-Identifier: GPL-2.0
-#include "vmlinux.h"
-
+#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
-static inline struct tcp_sock *tcp_sk(const struct sock *sk)
-{
- return (struct tcp_sock *)sk;
-}
-
-SEC("struct_ops/incompl_cong_ops_ssthresh")
+SEC("struct_ops")
__u32 BPF_PROG(incompl_cong_ops_ssthresh, struct sock *sk)
{
return tcp_sk(sk)->snd_ssthresh;
}
-SEC("struct_ops/incompl_cong_ops_undo_cwnd")
+SEC("struct_ops")
__u32 BPF_PROG(incompl_cong_ops_undo_cwnd, struct sock *sk)
{
return tcp_sk(sk)->snd_cwnd;
diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_kfunc.c b/tools/testing/selftests/bpf/progs/tcp_ca_kfunc.c
new file mode 100644
index 000000000000..f95862f570b7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tcp_ca_kfunc.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_tracing.h>
+
+extern void bbr_init(struct sock *sk) __ksym;
+extern void bbr_main(struct sock *sk, u32 ack, int flag, const struct rate_sample *rs) __ksym;
+extern u32 bbr_sndbuf_expand(struct sock *sk) __ksym;
+extern u32 bbr_undo_cwnd(struct sock *sk) __ksym;
+extern void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event) __ksym;
+extern u32 bbr_ssthresh(struct sock *sk) __ksym;
+extern u32 bbr_min_tso_segs(struct sock *sk) __ksym;
+extern void bbr_set_state(struct sock *sk, u8 new_state) __ksym;
+
+extern void dctcp_init(struct sock *sk) __ksym;
+extern void dctcp_update_alpha(struct sock *sk, u32 flags) __ksym;
+extern void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev) __ksym;
+extern u32 dctcp_ssthresh(struct sock *sk) __ksym;
+extern u32 dctcp_cwnd_undo(struct sock *sk) __ksym;
+extern void dctcp_state(struct sock *sk, u8 new_state) __ksym;
+
+extern void cubictcp_init(struct sock *sk) __ksym;
+extern u32 cubictcp_recalc_ssthresh(struct sock *sk) __ksym;
+extern void cubictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked) __ksym;
+extern void cubictcp_state(struct sock *sk, u8 new_state) __ksym;
+extern void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event) __ksym;
+extern void cubictcp_acked(struct sock *sk, const struct ack_sample *sample) __ksym;
+
+SEC("struct_ops")
+void BPF_PROG(init, struct sock *sk)
+{
+ bbr_init(sk);
+ dctcp_init(sk);
+ cubictcp_init(sk);
+}
+
+SEC("struct_ops")
+void BPF_PROG(in_ack_event, struct sock *sk, u32 flags)
+{
+ dctcp_update_alpha(sk, flags);
+}
+
+SEC("struct_ops")
+void BPF_PROG(cong_control, struct sock *sk, u32 ack, int flag, const struct rate_sample *rs)
+{
+ bbr_main(sk, ack, flag, rs);
+}
+
+SEC("struct_ops")
+void BPF_PROG(cong_avoid, struct sock *sk, u32 ack, u32 acked)
+{
+ cubictcp_cong_avoid(sk, ack, acked);
+}
+
+SEC("struct_ops")
+u32 BPF_PROG(sndbuf_expand, struct sock *sk)
+{
+ return bbr_sndbuf_expand(sk);
+}
+
+SEC("struct_ops")
+u32 BPF_PROG(undo_cwnd, struct sock *sk)
+{
+ bbr_undo_cwnd(sk);
+ return dctcp_cwnd_undo(sk);
+}
+
+SEC("struct_ops")
+void BPF_PROG(cwnd_event, struct sock *sk, enum tcp_ca_event event)
+{
+ bbr_cwnd_event(sk, event);
+ dctcp_cwnd_event(sk, event);
+ cubictcp_cwnd_event(sk, event);
+}
+
+SEC("struct_ops")
+u32 BPF_PROG(ssthresh, struct sock *sk)
+{
+ bbr_ssthresh(sk);
+ dctcp_ssthresh(sk);
+ return cubictcp_recalc_ssthresh(sk);
+}
+
+SEC("struct_ops")
+u32 BPF_PROG(min_tso_segs, struct sock *sk)
+{
+ return bbr_min_tso_segs(sk);
+}
+
+SEC("struct_ops")
+void BPF_PROG(set_state, struct sock *sk, u8 new_state)
+{
+ bbr_set_state(sk, new_state);
+ dctcp_state(sk, new_state);
+ cubictcp_state(sk, new_state);
+}
+
+SEC("struct_ops")
+void BPF_PROG(pkts_acked, struct sock *sk, const struct ack_sample *sample)
+{
+ cubictcp_acked(sk, sample);
+}
+
+SEC(".struct_ops")
+struct tcp_congestion_ops tcp_ca_kfunc = {
+ .init = (void *)init,
+ .in_ack_event = (void *)in_ack_event,
+ .cong_control = (void *)cong_control,
+ .cong_avoid = (void *)cong_avoid,
+ .sndbuf_expand = (void *)sndbuf_expand,
+ .undo_cwnd = (void *)undo_cwnd,
+ .cwnd_event = (void *)cwnd_event,
+ .ssthresh = (void *)ssthresh,
+ .min_tso_segs = (void *)min_tso_segs,
+ .set_state = (void *)set_state,
+ .pkts_acked = (void *)pkts_acked,
+ .name = "tcp_ca_kfunc",
+};
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c b/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c
index c06f4a41c21a..54f916a931c6 100644
--- a/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c
+++ b/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c
@@ -7,7 +7,7 @@
char _license[] SEC("license") = "GPL";
-SEC("struct_ops/unsupp_cong_op_get_info")
+SEC("struct_ops")
size_t BPF_PROG(unsupp_cong_op_get_info, struct sock *sk, u32 ext, int *attr,
union tcp_cc_info *info)
{
diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_update.c b/tools/testing/selftests/bpf/progs/tcp_ca_update.c
index b93a0ed33057..e4bd82bc0d01 100644
--- a/tools/testing/selftests/bpf/progs/tcp_ca_update.c
+++ b/tools/testing/selftests/bpf/progs/tcp_ca_update.c
@@ -1,7 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
-#include "vmlinux.h"
-
+#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
@@ -10,36 +9,31 @@ char _license[] SEC("license") = "GPL";
int ca1_cnt = 0;
int ca2_cnt = 0;
-static inline struct tcp_sock *tcp_sk(const struct sock *sk)
-{
- return (struct tcp_sock *)sk;
-}
-
-SEC("struct_ops/ca_update_1_init")
+SEC("struct_ops")
void BPF_PROG(ca_update_1_init, struct sock *sk)
{
ca1_cnt++;
}
-SEC("struct_ops/ca_update_2_init")
+SEC("struct_ops")
void BPF_PROG(ca_update_2_init, struct sock *sk)
{
ca2_cnt++;
}
-SEC("struct_ops/ca_update_cong_control")
+SEC("struct_ops")
void BPF_PROG(ca_update_cong_control, struct sock *sk,
const struct rate_sample *rs)
{
}
-SEC("struct_ops/ca_update_ssthresh")
+SEC("struct_ops")
__u32 BPF_PROG(ca_update_ssthresh, struct sock *sk)
{
return tcp_sk(sk)->snd_ssthresh;
}
-SEC("struct_ops/ca_update_undo_cwnd")
+SEC("struct_ops")
__u32 BPF_PROG(ca_update_undo_cwnd, struct sock *sk)
{
return tcp_sk(sk)->snd_cwnd;
diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c b/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c
index 0724a79cec78..a58b5194fc89 100644
--- a/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c
+++ b/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c
@@ -1,7 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
-#include "vmlinux.h"
-
+#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
@@ -11,22 +10,17 @@ char _license[] SEC("license") = "GPL";
#define min(a, b) ((a) < (b) ? (a) : (b))
-static inline struct tcp_sock *tcp_sk(const struct sock *sk)
-{
- return (struct tcp_sock *)sk;
-}
-
-static inline unsigned int tcp_left_out(const struct tcp_sock *tp)
+static unsigned int tcp_left_out(const struct tcp_sock *tp)
{
return tp->sacked_out + tp->lost_out;
}
-static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
+static unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
{
return tp->packets_out - tcp_left_out(tp) + tp->retrans_out;
}
-SEC("struct_ops/write_sk_pacing_init")
+SEC("struct_ops")
void BPF_PROG(write_sk_pacing_init, struct sock *sk)
{
#ifdef ENABLE_ATOMICS_TESTS
@@ -37,7 +31,7 @@ void BPF_PROG(write_sk_pacing_init, struct sock *sk)
#endif
}
-SEC("struct_ops/write_sk_pacing_cong_control")
+SEC("struct_ops")
void BPF_PROG(write_sk_pacing_cong_control, struct sock *sk,
const struct rate_sample *rs)
{
@@ -49,13 +43,13 @@ void BPF_PROG(write_sk_pacing_cong_control, struct sock *sk,
tp->app_limited = (tp->delivered + tcp_packets_in_flight(tp)) ?: 1;
}
-SEC("struct_ops/write_sk_pacing_ssthresh")
+SEC("struct_ops")
__u32 BPF_PROG(write_sk_pacing_ssthresh, struct sock *sk)
{
return tcp_sk(sk)->snd_ssthresh;
}
-SEC("struct_ops/write_sk_pacing_undo_cwnd")
+SEC("struct_ops")
__u32 BPF_PROG(write_sk_pacing_undo_cwnd, struct sock *sk)
{
return tcp_sk(sk)->snd_cwnd;
diff --git a/tools/testing/selftests/bpf/progs/tcp_rtt.c b/tools/testing/selftests/bpf/progs/tcp_rtt.c
index 0988d79f1587..42c729f85524 100644
--- a/tools/testing/selftests/bpf/progs/tcp_rtt.c
+++ b/tools/testing/selftests/bpf/progs/tcp_rtt.c
@@ -10,6 +10,9 @@ struct tcp_rtt_storage {
__u32 delivered;
__u32 delivered_ce;
__u32 icsk_retransmits;
+
+ __u32 mrtt_us; /* args[0] */
+ __u32 srtt; /* args[1] */
};
struct {
@@ -55,5 +58,8 @@ int _sockops(struct bpf_sock_ops *ctx)
storage->delivered_ce = tcp_sk->delivered_ce;
storage->icsk_retransmits = tcp_sk->icsk_retransmits;
+ storage->mrtt_us = ctx->args[0];
+ storage->srtt = ctx->args[1];
+
return 1;
}
diff --git a/tools/testing/selftests/bpf/progs/test_access_variable_array.c b/tools/testing/selftests/bpf/progs/test_access_variable_array.c
index 808c49b79889..326b7d1f496a 100644
--- a/tools/testing/selftests/bpf/progs/test_access_variable_array.c
+++ b/tools/testing/selftests/bpf/progs/test_access_variable_array.c
@@ -7,7 +7,7 @@
unsigned long span = 0;
-SEC("fentry/load_balance")
+SEC("fentry/sched_balance_rq")
int BPF_PROG(fentry_fentry, int this_cpu, struct rq *this_rq,
struct sched_domain *sd)
{
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c
index 5a3a80f751c4..c83142b55f47 100644
--- a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c
+++ b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c
@@ -15,6 +15,8 @@ __u64 uprobe_res;
__u64 uretprobe_res;
__u64 tp_res;
__u64 pe_res;
+__u64 raw_tp_res;
+__u64 tp_btf_res;
__u64 fentry_res;
__u64 fexit_res;
__u64 fmod_ret_res;
@@ -87,6 +89,20 @@ int handle_pe(struct pt_regs *ctx)
return 0;
}
+SEC("raw_tp/sys_enter")
+int handle_raw_tp(void *ctx)
+{
+ update(ctx, &raw_tp_res);
+ return 0;
+}
+
+SEC("tp_btf/sys_enter")
+int handle_tp_btf(void *ctx)
+{
+ update(ctx, &tp_btf_res);
+ return 0;
+}
+
SEC("fentry/bpf_fentry_test1")
int BPF_PROG(fentry_test1, int a)
{
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_ma.c b/tools/testing/selftests/bpf/progs/test_bpf_ma.c
index 3494ca30fa7f..4a4e0b8d9b72 100644
--- a/tools/testing/selftests/bpf/progs/test_bpf_ma.c
+++ b/tools/testing/selftests/bpf/progs/test_bpf_ma.c
@@ -7,10 +7,6 @@
#include "bpf_experimental.h"
#include "bpf_misc.h"
-#ifndef ARRAY_SIZE
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-#endif
-
struct generic_map_value {
void *data;
};
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
index 77ad8adf68da..f7b330ddd007 100644
--- a/tools/testing/selftests/bpf/progs/test_bpf_nf.c
+++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#define BPF_NO_KFUNC_PROTOTYPES
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
@@ -9,10 +10,14 @@
#define EINVAL 22
#define ENOENT 2
+#define NF_CT_ZONE_DIR_ORIG (1 << IP_CT_DIR_ORIGINAL)
+#define NF_CT_ZONE_DIR_REPL (1 << IP_CT_DIR_REPLY)
+
extern unsigned long CONFIG_HZ __kconfig;
int test_einval_bpf_tuple = 0;
int test_einval_reserved = 0;
+int test_einval_reserved_new = 0;
int test_einval_netns_id = 0;
int test_einval_len_opts = 0;
int test_eproto_l4proto = 0;
@@ -22,6 +27,11 @@ int test_eafnosupport = 0;
int test_alloc_entry = -EINVAL;
int test_insert_entry = -EAFNOSUPPORT;
int test_succ_lookup = -ENOENT;
+int test_ct_zone_id_alloc_entry = -EINVAL;
+int test_ct_zone_id_insert_entry = -EAFNOSUPPORT;
+int test_ct_zone_id_succ_lookup = -ENOENT;
+int test_ct_zone_dir_enoent_lookup = 0;
+int test_ct_zone_id_enoent_lookup = 0;
u32 test_delta_timeout = 0;
u32 test_status = 0;
u32 test_insert_lookup_mark = 0;
@@ -45,6 +55,17 @@ struct bpf_ct_opts___local {
s32 netns_id;
s32 error;
u8 l4proto;
+ u8 dir;
+ u8 reserved[2];
+};
+
+struct bpf_ct_opts___new {
+ s32 netns_id;
+ s32 error;
+ u8 l4proto;
+ u8 dir;
+ u16 ct_zone_id;
+ u8 ct_zone_dir;
u8 reserved[3];
} __attribute__((preserve_access_index));
@@ -220,10 +241,97 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
}
}
+static __always_inline void
+nf_ct_opts_new_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
+ struct bpf_ct_opts___new *, u32),
+ struct nf_conn *(*alloc_fn)(void *, struct bpf_sock_tuple *, u32,
+ struct bpf_ct_opts___new *, u32),
+ void *ctx)
+{
+ struct bpf_ct_opts___new opts_def = { .l4proto = IPPROTO_TCP, .netns_id = -1 };
+ struct bpf_sock_tuple bpf_tuple;
+ struct nf_conn *ct;
+
+ __builtin_memset(&bpf_tuple, 0, sizeof(bpf_tuple.ipv4));
+
+ opts_def.reserved[0] = 1;
+ ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
+ sizeof(opts_def));
+ opts_def.reserved[0] = 0;
+ if (ct)
+ bpf_ct_release(ct);
+ else
+ test_einval_reserved_new = opts_def.error;
+
+ bpf_tuple.ipv4.saddr = bpf_get_prandom_u32(); /* src IP */
+ bpf_tuple.ipv4.daddr = bpf_get_prandom_u32(); /* dst IP */
+ bpf_tuple.ipv4.sport = bpf_get_prandom_u32(); /* src port */
+ bpf_tuple.ipv4.dport = bpf_get_prandom_u32(); /* dst port */
+
+ /* use non-default ct zone */
+ opts_def.ct_zone_id = 10;
+ opts_def.ct_zone_dir = NF_CT_ZONE_DIR_ORIG;
+ ct = alloc_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
+ sizeof(opts_def));
+ if (ct) {
+ __u16 sport = bpf_get_prandom_u32();
+ __u16 dport = bpf_get_prandom_u32();
+ union nf_inet_addr saddr = {};
+ union nf_inet_addr daddr = {};
+ struct nf_conn *ct_ins;
+
+ bpf_ct_set_timeout(ct, 10000);
+
+ /* snat */
+ saddr.ip = bpf_get_prandom_u32();
+ bpf_ct_set_nat_info(ct, &saddr, sport, NF_NAT_MANIP_SRC___local);
+ /* dnat */
+ daddr.ip = bpf_get_prandom_u32();
+ bpf_ct_set_nat_info(ct, &daddr, dport, NF_NAT_MANIP_DST___local);
+
+ ct_ins = bpf_ct_insert_entry(ct);
+ if (ct_ins) {
+ struct nf_conn *ct_lk;
+
+ /* entry should exist in same ct zone we inserted it */
+ ct_lk = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4),
+ &opts_def, sizeof(opts_def));
+ if (ct_lk) {
+ bpf_ct_release(ct_lk);
+ test_ct_zone_id_succ_lookup = 0;
+ }
+
+ /* entry should not exist with wrong direction */
+ opts_def.ct_zone_dir = NF_CT_ZONE_DIR_REPL;
+ ct_lk = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4),
+ &opts_def, sizeof(opts_def));
+ opts_def.ct_zone_dir = NF_CT_ZONE_DIR_ORIG;
+ if (ct_lk)
+ bpf_ct_release(ct_lk);
+ else
+ test_ct_zone_dir_enoent_lookup = opts_def.error;
+
+ /* entry should not exist in default ct zone */
+ opts_def.ct_zone_id = 0;
+ ct_lk = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4),
+ &opts_def, sizeof(opts_def));
+ if (ct_lk)
+ bpf_ct_release(ct_lk);
+ else
+ test_ct_zone_id_enoent_lookup = opts_def.error;
+
+ bpf_ct_release(ct_ins);
+ test_ct_zone_id_insert_entry = 0;
+ }
+ test_ct_zone_id_alloc_entry = 0;
+ }
+}
+
SEC("xdp")
int nf_xdp_ct_test(struct xdp_md *ctx)
{
nf_ct_test((void *)bpf_xdp_ct_lookup, (void *)bpf_xdp_ct_alloc, ctx);
+ nf_ct_opts_new_test((void *)bpf_xdp_ct_lookup, (void *)bpf_xdp_ct_alloc, ctx);
return 0;
}
@@ -231,6 +339,7 @@ SEC("tc")
int nf_skb_ct_test(struct __sk_buff *ctx)
{
nf_ct_test((void *)bpf_skb_ct_lookup, (void *)bpf_skb_ct_alloc, ctx);
+ nf_ct_opts_new_test((void *)bpf_skb_ct_lookup, (void *)bpf_skb_ct_alloc, ctx);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c b/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c
index 0e4759ab38ff..a586f087ffeb 100644
--- a/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c
+++ b/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#define BPF_NO_KFUNC_PROTOTYPES
#include <vmlinux.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_helpers.h>
diff --git a/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c
index e2bea4da194b..f0759efff6ef 100644
--- a/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c
@@ -1,19 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-#include <string.h>
-#include <errno.h>
-#include <netinet/in.h>
-#include <linux/stddef.h>
-#include <linux/bpf.h>
-#include <linux/ipv6.h>
-#include <linux/tcp.h>
-#include <linux/if_ether.h>
-#include <linux/pkt_cls.h>
-
+#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
-#include "bpf_tcp_helpers.h"
+
+#ifndef ENOENT
+#define ENOENT 2
+#endif
struct sockaddr_in6 srv_sa6 = {};
__u16 listen_tp_sport = 0;
diff --git a/tools/testing/selftests/bpf/progs/test_global_func10.c b/tools/testing/selftests/bpf/progs/test_global_func10.c
index 8fba3f3649e2..5da001ca57a5 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func10.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func10.c
@@ -4,6 +4,10 @@
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
+#if !defined(__clang__)
+#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+#endif
+
struct Small {
long x;
};
diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
index 2dde8e3fe4c9..e68667aec6a6 100644
--- a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
+++ b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
@@ -45,7 +45,7 @@ int BPF_PROG(not_valid_dynptr, int cmd, union bpf_attr *attr, unsigned int size)
}
SEC("?lsm.s/bpf")
-__failure __msg("arg#0 expected pointer to stack or dynptr_ptr")
+__failure __msg("arg#1 expected pointer to stack or const struct bpf_dynptr")
int BPF_PROG(not_ptr_to_stack, int cmd, union bpf_attr *attr, unsigned int size)
{
unsigned long val = 0;
diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c b/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c
new file mode 100644
index 000000000000..7ac7e1de34d8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_kfunc_param_nullable.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_kfuncs.h"
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
+
+SEC("tc")
+int kfunc_dynptr_nullable_test1(struct __sk_buff *skb)
+{
+ struct bpf_dynptr data;
+
+ bpf_dynptr_from_skb(skb, 0, &data);
+ bpf_kfunc_dynptr_test(&data, NULL);
+
+ return 0;
+}
+
+SEC("tc")
+int kfunc_dynptr_nullable_test2(struct __sk_buff *skb)
+{
+ struct bpf_dynptr data;
+
+ bpf_dynptr_from_skb(skb, 0, &data);
+ bpf_kfunc_dynptr_test(&data, &data);
+
+ return 0;
+}
+
+SEC("tc")
+__failure __msg("expected pointer to stack or const struct bpf_dynptr")
+int kfunc_dynptr_nullable_test3(struct __sk_buff *skb)
+{
+ struct bpf_dynptr data;
+
+ bpf_dynptr_from_skb(skb, 0, &data);
+ bpf_kfunc_dynptr_test(NULL, &data);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_lwt_redirect.c b/tools/testing/selftests/bpf/progs/test_lwt_redirect.c
index 8c895122f293..83439b87b766 100644
--- a/tools/testing/selftests/bpf/progs/test_lwt_redirect.c
+++ b/tools/testing/selftests/bpf/progs/test_lwt_redirect.c
@@ -3,7 +3,7 @@
#include <bpf/bpf_endian.h>
#include <bpf/bpf_helpers.h>
#include <linux/ip.h>
-#include "bpf_tracing_net.h"
+#include <linux/if_ether.h>
/* We don't care about whether the packet can be received by network stack.
* Just care if the packet is sent to the correct device at correct direction
diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c
index 8a1b50f3a002..cc1a012d038f 100644
--- a/tools/testing/selftests/bpf/progs/test_module_attach.c
+++ b/tools/testing/selftests/bpf/progs/test_module_attach.c
@@ -73,6 +73,29 @@ int BPF_PROG(handle_fentry_manual,
return 0;
}
+__u32 fentry_explicit_read_sz = 0;
+
+SEC("fentry/bpf_testmod:bpf_testmod_test_read")
+int BPF_PROG(handle_fentry_explicit,
+ struct file *file, struct kobject *kobj,
+ struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len)
+{
+ fentry_explicit_read_sz = len;
+ return 0;
+}
+
+
+__u32 fentry_explicit_manual_read_sz = 0;
+
+SEC("fentry")
+int BPF_PROG(handle_fentry_explicit_manual,
+ struct file *file, struct kobject *kobj,
+ struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len)
+{
+ fentry_explicit_manual_read_sz = len;
+ return 0;
+}
+
__u32 fexit_read_sz = 0;
int fexit_ret = 0;
diff --git a/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c b/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c
index 0763d49f9c42..386315afad65 100644
--- a/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c
+++ b/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c
@@ -5,23 +5,48 @@
#include <stdint.h>
#include <bpf/bpf_helpers.h>
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __type(value, __u32);
+} sock_map SEC(".maps");
+
__u64 user_pid = 0;
__u64 user_tgid = 0;
__u64 dev = 0;
__u64 ino = 0;
-SEC("tracepoint/syscalls/sys_enter_nanosleep")
-int handler(const void *ctx)
+static void get_pid_tgid(void)
{
struct bpf_pidns_info nsdata;
if (bpf_get_ns_current_pid_tgid(dev, ino, &nsdata, sizeof(struct bpf_pidns_info)))
- return 0;
+ return;
user_pid = nsdata.pid;
user_tgid = nsdata.tgid;
+}
+SEC("?tracepoint/syscalls/sys_enter_nanosleep")
+int tp_handler(const void *ctx)
+{
+ get_pid_tgid();
return 0;
}
+SEC("?cgroup/bind4")
+int cgroup_bind4(struct bpf_sock_addr *ctx)
+{
+ get_pid_tgid();
+ return 1;
+}
+
+SEC("?sk_msg")
+int sk_msg(struct sk_msg_md *msg)
+{
+ get_pid_tgid();
+ return SK_PASS;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_perf_skip.c b/tools/testing/selftests/bpf/progs/test_perf_skip.c
new file mode 100644
index 000000000000..7eb8b6de7a57
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_perf_skip.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+uintptr_t ip;
+
+SEC("perf_event")
+int handler(struct bpf_perf_event_data *data)
+{
+ /* Skip events that have the correct ip. */
+ return ip != PT_REGS_IP(&data->regs);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_n.c b/tools/testing/selftests/bpf/progs/test_ringbuf_n.c
new file mode 100644
index 000000000000..8669eb42dbe0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf_n.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2024 Andrea Righi <andrea.righi@canonical.com>
+
+#include <linux/bpf.h>
+#include <sched.h>
+#include <unistd.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define TASK_COMM_LEN 16
+
+struct sample {
+ int pid;
+ long value;
+ char comm[16];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+} ringbuf SEC(".maps");
+
+int pid = 0;
+long value = 0;
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int test_ringbuf_n(void *ctx)
+{
+ int cur_pid = bpf_get_current_pid_tgid() >> 32;
+ struct sample *sample;
+
+ if (cur_pid != pid)
+ return 0;
+
+ sample = bpf_ringbuf_reserve(&ringbuf, sizeof(*sample), 0);
+ if (!sample)
+ return 0;
+
+ sample->pid = pid;
+ sample->value = value;
+ bpf_get_current_comm(sample->comm, sizeof(sample->comm));
+
+ bpf_ringbuf_submit(sample, 0);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_write.c b/tools/testing/selftests/bpf/progs/test_ringbuf_write.c
new file mode 100644
index 000000000000..350513c0e4c9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf_write.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+} ringbuf SEC(".maps");
+
+/* inputs */
+int pid = 0;
+
+/* outputs */
+long passed = 0;
+long discarded = 0;
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int test_ringbuf_write(void *ctx)
+{
+ int *foo, cur_pid = bpf_get_current_pid_tgid() >> 32;
+ void *sample1, *sample2;
+
+ if (cur_pid != pid)
+ return 0;
+
+ sample1 = bpf_ringbuf_reserve(&ringbuf, 0x3000, 0);
+ if (!sample1)
+ return 0;
+ /* first one can pass */
+ sample2 = bpf_ringbuf_reserve(&ringbuf, 0x3000, 0);
+ if (!sample2) {
+ bpf_ringbuf_discard(sample1, 0);
+ __sync_fetch_and_add(&discarded, 1);
+ return 0;
+ }
+ /* second one must not */
+ __sync_fetch_and_add(&passed, 1);
+ foo = sample2 + 4084;
+ *foo = 256;
+ bpf_ringbuf_discard(sample1, 0);
+ bpf_ringbuf_discard(sample2, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c b/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c
index 02e718f06e0f..40531e56776e 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c
@@ -84,7 +84,7 @@ int BPF_PROG(trace_tcp_connect, struct sock *sk)
}
SEC("fexit/inet_csk_accept")
-int BPF_PROG(inet_csk_accept, struct sock *sk, int flags, int *err, bool kern,
+int BPF_PROG(inet_csk_accept, struct sock *sk, struct proto_accept_arg *arg,
struct sock *accepted_sk)
{
set_task_info(accepted_sk);
diff --git a/tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c b/tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c
index 45e8fc75a739..996b177324ba 100644
--- a/tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c
+++ b/tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c
@@ -24,8 +24,7 @@ struct {
__type(value, __u64);
} socket_storage SEC(".maps");
-SEC("sk_msg")
-int prog_msg_verdict(struct sk_msg_md *msg)
+static int prog_msg_verdict_common(struct sk_msg_md *msg)
{
struct task_struct *task = (struct task_struct *)bpf_get_current_task();
int verdict = SK_PASS;
@@ -44,4 +43,28 @@ int prog_msg_verdict(struct sk_msg_md *msg)
return verdict;
}
+SEC("sk_msg")
+int prog_msg_verdict(struct sk_msg_md *msg)
+{
+ return prog_msg_verdict_common(msg);
+}
+
+SEC("sk_msg")
+int prog_msg_verdict_clone(struct sk_msg_md *msg)
+{
+ return prog_msg_verdict_common(msg);
+}
+
+SEC("sk_msg")
+int prog_msg_verdict_clone2(struct sk_msg_md *msg)
+{
+ return prog_msg_verdict_common(msg);
+}
+
+SEC("sk_skb/stream_verdict")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ return SK_PASS;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields.c b/tools/testing/selftests/bpf/progs/test_sock_fields.c
index f75e531bf36f..196844be349c 100644
--- a/tools/testing/selftests/bpf/progs/test_sock_fields.c
+++ b/tools/testing/selftests/bpf/progs/test_sock_fields.c
@@ -7,7 +7,6 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
-#include "bpf_tcp_helpers.h"
enum bpf_linum_array_idx {
EGRESS_LINUM_IDX,
@@ -42,6 +41,10 @@ struct {
__type(value, struct bpf_spinlock_cnt);
} sk_pkt_out_cnt10 SEC(".maps");
+struct tcp_sock {
+ __u32 lsndtime;
+} __attribute__((preserve_access_index));
+
struct bpf_tcp_sock listen_tp = {};
struct sockaddr_in6 srv_sa6 = {};
struct bpf_tcp_sock cli_tp = {};
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
index 99d2ea9fb658..f48f85f1bd70 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
@@ -92,7 +92,7 @@ struct {
__uint(value_size, sizeof(int));
} tls_sock_map SEC(".maps");
-SEC("sk_skb1")
+SEC("sk_skb/stream_parser")
int bpf_prog1(struct __sk_buff *skb)
{
int *f, two = 2;
@@ -104,7 +104,7 @@ int bpf_prog1(struct __sk_buff *skb)
return skb->len;
}
-SEC("sk_skb2")
+SEC("sk_skb/stream_verdict")
int bpf_prog2(struct __sk_buff *skb)
{
__u32 lport = skb->local_port;
@@ -151,7 +151,7 @@ static inline void bpf_write_pass(struct __sk_buff *skb, int offset)
memcpy(c + offset, "PASS", 4);
}
-SEC("sk_skb3")
+SEC("sk_skb/stream_verdict")
int bpf_prog3(struct __sk_buff *skb)
{
int err, *f, ret = SK_PASS;
@@ -177,9 +177,6 @@ int bpf_prog3(struct __sk_buff *skb)
return bpf_sk_redirect_hash(skb, &tls_sock_map, &ret, flags);
#endif
}
- f = bpf_map_lookup_elem(&sock_skb_opts, &one);
- if (f && *f)
- ret = SK_DROP;
err = bpf_skb_adjust_room(skb, 4, 0, 0);
if (err)
return SK_DROP;
@@ -233,7 +230,7 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
return 0;
}
-SEC("sk_msg1")
+SEC("sk_msg")
int bpf_prog4(struct sk_msg_md *msg)
{
int *bytes, zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5;
@@ -263,7 +260,7 @@ int bpf_prog4(struct sk_msg_md *msg)
return SK_PASS;
}
-SEC("sk_msg2")
+SEC("sk_msg")
int bpf_prog6(struct sk_msg_md *msg)
{
int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5, key = 0;
@@ -308,7 +305,7 @@ int bpf_prog6(struct sk_msg_md *msg)
#endif
}
-SEC("sk_msg3")
+SEC("sk_msg")
int bpf_prog8(struct sk_msg_md *msg)
{
void *data_end = (void *)(long) msg->data_end;
@@ -329,7 +326,8 @@ int bpf_prog8(struct sk_msg_md *msg)
return SK_PASS;
}
-SEC("sk_msg4")
+
+SEC("sk_msg")
int bpf_prog9(struct sk_msg_md *msg)
{
void *data_end = (void *)(long) msg->data_end;
@@ -347,7 +345,7 @@ int bpf_prog9(struct sk_msg_md *msg)
return SK_PASS;
}
-SEC("sk_msg5")
+SEC("sk_msg")
int bpf_prog10(struct sk_msg_md *msg)
{
int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop;
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c b/tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c
index 1d86a717a290..69aacc96db36 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c
@@ -23,10 +23,25 @@ struct {
__type(value, int);
} sock_map_msg SEC(".maps");
-SEC("sk_skb")
+SEC("sk_skb/stream_verdict")
int prog_skb_verdict(struct __sk_buff *skb)
{
return SK_PASS;
}
+int clone_called;
+
+SEC("sk_skb/stream_verdict")
+int prog_skb_verdict_clone(struct __sk_buff *skb)
+{
+ clone_called = 1;
+ return SK_PASS;
+}
+
+SEC("sk_skb/stream_parser")
+int prog_skb_parser(struct __sk_buff *skb)
+{
+ return SK_PASS;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c b/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c
index 3c69aa971738..d25b0bb30fc0 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c
@@ -9,7 +9,7 @@ struct {
__type(value, __u64);
} sock_map SEC(".maps");
-SEC("sk_skb")
+SEC("sk_skb/verdict")
int prog_skb_verdict(struct __sk_buff *skb)
{
return SK_DROP;
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
index 7f74077d6622..548660e299a5 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
@@ -10,10 +10,7 @@
#include <bpf/bpf_helpers.h>
#include "bpf_compiler.h"
-
-#ifndef ARRAY_SIZE
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-#endif
+#include "bpf_misc.h"
/* tcp_mem sysctl has only 3 ints, but this test is doing TCP_MEM_LOOPS */
#define TCP_MEM_LOOPS 28 /* because 30 doesn't fit into 512 bytes of stack */
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
index 68a75436e8af..81249d119a8b 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
@@ -10,10 +10,7 @@
#include <bpf/bpf_helpers.h>
#include "bpf_compiler.h"
-
-#ifndef ARRAY_SIZE
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-#endif
+#include "bpf_misc.h"
/* tcp_mem sysctl has only 3 ints, but this test is doing TCP_MEM_LOOPS */
#define TCP_MEM_LOOPS 20 /* because 30 doesn't fit into 512 bytes of stack */
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
index efc3c61f7852..bbdd08764789 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
@@ -10,6 +10,7 @@
#include <bpf/bpf_helpers.h>
#include "bpf_compiler.h"
+#include "bpf_misc.h"
/* Max supported length of a string with unsigned long in base 10 (pow2 - 1). */
#define MAX_ULONG_STR_LEN 0xF
@@ -17,10 +18,6 @@
/* Max supported length of sysctl value string (pow2). */
#define MAX_VALUE_STR_LEN 0x40
-#ifndef ARRAY_SIZE
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-#endif
-
const char tcp_mem_name[] = "net/ipv4/tcp_mem";
static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/test_tc_dtime.c b/tools/testing/selftests/bpf/progs/test_tc_dtime.c
index 74ec09f040b7..ca8e8734d901 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_dtime.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_dtime.c
@@ -222,17 +222,21 @@ int egress_host(struct __sk_buff *skb)
return TC_ACT_OK;
if (skb_proto(skb_type) == IPPROTO_TCP) {
- if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO &&
+ if (skb->tstamp_type == BPF_SKB_CLOCK_MONOTONIC &&
skb->tstamp)
inc_dtimes(EGRESS_ENDHOST);
else
inc_errs(EGRESS_ENDHOST);
- } else {
- if (skb->tstamp_type == BPF_SKB_TSTAMP_UNSPEC &&
+ } else if (skb_proto(skb_type) == IPPROTO_UDP) {
+ if (skb->tstamp_type == BPF_SKB_CLOCK_TAI &&
skb->tstamp)
inc_dtimes(EGRESS_ENDHOST);
else
inc_errs(EGRESS_ENDHOST);
+ } else {
+ if (skb->tstamp_type == BPF_SKB_CLOCK_REALTIME &&
+ skb->tstamp)
+ inc_errs(EGRESS_ENDHOST);
}
skb->tstamp = EGRESS_ENDHOST_MAGIC;
@@ -252,7 +256,7 @@ int ingress_host(struct __sk_buff *skb)
if (!skb_type)
return TC_ACT_OK;
- if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO &&
+ if (skb->tstamp_type == BPF_SKB_CLOCK_MONOTONIC &&
skb->tstamp == EGRESS_FWDNS_MAGIC)
inc_dtimes(INGRESS_ENDHOST);
else
@@ -315,7 +319,6 @@ int egress_fwdns_prio100(struct __sk_buff *skb)
SEC("tc")
int ingress_fwdns_prio101(struct __sk_buff *skb)
{
- __u64 expected_dtime = EGRESS_ENDHOST_MAGIC;
int skb_type;
skb_type = skb_get_type(skb);
@@ -323,29 +326,24 @@ int ingress_fwdns_prio101(struct __sk_buff *skb)
/* Should have handled in prio100 */
return TC_ACT_SHOT;
- if (skb_proto(skb_type) == IPPROTO_UDP)
- expected_dtime = 0;
-
if (skb->tstamp_type) {
if (fwdns_clear_dtime() ||
- skb->tstamp_type != BPF_SKB_TSTAMP_DELIVERY_MONO ||
- skb->tstamp != expected_dtime)
+ (skb->tstamp_type != BPF_SKB_CLOCK_MONOTONIC &&
+ skb->tstamp_type != BPF_SKB_CLOCK_TAI) ||
+ skb->tstamp != EGRESS_ENDHOST_MAGIC)
inc_errs(INGRESS_FWDNS_P101);
else
inc_dtimes(INGRESS_FWDNS_P101);
} else {
- if (!fwdns_clear_dtime() && expected_dtime)
+ if (!fwdns_clear_dtime())
inc_errs(INGRESS_FWDNS_P101);
}
- if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO) {
+ if (skb->tstamp_type == BPF_SKB_CLOCK_MONOTONIC) {
skb->tstamp = INGRESS_FWDNS_MAGIC;
} else {
if (bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC,
- BPF_SKB_TSTAMP_DELIVERY_MONO))
- inc_errs(SET_DTIME);
- if (!bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC,
- BPF_SKB_TSTAMP_UNSPEC))
+ BPF_SKB_CLOCK_MONOTONIC))
inc_errs(SET_DTIME);
}
@@ -370,7 +368,7 @@ int egress_fwdns_prio101(struct __sk_buff *skb)
if (skb->tstamp_type) {
if (fwdns_clear_dtime() ||
- skb->tstamp_type != BPF_SKB_TSTAMP_DELIVERY_MONO ||
+ skb->tstamp_type != BPF_SKB_CLOCK_MONOTONIC ||
skb->tstamp != INGRESS_FWDNS_MAGIC)
inc_errs(EGRESS_FWDNS_P101);
else
@@ -380,14 +378,11 @@ int egress_fwdns_prio101(struct __sk_buff *skb)
inc_errs(EGRESS_FWDNS_P101);
}
- if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO) {
+ if (skb->tstamp_type == BPF_SKB_CLOCK_MONOTONIC) {
skb->tstamp = EGRESS_FWDNS_MAGIC;
} else {
if (bpf_skb_set_tstamp(skb, EGRESS_FWDNS_MAGIC,
- BPF_SKB_TSTAMP_DELIVERY_MONO))
- inc_errs(SET_DTIME);
- if (!bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC,
- BPF_SKB_TSTAMP_UNSPEC))
+ BPF_SKB_CLOCK_MONOTONIC))
inc_errs(SET_DTIME);
}
diff --git a/tools/testing/selftests/bpf/progs/test_tc_link.c b/tools/testing/selftests/bpf/progs/test_tc_link.c
index 992400acb957..ab3eae3d6af8 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_link.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_link.c
@@ -4,7 +4,8 @@
#include <linux/bpf.h>
#include <linux/if_ether.h>
-
+#include <linux/stddef.h>
+#include <linux/if_packet.h>
#include <bpf/bpf_endian.h>
#include <bpf/bpf_helpers.h>
@@ -16,7 +17,13 @@ bool seen_tc3;
bool seen_tc4;
bool seen_tc5;
bool seen_tc6;
+bool seen_tc7;
+
+bool set_type;
+
bool seen_eth;
+bool seen_host;
+bool seen_mcast;
SEC("tc/ingress")
int tc1(struct __sk_buff *skb)
@@ -28,8 +35,16 @@ int tc1(struct __sk_buff *skb)
if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)))
goto out;
seen_eth = eth.h_proto == bpf_htons(ETH_P_IP);
+ seen_host = skb->pkt_type == PACKET_HOST;
+ if (seen_host && set_type) {
+ eth.h_dest[0] = 4;
+ if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0))
+ goto fail;
+ bpf_skb_change_type(skb, PACKET_MULTICAST);
+ }
out:
seen_tc1 = true;
+fail:
return TCX_NEXT;
}
@@ -67,3 +82,21 @@ int tc6(struct __sk_buff *skb)
seen_tc6 = true;
return TCX_PASS;
}
+
+SEC("tc/ingress")
+int tc7(struct __sk_buff *skb)
+{
+ struct ethhdr eth = {};
+
+ if (skb->protocol != __bpf_constant_htons(ETH_P_IP))
+ goto out;
+ if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)))
+ goto out;
+ if (eth.h_dest[0] == 4 && set_type) {
+ seen_mcast = skb->pkt_type == PACKET_MULTICAST;
+ bpf_skb_change_type(skb, PACKET_HOST);
+ }
+out:
+ seen_tc7 = true;
+ return TCX_PASS;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
index c8e4553648bf..44ee0d037f95 100644
--- a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
+++ b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
@@ -9,6 +9,7 @@
#include "bpf_kfuncs.h"
#include "test_siphash.h"
#include "test_tcp_custom_syncookie.h"
+#include "bpf_misc.h"
#define MAX_PACKET_OFF 0xffff
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h
index 29a6a53cf229..f8b1b7e68d2e 100644
--- a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h
+++ b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h
@@ -7,8 +7,6 @@
#define __packed __attribute__((__packed__))
#define __force
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
-
#define swap(a, b) \
do { \
typeof(a) __tmp = (a); \
diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
index a3f3f43fc195..6935f32eeb8f 100644
--- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
@@ -1,18 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
-#include <stddef.h>
-#include <string.h>
-#include <netinet/in.h>
-#include <linux/bpf.h>
-#include <linux/if_ether.h>
-#include <linux/if_packet.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/types.h>
-#include <linux/socket.h>
-#include <linux/tcp.h>
+#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
-#include "bpf_tcp_helpers.h"
#include "test_tcpbpf.h"
struct tcpbpf_globals global = {};
diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
index 3e436e6f7312..3f5abcf3ff13 100644
--- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
@@ -567,12 +567,18 @@ int ip6vxlan_get_tunnel_src(struct __sk_buff *skb)
return TC_ACT_OK;
}
+struct local_geneve_opt {
+ struct geneve_opt gopt;
+ int data;
+};
+
SEC("tc")
int geneve_set_tunnel(struct __sk_buff *skb)
{
int ret;
struct bpf_tunnel_key key;
- struct geneve_opt gopt;
+ struct local_geneve_opt local_gopt;
+ struct geneve_opt *gopt = (struct geneve_opt *) &local_gopt;
__builtin_memset(&key, 0x0, sizeof(key));
key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
@@ -580,14 +586,14 @@ int geneve_set_tunnel(struct __sk_buff *skb)
key.tunnel_tos = 0;
key.tunnel_ttl = 64;
- __builtin_memset(&gopt, 0x0, sizeof(gopt));
- gopt.opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */
- gopt.type = 0x08;
- gopt.r1 = 0;
- gopt.r2 = 0;
- gopt.r3 = 0;
- gopt.length = 2; /* 4-byte multiple */
- *(int *) &gopt.opt_data = bpf_htonl(0xdeadbeef);
+ __builtin_memset(gopt, 0x0, sizeof(local_gopt));
+ gopt->opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */
+ gopt->type = 0x08;
+ gopt->r1 = 0;
+ gopt->r2 = 0;
+ gopt->r3 = 0;
+ gopt->length = 2; /* 4-byte multiple */
+ *(int *) &gopt->opt_data = bpf_htonl(0xdeadbeef);
ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
BPF_F_ZERO_CSUM_TX);
@@ -596,7 +602,7 @@ int geneve_set_tunnel(struct __sk_buff *skb)
return TC_ACT_SHOT;
}
- ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt));
+ ret = bpf_skb_set_tunnel_opt(skb, gopt, sizeof(local_gopt));
if (ret < 0) {
log_err(ret);
return TC_ACT_SHOT;
@@ -631,7 +637,8 @@ SEC("tc")
int ip6geneve_set_tunnel(struct __sk_buff *skb)
{
struct bpf_tunnel_key key;
- struct geneve_opt gopt;
+ struct local_geneve_opt local_gopt;
+ struct geneve_opt *gopt = (struct geneve_opt *) &local_gopt;
int ret;
__builtin_memset(&key, 0x0, sizeof(key));
@@ -647,16 +654,16 @@ int ip6geneve_set_tunnel(struct __sk_buff *skb)
return TC_ACT_SHOT;
}
- __builtin_memset(&gopt, 0x0, sizeof(gopt));
- gopt.opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */
- gopt.type = 0x08;
- gopt.r1 = 0;
- gopt.r2 = 0;
- gopt.r3 = 0;
- gopt.length = 2; /* 4-byte multiple */
- *(int *) &gopt.opt_data = bpf_htonl(0xfeedbeef);
+ __builtin_memset(gopt, 0x0, sizeof(local_gopt));
+ gopt->opt_class = bpf_htons(0x102); /* Open Virtual Networking (OVN) */
+ gopt->type = 0x08;
+ gopt->r1 = 0;
+ gopt->r2 = 0;
+ gopt->r3 = 0;
+ gopt->length = 2; /* 4-byte multiple */
+ *(int *) &gopt->opt_data = bpf_htonl(0xfeedbeef);
- ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt));
+ ret = bpf_skb_set_tunnel_opt(skb, gopt, sizeof(gopt));
if (ret < 0) {
log_err(ret);
return TC_ACT_SHOT;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
index 5c7e4758a0ca..fad94e41cef9 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
@@ -318,6 +318,14 @@ bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval,
return true;
}
+#ifndef __clang__
+#pragma GCC push_options
+/* GCC optimization collapses functions and increases the number of arguments
+ * beyond the compatible amount supported by BPF.
+ */
+#pragma GCC optimize("-fno-ipa-sra")
+#endif
+
static __attribute__ ((noinline))
bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval,
struct packet_description *pckt,
@@ -372,6 +380,10 @@ bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval,
return true;
}
+#ifndef __clang__
+#pragma GCC pop_options
+#endif
+
static __attribute__ ((noinline))
int swap_mac_and_send(void *data, void *data_end)
{
@@ -588,12 +600,13 @@ static void connection_table_lookup(struct real_definition **real,
__attribute__ ((noinline))
static int process_l3_headers_v6(struct packet_description *pckt,
__u8 *protocol, __u64 off,
- __u16 *pkt_bytes, void *data,
- void *data_end)
+ __u16 *pkt_bytes, void *extra_args[2])
{
struct ipv6hdr *ip6h;
__u64 iph_len;
int action;
+ void *data = extra_args[0];
+ void *data_end = extra_args[1];
ip6h = data + off;
if (ip6h + 1 > data_end)
@@ -619,11 +632,12 @@ static int process_l3_headers_v6(struct packet_description *pckt,
__attribute__ ((noinline))
static int process_l3_headers_v4(struct packet_description *pckt,
__u8 *protocol, __u64 off,
- __u16 *pkt_bytes, void *data,
- void *data_end)
+ __u16 *pkt_bytes, void *extra_args[2])
{
struct iphdr *iph;
int action;
+ void *data = extra_args[0];
+ void *data_end = extra_args[1];
iph = data + off;
if (iph + 1 > data_end)
@@ -666,13 +680,14 @@ static int process_packet(void *data, __u64 off, void *data_end,
__u8 protocol;
__u32 vip_num;
int action;
+ void *extra_args[2] = { data, data_end };
if (is_ipv6)
action = process_l3_headers_v6(&pckt, &protocol, off,
- &pkt_bytes, data, data_end);
+ &pkt_bytes, extra_args);
else
action = process_l3_headers_v4(&pckt, &protocol, off,
- &pkt_bytes, data, data_end);
+ &pkt_bytes, extra_args);
if (action >= 0)
return action;
protocol = pckt.flow.proto;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_vlan.c b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c
index f3ec8086482d..a7588302268d 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_vlan.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c
@@ -160,7 +160,7 @@ int xdp_prognum1(struct xdp_md *ctx)
/* Modifying VLAN, preserve top 4 bits */
vlan_hdr->h_vlan_TCI =
- bpf_htons((bpf_ntohs(vlan_hdr->h_vlan_TCI) & 0xf000)
+ bpf_htons((bpf_ntohs(vlan_hdr->h_vlan_TCI) & 0xf000U)
| TO_VLAN);
}
diff --git a/tools/testing/selftests/bpf/progs/timer.c b/tools/testing/selftests/bpf/progs/timer.c
index f615da97df26..4c677c001258 100644
--- a/tools/testing/selftests/bpf/progs/timer.c
+++ b/tools/testing/selftests/bpf/progs/timer.c
@@ -2,9 +2,10 @@
/* Copyright (c) 2021 Facebook */
#include <linux/bpf.h>
#include <time.h>
+#include <stdbool.h>
#include <errno.h>
#include <bpf/bpf_helpers.h>
-#include "bpf_tcp_helpers.h"
+#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
struct hmap_elem {
diff --git a/tools/testing/selftests/bpf/progs/timer_failure.c b/tools/testing/selftests/bpf/progs/timer_failure.c
index 0996c2486f05..5a2e9dabf1c6 100644
--- a/tools/testing/selftests/bpf/progs/timer_failure.c
+++ b/tools/testing/selftests/bpf/progs/timer_failure.c
@@ -5,8 +5,8 @@
#include <time.h>
#include <errno.h>
#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
#include "bpf_misc.h"
-#include "bpf_tcp_helpers.h"
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/timer_lockup.c b/tools/testing/selftests/bpf/progs/timer_lockup.c
new file mode 100644
index 000000000000..3e520133281e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/timer_lockup.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <time.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct elem {
+ struct bpf_timer t;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} timer1_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} timer2_map SEC(".maps");
+
+int timer1_err;
+int timer2_err;
+
+static int timer_cb1(void *map, int *k, struct elem *v)
+{
+ struct bpf_timer *timer;
+ int key = 0;
+
+ timer = bpf_map_lookup_elem(&timer2_map, &key);
+ if (timer)
+ timer2_err = bpf_timer_cancel(timer);
+
+ return 0;
+}
+
+static int timer_cb2(void *map, int *k, struct elem *v)
+{
+ struct bpf_timer *timer;
+ int key = 0;
+
+ timer = bpf_map_lookup_elem(&timer1_map, &key);
+ if (timer)
+ timer1_err = bpf_timer_cancel(timer);
+
+ return 0;
+}
+
+SEC("tc")
+int timer1_prog(void *ctx)
+{
+ struct bpf_timer *timer;
+ int key = 0;
+
+ timer = bpf_map_lookup_elem(&timer1_map, &key);
+ if (timer) {
+ bpf_timer_init(timer, &timer1_map, CLOCK_BOOTTIME);
+ bpf_timer_set_callback(timer, timer_cb1);
+ bpf_timer_start(timer, 1, BPF_F_TIMER_CPU_PIN);
+ }
+
+ return 0;
+}
+
+SEC("tc")
+int timer2_prog(void *ctx)
+{
+ struct bpf_timer *timer;
+ int key = 0;
+
+ timer = bpf_map_lookup_elem(&timer2_map, &key);
+ if (timer) {
+ bpf_timer_init(timer, &timer2_map, CLOCK_BOOTTIME);
+ bpf_timer_set_callback(timer, timer_cb2);
+ bpf_timer_start(timer, 1, BPF_F_TIMER_CPU_PIN);
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/timer_mim.c b/tools/testing/selftests/bpf/progs/timer_mim.c
index 2fee7ab105ef..50ebc3f68522 100644
--- a/tools/testing/selftests/bpf/progs/timer_mim.c
+++ b/tools/testing/selftests/bpf/progs/timer_mim.c
@@ -4,7 +4,7 @@
#include <time.h>
#include <errno.h>
#include <bpf/bpf_helpers.h>
-#include "bpf_tcp_helpers.h"
+#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
struct hmap_elem {
diff --git a/tools/testing/selftests/bpf/progs/timer_mim_reject.c b/tools/testing/selftests/bpf/progs/timer_mim_reject.c
index 5d648e3d8a41..dd3f1ed6d6e6 100644
--- a/tools/testing/selftests/bpf/progs/timer_mim_reject.c
+++ b/tools/testing/selftests/bpf/progs/timer_mim_reject.c
@@ -4,7 +4,7 @@
#include <time.h>
#include <errno.h>
#include <bpf/bpf_helpers.h>
-#include "bpf_tcp_helpers.h"
+#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
struct hmap_elem {
diff --git a/tools/testing/selftests/bpf/progs/tracing_struct.c b/tools/testing/selftests/bpf/progs/tracing_struct.c
index 515daef3c84b..c435a3a8328a 100644
--- a/tools/testing/selftests/bpf/progs/tracing_struct.c
+++ b/tools/testing/selftests/bpf/progs/tracing_struct.c
@@ -18,11 +18,6 @@ struct bpf_testmod_struct_arg_3 {
int b[];
};
-struct bpf_testmod_struct_arg_4 {
- u64 a;
- int b;
-};
-
long t1_a_a, t1_a_b, t1_b, t1_c, t1_ret, t1_nregs;
__u64 t1_reg0, t1_reg1, t1_reg2, t1_reg3;
long t2_a, t2_b_a, t2_b_b, t2_c, t2_ret;
@@ -30,9 +25,6 @@ long t3_a, t3_b, t3_c_a, t3_c_b, t3_ret;
long t4_a_a, t4_b, t4_c, t4_d, t4_e_a, t4_e_b, t4_ret;
long t5_ret;
int t6;
-long t7_a, t7_b, t7_c, t7_d, t7_e, t7_f_a, t7_f_b, t7_ret;
-long t8_a, t8_b, t8_c, t8_d, t8_e, t8_f_a, t8_f_b, t8_g, t8_ret;
-
SEC("fentry/bpf_testmod_test_struct_arg_1")
int BPF_PROG2(test_struct_arg_1, struct bpf_testmod_struct_arg_2, a, int, b, int, c)
@@ -138,50 +130,4 @@ int BPF_PROG2(test_struct_arg_11, struct bpf_testmod_struct_arg_3 *, a)
return 0;
}
-SEC("fentry/bpf_testmod_test_struct_arg_7")
-int BPF_PROG2(test_struct_arg_12, __u64, a, void *, b, short, c, int, d,
- void *, e, struct bpf_testmod_struct_arg_4, f)
-{
- t7_a = a;
- t7_b = (long)b;
- t7_c = c;
- t7_d = d;
- t7_e = (long)e;
- t7_f_a = f.a;
- t7_f_b = f.b;
- return 0;
-}
-
-SEC("fexit/bpf_testmod_test_struct_arg_7")
-int BPF_PROG2(test_struct_arg_13, __u64, a, void *, b, short, c, int, d,
- void *, e, struct bpf_testmod_struct_arg_4, f, int, ret)
-{
- t7_ret = ret;
- return 0;
-}
-
-SEC("fentry/bpf_testmod_test_struct_arg_8")
-int BPF_PROG2(test_struct_arg_14, __u64, a, void *, b, short, c, int, d,
- void *, e, struct bpf_testmod_struct_arg_4, f, int, g)
-{
- t8_a = a;
- t8_b = (long)b;
- t8_c = c;
- t8_d = d;
- t8_e = (long)e;
- t8_f_a = f.a;
- t8_f_b = f.b;
- t8_g = g;
- return 0;
-}
-
-SEC("fexit/bpf_testmod_test_struct_arg_8")
-int BPF_PROG2(test_struct_arg_15, __u64, a, void *, b, short, c, int, d,
- void *, e, struct bpf_testmod_struct_arg_4, f, int, g,
- int, ret)
-{
- t8_ret = ret;
- return 0;
-}
-
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/tracing_struct_many_args.c b/tools/testing/selftests/bpf/progs/tracing_struct_many_args.c
new file mode 100644
index 000000000000..4742012ace06
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tracing_struct_many_args.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+struct bpf_testmod_struct_arg_4 {
+ u64 a;
+ int b;
+};
+
+struct bpf_testmod_struct_arg_5 {
+ char a;
+ short b;
+ int c;
+ long d;
+};
+
+long t7_a, t7_b, t7_c, t7_d, t7_e, t7_f_a, t7_f_b, t7_ret;
+long t8_a, t8_b, t8_c, t8_d, t8_e, t8_f_a, t8_f_b, t8_g, t8_ret;
+long t9_a, t9_b, t9_c, t9_d, t9_e, t9_f, t9_g, t9_h_a, t9_h_b, t9_h_c, t9_h_d, t9_i, t9_ret;
+
+SEC("fentry/bpf_testmod_test_struct_arg_7")
+int BPF_PROG2(test_struct_many_args_1, __u64, a, void *, b, short, c, int, d,
+ void *, e, struct bpf_testmod_struct_arg_4, f)
+{
+ t7_a = a;
+ t7_b = (long)b;
+ t7_c = c;
+ t7_d = d;
+ t7_e = (long)e;
+ t7_f_a = f.a;
+ t7_f_b = f.b;
+ return 0;
+}
+
+SEC("fexit/bpf_testmod_test_struct_arg_7")
+int BPF_PROG2(test_struct_many_args_2, __u64, a, void *, b, short, c, int, d,
+ void *, e, struct bpf_testmod_struct_arg_4, f, int, ret)
+{
+ t7_ret = ret;
+ return 0;
+}
+
+SEC("fentry/bpf_testmod_test_struct_arg_8")
+int BPF_PROG2(test_struct_many_args_3, __u64, a, void *, b, short, c, int, d,
+ void *, e, struct bpf_testmod_struct_arg_4, f, int, g)
+{
+ t8_a = a;
+ t8_b = (long)b;
+ t8_c = c;
+ t8_d = d;
+ t8_e = (long)e;
+ t8_f_a = f.a;
+ t8_f_b = f.b;
+ t8_g = g;
+ return 0;
+}
+
+SEC("fexit/bpf_testmod_test_struct_arg_8")
+int BPF_PROG2(test_struct_many_args_4, __u64, a, void *, b, short, c, int, d,
+ void *, e, struct bpf_testmod_struct_arg_4, f, int, g,
+ int, ret)
+{
+ t8_ret = ret;
+ return 0;
+}
+
+SEC("fentry/bpf_testmod_test_struct_arg_9")
+int BPF_PROG2(test_struct_many_args_5, __u64, a, void *, b, short, c, int, d, void *, e,
+ char, f, short, g, struct bpf_testmod_struct_arg_5, h, long, i)
+{
+ t9_a = a;
+ t9_b = (long)b;
+ t9_c = c;
+ t9_d = d;
+ t9_e = (long)e;
+ t9_f = f;
+ t9_g = g;
+ t9_h_a = h.a;
+ t9_h_b = h.b;
+ t9_h_c = h.c;
+ t9_h_d = h.d;
+ t9_i = i;
+ return 0;
+}
+
+SEC("fexit/bpf_testmod_test_struct_arg_9")
+int BPF_PROG2(test_struct_many_args_6, __u64, a, void *, b, short, c, int, d, void *, e,
+ char, f, short, g, struct bpf_testmod_struct_arg_5, h, long, i, int, ret)
+{
+ t9_ret = ret;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c
index 5fda43901033..2619ed193c65 100644
--- a/tools/testing/selftests/bpf/progs/trigger_bench.c
+++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
@@ -1,6 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2020 Facebook
-
#include <linux/bpf.h>
#include <asm/unistd.h>
#include <bpf/bpf_helpers.h>
@@ -9,82 +8,126 @@
char _license[] SEC("license") = "GPL";
-long hits = 0;
+#define CPU_MASK 255
+#define MAX_CPUS (CPU_MASK + 1) /* should match MAX_BUCKETS in benchs/bench_trigger.c */
-SEC("tp/syscalls/sys_enter_getpgid")
-int bench_trigger_tp(void *ctx)
+/* matches struct counter in bench.h */
+struct counter {
+ long value;
+} __attribute__((aligned(128)));
+
+struct counter hits[MAX_CPUS];
+
+static __always_inline void inc_counter(void)
+{
+ int cpu = bpf_get_smp_processor_id();
+
+ __sync_add_and_fetch(&hits[cpu & CPU_MASK].value, 1);
+}
+
+SEC("?uprobe")
+int bench_trigger_uprobe(void *ctx)
{
- __sync_add_and_fetch(&hits, 1);
+ inc_counter();
return 0;
}
-SEC("raw_tp/sys_enter")
-int BPF_PROG(bench_trigger_raw_tp, struct pt_regs *regs, long id)
+const volatile int batch_iters = 0;
+
+SEC("?raw_tp")
+int trigger_count(void *ctx)
{
- if (id == __NR_getpgid)
- __sync_add_and_fetch(&hits, 1);
+ int i;
+
+ for (i = 0; i < batch_iters; i++)
+ inc_counter();
+
return 0;
}
-SEC("kprobe/" SYS_PREFIX "sys_getpgid")
+SEC("?raw_tp")
+int trigger_driver(void *ctx)
+{
+ int i;
+
+ for (i = 0; i < batch_iters; i++)
+ (void)bpf_get_numa_node_id(); /* attach point for benchmarking */
+
+ return 0;
+}
+
+extern int bpf_modify_return_test_tp(int nonce) __ksym __weak;
+
+SEC("?raw_tp")
+int trigger_driver_kfunc(void *ctx)
+{
+ int i;
+
+ for (i = 0; i < batch_iters; i++)
+ (void)bpf_modify_return_test_tp(0); /* attach point for benchmarking */
+
+ return 0;
+}
+
+SEC("?kprobe/bpf_get_numa_node_id")
int bench_trigger_kprobe(void *ctx)
{
- __sync_add_and_fetch(&hits, 1);
+ inc_counter();
return 0;
}
-SEC("kretprobe/" SYS_PREFIX "sys_getpgid")
+SEC("?kretprobe/bpf_get_numa_node_id")
int bench_trigger_kretprobe(void *ctx)
{
- __sync_add_and_fetch(&hits, 1);
+ inc_counter();
return 0;
}
-SEC("kprobe.multi/" SYS_PREFIX "sys_getpgid")
+SEC("?kprobe.multi/bpf_get_numa_node_id")
int bench_trigger_kprobe_multi(void *ctx)
{
- __sync_add_and_fetch(&hits, 1);
+ inc_counter();
return 0;
}
-SEC("kretprobe.multi/" SYS_PREFIX "sys_getpgid")
+SEC("?kretprobe.multi/bpf_get_numa_node_id")
int bench_trigger_kretprobe_multi(void *ctx)
{
- __sync_add_and_fetch(&hits, 1);
+ inc_counter();
return 0;
}
-SEC("fentry/" SYS_PREFIX "sys_getpgid")
+SEC("?fentry/bpf_get_numa_node_id")
int bench_trigger_fentry(void *ctx)
{
- __sync_add_and_fetch(&hits, 1);
+ inc_counter();
return 0;
}
-SEC("fexit/" SYS_PREFIX "sys_getpgid")
+SEC("?fexit/bpf_get_numa_node_id")
int bench_trigger_fexit(void *ctx)
{
- __sync_add_and_fetch(&hits, 1);
+ inc_counter();
return 0;
}
-SEC("fentry.s/" SYS_PREFIX "sys_getpgid")
-int bench_trigger_fentry_sleep(void *ctx)
+SEC("?fmod_ret/bpf_modify_return_test_tp")
+int bench_trigger_fmodret(void *ctx)
{
- __sync_add_and_fetch(&hits, 1);
- return 0;
+ inc_counter();
+ return -22;
}
-SEC("fmod_ret/" SYS_PREFIX "sys_getpgid")
-int bench_trigger_fmodret(void *ctx)
+SEC("?tp/bpf_test_run/bpf_trigger_tp")
+int bench_trigger_tp(void *ctx)
{
- __sync_add_and_fetch(&hits, 1);
- return -22;
+ inc_counter();
+ return 0;
}
-SEC("uprobe")
-int bench_trigger_uprobe(void *ctx)
+SEC("?raw_tp/bpf_trigger_tp")
+int bench_trigger_rawtp(void *ctx)
{
- __sync_add_and_fetch(&hits, 1);
+ inc_counter();
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi.c b/tools/testing/selftests/bpf/progs/uprobe_multi.c
index 419d9aa28fce..44190efcdba2 100644
--- a/tools/testing/selftests/bpf/progs/uprobe_multi.c
+++ b/tools/testing/selftests/bpf/progs/uprobe_multi.c
@@ -1,8 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/bpf.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-#include <stdbool.h>
+#include <bpf/usdt.bpf.h>
char _license[] SEC("license") = "GPL";
@@ -22,6 +22,13 @@ __u64 uprobe_multi_sleep_result = 0;
int pid = 0;
int child_pid = 0;
+int child_tid = 0;
+int child_pid_usdt = 0;
+int child_tid_usdt = 0;
+
+int expect_pid = 0;
+bool bad_pid_seen = false;
+bool bad_pid_seen_usdt = false;
bool test_cookie = false;
void *user_ptr = 0;
@@ -36,11 +43,19 @@ static __always_inline bool verify_sleepable_user_copy(void)
static void uprobe_multi_check(void *ctx, bool is_return, bool is_sleep)
{
- child_pid = bpf_get_current_pid_tgid() >> 32;
+ __u64 cur_pid_tgid = bpf_get_current_pid_tgid();
+ __u32 cur_pid;
- if (pid && child_pid != pid)
+ cur_pid = cur_pid_tgid >> 32;
+ if (pid && cur_pid != pid)
return;
+ if (expect_pid && cur_pid != expect_pid)
+ bad_pid_seen = true;
+
+ child_pid = cur_pid_tgid >> 32;
+ child_tid = (__u32)cur_pid_tgid;
+
__u64 cookie = test_cookie ? bpf_get_attach_cookie(ctx) : 0;
__u64 addr = bpf_get_func_ip(ctx);
@@ -97,5 +112,32 @@ int uretprobe_sleep(struct pt_regs *ctx)
SEC("uprobe.multi//proc/self/exe:uprobe_multi_func_*")
int uprobe_extra(struct pt_regs *ctx)
{
+ /* we need this one just to mix PID-filtered and global uprobes */
+ return 0;
+}
+
+SEC("usdt")
+int usdt_pid(struct pt_regs *ctx)
+{
+ __u64 cur_pid_tgid = bpf_get_current_pid_tgid();
+ __u32 cur_pid;
+
+ cur_pid = cur_pid_tgid >> 32;
+ if (pid && cur_pid != pid)
+ return 0;
+
+ if (expect_pid && cur_pid != expect_pid)
+ bad_pid_seen_usdt = true;
+
+ child_pid_usdt = cur_pid_tgid >> 32;
+ child_tid_usdt = (__u32)cur_pid_tgid;
+
+ return 0;
+}
+
+SEC("usdt")
+int usdt_extra(struct pt_regs *ctx)
+{
+ /* we need this one just to mix PID-filtered and global USDT probes */
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/uprobe_syscall.c b/tools/testing/selftests/bpf/progs/uprobe_syscall.c
new file mode 100644
index 000000000000..8a4fa6c7ef59
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/uprobe_syscall.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <string.h>
+
+struct pt_regs regs;
+
+char _license[] SEC("license") = "GPL";
+
+SEC("uretprobe//proc/self/exe:uretprobe_regs_trigger")
+int uretprobe(struct pt_regs *ctx)
+{
+ __builtin_memcpy(&regs, ctx, sizeof(regs));
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/uprobe_syscall_executed.c b/tools/testing/selftests/bpf/progs/uprobe_syscall_executed.c
new file mode 100644
index 000000000000..0d7f1a7db2e2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/uprobe_syscall_executed.c
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <string.h>
+
+struct pt_regs regs;
+
+char _license[] SEC("license") = "GPL";
+
+int executed = 0;
+
+SEC("uretprobe.multi")
+int test(struct pt_regs *regs)
+{
+ executed = 1;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/uretprobe_stack.c b/tools/testing/selftests/bpf/progs/uretprobe_stack.c
new file mode 100644
index 000000000000..9fdcf396b8f4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/uretprobe_stack.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/usdt.bpf.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u64 entry_stack1[32], exit_stack1[32];
+__u64 entry_stack1_recur[32], exit_stack1_recur[32];
+__u64 entry_stack2[32];
+__u64 entry_stack3[32];
+__u64 entry_stack4[32], exit_stack4[32];
+__u64 usdt_stack[32];
+
+int entry1_len, exit1_len;
+int entry1_recur_len, exit1_recur_len;
+int entry2_len, exit2_len;
+int entry3_len, exit3_len;
+int entry4_len, exit4_len;
+int usdt_len;
+
+#define SZ sizeof(usdt_stack)
+
+SEC("uprobe//proc/self/exe:target_1")
+int BPF_UPROBE(uprobe_1)
+{
+ /* target_1 is recursive wit depth of 2, so we capture two separate
+ * stack traces, depending on which occurence it is
+ */
+ static bool recur = false;
+
+ if (!recur)
+ entry1_len = bpf_get_stack(ctx, &entry_stack1, SZ, BPF_F_USER_STACK);
+ else
+ entry1_recur_len = bpf_get_stack(ctx, &entry_stack1_recur, SZ, BPF_F_USER_STACK);
+
+ recur = true;
+ return 0;
+}
+
+SEC("uretprobe//proc/self/exe:target_1")
+int BPF_URETPROBE(uretprobe_1)
+{
+ /* see above, target_1 is recursive */
+ static bool recur = false;
+
+ /* NOTE: order of returns is reversed to order of entries */
+ if (!recur)
+ exit1_recur_len = bpf_get_stack(ctx, &exit_stack1_recur, SZ, BPF_F_USER_STACK);
+ else
+ exit1_len = bpf_get_stack(ctx, &exit_stack1, SZ, BPF_F_USER_STACK);
+
+ recur = true;
+ return 0;
+}
+
+SEC("uprobe//proc/self/exe:target_2")
+int BPF_UPROBE(uprobe_2)
+{
+ entry2_len = bpf_get_stack(ctx, &entry_stack2, SZ, BPF_F_USER_STACK);
+ return 0;
+}
+
+/* no uretprobe for target_2 */
+
+SEC("uprobe//proc/self/exe:target_3")
+int BPF_UPROBE(uprobe_3)
+{
+ entry3_len = bpf_get_stack(ctx, &entry_stack3, SZ, BPF_F_USER_STACK);
+ return 0;
+}
+
+/* no uretprobe for target_3 */
+
+SEC("uprobe//proc/self/exe:target_4")
+int BPF_UPROBE(uprobe_4)
+{
+ entry4_len = bpf_get_stack(ctx, &entry_stack4, SZ, BPF_F_USER_STACK);
+ return 0;
+}
+
+SEC("uretprobe//proc/self/exe:target_4")
+int BPF_URETPROBE(uretprobe_4)
+{
+ exit4_len = bpf_get_stack(ctx, &exit_stack4, SZ, BPF_F_USER_STACK);
+ return 0;
+}
+
+SEC("usdt//proc/self/exe:uretprobe_stack:target")
+int BPF_USDT(usdt_probe)
+{
+ usdt_len = bpf_get_stack(ctx, &usdt_stack, SZ, BPF_F_USER_STACK);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c b/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c
index 11ab25c42c36..54de0389f878 100644
--- a/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c
+++ b/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c
@@ -221,3 +221,25 @@ int user_ringbuf_callback_reinit_dynptr_ringbuf(void *ctx)
bpf_user_ringbuf_drain(&user_ringbuf, try_reinit_dynptr_ringbuf, NULL, 0);
return 0;
}
+
+__noinline long global_call_bpf_dynptr_data(struct bpf_dynptr *dynptr)
+{
+ bpf_dynptr_data(dynptr, 0xA, 0xA);
+ return 0;
+}
+
+static long callback_adjust_bpf_dynptr_reg_off(struct bpf_dynptr *dynptr,
+ void *ctx)
+{
+ global_call_bpf_dynptr_data(dynptr += 1024);
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("dereference of modified dynptr_ptr ptr R1 off=16384 disallowed")
+int user_ringbuf_callback_const_ptr_to_dynptr_reg_off(void *ctx)
+{
+ bpf_user_ringbuf_drain(&user_ringbuf,
+ callback_adjust_bpf_dynptr_reg_off, NULL, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/verifier_arena.c b/tools/testing/selftests/bpf/progs/verifier_arena.c
index 93144ae6df74..67509c5d3982 100644
--- a/tools/testing/selftests/bpf/progs/verifier_arena.c
+++ b/tools/testing/selftests/bpf/progs/verifier_arena.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#define BPF_NO_KFUNC_PROTOTYPES
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
diff --git a/tools/testing/selftests/bpf/progs/verifier_arena_large.c b/tools/testing/selftests/bpf/progs/verifier_arena_large.c
index ef66ea460264..6065f862d964 100644
--- a/tools/testing/selftests/bpf/progs/verifier_arena_large.c
+++ b/tools/testing/selftests/bpf/progs/verifier_arena_large.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#define BPF_NO_KFUNC_PROTOTYPES
#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
diff --git a/tools/testing/selftests/bpf/progs/verifier_bits_iter.c b/tools/testing/selftests/bpf/progs/verifier_bits_iter.c
new file mode 100644
index 000000000000..716113c2bce2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_bits_iter.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2024 Yafang Shao <laoar.shao@gmail.com> */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#include "bpf_misc.h"
+#include "task_kfunc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+int bpf_iter_bits_new(struct bpf_iter_bits *it, const u64 *unsafe_ptr__ign,
+ u32 nr_bits) __ksym __weak;
+int *bpf_iter_bits_next(struct bpf_iter_bits *it) __ksym __weak;
+void bpf_iter_bits_destroy(struct bpf_iter_bits *it) __ksym __weak;
+
+SEC("iter.s/cgroup")
+__description("bits iter without destroy")
+__failure __msg("Unreleased reference")
+int BPF_PROG(no_destroy, struct bpf_iter_meta *meta, struct cgroup *cgrp)
+{
+ struct bpf_iter_bits it;
+ u64 data = 1;
+
+ bpf_iter_bits_new(&it, &data, 1);
+ bpf_iter_bits_next(&it);
+ return 0;
+}
+
+SEC("iter/cgroup")
+__description("uninitialized iter in ->next()")
+__failure __msg("expected an initialized iter_bits as arg #1")
+int BPF_PROG(next_uninit, struct bpf_iter_meta *meta, struct cgroup *cgrp)
+{
+ struct bpf_iter_bits *it = NULL;
+
+ bpf_iter_bits_next(it);
+ return 0;
+}
+
+SEC("iter/cgroup")
+__description("uninitialized iter in ->destroy()")
+__failure __msg("expected an initialized iter_bits as arg #1")
+int BPF_PROG(destroy_uninit, struct bpf_iter_meta *meta, struct cgroup *cgrp)
+{
+ struct bpf_iter_bits it = {};
+
+ bpf_iter_bits_destroy(&it);
+ return 0;
+}
+
+SEC("syscall")
+__description("null pointer")
+__success __retval(0)
+int null_pointer(void)
+{
+ int nr = 0;
+ int *bit;
+
+ bpf_for_each(bits, bit, NULL, 1)
+ nr++;
+ return nr;
+}
+
+SEC("syscall")
+__description("bits copy")
+__success __retval(10)
+int bits_copy(void)
+{
+ u64 data = 0xf7310UL; /* 4 + 3 + 2 + 1 + 0*/
+ int nr = 0;
+ int *bit;
+
+ bpf_for_each(bits, bit, &data, 1)
+ nr++;
+ return nr;
+}
+
+SEC("syscall")
+__description("bits memalloc")
+__success __retval(64)
+int bits_memalloc(void)
+{
+ u64 data[2];
+ int nr = 0;
+ int *bit;
+
+ __builtin_memset(&data, 0xf0, sizeof(data)); /* 4 * 16 */
+ bpf_for_each(bits, bit, &data[0], sizeof(data) / sizeof(u64))
+ nr++;
+ return nr;
+}
+
+SEC("syscall")
+__description("bit index")
+__success __retval(8)
+int bit_index(void)
+{
+ u64 data = 0x100;
+ int bit_idx = 0;
+ int *bit;
+
+ bpf_for_each(bits, bit, &data, 1) {
+ if (*bit == 0)
+ continue;
+ bit_idx = *bit;
+ }
+ return bit_idx;
+}
+
+SEC("syscall")
+__description("bits nomem")
+__success __retval(0)
+int bits_nomem(void)
+{
+ u64 data[4];
+ int nr = 0;
+ int *bit;
+
+ __builtin_memset(&data, 0xff, sizeof(data));
+ bpf_for_each(bits, bit, &data[0], 513) /* Be greater than 512 */
+ nr++;
+ return nr;
+}
+
+SEC("syscall")
+__description("fewer words")
+__success __retval(1)
+int fewer_words(void)
+{
+ u64 data[2] = {0x1, 0xff};
+ int nr = 0;
+ int *bit;
+
+ bpf_for_each(bits, bit, &data[0], 1)
+ nr++;
+ return nr;
+}
+
+SEC("syscall")
+__description("zero words")
+__success __retval(0)
+int zero_words(void)
+{
+ u64 data[2] = {0x1, 0xff};
+ int nr = 0;
+ int *bit;
+
+ bpf_for_each(bits, bit, &data[0], 0)
+ nr++;
+ return nr;
+}
diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c
index 960998f16306..a0bb7fb40ea5 100644
--- a/tools/testing/selftests/bpf/progs/verifier_bounds.c
+++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c
@@ -886,6 +886,69 @@ l1_%=: r0 = 0; \
}
SEC("socket")
+__description("bounds check for non const xor src dst")
+__success __log_level(2)
+__msg("5: (af) r0 ^= r6 ; R0_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=431,var_off=(0x0; 0x1af))")
+__naked void non_const_xor_src_dst(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r6 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r6 &= 0xaf; \
+ r0 &= 0x1a0; \
+ r0 ^= r6; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check for non const or src dst")
+__success __log_level(2)
+__msg("5: (4f) r0 |= r6 ; R0_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=431,var_off=(0x0; 0x1af))")
+__naked void non_const_or_src_dst(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r6 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r6 &= 0xaf; \
+ r0 &= 0x1a0; \
+ r0 |= r6; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check for non const mul regs")
+__success __log_level(2)
+__msg("5: (2f) r0 *= r6 ; R0_w=scalar(smin=smin32=0,smax=umax=smax32=umax32=3825,var_off=(0x0; 0xfff))")
+__naked void non_const_mul_regs(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r6 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r6 &= 0xff; \
+ r0 &= 0x0f; \
+ r0 *= r6; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
__description("bounds checks after 32-bit truncation. test 1")
__success __failure_unpriv __msg_unpriv("R0 leaks addr")
__retval(0)
diff --git a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c
index baff5ffe9405..a9fc30ed4d73 100644
--- a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c
+++ b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c
@@ -8,6 +8,13 @@
#include "xdp_metadata.h"
#include "bpf_kfuncs.h"
+/* The compiler may be able to detect the access to uninitialized
+ memory in the routines performing out of bound memory accesses and
+ emit warnings about it. This is the case of GCC. */
+#if !defined(__clang__)
+#pragma GCC diagnostic ignored "-Wmaybe-uninitialized"
+#endif
+
int arr[1];
int unkn_idx;
const volatile bool call_dead_subprog = false;
diff --git a/tools/testing/selftests/bpf/progs/verifier_helper_restricted.c b/tools/testing/selftests/bpf/progs/verifier_helper_restricted.c
index 0ede0ccd090c..059aa716e3d0 100644
--- a/tools/testing/selftests/bpf/progs/verifier_helper_restricted.c
+++ b/tools/testing/selftests/bpf/progs/verifier_helper_restricted.c
@@ -30,7 +30,7 @@ struct {
SEC("kprobe")
__description("bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_KPROBE")
-__failure __msg("unknown func bpf_ktime_get_coarse_ns")
+__failure __msg("program of this type cannot use helper bpf_ktime_get_coarse_ns")
__naked void in_bpf_prog_type_kprobe_1(void)
{
asm volatile (" \
@@ -44,7 +44,7 @@ __naked void in_bpf_prog_type_kprobe_1(void)
SEC("tracepoint")
__description("bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_TRACEPOINT")
-__failure __msg("unknown func bpf_ktime_get_coarse_ns")
+__failure __msg("program of this type cannot use helper bpf_ktime_get_coarse_ns")
__naked void in_bpf_prog_type_tracepoint_1(void)
{
asm volatile (" \
@@ -58,7 +58,7 @@ __naked void in_bpf_prog_type_tracepoint_1(void)
SEC("perf_event")
__description("bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_PERF_EVENT")
-__failure __msg("unknown func bpf_ktime_get_coarse_ns")
+__failure __msg("program of this type cannot use helper bpf_ktime_get_coarse_ns")
__naked void bpf_prog_type_perf_event_1(void)
{
asm volatile (" \
@@ -72,7 +72,7 @@ __naked void bpf_prog_type_perf_event_1(void)
SEC("raw_tracepoint")
__description("bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_RAW_TRACEPOINT")
-__failure __msg("unknown func bpf_ktime_get_coarse_ns")
+__failure __msg("program of this type cannot use helper bpf_ktime_get_coarse_ns")
__naked void bpf_prog_type_raw_tracepoint_1(void)
{
asm volatile (" \
diff --git a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
index 99e561f18f9b..e54bb5385bc1 100644
--- a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
+++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
@@ -274,6 +274,58 @@ static __naked void iter_limit_bug_cb(void)
);
}
+int tmp_var;
+SEC("socket")
+__failure __msg("infinite loop detected at insn 2")
+__naked void jgt_imm64_and_may_goto(void)
+{
+ asm volatile (" \
+ r0 = %[tmp_var] ll; \
+l0_%=: .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short -3; /* off -3 */ \
+ .long 0; /* imm */ \
+ if r0 > 10 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+" :: __imm_addr(tmp_var)
+ : __clobber_all);
+}
+
+SEC("socket")
+__failure __msg("infinite loop detected at insn 1")
+__naked void may_goto_self(void)
+{
+ asm volatile (" \
+ r0 = *(u32 *)(r10 - 4); \
+l0_%=: .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short -1; /* off -1 */ \
+ .long 0; /* imm */ \
+ if r0 > 10 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__success __retval(0)
+__naked void may_goto_neg_off(void)
+{
+ asm volatile (" \
+ r0 = *(u32 *)(r10 - 4); \
+ goto l0_%=; \
+ goto l1_%=; \
+l0_%=: .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short -2; /* off -2 */ \
+ .long 0; /* imm */ \
+ if r0 > 10 goto l0_%=; \
+l1_%=: r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
SEC("tc")
__failure
__flag(BPF_F_TEST_STATE_FREQ)
@@ -307,6 +359,100 @@ int iter_limit_bug(struct __sk_buff *skb)
return 0;
}
+SEC("socket")
+__success __retval(0)
+__naked void ja_and_may_goto(void)
+{
+ asm volatile (" \
+l0_%=: .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short 1; /* off 1 */ \
+ .long 0; /* imm */ \
+ goto l0_%=; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_common);
+}
+
+SEC("socket")
+__success __retval(0)
+__naked void ja_and_may_goto2(void)
+{
+ asm volatile (" \
+l0_%=: r0 = 0; \
+ .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short 1; /* off 1 */ \
+ .long 0; /* imm */ \
+ goto l0_%=; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_common);
+}
+
+SEC("socket")
+__success __retval(0)
+__naked void jlt_and_may_goto(void)
+{
+ asm volatile (" \
+l0_%=: call %[bpf_jiffies64]; \
+ .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short 1; /* off 1 */ \
+ .long 0; /* imm */ \
+ if r0 < 10 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+" :: __imm(bpf_jiffies64)
+ : __clobber_all);
+}
+
+#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
+ defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
+ defined(__TARGET_ARCH_loongarch)) && \
+ __clang_major__ >= 18
+SEC("socket")
+__success __retval(0)
+__naked void gotol_and_may_goto(void)
+{
+ asm volatile (" \
+l0_%=: r0 = 0; \
+ .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short 1; /* off 1 */ \
+ .long 0; /* imm */ \
+ gotol l0_%=; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_common);
+}
+#endif
+
+SEC("socket")
+__success __retval(0)
+__naked void ja_and_may_goto_subprog(void)
+{
+ asm volatile (" \
+ call subprog_with_may_goto; \
+ exit; \
+" ::: __clobber_all);
+}
+
+static __naked __noinline __used
+void subprog_with_may_goto(void)
+{
+ asm volatile (" \
+l0_%=: .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short 1; /* off 1 */ \
+ .long 0; /* imm */ \
+ goto l0_%=; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
#define ARR_SZ 1000000
int zero;
char arr[ARR_SZ];
@@ -318,7 +464,7 @@ int cond_break1(const void *ctx)
unsigned long i;
unsigned int sum = 0;
- for (i = zero; i < ARR_SZ; cond_break, i++)
+ for (i = zero; i < ARR_SZ && can_loop; i++)
sum += i;
for (i = zero; i < ARR_SZ; i++) {
barrier_var(i);
@@ -336,12 +482,11 @@ int cond_break2(const void *ctx)
int i, j;
int sum = 0;
- for (i = zero; i < 1000; cond_break, i++)
+ for (i = zero; i < 1000 && can_loop; i++)
for (j = zero; j < 1000; j++) {
sum += i + j;
cond_break;
- }
-
+ }
return sum;
}
@@ -349,7 +494,7 @@ static __noinline int loop(void)
{
int i, sum = 0;
- for (i = zero; i <= 1000000; i++, cond_break)
+ for (i = zero; i <= 1000000 && can_loop; i++)
sum += i;
return sum;
@@ -406,4 +551,240 @@ int cond_break5(const void *ctx)
return cnt1 > 1 && cnt2 > 1 ? 1 : 0;
}
+#define ARR2_SZ 1000
+SEC(".data.arr2")
+char arr2[ARR2_SZ];
+
+SEC("socket")
+__success __flag(BPF_F_TEST_STATE_FREQ)
+int loop_inside_iter(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int *v, sum = 0;
+ __u64 i = 0;
+
+ bpf_iter_num_new(&it, 0, ARR2_SZ);
+ while ((v = bpf_iter_num_next(&it))) {
+ if (i < ARR2_SZ)
+ sum += arr2[i++];
+ }
+ bpf_iter_num_destroy(&it);
+ return sum;
+}
+
+SEC("socket")
+__success __flag(BPF_F_TEST_STATE_FREQ)
+int loop_inside_iter_signed(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int *v, sum = 0;
+ long i = 0;
+
+ bpf_iter_num_new(&it, 0, ARR2_SZ);
+ while ((v = bpf_iter_num_next(&it))) {
+ if (i < ARR2_SZ && i >= 0)
+ sum += arr2[i++];
+ }
+ bpf_iter_num_destroy(&it);
+ return sum;
+}
+
+volatile const int limit = ARR2_SZ;
+
+SEC("socket")
+__success __flag(BPF_F_TEST_STATE_FREQ)
+int loop_inside_iter_volatile_limit(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int *v, sum = 0;
+ __u64 i = 0;
+
+ bpf_iter_num_new(&it, 0, ARR2_SZ);
+ while ((v = bpf_iter_num_next(&it))) {
+ if (i < limit)
+ sum += arr2[i++];
+ }
+ bpf_iter_num_destroy(&it);
+ return sum;
+}
+
+#define ARR_LONG_SZ 1000
+
+SEC(".data.arr_long")
+long arr_long[ARR_LONG_SZ];
+
+SEC("socket")
+__success
+int test1(const void *ctx)
+{
+ long i;
+
+ for (i = 0; i < ARR_LONG_SZ && can_loop; i++)
+ arr_long[i] = i;
+ return 0;
+}
+
+SEC("socket")
+__success
+int test2(const void *ctx)
+{
+ __u64 i;
+
+ for (i = zero; i < ARR_LONG_SZ && can_loop; i++) {
+ barrier_var(i);
+ arr_long[i] = i;
+ }
+ return 0;
+}
+
+SEC(".data.arr_foo")
+struct {
+ int a;
+ int b;
+} arr_foo[ARR_LONG_SZ];
+
+SEC("socket")
+__success
+int test3(const void *ctx)
+{
+ __u64 i;
+
+ for (i = zero; i < ARR_LONG_SZ && can_loop; i++) {
+ barrier_var(i);
+ arr_foo[i].a = i;
+ arr_foo[i].b = i;
+ }
+ return 0;
+}
+
+SEC("socket")
+__success
+int test4(const void *ctx)
+{
+ long i;
+
+ for (i = zero + ARR_LONG_SZ - 1; i < ARR_LONG_SZ && i >= 0 && can_loop; i--) {
+ barrier_var(i);
+ arr_foo[i].a = i;
+ arr_foo[i].b = i;
+ }
+ return 0;
+}
+
+char buf[10] SEC(".data.buf");
+
+SEC("socket")
+__description("check add const")
+__success
+__naked void check_add_const(void)
+{
+ /* typical LLVM generated loop with may_goto */
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 > 9 goto l1_%=; \
+l0_%=: r1 = %[buf]; \
+ r2 = r0; \
+ r1 += r2; \
+ r3 = *(u8 *)(r1 +0); \
+ .byte 0xe5; /* may_goto */ \
+ .byte 0; /* regs */ \
+ .short 4; /* off of l1_%=: */ \
+ .long 0; /* imm */ \
+ r0 = r2; \
+ r0 += 1; \
+ if r2 < 9 goto l0_%=; \
+ exit; \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm_ptr(buf)
+ : __clobber_common);
+}
+
+SEC("socket")
+__failure
+__msg("*(u8 *)(r7 +0) = r0")
+__msg("invalid access to map value, value_size=10 off=10 size=1")
+__naked void check_add_const_3regs(void)
+{
+ asm volatile (
+ "r6 = %[buf];"
+ "r7 = %[buf];"
+ "call %[bpf_ktime_get_ns];"
+ "r1 = r0;" /* link r0.id == r1.id == r2.id */
+ "r2 = r0;"
+ "r1 += 1;" /* r1 == r0+1 */
+ "r2 += 2;" /* r2 == r0+2 */
+ "if r0 > 8 goto 1f;" /* r0 range [0, 8] */
+ "r6 += r1;" /* r1 range [1, 9] */
+ "r7 += r2;" /* r2 range [2, 10] */
+ "*(u8 *)(r6 +0) = r0;" /* safe, within bounds */
+ "*(u8 *)(r7 +0) = r0;" /* unsafe, out of bounds */
+ "1: exit;"
+ :
+ : __imm(bpf_ktime_get_ns),
+ __imm_ptr(buf)
+ : __clobber_common);
+}
+
+SEC("socket")
+__failure
+__msg("*(u8 *)(r8 -1) = r0")
+__msg("invalid access to map value, value_size=10 off=10 size=1")
+__naked void check_add_const_3regs_2if(void)
+{
+ asm volatile (
+ "r6 = %[buf];"
+ "r7 = %[buf];"
+ "r8 = %[buf];"
+ "call %[bpf_ktime_get_ns];"
+ "if r0 < 2 goto 1f;"
+ "r1 = r0;" /* link r0.id == r1.id == r2.id */
+ "r2 = r0;"
+ "r1 += 1;" /* r1 == r0+1 */
+ "r2 += 2;" /* r2 == r0+2 */
+ "if r2 > 11 goto 1f;" /* r2 range [0, 11] -> r0 range [-2, 9]; r1 range [-1, 10] */
+ "if r0 s< 0 goto 1f;" /* r0 range [0, 9] -> r1 range [1, 10]; r2 range [2, 11]; */
+ "r6 += r0;" /* r0 range [0, 9] */
+ "r7 += r1;" /* r1 range [1, 10] */
+ "r8 += r2;" /* r2 range [2, 11] */
+ "*(u8 *)(r6 +0) = r0;" /* safe, within bounds */
+ "*(u8 *)(r7 -1) = r0;" /* safe */
+ "*(u8 *)(r8 -1) = r0;" /* unsafe */
+ "1: exit;"
+ :
+ : __imm(bpf_ktime_get_ns),
+ __imm_ptr(buf)
+ : __clobber_common);
+}
+
+SEC("socket")
+__failure
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void check_add_const_regsafe_off(void)
+{
+ asm volatile (
+ "r8 = %[buf];"
+ "call %[bpf_ktime_get_ns];"
+ "r6 = r0;"
+ "call %[bpf_ktime_get_ns];"
+ "r7 = r0;"
+ "call %[bpf_ktime_get_ns];"
+ "r1 = r0;" /* same ids for r1 and r0 */
+ "if r6 > r7 goto 1f;" /* this jump can't be predicted */
+ "r1 += 1;" /* r1.off == +1 */
+ "goto 2f;"
+ "1: r1 += 100;" /* r1.off == +100 */
+ "goto +0;" /* verify r1.off in regsafe() after this insn */
+ "2: if r0 > 8 goto 3f;" /* r0 range [0,8], r1 range either [1,9] or [100,108]*/
+ "r8 += r1;"
+ "*(u8 *)(r8 +0) = r0;" /* potentially unsafe, buf size is 10 */
+ "3: exit;"
+ :
+ : __imm(bpf_ktime_get_ns),
+ __imm_ptr(buf)
+ : __clobber_common);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_kfunc_prog_types.c b/tools/testing/selftests/bpf/progs/verifier_kfunc_prog_types.c
new file mode 100644
index 000000000000..cb32b0cfc84b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_kfunc_prog_types.c
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+#include "cgrp_kfunc_common.h"
+#include "cpumask_common.h"
+#include "task_kfunc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+/***************
+ * Task kfuncs *
+ ***************/
+
+static void task_kfunc_load_test(void)
+{
+ struct task_struct *current, *ref_1, *ref_2;
+
+ current = bpf_get_current_task_btf();
+ ref_1 = bpf_task_from_pid(current->pid);
+ if (!ref_1)
+ return;
+
+ ref_2 = bpf_task_acquire(ref_1);
+ if (ref_2)
+ bpf_task_release(ref_2);
+ bpf_task_release(ref_1);
+}
+
+SEC("raw_tp")
+__failure __msg("calling kernel function")
+int BPF_PROG(task_kfunc_raw_tp)
+{
+ task_kfunc_load_test();
+ return 0;
+}
+
+SEC("syscall")
+__success
+int BPF_PROG(task_kfunc_syscall)
+{
+ task_kfunc_load_test();
+ return 0;
+}
+
+/*****************
+ * cgroup kfuncs *
+ *****************/
+
+static void cgrp_kfunc_load_test(void)
+{
+ struct cgroup *cgrp, *ref;
+
+ cgrp = bpf_cgroup_from_id(0);
+ if (!cgrp)
+ return;
+
+ ref = bpf_cgroup_acquire(cgrp);
+ if (!ref) {
+ bpf_cgroup_release(cgrp);
+ return;
+ }
+
+ bpf_cgroup_release(ref);
+ bpf_cgroup_release(cgrp);
+}
+
+SEC("raw_tp")
+__failure __msg("calling kernel function")
+int BPF_PROG(cgrp_kfunc_raw_tp)
+{
+ cgrp_kfunc_load_test();
+ return 0;
+}
+
+SEC("syscall")
+__success
+int BPF_PROG(cgrp_kfunc_syscall)
+{
+ cgrp_kfunc_load_test();
+ return 0;
+}
+
+/******************
+ * cpumask kfuncs *
+ ******************/
+
+static void cpumask_kfunc_load_test(void)
+{
+ struct bpf_cpumask *alloc, *ref;
+
+ alloc = bpf_cpumask_create();
+ if (!alloc)
+ return;
+
+ ref = bpf_cpumask_acquire(alloc);
+ bpf_cpumask_set_cpu(0, alloc);
+ bpf_cpumask_test_cpu(0, (const struct cpumask *)ref);
+
+ bpf_cpumask_release(ref);
+ bpf_cpumask_release(alloc);
+}
+
+SEC("raw_tp")
+__failure __msg("calling kernel function")
+int BPF_PROG(cpumask_kfunc_raw_tp)
+{
+ cpumask_kfunc_load_test();
+ return 0;
+}
+
+SEC("syscall")
+__success
+int BPF_PROG(cpumask_kfunc_syscall)
+{
+ cpumask_kfunc_load_test();
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/verifier_movsx.c b/tools/testing/selftests/bpf/progs/verifier_movsx.c
index cbb9d6714f53..028ec855587b 100644
--- a/tools/testing/selftests/bpf/progs/verifier_movsx.c
+++ b/tools/testing/selftests/bpf/progs/verifier_movsx.c
@@ -224,6 +224,69 @@ l0_%=: \
: __clobber_all);
}
+SEC("socket")
+__description("MOV32SX, S8, var_off u32_max")
+__failure __msg("infinite loop detected")
+__failure_unpriv __msg_unpriv("back-edge from insn 2 to 0")
+__naked void mov64sx_s32_varoff_1(void)
+{
+ asm volatile (" \
+l0_%=: \
+ r3 = *(u8 *)(r10 -387); \
+ w7 = (s8)w3; \
+ if w7 >= 0x2533823b goto l0_%=; \
+ w0 = 0; \
+ exit; \
+" :
+ :
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("MOV32SX, S8, var_off not u32_max, positive after s8 extension")
+__success __retval(0)
+__failure_unpriv __msg_unpriv("frame pointer is read only")
+__naked void mov64sx_s32_varoff_2(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r3 = r0; \
+ r3 &= 0xf; \
+ w7 = (s8)w3; \
+ if w7 s>= 16 goto l0_%=; \
+ w0 = 0; \
+ exit; \
+l0_%=: \
+ r10 = 1; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("MOV32SX, S8, var_off not u32_max, negative after s8 extension")
+__success __retval(0)
+__failure_unpriv __msg_unpriv("frame pointer is read only")
+__naked void mov64sx_s32_varoff_3(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r3 = r0; \
+ r3 &= 0xf; \
+ r3 |= 0x80; \
+ w7 = (s8)w3; \
+ if w7 s>= -5 goto l0_%=; \
+ w0 = 0; \
+ exit; \
+l0_%=: \
+ r10 = 1; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
#else
SEC("socket")
diff --git a/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c b/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c
index 65bba330e7e5..ab9f9f2620ed 100644
--- a/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c
+++ b/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c
@@ -79,7 +79,7 @@ int with_invalid_ctx_access_test5(struct bpf_nf_ctx *ctx)
return NF_ACCEPT;
}
-extern int bpf_dynptr_from_skb(struct sk_buff *skb, __u64 flags,
+extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags,
struct bpf_dynptr *ptr__uninit) __ksym;
extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset,
void *buffer, uint32_t buffer__sz) __ksym;
@@ -90,8 +90,8 @@ __success __failure_unpriv
__retval(0)
int with_valid_ctx_access_test6(struct bpf_nf_ctx *ctx)
{
+ struct __sk_buff *skb = (struct __sk_buff *)ctx->skb;
const struct nf_hook_state *state = ctx->state;
- struct sk_buff *skb = ctx->skb;
const struct iphdr *iph;
const struct tcphdr *th;
u8 buffer_iph[20] = {};
@@ -99,7 +99,7 @@ int with_valid_ctx_access_test6(struct bpf_nf_ctx *ctx)
struct bpf_dynptr ptr;
uint8_t ihl;
- if (skb->len <= 20 || bpf_dynptr_from_skb(skb, 0, &ptr))
+ if (ctx->skb->len <= 20 || bpf_dynptr_from_skb(skb, 0, &ptr))
return NF_ACCEPT;
iph = bpf_dynptr_slice(&ptr, 0, buffer_iph, sizeof(buffer_iph));
diff --git a/tools/testing/selftests/bpf/progs/verifier_or_jmp32_k.c b/tools/testing/selftests/bpf/progs/verifier_or_jmp32_k.c
new file mode 100644
index 000000000000..f37713a265ac
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_or_jmp32_k.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("socket")
+__description("or_jmp32_k: bit ops + branch on unknown value")
+__failure
+__msg("R0 invalid mem access 'scalar'")
+__naked void or_jmp32_k(void)
+{
+ asm volatile (" \
+ r0 = 0xffffffff; \
+ r0 /= 1; \
+ r1 = 0; \
+ w1 = -1; \
+ w1 >>= 1; \
+ w0 &= w1; \
+ w0 |= 2; \
+ if w0 != 0x7ffffffd goto l1; \
+ r0 = 1; \
+ exit; \
+l3: \
+ r0 = 5; \
+ *(u64*)(r0 - 8) = r0; \
+ exit; \
+l2: \
+ w0 -= 0xe; \
+ if w0 == 1 goto l3; \
+ r0 = 4; \
+ exit; \
+l1: \
+ w0 -= 0x7ffffff0; \
+ if w0 s>= 0xe goto l2; \
+ r0 = 3; \
+ exit; \
+" ::: __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_sock_addr.c b/tools/testing/selftests/bpf/progs/verifier_sock_addr.c
new file mode 100644
index 000000000000..9c31448a0f52
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_sock_addr.c
@@ -0,0 +1,331 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Google LLC */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf_sockopt_helpers.h>
+#include "bpf_misc.h"
+
+SEC("cgroup/recvmsg4")
+__success
+int recvmsg4_good_return_code(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/recvmsg4")
+__failure __msg("At program exit the register R0 has smin=0 smax=0 should have been in [1, 1]")
+int recvmsg4_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/recvmsg6")
+__success
+int recvmsg6_good_return_code(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/recvmsg6")
+__failure __msg("At program exit the register R0 has smin=0 smax=0 should have been in [1, 1]")
+int recvmsg6_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/recvmsg_unix")
+__success
+int recvmsg_unix_good_return_code(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/recvmsg_unix")
+__failure __msg("At program exit the register R0 has smin=0 smax=0 should have been in [1, 1]")
+int recvmsg_unix_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/sendmsg4")
+__success
+int sendmsg4_good_return_code_0(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/sendmsg4")
+__success
+int sendmsg4_good_return_code_1(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/sendmsg4")
+__failure __msg("At program exit the register R0 has smin=2 smax=2 should have been in [0, 1]")
+int sendmsg4_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 2;
+}
+
+SEC("cgroup/sendmsg6")
+__success
+int sendmsg6_good_return_code_0(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/sendmsg6")
+__success
+int sendmsg6_good_return_code_1(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/sendmsg6")
+__failure __msg("At program exit the register R0 has smin=2 smax=2 should have been in [0, 1]")
+int sendmsg6_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 2;
+}
+
+SEC("cgroup/sendmsg_unix")
+__success
+int sendmsg_unix_good_return_code_0(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/sendmsg_unix")
+__success
+int sendmsg_unix_good_return_code_1(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/sendmsg_unix")
+__failure __msg("At program exit the register R0 has smin=2 smax=2 should have been in [0, 1]")
+int sendmsg_unix_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 2;
+}
+
+SEC("cgroup/getpeername4")
+__success
+int getpeername4_good_return_code(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/getpeername4")
+__failure __msg("At program exit the register R0 has smin=0 smax=0 should have been in [1, 1]")
+int getpeername4_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/getpeername6")
+__success
+int getpeername6_good_return_code(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/getpeername6")
+__failure __msg("At program exit the register R0 has smin=0 smax=0 should have been in [1, 1]")
+int getpeername6_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/getpeername_unix")
+__success
+int getpeername_unix_good_return_code(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/getpeername_unix")
+__failure __msg("At program exit the register R0 has smin=0 smax=0 should have been in [1, 1]")
+int getpeername_unix_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/getsockname4")
+__success
+int getsockname4_good_return_code(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/getsockname4")
+__failure __msg("At program exit the register R0 has smin=0 smax=0 should have been in [1, 1]")
+int getsockname4_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/getsockname6")
+__success
+int getsockname6_good_return_code(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/getsockname6")
+__failure __msg("At program exit the register R0 has smin=0 smax=0 should have been in [1, 1]")
+int getsockname6_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/getsockname_unix")
+__success
+int getsockname_unix_good_return_code(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/getsockname_unix")
+__failure __msg("At program exit the register R0 has smin=0 smax=0 should have been in [1, 1]")
+int getsockname_unix_unix_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/bind4")
+__success
+int bind4_good_return_code_0(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/bind4")
+__success
+int bind4_good_return_code_1(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/bind4")
+__success
+int bind4_good_return_code_2(struct bpf_sock_addr *ctx)
+{
+ return 2;
+}
+
+SEC("cgroup/bind4")
+__success
+int bind4_good_return_code_3(struct bpf_sock_addr *ctx)
+{
+ return 3;
+}
+
+SEC("cgroup/bind4")
+__failure __msg("At program exit the register R0 has smin=4 smax=4 should have been in [0, 3]")
+int bind4_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 4;
+}
+
+SEC("cgroup/bind6")
+__success
+int bind6_good_return_code_0(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/bind6")
+__success
+int bind6_good_return_code_1(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/bind6")
+__success
+int bind6_good_return_code_2(struct bpf_sock_addr *ctx)
+{
+ return 2;
+}
+
+SEC("cgroup/bind6")
+__success
+int bind6_good_return_code_3(struct bpf_sock_addr *ctx)
+{
+ return 3;
+}
+
+SEC("cgroup/bind6")
+__failure __msg("At program exit the register R0 has smin=4 smax=4 should have been in [0, 3]")
+int bind6_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 4;
+}
+
+SEC("cgroup/connect4")
+__success
+int connect4_good_return_code_0(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/connect4")
+__success
+int connect4_good_return_code_1(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/connect4")
+__failure __msg("At program exit the register R0 has smin=2 smax=2 should have been in [0, 1]")
+int connect4_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 2;
+}
+
+SEC("cgroup/connect6")
+__success
+int connect6_good_return_code_0(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/connect6")
+__success
+int connect6_good_return_code_1(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/connect6")
+__failure __msg("At program exit the register R0 has smin=2 smax=2 should have been in [0, 1]")
+int connect6_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 2;
+}
+
+SEC("cgroup/connect_unix")
+__success
+int connect_unix_good_return_code_0(struct bpf_sock_addr *ctx)
+{
+ return 0;
+}
+
+SEC("cgroup/connect_unix")
+__success
+int connect_unix_good_return_code_1(struct bpf_sock_addr *ctx)
+{
+ return 1;
+}
+
+SEC("cgroup/connect_unix")
+__failure __msg("At program exit the register R0 has smin=2 smax=2 should have been in [0, 1]")
+int connect_unix_bad_return_code(struct bpf_sock_addr *ctx)
+{
+ return 2;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_sockmap_mutate.c b/tools/testing/selftests/bpf/progs/verifier_sockmap_mutate.c
new file mode 100644
index 000000000000..fe4b123187b8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_sockmap_mutate.c
@@ -0,0 +1,187 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#include "bpf_misc.h"
+
+#define __always_unused __attribute__((unused))
+
+char _license[] SEC("license") = "GPL";
+
+struct sock {
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__sockmap {
+ union {
+ struct sock *sk;
+ };
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKHASH);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} sockhash SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} sockmap SEC(".maps");
+
+enum { CG_OK = 1 };
+
+int zero = 0;
+
+static __always_inline void test_sockmap_delete(void)
+{
+ bpf_map_delete_elem(&sockmap, &zero);
+ bpf_map_delete_elem(&sockhash, &zero);
+}
+
+static __always_inline void test_sockmap_update(void *sk)
+{
+ if (sk) {
+ bpf_map_update_elem(&sockmap, &zero, sk, BPF_ANY);
+ bpf_map_update_elem(&sockhash, &zero, sk, BPF_ANY);
+ }
+}
+
+static __always_inline void test_sockmap_lookup_and_update(void)
+{
+ struct bpf_sock *sk = bpf_map_lookup_elem(&sockmap, &zero);
+
+ if (sk) {
+ test_sockmap_update(sk);
+ bpf_sk_release(sk);
+ }
+}
+
+static __always_inline void test_sockmap_mutate(void *sk)
+{
+ test_sockmap_delete();
+ test_sockmap_update(sk);
+}
+
+static __always_inline void test_sockmap_lookup_and_mutate(void)
+{
+ test_sockmap_delete();
+ test_sockmap_lookup_and_update();
+}
+
+SEC("action")
+__success
+int test_sched_act(struct __sk_buff *skb)
+{
+ test_sockmap_mutate(skb->sk);
+ return 0;
+}
+
+SEC("classifier")
+__success
+int test_sched_cls(struct __sk_buff *skb)
+{
+ test_sockmap_mutate(skb->sk);
+ return 0;
+}
+
+SEC("flow_dissector")
+__success
+int test_flow_dissector_delete(struct __sk_buff *skb __always_unused)
+{
+ test_sockmap_delete();
+ return 0;
+}
+
+SEC("flow_dissector")
+__failure __msg("program of this type cannot use helper bpf_sk_release")
+int test_flow_dissector_update(struct __sk_buff *skb __always_unused)
+{
+ test_sockmap_lookup_and_update(); /* no access to skb->sk */
+ return 0;
+}
+
+SEC("iter/sockmap")
+__success
+int test_trace_iter(struct bpf_iter__sockmap *ctx)
+{
+ test_sockmap_mutate(ctx->sk);
+ return 0;
+}
+
+SEC("raw_tp/kfree")
+__failure __msg("cannot update sockmap in this context")
+int test_raw_tp_delete(const void *ctx __always_unused)
+{
+ test_sockmap_delete();
+ return 0;
+}
+
+SEC("raw_tp/kfree")
+__failure __msg("cannot update sockmap in this context")
+int test_raw_tp_update(const void *ctx __always_unused)
+{
+ test_sockmap_lookup_and_update();
+ return 0;
+}
+
+SEC("sk_lookup")
+__success
+int test_sk_lookup(struct bpf_sk_lookup *ctx)
+{
+ test_sockmap_mutate(ctx->sk);
+ return 0;
+}
+
+SEC("sk_reuseport")
+__success
+int test_sk_reuseport(struct sk_reuseport_md *ctx)
+{
+ test_sockmap_mutate(ctx->sk);
+ return 0;
+}
+
+SEC("socket")
+__success
+int test_socket_filter(struct __sk_buff *skb)
+{
+ test_sockmap_mutate(skb->sk);
+ return 0;
+}
+
+SEC("sockops")
+__success
+int test_sockops_delete(struct bpf_sock_ops *ctx __always_unused)
+{
+ test_sockmap_delete();
+ return CG_OK;
+}
+
+SEC("sockops")
+__failure __msg("cannot update sockmap in this context")
+int test_sockops_update(struct bpf_sock_ops *ctx)
+{
+ test_sockmap_update(ctx->sk);
+ return CG_OK;
+}
+
+SEC("sockops")
+__success
+int test_sockops_update_dedicated(struct bpf_sock_ops *ctx)
+{
+ bpf_sock_map_update(ctx, &sockmap, &zero, BPF_ANY);
+ bpf_sock_hash_update(ctx, &sockhash, &zero, BPF_ANY);
+ return CG_OK;
+}
+
+SEC("xdp")
+__success
+int test_xdp(struct xdp_md *ctx __always_unused)
+{
+ test_sockmap_lookup_and_mutate();
+ return XDP_PASS;
+}
diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
index 6f5d19665cf6..6a6fad625f7e 100644
--- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
+++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
@@ -6,8 +6,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include "bpf_misc.h"
-
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
+#include <../../../tools/include/linux/filter.h>
int vals[] SEC(".data.vals") = {1, 2, 3, 4};
@@ -76,6 +75,94 @@ __naked int subprog_result_precise(void)
);
}
+__naked __noinline __used
+static unsigned long fp_leaking_subprog()
+{
+ asm volatile (
+ ".8byte %[r0_eq_r10_cast_s8];"
+ "exit;"
+ :: __imm_insn(r0_eq_r10_cast_s8, BPF_MOVSX64_REG(BPF_REG_0, BPF_REG_10, 8))
+ );
+}
+
+__naked __noinline __used
+static unsigned long sneaky_fp_leaking_subprog()
+{
+ asm volatile (
+ "r1 = r10;"
+ ".8byte %[r0_eq_r1_cast_s8];"
+ "exit;"
+ :: __imm_insn(r0_eq_r1_cast_s8, BPF_MOVSX64_REG(BPF_REG_0, BPF_REG_1, 8))
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("6: (0f) r1 += r0")
+__msg("mark_precise: frame0: last_idx 6 first_idx 0 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r0 stack= before 5: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs=r0 stack= before 4: (27) r0 *= 4")
+__msg("mark_precise: frame0: regs=r0 stack= before 3: (57) r0 &= 3")
+__msg("mark_precise: frame0: regs=r0 stack= before 10: (95) exit")
+__msg("mark_precise: frame1: regs=r0 stack= before 9: (bf) r0 = (s8)r10")
+__msg("7: R0_w=scalar")
+__naked int fp_precise_subprog_result(void)
+{
+ asm volatile (
+ "call fp_leaking_subprog;"
+ /* use subprog's returned value (which is derived from r10=fp
+ * register), as index into vals array, forcing all of that to
+ * be known precisely
+ */
+ "r0 &= 3;"
+ "r0 *= 4;"
+ "r1 = %[vals];"
+ /* force precision marking */
+ "r1 += r0;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "exit;"
+ :
+ : __imm_ptr(vals)
+ : __clobber_common
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("6: (0f) r1 += r0")
+__msg("mark_precise: frame0: last_idx 6 first_idx 0 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r0 stack= before 5: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs=r0 stack= before 4: (27) r0 *= 4")
+__msg("mark_precise: frame0: regs=r0 stack= before 3: (57) r0 &= 3")
+__msg("mark_precise: frame0: regs=r0 stack= before 11: (95) exit")
+__msg("mark_precise: frame1: regs=r0 stack= before 10: (bf) r0 = (s8)r1")
+/* here r1 is marked precise, even though it's fp register, but that's fine
+ * because by the time we get out of subprogram it has to be derived from r10
+ * anyways, at which point we'll break precision chain
+ */
+__msg("mark_precise: frame1: regs=r1 stack= before 9: (bf) r1 = r10")
+__msg("7: R0_w=scalar")
+__naked int sneaky_fp_precise_subprog_result(void)
+{
+ asm volatile (
+ "call sneaky_fp_leaking_subprog;"
+ /* use subprog's returned value (which is derived from r10=fp
+ * register), as index into vals array, forcing all of that to
+ * be known precisely
+ */
+ "r0 &= 3;"
+ "r0 *= 4;"
+ "r1 = %[vals];"
+ /* force precision marking */
+ "r1 += r0;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "exit;"
+ :
+ : __imm_ptr(vals)
+ : __clobber_common
+ );
+}
+
SEC("?raw_tp")
__success __log_level(2)
__msg("9: (0f) r1 += r0")
diff --git a/tools/testing/selftests/bpf/progs/wq.c b/tools/testing/selftests/bpf/progs/wq.c
new file mode 100644
index 000000000000..f8d3ae0c29ae
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/wq.c
@@ -0,0 +1,189 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Benjamin Tissoires
+ */
+
+#include "bpf_experimental.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct hmap_elem {
+ int counter;
+ struct bpf_timer timer; /* unused */
+ struct bpf_spin_lock lock; /* unused */
+ struct bpf_wq work;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1000);
+ __type(key, int);
+ __type(value, struct hmap_elem);
+} hmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(max_entries, 1000);
+ __type(key, int);
+ __type(value, struct hmap_elem);
+} hmap_malloc SEC(".maps");
+
+struct elem {
+ int ok_offset;
+ struct bpf_wq w;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 2);
+ __type(key, int);
+ __type(value, struct elem);
+} array SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __uint(max_entries, 4);
+ __type(key, int);
+ __type(value, struct elem);
+} lru SEC(".maps");
+
+__u32 ok;
+__u32 ok_sleepable;
+
+static int test_elem_callback(void *map, int *key,
+ int (callback_fn)(void *map, int *key, void *value))
+{
+ struct elem init = {}, *val;
+ struct bpf_wq *wq;
+
+ if ((ok & (1 << *key) ||
+ (ok_sleepable & (1 << *key))))
+ return -22;
+
+ if (map == &lru &&
+ bpf_map_update_elem(map, key, &init, 0))
+ return -1;
+
+ val = bpf_map_lookup_elem(map, key);
+ if (!val)
+ return -2;
+
+ val->ok_offset = *key;
+
+ wq = &val->w;
+ if (bpf_wq_init(wq, map, 0) != 0)
+ return -3;
+
+ if (bpf_wq_set_callback(wq, callback_fn, 0))
+ return -4;
+
+ if (bpf_wq_start(wq, 0))
+ return -5;
+
+ return 0;
+}
+
+static int test_hmap_elem_callback(void *map, int *key,
+ int (callback_fn)(void *map, int *key, void *value))
+{
+ struct hmap_elem init = {}, *val;
+ struct bpf_wq *wq;
+
+ if ((ok & (1 << *key) ||
+ (ok_sleepable & (1 << *key))))
+ return -22;
+
+ if (bpf_map_update_elem(map, key, &init, 0))
+ return -1;
+
+ val = bpf_map_lookup_elem(map, key);
+ if (!val)
+ return -2;
+
+ wq = &val->work;
+ if (bpf_wq_init(wq, map, 0) != 0)
+ return -3;
+
+ if (bpf_wq_set_callback(wq, callback_fn, 0))
+ return -4;
+
+ if (bpf_wq_start(wq, 0))
+ return -5;
+
+ return 0;
+}
+
+/* callback for non sleepable workqueue */
+static int wq_callback(void *map, int *key, void *value)
+{
+ bpf_kfunc_common_test();
+ ok |= (1 << *key);
+ return 0;
+}
+
+/* callback for sleepable workqueue */
+static int wq_cb_sleepable(void *map, int *key, void *value)
+{
+ struct elem *data = (struct elem *)value;
+ int offset = data->ok_offset;
+
+ if (*key != offset)
+ return 0;
+
+ bpf_kfunc_call_test_sleepable();
+ ok_sleepable |= (1 << offset);
+ return 0;
+}
+
+SEC("tc")
+/* test that workqueues can be used from an array */
+__retval(0)
+long test_call_array_sleepable(void *ctx)
+{
+ int key = 0;
+
+ return test_elem_callback(&array, &key, wq_cb_sleepable);
+}
+
+SEC("syscall")
+/* Same test than above but from a sleepable context. */
+__retval(0)
+long test_syscall_array_sleepable(void *ctx)
+{
+ int key = 1;
+
+ return test_elem_callback(&array, &key, wq_cb_sleepable);
+}
+
+SEC("tc")
+/* test that workqueues can be used from a hashmap */
+__retval(0)
+long test_call_hash_sleepable(void *ctx)
+{
+ int key = 2;
+
+ return test_hmap_elem_callback(&hmap, &key, wq_callback);
+}
+
+SEC("tc")
+/* test that workqueues can be used from a hashmap with NO_PREALLOC. */
+__retval(0)
+long test_call_hash_malloc_sleepable(void *ctx)
+{
+ int key = 3;
+
+ return test_hmap_elem_callback(&hmap_malloc, &key, wq_callback);
+}
+
+SEC("tc")
+/* test that workqueues can be used from a LRU map */
+__retval(0)
+long test_call_lru_sleepable(void *ctx)
+{
+ int key = 4;
+
+ return test_elem_callback(&lru, &key, wq_callback);
+}
diff --git a/tools/testing/selftests/bpf/progs/wq_failures.c b/tools/testing/selftests/bpf/progs/wq_failures.c
new file mode 100644
index 000000000000..25b51a72fe0f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/wq_failures.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Benjamin Tissoires
+ */
+
+#include "bpf_experimental.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct elem {
+ struct bpf_wq w;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 2);
+ __type(key, int);
+ __type(value, struct elem);
+} array SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __uint(max_entries, 4);
+ __type(key, int);
+ __type(value, struct elem);
+} lru SEC(".maps");
+
+/* callback for non sleepable workqueue */
+static int wq_callback(void *map, int *key, void *value)
+{
+ bpf_kfunc_common_test();
+ return 0;
+}
+
+/* callback for sleepable workqueue */
+static int wq_cb_sleepable(void *map, int *key, void *value)
+{
+ bpf_kfunc_call_test_sleepable();
+ return 0;
+}
+
+SEC("tc")
+/* test that bpf_wq_init takes a map as a second argument
+ */
+__log_level(2)
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure
+__msg(": (85) call bpf_wq_init#") /* anchor message */
+__msg("pointer in R2 isn't map pointer")
+long test_wq_init_nomap(void *ctx)
+{
+ struct bpf_wq *wq;
+ struct elem *val;
+ int key = 0;
+
+ val = bpf_map_lookup_elem(&array, &key);
+ if (!val)
+ return -1;
+
+ wq = &val->w;
+ if (bpf_wq_init(wq, &key, 0) != 0)
+ return -3;
+
+ return 0;
+}
+
+SEC("tc")
+/* test that the workqueue is part of the map in bpf_wq_init
+ */
+__log_level(2)
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure
+__msg(": (85) call bpf_wq_init#") /* anchor message */
+__msg("workqueue pointer in R1 map_uid=0 doesn't match map pointer in R2 map_uid=0")
+long test_wq_init_wrong_map(void *ctx)
+{
+ struct bpf_wq *wq;
+ struct elem *val;
+ int key = 0;
+
+ val = bpf_map_lookup_elem(&array, &key);
+ if (!val)
+ return -1;
+
+ wq = &val->w;
+ if (bpf_wq_init(wq, &lru, 0) != 0)
+ return -3;
+
+ return 0;
+}
+
+SEC("?tc")
+__log_level(2)
+__failure
+/* check that the first argument of bpf_wq_set_callback()
+ * is a correct bpf_wq pointer.
+ */
+__msg(": (85) call bpf_wq_set_callback_impl#") /* anchor message */
+__msg("arg#0 doesn't point to a map value")
+long test_wrong_wq_pointer(void *ctx)
+{
+ int key = 0;
+ struct bpf_wq *wq;
+
+ wq = bpf_map_lookup_elem(&array, &key);
+ if (!wq)
+ return 1;
+
+ if (bpf_wq_init(wq, &array, 0))
+ return 2;
+
+ if (bpf_wq_set_callback((void *)&wq, wq_callback, 0))
+ return 3;
+
+ return -22;
+}
+
+SEC("?tc")
+__log_level(2)
+__failure
+/* check that the first argument of bpf_wq_set_callback()
+ * is a correct bpf_wq pointer.
+ */
+__msg(": (85) call bpf_wq_set_callback_impl#") /* anchor message */
+__msg("off 1 doesn't point to 'struct bpf_wq' that is at 0")
+long test_wrong_wq_pointer_offset(void *ctx)
+{
+ int key = 0;
+ struct bpf_wq *wq;
+
+ wq = bpf_map_lookup_elem(&array, &key);
+ if (!wq)
+ return 1;
+
+ if (bpf_wq_init(wq, &array, 0))
+ return 2;
+
+ if (bpf_wq_set_callback((void *)wq + 1, wq_cb_sleepable, 0))
+ return 3;
+
+ return -22;
+}
diff --git a/tools/testing/selftests/bpf/progs/xdp_flowtable.c b/tools/testing/selftests/bpf/progs/xdp_flowtable.c
new file mode 100644
index 000000000000..7fdc7b23ee74
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xdp_flowtable.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0
+#define BPF_NO_KFUNC_PROTOTYPES
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define ETH_P_IP 0x0800
+#define ETH_P_IPV6 0x86dd
+#define IP_MF 0x2000 /* "More Fragments" */
+#define IP_OFFSET 0x1fff /* "Fragment Offset" */
+#define AF_INET 2
+#define AF_INET6 10
+
+struct bpf_flowtable_opts___local {
+ s32 error;
+};
+
+struct flow_offload_tuple_rhash *
+bpf_xdp_flow_lookup(struct xdp_md *, struct bpf_fib_lookup *,
+ struct bpf_flowtable_opts___local *, u32) __ksym;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 1);
+} stats SEC(".maps");
+
+static bool xdp_flowtable_offload_check_iphdr(struct iphdr *iph)
+{
+ /* ip fragmented traffic */
+ if (iph->frag_off & bpf_htons(IP_MF | IP_OFFSET))
+ return false;
+
+ /* ip options */
+ if (iph->ihl * 4 != sizeof(*iph))
+ return false;
+
+ if (iph->ttl <= 1)
+ return false;
+
+ return true;
+}
+
+static bool xdp_flowtable_offload_check_tcp_state(void *ports, void *data_end,
+ u8 proto)
+{
+ if (proto == IPPROTO_TCP) {
+ struct tcphdr *tcph = ports;
+
+ if (tcph + 1 > data_end)
+ return false;
+
+ if (tcph->fin || tcph->rst)
+ return false;
+ }
+
+ return true;
+}
+
+struct flow_ports___local {
+ __be16 source, dest;
+} __attribute__((preserve_access_index));
+
+SEC("xdp.frags")
+int xdp_flowtable_do_lookup(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ struct bpf_flowtable_opts___local opts = {};
+ struct flow_offload_tuple_rhash *tuplehash;
+ struct bpf_fib_lookup tuple = {
+ .ifindex = ctx->ingress_ifindex,
+ };
+ void *data = (void *)(long)ctx->data;
+ struct ethhdr *eth = data;
+ struct flow_ports___local *ports;
+ __u32 *val, key = 0;
+
+ if (eth + 1 > data_end)
+ return XDP_DROP;
+
+ switch (eth->h_proto) {
+ case bpf_htons(ETH_P_IP): {
+ struct iphdr *iph = data + sizeof(*eth);
+
+ ports = (struct flow_ports___local *)(iph + 1);
+ if (ports + 1 > data_end)
+ return XDP_PASS;
+
+ /* sanity check on ip header */
+ if (!xdp_flowtable_offload_check_iphdr(iph))
+ return XDP_PASS;
+
+ if (!xdp_flowtable_offload_check_tcp_state(ports, data_end,
+ iph->protocol))
+ return XDP_PASS;
+
+ tuple.family = AF_INET;
+ tuple.tos = iph->tos;
+ tuple.l4_protocol = iph->protocol;
+ tuple.tot_len = bpf_ntohs(iph->tot_len);
+ tuple.ipv4_src = iph->saddr;
+ tuple.ipv4_dst = iph->daddr;
+ tuple.sport = ports->source;
+ tuple.dport = ports->dest;
+ break;
+ }
+ case bpf_htons(ETH_P_IPV6): {
+ struct in6_addr *src = (struct in6_addr *)tuple.ipv6_src;
+ struct in6_addr *dst = (struct in6_addr *)tuple.ipv6_dst;
+ struct ipv6hdr *ip6h = data + sizeof(*eth);
+
+ ports = (struct flow_ports___local *)(ip6h + 1);
+ if (ports + 1 > data_end)
+ return XDP_PASS;
+
+ if (ip6h->hop_limit <= 1)
+ return XDP_PASS;
+
+ if (!xdp_flowtable_offload_check_tcp_state(ports, data_end,
+ ip6h->nexthdr))
+ return XDP_PASS;
+
+ tuple.family = AF_INET6;
+ tuple.l4_protocol = ip6h->nexthdr;
+ tuple.tot_len = bpf_ntohs(ip6h->payload_len);
+ *src = ip6h->saddr;
+ *dst = ip6h->daddr;
+ tuple.sport = ports->source;
+ tuple.dport = ports->dest;
+ break;
+ }
+ default:
+ return XDP_PASS;
+ }
+
+ tuplehash = bpf_xdp_flow_lookup(ctx, &tuple, &opts, sizeof(opts));
+ if (!tuplehash)
+ return XDP_PASS;
+
+ val = bpf_map_lookup_elem(&stats, &key);
+ if (val)
+ __sync_add_and_fetch(val, 1);
+
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
index 7ea9785738b5..f8f5dc9f72b8 100644
--- a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
+++ b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause
/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+#define BPF_NO_KFUNC_PROTOTYPES
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
diff --git a/tools/testing/selftests/bpf/progs/xfrm_info.c b/tools/testing/selftests/bpf/progs/xfrm_info.c
index f6a501fbba2b..a1d9f106c3f0 100644
--- a/tools/testing/selftests/bpf/progs/xfrm_info.c
+++ b/tools/testing/selftests/bpf/progs/xfrm_info.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#define BPF_NO_KFUNC_PROTOTYPES
#include "vmlinux.h"
#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
diff --git a/tools/testing/selftests/bpf/test_cpp.cpp b/tools/testing/selftests/bpf/test_cpp.cpp
index f4936834f76f..dde0bb16e782 100644
--- a/tools/testing/selftests/bpf/test_cpp.cpp
+++ b/tools/testing/selftests/bpf/test_cpp.cpp
@@ -7,6 +7,7 @@
#include <bpf/bpf.h>
#include <bpf/btf.h>
#include "test_core_extern.skel.h"
+#include "struct_ops_module.skel.h"
template <typename T>
class Skeleton {
@@ -98,6 +99,7 @@ int main(int argc, char *argv[])
{
struct btf_dump_opts opts = { };
struct test_core_extern *skel;
+ struct struct_ops_module *skel2;
struct btf *btf;
int fd;
@@ -118,6 +120,9 @@ int main(int argc, char *argv[])
skel = test_core_extern__open_and_load();
test_core_extern__destroy(skel);
+ skel2 = struct_ops_module__open_and_load();
+ struct_ops_module__destroy(skel2);
+
fd = bpf_enable_stats(BPF_STATS_RUN_TIME);
if (fd < 0)
std::cout << "FAILED to enable stats: " << fd << std::endl;
diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c
index 524c38e9cde4..f14e10b0de96 100644
--- a/tools/testing/selftests/bpf/test_loader.c
+++ b/tools/testing/selftests/bpf/test_loader.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
#include <linux/capability.h>
#include <stdlib.h>
+#include <regex.h>
#include <test_progs.h>
#include <bpf/btf.h>
@@ -17,9 +18,11 @@
#define TEST_TAG_EXPECT_FAILURE "comment:test_expect_failure"
#define TEST_TAG_EXPECT_SUCCESS "comment:test_expect_success"
#define TEST_TAG_EXPECT_MSG_PFX "comment:test_expect_msg="
+#define TEST_TAG_EXPECT_REGEX_PFX "comment:test_expect_regex="
#define TEST_TAG_EXPECT_FAILURE_UNPRIV "comment:test_expect_failure_unpriv"
#define TEST_TAG_EXPECT_SUCCESS_UNPRIV "comment:test_expect_success_unpriv"
#define TEST_TAG_EXPECT_MSG_PFX_UNPRIV "comment:test_expect_msg_unpriv="
+#define TEST_TAG_EXPECT_REGEX_PFX_UNPRIV "comment:test_expect_regex_unpriv="
#define TEST_TAG_LOG_LEVEL_PFX "comment:test_log_level="
#define TEST_TAG_PROG_FLAGS_PFX "comment:test_prog_flags="
#define TEST_TAG_DESCRIPTION_PFX "comment:test_description="
@@ -46,10 +49,16 @@ enum mode {
UNPRIV = 2
};
+struct expect_msg {
+ const char *substr; /* substring match */
+ const char *regex_str; /* regex-based match */
+ regex_t regex;
+};
+
struct test_subspec {
char *name;
bool expect_failure;
- const char **expect_msgs;
+ struct expect_msg *expect_msgs;
size_t expect_msg_cnt;
int retval;
bool execute;
@@ -89,6 +98,16 @@ void test_loader_fini(struct test_loader *tester)
static void free_test_spec(struct test_spec *spec)
{
+ int i;
+
+ /* Deallocate expect_msgs arrays. */
+ for (i = 0; i < spec->priv.expect_msg_cnt; i++)
+ if (spec->priv.expect_msgs[i].regex_str)
+ regfree(&spec->priv.expect_msgs[i].regex);
+ for (i = 0; i < spec->unpriv.expect_msg_cnt; i++)
+ if (spec->unpriv.expect_msgs[i].regex_str)
+ regfree(&spec->unpriv.expect_msgs[i].regex);
+
free(spec->priv.name);
free(spec->unpriv.name);
free(spec->priv.expect_msgs);
@@ -100,18 +119,38 @@ static void free_test_spec(struct test_spec *spec)
spec->unpriv.expect_msgs = NULL;
}
-static int push_msg(const char *msg, struct test_subspec *subspec)
+static int push_msg(const char *substr, const char *regex_str, struct test_subspec *subspec)
{
void *tmp;
+ int regcomp_res;
+ char error_msg[100];
+ struct expect_msg *msg;
- tmp = realloc(subspec->expect_msgs, (1 + subspec->expect_msg_cnt) * sizeof(void *));
+ tmp = realloc(subspec->expect_msgs,
+ (1 + subspec->expect_msg_cnt) * sizeof(struct expect_msg));
if (!tmp) {
ASSERT_FAIL("failed to realloc memory for messages\n");
return -ENOMEM;
}
subspec->expect_msgs = tmp;
- subspec->expect_msgs[subspec->expect_msg_cnt++] = msg;
+ msg = &subspec->expect_msgs[subspec->expect_msg_cnt];
+
+ if (substr) {
+ msg->substr = substr;
+ msg->regex_str = NULL;
+ } else {
+ msg->regex_str = regex_str;
+ msg->substr = NULL;
+ regcomp_res = regcomp(&msg->regex, regex_str, REG_EXTENDED|REG_NEWLINE);
+ if (regcomp_res != 0) {
+ regerror(regcomp_res, &msg->regex, error_msg, sizeof(error_msg));
+ PRINT_FAIL("Regexp compilation error in '%s': '%s'\n",
+ regex_str, error_msg);
+ return -EINVAL;
+ }
+ }
+ subspec->expect_msg_cnt += 1;
return 0;
}
@@ -233,13 +272,25 @@ static int parse_test_spec(struct test_loader *tester,
spec->mode_mask |= UNPRIV;
} else if (str_has_pfx(s, TEST_TAG_EXPECT_MSG_PFX)) {
msg = s + sizeof(TEST_TAG_EXPECT_MSG_PFX) - 1;
- err = push_msg(msg, &spec->priv);
+ err = push_msg(msg, NULL, &spec->priv);
if (err)
goto cleanup;
spec->mode_mask |= PRIV;
} else if (str_has_pfx(s, TEST_TAG_EXPECT_MSG_PFX_UNPRIV)) {
msg = s + sizeof(TEST_TAG_EXPECT_MSG_PFX_UNPRIV) - 1;
- err = push_msg(msg, &spec->unpriv);
+ err = push_msg(msg, NULL, &spec->unpriv);
+ if (err)
+ goto cleanup;
+ spec->mode_mask |= UNPRIV;
+ } else if (str_has_pfx(s, TEST_TAG_EXPECT_REGEX_PFX)) {
+ msg = s + sizeof(TEST_TAG_EXPECT_REGEX_PFX) - 1;
+ err = push_msg(NULL, msg, &spec->priv);
+ if (err)
+ goto cleanup;
+ spec->mode_mask |= PRIV;
+ } else if (str_has_pfx(s, TEST_TAG_EXPECT_REGEX_PFX_UNPRIV)) {
+ msg = s + sizeof(TEST_TAG_EXPECT_REGEX_PFX_UNPRIV) - 1;
+ err = push_msg(NULL, msg, &spec->unpriv);
if (err)
goto cleanup;
spec->mode_mask |= UNPRIV;
@@ -337,16 +388,13 @@ static int parse_test_spec(struct test_loader *tester,
}
if (!spec->unpriv.expect_msgs) {
- size_t sz = spec->priv.expect_msg_cnt * sizeof(void *);
+ for (i = 0; i < spec->priv.expect_msg_cnt; i++) {
+ struct expect_msg *msg = &spec->priv.expect_msgs[i];
- spec->unpriv.expect_msgs = malloc(sz);
- if (!spec->unpriv.expect_msgs) {
- PRINT_FAIL("failed to allocate memory for unpriv.expect_msgs\n");
- err = -ENOMEM;
- goto cleanup;
+ err = push_msg(msg->substr, msg->regex_str, &spec->unpriv);
+ if (err)
+ goto cleanup;
}
- memcpy(spec->unpriv.expect_msgs, spec->priv.expect_msgs, sz);
- spec->unpriv.expect_msg_cnt = spec->priv.expect_msg_cnt;
}
}
@@ -402,27 +450,40 @@ static void validate_case(struct test_loader *tester,
struct bpf_program *prog,
int load_err)
{
- int i, j;
+ int i, j, err;
+ char *match;
+ regmatch_t reg_match[1];
for (i = 0; i < subspec->expect_msg_cnt; i++) {
- char *match;
- const char *expect_msg;
-
- expect_msg = subspec->expect_msgs[i];
+ struct expect_msg *msg = &subspec->expect_msgs[i];
+
+ if (msg->substr) {
+ match = strstr(tester->log_buf + tester->next_match_pos, msg->substr);
+ if (match)
+ tester->next_match_pos = match - tester->log_buf + strlen(msg->substr);
+ } else {
+ err = regexec(&msg->regex,
+ tester->log_buf + tester->next_match_pos, 1, reg_match, 0);
+ if (err == 0) {
+ match = tester->log_buf + tester->next_match_pos + reg_match[0].rm_so;
+ tester->next_match_pos += reg_match[0].rm_eo;
+ } else {
+ match = NULL;
+ }
+ }
- match = strstr(tester->log_buf + tester->next_match_pos, expect_msg);
if (!ASSERT_OK_PTR(match, "expect_msg")) {
- /* if we are in verbose mode, we've already emitted log */
if (env.verbosity == VERBOSE_NONE)
emit_verifier_log(tester->log_buf, true /*force*/);
- for (j = 0; j < i; j++)
- fprintf(stderr,
- "MATCHED MSG: '%s'\n", subspec->expect_msgs[j]);
- fprintf(stderr, "EXPECTED MSG: '%s'\n", expect_msg);
+ for (j = 0; j <= i; j++) {
+ msg = &subspec->expect_msgs[j];
+ fprintf(stderr, "%s %s: '%s'\n",
+ j < i ? "MATCHED " : "EXPECTED",
+ msg->substr ? "SUBSTR" : " REGEX",
+ msg->substr ?: msg->regex_str);
+ }
return;
}
-
- tester->next_match_pos = match - tester->log_buf + strlen(expect_msg);
}
}
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 0ba5a20b19ba..51341d50213b 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -377,6 +377,15 @@ int test__join_cgroup(const char *path);
___ok; \
})
+#define ASSERT_OK_FD(fd, name) ({ \
+ static int duration = 0; \
+ int ___fd = (fd); \
+ bool ___ok = ___fd >= 0; \
+ CHECK(!___ok, (name), "unexpected fd: %d (errno %d)\n", \
+ ___fd, errno); \
+ ___ok; \
+})
+
#define SYS(goto_label, fmt, ...) \
({ \
char cmd[1024]; \
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
deleted file mode 100644
index 80c42583f597..000000000000
--- a/tools/testing/selftests/bpf/test_sock_addr.c
+++ /dev/null
@@ -1,1434 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2018 Facebook
-
-#define _GNU_SOURCE
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-#include <arpa/inet.h>
-#include <netinet/in.h>
-#include <sys/types.h>
-#include <sys/select.h>
-#include <sys/socket.h>
-
-#include <linux/filter.h>
-
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#include "cgroup_helpers.h"
-#include "testing_helpers.h"
-#include "bpf_util.h"
-
-#ifndef ENOTSUPP
-# define ENOTSUPP 524
-#endif
-
-#define CG_PATH "/foo"
-#define CONNECT4_PROG_PATH "./connect4_prog.bpf.o"
-#define CONNECT6_PROG_PATH "./connect6_prog.bpf.o"
-#define SENDMSG4_PROG_PATH "./sendmsg4_prog.bpf.o"
-#define SENDMSG6_PROG_PATH "./sendmsg6_prog.bpf.o"
-#define RECVMSG4_PROG_PATH "./recvmsg4_prog.bpf.o"
-#define RECVMSG6_PROG_PATH "./recvmsg6_prog.bpf.o"
-#define BIND4_PROG_PATH "./bind4_prog.bpf.o"
-#define BIND6_PROG_PATH "./bind6_prog.bpf.o"
-
-#define SERV4_IP "192.168.1.254"
-#define SERV4_REWRITE_IP "127.0.0.1"
-#define SRC4_IP "172.16.0.1"
-#define SRC4_REWRITE_IP "127.0.0.4"
-#define SERV4_PORT 4040
-#define SERV4_REWRITE_PORT 4444
-
-#define SERV6_IP "face:b00c:1234:5678::abcd"
-#define SERV6_REWRITE_IP "::1"
-#define SERV6_V4MAPPED_IP "::ffff:192.168.0.4"
-#define SRC6_IP "::1"
-#define SRC6_REWRITE_IP "::6"
-#define WILDCARD6_IP "::"
-#define SERV6_PORT 6060
-#define SERV6_REWRITE_PORT 6666
-
-#define INET_NTOP_BUF 40
-
-struct sock_addr_test;
-
-typedef int (*load_fn)(const struct sock_addr_test *test);
-typedef int (*info_fn)(int, struct sockaddr *, socklen_t *);
-
-char bpf_log_buf[BPF_LOG_BUF_SIZE];
-
-struct sock_addr_test {
- const char *descr;
- /* BPF prog properties */
- load_fn loadfn;
- enum bpf_attach_type expected_attach_type;
- enum bpf_attach_type attach_type;
- /* Socket properties */
- int domain;
- int type;
- /* IP:port pairs for BPF prog to override */
- const char *requested_ip;
- unsigned short requested_port;
- const char *expected_ip;
- unsigned short expected_port;
- const char *expected_src_ip;
- /* Expected test result */
- enum {
- LOAD_REJECT,
- ATTACH_REJECT,
- ATTACH_OKAY,
- SYSCALL_EPERM,
- SYSCALL_ENOTSUPP,
- SUCCESS,
- } expected_result;
-};
-
-static int bind4_prog_load(const struct sock_addr_test *test);
-static int bind6_prog_load(const struct sock_addr_test *test);
-static int connect4_prog_load(const struct sock_addr_test *test);
-static int connect6_prog_load(const struct sock_addr_test *test);
-static int sendmsg_allow_prog_load(const struct sock_addr_test *test);
-static int sendmsg_deny_prog_load(const struct sock_addr_test *test);
-static int recvmsg_allow_prog_load(const struct sock_addr_test *test);
-static int recvmsg_deny_prog_load(const struct sock_addr_test *test);
-static int sendmsg4_rw_asm_prog_load(const struct sock_addr_test *test);
-static int recvmsg4_rw_c_prog_load(const struct sock_addr_test *test);
-static int sendmsg4_rw_c_prog_load(const struct sock_addr_test *test);
-static int sendmsg6_rw_asm_prog_load(const struct sock_addr_test *test);
-static int recvmsg6_rw_c_prog_load(const struct sock_addr_test *test);
-static int sendmsg6_rw_c_prog_load(const struct sock_addr_test *test);
-static int sendmsg6_rw_v4mapped_prog_load(const struct sock_addr_test *test);
-static int sendmsg6_rw_wildcard_prog_load(const struct sock_addr_test *test);
-
-static struct sock_addr_test tests[] = {
- /* bind */
- {
- "bind4: load prog with wrong expected attach type",
- bind4_prog_load,
- BPF_CGROUP_INET6_BIND,
- BPF_CGROUP_INET4_BIND,
- AF_INET,
- SOCK_STREAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- LOAD_REJECT,
- },
- {
- "bind4: attach prog with wrong attach type",
- bind4_prog_load,
- BPF_CGROUP_INET4_BIND,
- BPF_CGROUP_INET6_BIND,
- AF_INET,
- SOCK_STREAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- ATTACH_REJECT,
- },
- {
- "bind4: rewrite IP & TCP port in",
- bind4_prog_load,
- BPF_CGROUP_INET4_BIND,
- BPF_CGROUP_INET4_BIND,
- AF_INET,
- SOCK_STREAM,
- SERV4_IP,
- SERV4_PORT,
- SERV4_REWRITE_IP,
- SERV4_REWRITE_PORT,
- NULL,
- SUCCESS,
- },
- {
- "bind4: rewrite IP & UDP port in",
- bind4_prog_load,
- BPF_CGROUP_INET4_BIND,
- BPF_CGROUP_INET4_BIND,
- AF_INET,
- SOCK_DGRAM,
- SERV4_IP,
- SERV4_PORT,
- SERV4_REWRITE_IP,
- SERV4_REWRITE_PORT,
- NULL,
- SUCCESS,
- },
- {
- "bind6: load prog with wrong expected attach type",
- bind6_prog_load,
- BPF_CGROUP_INET4_BIND,
- BPF_CGROUP_INET6_BIND,
- AF_INET6,
- SOCK_STREAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- LOAD_REJECT,
- },
- {
- "bind6: attach prog with wrong attach type",
- bind6_prog_load,
- BPF_CGROUP_INET6_BIND,
- BPF_CGROUP_INET4_BIND,
- AF_INET,
- SOCK_STREAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- ATTACH_REJECT,
- },
- {
- "bind6: rewrite IP & TCP port in",
- bind6_prog_load,
- BPF_CGROUP_INET6_BIND,
- BPF_CGROUP_INET6_BIND,
- AF_INET6,
- SOCK_STREAM,
- SERV6_IP,
- SERV6_PORT,
- SERV6_REWRITE_IP,
- SERV6_REWRITE_PORT,
- NULL,
- SUCCESS,
- },
- {
- "bind6: rewrite IP & UDP port in",
- bind6_prog_load,
- BPF_CGROUP_INET6_BIND,
- BPF_CGROUP_INET6_BIND,
- AF_INET6,
- SOCK_DGRAM,
- SERV6_IP,
- SERV6_PORT,
- SERV6_REWRITE_IP,
- SERV6_REWRITE_PORT,
- NULL,
- SUCCESS,
- },
-
- /* connect */
- {
- "connect4: load prog with wrong expected attach type",
- connect4_prog_load,
- BPF_CGROUP_INET6_CONNECT,
- BPF_CGROUP_INET4_CONNECT,
- AF_INET,
- SOCK_STREAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- LOAD_REJECT,
- },
- {
- "connect4: attach prog with wrong attach type",
- connect4_prog_load,
- BPF_CGROUP_INET4_CONNECT,
- BPF_CGROUP_INET6_CONNECT,
- AF_INET,
- SOCK_STREAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- ATTACH_REJECT,
- },
- {
- "connect4: rewrite IP & TCP port",
- connect4_prog_load,
- BPF_CGROUP_INET4_CONNECT,
- BPF_CGROUP_INET4_CONNECT,
- AF_INET,
- SOCK_STREAM,
- SERV4_IP,
- SERV4_PORT,
- SERV4_REWRITE_IP,
- SERV4_REWRITE_PORT,
- SRC4_REWRITE_IP,
- SUCCESS,
- },
- {
- "connect4: rewrite IP & UDP port",
- connect4_prog_load,
- BPF_CGROUP_INET4_CONNECT,
- BPF_CGROUP_INET4_CONNECT,
- AF_INET,
- SOCK_DGRAM,
- SERV4_IP,
- SERV4_PORT,
- SERV4_REWRITE_IP,
- SERV4_REWRITE_PORT,
- SRC4_REWRITE_IP,
- SUCCESS,
- },
- {
- "connect6: load prog with wrong expected attach type",
- connect6_prog_load,
- BPF_CGROUP_INET4_CONNECT,
- BPF_CGROUP_INET6_CONNECT,
- AF_INET6,
- SOCK_STREAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- LOAD_REJECT,
- },
- {
- "connect6: attach prog with wrong attach type",
- connect6_prog_load,
- BPF_CGROUP_INET6_CONNECT,
- BPF_CGROUP_INET4_CONNECT,
- AF_INET,
- SOCK_STREAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- ATTACH_REJECT,
- },
- {
- "connect6: rewrite IP & TCP port",
- connect6_prog_load,
- BPF_CGROUP_INET6_CONNECT,
- BPF_CGROUP_INET6_CONNECT,
- AF_INET6,
- SOCK_STREAM,
- SERV6_IP,
- SERV6_PORT,
- SERV6_REWRITE_IP,
- SERV6_REWRITE_PORT,
- SRC6_REWRITE_IP,
- SUCCESS,
- },
- {
- "connect6: rewrite IP & UDP port",
- connect6_prog_load,
- BPF_CGROUP_INET6_CONNECT,
- BPF_CGROUP_INET6_CONNECT,
- AF_INET6,
- SOCK_DGRAM,
- SERV6_IP,
- SERV6_PORT,
- SERV6_REWRITE_IP,
- SERV6_REWRITE_PORT,
- SRC6_REWRITE_IP,
- SUCCESS,
- },
-
- /* sendmsg */
- {
- "sendmsg4: load prog with wrong expected attach type",
- sendmsg4_rw_asm_prog_load,
- BPF_CGROUP_UDP6_SENDMSG,
- BPF_CGROUP_UDP4_SENDMSG,
- AF_INET,
- SOCK_DGRAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- LOAD_REJECT,
- },
- {
- "sendmsg4: attach prog with wrong attach type",
- sendmsg4_rw_asm_prog_load,
- BPF_CGROUP_UDP4_SENDMSG,
- BPF_CGROUP_UDP6_SENDMSG,
- AF_INET,
- SOCK_DGRAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- ATTACH_REJECT,
- },
- {
- "sendmsg4: rewrite IP & port (asm)",
- sendmsg4_rw_asm_prog_load,
- BPF_CGROUP_UDP4_SENDMSG,
- BPF_CGROUP_UDP4_SENDMSG,
- AF_INET,
- SOCK_DGRAM,
- SERV4_IP,
- SERV4_PORT,
- SERV4_REWRITE_IP,
- SERV4_REWRITE_PORT,
- SRC4_REWRITE_IP,
- SUCCESS,
- },
- {
- "sendmsg4: rewrite IP & port (C)",
- sendmsg4_rw_c_prog_load,
- BPF_CGROUP_UDP4_SENDMSG,
- BPF_CGROUP_UDP4_SENDMSG,
- AF_INET,
- SOCK_DGRAM,
- SERV4_IP,
- SERV4_PORT,
- SERV4_REWRITE_IP,
- SERV4_REWRITE_PORT,
- SRC4_REWRITE_IP,
- SUCCESS,
- },
- {
- "sendmsg4: deny call",
- sendmsg_deny_prog_load,
- BPF_CGROUP_UDP4_SENDMSG,
- BPF_CGROUP_UDP4_SENDMSG,
- AF_INET,
- SOCK_DGRAM,
- SERV4_IP,
- SERV4_PORT,
- SERV4_REWRITE_IP,
- SERV4_REWRITE_PORT,
- SRC4_REWRITE_IP,
- SYSCALL_EPERM,
- },
- {
- "sendmsg6: load prog with wrong expected attach type",
- sendmsg6_rw_asm_prog_load,
- BPF_CGROUP_UDP4_SENDMSG,
- BPF_CGROUP_UDP6_SENDMSG,
- AF_INET6,
- SOCK_DGRAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- LOAD_REJECT,
- },
- {
- "sendmsg6: attach prog with wrong attach type",
- sendmsg6_rw_asm_prog_load,
- BPF_CGROUP_UDP6_SENDMSG,
- BPF_CGROUP_UDP4_SENDMSG,
- AF_INET6,
- SOCK_DGRAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- ATTACH_REJECT,
- },
- {
- "sendmsg6: rewrite IP & port (asm)",
- sendmsg6_rw_asm_prog_load,
- BPF_CGROUP_UDP6_SENDMSG,
- BPF_CGROUP_UDP6_SENDMSG,
- AF_INET6,
- SOCK_DGRAM,
- SERV6_IP,
- SERV6_PORT,
- SERV6_REWRITE_IP,
- SERV6_REWRITE_PORT,
- SRC6_REWRITE_IP,
- SUCCESS,
- },
- {
- "sendmsg6: rewrite IP & port (C)",
- sendmsg6_rw_c_prog_load,
- BPF_CGROUP_UDP6_SENDMSG,
- BPF_CGROUP_UDP6_SENDMSG,
- AF_INET6,
- SOCK_DGRAM,
- SERV6_IP,
- SERV6_PORT,
- SERV6_REWRITE_IP,
- SERV6_REWRITE_PORT,
- SRC6_REWRITE_IP,
- SUCCESS,
- },
- {
- "sendmsg6: IPv4-mapped IPv6",
- sendmsg6_rw_v4mapped_prog_load,
- BPF_CGROUP_UDP6_SENDMSG,
- BPF_CGROUP_UDP6_SENDMSG,
- AF_INET6,
- SOCK_DGRAM,
- SERV6_IP,
- SERV6_PORT,
- SERV6_REWRITE_IP,
- SERV6_REWRITE_PORT,
- SRC6_REWRITE_IP,
- SYSCALL_ENOTSUPP,
- },
- {
- "sendmsg6: set dst IP = [::] (BSD'ism)",
- sendmsg6_rw_wildcard_prog_load,
- BPF_CGROUP_UDP6_SENDMSG,
- BPF_CGROUP_UDP6_SENDMSG,
- AF_INET6,
- SOCK_DGRAM,
- SERV6_IP,
- SERV6_PORT,
- SERV6_REWRITE_IP,
- SERV6_REWRITE_PORT,
- SRC6_REWRITE_IP,
- SUCCESS,
- },
- {
- "sendmsg6: preserve dst IP = [::] (BSD'ism)",
- sendmsg_allow_prog_load,
- BPF_CGROUP_UDP6_SENDMSG,
- BPF_CGROUP_UDP6_SENDMSG,
- AF_INET6,
- SOCK_DGRAM,
- WILDCARD6_IP,
- SERV6_PORT,
- SERV6_REWRITE_IP,
- SERV6_PORT,
- SRC6_IP,
- SUCCESS,
- },
- {
- "sendmsg6: deny call",
- sendmsg_deny_prog_load,
- BPF_CGROUP_UDP6_SENDMSG,
- BPF_CGROUP_UDP6_SENDMSG,
- AF_INET6,
- SOCK_DGRAM,
- SERV6_IP,
- SERV6_PORT,
- SERV6_REWRITE_IP,
- SERV6_REWRITE_PORT,
- SRC6_REWRITE_IP,
- SYSCALL_EPERM,
- },
-
- /* recvmsg */
- {
- "recvmsg4: return code ok",
- recvmsg_allow_prog_load,
- BPF_CGROUP_UDP4_RECVMSG,
- BPF_CGROUP_UDP4_RECVMSG,
- AF_INET,
- SOCK_DGRAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- ATTACH_OKAY,
- },
- {
- "recvmsg4: return code !ok",
- recvmsg_deny_prog_load,
- BPF_CGROUP_UDP4_RECVMSG,
- BPF_CGROUP_UDP4_RECVMSG,
- AF_INET,
- SOCK_DGRAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- LOAD_REJECT,
- },
- {
- "recvmsg6: return code ok",
- recvmsg_allow_prog_load,
- BPF_CGROUP_UDP6_RECVMSG,
- BPF_CGROUP_UDP6_RECVMSG,
- AF_INET6,
- SOCK_DGRAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- ATTACH_OKAY,
- },
- {
- "recvmsg6: return code !ok",
- recvmsg_deny_prog_load,
- BPF_CGROUP_UDP6_RECVMSG,
- BPF_CGROUP_UDP6_RECVMSG,
- AF_INET6,
- SOCK_DGRAM,
- NULL,
- 0,
- NULL,
- 0,
- NULL,
- LOAD_REJECT,
- },
- {
- "recvmsg4: rewrite IP & port (C)",
- recvmsg4_rw_c_prog_load,
- BPF_CGROUP_UDP4_RECVMSG,
- BPF_CGROUP_UDP4_RECVMSG,
- AF_INET,
- SOCK_DGRAM,
- SERV4_REWRITE_IP,
- SERV4_REWRITE_PORT,
- SERV4_REWRITE_IP,
- SERV4_REWRITE_PORT,
- SERV4_IP,
- SUCCESS,
- },
- {
- "recvmsg6: rewrite IP & port (C)",
- recvmsg6_rw_c_prog_load,
- BPF_CGROUP_UDP6_RECVMSG,
- BPF_CGROUP_UDP6_RECVMSG,
- AF_INET6,
- SOCK_DGRAM,
- SERV6_REWRITE_IP,
- SERV6_REWRITE_PORT,
- SERV6_REWRITE_IP,
- SERV6_REWRITE_PORT,
- SERV6_IP,
- SUCCESS,
- },
-};
-
-static int mk_sockaddr(int domain, const char *ip, unsigned short port,
- struct sockaddr *addr, socklen_t addr_len)
-{
- struct sockaddr_in6 *addr6;
- struct sockaddr_in *addr4;
-
- if (domain != AF_INET && domain != AF_INET6) {
- log_err("Unsupported address family");
- return -1;
- }
-
- memset(addr, 0, addr_len);
-
- if (domain == AF_INET) {
- if (addr_len < sizeof(struct sockaddr_in))
- return -1;
- addr4 = (struct sockaddr_in *)addr;
- addr4->sin_family = domain;
- addr4->sin_port = htons(port);
- if (inet_pton(domain, ip, (void *)&addr4->sin_addr) != 1) {
- log_err("Invalid IPv4: %s", ip);
- return -1;
- }
- } else if (domain == AF_INET6) {
- if (addr_len < sizeof(struct sockaddr_in6))
- return -1;
- addr6 = (struct sockaddr_in6 *)addr;
- addr6->sin6_family = domain;
- addr6->sin6_port = htons(port);
- if (inet_pton(domain, ip, (void *)&addr6->sin6_addr) != 1) {
- log_err("Invalid IPv6: %s", ip);
- return -1;
- }
- }
-
- return 0;
-}
-
-static int load_insns(const struct sock_addr_test *test,
- const struct bpf_insn *insns, size_t insns_cnt)
-{
- LIBBPF_OPTS(bpf_prog_load_opts, opts);
- int ret;
-
- opts.expected_attach_type = test->expected_attach_type;
- opts.log_buf = bpf_log_buf;
- opts.log_size = BPF_LOG_BUF_SIZE;
-
- ret = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, NULL, "GPL", insns, insns_cnt, &opts);
- if (ret < 0 && test->expected_result != LOAD_REJECT) {
- log_err(">>> Loading program error.\n"
- ">>> Verifier output:\n%s\n-------\n", bpf_log_buf);
- }
-
- return ret;
-}
-
-static int load_path(const struct sock_addr_test *test, const char *path)
-{
- struct bpf_object *obj;
- struct bpf_program *prog;
- int err;
-
- obj = bpf_object__open_file(path, NULL);
- err = libbpf_get_error(obj);
- if (err) {
- log_err(">>> Opening BPF object (%s) error.\n", path);
- return -1;
- }
-
- prog = bpf_object__next_program(obj, NULL);
- if (!prog)
- goto err_out;
-
- bpf_program__set_type(prog, BPF_PROG_TYPE_CGROUP_SOCK_ADDR);
- bpf_program__set_expected_attach_type(prog, test->expected_attach_type);
- bpf_program__set_flags(prog, testing_prog_flags());
-
- err = bpf_object__load(obj);
- if (err) {
- if (test->expected_result != LOAD_REJECT)
- log_err(">>> Loading program (%s) error.\n", path);
- goto err_out;
- }
-
- return bpf_program__fd(prog);
-err_out:
- bpf_object__close(obj);
- return -1;
-}
-
-static int bind4_prog_load(const struct sock_addr_test *test)
-{
- return load_path(test, BIND4_PROG_PATH);
-}
-
-static int bind6_prog_load(const struct sock_addr_test *test)
-{
- return load_path(test, BIND6_PROG_PATH);
-}
-
-static int connect4_prog_load(const struct sock_addr_test *test)
-{
- return load_path(test, CONNECT4_PROG_PATH);
-}
-
-static int connect6_prog_load(const struct sock_addr_test *test)
-{
- return load_path(test, CONNECT6_PROG_PATH);
-}
-
-static int xmsg_ret_only_prog_load(const struct sock_addr_test *test,
- int32_t rc)
-{
- struct bpf_insn insns[] = {
- /* return rc */
- BPF_MOV64_IMM(BPF_REG_0, rc),
- BPF_EXIT_INSN(),
- };
- return load_insns(test, insns, ARRAY_SIZE(insns));
-}
-
-static int sendmsg_allow_prog_load(const struct sock_addr_test *test)
-{
- return xmsg_ret_only_prog_load(test, /*rc*/ 1);
-}
-
-static int sendmsg_deny_prog_load(const struct sock_addr_test *test)
-{
- return xmsg_ret_only_prog_load(test, /*rc*/ 0);
-}
-
-static int recvmsg_allow_prog_load(const struct sock_addr_test *test)
-{
- return xmsg_ret_only_prog_load(test, /*rc*/ 1);
-}
-
-static int recvmsg_deny_prog_load(const struct sock_addr_test *test)
-{
- return xmsg_ret_only_prog_load(test, /*rc*/ 0);
-}
-
-static int sendmsg4_rw_asm_prog_load(const struct sock_addr_test *test)
-{
- struct sockaddr_in dst4_rw_addr;
- struct in_addr src4_rw_ip;
-
- if (inet_pton(AF_INET, SRC4_REWRITE_IP, (void *)&src4_rw_ip) != 1) {
- log_err("Invalid IPv4: %s", SRC4_REWRITE_IP);
- return -1;
- }
-
- if (mk_sockaddr(AF_INET, SERV4_REWRITE_IP, SERV4_REWRITE_PORT,
- (struct sockaddr *)&dst4_rw_addr,
- sizeof(dst4_rw_addr)) == -1)
- return -1;
-
- struct bpf_insn insns[] = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
-
- /* if (sk.family == AF_INET && */
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
- offsetof(struct bpf_sock_addr, family)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 8),
-
- /* sk.type == SOCK_DGRAM) { */
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
- offsetof(struct bpf_sock_addr, type)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_DGRAM, 6),
-
- /* msg_src_ip4 = src4_rw_ip */
- BPF_MOV32_IMM(BPF_REG_7, src4_rw_ip.s_addr),
- BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
- offsetof(struct bpf_sock_addr, msg_src_ip4)),
-
- /* user_ip4 = dst4_rw_addr.sin_addr */
- BPF_MOV32_IMM(BPF_REG_7, dst4_rw_addr.sin_addr.s_addr),
- BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
- offsetof(struct bpf_sock_addr, user_ip4)),
-
- /* user_port = dst4_rw_addr.sin_port */
- BPF_MOV32_IMM(BPF_REG_7, dst4_rw_addr.sin_port),
- BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
- offsetof(struct bpf_sock_addr, user_port)),
- /* } */
-
- /* return 1 */
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- };
-
- return load_insns(test, insns, ARRAY_SIZE(insns));
-}
-
-static int recvmsg4_rw_c_prog_load(const struct sock_addr_test *test)
-{
- return load_path(test, RECVMSG4_PROG_PATH);
-}
-
-static int sendmsg4_rw_c_prog_load(const struct sock_addr_test *test)
-{
- return load_path(test, SENDMSG4_PROG_PATH);
-}
-
-static int sendmsg6_rw_dst_asm_prog_load(const struct sock_addr_test *test,
- const char *rw_dst_ip)
-{
- struct sockaddr_in6 dst6_rw_addr;
- struct in6_addr src6_rw_ip;
-
- if (inet_pton(AF_INET6, SRC6_REWRITE_IP, (void *)&src6_rw_ip) != 1) {
- log_err("Invalid IPv6: %s", SRC6_REWRITE_IP);
- return -1;
- }
-
- if (mk_sockaddr(AF_INET6, rw_dst_ip, SERV6_REWRITE_PORT,
- (struct sockaddr *)&dst6_rw_addr,
- sizeof(dst6_rw_addr)) == -1)
- return -1;
-
- struct bpf_insn insns[] = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
-
- /* if (sk.family == AF_INET6) { */
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
- offsetof(struct bpf_sock_addr, family)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET6, 18),
-
-#define STORE_IPV6_WORD_N(DST, SRC, N) \
- BPF_MOV32_IMM(BPF_REG_7, SRC[N]), \
- BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7, \
- offsetof(struct bpf_sock_addr, DST[N]))
-
-#define STORE_IPV6(DST, SRC) \
- STORE_IPV6_WORD_N(DST, SRC, 0), \
- STORE_IPV6_WORD_N(DST, SRC, 1), \
- STORE_IPV6_WORD_N(DST, SRC, 2), \
- STORE_IPV6_WORD_N(DST, SRC, 3)
-
- STORE_IPV6(msg_src_ip6, src6_rw_ip.s6_addr32),
- STORE_IPV6(user_ip6, dst6_rw_addr.sin6_addr.s6_addr32),
-
- /* user_port = dst6_rw_addr.sin6_port */
- BPF_MOV32_IMM(BPF_REG_7, dst6_rw_addr.sin6_port),
- BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
- offsetof(struct bpf_sock_addr, user_port)),
-
- /* } */
-
- /* return 1 */
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- };
-
- return load_insns(test, insns, ARRAY_SIZE(insns));
-}
-
-static int sendmsg6_rw_asm_prog_load(const struct sock_addr_test *test)
-{
- return sendmsg6_rw_dst_asm_prog_load(test, SERV6_REWRITE_IP);
-}
-
-static int recvmsg6_rw_c_prog_load(const struct sock_addr_test *test)
-{
- return load_path(test, RECVMSG6_PROG_PATH);
-}
-
-static int sendmsg6_rw_v4mapped_prog_load(const struct sock_addr_test *test)
-{
- return sendmsg6_rw_dst_asm_prog_load(test, SERV6_V4MAPPED_IP);
-}
-
-static int sendmsg6_rw_wildcard_prog_load(const struct sock_addr_test *test)
-{
- return sendmsg6_rw_dst_asm_prog_load(test, WILDCARD6_IP);
-}
-
-static int sendmsg6_rw_c_prog_load(const struct sock_addr_test *test)
-{
- return load_path(test, SENDMSG6_PROG_PATH);
-}
-
-static int cmp_addr(const struct sockaddr_storage *addr1,
- const struct sockaddr_storage *addr2, int cmp_port)
-{
- const struct sockaddr_in *four1, *four2;
- const struct sockaddr_in6 *six1, *six2;
-
- if (addr1->ss_family != addr2->ss_family)
- return -1;
-
- if (addr1->ss_family == AF_INET) {
- four1 = (const struct sockaddr_in *)addr1;
- four2 = (const struct sockaddr_in *)addr2;
- return !((four1->sin_port == four2->sin_port || !cmp_port) &&
- four1->sin_addr.s_addr == four2->sin_addr.s_addr);
- } else if (addr1->ss_family == AF_INET6) {
- six1 = (const struct sockaddr_in6 *)addr1;
- six2 = (const struct sockaddr_in6 *)addr2;
- return !((six1->sin6_port == six2->sin6_port || !cmp_port) &&
- !memcmp(&six1->sin6_addr, &six2->sin6_addr,
- sizeof(struct in6_addr)));
- }
-
- return -1;
-}
-
-static int cmp_sock_addr(info_fn fn, int sock1,
- const struct sockaddr_storage *addr2, int cmp_port)
-{
- struct sockaddr_storage addr1;
- socklen_t len1 = sizeof(addr1);
-
- memset(&addr1, 0, len1);
- if (fn(sock1, (struct sockaddr *)&addr1, (socklen_t *)&len1) != 0)
- return -1;
-
- return cmp_addr(&addr1, addr2, cmp_port);
-}
-
-static int cmp_local_ip(int sock1, const struct sockaddr_storage *addr2)
-{
- return cmp_sock_addr(getsockname, sock1, addr2, /*cmp_port*/ 0);
-}
-
-static int cmp_local_addr(int sock1, const struct sockaddr_storage *addr2)
-{
- return cmp_sock_addr(getsockname, sock1, addr2, /*cmp_port*/ 1);
-}
-
-static int cmp_peer_addr(int sock1, const struct sockaddr_storage *addr2)
-{
- return cmp_sock_addr(getpeername, sock1, addr2, /*cmp_port*/ 1);
-}
-
-static int start_server(int type, const struct sockaddr_storage *addr,
- socklen_t addr_len)
-{
- int fd;
-
- fd = socket(addr->ss_family, type, 0);
- if (fd == -1) {
- log_err("Failed to create server socket");
- goto out;
- }
-
- if (bind(fd, (const struct sockaddr *)addr, addr_len) == -1) {
- log_err("Failed to bind server socket");
- goto close_out;
- }
-
- if (type == SOCK_STREAM) {
- if (listen(fd, 128) == -1) {
- log_err("Failed to listen on server socket");
- goto close_out;
- }
- }
-
- goto out;
-close_out:
- close(fd);
- fd = -1;
-out:
- return fd;
-}
-
-static int connect_to_server(int type, const struct sockaddr_storage *addr,
- socklen_t addr_len)
-{
- int domain;
- int fd = -1;
-
- domain = addr->ss_family;
-
- if (domain != AF_INET && domain != AF_INET6) {
- log_err("Unsupported address family");
- goto err;
- }
-
- fd = socket(domain, type, 0);
- if (fd == -1) {
- log_err("Failed to create client socket");
- goto err;
- }
-
- if (connect(fd, (const struct sockaddr *)addr, addr_len) == -1) {
- log_err("Fail to connect to server");
- goto err;
- }
-
- goto out;
-err:
- close(fd);
- fd = -1;
-out:
- return fd;
-}
-
-int init_pktinfo(int domain, struct cmsghdr *cmsg)
-{
- struct in6_pktinfo *pktinfo6;
- struct in_pktinfo *pktinfo4;
-
- if (domain == AF_INET) {
- cmsg->cmsg_level = SOL_IP;
- cmsg->cmsg_type = IP_PKTINFO;
- cmsg->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
- pktinfo4 = (struct in_pktinfo *)CMSG_DATA(cmsg);
- memset(pktinfo4, 0, sizeof(struct in_pktinfo));
- if (inet_pton(domain, SRC4_IP,
- (void *)&pktinfo4->ipi_spec_dst) != 1)
- return -1;
- } else if (domain == AF_INET6) {
- cmsg->cmsg_level = SOL_IPV6;
- cmsg->cmsg_type = IPV6_PKTINFO;
- cmsg->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
- pktinfo6 = (struct in6_pktinfo *)CMSG_DATA(cmsg);
- memset(pktinfo6, 0, sizeof(struct in6_pktinfo));
- if (inet_pton(domain, SRC6_IP,
- (void *)&pktinfo6->ipi6_addr) != 1)
- return -1;
- } else {
- return -1;
- }
-
- return 0;
-}
-
-static int sendmsg_to_server(int type, const struct sockaddr_storage *addr,
- socklen_t addr_len, int set_cmsg, int flags,
- int *syscall_err)
-{
- union {
- char buf[CMSG_SPACE(sizeof(struct in6_pktinfo))];
- struct cmsghdr align;
- } control6;
- union {
- char buf[CMSG_SPACE(sizeof(struct in_pktinfo))];
- struct cmsghdr align;
- } control4;
- struct msghdr hdr;
- struct iovec iov;
- char data = 'a';
- int domain;
- int fd = -1;
-
- domain = addr->ss_family;
-
- if (domain != AF_INET && domain != AF_INET6) {
- log_err("Unsupported address family");
- goto err;
- }
-
- fd = socket(domain, type, 0);
- if (fd == -1) {
- log_err("Failed to create client socket");
- goto err;
- }
-
- memset(&iov, 0, sizeof(iov));
- iov.iov_base = &data;
- iov.iov_len = sizeof(data);
-
- memset(&hdr, 0, sizeof(hdr));
- hdr.msg_name = (void *)addr;
- hdr.msg_namelen = addr_len;
- hdr.msg_iov = &iov;
- hdr.msg_iovlen = 1;
-
- if (set_cmsg) {
- if (domain == AF_INET) {
- hdr.msg_control = &control4;
- hdr.msg_controllen = sizeof(control4.buf);
- } else if (domain == AF_INET6) {
- hdr.msg_control = &control6;
- hdr.msg_controllen = sizeof(control6.buf);
- }
- if (init_pktinfo(domain, CMSG_FIRSTHDR(&hdr))) {
- log_err("Fail to init pktinfo");
- goto err;
- }
- }
-
- if (sendmsg(fd, &hdr, flags) != sizeof(data)) {
- log_err("Fail to send message to server");
- *syscall_err = errno;
- goto err;
- }
-
- goto out;
-err:
- close(fd);
- fd = -1;
-out:
- return fd;
-}
-
-static int fastconnect_to_server(const struct sockaddr_storage *addr,
- socklen_t addr_len)
-{
- int sendmsg_err;
-
- return sendmsg_to_server(SOCK_STREAM, addr, addr_len, /*set_cmsg*/0,
- MSG_FASTOPEN, &sendmsg_err);
-}
-
-static int recvmsg_from_client(int sockfd, struct sockaddr_storage *src_addr)
-{
- struct timeval tv;
- struct msghdr hdr;
- struct iovec iov;
- char data[64];
- fd_set rfds;
-
- FD_ZERO(&rfds);
- FD_SET(sockfd, &rfds);
-
- tv.tv_sec = 2;
- tv.tv_usec = 0;
-
- if (select(sockfd + 1, &rfds, NULL, NULL, &tv) <= 0 ||
- !FD_ISSET(sockfd, &rfds))
- return -1;
-
- memset(&iov, 0, sizeof(iov));
- iov.iov_base = data;
- iov.iov_len = sizeof(data);
-
- memset(&hdr, 0, sizeof(hdr));
- hdr.msg_name = src_addr;
- hdr.msg_namelen = sizeof(struct sockaddr_storage);
- hdr.msg_iov = &iov;
- hdr.msg_iovlen = 1;
-
- return recvmsg(sockfd, &hdr, 0);
-}
-
-static int init_addrs(const struct sock_addr_test *test,
- struct sockaddr_storage *requested_addr,
- struct sockaddr_storage *expected_addr,
- struct sockaddr_storage *expected_src_addr)
-{
- socklen_t addr_len = sizeof(struct sockaddr_storage);
-
- if (mk_sockaddr(test->domain, test->expected_ip, test->expected_port,
- (struct sockaddr *)expected_addr, addr_len) == -1)
- goto err;
-
- if (mk_sockaddr(test->domain, test->requested_ip, test->requested_port,
- (struct sockaddr *)requested_addr, addr_len) == -1)
- goto err;
-
- if (test->expected_src_ip &&
- mk_sockaddr(test->domain, test->expected_src_ip, 0,
- (struct sockaddr *)expected_src_addr, addr_len) == -1)
- goto err;
-
- return 0;
-err:
- return -1;
-}
-
-static int run_bind_test_case(const struct sock_addr_test *test)
-{
- socklen_t addr_len = sizeof(struct sockaddr_storage);
- struct sockaddr_storage requested_addr;
- struct sockaddr_storage expected_addr;
- int clientfd = -1;
- int servfd = -1;
- int err = 0;
-
- if (init_addrs(test, &requested_addr, &expected_addr, NULL))
- goto err;
-
- servfd = start_server(test->type, &requested_addr, addr_len);
- if (servfd == -1)
- goto err;
-
- if (cmp_local_addr(servfd, &expected_addr))
- goto err;
-
- /* Try to connect to server just in case */
- clientfd = connect_to_server(test->type, &expected_addr, addr_len);
- if (clientfd == -1)
- goto err;
-
- goto out;
-err:
- err = -1;
-out:
- close(clientfd);
- close(servfd);
- return err;
-}
-
-static int run_connect_test_case(const struct sock_addr_test *test)
-{
- socklen_t addr_len = sizeof(struct sockaddr_storage);
- struct sockaddr_storage expected_src_addr;
- struct sockaddr_storage requested_addr;
- struct sockaddr_storage expected_addr;
- int clientfd = -1;
- int servfd = -1;
- int err = 0;
-
- if (init_addrs(test, &requested_addr, &expected_addr,
- &expected_src_addr))
- goto err;
-
- /* Prepare server to connect to */
- servfd = start_server(test->type, &expected_addr, addr_len);
- if (servfd == -1)
- goto err;
-
- clientfd = connect_to_server(test->type, &requested_addr, addr_len);
- if (clientfd == -1)
- goto err;
-
- /* Make sure src and dst addrs were overridden properly */
- if (cmp_peer_addr(clientfd, &expected_addr))
- goto err;
-
- if (cmp_local_ip(clientfd, &expected_src_addr))
- goto err;
-
- if (test->type == SOCK_STREAM) {
- /* Test TCP Fast Open scenario */
- clientfd = fastconnect_to_server(&requested_addr, addr_len);
- if (clientfd == -1)
- goto err;
-
- /* Make sure src and dst addrs were overridden properly */
- if (cmp_peer_addr(clientfd, &expected_addr))
- goto err;
-
- if (cmp_local_ip(clientfd, &expected_src_addr))
- goto err;
- }
-
- goto out;
-err:
- err = -1;
-out:
- close(clientfd);
- close(servfd);
- return err;
-}
-
-static int run_xmsg_test_case(const struct sock_addr_test *test, int max_cmsg)
-{
- socklen_t addr_len = sizeof(struct sockaddr_storage);
- struct sockaddr_storage expected_addr;
- struct sockaddr_storage server_addr;
- struct sockaddr_storage sendmsg_addr;
- struct sockaddr_storage recvmsg_addr;
- int clientfd = -1;
- int servfd = -1;
- int set_cmsg;
- int err = 0;
-
- if (test->type != SOCK_DGRAM)
- goto err;
-
- if (init_addrs(test, &sendmsg_addr, &server_addr, &expected_addr))
- goto err;
-
- /* Prepare server to sendmsg to */
- servfd = start_server(test->type, &server_addr, addr_len);
- if (servfd == -1)
- goto err;
-
- for (set_cmsg = 0; set_cmsg <= max_cmsg; ++set_cmsg) {
- if (clientfd >= 0)
- close(clientfd);
-
- clientfd = sendmsg_to_server(test->type, &sendmsg_addr,
- addr_len, set_cmsg, /*flags*/0,
- &err);
- if (err)
- goto out;
- else if (clientfd == -1)
- goto err;
-
- /* Try to receive message on server instead of using
- * getpeername(2) on client socket, to check that client's
- * destination address was rewritten properly, since
- * getpeername(2) doesn't work with unconnected datagram
- * sockets.
- *
- * Get source address from recvmsg(2) as well to make sure
- * source was rewritten properly: getsockname(2) can't be used
- * since socket is unconnected and source defined for one
- * specific packet may differ from the one used by default and
- * returned by getsockname(2).
- */
- if (recvmsg_from_client(servfd, &recvmsg_addr) == -1)
- goto err;
-
- if (cmp_addr(&recvmsg_addr, &expected_addr, /*cmp_port*/0))
- goto err;
- }
-
- goto out;
-err:
- err = -1;
-out:
- close(clientfd);
- close(servfd);
- return err;
-}
-
-static int run_test_case(int cgfd, const struct sock_addr_test *test)
-{
- int progfd = -1;
- int err = 0;
-
- printf("Test case: %s .. ", test->descr);
-
- progfd = test->loadfn(test);
- if (test->expected_result == LOAD_REJECT && progfd < 0)
- goto out;
- else if (test->expected_result == LOAD_REJECT || progfd < 0)
- goto err;
-
- err = bpf_prog_attach(progfd, cgfd, test->attach_type,
- BPF_F_ALLOW_OVERRIDE);
- if (test->expected_result == ATTACH_REJECT && err) {
- err = 0; /* error was expected, reset it */
- goto out;
- } else if (test->expected_result == ATTACH_REJECT || err) {
- goto err;
- } else if (test->expected_result == ATTACH_OKAY) {
- err = 0;
- goto out;
- }
-
- switch (test->attach_type) {
- case BPF_CGROUP_INET4_BIND:
- case BPF_CGROUP_INET6_BIND:
- err = run_bind_test_case(test);
- break;
- case BPF_CGROUP_INET4_CONNECT:
- case BPF_CGROUP_INET6_CONNECT:
- err = run_connect_test_case(test);
- break;
- case BPF_CGROUP_UDP4_SENDMSG:
- case BPF_CGROUP_UDP6_SENDMSG:
- err = run_xmsg_test_case(test, 1);
- break;
- case BPF_CGROUP_UDP4_RECVMSG:
- case BPF_CGROUP_UDP6_RECVMSG:
- err = run_xmsg_test_case(test, 0);
- break;
- default:
- goto err;
- }
-
- if (test->expected_result == SYSCALL_EPERM && err == EPERM) {
- err = 0; /* error was expected, reset it */
- goto out;
- }
-
- if (test->expected_result == SYSCALL_ENOTSUPP && err == ENOTSUPP) {
- err = 0; /* error was expected, reset it */
- goto out;
- }
-
- if (err || test->expected_result != SUCCESS)
- goto err;
-
- goto out;
-err:
- err = -1;
-out:
- /* Detaching w/o checking return code: best effort attempt. */
- if (progfd != -1)
- bpf_prog_detach(cgfd, test->attach_type);
- close(progfd);
- printf("[%s]\n", err ? "FAIL" : "PASS");
- return err;
-}
-
-static int run_tests(int cgfd)
-{
- int passes = 0;
- int fails = 0;
- int i;
-
- for (i = 0; i < ARRAY_SIZE(tests); ++i) {
- if (run_test_case(cgfd, &tests[i]))
- ++fails;
- else
- ++passes;
- }
- printf("Summary: %d PASSED, %d FAILED\n", passes, fails);
- return fails ? -1 : 0;
-}
-
-int main(int argc, char **argv)
-{
- int cgfd = -1;
- int err = 0;
-
- if (argc < 2) {
- fprintf(stderr,
- "%s has to be run via %s.sh. Skip direct run.\n",
- argv[0], argv[0]);
- exit(err);
- }
-
- cgfd = cgroup_setup_and_join(CG_PATH);
- if (cgfd < 0)
- goto err;
-
- /* Use libbpf 1.0 API mode */
- libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
-
- if (run_tests(cgfd))
- goto err;
-
- goto out;
-err:
- err = -1;
-out:
- close(cgfd);
- cleanup_cgroup_environment();
- return err;
-}
diff --git a/tools/testing/selftests/bpf/test_sock_addr.sh b/tools/testing/selftests/bpf/test_sock_addr.sh
deleted file mode 100755
index 3b9fdb8094aa..000000000000
--- a/tools/testing/selftests/bpf/test_sock_addr.sh
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/bin/sh
-
-set -eu
-
-ping_once()
-{
- type ping${1} >/dev/null 2>&1 && PING="ping${1}" || PING="ping -${1}"
- $PING -q -c 1 -W 1 ${2%%/*} >/dev/null 2>&1
-}
-
-wait_for_ip()
-{
- local _i
- echo -n "Wait for testing IPv4/IPv6 to become available "
- for _i in $(seq ${MAX_PING_TRIES}); do
- echo -n "."
- if ping_once 4 ${TEST_IPv4} && ping_once 6 ${TEST_IPv6}; then
- echo " OK"
- return
- fi
- done
- echo 1>&2 "ERROR: Timeout waiting for test IP to become available."
- exit 1
-}
-
-setup()
-{
- # Create testing interfaces not to interfere with current environment.
- ip link add dev ${TEST_IF} type veth peer name ${TEST_IF_PEER}
- ip link set ${TEST_IF} up
- ip link set ${TEST_IF_PEER} up
-
- ip -4 addr add ${TEST_IPv4} dev ${TEST_IF}
- ip -6 addr add ${TEST_IPv6} dev ${TEST_IF}
- wait_for_ip
-}
-
-cleanup()
-{
- ip link del ${TEST_IF} 2>/dev/null || :
- ip link del ${TEST_IF_PEER} 2>/dev/null || :
-}
-
-main()
-{
- trap cleanup EXIT 2 3 6 15
- setup
- ./test_sock_addr setup_done
-}
-
-BASENAME=$(basename $0 .sh)
-TEST_IF="${BASENAME}1"
-TEST_IF_PEER="${BASENAME}2"
-TEST_IPv4="127.0.0.4/8"
-TEST_IPv6="::6/128"
-MAX_PING_TRIES=5
-
-main
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 024a0faafb3b..3e02d7267de8 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -63,7 +63,8 @@ int passed;
int failed;
int map_fd[9];
struct bpf_map *maps[9];
-int prog_fd[11];
+struct bpf_program *progs[9];
+struct bpf_link *links[9];
int txmsg_pass;
int txmsg_redir;
@@ -680,7 +681,8 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
}
}
- s->bytes_recvd += recv;
+ if (recv > 0)
+ s->bytes_recvd += recv;
if (opt->check_recved_len && s->bytes_recvd > total_bytes) {
errno = EMSGSIZE;
@@ -952,7 +954,8 @@ enum {
static int run_options(struct sockmap_options *options, int cg_fd, int test)
{
- int i, key, next_key, err, tx_prog_fd = -1, zero = 0;
+ int i, key, next_key, err, zero = 0;
+ struct bpf_program *tx_prog;
/* If base test skip BPF setup */
if (test == BASE || test == BASE_SENDPAGE)
@@ -960,48 +963,44 @@ static int run_options(struct sockmap_options *options, int cg_fd, int test)
/* Attach programs to sockmap */
if (!txmsg_omit_skb_parser) {
- err = bpf_prog_attach(prog_fd[0], map_fd[0],
- BPF_SK_SKB_STREAM_PARSER, 0);
- if (err) {
+ links[0] = bpf_program__attach_sockmap(progs[0], map_fd[0]);
+ if (!links[0]) {
fprintf(stderr,
- "ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n",
- prog_fd[0], map_fd[0], err, strerror(errno));
- return err;
+ "ERROR: bpf_program__attach_sockmap (sockmap %i->%i): (%s)\n",
+ bpf_program__fd(progs[0]), map_fd[0], strerror(errno));
+ return -1;
}
}
- err = bpf_prog_attach(prog_fd[1], map_fd[0],
- BPF_SK_SKB_STREAM_VERDICT, 0);
- if (err) {
- fprintf(stderr, "ERROR: bpf_prog_attach (sockmap): %d (%s)\n",
- err, strerror(errno));
- return err;
+ links[1] = bpf_program__attach_sockmap(progs[1], map_fd[0]);
+ if (!links[1]) {
+ fprintf(stderr, "ERROR: bpf_program__attach_sockmap (sockmap): (%s)\n",
+ strerror(errno));
+ return -1;
}
/* Attach programs to TLS sockmap */
if (txmsg_ktls_skb) {
if (!txmsg_omit_skb_parser) {
- err = bpf_prog_attach(prog_fd[0], map_fd[8],
- BPF_SK_SKB_STREAM_PARSER, 0);
- if (err) {
+ links[2] = bpf_program__attach_sockmap(progs[0], map_fd[8]);
+ if (!links[2]) {
fprintf(stderr,
- "ERROR: bpf_prog_attach (TLS sockmap %i->%i): %d (%s)\n",
- prog_fd[0], map_fd[8], err, strerror(errno));
- return err;
+ "ERROR: bpf_program__attach_sockmap (TLS sockmap %i->%i): (%s)\n",
+ bpf_program__fd(progs[0]), map_fd[8], strerror(errno));
+ return -1;
}
}
- err = bpf_prog_attach(prog_fd[2], map_fd[8],
- BPF_SK_SKB_STREAM_VERDICT, 0);
- if (err) {
- fprintf(stderr, "ERROR: bpf_prog_attach (TLS sockmap): %d (%s)\n",
- err, strerror(errno));
- return err;
+ links[3] = bpf_program__attach_sockmap(progs[2], map_fd[8]);
+ if (!links[3]) {
+ fprintf(stderr, "ERROR: bpf_program__attach_sockmap (TLS sockmap): (%s)\n",
+ strerror(errno));
+ return -1;
}
}
/* Attach to cgroups */
- err = bpf_prog_attach(prog_fd[3], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
+ err = bpf_prog_attach(bpf_program__fd(progs[3]), cg_fd, BPF_CGROUP_SOCK_OPS, 0);
if (err) {
fprintf(stderr, "ERROR: bpf_prog_attach (groups): %d (%s)\n",
err, strerror(errno));
@@ -1017,30 +1016,31 @@ run:
/* Attach txmsg program to sockmap */
if (txmsg_pass)
- tx_prog_fd = prog_fd[4];
+ tx_prog = progs[4];
else if (txmsg_redir)
- tx_prog_fd = prog_fd[5];
+ tx_prog = progs[5];
else if (txmsg_apply)
- tx_prog_fd = prog_fd[6];
+ tx_prog = progs[6];
else if (txmsg_cork)
- tx_prog_fd = prog_fd[7];
+ tx_prog = progs[7];
else if (txmsg_drop)
- tx_prog_fd = prog_fd[8];
+ tx_prog = progs[8];
else
- tx_prog_fd = 0;
+ tx_prog = NULL;
- if (tx_prog_fd) {
- int redir_fd, i = 0;
+ if (tx_prog) {
+ int redir_fd;
- err = bpf_prog_attach(tx_prog_fd,
- map_fd[1], BPF_SK_MSG_VERDICT, 0);
- if (err) {
+ links[4] = bpf_program__attach_sockmap(tx_prog, map_fd[1]);
+ if (!links[4]) {
fprintf(stderr,
- "ERROR: bpf_prog_attach (txmsg): %d (%s)\n",
- err, strerror(errno));
+ "ERROR: bpf_program__attach_sockmap (txmsg): (%s)\n",
+ strerror(errno));
+ err = -1;
goto out;
}
+ i = 0;
err = bpf_map_update_elem(map_fd[1], &i, &c1, BPF_ANY);
if (err) {
fprintf(stderr,
@@ -1279,16 +1279,14 @@ run:
fprintf(stderr, "unknown test\n");
out:
/* Detatch and zero all the maps */
- bpf_prog_detach2(prog_fd[3], cg_fd, BPF_CGROUP_SOCK_OPS);
- bpf_prog_detach2(prog_fd[0], map_fd[0], BPF_SK_SKB_STREAM_PARSER);
- bpf_prog_detach2(prog_fd[1], map_fd[0], BPF_SK_SKB_STREAM_VERDICT);
- bpf_prog_detach2(prog_fd[0], map_fd[8], BPF_SK_SKB_STREAM_PARSER);
- bpf_prog_detach2(prog_fd[2], map_fd[8], BPF_SK_SKB_STREAM_VERDICT);
+ bpf_prog_detach2(bpf_program__fd(progs[3]), cg_fd, BPF_CGROUP_SOCK_OPS);
- if (tx_prog_fd >= 0)
- bpf_prog_detach2(tx_prog_fd, map_fd[1], BPF_SK_MSG_VERDICT);
+ for (i = 0; i < ARRAY_SIZE(links); i++) {
+ if (links[i])
+ bpf_link__detach(links[i]);
+ }
- for (i = 0; i < 8; i++) {
+ for (i = 0; i < ARRAY_SIZE(map_fd); i++) {
key = next_key = 0;
bpf_map_update_elem(map_fd[i], &key, &zero, BPF_ANY);
while (bpf_map_get_next_key(map_fd[i], &key, &next_key) == 0) {
@@ -1783,34 +1781,6 @@ char *map_names[] = {
"tls_sock_map",
};
-int prog_attach_type[] = {
- BPF_SK_SKB_STREAM_PARSER,
- BPF_SK_SKB_STREAM_VERDICT,
- BPF_SK_SKB_STREAM_VERDICT,
- BPF_CGROUP_SOCK_OPS,
- BPF_SK_MSG_VERDICT,
- BPF_SK_MSG_VERDICT,
- BPF_SK_MSG_VERDICT,
- BPF_SK_MSG_VERDICT,
- BPF_SK_MSG_VERDICT,
- BPF_SK_MSG_VERDICT,
- BPF_SK_MSG_VERDICT,
-};
-
-int prog_type[] = {
- BPF_PROG_TYPE_SK_SKB,
- BPF_PROG_TYPE_SK_SKB,
- BPF_PROG_TYPE_SK_SKB,
- BPF_PROG_TYPE_SOCK_OPS,
- BPF_PROG_TYPE_SK_MSG,
- BPF_PROG_TYPE_SK_MSG,
- BPF_PROG_TYPE_SK_MSG,
- BPF_PROG_TYPE_SK_MSG,
- BPF_PROG_TYPE_SK_MSG,
- BPF_PROG_TYPE_SK_MSG,
- BPF_PROG_TYPE_SK_MSG,
-};
-
static int populate_progs(char *bpf_file)
{
struct bpf_program *prog;
@@ -1829,17 +1799,10 @@ static int populate_progs(char *bpf_file)
return -1;
}
- bpf_object__for_each_program(prog, obj) {
- bpf_program__set_type(prog, prog_type[i]);
- bpf_program__set_expected_attach_type(prog,
- prog_attach_type[i]);
- i++;
- }
-
i = bpf_object__load(obj);
i = 0;
bpf_object__for_each_program(prog, obj) {
- prog_fd[i] = bpf_program__fd(prog);
+ progs[i] = prog;
i++;
}
@@ -1853,6 +1816,9 @@ static int populate_progs(char *bpf_file)
}
}
+ for (i = 0; i < ARRAY_SIZE(links); i++)
+ links[i] = NULL;
+
return 0;
}
@@ -1887,10 +1853,13 @@ static int check_whitelist(struct _test *t, struct sockmap_options *opt)
while (entry) {
if ((opt->prepend && strstr(opt->prepend, entry) != 0) ||
strstr(opt->map, entry) != 0 ||
- strstr(t->title, entry) != 0)
+ strstr(t->title, entry) != 0) {
+ free(ptr);
return 0;
+ }
entry = strtok(NULL, ",");
}
+ free(ptr);
return -EINVAL;
}
@@ -1907,10 +1876,13 @@ static int check_blacklist(struct _test *t, struct sockmap_options *opt)
while (entry) {
if ((opt->prepend && strstr(opt->prepend, entry) != 0) ||
strstr(opt->map, entry) != 0 ||
- strstr(t->title, entry) != 0)
+ strstr(t->title, entry) != 0) {
+ free(ptr);
return 0;
+ }
entry = strtok(NULL, ",");
}
+ free(ptr);
return -EINVAL;
}
@@ -1964,7 +1936,6 @@ static void test_selftests_ktls(int cg_fd, struct sockmap_options *opt)
static int test_selftest(int cg_fd, struct sockmap_options *opt)
{
-
test_selftests_sockmap(cg_fd, opt);
test_selftests_sockhash(cg_fd, opt);
test_selftests_ktls(cg_fd, opt);
@@ -2104,9 +2075,9 @@ out:
free(options.whitelist);
if (options.blacklist)
free(options.blacklist);
+ close(cg_fd);
if (cg_created)
cleanup_cgroup_environment();
- close(cg_fd);
return err;
}
diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh
index 910044f08908..7989ec608454 100755
--- a/tools/testing/selftests/bpf/test_tc_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh
@@ -72,7 +72,6 @@ cleanup() {
server_listen() {
ip netns exec "${ns2}" nc "${netcat_opt}" -l "${port}" > "${outfile}" &
server_pid=$!
- sleep 0.2
}
client_connect() {
@@ -93,6 +92,16 @@ verify_data() {
fi
}
+wait_for_port() {
+ for i in $(seq 20); do
+ if ip netns exec "${ns2}" ss ${2:--4}OHntl | grep -q "$1"; then
+ return 0
+ fi
+ sleep 0.1
+ done
+ return 1
+}
+
set -e
# no arguments: automated test, run all
@@ -193,6 +202,7 @@ setup
# basic communication works
echo "test basic connectivity"
server_listen
+wait_for_port ${port} ${netcat_opt}
client_connect
verify_data
@@ -204,6 +214,7 @@ ip netns exec "${ns1}" tc filter add dev veth1 egress \
section "encap_${tuntype}_${mac}"
echo "test bpf encap without decap (expect failure)"
server_listen
+wait_for_port ${port} ${netcat_opt}
! client_connect
if [[ "$tuntype" =~ "udp" ]]; then
diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
index 32df93747095..3844f9b8232a 100644
--- a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
+++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
@@ -16,68 +16,7 @@
#include <bpf/libbpf.h>
#include "cgroup_helpers.h"
-
-static int start_server(const struct sockaddr *addr, socklen_t len, bool dual)
-{
- int mode = !dual;
- int fd;
-
- fd = socket(addr->sa_family, SOCK_STREAM, 0);
- if (fd == -1) {
- log_err("Failed to create server socket");
- goto out;
- }
-
- if (addr->sa_family == AF_INET6) {
- if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, (char *)&mode,
- sizeof(mode)) == -1) {
- log_err("Failed to set the dual-stack mode");
- goto close_out;
- }
- }
-
- if (bind(fd, addr, len) == -1) {
- log_err("Failed to bind server socket");
- goto close_out;
- }
-
- if (listen(fd, 128) == -1) {
- log_err("Failed to listen on server socket");
- goto close_out;
- }
-
- goto out;
-
-close_out:
- close(fd);
- fd = -1;
-out:
- return fd;
-}
-
-static int connect_to_server(const struct sockaddr *addr, socklen_t len)
-{
- int fd = -1;
-
- fd = socket(addr->sa_family, SOCK_STREAM, 0);
- if (fd == -1) {
- log_err("Failed to create client socket");
- goto out;
- }
-
- if (connect(fd, (const struct sockaddr *)addr, len) == -1) {
- log_err("Fail to connect to server");
- goto close_out;
- }
-
- goto out;
-
-close_out:
- close(fd);
- fd = -1;
-out:
- return fd;
-}
+#include "network_helpers.h"
static int get_map_fd_by_prog_id(int prog_id, bool *xdp)
{
@@ -117,8 +56,7 @@ err:
return map_fd;
}
-static int run_test(int server_fd, int results_fd, bool xdp,
- const struct sockaddr *addr, socklen_t len)
+static int run_test(int server_fd, int results_fd, bool xdp)
{
int client = -1, srv_client = -1;
int ret = 0;
@@ -144,7 +82,7 @@ static int run_test(int server_fd, int results_fd, bool xdp,
goto err;
}
- client = connect_to_server(addr, len);
+ client = connect_to_fd(server_fd, 0);
if (client == -1)
goto err;
@@ -201,27 +139,23 @@ out:
return ret;
}
-static bool get_port(int server_fd, in_port_t *port)
+static int v6only_true(int fd, void *opts)
{
- struct sockaddr_in addr;
- socklen_t len = sizeof(addr);
+ int mode = true;
- if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
- log_err("Failed to get server addr");
- return false;
- }
+ return setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &mode, sizeof(mode));
+}
+
+static int v6only_false(int fd, void *opts)
+{
+ int mode = false;
- /* sin_port and sin6_port are located at the same offset. */
- *port = addr.sin_port;
- return true;
+ return setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, &mode, sizeof(mode));
}
int main(int argc, char **argv)
{
- struct sockaddr_in addr4;
- struct sockaddr_in6 addr6;
- struct sockaddr_in addr4dual;
- struct sockaddr_in6 addr6dual;
+ struct network_helper_opts opts = { 0 };
int server = -1;
int server_v6 = -1;
int server_dual = -1;
@@ -243,47 +177,27 @@ int main(int argc, char **argv)
goto err;
}
- memset(&addr4, 0, sizeof(addr4));
- addr4.sin_family = AF_INET;
- addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
- addr4.sin_port = 0;
- memcpy(&addr4dual, &addr4, sizeof(addr4dual));
-
- memset(&addr6, 0, sizeof(addr6));
- addr6.sin6_family = AF_INET6;
- addr6.sin6_addr = in6addr_loopback;
- addr6.sin6_port = 0;
-
- memset(&addr6dual, 0, sizeof(addr6dual));
- addr6dual.sin6_family = AF_INET6;
- addr6dual.sin6_addr = in6addr_any;
- addr6dual.sin6_port = 0;
-
- server = start_server((const struct sockaddr *)&addr4, sizeof(addr4),
- false);
- if (server == -1 || !get_port(server, &addr4.sin_port))
+ server = start_server_str(AF_INET, SOCK_STREAM, "127.0.0.1", 0, NULL);
+ if (server == -1)
goto err;
- server_v6 = start_server((const struct sockaddr *)&addr6,
- sizeof(addr6), false);
- if (server_v6 == -1 || !get_port(server_v6, &addr6.sin6_port))
+ opts.post_socket_cb = v6only_true;
+ server_v6 = start_server_str(AF_INET6, SOCK_STREAM, "::1", 0, &opts);
+ if (server_v6 == -1)
goto err;
- server_dual = start_server((const struct sockaddr *)&addr6dual,
- sizeof(addr6dual), true);
- if (server_dual == -1 || !get_port(server_dual, &addr4dual.sin_port))
+ opts.post_socket_cb = v6only_false;
+ server_dual = start_server_str(AF_INET6, SOCK_STREAM, "::0", 0, &opts);
+ if (server_dual == -1)
goto err;
- if (run_test(server, results, xdp,
- (const struct sockaddr *)&addr4, sizeof(addr4)))
+ if (run_test(server, results, xdp))
goto err;
- if (run_test(server_v6, results, xdp,
- (const struct sockaddr *)&addr6, sizeof(addr6)))
+ if (run_test(server_v6, results, xdp))
goto err;
- if (run_test(server_dual, results, xdp,
- (const struct sockaddr *)&addr4dual, sizeof(addr4dual)))
+ if (run_test(server_dual, results, xdp))
goto err;
printf("ok\n");
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index df04bda1c927..610392dfc4fb 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -1237,11 +1237,6 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
fixup_prog_kfuncs(prog, fd_array, test->fixup_kfunc_btf_id);
}
-struct libcap {
- struct __user_cap_header_struct hdr;
- struct __user_cap_data_struct data[2];
-};
-
static int set_admin(bool admin)
{
int err;
diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c
index 28b6646662af..d5379a0e6da8 100644
--- a/tools/testing/selftests/bpf/testing_helpers.c
+++ b/tools/testing/selftests/bpf/testing_helpers.c
@@ -368,9 +368,23 @@ int delete_module(const char *name, int flags)
int unload_bpf_testmod(bool verbose)
{
+ int ret, cnt = 0;
+
if (kern_sync_rcu())
fprintf(stdout, "Failed to trigger kernel-side RCU sync!\n");
- if (delete_module("bpf_testmod", 0)) {
+
+ for (;;) {
+ ret = delete_module("bpf_testmod", 0);
+ if (!ret || errno != EAGAIN)
+ break;
+ if (++cnt > 10000) {
+ fprintf(stdout, "Unload of bpf_testmod timed out\n");
+ break;
+ }
+ usleep(100);
+ }
+
+ if (ret) {
if (errno == ENOENT) {
if (verbose)
fprintf(stdout, "bpf_testmod.ko is already unloaded.\n");
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 27fd7ed3e4b0..465d196c7165 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -61,12 +61,7 @@ void free_kallsyms_local(struct ksyms *ksyms)
free(ksyms);
}
-static int ksym_cmp(const void *p1, const void *p2)
-{
- return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr;
-}
-
-struct ksyms *load_kallsyms_local(void)
+static struct ksyms *load_kallsyms_local_common(ksym_cmp_t cmp_cb)
{
FILE *f;
char func[256], buf[256];
@@ -100,7 +95,7 @@ struct ksyms *load_kallsyms_local(void)
goto error;
}
fclose(f);
- qsort(ksyms->syms, ksyms->sym_cnt, sizeof(struct ksym), ksym_cmp);
+ qsort(ksyms->syms, ksyms->sym_cnt, sizeof(struct ksym), cmp_cb);
return ksyms;
error:
@@ -109,6 +104,21 @@ error:
return NULL;
}
+static int ksym_cmp(const void *p1, const void *p2)
+{
+ return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr;
+}
+
+struct ksyms *load_kallsyms_local(void)
+{
+ return load_kallsyms_local_common(ksym_cmp);
+}
+
+struct ksyms *load_kallsyms_custom_local(ksym_cmp_t cmp_cb)
+{
+ return load_kallsyms_local_common(cmp_cb);
+}
+
int load_kallsyms(void)
{
pthread_mutex_lock(&ksyms_mutex);
@@ -148,6 +158,28 @@ struct ksym *ksym_search_local(struct ksyms *ksyms, long key)
return &ksyms->syms[0];
}
+struct ksym *search_kallsyms_custom_local(struct ksyms *ksyms, const void *p,
+ ksym_search_cmp_t cmp_cb)
+{
+ int start = 0, mid, end = ksyms->sym_cnt;
+ struct ksym *ks;
+ int result;
+
+ while (start < end) {
+ mid = start + (end - start) / 2;
+ ks = &ksyms->syms[mid];
+ result = cmp_cb(p, ks);
+ if (result < 0)
+ end = mid;
+ else if (result > 0)
+ start = mid + 1;
+ else
+ return ks;
+ }
+
+ return NULL;
+}
+
struct ksym *ksym_search(long key)
{
if (!ksyms)
@@ -179,7 +211,7 @@ long ksym_get_addr(const char *name)
*/
int kallsyms_find(const char *sym, unsigned long long *addr)
{
- char type, name[500];
+ char type, name[500], *match;
unsigned long long value;
int err = 0;
FILE *f;
@@ -189,6 +221,17 @@ int kallsyms_find(const char *sym, unsigned long long *addr)
return -EINVAL;
while (fscanf(f, "%llx %c %499s%*[^\n]\n", &value, &type, name) > 0) {
+ /* If CONFIG_LTO_CLANG_THIN is enabled, static variable/function
+ * symbols could be promoted to global due to cross-file inlining.
+ * For such cases, clang compiler will add .llvm.<hash> suffix
+ * to those symbols to avoid potential naming conflict.
+ * Let us ignore .llvm.<hash> suffix during symbol comparison.
+ */
+ if (type == 'd') {
+ match = strstr(name, ".llvm.");
+ if (match)
+ *match = '\0';
+ }
if (strcmp(name, sym) == 0) {
*addr = value;
goto out;
@@ -201,29 +244,6 @@ out:
return err;
}
-void read_trace_pipe(void)
-{
- int trace_fd;
-
- if (access(TRACEFS_PIPE, F_OK) == 0)
- trace_fd = open(TRACEFS_PIPE, O_RDONLY, 0);
- else
- trace_fd = open(DEBUGFS_PIPE, O_RDONLY, 0);
- if (trace_fd < 0)
- return;
-
- while (1) {
- static char buf[4096];
- ssize_t sz;
-
- sz = read(trace_fd, buf, sizeof(buf) - 1);
- if (sz > 0) {
- buf[sz] = 0;
- puts(buf);
- }
- }
-}
-
ssize_t get_uprobe_offset(const void *addr)
{
size_t start, end, base;
@@ -381,3 +401,43 @@ out:
close(fd);
return err;
}
+
+int read_trace_pipe_iter(void (*cb)(const char *str, void *data), void *data, int iter)
+{
+ size_t buflen, n;
+ char *buf = NULL;
+ FILE *fp = NULL;
+
+ if (access(TRACEFS_PIPE, F_OK) == 0)
+ fp = fopen(TRACEFS_PIPE, "r");
+ else
+ fp = fopen(DEBUGFS_PIPE, "r");
+ if (!fp)
+ return -1;
+
+ /* We do not want to wait forever when iter is specified. */
+ if (iter)
+ fcntl(fileno(fp), F_SETFL, O_NONBLOCK);
+
+ while ((n = getline(&buf, &buflen, fp) >= 0) || errno == EAGAIN) {
+ if (n > 0)
+ cb(buf, data);
+ if (iter && !(--iter))
+ break;
+ }
+
+ free(buf);
+ if (fp)
+ fclose(fp);
+ return 0;
+}
+
+static void trace_pipe_cb(const char *str, void *data)
+{
+ printf("%s", str);
+}
+
+void read_trace_pipe(void)
+{
+ read_trace_pipe_iter(trace_pipe_cb, NULL, 0);
+}
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
index 04fd1da7079d..2ce873c9f9aa 100644
--- a/tools/testing/selftests/bpf/trace_helpers.h
+++ b/tools/testing/selftests/bpf/trace_helpers.h
@@ -13,6 +13,9 @@ struct ksym {
};
struct ksyms;
+typedef int (*ksym_cmp_t)(const void *p1, const void *p2);
+typedef int (*ksym_search_cmp_t)(const void *p1, const struct ksym *p2);
+
int load_kallsyms(void);
struct ksym *ksym_search(long key);
long ksym_get_addr(const char *name);
@@ -22,10 +25,16 @@ struct ksym *ksym_search_local(struct ksyms *ksyms, long key);
long ksym_get_addr_local(struct ksyms *ksyms, const char *name);
void free_kallsyms_local(struct ksyms *ksyms);
+struct ksyms *load_kallsyms_custom_local(ksym_cmp_t cmp_cb);
+struct ksym *search_kallsyms_custom_local(struct ksyms *ksyms, const void *p1,
+ ksym_search_cmp_t cmp_cb);
+
/* open kallsyms and find addresses on the fly, faster than load + search. */
int kallsyms_find(const char *sym, unsigned long long *addr);
void read_trace_pipe(void);
+int read_trace_pipe_iter(void (*cb)(const char *str, void *data),
+ void *data, int iter);
ssize_t get_uprobe_offset(const void *addr);
ssize_t get_rel_offset(uintptr_t addr);
diff --git a/tools/testing/selftests/bpf/uprobe_multi.c b/tools/testing/selftests/bpf/uprobe_multi.c
index a61ceab60b68..7ffa563ffeba 100644
--- a/tools/testing/selftests/bpf/uprobe_multi.c
+++ b/tools/testing/selftests/bpf/uprobe_multi.c
@@ -9,7 +9,7 @@
#define NAME(name, idx) PASTE(name, idx)
-#define DEF(name, idx) int NAME(name, idx)(void) { return 0; }
+#define DEF(name, idx) int __attribute__((weak)) NAME(name, idx)(void) { return 0; }
#define CALL(name, idx) NAME(name, idx)();
#define F(body, name, idx) body(name, idx)
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index ab25a81fd3a1..d0cdd156cd55 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -76,7 +76,7 @@
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
- .errstr = "R1 must have zero offset when passed to release func or trusted arg to kfunc",
+ .errstr = "arg#0 expected pointer to ctx, but got PTR",
.fixup_kfunc_btf_id = {
{ "bpf_kfunc_call_test_pass_ctx", 2 },
},
@@ -276,6 +276,19 @@
.result = ACCEPT,
},
{
+ "calls: invalid kfunc call: must provide (attach_prog_fd, btf_id) pair when freplace",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_EXT,
+ .result = REJECT,
+ .errstr = "Tracing programs must provide btf_id",
+ .fixup_kfunc_btf_id = {
+ { "bpf_dynptr_from_skb", 0 },
+ },
+},
+{
"calls: basic sanity",
.insns = {
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c
index 0a9293a57211..90643ccc221d 100644
--- a/tools/testing/selftests/bpf/verifier/precise.c
+++ b/tools/testing/selftests/bpf/verifier/precise.c
@@ -39,12 +39,12 @@
.result = VERBOSE_ACCEPT,
.errstr =
"mark_precise: frame0: last_idx 26 first_idx 20\
- mark_precise: frame0: regs=r2 stack= before 25\
- mark_precise: frame0: regs=r2 stack= before 24\
- mark_precise: frame0: regs=r2 stack= before 23\
- mark_precise: frame0: regs=r2 stack= before 22\
- mark_precise: frame0: regs=r2 stack= before 20\
- mark_precise: frame0: parent state regs=r2 stack=:\
+ mark_precise: frame0: regs=r2,r9 stack= before 25\
+ mark_precise: frame0: regs=r2,r9 stack= before 24\
+ mark_precise: frame0: regs=r2,r9 stack= before 23\
+ mark_precise: frame0: regs=r2,r9 stack= before 22\
+ mark_precise: frame0: regs=r2,r9 stack= before 20\
+ mark_precise: frame0: parent state regs=r2,r9 stack=:\
mark_precise: frame0: last_idx 19 first_idx 10\
mark_precise: frame0: regs=r2,r9 stack= before 19\
mark_precise: frame0: regs=r9 stack= before 18\
@@ -100,11 +100,11 @@
.errstr =
"26: (85) call bpf_probe_read_kernel#113\
mark_precise: frame0: last_idx 26 first_idx 22\
- mark_precise: frame0: regs=r2 stack= before 25\
- mark_precise: frame0: regs=r2 stack= before 24\
- mark_precise: frame0: regs=r2 stack= before 23\
- mark_precise: frame0: regs=r2 stack= before 22\
- mark_precise: frame0: parent state regs=r2 stack=:\
+ mark_precise: frame0: regs=r2,r9 stack= before 25\
+ mark_precise: frame0: regs=r2,r9 stack= before 24\
+ mark_precise: frame0: regs=r2,r9 stack= before 23\
+ mark_precise: frame0: regs=r2,r9 stack= before 22\
+ mark_precise: frame0: parent state regs=r2,r9 stack=:\
mark_precise: frame0: last_idx 20 first_idx 20\
mark_precise: frame0: regs=r2,r9 stack= before 20\
mark_precise: frame0: parent state regs=r2,r9 stack=:\
diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
index 244d4996e06e..b2854238d4a0 100644
--- a/tools/testing/selftests/bpf/veristat.c
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -792,10 +792,13 @@ static int parse_stats(const char *stats_str, struct stat_specs *specs)
while ((next = strtok_r(state ? NULL : input, ",", &state))) {
err = parse_stat(next, specs);
- if (err)
+ if (err) {
+ free(input);
return err;
+ }
}
+ free(input);
return 0;
}
diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
index bdf5d8180067..6f9956eed797 100644
--- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
@@ -495,20 +495,6 @@ peek:
return 0;
}
-struct ethtool_channels {
- __u32 cmd;
- __u32 max_rx;
- __u32 max_tx;
- __u32 max_other;
- __u32 max_combined;
- __u32 rx_count;
- __u32 tx_count;
- __u32 other_count;
- __u32 combined_count;
-};
-
-#define ETHTOOL_GCHANNELS 0x0000003c /* Get no of channels */
-
static int rxq_num(const char *ifname)
{
struct ethtool_channels ch = {
@@ -595,6 +581,8 @@ static void cleanup(void)
if (bpf_obj)
xdp_hw_metadata__destroy(bpf_obj);
+
+ free((void *)saved_hwtstamp_ifname);
}
static void handle_signal(int sig)
diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index b1102ee13faa..8144fd145237 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -81,6 +81,7 @@
#include <linux/mman.h>
#include <linux/netdev.h>
#include <linux/bitmap.h>
+#include <linux/ethtool.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <locale.h>
@@ -105,11 +106,15 @@
#include "../kselftest.h"
#include "xsk_xdp_common.h"
+#include <network_helpers.h>
+
static bool opt_verbose;
static bool opt_print_tests;
static enum test_mode opt_mode = TEST_MODE_ALL;
static u32 opt_run_test = RUN_ALL_TESTS;
+void test__fail(void) { /* for network_helpers.c */ }
+
static void __exit_with_error(int error, const char *file, const char *func, int line)
{
ksft_test_result_fail("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error,
@@ -191,6 +196,12 @@ static int xsk_configure_umem(struct ifobject *ifobj, struct xsk_umem_info *umem
};
int ret;
+ if (umem->fill_size)
+ cfg.fill_size = umem->fill_size;
+
+ if (umem->comp_size)
+ cfg.comp_size = umem->comp_size;
+
if (umem->unaligned_mode)
cfg.flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG;
@@ -239,7 +250,7 @@ static void enable_busy_poll(struct xsk_socket_info *xsk)
(void *)&sock_opt, sizeof(sock_opt)) < 0)
exit_with_error(errno);
- sock_opt = BATCH_SIZE;
+ sock_opt = xsk->batch_size;
if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL_BUDGET,
(void *)&sock_opt, sizeof(sock_opt)) < 0)
exit_with_error(errno);
@@ -260,6 +271,10 @@ static int __xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_i
cfg.bind_flags |= XDP_SHARED_UMEM;
if (ifobject->mtu > MAX_ETH_PKT_SIZE)
cfg.bind_flags |= XDP_USE_SG;
+ if (umem->comp_size)
+ cfg.tx_size = umem->comp_size;
+ if (umem->fill_size)
+ cfg.rx_size = umem->fill_size;
txr = ifobject->tx_on ? &xsk->tx : NULL;
rxr = ifobject->rx_on ? &xsk->rx : NULL;
@@ -409,6 +424,33 @@ static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj
}
}
+static int set_ring_size(struct ifobject *ifobj)
+{
+ int ret;
+ u32 ctr = 0;
+
+ while (ctr++ < SOCK_RECONF_CTR) {
+ ret = set_hw_ring_size(ifobj->ifname, &ifobj->ring);
+ if (!ret)
+ break;
+
+ /* Retry if it fails */
+ if (ctr >= SOCK_RECONF_CTR || errno != EBUSY)
+ return -errno;
+
+ usleep(USLEEP_MAX);
+ }
+
+ return ret;
+}
+
+static int hw_ring_size_reset(struct ifobject *ifobj)
+{
+ ifobj->ring.tx_pending = ifobj->set_ring.default_tx;
+ ifobj->ring.rx_pending = ifobj->set_ring.default_rx;
+ return set_ring_size(ifobj);
+}
+
static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
struct ifobject *ifobj_rx)
{
@@ -439,6 +481,7 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
for (j = 0; j < MAX_SOCKETS; j++) {
memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j]));
ifobj->xsk_arr[j].rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+ ifobj->xsk_arr[j].batch_size = DEFAULT_BATCH_SIZE;
if (i == 0)
ifobj->xsk_arr[j].pkt_stream = test->tx_pkt_stream_default;
else
@@ -451,12 +494,16 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
}
}
+ if (ifobj_tx->hw_ring_size_supp)
+ hw_ring_size_reset(ifobj_tx);
+
test->ifobj_tx = ifobj_tx;
test->ifobj_rx = ifobj_rx;
test->current_step = 0;
test->total_steps = 1;
test->nb_sockets = 1;
test->fail = false;
+ test->set_ring = false;
test->mtu = MAX_ETH_PKT_SIZE;
test->xdp_prog_rx = ifobj_rx->xdp_progs->progs.xsk_def_prog;
test->xskmap_rx = ifobj_rx->xdp_progs->maps.xsk;
@@ -1087,7 +1134,7 @@ static int __receive_pkts(struct test_spec *test, struct xsk_socket_info *xsk)
return TEST_CONTINUE;
}
- rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
+ rcvd = xsk_ring_cons__peek(&xsk->rx, xsk->batch_size, &idx_rx);
if (!rcvd)
return TEST_CONTINUE;
@@ -1239,7 +1286,8 @@ static int __send_pkts(struct ifobject *ifobject, struct xsk_socket_info *xsk, b
buffer_len = pkt_get_buffer_len(umem, pkt_stream->max_pkt_len);
/* pkts_in_flight might be negative if many invalid packets are sent */
- if (pkts_in_flight >= (int)((umem_size(umem) - BATCH_SIZE * buffer_len) / buffer_len)) {
+ if (pkts_in_flight >= (int)((umem_size(umem) - xsk->batch_size * buffer_len) /
+ buffer_len)) {
ret = kick_tx(xsk);
if (ret)
return TEST_FAILURE;
@@ -1249,7 +1297,7 @@ static int __send_pkts(struct ifobject *ifobject, struct xsk_socket_info *xsk, b
fds.fd = xsk_socket__fd(xsk->xsk);
fds.events = POLLOUT;
- while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE) {
+ while (xsk_ring_prod__reserve(&xsk->tx, xsk->batch_size, &idx) < xsk->batch_size) {
if (use_poll) {
ret = poll(&fds, 1, POLL_TMOUT);
if (timeout) {
@@ -1269,10 +1317,10 @@ static int __send_pkts(struct ifobject *ifobject, struct xsk_socket_info *xsk, b
}
}
- complete_pkts(xsk, BATCH_SIZE);
+ complete_pkts(xsk, xsk->batch_size);
}
- for (i = 0; i < BATCH_SIZE; i++) {
+ for (i = 0; i < xsk->batch_size; i++) {
struct pkt *pkt = pkt_stream_get_next_tx_pkt(pkt_stream);
u32 nb_frags_left, nb_frags, bytes_written = 0;
@@ -1280,9 +1328,9 @@ static int __send_pkts(struct ifobject *ifobject, struct xsk_socket_info *xsk, b
break;
nb_frags = pkt_nb_frags(umem->frame_size, pkt_stream, pkt);
- if (nb_frags > BATCH_SIZE - i) {
+ if (nb_frags > xsk->batch_size - i) {
pkt_stream_cancel(pkt_stream);
- xsk_ring_prod__cancel(&xsk->tx, BATCH_SIZE - i);
+ xsk_ring_prod__cancel(&xsk->tx, xsk->batch_size - i);
break;
}
nb_frags_left = nb_frags;
@@ -1370,7 +1418,7 @@ static int wait_for_tx_completion(struct xsk_socket_info *xsk)
return TEST_FAILURE;
}
- complete_pkts(xsk, BATCH_SIZE);
+ complete_pkts(xsk, xsk->batch_size);
}
return TEST_PASS;
@@ -1578,7 +1626,7 @@ static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream
if (umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS)
buffers_to_fill = umem->num_frames;
else
- buffers_to_fill = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+ buffers_to_fill = umem->fill_size;
ret = xsk_ring_prod__reserve(&umem->fq, buffers_to_fill, &idx);
if (ret != buffers_to_fill)
@@ -1860,6 +1908,18 @@ static int testapp_validate_traffic(struct test_spec *test)
return TEST_SKIP;
}
+ if (test->set_ring) {
+ if (ifobj_tx->hw_ring_size_supp) {
+ if (set_ring_size(ifobj_tx)) {
+ ksft_test_result_skip("Failed to change HW ring size.\n");
+ return TEST_FAILURE;
+ }
+ } else {
+ ksft_test_result_skip("Changing HW ring size not supported.\n");
+ return TEST_SKIP;
+ }
+ }
+
xsk_attach_xdp_progs(test, ifobj_rx, ifobj_tx);
return __testapp_validate_traffic(test, ifobj_rx, ifobj_tx);
}
@@ -2373,6 +2433,54 @@ static int testapp_xdp_metadata_mb(struct test_spec *test)
return testapp_xdp_metadata_copy(test);
}
+static int testapp_hw_sw_min_ring_size(struct test_spec *test)
+{
+ int ret;
+
+ test->set_ring = true;
+ test->total_steps = 2;
+ test->ifobj_tx->ring.tx_pending = DEFAULT_BATCH_SIZE;
+ test->ifobj_tx->ring.rx_pending = DEFAULT_BATCH_SIZE * 2;
+ test->ifobj_tx->xsk->batch_size = 1;
+ test->ifobj_rx->xsk->batch_size = 1;
+ ret = testapp_validate_traffic(test);
+ if (ret)
+ return ret;
+
+ /* Set batch size to hw_ring_size - 1 */
+ test->ifobj_tx->xsk->batch_size = DEFAULT_BATCH_SIZE - 1;
+ test->ifobj_rx->xsk->batch_size = DEFAULT_BATCH_SIZE - 1;
+ return testapp_validate_traffic(test);
+}
+
+static int testapp_hw_sw_max_ring_size(struct test_spec *test)
+{
+ u32 max_descs = XSK_RING_PROD__DEFAULT_NUM_DESCS * 4;
+ int ret;
+
+ test->set_ring = true;
+ test->total_steps = 2;
+ test->ifobj_tx->ring.tx_pending = test->ifobj_tx->ring.tx_max_pending;
+ test->ifobj_tx->ring.rx_pending = test->ifobj_tx->ring.rx_max_pending;
+ test->ifobj_rx->umem->num_frames = max_descs;
+ test->ifobj_rx->umem->fill_size = max_descs;
+ test->ifobj_rx->umem->comp_size = max_descs;
+ test->ifobj_tx->xsk->batch_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+ test->ifobj_rx->xsk->batch_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+
+ ret = testapp_validate_traffic(test);
+ if (ret)
+ return ret;
+
+ /* Set batch_size to 8152 for testing, as the ice HW ignores the 3 lowest bits when
+ * updating the Rx HW tail register.
+ */
+ test->ifobj_tx->xsk->batch_size = test->ifobj_tx->ring.tx_max_pending - 8;
+ test->ifobj_rx->xsk->batch_size = test->ifobj_tx->ring.tx_max_pending - 8;
+ pkt_stream_replace(test, max_descs, MIN_PKT_SIZE);
+ return testapp_validate_traffic(test);
+}
+
static void run_pkt_test(struct test_spec *test)
{
int ret;
@@ -2477,7 +2585,9 @@ static const struct test_spec tests[] = {
{.name = "ALIGNED_INV_DESC_MULTI_BUFF", .test_func = testapp_aligned_inv_desc_mb},
{.name = "UNALIGNED_INV_DESC_MULTI_BUFF", .test_func = testapp_unaligned_inv_desc_mb},
{.name = "TOO_MANY_FRAGS", .test_func = testapp_too_many_frags},
-};
+ {.name = "HW_SW_MIN_RING_SIZE", .test_func = testapp_hw_sw_min_ring_size},
+ {.name = "HW_SW_MAX_RING_SIZE", .test_func = testapp_hw_sw_max_ring_size},
+ };
static void print_tests(void)
{
@@ -2497,6 +2607,7 @@ int main(int argc, char **argv)
int modes = TEST_MODE_SKB + 1;
struct test_spec test;
bool shared_netdev;
+ int ret;
/* Use libbpf 1.0 API mode */
libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
@@ -2534,6 +2645,13 @@ int main(int argc, char **argv)
modes++;
}
+ ret = get_hw_ring_size(ifobj_tx->ifname, &ifobj_tx->ring);
+ if (!ret) {
+ ifobj_tx->hw_ring_size_supp = true;
+ ifobj_tx->set_ring.default_tx = ifobj_tx->ring.tx_pending;
+ ifobj_tx->set_ring.default_rx = ifobj_tx->ring.rx_pending;
+ }
+
init_iface(ifobj_rx, worker_testapp_validate_rx);
init_iface(ifobj_tx, worker_testapp_validate_tx);
@@ -2581,6 +2699,9 @@ int main(int argc, char **argv)
}
}
+ if (ifobj_tx->hw_ring_size_supp)
+ hw_ring_size_reset(ifobj_tx);
+
pkt_stream_delete(tx_pkt_stream_default);
pkt_stream_delete(rx_pkt_stream_default);
xsk_unload_xdp_programs(ifobj_tx);
diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h
index f174df2d693f..885c948c5d83 100644
--- a/tools/testing/selftests/bpf/xskxceiver.h
+++ b/tools/testing/selftests/bpf/xskxceiver.h
@@ -44,7 +44,7 @@
#define MAX_ETH_JUMBO_SIZE 9000
#define USLEEP_MAX 10000
#define SOCK_RECONF_CTR 10
-#define BATCH_SIZE 64
+#define DEFAULT_BATCH_SIZE 64
#define POLL_TMOUT 1000
#define THREAD_TMOUT 3
#define DEFAULT_PKT_CNT (4 * 1024)
@@ -80,6 +80,8 @@ struct xsk_umem_info {
void *buffer;
u32 frame_size;
u32 base_addr;
+ u32 fill_size;
+ u32 comp_size;
bool unaligned_mode;
};
@@ -91,6 +93,7 @@ struct xsk_socket_info {
struct pkt_stream *pkt_stream;
u32 outstanding_tx;
u32 rxqsize;
+ u32 batch_size;
u8 dst_mac[ETH_ALEN];
u8 src_mac[ETH_ALEN];
};
@@ -113,6 +116,11 @@ struct pkt_stream {
bool verbatim;
};
+struct set_hw_ring {
+ u32 default_tx;
+ u32 default_rx;
+};
+
struct ifobject;
struct test_spec;
typedef int (*validation_func_t)(struct ifobject *ifobj);
@@ -129,6 +137,8 @@ struct ifobject {
struct xsk_xdp_progs *xdp_progs;
struct bpf_map *xskmap;
struct bpf_program *xdp_prog;
+ struct ethtool_ringparam ring;
+ struct set_hw_ring set_ring;
enum test_mode mode;
int ifindex;
int mtu;
@@ -145,6 +155,7 @@ struct ifobject {
bool unaligned_supp;
bool multi_buff_supp;
bool multi_buff_zc_supp;
+ bool hw_ring_size_supp;
};
struct test_spec {
@@ -162,6 +173,7 @@ struct test_spec {
u16 current_step;
u16 nb_sockets;
bool fail;
+ bool set_ring;
enum test_mode mode;
char name[MAX_TEST_NAME_SIZE];
};
diff --git a/tools/testing/selftests/breakpoints/step_after_suspend_test.c b/tools/testing/selftests/breakpoints/step_after_suspend_test.c
index b8703c499d28..dfec31fb9b30 100644
--- a/tools/testing/selftests/breakpoints/step_after_suspend_test.c
+++ b/tools/testing/selftests/breakpoints/step_after_suspend_test.c
@@ -130,7 +130,6 @@ int run_test(int cpu)
void suspend(void)
{
int power_state_fd;
- struct sigevent event = {};
int timerfd;
int err;
struct itimerspec spec = {};
diff --git a/tools/testing/selftests/cachestat/test_cachestat.c b/tools/testing/selftests/cachestat/test_cachestat.c
index b171fd53b004..632ab44737ec 100644
--- a/tools/testing/selftests/cachestat/test_cachestat.c
+++ b/tools/testing/selftests/cachestat/test_cachestat.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE
+#define __SANE_USERSPACE_TYPES__ // Use ll64
#include <stdio.h>
#include <stdbool.h>
diff --git a/tools/testing/selftests/capabilities/test_execve.c b/tools/testing/selftests/capabilities/test_execve.c
index 7cde07a5df78..47bad7ddc5bc 100644
--- a/tools/testing/selftests/capabilities/test_execve.c
+++ b/tools/testing/selftests/capabilities/test_execve.c
@@ -82,7 +82,7 @@ static bool create_and_enter_ns(uid_t inner_uid)
{
uid_t outer_uid;
gid_t outer_gid;
- int i;
+ int i, ret;
bool have_outer_privilege;
outer_uid = getuid();
@@ -97,7 +97,10 @@ static bool create_and_enter_ns(uid_t inner_uid)
ksft_exit_fail_msg("setresuid - %s\n", strerror(errno));
// Re-enable effective caps
- capng_get_caps_process();
+ ret = capng_get_caps_process();
+ if (ret == -1)
+ ksft_exit_fail_msg("capng_get_caps_process failed\n");
+
for (i = 0; i < CAP_LAST_CAP; i++)
if (capng_have_capability(CAPNG_PERMITTED, i))
capng_update(CAPNG_ADD, CAPNG_EFFECTIVE, i);
@@ -207,6 +210,7 @@ static void exec_validate_cap(bool eff, bool perm, bool inh, bool ambient)
static int do_tests(int uid, const char *our_path)
{
+ int ret;
bool have_outer_privilege = create_and_enter_ns(uid);
int ourpath_fd = open(our_path, O_RDONLY | O_DIRECTORY);
@@ -250,7 +254,9 @@ static int do_tests(int uid, const char *our_path)
ksft_exit_fail_msg("chmod - %s\n", strerror(errno));
}
- capng_get_caps_process();
+ ret = capng_get_caps_process();
+ if (ret == -1)
+ ksft_exit_fail_msg("capng_get_caps_process failed\n");
/* Make sure that i starts out clear */
capng_update(CAPNG_DROP, CAPNG_INHERITABLE, CAP_NET_BIND_SERVICE);
diff --git a/tools/testing/selftests/capabilities/validate_cap.c b/tools/testing/selftests/capabilities/validate_cap.c
index 60b4e7b716a7..65f2a1c89239 100644
--- a/tools/testing/selftests/capabilities/validate_cap.c
+++ b/tools/testing/selftests/capabilities/validate_cap.c
@@ -28,6 +28,7 @@ static bool bool_arg(char **argv, int i)
int main(int argc, char **argv)
{
const char *atsec = "";
+ int ret;
/*
* Be careful just in case a setgid or setcapped copy of this
@@ -44,7 +45,11 @@ int main(int argc, char **argv)
atsec = " (AT_SECURE is not set)";
#endif
- capng_get_caps_process();
+ ret = capng_get_caps_process();
+ if (ret == -1) {
+ ksft_print_msg("capng_get_caps_process failed\n");
+ return 1;
+ }
if (capng_have_capability(CAPNG_EFFECTIVE, CAP_NET_BIND_SERVICE) != bool_arg(argv, 1)) {
ksft_print_msg("Wrong effective state%s\n", atsec);
diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore
index 2732e0b29271..952e4448bf07 100644
--- a/tools/testing/selftests/cgroup/.gitignore
+++ b/tools/testing/selftests/cgroup/.gitignore
@@ -1,11 +1,12 @@
# SPDX-License-Identifier: GPL-2.0-only
-test_memcontrol
test_core
-test_freezer
-test_kmem
-test_kill
test_cpu
test_cpuset
-test_zswap
+test_freezer
test_hugetlb_memcg
+test_kill
+test_kmem
+test_memcontrol
+test_pids
+test_zswap
wait_inotify
diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile
index 00b441928909..1b897152bab6 100644
--- a/tools/testing/selftests/cgroup/Makefile
+++ b/tools/testing/selftests/cgroup/Makefile
@@ -4,28 +4,31 @@ CFLAGS += -Wall -pthread
all: ${HELPER_PROGS}
TEST_FILES := with_stress.sh
-TEST_PROGS := test_stress.sh test_cpuset_prs.sh
+TEST_PROGS := test_stress.sh test_cpuset_prs.sh test_cpuset_v1_hp.sh
TEST_GEN_FILES := wait_inotify
-TEST_GEN_PROGS = test_memcontrol
-TEST_GEN_PROGS += test_kmem
-TEST_GEN_PROGS += test_core
-TEST_GEN_PROGS += test_freezer
-TEST_GEN_PROGS += test_kill
+# Keep the lists lexicographically sorted
+TEST_GEN_PROGS = test_core
TEST_GEN_PROGS += test_cpu
TEST_GEN_PROGS += test_cpuset
-TEST_GEN_PROGS += test_zswap
+TEST_GEN_PROGS += test_freezer
TEST_GEN_PROGS += test_hugetlb_memcg
+TEST_GEN_PROGS += test_kill
+TEST_GEN_PROGS += test_kmem
+TEST_GEN_PROGS += test_memcontrol
+TEST_GEN_PROGS += test_pids
+TEST_GEN_PROGS += test_zswap
LOCAL_HDRS += $(selfdir)/clone3/clone3_selftests.h $(selfdir)/pidfd/pidfd.h
include ../lib.mk
-$(OUTPUT)/test_memcontrol: cgroup_util.c
-$(OUTPUT)/test_kmem: cgroup_util.c
$(OUTPUT)/test_core: cgroup_util.c
-$(OUTPUT)/test_freezer: cgroup_util.c
-$(OUTPUT)/test_kill: cgroup_util.c
$(OUTPUT)/test_cpu: cgroup_util.c
$(OUTPUT)/test_cpuset: cgroup_util.c
-$(OUTPUT)/test_zswap: cgroup_util.c
+$(OUTPUT)/test_freezer: cgroup_util.c
$(OUTPUT)/test_hugetlb_memcg: cgroup_util.c
+$(OUTPUT)/test_kill: cgroup_util.c
+$(OUTPUT)/test_kmem: cgroup_util.c
+$(OUTPUT)/test_memcontrol: cgroup_util.c
+$(OUTPUT)/test_pids: cgroup_util.c
+$(OUTPUT)/test_zswap: cgroup_util.c
diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c
index 0340d4ca8f51..432db923bced 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.c
+++ b/tools/testing/selftests/cgroup/cgroup_util.c
@@ -195,10 +195,10 @@ int cg_write_numeric(const char *cgroup, const char *control, long value)
return cg_write(cgroup, control, buf);
}
-int cg_find_unified_root(char *root, size_t len)
+int cg_find_unified_root(char *root, size_t len, bool *nsdelegate)
{
char buf[10 * PAGE_SIZE];
- char *fs, *mount, *type;
+ char *fs, *mount, *type, *options;
const char delim[] = "\n\t ";
if (read_text("/proc/self/mounts", buf, sizeof(buf)) <= 0)
@@ -211,12 +211,14 @@ int cg_find_unified_root(char *root, size_t len)
for (fs = strtok(buf, delim); fs; fs = strtok(NULL, delim)) {
mount = strtok(NULL, delim);
type = strtok(NULL, delim);
- strtok(NULL, delim);
+ options = strtok(NULL, delim);
strtok(NULL, delim);
strtok(NULL, delim);
if (strcmp(type, "cgroup2") == 0) {
strncpy(root, mount, len);
+ if (nsdelegate)
+ *nsdelegate = !!strstr(options, "nsdelegate");
return 0;
}
}
diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h
index 1df7f202214a..e8d04ac9e3d2 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.h
+++ b/tools/testing/selftests/cgroup/cgroup_util.h
@@ -18,10 +18,10 @@
*/
static inline int values_close(long a, long b, int err)
{
- return abs(a - b) <= (a + b) / 100 * err;
+ return labs(a - b) <= (a + b) / 100 * err;
}
-extern int cg_find_unified_root(char *root, size_t len);
+extern int cg_find_unified_root(char *root, size_t len, bool *nsdelegate);
extern char *cg_name(const char *root, const char *name);
extern char *cg_name_indexed(const char *root, const char *name, int index);
extern char *cg_control(const char *cgroup, const char *control);
diff --git a/tools/testing/selftests/cgroup/config b/tools/testing/selftests/cgroup/config
index 97d549ee894f..39f979690dd3 100644
--- a/tools/testing/selftests/cgroup/config
+++ b/tools/testing/selftests/cgroup/config
@@ -3,5 +3,4 @@ CONFIG_CGROUP_CPUACCT=y
CONFIG_CGROUP_FREEZER=y
CONFIG_CGROUP_SCHED=y
CONFIG_MEMCG=y
-CONFIG_MEMCG_KMEM=y
CONFIG_PAGE_COUNTER=y
diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c
index 80aa6b2373b9..a5672a91d273 100644
--- a/tools/testing/selftests/cgroup/test_core.c
+++ b/tools/testing/selftests/cgroup/test_core.c
@@ -18,6 +18,8 @@
#include "../kselftest.h"
#include "cgroup_util.h"
+static bool nsdelegate;
+
static int touch_anon(char *buf, size_t size)
{
int fd;
@@ -775,6 +777,9 @@ static int test_cgcore_lesser_ns_open(const char *root)
pid_t pid;
int status;
+ if (!nsdelegate)
+ return KSFT_SKIP;
+
cg_test_a = cg_name(root, "cg_test_a");
cg_test_b = cg_name(root, "cg_test_b");
@@ -862,7 +867,7 @@ int main(int argc, char *argv[])
char root[PATH_MAX];
int i, ret = EXIT_SUCCESS;
- if (cg_find_unified_root(root, sizeof(root)))
+ if (cg_find_unified_root(root, sizeof(root), &nsdelegate))
ksft_exit_skip("cgroup v2 isn't mounted\n");
if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
diff --git a/tools/testing/selftests/cgroup/test_cpu.c b/tools/testing/selftests/cgroup/test_cpu.c
index 24020a2c68dc..dad2ed82f3ef 100644
--- a/tools/testing/selftests/cgroup/test_cpu.c
+++ b/tools/testing/selftests/cgroup/test_cpu.c
@@ -237,7 +237,7 @@ run_cpucg_weight_test(
{
int ret = KSFT_FAIL, i;
char *parent = NULL;
- struct cpu_hogger children[3] = {NULL};
+ struct cpu_hogger children[3] = {};
parent = cg_name(root, "cpucg_test_0");
if (!parent)
@@ -408,7 +408,7 @@ run_cpucg_nested_weight_test(const char *root, bool overprovisioned)
{
int ret = KSFT_FAIL, i;
char *parent = NULL, *child = NULL;
- struct cpu_hogger leaf[3] = {NULL};
+ struct cpu_hogger leaf[3] = {};
long nested_leaf_usage, child_usage;
int nprocs = get_nprocs();
@@ -700,7 +700,7 @@ int main(int argc, char *argv[])
char root[PATH_MAX];
int i, ret = EXIT_SUCCESS;
- if (cg_find_unified_root(root, sizeof(root)))
+ if (cg_find_unified_root(root, sizeof(root), NULL))
ksft_exit_skip("cgroup v2 isn't mounted\n");
if (cg_read_strstr(root, "cgroup.subtree_control", "cpu"))
diff --git a/tools/testing/selftests/cgroup/test_cpuset.c b/tools/testing/selftests/cgroup/test_cpuset.c
index b061ed1e05b4..4034d14ba69a 100644
--- a/tools/testing/selftests/cgroup/test_cpuset.c
+++ b/tools/testing/selftests/cgroup/test_cpuset.c
@@ -249,7 +249,7 @@ int main(int argc, char *argv[])
char root[PATH_MAX];
int i, ret = EXIT_SUCCESS;
- if (cg_find_unified_root(root, sizeof(root)))
+ if (cg_find_unified_root(root, sizeof(root), NULL))
ksft_exit_skip("cgroup v2 isn't mounted\n");
if (cg_read_strstr(root, "cgroup.subtree_control", "cpuset"))
diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
index b5eb1be2248c..7c08cc153367 100755
--- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh
+++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
@@ -28,6 +28,14 @@ CPULIST=$(cat $CGROUP2/cpuset.cpus.effective)
NR_CPUS=$(lscpu | grep "^CPU(s):" | sed -e "s/.*:[[:space:]]*//")
[[ $NR_CPUS -lt 8 ]] && skip_test "Test needs at least 8 cpus available!"
+# Check to see if /dev/console exists and is writable
+if [[ -c /dev/console && -w /dev/console ]]
+then
+ CONSOLE=/dev/console
+else
+ CONSOLE=/dev/null
+fi
+
# Set verbose flag and delay factor
PROG=$1
VERBOSE=0
@@ -103,8 +111,8 @@ console_msg()
{
MSG=$1
echo "$MSG"
- echo "" > /dev/console
- echo "$MSG" > /dev/console
+ echo "" > $CONSOLE
+ echo "$MSG" > $CONSOLE
pause 0.01
}
@@ -161,6 +169,14 @@ test_add_proc()
# T = put a task into cgroup
# O<c>=<v> = Write <v> to CPU online file of <c>
#
+# ECPUs - effective CPUs of cpusets
+# Pstate - partition root state
+# ISOLCPUS - isolated CPUs (<icpus>[,<icpus2>])
+#
+# Note that if there are 2 fields in ISOLCPUS, the first one is for
+# sched-debug matching which includes offline CPUs and single-CPU partitions
+# while the second one is for matching cpuset.cpus.isolated.
+#
SETUP_A123_PARTITIONS="C1-3:P1:S+ C2-3:P1:S+ C3:P1"
TEST_MATRIX=(
# old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS
@@ -220,23 +236,29 @@ TEST_MATRIX=(
" C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3:P2 . . 0 A1:0-1,A2:2-3,A3:2-3 A1:P0,A2:P2 2-3"
" C0-3:S+ C1-3:S+ C2-3 . X2-3 X3:P2 . . 0 A1:0-2,A2:3,A3:3 A1:P0,A2:P2 3"
" C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2 . 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3"
- " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:C3 . 0 A1:0-2,A2:1-2,A3:3 A1:P0,A3:P2 3"
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:C3 . 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3"
" C0-3:S+ C1-3:S+ C2-3 C2-3 . . . P2 0 A1:0-3,A2:1-3,A3:2-3,B1:2-3 A1:P0,A3:P0,B1:P-2"
" C0-3:S+ C1-3:S+ C2-3 C4-5 . . . P2 0 B1:4-5 B1:P2 4-5"
" C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2 0 A3:2-3,B1:4 A3:P2,B1:P2 2-4"
" C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2:C1-3 P2 0 A3:2-3,B1:4 A3:P2,B1:P2 2-4"
" C0-3:S+ C1-3:S+ C2-3 C4 X1-3 X1-3:P2 P2 . 0 A2:1,A3:2-3 A2:P2,A3:P2 1-3"
" C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2:C4-5 0 A3:2-3,B1:4-5 A3:P2,B1:P2 2-5"
+ " C4:X0-3:S+ X1-3:S+ X2-3 . . P2 . . 0 A1:4,A2:1-3,A3:1-3 A2:P2 1-3"
+ " C4:X0-3:S+ X1-3:S+ X2-3 . . . P2 . 0 A1:4,A2:4,A3:2-3 A3:P2 2-3"
# Nested remote/local partition tests
" C0-3:S+ C1-3:S+ C2-3 C4-5 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:,A3:2-3,B1:4-5 \
A1:P0,A2:P1,A3:P2,B1:P1 2-3"
" C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:,A3:2-3,B1:4 \
A1:P0,A2:P1,A3:P2,B1:P1 2-4,2-3"
+ " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 . P1 0 A1:0-1,A2:2-3,A3:2-3,B1:4 \
+ A1:P0,A2:P1,A3:P0,B1:P1"
" C0-3:S+ C1-3:S+ C3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:2,A3:3,B1:4 \
A1:P0,A2:P1,A3:P2,B1:P1 2-4,3"
" C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X4:P1 . 0 A1:0-1,A2:2-3,A3:4 \
A1:P0,A2:P2,A3:P1 2-4,2-3"
+ " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X3-4:P1 . 0 A1:0-1,A2:2,A3:3-4 \
+ A1:P0,A2:P2,A3:P1 2"
" C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \
. . X5 . . 0 A1:0-4,A2:1-4,A3:2-4 \
A1:P0,A2:P-2,A3:P-1"
@@ -262,8 +284,8 @@ TEST_MATRIX=(
. . X2-3 P2 . . 0 A1:0-2,A2:3,XA2:3 A2:P2 3"
# Invalid to valid local partition direct transition tests
- " C1-3:S+:P2 C2-3:X1:P2 . . . . . . 0 A1:1-3,XA1:1-3,A2:2-3:XA2: A1:P2,A2:P-2 1-3"
- " C1-3:S+:P2 C2-3:X1:P2 . . . X3:P2 . . 0 A1:1-2,XA1:1-3,A2:3:XA2:3 A1:P2,A2:P2 1-3"
+ " C1-3:S+:P2 X4:P2 . . . . . . 0 A1:1-3,XA1:1-3,A2:1-3:XA2: A1:P2,A2:P-2 1-3"
+ " C1-3:S+:P2 X4:P2 . . . X3:P2 . . 0 A1:1-2,XA1:1-3,A2:3:XA2:3 A1:P2,A2:P2 1-3"
" C0-3:P2 . . C4-6 C0-4 . . . 0 A1:0-4,B1:4-6 A1:P-2,B1:P0"
" C0-3:P2 . . C4-6 C0-4:C0-3 . . . 0 A1:0-3,B1:4-6 A1:P2,B1:P0 0-3"
" C0-3:P2 . . C3-5:C4-5 . . . . 0 A1:0-3,B1:4-5 A1:P2,B1:P0 0-3"
@@ -274,32 +296,26 @@ TEST_MATRIX=(
" C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \
. . X4 . . 0 A1:1-3,A2:1-3,A3:2-3,XA2:,XA3: A1:P2,A2:P-2,A3:P-2 1-3"
" C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \
- . . C4 . . 0 A1:1-3,A2:1-3,A3:2-3,XA2:,XA3: A1:P2,A2:P-2,A3:P-2 1-3"
+ . . C4:X . . 0 A1:1-3,A2:1-3,A3:2-3,XA2:,XA3: A1:P2,A2:P-2,A3:P-2 1-3"
# Local partition CPU change tests
" C0-5:S+:P2 C4-5:S+:P1 . . . C3-5 . . 0 A1:0-2,A2:3-5 A1:P2,A2:P1 0-2"
" C0-5:S+:P2 C4-5:S+:P1 . . C1-5 . . . 0 A1:1-3,A2:4-5 A1:P2,A2:P1 1-3"
# cpus_allowed/exclusive_cpus update tests
" C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \
- . C4 . P2 . 0 A1:4,A2:4,XA2:,XA3:,A3:4 \
+ . X:C4 . P2 . 0 A1:4,A2:4,XA2:,XA3:,A3:4 \
A1:P0,A3:P-2"
" C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \
. X1 . P2 . 0 A1:0-3,A2:1-3,XA1:1,XA2:,XA3:,A3:2-3 \
A1:P0,A3:P-2"
" C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \
- . . C3 P2 . 0 A1:0-2,A2:0-2,XA2:3,XA3:3,A3:3 \
- A1:P0,A3:P2 3"
- " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \
. . X3 P2 . 0 A1:0-2,A2:1-2,XA2:3,XA3:3,A3:3 \
A1:P0,A3:P2 3"
" C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \
. . X3 . . 0 A1:0-3,A2:1-3,XA2:3,XA3:3,A3:2-3 \
A1:P0,A3:P-2"
" C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \
- . . C3 . . 0 A1:0-3,A2:3,XA2:3,XA3:3,A3:3 \
- A1:P0,A3:P-2"
- " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \
- . C4 . . . 0 A1:4,A2:4,A3:4,XA1:,XA2:,XA3 \
+ . X4 . . . 0 A1:0-3,A2:1-3,A3:2-3,XA1:4,XA2:,XA3 \
A1:P0,A3:P-2"
# old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS
@@ -346,6 +362,9 @@ TEST_MATRIX=(
" C0-1:P1 . . P1:C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P-1,B1:P-1"
" C0-1 . . P1:C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P0,B1:P-1"
+ # cpuset.cpus can overlap with sibling cpuset.cpus.exclusive but not subsumed by it
+ " C0-3 . . C4-5 X5 . . . 0 A1:0-3,B1:4-5"
+
# old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS
# ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ --------
# Failure cases:
@@ -355,6 +374,9 @@ TEST_MATRIX=(
# Changes to cpuset.cpus.exclusive that violate exclusivity rule is rejected
" C0-3 . . C4-5 X0-3 . . X3-5 1 A1:0-3,B1:4-5"
+
+ # cpuset.cpus cannot be a subset of sibling cpuset.cpus.exclusive
+ " C0-3 . . C4-5 X3-5 . . . 1 A1:0-3,B1:4-5"
)
#
@@ -556,14 +578,15 @@ check_cgroup_states()
do
set -- $(echo $CHK | sed -e "s/:/ /g")
CGRP=$1
+ CGRP_DIR=$CGRP
STATE=$2
FILE=
EVAL=$(expr substr $STATE 2 2)
- [[ $CGRP = A2 ]] && CGRP=A1/A2
- [[ $CGRP = A3 ]] && CGRP=A1/A2/A3
+ [[ $CGRP = A2 ]] && CGRP_DIR=A1/A2
+ [[ $CGRP = A3 ]] && CGRP_DIR=A1/A2/A3
case $STATE in
- P*) FILE=$CGRP/cpuset.cpus.partition
+ P*) FILE=$CGRP_DIR/cpuset.cpus.partition
;;
*) echo "Unknown state: $STATE!"
exit 1
@@ -587,6 +610,16 @@ check_cgroup_states()
;;
esac
[[ $EVAL != $VAL ]] && return 1
+
+ #
+ # For root partition, dump sched-domains info to console if
+ # verbose mode set for manual comparison with sched debug info.
+ #
+ [[ $VAL -eq 1 && $VERBOSE -gt 0 ]] && {
+ DOMS=$(cat $CGRP_DIR/cpuset.cpus.effective)
+ [[ -n "$DOMS" ]] &&
+ echo " [$CGRP] sched-domain: $DOMS" > $CONSOLE
+ }
done
return 0
}
@@ -694,9 +727,9 @@ null_isolcpus_check()
[[ $VERBOSE -gt 0 ]] || return 0
# Retry a few times before printing error
RETRY=0
- while [[ $RETRY -lt 5 ]]
+ while [[ $RETRY -lt 8 ]]
do
- pause 0.01
+ pause 0.02
check_isolcpus "."
[[ $? -eq 0 ]] && return 0
((RETRY++))
@@ -726,7 +759,7 @@ run_state_test()
while [[ $I -lt $CNT ]]
do
- echo "Running test $I ..." > /dev/console
+ echo "Running test $I ..." > $CONSOLE
[[ $VERBOSE -gt 1 ]] && {
echo ""
eval echo \${$TEST[$I]}
@@ -783,7 +816,7 @@ run_state_test()
while [[ $NEWLIST != $CPULIST && $RETRY -lt 8 ]]
do
# Wait a bit longer & recheck a few times
- pause 0.01
+ pause 0.02
((RETRY++))
NEWLIST=$(cat cpuset.cpus.effective)
done
diff --git a/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh b/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh
new file mode 100755
index 000000000000..3f45512fb512
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_cpuset_v1_hp.sh
@@ -0,0 +1,46 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test the special cpuset v1 hotplug case where a cpuset become empty of
+# CPUs will force migration of tasks out to an ancestor.
+#
+
+skip_test() {
+ echo "$1"
+ echo "Test SKIPPED"
+ exit 4 # ksft_skip
+}
+
+[[ $(id -u) -eq 0 ]] || skip_test "Test must be run as root!"
+
+# Find cpuset v1 mount point
+CPUSET=$(mount -t cgroup | grep cpuset | head -1 | awk -e '{print $3}')
+[[ -n "$CPUSET" ]] || skip_test "cpuset v1 mount point not found!"
+
+#
+# Create a test cpuset, put a CPU and a task there and offline that CPU
+#
+TDIR=test$$
+[[ -d $CPUSET/$TDIR ]] || mkdir $CPUSET/$TDIR
+echo 1 > $CPUSET/$TDIR/cpuset.cpus
+echo 0 > $CPUSET/$TDIR/cpuset.mems
+sleep 10&
+TASK=$!
+echo $TASK > $CPUSET/$TDIR/tasks
+NEWCS=$(cat /proc/$TASK/cpuset)
+[[ $NEWCS != "/$TDIR" ]] && {
+ echo "Unexpected cpuset $NEWCS, test FAILED!"
+ exit 1
+}
+
+echo 0 > /sys/devices/system/cpu/cpu1/online
+sleep 0.5
+echo 1 > /sys/devices/system/cpu/cpu1/online
+NEWCS=$(cat /proc/$TASK/cpuset)
+rmdir $CPUSET/$TDIR
+[[ $NEWCS != "/" ]] && {
+ echo "cpuset $NEWCS, test FAILED!"
+ exit 1
+}
+echo "Test PASSED"
+exit 0
diff --git a/tools/testing/selftests/cgroup/test_freezer.c b/tools/testing/selftests/cgroup/test_freezer.c
index 8845353aca53..8730645d363a 100644
--- a/tools/testing/selftests/cgroup/test_freezer.c
+++ b/tools/testing/selftests/cgroup/test_freezer.c
@@ -827,7 +827,7 @@ int main(int argc, char *argv[])
char root[PATH_MAX];
int i, ret = EXIT_SUCCESS;
- if (cg_find_unified_root(root, sizeof(root)))
+ if (cg_find_unified_root(root, sizeof(root), NULL))
ksft_exit_skip("cgroup v2 isn't mounted\n");
for (i = 0; i < ARRAY_SIZE(tests); i++) {
switch (tests[i].fn(root)) {
diff --git a/tools/testing/selftests/cgroup/test_hugetlb_memcg.c b/tools/testing/selftests/cgroup/test_hugetlb_memcg.c
index f0fefeb4cc24..856f9508ea56 100644
--- a/tools/testing/selftests/cgroup/test_hugetlb_memcg.c
+++ b/tools/testing/selftests/cgroup/test_hugetlb_memcg.c
@@ -214,7 +214,7 @@ int main(int argc, char **argv)
return ret;
}
- if (cg_find_unified_root(root, sizeof(root)))
+ if (cg_find_unified_root(root, sizeof(root), NULL))
ksft_exit_skip("cgroup v2 isn't mounted\n");
switch (test_hugetlb_memcg(root)) {
diff --git a/tools/testing/selftests/cgroup/test_kill.c b/tools/testing/selftests/cgroup/test_kill.c
index 6153690319c9..0e5bb6c7307a 100644
--- a/tools/testing/selftests/cgroup/test_kill.c
+++ b/tools/testing/selftests/cgroup/test_kill.c
@@ -276,7 +276,7 @@ int main(int argc, char *argv[])
char root[PATH_MAX];
int i, ret = EXIT_SUCCESS;
- if (cg_find_unified_root(root, sizeof(root)))
+ if (cg_find_unified_root(root, sizeof(root), NULL))
ksft_exit_skip("cgroup v2 isn't mounted\n");
for (i = 0; i < ARRAY_SIZE(tests); i++) {
switch (tests[i].fn(root)) {
diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c
index c82f974b85c9..96693d8772be 100644
--- a/tools/testing/selftests/cgroup/test_kmem.c
+++ b/tools/testing/selftests/cgroup/test_kmem.c
@@ -192,7 +192,7 @@ static int test_kmem_memcg_deletion(const char *root)
goto cleanup;
sum = anon + file + kernel + sock;
- if (abs(sum - current) < MAX_VMSTAT_ERROR) {
+ if (labs(sum - current) < MAX_VMSTAT_ERROR) {
ret = KSFT_PASS;
} else {
printf("memory.current = %ld\n", current);
@@ -380,7 +380,7 @@ static int test_percpu_basic(const char *root)
current = cg_read_long(parent, "memory.current");
percpu = cg_read_key_long(parent, "memory.stat", "percpu ");
- if (current > 0 && percpu > 0 && abs(current - percpu) <
+ if (current > 0 && percpu > 0 && labs(current - percpu) <
MAX_VMSTAT_ERROR)
ret = KSFT_PASS;
else
@@ -420,7 +420,7 @@ int main(int argc, char **argv)
char root[PATH_MAX];
int i, ret = EXIT_SUCCESS;
- if (cg_find_unified_root(root, sizeof(root)))
+ if (cg_find_unified_root(root, sizeof(root), NULL))
ksft_exit_skip("cgroup v2 isn't mounted\n");
/*
diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
index c7c9572003a8..41ae8047b889 100644
--- a/tools/testing/selftests/cgroup/test_memcontrol.c
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -716,7 +716,9 @@ static bool reclaim_until(const char *memcg, long goal)
*/
static int test_memcg_reclaim(const char *root)
{
- int ret = KSFT_FAIL, fd, retries;
+ int ret = KSFT_FAIL;
+ int fd = -1;
+ int retries;
char *memcg;
long current, expected_usage;
@@ -1314,7 +1316,7 @@ int main(int argc, char **argv)
char root[PATH_MAX];
int i, proc_status, ret = EXIT_SUCCESS;
- if (cg_find_unified_root(root, sizeof(root)))
+ if (cg_find_unified_root(root, sizeof(root), NULL))
ksft_exit_skip("cgroup v2 isn't mounted\n");
/*
diff --git a/tools/testing/selftests/cgroup/test_pids.c b/tools/testing/selftests/cgroup/test_pids.c
new file mode 100644
index 000000000000..9ecb83c6cc5c
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_pids.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <linux/limits.h>
+#include <signal.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "../kselftest.h"
+#include "cgroup_util.h"
+
+static int run_success(const char *cgroup, void *arg)
+{
+ return 0;
+}
+
+static int run_pause(const char *cgroup, void *arg)
+{
+ return pause();
+}
+
+/*
+ * This test checks that pids.max prevents forking new children above the
+ * specified limit in the cgroup.
+ */
+static int test_pids_max(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *cg_pids;
+ int pid;
+
+ cg_pids = cg_name(root, "pids_test");
+ if (!cg_pids)
+ goto cleanup;
+
+ if (cg_create(cg_pids))
+ goto cleanup;
+
+ if (cg_read_strcmp(cg_pids, "pids.max", "max\n"))
+ goto cleanup;
+
+ if (cg_write(cg_pids, "pids.max", "2"))
+ goto cleanup;
+
+ if (cg_enter_current(cg_pids))
+ goto cleanup;
+
+ pid = cg_run_nowait(cg_pids, run_pause, NULL);
+ if (pid < 0)
+ goto cleanup;
+
+ if (cg_run_nowait(cg_pids, run_success, NULL) != -1 || errno != EAGAIN)
+ goto cleanup;
+
+ if (kill(pid, SIGINT))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_enter_current(root);
+ cg_destroy(cg_pids);
+ free(cg_pids);
+
+ return ret;
+}
+
+/*
+ * This test checks that pids.events are counted in cgroup associated with pids.max
+ */
+static int test_pids_events(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *cg_parent = NULL, *cg_child = NULL;
+ int pid;
+
+ cg_parent = cg_name(root, "pids_parent");
+ cg_child = cg_name(cg_parent, "pids_child");
+ if (!cg_parent || !cg_child)
+ goto cleanup;
+
+ if (cg_create(cg_parent))
+ goto cleanup;
+ if (cg_write(cg_parent, "cgroup.subtree_control", "+pids"))
+ goto cleanup;
+ if (cg_create(cg_child))
+ goto cleanup;
+
+ if (cg_write(cg_parent, "pids.max", "2"))
+ goto cleanup;
+
+ if (cg_read_strcmp(cg_child, "pids.max", "max\n"))
+ goto cleanup;
+
+ if (cg_enter_current(cg_child))
+ goto cleanup;
+
+ pid = cg_run_nowait(cg_child, run_pause, NULL);
+ if (pid < 0)
+ goto cleanup;
+
+ if (cg_run_nowait(cg_child, run_success, NULL) != -1 || errno != EAGAIN)
+ goto cleanup;
+
+ if (kill(pid, SIGINT))
+ goto cleanup;
+
+ if (cg_read_key_long(cg_child, "pids.events", "max ") != 0)
+ goto cleanup;
+ if (cg_read_key_long(cg_parent, "pids.events", "max ") != 1)
+ goto cleanup;
+
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_enter_current(root);
+ if (cg_child)
+ cg_destroy(cg_child);
+ if (cg_parent)
+ cg_destroy(cg_parent);
+ free(cg_child);
+ free(cg_parent);
+
+ return ret;
+}
+
+
+
+#define T(x) { x, #x }
+struct pids_test {
+ int (*fn)(const char *root);
+ const char *name;
+} tests[] = {
+ T(test_pids_max),
+ T(test_pids_events),
+};
+#undef T
+
+int main(int argc, char **argv)
+{
+ char root[PATH_MAX];
+
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(tests));
+ if (cg_find_unified_root(root, sizeof(root), NULL))
+ ksft_exit_skip("cgroup v2 isn't mounted\n");
+
+ /*
+ * Check that pids controller is available:
+ * pids is listed in cgroup.controllers
+ */
+ if (cg_read_strstr(root, "cgroup.controllers", "pids"))
+ ksft_exit_skip("pids controller isn't available\n");
+
+ if (cg_read_strstr(root, "cgroup.subtree_control", "pids"))
+ if (cg_write(root, "cgroup.subtree_control", "+pids"))
+ ksft_exit_skip("Failed to set pids controller\n");
+
+ for (int i = 0; i < ARRAY_SIZE(tests); i++) {
+ switch (tests[i].fn(root)) {
+ case KSFT_PASS:
+ ksft_test_result_pass("%s\n", tests[i].name);
+ break;
+ case KSFT_SKIP:
+ ksft_test_result_skip("%s\n", tests[i].name);
+ break;
+ default:
+ ksft_test_result_fail("%s\n", tests[i].name);
+ break;
+ }
+ }
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/cgroup/test_zswap.c b/tools/testing/selftests/cgroup/test_zswap.c
index f0e488ed90d8..190096017f80 100644
--- a/tools/testing/selftests/cgroup/test_zswap.c
+++ b/tools/testing/selftests/cgroup/test_zswap.c
@@ -50,7 +50,7 @@ static int get_zswap_stored_pages(size_t *value)
return read_int("/sys/kernel/debug/zswap/stored_pages", value);
}
-static int get_cg_wb_count(const char *cg)
+static long get_cg_wb_count(const char *cg)
{
return cg_read_key_long(cg, "memory.stat", "zswpwb");
}
@@ -249,6 +249,132 @@ out:
}
/*
+ * Attempt writeback with the following steps:
+ * 1. Allocate memory.
+ * 2. Reclaim memory equal to the amount that was allocated in step 1.
+ This will move it into zswap.
+ * 3. Save current zswap usage.
+ * 4. Move the memory allocated in step 1 back in from zswap.
+ * 5. Set zswap.max to half the amount that was recorded in step 3.
+ * 6. Attempt to reclaim memory equal to the amount that was allocated,
+ this will either trigger writeback if it's enabled, or reclamation
+ will fail if writeback is disabled as there isn't enough zswap space.
+ */
+static int attempt_writeback(const char *cgroup, void *arg)
+{
+ long pagesize = sysconf(_SC_PAGESIZE);
+ char *test_group = arg;
+ size_t memsize = MB(4);
+ char buf[pagesize];
+ long zswap_usage;
+ bool wb_enabled;
+ int ret = -1;
+ char *mem;
+
+ wb_enabled = cg_read_long(test_group, "memory.zswap.writeback");
+ mem = (char *)malloc(memsize);
+ if (!mem)
+ return ret;
+
+ /*
+ * Fill half of each page with increasing data, and keep other
+ * half empty, this will result in data that is still compressible
+ * and ends up in zswap, with material zswap usage.
+ */
+ for (int i = 0; i < pagesize; i++)
+ buf[i] = i < pagesize/2 ? (char) i : 0;
+
+ for (int i = 0; i < memsize; i += pagesize)
+ memcpy(&mem[i], buf, pagesize);
+
+ /* Try and reclaim allocated memory */
+ if (cg_write_numeric(test_group, "memory.reclaim", memsize)) {
+ ksft_print_msg("Failed to reclaim all of the requested memory\n");
+ goto out;
+ }
+
+ zswap_usage = cg_read_long(test_group, "memory.zswap.current");
+
+ /* zswpin */
+ for (int i = 0; i < memsize; i += pagesize) {
+ if (memcmp(&mem[i], buf, pagesize)) {
+ ksft_print_msg("invalid memory\n");
+ goto out;
+ }
+ }
+
+ if (cg_write_numeric(test_group, "memory.zswap.max", zswap_usage/2))
+ goto out;
+
+ /*
+ * If writeback is enabled, trying to reclaim memory now will trigger a
+ * writeback as zswap.max is half of what was needed when reclaim ran the first time.
+ * If writeback is disabled, memory reclaim will fail as zswap is limited and
+ * it can't writeback to swap.
+ */
+ ret = cg_write_numeric(test_group, "memory.reclaim", memsize);
+ if (!wb_enabled)
+ ret = (ret == -EAGAIN) ? 0 : -1;
+
+out:
+ free(mem);
+ return ret;
+}
+
+/* Test to verify the zswap writeback path */
+static int test_zswap_writeback(const char *root, bool wb)
+{
+ long zswpwb_before, zswpwb_after;
+ int ret = KSFT_FAIL;
+ char *test_group;
+
+ test_group = cg_name(root, "zswap_writeback_test");
+ if (!test_group)
+ goto out;
+ if (cg_create(test_group))
+ goto out;
+ if (cg_write(test_group, "memory.zswap.writeback", wb ? "1" : "0"))
+ goto out;
+
+ zswpwb_before = get_cg_wb_count(test_group);
+ if (zswpwb_before != 0) {
+ ksft_print_msg("zswpwb_before = %ld instead of 0\n", zswpwb_before);
+ goto out;
+ }
+
+ if (cg_run(test_group, attempt_writeback, (void *) test_group))
+ goto out;
+
+ /* Verify that zswap writeback occurred only if writeback was enabled */
+ zswpwb_after = get_cg_wb_count(test_group);
+ if (zswpwb_after < 0)
+ goto out;
+
+ if (wb != !!zswpwb_after) {
+ ksft_print_msg("zswpwb_after is %ld while wb is %s",
+ zswpwb_after, wb ? "enabled" : "disabled");
+ goto out;
+ }
+
+ ret = KSFT_PASS;
+
+out:
+ cg_destroy(test_group);
+ free(test_group);
+ return ret;
+}
+
+static int test_zswap_writeback_enabled(const char *root)
+{
+ return test_zswap_writeback(root, true);
+}
+
+static int test_zswap_writeback_disabled(const char *root)
+{
+ return test_zswap_writeback(root, false);
+}
+
+/*
* When trying to store a memcg page in zswap, if the memcg hits its memory
* limit in zswap, writeback should affect only the zswapped pages of that
* memcg.
@@ -257,7 +383,7 @@ static int test_no_invasive_cgroup_shrink(const char *root)
{
int ret = KSFT_FAIL;
size_t control_allocation_size = MB(10);
- char *control_allocation, *wb_group = NULL, *control_group = NULL;
+ char *control_allocation = NULL, *wb_group = NULL, *control_group = NULL;
wb_group = setup_test_group_1M(root, "per_memcg_wb_test1");
if (!wb_group)
@@ -342,7 +468,7 @@ static int test_no_kmem_bypass(const char *root)
struct sysinfo sys_info;
int ret = KSFT_FAIL;
int child_status;
- char *test_group;
+ char *test_group = NULL;
pid_t child_pid;
/* Read sys info and compute test values accordingly */
@@ -364,8 +490,6 @@ static int test_no_kmem_bypass(const char *root)
trigger_allocation_size = sys_info.totalram / 20;
/* Set up test memcg */
- if (cg_write(root, "cgroup.subtree_control", "+memory"))
- goto out;
test_group = cg_name(root, "kmem_bypass_test");
if (!test_group)
goto out;
@@ -425,6 +549,8 @@ struct zswap_test {
T(test_zswap_usage),
T(test_swapin_nozswap),
T(test_zswapin),
+ T(test_zswap_writeback_enabled),
+ T(test_zswap_writeback_disabled),
T(test_no_kmem_bypass),
T(test_no_invasive_cgroup_shrink),
};
@@ -440,7 +566,7 @@ int main(int argc, char **argv)
char root[PATH_MAX];
int i, ret = EXIT_SUCCESS;
- if (cg_find_unified_root(root, sizeof(root)))
+ if (cg_find_unified_root(root, sizeof(root), NULL))
ksft_exit_skip("cgroup v2 isn't mounted\n");
if (!zswap_configured())
diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c
index 3c9bf0cd82a8..e61f07973ce5 100644
--- a/tools/testing/selftests/clone3/clone3.c
+++ b/tools/testing/selftests/clone3/clone3.c
@@ -95,9 +95,14 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode)
getpid(), pid);
if (waitpid(-1, &status, __WALL) < 0) {
- ksft_print_msg("Child returned %s\n", strerror(errno));
+ ksft_print_msg("waitpid() returned %s\n", strerror(errno));
return -errno;
}
+ if (!WIFEXITED(status)) {
+ ksft_print_msg("Child did not exit normally, status 0x%x\n",
+ status);
+ return EXIT_FAILURE;
+ }
if (WEXITSTATUS(status))
return WEXITSTATUS(status);
diff --git a/tools/testing/selftests/clone3/clone3_clear_sighand.c b/tools/testing/selftests/clone3/clone3_clear_sighand.c
index 54a8b2445be9..ce0426786828 100644
--- a/tools/testing/selftests/clone3/clone3_clear_sighand.c
+++ b/tools/testing/selftests/clone3/clone3_clear_sighand.c
@@ -120,5 +120,5 @@ int main(int argc, char **argv)
test_clone3_clear_sighand();
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/clone3/clone3_set_tid.c b/tools/testing/selftests/clone3/clone3_set_tid.c
index ed785afb6077..bfb0da2b4fdd 100644
--- a/tools/testing/selftests/clone3/clone3_set_tid.c
+++ b/tools/testing/selftests/clone3/clone3_set_tid.c
@@ -114,7 +114,8 @@ static int call_clone3_set_tid(pid_t *set_tid,
return WEXITSTATUS(status);
}
-static void test_clone3_set_tid(pid_t *set_tid,
+static void test_clone3_set_tid(const char *desc,
+ pid_t *set_tid,
size_t set_tid_size,
int flags,
int expected,
@@ -129,17 +130,13 @@ static void test_clone3_set_tid(pid_t *set_tid,
ret = call_clone3_set_tid(set_tid, set_tid_size, flags, expected_pid,
wait_for_it);
ksft_print_msg(
- "[%d] clone3() with CLONE_SET_TID %d says :%d - expected %d\n",
+ "[%d] clone3() with CLONE_SET_TID %d says: %d - expected %d\n",
getpid(), set_tid[0], ret, expected);
- if (ret != expected)
- ksft_test_result_fail(
- "[%d] Result (%d) is different than expected (%d)\n",
- getpid(), ret, expected);
- else
- ksft_test_result_pass(
- "[%d] Result (%d) matches expectation (%d)\n",
- getpid(), ret, expected);
+
+ ksft_test_result(ret == expected, "%s with %zu TIDs and flags 0x%x\n",
+ desc, set_tid_size, flags);
}
+
int main(int argc, char *argv[])
{
FILE *f;
@@ -172,73 +169,91 @@ int main(int argc, char *argv[])
/* Try invalid settings */
memset(&set_tid, 0, sizeof(set_tid));
- test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL + 1, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("invalid size, 0 TID",
+ set_tid, MAX_PID_NS_LEVEL + 1, 0, -EINVAL, 0, 0);
- test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("invalid size, 0 TID",
+ set_tid, MAX_PID_NS_LEVEL * 2, 0, -EINVAL, 0, 0);
- test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2 + 1, 0,
- -EINVAL, 0, 0);
+ test_clone3_set_tid("invalid size, 0 TID",
+ set_tid, MAX_PID_NS_LEVEL * 2 + 1, 0,
+ -EINVAL, 0, 0);
- test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 42, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("invalid size, 0 TID",
+ set_tid, MAX_PID_NS_LEVEL * 42, 0, -EINVAL, 0, 0);
/*
* This can actually work if this test running in a MAX_PID_NS_LEVEL - 1
* nested PID namespace.
*/
- test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL - 1, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("invalid size, 0 TID",
+ set_tid, MAX_PID_NS_LEVEL - 1, 0, -EINVAL, 0, 0);
memset(&set_tid, 0xff, sizeof(set_tid));
- test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL + 1, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("invalid size, TID all 1s",
+ set_tid, MAX_PID_NS_LEVEL + 1, 0, -EINVAL, 0, 0);
- test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("invalid size, TID all 1s",
+ set_tid, MAX_PID_NS_LEVEL * 2, 0, -EINVAL, 0, 0);
- test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2 + 1, 0,
- -EINVAL, 0, 0);
+ test_clone3_set_tid("invalid size, TID all 1s",
+ set_tid, MAX_PID_NS_LEVEL * 2 + 1, 0,
+ -EINVAL, 0, 0);
- test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 42, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("invalid size, TID all 1s",
+ set_tid, MAX_PID_NS_LEVEL * 42, 0, -EINVAL, 0, 0);
/*
* This can actually work if this test running in a MAX_PID_NS_LEVEL - 1
* nested PID namespace.
*/
- test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL - 1, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("invalid size, TID all 1s",
+ set_tid, MAX_PID_NS_LEVEL - 1, 0, -EINVAL, 0, 0);
memset(&set_tid, 0, sizeof(set_tid));
/* Try with an invalid PID */
set_tid[0] = 0;
- test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("valid size, 0 TID",
+ set_tid, 1, 0, -EINVAL, 0, 0);
set_tid[0] = -1;
- test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("valid size, -1 TID",
+ set_tid, 1, 0, -EINVAL, 0, 0);
/* Claim that the set_tid array actually contains 2 elements. */
- test_clone3_set_tid(set_tid, 2, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("2 TIDs, -1 and 0",
+ set_tid, 2, 0, -EINVAL, 0, 0);
/* Try it in a new PID namespace */
if (uid == 0)
- test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0);
+ test_clone3_set_tid("valid size, -1 TID",
+ set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0);
else
ksft_test_result_skip("Clone3() with set_tid requires root\n");
/* Try with a valid PID (1) this should return -EEXIST. */
set_tid[0] = 1;
if (uid == 0)
- test_clone3_set_tid(set_tid, 1, 0, -EEXIST, 0, 0);
+ test_clone3_set_tid("duplicate PID 1",
+ set_tid, 1, 0, -EEXIST, 0, 0);
else
ksft_test_result_skip("Clone3() with set_tid requires root\n");
/* Try it in a new PID namespace */
if (uid == 0)
- test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, 0, 0, 0);
+ test_clone3_set_tid("duplicate PID 1",
+ set_tid, 1, CLONE_NEWPID, 0, 0, 0);
else
ksft_test_result_skip("Clone3() with set_tid requires root\n");
/* pid_max should fail everywhere */
set_tid[0] = pid_max;
- test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("set TID to maximum",
+ set_tid, 1, 0, -EINVAL, 0, 0);
if (uid == 0)
- test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0);
+ test_clone3_set_tid("set TID to maximum",
+ set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0);
else
ksft_test_result_skip("Clone3() with set_tid requires root\n");
@@ -262,10 +277,12 @@ int main(int argc, char *argv[])
/* After the child has finished, its PID should be free. */
set_tid[0] = pid;
- test_clone3_set_tid(set_tid, 1, 0, 0, 0, 0);
+ test_clone3_set_tid("reallocate child TID",
+ set_tid, 1, 0, 0, 0, 0);
/* This should fail as there is no PID 1 in that namespace */
- test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0);
+ test_clone3_set_tid("duplicate child TID",
+ set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0);
/*
* Creating a process with PID 1 in the newly created most nested
@@ -274,7 +291,8 @@ int main(int argc, char *argv[])
*/
set_tid[0] = 1;
set_tid[1] = pid;
- test_clone3_set_tid(set_tid, 2, CLONE_NEWPID, 0, pid, 0);
+ test_clone3_set_tid("create PID 1 in new NS",
+ set_tid, 2, CLONE_NEWPID, 0, pid, 0);
ksft_print_msg("unshare PID namespace\n");
if (unshare(CLONE_NEWPID) == -1)
@@ -284,7 +302,8 @@ int main(int argc, char *argv[])
set_tid[0] = pid;
/* This should fail as there is no PID 1 in that namespace */
- test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("duplicate PID 1",
+ set_tid, 1, 0, -EINVAL, 0, 0);
/* Let's create a PID 1 */
ns_pid = fork();
@@ -295,21 +314,25 @@ int main(int argc, char *argv[])
*/
set_tid[0] = 43;
set_tid[1] = -1;
- test_clone3_set_tid(set_tid, 2, 0, -EINVAL, 0, 0);
+ test_clone3_set_tid("check leak on invalid TID -1",
+ set_tid, 2, 0, -EINVAL, 0, 0);
set_tid[0] = 43;
set_tid[1] = pid;
- test_clone3_set_tid(set_tid, 2, 0, 0, 43, 0);
+ test_clone3_set_tid("check leak on invalid specific TID",
+ set_tid, 2, 0, 0, 43, 0);
ksft_print_msg("Child in PID namespace has PID %d\n", getpid());
set_tid[0] = 2;
- test_clone3_set_tid(set_tid, 1, 0, 0, 2, 0);
+ test_clone3_set_tid("create PID 2 in child NS",
+ set_tid, 1, 0, 0, 2, 0);
set_tid[0] = 1;
set_tid[1] = -1;
set_tid[2] = pid;
/* This should fail as there is invalid PID at level '1'. */
- test_clone3_set_tid(set_tid, 3, CLONE_NEWPID, -EINVAL, 0, 0);
+ test_clone3_set_tid("fail due to invalid TID at level 1",
+ set_tid, 3, CLONE_NEWPID, -EINVAL, 0, 0);
set_tid[0] = 1;
set_tid[1] = 42;
@@ -319,13 +342,15 @@ int main(int argc, char *argv[])
* namespaces. Again assuming this is running in the host's
* PID namespace. Not yet nested.
*/
- test_clone3_set_tid(set_tid, 4, CLONE_NEWPID, -EINVAL, 0, 0);
+ test_clone3_set_tid("fail due to too few active PID NSs",
+ set_tid, 4, CLONE_NEWPID, -EINVAL, 0, 0);
/*
* This should work and from the parent we should see
* something like 'NSpid: pid 42 1'.
*/
- test_clone3_set_tid(set_tid, 3, CLONE_NEWPID, 0, 42, true);
+ test_clone3_set_tid("verify that we have 3 PID NSs",
+ set_tid, 3, CLONE_NEWPID, 0, 42, true);
child_exit(ksft_cnt.ksft_fail);
}
@@ -380,16 +405,14 @@ int main(int argc, char *argv[])
ksft_cnt.ksft_pass += 6 - (ksft_cnt.ksft_fail - WEXITSTATUS(status));
ksft_cnt.ksft_fail = WEXITSTATUS(status);
- if (ns3 == pid && ns2 == 42 && ns1 == 1)
- ksft_test_result_pass(
- "PIDs in all namespaces as expected (%d,%d,%d)\n",
- ns3, ns2, ns1);
- else
- ksft_test_result_fail(
- "PIDs in all namespaces not as expected (%d,%d,%d)\n",
- ns3, ns2, ns1);
+ ksft_print_msg("Expecting PIDs %d, 42, 1\n", pid);
+ ksft_print_msg("Have PIDs in namespaces: %d, %d, %d\n", ns3, ns2, ns1);
+ ksft_test_result(ns3 == pid && ns2 == 42 && ns1 == 1,
+ "PIDs in all namespaces as expected\n");
out:
ret = 0;
- return !ret ? ksft_exit_pass() : ksft_exit_fail();
+ if (ret)
+ ksft_exit_fail();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c
index c59e4adb905d..991c473e3859 100644
--- a/tools/testing/selftests/core/close_range_test.c
+++ b/tools/testing/selftests/core/close_range_test.c
@@ -17,6 +17,15 @@
#include "../kselftest_harness.h"
#include "../clone3/clone3_selftests.h"
+
+#ifndef F_LINUX_SPECIFIC_BASE
+#define F_LINUX_SPECIFIC_BASE 1024
+#endif
+
+#ifndef F_DUPFD_QUERY
+#define F_DUPFD_QUERY (F_LINUX_SPECIFIC_BASE + 3)
+#endif
+
static inline int sys_close_range(unsigned int fd, unsigned int max_fd,
unsigned int flags)
{
@@ -45,6 +54,15 @@ TEST(core_close_range)
SKIP(return, "close_range() syscall not supported");
}
+ for (i = 0; i < 100; i++) {
+ ret = fcntl(open_fds[i], F_DUPFD_QUERY, open_fds[i + 1]);
+ if (ret < 0) {
+ EXPECT_EQ(errno, EINVAL);
+ } else {
+ EXPECT_EQ(ret, 0);
+ }
+ }
+
EXPECT_EQ(0, sys_close_range(open_fds[0], open_fds[50], 0));
for (i = 0; i <= 50; i++)
@@ -358,7 +376,7 @@ TEST(close_range_cloexec_unshare)
*/
TEST(close_range_cloexec_syzbot)
{
- int fd1, fd2, fd3, flags, ret, status;
+ int fd1, fd2, fd3, fd4, flags, ret, status;
pid_t pid;
struct __clone_args args = {
.flags = CLONE_FILES,
@@ -372,6 +390,13 @@ TEST(close_range_cloexec_syzbot)
fd2 = dup2(fd1, 1000);
EXPECT_GT(fd2, 0);
+ flags = fcntl(fd1, F_DUPFD_QUERY, fd2);
+ if (flags < 0) {
+ EXPECT_EQ(errno, EINVAL);
+ } else {
+ EXPECT_EQ(flags, 1);
+ }
+
pid = sys_clone3(&args, sizeof(args));
ASSERT_GE(pid, 0);
@@ -396,6 +421,15 @@ TEST(close_range_cloexec_syzbot)
fd3 = dup2(fd1, 42);
EXPECT_GT(fd3, 0);
+ flags = fcntl(fd1, F_DUPFD_QUERY, fd3);
+ if (flags < 0) {
+ EXPECT_EQ(errno, EINVAL);
+ } else {
+ EXPECT_EQ(flags, 1);
+ }
+
+
+
/*
* Duplicating the file descriptor must remove the
* FD_CLOEXEC flag.
@@ -426,6 +460,24 @@ TEST(close_range_cloexec_syzbot)
fd3 = dup2(fd1, 42);
EXPECT_GT(fd3, 0);
+ flags = fcntl(fd1, F_DUPFD_QUERY, fd3);
+ if (flags < 0) {
+ EXPECT_EQ(errno, EINVAL);
+ } else {
+ EXPECT_EQ(flags, 1);
+ }
+
+ fd4 = open("/dev/null", O_RDWR);
+ EXPECT_GT(fd4, 0);
+
+ /* Same inode, different file pointers. */
+ flags = fcntl(fd1, F_DUPFD_QUERY, fd4);
+ if (flags < 0) {
+ EXPECT_EQ(errno, EINVAL);
+ } else {
+ EXPECT_EQ(flags, 0);
+ }
+
flags = fcntl(fd3, F_GETFD);
EXPECT_GT(flags, -1);
EXPECT_EQ(flags & FD_CLOEXEC, 0);
@@ -433,6 +485,7 @@ TEST(close_range_cloexec_syzbot)
EXPECT_EQ(close(fd1), 0);
EXPECT_EQ(close(fd2), 0);
EXPECT_EQ(close(fd3), 0);
+ EXPECT_EQ(close(fd4), 0);
}
/*
diff --git a/tools/testing/selftests/cpufreq/cpufreq.sh b/tools/testing/selftests/cpufreq/cpufreq.sh
index b583a2fb4504..a8b1dbc0a3a5 100755
--- a/tools/testing/selftests/cpufreq/cpufreq.sh
+++ b/tools/testing/selftests/cpufreq/cpufreq.sh
@@ -178,8 +178,7 @@ cpufreq_basic_tests()
count=$(count_cpufreq_managed_cpus)
if [ $count = 0 ]; then
- printf "No cpu is managed by cpufreq core, exiting\n"
- exit;
+ ktap_exit_fail_msg "No cpu is managed by cpufreq core, exiting\n"
else
printf "CPUFreq manages: $count CPUs\n\n"
fi
diff --git a/tools/testing/selftests/cpufreq/main.sh b/tools/testing/selftests/cpufreq/main.sh
index 60ce18ed0666..a0eb84cf7167 100755
--- a/tools/testing/selftests/cpufreq/main.sh
+++ b/tools/testing/selftests/cpufreq/main.sh
@@ -7,15 +7,15 @@ source governor.sh
source module.sh
source special-tests.sh
+DIR="$(dirname $(readlink -f "$0"))"
+source "${DIR}"/../kselftest/ktap_helpers.sh
+
FUNC=basic # do basic tests by default
OUTFILE=cpufreq_selftest
SYSFS=
CPUROOT=
CPUFREQROOT=
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-
helpme()
{
printf "Usage: $0 [-h] [-todg args]
@@ -32,7 +32,7 @@ helpme()
[-d <driver's module name: only with \"-t modtest>\"]
[-g <governor's module name: only with \"-t modtest>\"]
\n"
- exit 2
+ exit "${KSFT_FAIL}"
}
prerequisite()
@@ -40,8 +40,8 @@ prerequisite()
msg="skip all tests:"
if [ $UID != 0 ]; then
- echo $msg must be run as root >&2
- exit $ksft_skip
+ ktap_skip_all "$msg must be run as root"
+ exit "${KSFT_SKIP}"
fi
taskset -p 01 $$
@@ -49,21 +49,21 @@ prerequisite()
SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'`
if [ ! -d "$SYSFS" ]; then
- echo $msg sysfs is not mounted >&2
- exit 2
+ ktap_skip_all "$msg sysfs is not mounted"
+ exit "${KSFT_SKIP}"
fi
CPUROOT=$SYSFS/devices/system/cpu
CPUFREQROOT="$CPUROOT/cpufreq"
if ! ls $CPUROOT/cpu* > /dev/null 2>&1; then
- echo $msg cpus not available in sysfs >&2
- exit 2
+ ktap_skip_all "$msg cpus not available in sysfs"
+ exit "${KSFT_SKIP}"
fi
if ! ls $CPUROOT/cpufreq > /dev/null 2>&1; then
- echo $msg cpufreq directory not available in sysfs >&2
- exit 2
+ ktap_skip_all "$msg cpufreq directory not available in sysfs"
+ exit "${KSFT_SKIP}"
fi
}
@@ -105,8 +105,7 @@ do_test()
count=$(count_cpufreq_managed_cpus)
if [ $count = 0 -a $FUNC != "modtest" ]; then
- echo "No cpu is managed by cpufreq core, exiting"
- exit 2;
+ ktap_exit_fail_msg "No cpu is managed by cpufreq core, exiting"
fi
case "$FUNC" in
@@ -125,8 +124,7 @@ do_test()
"modtest")
# Do we have modules in place?
if [ -z $DRIVER_MOD ] && [ -z $GOVERNOR_MOD ]; then
- echo "No driver or governor module passed with -d or -g"
- exit 2;
+ ktap_exit_fail_msg "No driver or governor module passed with -d or -g"
fi
if [ $DRIVER_MOD ]; then
@@ -137,8 +135,7 @@ do_test()
fi
else
if [ $count = 0 ]; then
- echo "No cpu is managed by cpufreq core, exiting"
- exit 2;
+ ktap_exit_fail_msg "No cpu is managed by cpufreq core, exiting"
fi
module_governor_test $GOVERNOR_MOD
@@ -162,7 +159,7 @@ do_test()
;;
*)
- echo "Invalid [-f] function type"
+ ktap_print_msg "Invalid [-f] function type"
helpme
;;
esac
@@ -186,13 +183,25 @@ dmesg_dumps()
dmesg >> $1.dmesg_full.txt
}
+ktap_print_header
+
# Parse arguments
parse_arguments $@
+ktap_set_plan 1
+
# Make sure all requirements are met
prerequisite
# Run requested functions
clear_dumps $OUTFILE
do_test | tee -a $OUTFILE.txt
+if [ "${PIPESTATUS[0]}" -ne 0 ]; then
+ exit ${PIPESTATUS[0]};
+fi
dmesg_dumps $OUTFILE
+
+ktap_test_pass "Completed successfully"
+
+ktap_print_totals
+exit "${KSFT_PASS}"
diff --git a/tools/testing/selftests/cpufreq/module.sh b/tools/testing/selftests/cpufreq/module.sh
index 22563cd122e7..7f2667e0ae2d 100755
--- a/tools/testing/selftests/cpufreq/module.sh
+++ b/tools/testing/selftests/cpufreq/module.sh
@@ -24,16 +24,14 @@ test_basic_insmod_rmmod()
# insert module
insmod $1
if [ $? != 0 ]; then
- printf "Insmod $1 failed\n"
- exit;
+ ktap_exit_fail_msg "Insmod $1 failed\n"
fi
printf "Removing $1 module\n"
# remove module
rmmod $1
if [ $? != 0 ]; then
- printf "rmmod $1 failed\n"
- exit;
+ ktap_exit_fail_msg "rmmod $1 failed\n"
fi
printf "\n"
diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile
index 789d6949c247..1e2e98cc809d 100644
--- a/tools/testing/selftests/damon/Makefile
+++ b/tools/testing/selftests/damon/Makefile
@@ -4,19 +4,25 @@
TEST_GEN_FILES += huge_count_read_write
TEST_GEN_FILES += debugfs_target_ids_read_before_terminate_race
TEST_GEN_FILES += debugfs_target_ids_pid_leak
-TEST_GEN_FILES += access_memory
+TEST_GEN_FILES += access_memory access_memory_even
TEST_FILES = _chk_dependency.sh _debugfs_common.sh
+
+# functionality tests
TEST_PROGS = debugfs_attrs.sh debugfs_schemes.sh debugfs_target_ids.sh
+TEST_PROGS += sysfs.sh
+TEST_PROGS += sysfs_update_schemes_tried_regions_wss_estimation.py
+TEST_PROGS += damos_quota.py damos_quota_goal.py damos_apply_interval.py
+TEST_PROGS += damos_tried_regions.py damon_nr_regions.py
+TEST_PROGS += reclaim.sh lru_sort.sh
+
+# regression tests (reproducers of previously found bugs)
TEST_PROGS += debugfs_empty_targets.sh debugfs_huge_count_read_write.sh
TEST_PROGS += debugfs_duplicate_context_creation.sh
TEST_PROGS += debugfs_rm_non_contexts.sh
TEST_PROGS += debugfs_target_ids_read_before_terminate_race.sh
TEST_PROGS += debugfs_target_ids_pid_leak.sh
-TEST_PROGS += sysfs.sh sysfs_update_removed_scheme_dir.sh
+TEST_PROGS += sysfs_update_removed_scheme_dir.sh
TEST_PROGS += sysfs_update_schemes_tried_regions_hang.py
-TEST_PROGS += sysfs_update_schemes_tried_regions_wss_estimation.py
-TEST_PROGS += damos_quota.py damos_apply_interval.py
-TEST_PROGS += reclaim.sh lru_sort.sh
include ../lib.mk
diff --git a/tools/testing/selftests/damon/_damon_sysfs.py b/tools/testing/selftests/damon/_damon_sysfs.py
index d23d7398a27a..6e136dc3df19 100644
--- a/tools/testing/selftests/damon/_damon_sysfs.py
+++ b/tools/testing/selftests/damon/_damon_sysfs.py
@@ -2,7 +2,18 @@
import os
-sysfs_root = '/sys/kernel/mm/damon/admin'
+ksft_skip=4
+
+sysfs_root = None
+with open('/proc/mounts', 'r') as f:
+ for line in f:
+ dev_name, mount_point, dev_fs = line.split()[:3]
+ if dev_fs == 'sysfs':
+ sysfs_root = '%s/kernel/mm/damon/admin' % mount_point
+ break
+if sysfs_root is None:
+ print('Seems sysfs not mounted?')
+ exit(ksft_skip)
def write_file(path, string):
"Returns error string if failed, or None otherwise"
@@ -34,11 +45,11 @@ class DamosAccessPattern:
self.nr_accesses = nr_accesses
self.age = age
- if self.size == None:
+ if self.size is None:
self.size = [0, 2**64 - 1]
- if self.nr_accesses == None:
+ if self.nr_accesses is None:
self.nr_accesses = [0, 2**64 - 1]
- if self.age == None:
+ if self.age is None:
self.age = [0, 2**64 - 1]
def sysfs_dir(self):
@@ -47,55 +58,109 @@ class DamosAccessPattern:
def stage(self):
err = write_file(
os.path.join(self.sysfs_dir(), 'sz', 'min'), self.size[0])
- if err != None:
+ if err is not None:
return err
err = write_file(
os.path.join(self.sysfs_dir(), 'sz', 'max'), self.size[1])
- if err != None:
+ if err is not None:
return err
err = write_file(os.path.join(self.sysfs_dir(), 'nr_accesses', 'min'),
self.nr_accesses[0])
- if err != None:
+ if err is not None:
return err
err = write_file(os.path.join(self.sysfs_dir(), 'nr_accesses', 'max'),
self.nr_accesses[1])
- if err != None:
+ if err is not None:
return err
err = write_file(
os.path.join(self.sysfs_dir(), 'age', 'min'), self.age[0])
- if err != None:
+ if err is not None:
return err
err = write_file(
os.path.join(self.sysfs_dir(), 'age', 'max'), self.age[1])
- if err != None:
+ if err is not None:
return err
+qgoal_metric_user_input = 'user_input'
+qgoal_metric_some_mem_psi_us = 'some_mem_psi_us'
+qgoal_metrics = [qgoal_metric_user_input, qgoal_metric_some_mem_psi_us]
+
+class DamosQuotaGoal:
+ metric = None
+ target_value = None
+ current_value = None
+ effective_bytes = None
+ quota = None # owner quota
+ idx = None
+
+ def __init__(self, metric, target_value=10000, current_value=0):
+ self.metric = metric
+ self.target_value = target_value
+ self.current_value = current_value
+
+ def sysfs_dir(self):
+ return os.path.join(self.quota.sysfs_dir(), 'goals', '%d' % self.idx)
+
+ def stage(self):
+ err = write_file(os.path.join(self.sysfs_dir(), 'target_metric'),
+ self.metric)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'target_value'),
+ self.target_value)
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'current_value'),
+ self.current_value)
+ if err is not None:
+ return err
+ return None
+
class DamosQuota:
sz = None # size quota, in bytes
ms = None # time quota
+ goals = None # quota goals
reset_interval_ms = None # quota reset interval
scheme = None # owner scheme
- def __init__(self, sz=0, ms=0, reset_interval_ms=0):
+ def __init__(self, sz=0, ms=0, goals=None, reset_interval_ms=0):
self.sz = sz
self.ms = ms
self.reset_interval_ms = reset_interval_ms
+ self.goals = goals if goals is not None else []
+ for idx, goal in enumerate(self.goals):
+ goal.idx = idx
+ goal.quota = self
def sysfs_dir(self):
return os.path.join(self.scheme.sysfs_dir(), 'quotas')
def stage(self):
err = write_file(os.path.join(self.sysfs_dir(), 'bytes'), self.sz)
- if err != None:
+ if err is not None:
return err
err = write_file(os.path.join(self.sysfs_dir(), 'ms'), self.ms)
- if err != None:
+ if err is not None:
return err
err = write_file(os.path.join(self.sysfs_dir(), 'reset_interval_ms'),
self.reset_interval_ms)
- if err != None:
+ if err is not None:
return err
+ nr_goals_file = os.path.join(self.sysfs_dir(), 'goals', 'nr_goals')
+ content, err = read_file(nr_goals_file)
+ if err is not None:
+ return err
+ if int(content) != len(self.goals):
+ err = write_file(nr_goals_file, len(self.goals))
+ if err is not None:
+ return err
+ for goal in self.goals:
+ err = goal.stage()
+ if err is not None:
+ return err
+ return None
+
class DamosStats:
nr_tried = None
sz_tried = None
@@ -110,16 +175,24 @@ class DamosStats:
self.sz_applied = sz_applied
self.qt_exceeds = qt_exceeds
+class DamosTriedRegion:
+ def __init__(self, start, end, nr_accesses, age):
+ self.start = start
+ self.end = end
+ self.nr_accesses = nr_accesses
+ self.age = age
+
class Damos:
action = None
access_pattern = None
quota = None
apply_interval_us = None
- # todo: Support watermarks, stats, tried_regions
+ # todo: Support watermarks, stats
idx = None
context = None
tried_bytes = None
stats = None
+ tried_regions = None
def __init__(self, action='stat', access_pattern=DamosAccessPattern(),
quota=DamosQuota(), apply_interval_us=0):
@@ -136,30 +209,30 @@ class Damos:
def stage(self):
err = write_file(os.path.join(self.sysfs_dir(), 'action'), self.action)
- if err != None:
+ if err is not None:
return err
err = self.access_pattern.stage()
- if err != None:
+ if err is not None:
return err
err = write_file(os.path.join(self.sysfs_dir(), 'apply_interval_us'),
'%d' % self.apply_interval_us)
- if err != None:
+ if err is not None:
return err
err = self.quota.stage()
- if err != None:
+ if err is not None:
return err
# disable watermarks
err = write_file(
os.path.join(self.sysfs_dir(), 'watermarks', 'metric'), 'none')
- if err != None:
+ if err is not None:
return err
# disable filters
err = write_file(
os.path.join(self.sysfs_dir(), 'filters', 'nr_filters'), '0')
- if err != None:
+ if err is not None:
return err
class DamonTarget:
@@ -178,7 +251,7 @@ class DamonTarget:
def stage(self):
err = write_file(
os.path.join(self.sysfs_dir(), 'regions', 'nr_regions'), '0')
- if err != None:
+ if err is not None:
return err
return write_file(
os.path.join(self.sysfs_dir(), 'pid_target'), self.pid)
@@ -210,27 +283,27 @@ class DamonAttrs:
def stage(self):
err = write_file(os.path.join(self.interval_sysfs_dir(), 'sample_us'),
self.sample_us)
- if err != None:
+ if err is not None:
return err
err = write_file(os.path.join(self.interval_sysfs_dir(), 'aggr_us'),
self.aggr_us)
- if err != None:
+ if err is not None:
return err
err = write_file(os.path.join(self.interval_sysfs_dir(), 'update_us'),
self.update_us)
- if err != None:
+ if err is not None:
return err
err = write_file(
os.path.join(self.nr_regions_range_sysfs_dir(), 'min'),
self.min_nr_regions)
- if err != None:
+ if err is not None:
return err
err = write_file(
os.path.join(self.nr_regions_range_sysfs_dir(), 'max'),
self.max_nr_regions)
- if err != None:
+ if err is not None:
return err
class DamonCtx:
@@ -264,36 +337,38 @@ class DamonCtx:
def stage(self):
err = write_file(
os.path.join(self.sysfs_dir(), 'operations'), self.ops)
- if err != None:
+ if err is not None:
return err
err = self.monitoring_attrs.stage()
- if err != None:
+ if err is not None:
return err
nr_targets_file = os.path.join(
self.sysfs_dir(), 'targets', 'nr_targets')
content, err = read_file(nr_targets_file)
- if err != None:
+ if err is not None:
return err
if int(content) != len(self.targets):
err = write_file(nr_targets_file, '%d' % len(self.targets))
- if err != None:
+ if err is not None:
return err
for target in self.targets:
err = target.stage()
- if err != None:
+ if err is not None:
return err
nr_schemes_file = os.path.join(
self.sysfs_dir(), 'schemes', 'nr_schemes')
content, err = read_file(nr_schemes_file)
+ if err is not None:
+ return err
if int(content) != len(self.schemes):
err = write_file(nr_schemes_file, '%d' % len(self.schemes))
- if err != None:
+ if err is not None:
return err
for scheme in self.schemes:
err = scheme.stage()
- if err != None:
+ if err is not None:
return err
return None
@@ -317,37 +392,66 @@ class Kdamond:
nr_contexts_file = os.path.join(self.sysfs_dir(),
'contexts', 'nr_contexts')
content, err = read_file(nr_contexts_file)
- if err != None:
+ if err is not None:
return err
if int(content) != len(self.contexts):
err = write_file(nr_contexts_file, '%d' % len(self.contexts))
- if err != None:
+ if err is not None:
return err
for context in self.contexts:
err = context.stage()
- if err != None:
+ if err is not None:
return err
err = write_file(os.path.join(self.sysfs_dir(), 'state'), 'on')
return err
+ def stop(self):
+ err = write_file(os.path.join(self.sysfs_dir(), 'state'), 'off')
+ return err
+
+ def update_schemes_tried_regions(self):
+ err = write_file(os.path.join(self.sysfs_dir(), 'state'),
+ 'update_schemes_tried_regions')
+ if err is not None:
+ return err
+ for context in self.contexts:
+ for scheme in context.schemes:
+ tried_regions = []
+ tried_regions_dir = os.path.join(
+ scheme.sysfs_dir(), 'tried_regions')
+ for filename in os.listdir(
+ os.path.join(scheme.sysfs_dir(), 'tried_regions')):
+ tried_region_dir = os.path.join(tried_regions_dir, filename)
+ if not os.path.isdir(tried_region_dir):
+ continue
+ region_values = []
+ for f in ['start', 'end', 'nr_accesses', 'age']:
+ content, err = read_file(
+ os.path.join(tried_region_dir, f))
+ if err is not None:
+ return err
+ region_values.append(int(content))
+ tried_regions.append(DamosTriedRegion(*region_values))
+ scheme.tried_regions = tried_regions
+
def update_schemes_tried_bytes(self):
err = write_file(os.path.join(self.sysfs_dir(), 'state'),
'update_schemes_tried_bytes')
- if err != None:
+ if err is not None:
return err
for context in self.contexts:
for scheme in context.schemes:
content, err = read_file(os.path.join(scheme.sysfs_dir(),
'tried_regions', 'total_bytes'))
- if err != None:
+ if err is not None:
return err
scheme.tried_bytes = int(content)
def update_schemes_stats(self):
err = write_file(os.path.join(self.sysfs_dir(), 'state'),
'update_schemes_stats')
- if err != None:
+ if err is not None:
return err
for context in self.contexts:
for scheme in context.schemes:
@@ -356,11 +460,58 @@ class Kdamond:
'sz_applied', 'qt_exceeds']:
content, err = read_file(
os.path.join(scheme.sysfs_dir(), 'stats', stat))
- if err != None:
+ if err is not None:
return err
stat_values.append(int(content))
scheme.stats = DamosStats(*stat_values)
+ def update_schemes_effective_quotas(self):
+ err = write_file(os.path.join(self.sysfs_dir(), 'state'),
+ 'update_schemes_effective_quotas')
+ if err is not None:
+ return err
+ for context in self.contexts:
+ for scheme in context.schemes:
+ for goal in scheme.quota.goals:
+ content, err = read_file(
+ os.path.join(scheme.quota.sysfs_dir(),
+ 'effective_bytes'))
+ if err is not None:
+ return err
+ goal.effective_bytes = int(content)
+ return None
+
+ def commit(self):
+ nr_contexts_file = os.path.join(self.sysfs_dir(),
+ 'contexts', 'nr_contexts')
+ content, err = read_file(nr_contexts_file)
+ if err is not None:
+ return err
+ if int(content) != len(self.contexts):
+ err = write_file(nr_contexts_file, '%d' % len(self.contexts))
+ if err is not None:
+ return err
+
+ for context in self.contexts:
+ err = context.stage()
+ if err is not None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'state'), 'commit')
+ return err
+
+
+ def commit_schemes_quota_goals(self):
+ for context in self.contexts:
+ for scheme in context.schemes:
+ for goal in scheme.quota.goals:
+ err = goal.stage()
+ if err is not None:
+ print('commit_schemes_quota_goals failed stagign: %s'%
+ err)
+ exit(1)
+ return write_file(os.path.join(self.sysfs_dir(), 'state'),
+ 'commit_schemes_quota_goals')
+
class Kdamonds:
kdamonds = []
@@ -376,10 +527,17 @@ class Kdamonds:
def start(self):
err = write_file(os.path.join(self.sysfs_dir(), 'nr_kdamonds'),
'%s' % len(self.kdamonds))
- if err != None:
+ if err is not None:
return err
for kdamond in self.kdamonds:
err = kdamond.start()
- if err != None:
+ if err is not None:
+ return err
+ return None
+
+ def stop(self):
+ for kdamond in self.kdamonds:
+ err = kdamond.stop()
+ if err is not None:
return err
return None
diff --git a/tools/testing/selftests/damon/access_memory.c b/tools/testing/selftests/damon/access_memory.c
index 585a2fa54329..56b17e8fe1be 100644
--- a/tools/testing/selftests/damon/access_memory.c
+++ b/tools/testing/selftests/damon/access_memory.c
@@ -35,7 +35,7 @@ int main(int argc, char *argv[])
start_clock = clock();
while ((clock() - start_clock) * 1000 / CLOCKS_PER_SEC <
access_time_ms)
- memset(regions[i], i, 1024 * 1024 * 10);
+ memset(regions[i], i, sz_region);
}
return 0;
}
diff --git a/tools/testing/selftests/damon/access_memory_even.c b/tools/testing/selftests/damon/access_memory_even.c
new file mode 100644
index 000000000000..3be121487432
--- /dev/null
+++ b/tools/testing/selftests/damon/access_memory_even.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Artificial memory access program for testing DAMON.
+ *
+ * Receives number of regions and size of each region from user. Allocate the
+ * regions and repeatedly access even numbered (starting from zero) regions.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+int main(int argc, char *argv[])
+{
+ char **regions;
+ clock_t start_clock;
+ int nr_regions;
+ int sz_region;
+ int access_time_ms;
+ int i;
+
+ if (argc != 3) {
+ printf("Usage: %s <number> <size (bytes)>\n", argv[0]);
+ return -1;
+ }
+
+ nr_regions = atoi(argv[1]);
+ sz_region = atoi(argv[2]);
+
+ regions = malloc(sizeof(*regions) * nr_regions);
+ for (i = 0; i < nr_regions; i++)
+ regions[i] = malloc(sz_region);
+
+ while (1) {
+ for (i = 0; i < nr_regions; i++) {
+ if (i % 2 == 0)
+ memset(regions[i], i, sz_region);
+ }
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/damon/damon_nr_regions.py b/tools/testing/selftests/damon/damon_nr_regions.py
new file mode 100644
index 000000000000..2e8a74aff543
--- /dev/null
+++ b/tools/testing/selftests/damon/damon_nr_regions.py
@@ -0,0 +1,145 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import subprocess
+import time
+
+import _damon_sysfs
+
+def test_nr_regions(real_nr_regions, min_nr_regions, max_nr_regions):
+ '''
+ Create process of the given 'real_nr_regions' regions, monitor it using
+ DAMON with given '{min,max}_nr_regions' monitoring parameter.
+
+ Exit with non-zero return code if the given {min,max}_nr_regions is not
+ kept.
+ '''
+ sz_region = 10 * 1024 * 1024
+ proc = subprocess.Popen(['./access_memory_even', '%d' % real_nr_regions,
+ '%d' % sz_region])
+
+ # stat every monitored regions
+ kdamonds = _damon_sysfs.Kdamonds([_damon_sysfs.Kdamond(
+ contexts=[_damon_sysfs.DamonCtx(
+ monitoring_attrs=_damon_sysfs.DamonAttrs(
+ min_nr_regions=min_nr_regions,
+ max_nr_regions=max_nr_regions),
+ ops='vaddr',
+ targets=[_damon_sysfs.DamonTarget(pid=proc.pid)],
+ schemes=[_damon_sysfs.Damos(action='stat',
+ )] # schemes
+ )] # contexts
+ )]) # kdamonds
+
+ err = kdamonds.start()
+ if err is not None:
+ proc.terminate()
+ print('kdamond start failed: %s' % err)
+ exit(1)
+
+ collected_nr_regions = []
+ while proc.poll() is None:
+ time.sleep(0.1)
+ err = kdamonds.kdamonds[0].update_schemes_tried_regions()
+ if err is not None:
+ proc.terminate()
+ print('tried regions update failed: %s' % err)
+ exit(1)
+
+ scheme = kdamonds.kdamonds[0].contexts[0].schemes[0]
+ if scheme.tried_regions is None:
+ proc.terminate()
+ print('tried regions is not collected')
+ exit(1)
+
+ nr_tried_regions = len(scheme.tried_regions)
+ if nr_tried_regions <= 0:
+ proc.terminate()
+ print('tried regions is not created')
+ exit(1)
+ collected_nr_regions.append(nr_tried_regions)
+ if len(collected_nr_regions) > 10:
+ break
+ proc.terminate()
+ kdamonds.stop()
+
+ test_name = 'nr_regions test with %d/%d/%d real/min/max nr_regions' % (
+ real_nr_regions, min_nr_regions, max_nr_regions)
+ if (collected_nr_regions[0] < min_nr_regions or
+ collected_nr_regions[-1] > max_nr_regions):
+ print('fail %s' % test_name)
+ print('number of regions that collected are:')
+ for nr in collected_nr_regions:
+ print(nr)
+ exit(1)
+ print('pass %s ' % test_name)
+
+def main():
+ # test min_nr_regions larger than real nr regions
+ test_nr_regions(10, 20, 100)
+
+ # test max_nr_regions smaller than real nr regions
+ test_nr_regions(15, 3, 10)
+
+ # test online-tuned max_nr_regions that smaller than real nr regions
+ sz_region = 10 * 1024 * 1024
+ proc = subprocess.Popen(['./access_memory_even', '14', '%d' % sz_region])
+
+ # stat every monitored regions
+ kdamonds = _damon_sysfs.Kdamonds([_damon_sysfs.Kdamond(
+ contexts=[_damon_sysfs.DamonCtx(
+ monitoring_attrs=_damon_sysfs.DamonAttrs(
+ min_nr_regions=10, max_nr_regions=1000),
+ ops='vaddr',
+ targets=[_damon_sysfs.DamonTarget(pid=proc.pid)],
+ schemes=[_damon_sysfs.Damos(action='stat',
+ )] # schemes
+ )] # contexts
+ )]) # kdamonds
+
+ err = kdamonds.start()
+ if err is not None:
+ proc.terminate()
+ print('kdamond start failed: %s' % err)
+ exit(1)
+
+ # wait until the real regions are found
+ time.sleep(3)
+
+ attrs = kdamonds.kdamonds[0].contexts[0].monitoring_attrs
+ attrs.min_nr_regions = 3
+ attrs.max_nr_regions = 7
+ err = kdamonds.kdamonds[0].commit()
+ if err is not None:
+ proc.terminate()
+ print('commit failed: %s' % err)
+ exit(1)
+ # wait for next merge operation is executed
+ time.sleep(0.3)
+
+ err = kdamonds.kdamonds[0].update_schemes_tried_regions()
+ if err is not None:
+ proc.terminate()
+ print('tried regions update failed: %s' % err)
+ exit(1)
+
+ scheme = kdamonds.kdamonds[0].contexts[0].schemes[0]
+ if scheme.tried_regions is None:
+ proc.terminate()
+ print('tried regions is not collected')
+ exit(1)
+
+ nr_tried_regions = len(scheme.tried_regions)
+ if nr_tried_regions <= 0:
+ proc.terminate()
+ print('tried regions is not created')
+ exit(1)
+ proc.terminate()
+
+ if nr_tried_regions > 7:
+ print('fail online-tuned max_nr_regions: %d > 7' % nr_tried_regions)
+ exit(1)
+ print('pass online-tuned max_nr_regions')
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/testing/selftests/damon/damos_quota_goal.py b/tools/testing/selftests/damon/damos_quota_goal.py
new file mode 100644
index 000000000000..18246f3b62f7
--- /dev/null
+++ b/tools/testing/selftests/damon/damos_quota_goal.py
@@ -0,0 +1,77 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import subprocess
+import time
+
+import _damon_sysfs
+
+def main():
+ # access two 10 MiB memory regions, 2 second per each
+ sz_region = 10 * 1024 * 1024
+ proc = subprocess.Popen(['./access_memory', '2', '%d' % sz_region, '2000'])
+
+ goal = _damon_sysfs.DamosQuotaGoal(
+ metric=_damon_sysfs.qgoal_metric_user_input, target_value=10000)
+ kdamonds = _damon_sysfs.Kdamonds([_damon_sysfs.Kdamond(
+ contexts=[_damon_sysfs.DamonCtx(
+ ops='vaddr',
+ targets=[_damon_sysfs.DamonTarget(pid=proc.pid)],
+ schemes=[_damon_sysfs.Damos(
+ action='stat',
+ quota=_damon_sysfs.DamosQuota(
+ goals=[goal], reset_interval_ms=100),
+ )] # schemes
+ )] # contexts
+ )]) # kdamonds
+
+ err = kdamonds.start()
+ if err != None:
+ print('kdamond start failed: %s' % err)
+ exit(1)
+
+ score_values_to_test = [0, 15000, 5000, 18000]
+ while proc.poll() == None:
+ if len(score_values_to_test) == 0:
+ time.sleep(0.1)
+ continue
+
+ goal.current_value = score_values_to_test.pop(0)
+ expect_increase = goal.current_value < goal.target_value
+
+ err = kdamonds.kdamonds[0].commit_schemes_quota_goals()
+ if err is not None:
+ print('commit_schemes_quota_goals failed: %s' % err)
+ exit(1)
+
+ err = kdamonds.kdamonds[0].update_schemes_effective_quotas()
+ if err is not None:
+ print('before-update_schemes_effective_quotas failed: %s' % err)
+ exit(1)
+ last_effective_bytes = goal.effective_bytes
+
+ time.sleep(0.5)
+
+ err = kdamonds.kdamonds[0].update_schemes_effective_quotas()
+ if err is not None:
+ print('after-update_schemes_effective_quotas failed: %s' % err)
+ exit(1)
+
+ print('score: %s, effective quota: %d -> %d (%.3fx)' % (
+ goal.current_value, last_effective_bytes, goal.effective_bytes,
+ goal.effective_bytes / last_effective_bytes
+ if last_effective_bytes != 0 else -1.0))
+
+ if last_effective_bytes == goal.effective_bytes:
+ print('efective bytes not changed: %d' % goal.effective_bytes)
+ exit(1)
+
+ increased = last_effective_bytes < goal.effective_bytes
+ if expect_increase != increased:
+ print('expectation of increase (%s) != increased (%s)' %
+ (expect_increase, increased))
+ exit(1)
+ last_effective_bytes = goal.effective_bytes
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/testing/selftests/damon/damos_tried_regions.py b/tools/testing/selftests/damon/damos_tried_regions.py
new file mode 100644
index 000000000000..3b347eb28bd2
--- /dev/null
+++ b/tools/testing/selftests/damon/damos_tried_regions.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import subprocess
+import time
+
+import _damon_sysfs
+
+def main():
+ # repeatedly access even-numbered ones in 14 regions of 10 MiB size
+ sz_region = 10 * 1024 * 1024
+ proc = subprocess.Popen(['./access_memory_even', '14', '%d' % sz_region])
+
+ # stat every monitored regions
+ kdamonds = _damon_sysfs.Kdamonds([_damon_sysfs.Kdamond(
+ contexts=[_damon_sysfs.DamonCtx(
+ ops='vaddr',
+ targets=[_damon_sysfs.DamonTarget(pid=proc.pid)],
+ schemes=[_damon_sysfs.Damos(action='stat',
+ )] # schemes
+ )] # contexts
+ )]) # kdamonds
+
+ err = kdamonds.start()
+ if err is not None:
+ proc.terminate()
+ print('kdamond start failed: %s' % err)
+ exit(1)
+
+ collected_nr_regions = []
+ while proc.poll() is None:
+ time.sleep(0.1)
+ err = kdamonds.kdamonds[0].update_schemes_tried_regions()
+ if err is not None:
+ proc.terminate()
+ print('tried regions update failed: %s' % err)
+ exit(1)
+
+ scheme = kdamonds.kdamonds[0].contexts[0].schemes[0]
+ if scheme.tried_regions is None:
+ proc.terminate()
+ print('tried regions is not collected')
+ exit(1)
+
+ nr_tried_regions = len(scheme.tried_regions)
+ if nr_tried_regions <= 0:
+ proc.terminate()
+ print('tried regions is not created')
+ exit(1)
+ collected_nr_regions.append(nr_tried_regions)
+ if len(collected_nr_regions) > 10:
+ break
+ proc.terminate()
+
+ collected_nr_regions.sort()
+ sample = collected_nr_regions[4]
+ print('50-th percentile nr_regions: %d' % sample)
+ print('expectation (>= 14) is %s' % 'met' if sample >= 14 else 'not met')
+ if collected_nr_regions[4] < 14:
+ print('full nr_regions:')
+ print('\n'.join(collected_nr_regions))
+ exit(1)
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/testing/selftests/devices/Makefile b/tools/testing/selftests/devices/Makefile
deleted file mode 100644
index ca29249b30c3..000000000000
--- a/tools/testing/selftests/devices/Makefile
+++ /dev/null
@@ -1,4 +0,0 @@
-TEST_PROGS := test_discoverable_devices.py
-TEST_FILES := boards ksft.py
-
-include ../lib.mk
diff --git a/tools/testing/selftests/devices/error_logs/Makefile b/tools/testing/selftests/devices/error_logs/Makefile
new file mode 100644
index 000000000000..d546c3fb0a7f
--- /dev/null
+++ b/tools/testing/selftests/devices/error_logs/Makefile
@@ -0,0 +1,3 @@
+TEST_PROGS := test_device_error_logs.py
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/devices/error_logs/test_device_error_logs.py b/tools/testing/selftests/devices/error_logs/test_device_error_logs.py
new file mode 100755
index 000000000000..3dd56c8ec92c
--- /dev/null
+++ b/tools/testing/selftests/devices/error_logs/test_device_error_logs.py
@@ -0,0 +1,85 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2024 Collabora Ltd
+#
+# This test checks for the presence of error (or more critical) log messages
+# coming from devices in the kernel log.
+#
+# One failed test case is reported for each device that has outputted error
+# logs. Devices with no errors do not produce a passing test case to avoid
+# polluting the results, therefore a successful run will list 0 tests run.
+#
+
+import glob
+import os
+import re
+import sys
+
+# Allow ksft module to be imported from different directory
+this_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(os.path.join(this_dir, "../../kselftest/"))
+
+import ksft
+
+kmsg = "/dev/kmsg"
+
+RE_log = re.compile(
+ r"(?P<prefix>[0-9]+),(?P<sequence>[0-9]+),(?P<timestamp>[0-9]+),(?P<flag>[^;]*)(,[^;]*)*;(?P<message>.*)"
+)
+RE_tag = re.compile(r" (?P<key>[^=]+)=(?P<value>.*)")
+
+PREFIX_ERROR = 3
+
+logs = []
+error_log_per_device = {}
+
+
+def parse_kmsg():
+ current_log = {}
+
+ with open(kmsg) as f:
+ os.set_blocking(f.fileno(), False)
+
+ for line in f:
+ tag_line = RE_tag.match(line)
+ log_line = RE_log.match(line)
+
+ if log_line:
+ if current_log:
+ logs.append(current_log) # Save last log
+
+ current_log = {
+ "prefix": int(log_line.group("prefix")),
+ "sequence": int(log_line.group("sequence")),
+ "timestamp": int(log_line.group("timestamp")),
+ "flag": log_line.group("flag"),
+ "message": log_line.group("message"),
+ }
+ elif tag_line:
+ current_log[tag_line.group("key")] = tag_line.group("value")
+
+
+def generate_per_device_error_log():
+ for log in logs:
+ if log.get("DEVICE") and log["prefix"] <= PREFIX_ERROR:
+ if not error_log_per_device.get(log["DEVICE"]):
+ error_log_per_device[log["DEVICE"]] = []
+ error_log_per_device[log["DEVICE"]].append(log)
+
+
+parse_kmsg()
+
+generate_per_device_error_log()
+num_tests = len(error_log_per_device)
+
+ksft.print_header()
+ksft.set_plan(num_tests)
+
+for device in error_log_per_device:
+ for log in error_log_per_device[device]:
+ ksft.print_msg(log["message"])
+ ksft.test_result_fail(device)
+if num_tests == 0:
+ ksft.print_msg("No device error logs found")
+ksft.finished()
diff --git a/tools/testing/selftests/devices/probe/Makefile b/tools/testing/selftests/devices/probe/Makefile
new file mode 100644
index 000000000000..f630108c3fdf
--- /dev/null
+++ b/tools/testing/selftests/devices/probe/Makefile
@@ -0,0 +1,4 @@
+TEST_PROGS := test_discoverable_devices.py
+TEST_FILES := boards
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/devices/boards/Dell Inc.,XPS 13 9300.yaml b/tools/testing/selftests/devices/probe/boards/Dell Inc.,XPS 13 9300.yaml
index ff932eb19f0b..ff932eb19f0b 100644
--- a/tools/testing/selftests/devices/boards/Dell Inc.,XPS 13 9300.yaml
+++ b/tools/testing/selftests/devices/probe/boards/Dell Inc.,XPS 13 9300.yaml
diff --git a/tools/testing/selftests/devices/boards/google,spherion.yaml b/tools/testing/selftests/devices/probe/boards/google,spherion.yaml
index 17157ecd8c14..3ea843324797 100644
--- a/tools/testing/selftests/devices/boards/google,spherion.yaml
+++ b/tools/testing/selftests/devices/probe/boards/google,spherion.yaml
@@ -11,6 +11,10 @@
# this, several optional keys can be used:
# - dt-mmio: identify the MMIO address of the controller as defined in the
# Devicetree.
+# - of-fullname-regex: regular expression to match against the OF_FULLNAME
+# property. Useful when the controller's address is not unique across other
+# sibling controllers. In this case, dt-mmio can't be used, and this property
+# allows the matching to include parent nodes as well to make it unique.
# - usb-version: for USB controllers to differentiate between USB3 and USB2
# buses sharing the same controller.
# - acpi-uid: _UID property of the controller as supplied by the ACPI. Useful to
diff --git a/tools/testing/selftests/devices/test_discoverable_devices.py b/tools/testing/selftests/devices/probe/test_discoverable_devices.py
index fbae8deb593d..d94a74b8a054 100755
--- a/tools/testing/selftests/devices/test_discoverable_devices.py
+++ b/tools/testing/selftests/devices/probe/test_discoverable_devices.py
@@ -14,13 +14,19 @@
# the description and examples of the file structure and vocabulary.
#
+import argparse
import glob
-import ksft
import os
import re
import sys
import yaml
+# Allow ksft module to be imported from different directory
+this_dir = os.path.dirname(os.path.realpath(__file__))
+sys.path.append(os.path.join(this_dir, "../../kselftest/"))
+
+import ksft
+
pci_controllers = []
usb_controllers = []
@@ -63,6 +69,22 @@ def get_dt_mmio(sysfs_dev_dir):
sysfs_dev_dir = os.path.dirname(sysfs_dev_dir)
+def get_of_fullname(sysfs_dev_dir):
+ re_of_fullname = re.compile("OF_FULLNAME=(.*)")
+ of_full_name = None
+
+ # PCI controllers' sysfs don't have an of_node, so have to read it from the
+ # parent
+ while not of_full_name:
+ try:
+ with open(os.path.join(sysfs_dev_dir, "uevent")) as f:
+ of_fullname = re_of_fullname.search(f.read()).group(1)
+ return of_fullname
+ except:
+ pass
+ sysfs_dev_dir = os.path.dirname(sysfs_dev_dir)
+
+
def get_acpi_uid(sysfs_dev_dir):
with open(os.path.join(sysfs_dev_dir, "firmware_node", "uid")) as f:
return f.read()
@@ -96,6 +118,11 @@ def find_controller_in_sysfs(controller, parent_sysfs=None):
if str(controller["dt-mmio"]) != get_dt_mmio(c):
continue
+ if controller.get("of-fullname-regex"):
+ re_of_fullname = re.compile(str(controller["of-fullname-regex"]))
+ if not re_of_fullname.match(get_of_fullname(c)):
+ continue
+
if controller.get("usb-version"):
if controller["usb-version"] != get_usb_version(c):
continue
@@ -194,6 +221,9 @@ def generate_pathname(device):
if device.get("dt-mmio"):
pathname += "@" + str(device["dt-mmio"])
+ if device.get("of-fullname-regex"):
+ pathname += "-" + str(device["of-fullname-regex"])
+
if device.get("name"):
pathname = pathname + "/" + device["name"]
@@ -296,14 +326,24 @@ def run_test(yaml_file):
parse_device_tree_node(device_tree)
+parser = argparse.ArgumentParser()
+parser.add_argument(
+ "--boards-dir", default="boards", help="Directory containing the board YAML files"
+)
+args = parser.parse_args()
+
find_pci_controller_dirs()
find_usb_controller_dirs()
ksft.print_header()
+if not os.path.exists(args.boards_dir):
+ ksft.print_msg(f"Boards directory '{args.boards_dir}' doesn't exist")
+ ksft.exit_fail()
+
board_file = ""
for board_filename in get_board_filenames():
- full_board_filename = os.path.join("boards", board_filename + ".yaml")
+ full_board_filename = os.path.join(args.boards_dir, board_filename + ".yaml")
if os.path.exists(full_board_filename):
board_file = full_board_filename
diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/testing/selftests/dma/dma_map_benchmark.c
index 5c997f17fcbd..b12f1f9babf8 100644
--- a/tools/testing/selftests/dma/dma_map_benchmark.c
+++ b/tools/testing/selftests/dma/dma_map_benchmark.c
@@ -33,7 +33,6 @@ int main(int argc, char **argv)
int granule = 1;
int cmd = DMA_MAP_BENCHMARK;
- char *p;
while ((opt = getopt(argc, argv, "t:s:n:b:d:x:g:")) != -1) {
switch (opt) {
diff --git a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c
index 890a8236a8ba..5d0a809dc2df 100644
--- a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c
+++ b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c
@@ -15,6 +15,7 @@
#include <linux/dma-buf.h>
#include <linux/dma-heap.h>
#include <drm/drm.h>
+#include "../kselftest.h"
#define DEVPATH "/dev/dma_heap"
@@ -28,9 +29,11 @@ static int check_vgem(int fd)
version.name = name;
ret = ioctl(fd, DRM_IOCTL_VERSION, &version);
- if (ret)
+ if (ret || version.name_len != 4)
return 0;
+ name[4] = '\0';
+
return !strcmp(name, "vgem");
}
@@ -90,14 +93,13 @@ static int dmabuf_heap_open(char *name)
char buf[256];
ret = snprintf(buf, 256, "%s/%s", DEVPATH, name);
- if (ret < 0) {
- printf("snprintf failed!\n");
- return ret;
- }
+ if (ret < 0)
+ ksft_exit_fail_msg("snprintf failed! %d\n", ret);
fd = open(buf, O_RDWR);
if (fd < 0)
- printf("open %s failed!\n", buf);
+ ksft_exit_fail_msg("open %s failed: %s\n", buf, strerror(errno));
+
return fd;
}
@@ -140,7 +142,7 @@ static int dmabuf_sync(int fd, int start_stop)
#define ONE_MEG (1024 * 1024)
-static int test_alloc_and_import(char *heap_name)
+static void test_alloc_and_import(char *heap_name)
{
int heap_fd = -1, dmabuf_fd = -1, importer_fd = -1;
uint32_t handle = 0;
@@ -148,27 +150,19 @@ static int test_alloc_and_import(char *heap_name)
int ret;
heap_fd = dmabuf_heap_open(heap_name);
- if (heap_fd < 0)
- return -1;
- printf(" Testing allocation and importing: ");
+ ksft_print_msg("Testing allocation and importing:\n");
ret = dmabuf_heap_alloc(heap_fd, ONE_MEG, 0, &dmabuf_fd);
if (ret) {
- printf("FAIL (Allocation Failed!)\n");
- ret = -1;
- goto out;
+ ksft_test_result_fail("FAIL (Allocation Failed!) %d\n", ret);
+ return;
}
+
/* mmap and write a simple pattern */
- p = mmap(NULL,
- ONE_MEG,
- PROT_READ | PROT_WRITE,
- MAP_SHARED,
- dmabuf_fd,
- 0);
+ p = mmap(NULL, ONE_MEG, PROT_READ | PROT_WRITE, MAP_SHARED, dmabuf_fd, 0);
if (p == MAP_FAILED) {
- printf("FAIL (mmap() failed)\n");
- ret = -1;
- goto out;
+ ksft_test_result_fail("FAIL (mmap() failed): %s\n", strerror(errno));
+ goto close_and_return;
}
dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_START);
@@ -178,71 +172,64 @@ static int test_alloc_and_import(char *heap_name)
importer_fd = open_vgem();
if (importer_fd < 0) {
- ret = importer_fd;
- printf("(Could not open vgem - skipping): ");
+ ksft_test_result_skip("Could not open vgem %d\n", importer_fd);
} else {
ret = import_vgem_fd(importer_fd, dmabuf_fd, &handle);
- if (ret < 0) {
- printf("FAIL (Failed to import buffer)\n");
- goto out;
- }
+ ksft_test_result(ret >= 0, "Import buffer %d\n", ret);
}
ret = dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_START);
if (ret < 0) {
- printf("FAIL (DMA_BUF_SYNC_START failed!)\n");
+ ksft_print_msg("FAIL (DMA_BUF_SYNC_START failed!) %d\n", ret);
goto out;
}
memset(p, 0xff, ONE_MEG);
ret = dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_END);
if (ret < 0) {
- printf("FAIL (DMA_BUF_SYNC_END failed!)\n");
+ ksft_print_msg("FAIL (DMA_BUF_SYNC_END failed!) %d\n", ret);
goto out;
}
close_handle(importer_fd, handle);
- ret = 0;
- printf(" OK\n");
+ ksft_test_result_pass("%s dmabuf sync succeeded\n", __func__);
+ return;
+
out:
- if (p)
- munmap(p, ONE_MEG);
- if (importer_fd >= 0)
- close(importer_fd);
- if (dmabuf_fd >= 0)
- close(dmabuf_fd);
- if (heap_fd >= 0)
- close(heap_fd);
+ ksft_test_result_fail("%s dmabuf sync failed\n", __func__);
+ munmap(p, ONE_MEG);
+ close(importer_fd);
- return ret;
+close_and_return:
+ close(dmabuf_fd);
+ close(heap_fd);
}
-static int test_alloc_zeroed(char *heap_name, size_t size)
+static void test_alloc_zeroed(char *heap_name, size_t size)
{
int heap_fd = -1, dmabuf_fd[32];
- int i, j, ret;
+ int i, j, k, ret;
void *p = NULL;
char *c;
- printf(" Testing alloced %ldk buffers are zeroed: ", size / 1024);
+ ksft_print_msg("Testing alloced %ldk buffers are zeroed:\n", size / 1024);
heap_fd = dmabuf_heap_open(heap_name);
- if (heap_fd < 0)
- return -1;
/* Allocate and fill a bunch of buffers */
for (i = 0; i < 32; i++) {
ret = dmabuf_heap_alloc(heap_fd, size, 0, &dmabuf_fd[i]);
- if (ret < 0) {
- printf("FAIL (Allocation (%i) failed)\n", i);
- goto out;
+ if (ret) {
+ ksft_test_result_fail("FAIL (Allocation (%i) failed) %d\n", i, ret);
+ goto close_and_return;
}
+
/* mmap and fill with simple pattern */
p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, dmabuf_fd[i], 0);
if (p == MAP_FAILED) {
- printf("FAIL (mmap() failed!)\n");
- ret = -1;
- goto out;
+ ksft_test_result_fail("FAIL (mmap() failed!): %s\n", strerror(errno));
+ goto close_and_return;
}
+
dmabuf_sync(dmabuf_fd[i], DMA_BUF_SYNC_START);
memset(p, 0xff, size);
dmabuf_sync(dmabuf_fd[i], DMA_BUF_SYNC_END);
@@ -251,48 +238,47 @@ static int test_alloc_zeroed(char *heap_name, size_t size)
/* close them all */
for (i = 0; i < 32; i++)
close(dmabuf_fd[i]);
+ ksft_test_result_pass("Allocate and fill a bunch of buffers\n");
/* Allocate and validate all buffers are zeroed */
for (i = 0; i < 32; i++) {
ret = dmabuf_heap_alloc(heap_fd, size, 0, &dmabuf_fd[i]);
if (ret < 0) {
- printf("FAIL (Allocation (%i) failed)\n", i);
- goto out;
+ ksft_test_result_fail("FAIL (Allocation (%i) failed) %d\n", i, ret);
+ goto close_and_return;
}
/* mmap and validate everything is zero */
p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, dmabuf_fd[i], 0);
if (p == MAP_FAILED) {
- printf("FAIL (mmap() failed!)\n");
- ret = -1;
- goto out;
+ ksft_test_result_fail("FAIL (mmap() failed!): %s\n", strerror(errno));
+ goto close_and_return;
}
+
dmabuf_sync(dmabuf_fd[i], DMA_BUF_SYNC_START);
c = (char *)p;
for (j = 0; j < size; j++) {
if (c[j] != 0) {
- printf("FAIL (Allocated buffer not zeroed @ %i)\n", j);
- break;
+ ksft_print_msg("FAIL (Allocated buffer not zeroed @ %i)\n", j);
+ dmabuf_sync(dmabuf_fd[i], DMA_BUF_SYNC_END);
+ munmap(p, size);
+ goto out;
}
}
dmabuf_sync(dmabuf_fd[i], DMA_BUF_SYNC_END);
munmap(p, size);
}
- /* close them all */
- for (i = 0; i < 32; i++)
- close(dmabuf_fd[i]);
-
- close(heap_fd);
- printf("OK\n");
- return 0;
out:
- while (i > 0) {
- close(dmabuf_fd[i]);
- i--;
- }
+ ksft_test_result(i == 32, "Allocate and validate all buffers are zeroed\n");
+
+close_and_return:
+ /* close them all */
+ for (k = 0; k < i; k++)
+ close(dmabuf_fd[k]);
+
close(heap_fd);
- return ret;
+ return;
}
/* Test the ioctl version compatibility w/ a smaller structure then expected */
@@ -360,126 +346,97 @@ static int dmabuf_heap_alloc_newer(int fd, size_t len, unsigned int flags,
return ret;
}
-static int test_alloc_compat(char *heap_name)
+static void test_alloc_compat(char *heap_name)
{
- int heap_fd = -1, dmabuf_fd = -1;
- int ret;
+ int ret, heap_fd = -1, dmabuf_fd = -1;
heap_fd = dmabuf_heap_open(heap_name);
- if (heap_fd < 0)
- return -1;
- printf(" Testing (theoretical)older alloc compat: ");
+ ksft_print_msg("Testing (theoretical) older alloc compat:\n");
ret = dmabuf_heap_alloc_older(heap_fd, ONE_MEG, 0, &dmabuf_fd);
- if (ret) {
- printf("FAIL (Older compat allocation failed!)\n");
- ret = -1;
- goto out;
- }
- close(dmabuf_fd);
- printf("OK\n");
+ if (dmabuf_fd >= 0)
+ close(dmabuf_fd);
+ ksft_test_result(!ret, "dmabuf_heap_alloc_older\n");
- printf(" Testing (theoretical)newer alloc compat: ");
+ ksft_print_msg("Testing (theoretical) newer alloc compat:\n");
ret = dmabuf_heap_alloc_newer(heap_fd, ONE_MEG, 0, &dmabuf_fd);
- if (ret) {
- printf("FAIL (Newer compat allocation failed!)\n");
- ret = -1;
- goto out;
- }
- printf("OK\n");
-out:
if (dmabuf_fd >= 0)
close(dmabuf_fd);
- if (heap_fd >= 0)
- close(heap_fd);
+ ksft_test_result(!ret, "dmabuf_heap_alloc_newer\n");
- return ret;
+ close(heap_fd);
}
-static int test_alloc_errors(char *heap_name)
+static void test_alloc_errors(char *heap_name)
{
int heap_fd = -1, dmabuf_fd = -1;
int ret;
heap_fd = dmabuf_heap_open(heap_name);
- if (heap_fd < 0)
- return -1;
- printf(" Testing expected error cases: ");
+ ksft_print_msg("Testing expected error cases:\n");
ret = dmabuf_heap_alloc(0, ONE_MEG, 0x111111, &dmabuf_fd);
- if (!ret) {
- printf("FAIL (Did not see expected error (invalid fd)!)\n");
- ret = -1;
- goto out;
- }
+ ksft_test_result(ret, "Error expected on invalid fd %d\n", ret);
ret = dmabuf_heap_alloc(heap_fd, ONE_MEG, 0x111111, &dmabuf_fd);
- if (!ret) {
- printf("FAIL (Did not see expected error (invalid heap flags)!)\n");
- ret = -1;
- goto out;
- }
+ ksft_test_result(ret, "Error expected on invalid heap flags %d\n", ret);
ret = dmabuf_heap_alloc_fdflags(heap_fd, ONE_MEG,
~(O_RDWR | O_CLOEXEC), 0, &dmabuf_fd);
- if (!ret) {
- printf("FAIL (Did not see expected error (invalid fd flags)!)\n");
- ret = -1;
- goto out;
- }
+ ksft_test_result(ret, "Error expected on invalid heap flags %d\n", ret);
- printf("OK\n");
- ret = 0;
-out:
if (dmabuf_fd >= 0)
close(dmabuf_fd);
- if (heap_fd >= 0)
- close(heap_fd);
+ close(heap_fd);
+}
- return ret;
+static int numer_of_heaps(void)
+{
+ DIR *d = opendir(DEVPATH);
+ struct dirent *dir;
+ int heaps = 0;
+
+ while ((dir = readdir(d))) {
+ if (!strncmp(dir->d_name, ".", 2))
+ continue;
+ if (!strncmp(dir->d_name, "..", 3))
+ continue;
+ heaps++;
+ }
+
+ return heaps;
}
int main(void)
{
- DIR *d;
struct dirent *dir;
- int ret = -1;
+ DIR *d;
+
+ ksft_print_header();
d = opendir(DEVPATH);
if (!d) {
- printf("No %s directory?\n", DEVPATH);
- return -1;
+ ksft_print_msg("No %s directory?\n", DEVPATH);
+ return KSFT_SKIP;
}
- while ((dir = readdir(d)) != NULL) {
+ ksft_set_plan(11 * numer_of_heaps());
+
+ while ((dir = readdir(d))) {
if (!strncmp(dir->d_name, ".", 2))
continue;
if (!strncmp(dir->d_name, "..", 3))
continue;
- printf("Testing heap: %s\n", dir->d_name);
- printf("=======================================\n");
- ret = test_alloc_and_import(dir->d_name);
- if (ret)
- break;
-
- ret = test_alloc_zeroed(dir->d_name, 4 * 1024);
- if (ret)
- break;
-
- ret = test_alloc_zeroed(dir->d_name, ONE_MEG);
- if (ret)
- break;
-
- ret = test_alloc_compat(dir->d_name);
- if (ret)
- break;
-
- ret = test_alloc_errors(dir->d_name);
- if (ret)
- break;
+ ksft_print_msg("Testing heap: %s\n", dir->d_name);
+ ksft_print_msg("=======================================\n");
+ test_alloc_and_import(dir->d_name);
+ test_alloc_zeroed(dir->d_name, 4 * 1024);
+ test_alloc_zeroed(dir->d_name, ONE_MEG);
+ test_alloc_compat(dir->d_name);
+ test_alloc_errors(dir->d_name);
}
closedir(d);
- return ret;
+ ksft_finished();
}
diff --git a/tools/testing/selftests/drivers/dma-buf/udmabuf.c b/tools/testing/selftests/drivers/dma-buf/udmabuf.c
index c812080e304e..6062723a172e 100644
--- a/tools/testing/selftests/drivers/dma-buf/udmabuf.c
+++ b/tools/testing/selftests/drivers/dma-buf/udmabuf.c
@@ -9,52 +9,162 @@
#include <errno.h>
#include <fcntl.h>
#include <malloc.h>
+#include <stdbool.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
+#include <sys/mman.h>
#include <linux/memfd.h>
#include <linux/udmabuf.h>
+#include "../../kselftest.h"
#define TEST_PREFIX "drivers/dma-buf/udmabuf"
#define NUM_PAGES 4
+#define NUM_ENTRIES 4
+#define MEMFD_SIZE 1024 /* in pages */
-static int memfd_create(const char *name, unsigned int flags)
+static unsigned int page_size;
+
+static int create_memfd_with_seals(off64_t size, bool hpage)
+{
+ int memfd, ret;
+ unsigned int flags = MFD_ALLOW_SEALING;
+
+ if (hpage)
+ flags |= MFD_HUGETLB;
+
+ memfd = memfd_create("udmabuf-test", flags);
+ if (memfd < 0) {
+ ksft_print_msg("%s: [skip,no-memfd]\n", TEST_PREFIX);
+ exit(KSFT_SKIP);
+ }
+
+ ret = fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK);
+ if (ret < 0) {
+ ksft_print_msg("%s: [skip,fcntl-add-seals]\n", TEST_PREFIX);
+ exit(KSFT_SKIP);
+ }
+
+ ret = ftruncate(memfd, size);
+ if (ret == -1) {
+ ksft_print_msg("%s: [FAIL,memfd-truncate]\n", TEST_PREFIX);
+ exit(KSFT_FAIL);
+ }
+
+ return memfd;
+}
+
+static int create_udmabuf_list(int devfd, int memfd, off64_t memfd_size)
+{
+ struct udmabuf_create_list *list;
+ int ubuf_fd, i;
+
+ list = malloc(sizeof(struct udmabuf_create_list) +
+ sizeof(struct udmabuf_create_item) * NUM_ENTRIES);
+ if (!list) {
+ ksft_print_msg("%s: [FAIL, udmabuf-malloc]\n", TEST_PREFIX);
+ exit(KSFT_FAIL);
+ }
+
+ for (i = 0; i < NUM_ENTRIES; i++) {
+ list->list[i].memfd = memfd;
+ list->list[i].offset = i * (memfd_size / NUM_ENTRIES);
+ list->list[i].size = getpagesize() * NUM_PAGES;
+ }
+
+ list->count = NUM_ENTRIES;
+ list->flags = UDMABUF_FLAGS_CLOEXEC;
+ ubuf_fd = ioctl(devfd, UDMABUF_CREATE_LIST, list);
+ free(list);
+ if (ubuf_fd < 0) {
+ ksft_print_msg("%s: [FAIL, udmabuf-create]\n", TEST_PREFIX);
+ exit(KSFT_FAIL);
+ }
+
+ return ubuf_fd;
+}
+
+static void write_to_memfd(void *addr, off64_t size, char chr)
+{
+ int i;
+
+ for (i = 0; i < size / page_size; i++) {
+ *((char *)addr + (i * page_size)) = chr;
+ }
+}
+
+static void *mmap_fd(int fd, off64_t size)
+{
+ void *addr;
+
+ addr = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+ if (addr == MAP_FAILED) {
+ ksft_print_msg("%s: ubuf_fd mmap fail\n", TEST_PREFIX);
+ exit(KSFT_FAIL);
+ }
+
+ return addr;
+}
+
+static int compare_chunks(void *addr1, void *addr2, off64_t memfd_size)
{
- return syscall(__NR_memfd_create, name, flags);
+ off64_t off;
+ int i = 0, j, k = 0, ret = 0;
+ char char1, char2;
+
+ while (i < NUM_ENTRIES) {
+ off = i * (memfd_size / NUM_ENTRIES);
+ for (j = 0; j < NUM_PAGES; j++, k++) {
+ char1 = *((char *)addr1 + off + (j * getpagesize()));
+ char2 = *((char *)addr2 + (k * getpagesize()));
+ if (char1 != char2) {
+ ret = -1;
+ goto err;
+ }
+ }
+ i++;
+ }
+err:
+ munmap(addr1, memfd_size);
+ munmap(addr2, NUM_ENTRIES * NUM_PAGES * getpagesize());
+ return ret;
}
int main(int argc, char *argv[])
{
struct udmabuf_create create;
int devfd, memfd, buf, ret;
- off_t size;
- void *mem;
+ off64_t size;
+ void *addr1, *addr2;
+
+ ksft_print_header();
+ ksft_set_plan(6);
devfd = open("/dev/udmabuf", O_RDWR);
if (devfd < 0) {
- printf("%s: [skip,no-udmabuf: Unable to access DMA buffer device file]\n",
- TEST_PREFIX);
- exit(77);
+ ksft_print_msg(
+ "%s: [skip,no-udmabuf: Unable to access DMA buffer device file]\n",
+ TEST_PREFIX);
+ exit(KSFT_SKIP);
}
memfd = memfd_create("udmabuf-test", MFD_ALLOW_SEALING);
if (memfd < 0) {
- printf("%s: [skip,no-memfd]\n", TEST_PREFIX);
- exit(77);
+ ksft_print_msg("%s: [skip,no-memfd]\n", TEST_PREFIX);
+ exit(KSFT_SKIP);
}
ret = fcntl(memfd, F_ADD_SEALS, F_SEAL_SHRINK);
if (ret < 0) {
- printf("%s: [skip,fcntl-add-seals]\n", TEST_PREFIX);
- exit(77);
+ ksft_print_msg("%s: [skip,fcntl-add-seals]\n", TEST_PREFIX);
+ exit(KSFT_SKIP);
}
-
size = getpagesize() * NUM_PAGES;
ret = ftruncate(memfd, size);
if (ret == -1) {
- printf("%s: [FAIL,memfd-truncate]\n", TEST_PREFIX);
- exit(1);
+ ksft_print_msg("%s: [FAIL,memfd-truncate]\n", TEST_PREFIX);
+ exit(KSFT_FAIL);
}
memset(&create, 0, sizeof(create));
@@ -64,44 +174,86 @@ int main(int argc, char *argv[])
create.offset = getpagesize()/2;
create.size = getpagesize();
buf = ioctl(devfd, UDMABUF_CREATE, &create);
- if (buf >= 0) {
- printf("%s: [FAIL,test-1]\n", TEST_PREFIX);
- exit(1);
- }
+ if (buf >= 0)
+ ksft_test_result_fail("%s: [FAIL,test-1]\n", TEST_PREFIX);
+ else
+ ksft_test_result_pass("%s: [PASS,test-1]\n", TEST_PREFIX);
/* should fail (size not multiple of page) */
create.memfd = memfd;
create.offset = 0;
create.size = getpagesize()/2;
buf = ioctl(devfd, UDMABUF_CREATE, &create);
- if (buf >= 0) {
- printf("%s: [FAIL,test-2]\n", TEST_PREFIX);
- exit(1);
- }
+ if (buf >= 0)
+ ksft_test_result_fail("%s: [FAIL,test-2]\n", TEST_PREFIX);
+ else
+ ksft_test_result_pass("%s: [PASS,test-2]\n", TEST_PREFIX);
/* should fail (not memfd) */
create.memfd = 0; /* stdin */
create.offset = 0;
create.size = size;
buf = ioctl(devfd, UDMABUF_CREATE, &create);
- if (buf >= 0) {
- printf("%s: [FAIL,test-3]\n", TEST_PREFIX);
- exit(1);
- }
+ if (buf >= 0)
+ ksft_test_result_fail("%s: [FAIL,test-3]\n", TEST_PREFIX);
+ else
+ ksft_test_result_pass("%s: [PASS,test-3]\n", TEST_PREFIX);
/* should work */
+ page_size = getpagesize();
+ addr1 = mmap_fd(memfd, size);
+ write_to_memfd(addr1, size, 'a');
create.memfd = memfd;
create.offset = 0;
create.size = size;
buf = ioctl(devfd, UDMABUF_CREATE, &create);
- if (buf < 0) {
- printf("%s: [FAIL,test-4]\n", TEST_PREFIX);
- exit(1);
- }
+ if (buf < 0)
+ ksft_test_result_fail("%s: [FAIL,test-4]\n", TEST_PREFIX);
+ else
+ ksft_test_result_pass("%s: [PASS,test-4]\n", TEST_PREFIX);
+
+ munmap(addr1, size);
+ close(buf);
+ close(memfd);
+
+ /* should work (migration of 4k size pages)*/
+ size = MEMFD_SIZE * page_size;
+ memfd = create_memfd_with_seals(size, false);
+ addr1 = mmap_fd(memfd, size);
+ write_to_memfd(addr1, size, 'a');
+ buf = create_udmabuf_list(devfd, memfd, size);
+ addr2 = mmap_fd(buf, NUM_PAGES * NUM_ENTRIES * getpagesize());
+ write_to_memfd(addr1, size, 'b');
+ ret = compare_chunks(addr1, addr2, size);
+ if (ret < 0)
+ ksft_test_result_fail("%s: [FAIL,test-5]\n", TEST_PREFIX);
+ else
+ ksft_test_result_pass("%s: [PASS,test-5]\n", TEST_PREFIX);
+
+ close(buf);
+ close(memfd);
+
+ /* should work (migration of 2MB size huge pages)*/
+ page_size = getpagesize() * 512; /* 2 MB */
+ size = MEMFD_SIZE * page_size;
+ memfd = create_memfd_with_seals(size, true);
+ addr1 = mmap_fd(memfd, size);
+ write_to_memfd(addr1, size, 'a');
+ buf = create_udmabuf_list(devfd, memfd, size);
+ addr2 = mmap_fd(buf, NUM_PAGES * NUM_ENTRIES * getpagesize());
+ write_to_memfd(addr1, size, 'b');
+ ret = compare_chunks(addr1, addr2, size);
+ if (ret < 0)
+ ksft_test_result_fail("%s: [FAIL,test-6]\n", TEST_PREFIX);
+ else
+ ksft_test_result_pass("%s: [PASS,test-6]\n", TEST_PREFIX);
- fprintf(stderr, "%s: ok\n", TEST_PREFIX);
close(buf);
close(memfd);
close(devfd);
+
+ ksft_print_msg("%s: ok\n", TEST_PREFIX);
+ ksft_print_cnts();
+
return 0;
}
diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile
new file mode 100644
index 000000000000..e54f382bcb02
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TEST_INCLUDES := $(wildcard lib/py/*.py)
+
+TEST_PROGS := \
+ ping.py \
+ queues.py \
+ stats.py \
+# end of TEST_PROGS
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/README.rst b/tools/testing/selftests/drivers/net/README.rst
new file mode 100644
index 000000000000..3b6a29e6564b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/README.rst
@@ -0,0 +1,136 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+Running driver tests
+====================
+
+Networking driver tests are executed within kselftest framework like any
+other tests. They support testing both real device drivers and emulated /
+software drivers (latter mostly to test the core parts of the stack).
+
+SW mode
+~~~~~~~
+
+By default, when no extra parameters are set or exported, tests execute
+against software drivers such as netdevsim. No extra preparation is required
+the software devices are created and destroyed as part of the test.
+In this mode the tests are indistinguishable from other selftests and
+(for example) can be run under ``virtme-ng`` like the core networking selftests.
+
+HW mode
+~~~~~~~
+
+Executing tests against a real device requires external preparation.
+The netdevice against which tests will be run must exist, be running
+(in UP state) and be configured with an IP address.
+
+Refer to list of :ref:`Variables` later in this file to set up running
+the tests against a real device.
+
+Both modes required
+~~~~~~~~~~~~~~~~~~~
+
+All tests in drivers/net must support running both against a software device
+and a real device. SW-only tests should instead be placed in net/ or
+drivers/net/netdevsim, HW-only tests in drivers/net/hw.
+
+Variables
+=========
+
+The variables can be set in the environment or by creating a net.config
+file in the same directory as this README file. Example::
+
+ $ NETIF=eth0 ./some_test.sh
+
+or::
+
+ $ cat tools/testing/selftests/drivers/net/net.config
+ # Variable set in a file
+ NETIF=eth0
+
+Local test (which don't require endpoint for sending / receiving traffic)
+need only the ``NETIF`` variable. Remaining variables define the endpoint
+and communication method.
+
+NETIF
+~~~~~
+
+Name of the netdevice against which the test should be executed.
+When empty or not set software devices will be used.
+
+LOCAL_V4, LOCAL_V6, REMOTE_V4, REMOTE_V6
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Local and remote endpoint IP addresses.
+
+REMOTE_TYPE
+~~~~~~~~~~~
+
+Communication method used to run commands on the remote endpoint.
+Test framework has built-in support for ``netns`` and ``ssh`` channels.
+``netns`` assumes the "remote" interface is part of the same
+host, just moved to the specified netns.
+``ssh`` communicates with remote endpoint over ``ssh`` and ``scp``.
+Using persistent SSH connections is strongly encouraged to avoid
+the latency of SSH connection setup on every command.
+
+Communication methods are defined by classes in ``lib/py/remote_{name}.py``.
+It should be possible to add a new method without modifying any of
+the framework, by simply adding an appropriately named file to ``lib/py``.
+
+REMOTE_ARGS
+~~~~~~~~~~~
+
+Arguments used to construct the communication channel.
+Communication channel dependent::
+
+ for netns - name of the "remote" namespace
+ for ssh - name/address of the remote host
+
+Example
+=======
+
+Build the selftests::
+
+ # make -C tools/testing/selftests/ TARGETS="drivers/net drivers/net/hw"
+
+"Install" the tests and copy them over to the target machine::
+
+ # make -C tools/testing/selftests/ TARGETS="drivers/net drivers/net/hw" \
+ install INSTALL_PATH=/tmp/ksft-net-drv
+
+ # rsync -ra --delete /tmp/ksft-net-drv root@192.168.1.1:/root/
+
+On the target machine, running the tests will use netdevsim by default::
+
+ [/root] # ./ksft-net-drv/run_kselftest.sh -t drivers/net:ping.py
+ TAP version 13
+ 1..1
+ # timeout set to 45
+ # selftests: drivers/net: ping.py
+ # KTAP version 1
+ # 1..3
+ # ok 1 ping.test_v4
+ # ok 2 ping.test_v6
+ # ok 3 ping.test_tcp
+ # # Totals: pass:3 fail:0 xfail:0 xpass:0 skip:0 error:0
+ ok 1 selftests: drivers/net: ping.py
+
+Create a config with remote info::
+
+ [/root] # cat > ./ksft-net-drv/drivers/net/net.config <<EOF
+ NETIF=eth0
+ LOCAL_V4=192.168.1.1
+ REMOTE_V4=192.168.1.2
+ REMOTE_TYPE=ssh
+ REMOTE_ARGS=root@192.168.1.2
+ EOF
+
+Run the test::
+
+ [/root] # ./ksft-net-drv/drivers/net/ping.py
+ KTAP version 1
+ 1..3
+ ok 1 ping.test_v4
+ ok 2 ping.test_v6 # SKIP Test requires IPv6 connectivity
+ ok 3 ping.test_tcp
+ # Totals: pass:2 fail:0 xfail:0 xpass:0 skip:1 error:0
diff --git a/tools/testing/selftests/drivers/net/config b/tools/testing/selftests/drivers/net/config
new file mode 100644
index 000000000000..f6a58ce8a230
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/config
@@ -0,0 +1,2 @@
+CONFIG_IPV6=y
+CONFIG_NETDEVSIM=m
diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile
new file mode 100644
index 000000000000..c9f2f48fc30f
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/Makefile
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: GPL-2.0+ OR MIT
+
+TEST_PROGS = \
+ csum.py \
+ devlink_port_split.py \
+ ethtool.sh \
+ ethtool_extended_state.sh \
+ ethtool_mm.sh \
+ ethtool_rmon.sh \
+ hw_stats_l3.sh \
+ hw_stats_l3_gre.sh \
+ loopback.sh \
+ pp_alloc_fail.py \
+ rss_ctx.py \
+ #
+
+TEST_FILES := \
+ ethtool_lib.sh \
+ #
+
+TEST_INCLUDES := \
+ $(wildcard lib/py/*.py ../lib/py/*.py) \
+ ../../../net/lib.sh \
+ ../../../net/forwarding/lib.sh \
+ ../../../net/forwarding/ipip_lib.sh \
+ ../../../net/forwarding/tc_common.sh \
+ #
+
+include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/hw/csum.py b/tools/testing/selftests/drivers/net/hw/csum.py
new file mode 100755
index 000000000000..cb40497faee4
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/csum.py
@@ -0,0 +1,122 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""Run the tools/testing/selftests/net/csum testsuite."""
+
+from os import path
+
+from lib.py import ksft_run, ksft_exit, KsftSkipEx
+from lib.py import EthtoolFamily, NetDrvEpEnv
+from lib.py import bkg, cmd, wait_port_listen
+
+def test_receive(cfg, ipv4=False, extra_args=None):
+ """Test local nic checksum receive. Remote host sends crafted packets."""
+ if not cfg.have_rx_csum:
+ raise KsftSkipEx(f"Test requires rx checksum offload on {cfg.ifname}")
+
+ if ipv4:
+ ip_args = f"-4 -S {cfg.remote_v4} -D {cfg.v4}"
+ else:
+ ip_args = f"-6 -S {cfg.remote_v6} -D {cfg.v6}"
+
+ rx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -n 100 {ip_args} -r 1 -R {extra_args}"
+ tx_cmd = f"{cfg.bin_remote} -i {cfg.ifname} -n 100 {ip_args} -r 1 -T {extra_args}"
+
+ with bkg(rx_cmd, exit_wait=True):
+ wait_port_listen(34000, proto="udp")
+ cmd(tx_cmd, host=cfg.remote)
+
+
+def test_transmit(cfg, ipv4=False, extra_args=None):
+ """Test local nic checksum transmit. Remote host verifies packets."""
+ if (not cfg.have_tx_csum_generic and
+ not (cfg.have_tx_csum_ipv4 and ipv4) and
+ not (cfg.have_tx_csum_ipv6 and not ipv4)):
+ raise KsftSkipEx(f"Test requires tx checksum offload on {cfg.ifname}")
+
+ if ipv4:
+ ip_args = f"-4 -S {cfg.v4} -D {cfg.remote_v4}"
+ else:
+ ip_args = f"-6 -S {cfg.v6} -D {cfg.remote_v6}"
+
+ # Cannot randomize input when calculating zero checksum
+ if extra_args != "-U -Z":
+ extra_args += " -r 1"
+
+ rx_cmd = f"{cfg.bin_remote} -i {cfg.ifname} -L 1 -n 100 {ip_args} -R {extra_args}"
+ tx_cmd = f"{cfg.bin_local} -i {cfg.ifname} -L 1 -n 100 {ip_args} -T {extra_args}"
+
+ with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
+ wait_port_listen(34000, proto="udp", host=cfg.remote)
+ cmd(tx_cmd)
+
+
+def test_builder(name, cfg, ipv4=False, tx=False, extra_args=""):
+ """Construct specific tests from the common template.
+
+ Most tests follow the same basic pattern, differing only in
+ Direction of the test and optional flags passed to csum."""
+ def f(cfg):
+ if ipv4:
+ cfg.require_v4()
+ else:
+ cfg.require_v6()
+
+ if tx:
+ test_transmit(cfg, ipv4, extra_args)
+ else:
+ test_receive(cfg, ipv4, extra_args)
+
+ if ipv4:
+ f.__name__ = "ipv4_" + name
+ else:
+ f.__name__ = "ipv6_" + name
+ return f
+
+
+def check_nic_features(cfg) -> None:
+ """Test whether Tx and Rx checksum offload are enabled.
+
+ If the device under test has either off, then skip the relevant tests."""
+ cfg.have_tx_csum_generic = False
+ cfg.have_tx_csum_ipv4 = False
+ cfg.have_tx_csum_ipv6 = False
+ cfg.have_rx_csum = False
+
+ ethnl = EthtoolFamily()
+ features = ethnl.features_get({"header": {"dev-index": cfg.ifindex}})
+ for f in features["active"]["bits"]["bit"]:
+ if f["name"] == "tx-checksum-ip-generic":
+ cfg.have_tx_csum_generic = True
+ elif f["name"] == "tx-checksum-ipv4":
+ cfg.have_tx_csum_ipv4 = True
+ elif f["name"] == "tx-checksum-ipv6":
+ cfg.have_tx_csum_ipv6 = True
+ elif f["name"] == "rx-checksum":
+ cfg.have_rx_csum = True
+
+
+def main() -> None:
+ with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+ check_nic_features(cfg)
+
+ cfg.bin_local = path.abspath(path.dirname(__file__) + "/../../../net/lib/csum")
+ cfg.bin_remote = cfg.remote.deploy(cfg.bin_local)
+
+ cases = []
+ for ipv4 in [True, False]:
+ cases.append(test_builder("rx_tcp", cfg, ipv4, False, "-t"))
+ cases.append(test_builder("rx_tcp_invalid", cfg, ipv4, False, "-t -E"))
+
+ cases.append(test_builder("rx_udp", cfg, ipv4, False, ""))
+ cases.append(test_builder("rx_udp_invalid", cfg, ipv4, False, "-E"))
+
+ cases.append(test_builder("tx_udp_csum_offload", cfg, ipv4, True, "-U"))
+ cases.append(test_builder("tx_udp_zero_checksum", cfg, ipv4, True, "-U -Z"))
+
+ ksft_run(cases=cases, args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/net/devlink_port_split.py b/tools/testing/selftests/drivers/net/hw/devlink_port_split.py
index 2d84c7a0be6b..2d84c7a0be6b 100755
--- a/tools/testing/selftests/net/devlink_port_split.py
+++ b/tools/testing/selftests/drivers/net/hw/devlink_port_split.py
diff --git a/tools/testing/selftests/net/forwarding/ethtool.sh b/tools/testing/selftests/drivers/net/hw/ethtool.sh
index aa2eafb7b243..fa6953de6b6d 100755
--- a/tools/testing/selftests/net/forwarding/ethtool.sh
+++ b/tools/testing/selftests/drivers/net/hw/ethtool.sh
@@ -10,7 +10,8 @@ ALL_TESTS="
different_speeds_autoneg_on
"
NUM_NETIFS=2
-source lib.sh
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
source ethtool_lib.sh
h1_create()
@@ -64,9 +65,8 @@ same_speeds_autoneg_off()
setup_wait_dev_with_timeout $h1
setup_wait_dev_with_timeout $h2
ping_do $h1 192.0.2.2
- check_err $? "speed $speed autoneg off"
- log_test "force of same speed autoneg off"
- log_info "speed = $speed"
+ check_err $? "ping with speed $speed autoneg off"
+ log_test "force speed $speed on both ends"
done
ethtool -s $h2 autoneg on
@@ -111,9 +111,8 @@ combination_of_neg_on_and_off()
setup_wait_dev_with_timeout $h1
setup_wait_dev_with_timeout $h2
ping_do $h1 192.0.2.2
- check_err $? "h1-speed=$speed autoneg off, h2 autoneg on"
- log_test "one side with autoneg off and another with autoneg on"
- log_info "force speed = $speed"
+ check_err $? "ping with h1-speed=$speed autoneg off, h2 autoneg on"
+ log_test "force speed $speed vs. autoneg"
done
ethtool -s $h1 autoneg on
@@ -206,10 +205,9 @@ advertise_subset_of_speeds()
setup_wait_dev_with_timeout $h1
setup_wait_dev_with_timeout $h2
ping_do $h1 192.0.2.2
- check_err $? "h1=$speed_1_to_advertise, h2=$speed_2_to_advertise ($speed_value)"
+ check_err $? "ping with h1=$speed_1_to_advertise, h2=$speed_2_to_advertise ($speed_value)"
- log_test "advertise subset of speeds"
- log_info "h1=$speed_1_to_advertise, h2=$speed_2_to_advertise"
+ log_test "advertise $speed_1_to_advertise vs. $speed_2_to_advertise"
done
ethtool -s $h2 autoneg on
@@ -286,8 +284,6 @@ different_speeds_autoneg_on()
ethtool -s $h1 autoneg on
}
-skip_on_veth
-
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh b/tools/testing/selftests/drivers/net/hw/ethtool_extended_state.sh
index 17f89c3b7c02..a7584448416e 100755
--- a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh
+++ b/tools/testing/selftests/drivers/net/hw/ethtool_extended_state.sh
@@ -8,7 +8,8 @@ ALL_TESTS="
"
NUM_NETIFS=2
-source lib.sh
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
source ethtool_lib.sh
TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms
@@ -108,8 +109,6 @@ no_cable()
ip link set dev $swp3 down
}
-skip_on_veth
-
setup_prepare
tests_run
diff --git a/tools/testing/selftests/net/forwarding/ethtool_lib.sh b/tools/testing/selftests/drivers/net/hw/ethtool_lib.sh
index b9bfb45085af..b9bfb45085af 100644
--- a/tools/testing/selftests/net/forwarding/ethtool_lib.sh
+++ b/tools/testing/selftests/drivers/net/hw/ethtool_lib.sh
diff --git a/tools/testing/selftests/net/forwarding/ethtool_mm.sh b/tools/testing/selftests/drivers/net/hw/ethtool_mm.sh
index 50d5bfb17ef1..c301e735c8ab 100755
--- a/tools/testing/selftests/net/forwarding/ethtool_mm.sh
+++ b/tools/testing/selftests/drivers/net/hw/ethtool_mm.sh
@@ -14,7 +14,8 @@ ALL_TESTS="
NUM_NETIFS=2
REQUIRE_MZ=no
PREEMPTIBLE_PRIO=0
-source lib.sh
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
traffic_test()
{
diff --git a/tools/testing/selftests/net/forwarding/ethtool_rmon.sh b/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh
index 41a34a61f763..8f60c1685ad4 100755
--- a/tools/testing/selftests/net/forwarding/ethtool_rmon.sh
+++ b/tools/testing/selftests/drivers/net/hw/ethtool_rmon.sh
@@ -7,7 +7,8 @@ ALL_TESTS="
"
NUM_NETIFS=2
-source lib.sh
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
ETH_FCS_LEN=4
ETH_HLEN=$((6+6+2))
@@ -43,6 +44,7 @@ bucket_test()
# Mausezahn does not include FCS bytes in its length - but the
# histogram counters do
len=$((len - ETH_FCS_LEN))
+ len=$((len > 0 ? len : 0))
before=$(ethtool --json -S $iface --groups rmon | \
jq -r ".[0].rmon[\"${set}-pktsNtoM\"][$bucket].val")
@@ -78,7 +80,7 @@ rmon_histogram()
for if in $iface $neigh; do
if ! ensure_mtu $if ${bucket[0]}; then
- log_test_skip "$if does not support the required MTU for $step"
+ log_test_xfail "$if does not support the required MTU for $step"
return
fi
done
@@ -93,7 +95,7 @@ rmon_histogram()
jq -r ".[0].rmon[\"${set}-pktsNtoM\"][]|[.low, .high]|@tsv" 2>/dev/null)
if [ $nbuckets -eq 0 ]; then
- log_test_skip "$iface does not support $set histogram counters"
+ log_test_xfail "$iface does not support $set histogram counters"
return
fi
}
diff --git a/tools/testing/selftests/net/forwarding/hw_stats_l3.sh b/tools/testing/selftests/drivers/net/hw/hw_stats_l3.sh
index 48584a51388f..67fafefc80be 100755
--- a/tools/testing/selftests/net/forwarding/hw_stats_l3.sh
+++ b/tools/testing/selftests/drivers/net/hw/hw_stats_l3.sh
@@ -48,7 +48,9 @@ ALL_TESTS="
test_double_enable
"
NUM_NETIFS=4
-source lib.sh
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+source "$lib_dir"/../../../net/forwarding/tc_common.sh
h1_create()
{
@@ -324,17 +326,9 @@ setup_wait
used=$(ip -j stats show dev $rp1.200 group offload subgroup hw_stats_info |
jq '.[].info.l3_stats.used')
-kind=$(ip -j -d link show dev $rp1 |
- jq -r '.[].linkinfo.info_kind')
-if [[ $used != true ]]; then
- if [[ $kind == veth ]]; then
- log_test_skip "l3_stats not offloaded on veth interface"
- EXIT_STATUS=$ksft_skip
- else
- RET=1 log_test "l3_stats not offloaded"
- fi
-else
- tests_run
-fi
+[[ $used = true ]]
+check_err $? "hw_stats_info.used=$used"
+log_test "l3_stats offloaded"
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh b/tools/testing/selftests/drivers/net/hw/hw_stats_l3_gre.sh
index 7594bbb49029..a94d92e1abce 100755
--- a/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh
+++ b/tools/testing/selftests/drivers/net/hw/hw_stats_l3_gre.sh
@@ -12,8 +12,10 @@ ALL_TESTS="
test_stats_tx
"
NUM_NETIFS=6
-source lib.sh
-source ipip_lib.sh
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+source "$lib_dir"/../../../net/forwarding/ipip_lib.sh
+source "$lib_dir"/../../../net/forwarding/tc_common.sh
setup_prepare()
{
@@ -99,8 +101,6 @@ test_stats_rx()
test_stats g2a rx
}
-skip_on_veth
-
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
new file mode 100644
index 000000000000..b582885786f5
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import sys
+from pathlib import Path
+
+KSFT_DIR = (Path(__file__).parent / "../../../../..").resolve()
+
+try:
+ sys.path.append(KSFT_DIR.as_posix())
+ from net.lib.py import *
+ from drivers.net.lib.py import *
+except ModuleNotFoundError as e:
+ ksft_pr("Failed importing `net` library from kernel sources")
+ ksft_pr(str(e))
+ ktap_result(True, comment="SKIP")
+ sys.exit(4)
diff --git a/tools/testing/selftests/net/forwarding/loopback.sh b/tools/testing/selftests/drivers/net/hw/loopback.sh
index 8f4057310b5b..5acc3ff820aa 100755
--- a/tools/testing/selftests/net/forwarding/loopback.sh
+++ b/tools/testing/selftests/drivers/net/hw/loopback.sh
@@ -6,8 +6,9 @@ ksft_skip=4
ALL_TESTS="loopback_test"
NUM_NETIFS=2
-source tc_common.sh
-source lib.sh
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/tc_common.sh
+source "$lib_dir"/../../../net/forwarding/lib.sh
h1_create()
{
diff --git a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py
new file mode 100755
index 000000000000..026d98976c35
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py
@@ -0,0 +1,129 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import time
+import os
+from lib.py import ksft_run, ksft_exit, ksft_pr
+from lib.py import KsftSkipEx, KsftFailEx
+from lib.py import NetdevFamily, NlError
+from lib.py import NetDrvEpEnv
+from lib.py import cmd, tool, GenerateTraffic
+
+
+def _write_fail_config(config):
+ for key, value in config.items():
+ with open("/sys/kernel/debug/fail_function/" + key, "w") as fp:
+ fp.write(str(value) + "\n")
+
+
+def _enable_pp_allocation_fail():
+ if not os.path.exists("/sys/kernel/debug/fail_function"):
+ raise KsftSkipEx("Kernel built without function error injection (or DebugFS)")
+
+ if not os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_pages"):
+ with open("/sys/kernel/debug/fail_function/inject", "w") as fp:
+ fp.write("page_pool_alloc_pages\n")
+
+ _write_fail_config({
+ "verbose": 0,
+ "interval": 511,
+ "probability": 100,
+ "times": -1,
+ })
+
+
+def _disable_pp_allocation_fail():
+ if not os.path.exists("/sys/kernel/debug/fail_function"):
+ return
+
+ if os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_pages"):
+ with open("/sys/kernel/debug/fail_function/inject", "w") as fp:
+ fp.write("\n")
+
+ _write_fail_config({
+ "probability": 0,
+ "times": 0,
+ })
+
+
+def test_pp_alloc(cfg, netdevnl):
+ def get_stats():
+ return netdevnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0]
+
+ def check_traffic_flowing():
+ stat1 = get_stats()
+ time.sleep(1)
+ stat2 = get_stats()
+ if stat2['rx-packets'] - stat1['rx-packets'] < 15000:
+ raise KsftFailEx("Traffic seems low:", stat2['rx-packets'] - stat1['rx-packets'])
+
+
+ try:
+ stats = get_stats()
+ except NlError as e:
+ if e.nl_msg.error == -95:
+ stats = {}
+ else:
+ raise
+ if 'rx-alloc-fail' not in stats:
+ raise KsftSkipEx("Driver does not report 'rx-alloc-fail' via qstats")
+
+ set_g = False
+ traffic = None
+ try:
+ traffic = GenerateTraffic(cfg)
+
+ check_traffic_flowing()
+
+ _enable_pp_allocation_fail()
+
+ s1 = get_stats()
+ time.sleep(3)
+ s2 = get_stats()
+
+ if s2['rx-alloc-fail'] - s1['rx-alloc-fail'] < 1:
+ raise KsftSkipEx("Allocation failures not increasing")
+ if s2['rx-alloc-fail'] - s1['rx-alloc-fail'] < 100:
+ raise KsftSkipEx("Allocation increasing too slowly", s2['rx-alloc-fail'] - s1['rx-alloc-fail'],
+ "packets:", s2['rx-packets'] - s1['rx-packets'])
+
+ # Basic failures are fine, try to wobble some settings to catch extra failures
+ check_traffic_flowing()
+ g = tool("ethtool", "-g " + cfg.ifname, json=True)[0]
+ if 'rx' in g and g["rx"] * 2 <= g["rx-max"]:
+ new_g = g['rx'] * 2
+ elif 'rx' in g:
+ new_g = g['rx'] // 2
+ else:
+ new_g = None
+
+ if new_g:
+ set_g = cmd(f"ethtool -G {cfg.ifname} rx {new_g}", fail=False).ret == 0
+ if set_g:
+ ksft_pr("ethtool -G change retval: success")
+ else:
+ ksft_pr("ethtool -G change retval: did not succeed", new_g)
+ else:
+ ksft_pr("ethtool -G change retval: did not try")
+
+ time.sleep(0.1)
+ check_traffic_flowing()
+ finally:
+ _disable_pp_allocation_fail()
+ if traffic:
+ traffic.stop()
+ time.sleep(0.1)
+ if set_g:
+ cmd(f"ethtool -G {cfg.ifname} rx {g['rx']}")
+
+
+def main() -> None:
+ netdevnl = NetdevFamily()
+ with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+
+ ksft_run([test_pp_alloc], args=(cfg, netdevnl, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py
new file mode 100755
index 000000000000..011508ca604b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py
@@ -0,0 +1,553 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import datetime
+import random
+from lib.py import ksft_run, ksft_pr, ksft_exit, ksft_eq, ksft_ge, ksft_lt
+from lib.py import NetDrvEpEnv
+from lib.py import EthtoolFamily, NetdevFamily
+from lib.py import KsftSkipEx
+from lib.py import rand_port
+from lib.py import ethtool, ip, defer, GenerateTraffic, CmdExitFailure
+
+
+def _rss_key_str(key):
+ return ":".join(["{:02x}".format(x) for x in key])
+
+
+def _rss_key_rand(length):
+ return [random.randint(0, 255) for _ in range(length)]
+
+
+def _rss_key_check(cfg, data=None, context=0):
+ if data is None:
+ data = get_rss(cfg, context=context)
+ if 'rss-hash-key' not in data:
+ return
+ non_zero = [x for x in data['rss-hash-key'] if x != 0]
+ ksft_eq(bool(non_zero), True, comment=f"RSS key is all zero {data['rss-hash-key']}")
+
+
+def get_rss(cfg, context=0):
+ return ethtool(f"-x {cfg.ifname} context {context}", json=True)[0]
+
+
+def get_drop_err_sum(cfg):
+ stats = ip("-s -s link show dev " + cfg.ifname, json=True)[0]
+ cnt = 0
+ for key in ['errors', 'dropped', 'over_errors', 'fifo_errors',
+ 'length_errors', 'crc_errors', 'missed_errors',
+ 'frame_errors']:
+ cnt += stats["stats64"]["rx"][key]
+ return cnt, stats["stats64"]["tx"]["carrier_changes"]
+
+
+def ethtool_create(cfg, act, opts):
+ output = ethtool(f"{act} {cfg.ifname} {opts}").stdout
+ # Output will be something like: "New RSS context is 1" or
+ # "Added rule with ID 7", we want the integer from the end
+ return int(output.split()[-1])
+
+
+def require_ntuple(cfg):
+ features = ethtool(f"-k {cfg.ifname}", json=True)[0]
+ if not features["ntuple-filters"]["active"]:
+ # ntuple is more of a capability than a config knob, don't bother
+ # trying to enable it (until some driver actually needs it).
+ raise KsftSkipEx("Ntuple filters not enabled on the device: " + str(features["ntuple-filters"]))
+
+
+# Get Rx packet counts for all queues, as a simple list of integers
+# if @prev is specified the prev counts will be subtracted
+def _get_rx_cnts(cfg, prev=None):
+ cfg.wait_hw_stats_settle()
+ data = cfg.netdevnl.qstats_get({"ifindex": cfg.ifindex, "scope": ["queue"]}, dump=True)
+ data = [x for x in data if x['queue-type'] == "rx"]
+ max_q = max([x["queue-id"] for x in data])
+ queue_stats = [0] * (max_q + 1)
+ for q in data:
+ queue_stats[q["queue-id"]] = q["rx-packets"]
+ if prev and q["queue-id"] < len(prev):
+ queue_stats[q["queue-id"]] -= prev[q["queue-id"]]
+ return queue_stats
+
+
+def _send_traffic_check(cfg, port, name, params):
+ # params is a dict with 3 possible keys:
+ # - "target": required, which queues we expect to get iperf traffic
+ # - "empty": optional, which queues should see no traffic at all
+ # - "noise": optional, which queues we expect to see low traffic;
+ # used for queues of the main context, since some background
+ # OS activity may use those queues while we're testing
+ # the value for each is a list, or some other iterable containing queue ids.
+
+ cnts = _get_rx_cnts(cfg)
+ GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000)
+ cnts = _get_rx_cnts(cfg, prev=cnts)
+
+ directed = sum(cnts[i] for i in params['target'])
+
+ ksft_ge(directed, 20000, f"traffic on {name}: " + str(cnts))
+ if params.get('noise'):
+ ksft_lt(sum(cnts[i] for i in params['noise']), directed / 2,
+ "traffic on other queues:" + str(cnts))
+ if params.get('empty'):
+ ksft_eq(sum(cnts[i] for i in params['empty']), 0,
+ "traffic on inactive queues: " + str(cnts))
+
+
+def test_rss_key_indir(cfg):
+ """Test basics like updating the main RSS key and indirection table."""
+
+ qcnt = len(_get_rx_cnts(cfg))
+ if qcnt < 3:
+ KsftSkipEx("Device has fewer than 3 queues (or doesn't support queue stats)")
+
+ data = get_rss(cfg)
+ want_keys = ['rss-hash-key', 'rss-hash-function', 'rss-indirection-table']
+ for k in want_keys:
+ if k not in data:
+ raise KsftFailEx("ethtool results missing key: " + k)
+ if not data[k]:
+ raise KsftFailEx(f"ethtool results empty for '{k}': {data[k]}")
+
+ _rss_key_check(cfg, data=data)
+ key_len = len(data['rss-hash-key'])
+
+ # Set the key
+ key = _rss_key_rand(key_len)
+ ethtool(f"-X {cfg.ifname} hkey " + _rss_key_str(key))
+
+ data = get_rss(cfg)
+ ksft_eq(key, data['rss-hash-key'])
+
+ # Set the indirection table and the key together
+ key = _rss_key_rand(key_len)
+ ethtool(f"-X {cfg.ifname} equal 3 hkey " + _rss_key_str(key))
+ reset_indir = defer(ethtool, f"-X {cfg.ifname} default")
+
+ data = get_rss(cfg)
+ _rss_key_check(cfg, data=data)
+ ksft_eq(0, min(data['rss-indirection-table']))
+ ksft_eq(2, max(data['rss-indirection-table']))
+
+ # Reset indirection table and set the key
+ key = _rss_key_rand(key_len)
+ ethtool(f"-X {cfg.ifname} default hkey " + _rss_key_str(key))
+ data = get_rss(cfg)
+ _rss_key_check(cfg, data=data)
+ ksft_eq(0, min(data['rss-indirection-table']))
+ ksft_eq(qcnt - 1, max(data['rss-indirection-table']))
+
+ # Set the indirection table
+ ethtool(f"-X {cfg.ifname} equal 2")
+ data = get_rss(cfg)
+ ksft_eq(0, min(data['rss-indirection-table']))
+ ksft_eq(1, max(data['rss-indirection-table']))
+
+ # Check we only get traffic on the first 2 queues
+ cnts = _get_rx_cnts(cfg)
+ GenerateTraffic(cfg).wait_pkts_and_stop(20000)
+ cnts = _get_rx_cnts(cfg, prev=cnts)
+ # 2 queues, 20k packets, must be at least 5k per queue
+ ksft_ge(cnts[0], 5000, "traffic on main context (1/2): " + str(cnts))
+ ksft_ge(cnts[1], 5000, "traffic on main context (2/2): " + str(cnts))
+ # The other queues should be unused
+ ksft_eq(sum(cnts[2:]), 0, "traffic on unused queues: " + str(cnts))
+
+ # Restore, and check traffic gets spread again
+ reset_indir.exec()
+
+ cnts = _get_rx_cnts(cfg)
+ GenerateTraffic(cfg).wait_pkts_and_stop(20000)
+ cnts = _get_rx_cnts(cfg, prev=cnts)
+ # First two queues get less traffic than all the rest
+ ksft_lt(sum(cnts[:2]), sum(cnts[2:]), "traffic distributed: " + str(cnts))
+
+
+def test_rss_queue_reconfigure(cfg, main_ctx=True):
+ """Make sure queue changes can't override requested RSS config.
+
+ By default main RSS table should change to include all queues.
+ When user sets a specific RSS config the driver should preserve it,
+ even when queue count changes. Driver should refuse to deactivate
+ queues used in the user-set RSS config.
+ """
+
+ if not main_ctx:
+ require_ntuple(cfg)
+
+ # Start with 4 queues, an arbitrary known number.
+ try:
+ qcnt = len(_get_rx_cnts(cfg))
+ ethtool(f"-L {cfg.ifname} combined 4")
+ defer(ethtool, f"-L {cfg.ifname} combined {qcnt}")
+ except:
+ raise KsftSkipEx("Not enough queues for the test or qstat not supported")
+
+ if main_ctx:
+ ctx_id = 0
+ ctx_ref = ""
+ else:
+ ctx_id = ethtool_create(cfg, "-X", "context new")
+ ctx_ref = f"context {ctx_id}"
+ defer(ethtool, f"-X {cfg.ifname} {ctx_ref} delete")
+
+ # Indirection table should be distributing to all queues.
+ data = get_rss(cfg, context=ctx_id)
+ ksft_eq(0, min(data['rss-indirection-table']))
+ ksft_eq(3, max(data['rss-indirection-table']))
+
+ # Increase queues, indirection table should be distributing to all queues.
+ # It's unclear whether tables of additional contexts should be reset, too.
+ if main_ctx:
+ ethtool(f"-L {cfg.ifname} combined 5")
+ data = get_rss(cfg)
+ ksft_eq(0, min(data['rss-indirection-table']))
+ ksft_eq(4, max(data['rss-indirection-table']))
+ ethtool(f"-L {cfg.ifname} combined 4")
+
+ # Configure the table explicitly
+ port = rand_port()
+ ethtool(f"-X {cfg.ifname} {ctx_ref} weight 1 0 0 1")
+ if main_ctx:
+ other_key = 'empty'
+ defer(ethtool, f"-X {cfg.ifname} default")
+ else:
+ other_key = 'noise'
+ flow = f"flow-type tcp{cfg.addr_ipver} dst-port {port} context {ctx_id}"
+ ntuple = ethtool_create(cfg, "-N", flow)
+ defer(ethtool, f"-N {cfg.ifname} delete {ntuple}")
+
+ _send_traffic_check(cfg, port, ctx_ref, { 'target': (0, 3),
+ other_key: (1, 2) })
+
+ # We should be able to increase queues, but table should be left untouched
+ ethtool(f"-L {cfg.ifname} combined 5")
+ data = get_rss(cfg, context=ctx_id)
+ ksft_eq({0, 3}, set(data['rss-indirection-table']))
+
+ _send_traffic_check(cfg, port, ctx_ref, { 'target': (0, 3),
+ other_key: (1, 2, 4) })
+
+ # Setting queue count to 3 should fail, queue 3 is used
+ try:
+ ethtool(f"-L {cfg.ifname} combined 3")
+ except CmdExitFailure:
+ pass
+ else:
+ raise Exception(f"Driver didn't prevent us from deactivating a used queue (context {ctx_id})")
+
+
+def test_rss_resize(cfg):
+ """Test resizing of the RSS table.
+
+ Some devices dynamically increase and decrease the size of the RSS
+ indirection table based on the number of enabled queues.
+ When that happens driver must maintain the balance of entries
+ (preferably duplicating the smaller table).
+ """
+
+ channels = cfg.ethnl.channels_get({'header': {'dev-index': cfg.ifindex}})
+ ch_max = channels['combined-max']
+ qcnt = channels['combined-count']
+
+ if ch_max < 2:
+ raise KsftSkipEx(f"Not enough queues for the test: {ch_max}")
+
+ ethtool(f"-L {cfg.ifname} combined 2")
+ defer(ethtool, f"-L {cfg.ifname} combined {qcnt}")
+
+ ethtool(f"-X {cfg.ifname} weight 1 7")
+ defer(ethtool, f"-X {cfg.ifname} default")
+
+ ethtool(f"-L {cfg.ifname} combined {ch_max}")
+ data = get_rss(cfg)
+ ksft_eq(0, min(data['rss-indirection-table']))
+ ksft_eq(1, max(data['rss-indirection-table']))
+
+ ksft_eq(7,
+ data['rss-indirection-table'].count(1) /
+ data['rss-indirection-table'].count(0),
+ f"Table imbalance after resize: {data['rss-indirection-table']}")
+
+
+def test_hitless_key_update(cfg):
+ """Test that flows may be rehashed without impacting traffic.
+
+ Some workloads may want to rehash the flows in response to an imbalance.
+ Most effective way to do that is changing the RSS key. Check that changing
+ the key does not cause link flaps or traffic disruption.
+
+ Disrupting traffic for key update is not a bug, but makes the key
+ update unusable for rehashing under load.
+ """
+ data = get_rss(cfg)
+ key_len = len(data['rss-hash-key'])
+
+ key = _rss_key_rand(key_len)
+
+ tgen = GenerateTraffic(cfg)
+ try:
+ errors0, carrier0 = get_drop_err_sum(cfg)
+ t0 = datetime.datetime.now()
+ ethtool(f"-X {cfg.ifname} hkey " + _rss_key_str(key))
+ t1 = datetime.datetime.now()
+ errors1, carrier1 = get_drop_err_sum(cfg)
+ finally:
+ tgen.wait_pkts_and_stop(5000)
+
+ ksft_lt((t1 - t0).total_seconds(), 0.2)
+ ksft_eq(errors1 - errors1, 0)
+ ksft_eq(carrier1 - carrier0, 0)
+
+
+def test_rss_context(cfg, ctx_cnt=1, create_with_cfg=None):
+ """
+ Test separating traffic into RSS contexts.
+ The queues will be allocated 2 for each context:
+ ctx0 ctx1 ctx2 ctx3
+ [0 1] [2 3] [4 5] [6 7] ...
+ """
+
+ require_ntuple(cfg)
+
+ requested_ctx_cnt = ctx_cnt
+
+ # Try to allocate more queues when necessary
+ qcnt = len(_get_rx_cnts(cfg))
+ if qcnt < 2 + 2 * ctx_cnt:
+ try:
+ ksft_pr(f"Increasing queue count {qcnt} -> {2 + 2 * ctx_cnt}")
+ ethtool(f"-L {cfg.ifname} combined {2 + 2 * ctx_cnt}")
+ defer(ethtool, f"-L {cfg.ifname} combined {qcnt}")
+ except:
+ raise KsftSkipEx("Not enough queues for the test")
+
+ ports = []
+
+ # Use queues 0 and 1 for normal traffic
+ ethtool(f"-X {cfg.ifname} equal 2")
+ defer(ethtool, f"-X {cfg.ifname} default")
+
+ for i in range(ctx_cnt):
+ want_cfg = f"start {2 + i * 2} equal 2"
+ create_cfg = want_cfg if create_with_cfg else ""
+
+ try:
+ ctx_id = ethtool_create(cfg, "-X", f"context new {create_cfg}")
+ defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")
+ except CmdExitFailure:
+ # try to carry on and skip at the end
+ if i == 0:
+ raise
+ ksft_pr(f"Failed to create context {i + 1}, trying to test what we got")
+ ctx_cnt = i
+ break
+
+ _rss_key_check(cfg, context=ctx_id)
+
+ if not create_with_cfg:
+ ethtool(f"-X {cfg.ifname} context {ctx_id} {want_cfg}")
+ _rss_key_check(cfg, context=ctx_id)
+
+ # Sanity check the context we just created
+ data = get_rss(cfg, ctx_id)
+ ksft_eq(min(data['rss-indirection-table']), 2 + i * 2, "Unexpected context cfg: " + str(data))
+ ksft_eq(max(data['rss-indirection-table']), 2 + i * 2 + 1, "Unexpected context cfg: " + str(data))
+
+ ports.append(rand_port())
+ flow = f"flow-type tcp{cfg.addr_ipver} dst-port {ports[i]} context {ctx_id}"
+ ntuple = ethtool_create(cfg, "-N", flow)
+ defer(ethtool, f"-N {cfg.ifname} delete {ntuple}")
+
+ for i in range(ctx_cnt):
+ _send_traffic_check(cfg, ports[i], f"context {i}",
+ { 'target': (2+i*2, 3+i*2),
+ 'noise': (0, 1),
+ 'empty': list(range(2, 2+i*2)) + list(range(4+i*2, 2+2*ctx_cnt)) })
+
+ if requested_ctx_cnt != ctx_cnt:
+ raise KsftSkipEx(f"Tested only {ctx_cnt} contexts, wanted {requested_ctx_cnt}")
+
+
+def test_rss_context4(cfg):
+ test_rss_context(cfg, 4)
+
+
+def test_rss_context32(cfg):
+ test_rss_context(cfg, 32)
+
+
+def test_rss_context4_create_with_cfg(cfg):
+ test_rss_context(cfg, 4, create_with_cfg=True)
+
+
+def test_rss_context_queue_reconfigure(cfg):
+ test_rss_queue_reconfigure(cfg, main_ctx=False)
+
+
+def test_rss_context_out_of_order(cfg, ctx_cnt=4):
+ """
+ Test separating traffic into RSS contexts.
+ Contexts are removed in semi-random order, and steering re-tested
+ to make sure removal doesn't break steering to surviving contexts.
+ Test requires 3 contexts to work.
+ """
+
+ require_ntuple(cfg)
+
+ requested_ctx_cnt = ctx_cnt
+
+ # Try to allocate more queues when necessary
+ qcnt = len(_get_rx_cnts(cfg))
+ if qcnt < 2 + 2 * ctx_cnt:
+ try:
+ ksft_pr(f"Increasing queue count {qcnt} -> {2 + 2 * ctx_cnt}")
+ ethtool(f"-L {cfg.ifname} combined {2 + 2 * ctx_cnt}")
+ defer(ethtool, f"-L {cfg.ifname} combined {qcnt}")
+ except:
+ raise KsftSkipEx("Not enough queues for the test")
+
+ ntuple = []
+ ctx = []
+ ports = []
+
+ def remove_ctx(idx):
+ ntuple[idx].exec()
+ ntuple[idx] = None
+ ctx[idx].exec()
+ ctx[idx] = None
+
+ def check_traffic():
+ for i in range(ctx_cnt):
+ if ctx[i]:
+ expected = {
+ 'target': (2+i*2, 3+i*2),
+ 'noise': (0, 1),
+ 'empty': list(range(2, 2+i*2)) + list(range(4+i*2, 2+2*ctx_cnt))
+ }
+ else:
+ expected = {
+ 'target': (0, 1),
+ 'empty': range(2, 2+2*ctx_cnt)
+ }
+
+ _send_traffic_check(cfg, ports[i], f"context {i}", expected)
+
+ # Use queues 0 and 1 for normal traffic
+ ethtool(f"-X {cfg.ifname} equal 2")
+ defer(ethtool, f"-X {cfg.ifname} default")
+
+ for i in range(ctx_cnt):
+ ctx_id = ethtool_create(cfg, "-X", f"context new start {2 + i * 2} equal 2")
+ ctx.append(defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete"))
+
+ ports.append(rand_port())
+ flow = f"flow-type tcp{cfg.addr_ipver} dst-port {ports[i]} context {ctx_id}"
+ ntuple_id = ethtool_create(cfg, "-N", flow)
+ ntuple.append(defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}"))
+
+ check_traffic()
+
+ # Remove middle context
+ remove_ctx(ctx_cnt // 2)
+ check_traffic()
+
+ # Remove first context
+ remove_ctx(0)
+ check_traffic()
+
+ # Remove last context
+ remove_ctx(-1)
+ check_traffic()
+
+ if requested_ctx_cnt != ctx_cnt:
+ raise KsftSkipEx(f"Tested only {ctx_cnt} contexts, wanted {requested_ctx_cnt}")
+
+
+def test_rss_context_overlap(cfg, other_ctx=0):
+ """
+ Test contexts overlapping with each other.
+ Use 4 queues for the main context, but only queues 2 and 3 for context 1.
+ """
+
+ require_ntuple(cfg)
+
+ queue_cnt = len(_get_rx_cnts(cfg))
+ if queue_cnt < 4:
+ try:
+ ksft_pr(f"Increasing queue count {queue_cnt} -> 4")
+ ethtool(f"-L {cfg.ifname} combined 4")
+ defer(ethtool, f"-L {cfg.ifname} combined {queue_cnt}")
+ except:
+ raise KsftSkipEx("Not enough queues for the test")
+
+ if other_ctx == 0:
+ ethtool(f"-X {cfg.ifname} equal 4")
+ defer(ethtool, f"-X {cfg.ifname} default")
+ else:
+ other_ctx = ethtool_create(cfg, "-X", "context new")
+ ethtool(f"-X {cfg.ifname} context {other_ctx} equal 4")
+ defer(ethtool, f"-X {cfg.ifname} context {other_ctx} delete")
+
+ ctx_id = ethtool_create(cfg, "-X", "context new")
+ ethtool(f"-X {cfg.ifname} context {ctx_id} start 2 equal 2")
+ defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")
+
+ port = rand_port()
+ if other_ctx:
+ flow = f"flow-type tcp{cfg.addr_ipver} dst-port {port} context {other_ctx}"
+ ntuple_id = ethtool_create(cfg, "-N", flow)
+ ntuple = defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}")
+
+ # Test the main context
+ cnts = _get_rx_cnts(cfg)
+ GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000)
+ cnts = _get_rx_cnts(cfg, prev=cnts)
+
+ ksft_ge(sum(cnts[ :4]), 20000, "traffic on main context: " + str(cnts))
+ ksft_ge(sum(cnts[ :2]), 7000, "traffic on main context (1/2): " + str(cnts))
+ ksft_ge(sum(cnts[2:4]), 7000, "traffic on main context (2/2): " + str(cnts))
+ if other_ctx == 0:
+ ksft_eq(sum(cnts[4: ]), 0, "traffic on other queues: " + str(cnts))
+
+ # Now create a rule for context 1 and make sure traffic goes to a subset
+ if other_ctx:
+ ntuple.exec()
+ flow = f"flow-type tcp{cfg.addr_ipver} dst-port {port} context {ctx_id}"
+ ntuple_id = ethtool_create(cfg, "-N", flow)
+ defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}")
+
+ cnts = _get_rx_cnts(cfg)
+ GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000)
+ cnts = _get_rx_cnts(cfg, prev=cnts)
+
+ directed = sum(cnts[2:4])
+ ksft_lt(sum(cnts[ :2]), directed / 2, "traffic on main context: " + str(cnts))
+ ksft_ge(directed, 20000, "traffic on extra context: " + str(cnts))
+ if other_ctx == 0:
+ ksft_eq(sum(cnts[4: ]), 0, "traffic on other queues: " + str(cnts))
+
+
+def test_rss_context_overlap2(cfg):
+ test_rss_context_overlap(cfg, True)
+
+
+def main() -> None:
+ with NetDrvEpEnv(__file__, nsim_test=False) as cfg:
+ cfg.ethnl = EthtoolFamily()
+ cfg.netdevnl = NetdevFamily()
+
+ ksft_run([test_rss_key_indir, test_rss_queue_reconfigure,
+ test_rss_resize, test_hitless_key_update,
+ test_rss_context, test_rss_context4, test_rss_context32,
+ test_rss_context_queue_reconfigure,
+ test_rss_context_overlap, test_rss_context_overlap2,
+ test_rss_context_out_of_order, test_rss_context4_create_with_cfg],
+ args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/settings b/tools/testing/selftests/drivers/net/hw/settings
new file mode 100644
index 000000000000..e7b9417537fb
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/settings
@@ -0,0 +1 @@
+timeout=0
diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py
new file mode 100644
index 000000000000..401e70f7f136
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import sys
+from pathlib import Path
+
+KSFT_DIR = (Path(__file__).parent / "../../../..").resolve()
+
+try:
+ sys.path.append(KSFT_DIR.as_posix())
+ from net.lib.py import *
+except ModuleNotFoundError as e:
+ ksft_pr("Failed importing `net` library from kernel sources")
+ ksft_pr(str(e))
+ ktap_result(True, comment="SKIP")
+ sys.exit(4)
+
+from .env import *
+from .load import *
+from .remote import Remote
diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py
new file mode 100644
index 000000000000..a5e800b8f103
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/lib/py/env.py
@@ -0,0 +1,241 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import os
+import time
+from pathlib import Path
+from lib.py import KsftSkipEx, KsftXfailEx
+from lib.py import cmd, ethtool, ip
+from lib.py import NetNS, NetdevSimDev
+from .remote import Remote
+
+
+def _load_env_file(src_path):
+ env = os.environ.copy()
+
+ src_dir = Path(src_path).parent.resolve()
+ if not (src_dir / "net.config").exists():
+ return env
+
+ with open((src_dir / "net.config").as_posix(), 'r') as fp:
+ for line in fp.readlines():
+ full_file = line
+ # Strip comments
+ pos = line.find("#")
+ if pos >= 0:
+ line = line[:pos]
+ line = line.strip()
+ if not line:
+ continue
+ pair = line.split('=', maxsplit=1)
+ if len(pair) != 2:
+ raise Exception("Can't parse configuration line:", full_file)
+ env[pair[0]] = pair[1]
+ return env
+
+
+class NetDrvEnv:
+ """
+ Class for a single NIC / host env, with no remote end
+ """
+ def __init__(self, src_path, **kwargs):
+ self._ns = None
+
+ self.env = _load_env_file(src_path)
+
+ if 'NETIF' in self.env:
+ self.dev = ip("link show dev " + self.env['NETIF'], json=True)[0]
+ else:
+ self._ns = NetdevSimDev(**kwargs)
+ self.dev = self._ns.nsims[0].dev
+ self.ifindex = self.dev['ifindex']
+
+ def __enter__(self):
+ ip(f"link set dev {self.dev['ifname']} up")
+
+ return self
+
+ def __exit__(self, ex_type, ex_value, ex_tb):
+ """
+ __exit__ gets called at the end of a "with" block.
+ """
+ self.__del__()
+
+ def __del__(self):
+ if self._ns:
+ self._ns.remove()
+ self._ns = None
+
+
+class NetDrvEpEnv:
+ """
+ Class for an environment with a local device and "remote endpoint"
+ which can be used to send traffic in.
+
+ For local testing it creates two network namespaces and a pair
+ of netdevsim devices.
+ """
+
+ # Network prefixes used for local tests
+ nsim_v4_pfx = "192.0.2."
+ nsim_v6_pfx = "2001:db8::"
+
+ def __init__(self, src_path, nsim_test=None):
+
+ self.env = _load_env_file(src_path)
+
+ self._stats_settle_time = None
+
+ # Things we try to destroy
+ self.remote = None
+ # These are for local testing state
+ self._netns = None
+ self._ns = None
+ self._ns_peer = None
+
+ if "NETIF" in self.env:
+ if nsim_test is True:
+ raise KsftXfailEx("Test only works on netdevsim")
+ self._check_env()
+
+ self.dev = ip("link show dev " + self.env['NETIF'], json=True)[0]
+
+ self.v4 = self.env.get("LOCAL_V4")
+ self.v6 = self.env.get("LOCAL_V6")
+ self.remote_v4 = self.env.get("REMOTE_V4")
+ self.remote_v6 = self.env.get("REMOTE_V6")
+ kind = self.env["REMOTE_TYPE"]
+ args = self.env["REMOTE_ARGS"]
+ else:
+ if nsim_test is False:
+ raise KsftXfailEx("Test does not work on netdevsim")
+
+ self.create_local()
+
+ self.dev = self._ns.nsims[0].dev
+
+ self.v4 = self.nsim_v4_pfx + "1"
+ self.v6 = self.nsim_v6_pfx + "1"
+ self.remote_v4 = self.nsim_v4_pfx + "2"
+ self.remote_v6 = self.nsim_v6_pfx + "2"
+ kind = "netns"
+ args = self._netns.name
+
+ self.remote = Remote(kind, args, src_path)
+
+ self.addr = self.v6 if self.v6 else self.v4
+ self.remote_addr = self.remote_v6 if self.remote_v6 else self.remote_v4
+
+ self.addr_ipver = "6" if self.v6 else "4"
+ # Bracketed addresses, some commands need IPv6 to be inside []
+ self.baddr = f"[{self.v6}]" if self.v6 else self.v4
+ self.remote_baddr = f"[{self.remote_v6}]" if self.remote_v6 else self.remote_v4
+
+ self.ifname = self.dev['ifname']
+ self.ifindex = self.dev['ifindex']
+
+ self._required_cmd = {}
+
+ def create_local(self):
+ self._netns = NetNS()
+ self._ns = NetdevSimDev()
+ self._ns_peer = NetdevSimDev(ns=self._netns)
+
+ with open("/proc/self/ns/net") as nsfd0, \
+ open("/var/run/netns/" + self._netns.name) as nsfd1:
+ ifi0 = self._ns.nsims[0].ifindex
+ ifi1 = self._ns_peer.nsims[0].ifindex
+ NetdevSimDev.ctrl_write('link_device',
+ f'{nsfd0.fileno()}:{ifi0} {nsfd1.fileno()}:{ifi1}')
+
+ ip(f" addr add dev {self._ns.nsims[0].ifname} {self.nsim_v4_pfx}1/24")
+ ip(f"-6 addr add dev {self._ns.nsims[0].ifname} {self.nsim_v6_pfx}1/64 nodad")
+ ip(f" link set dev {self._ns.nsims[0].ifname} up")
+
+ ip(f" addr add dev {self._ns_peer.nsims[0].ifname} {self.nsim_v4_pfx}2/24", ns=self._netns)
+ ip(f"-6 addr add dev {self._ns_peer.nsims[0].ifname} {self.nsim_v6_pfx}2/64 nodad", ns=self._netns)
+ ip(f" link set dev {self._ns_peer.nsims[0].ifname} up", ns=self._netns)
+
+ def _check_env(self):
+ vars_needed = [
+ ["LOCAL_V4", "LOCAL_V6"],
+ ["REMOTE_V4", "REMOTE_V6"],
+ ["REMOTE_TYPE"],
+ ["REMOTE_ARGS"]
+ ]
+ missing = []
+
+ for choice in vars_needed:
+ for entry in choice:
+ if entry in self.env:
+ break
+ else:
+ missing.append(choice)
+ # Make sure v4 / v6 configs are symmetric
+ if ("LOCAL_V6" in self.env) != ("REMOTE_V6" in self.env):
+ missing.append(["LOCAL_V6", "REMOTE_V6"])
+ if ("LOCAL_V4" in self.env) != ("REMOTE_V4" in self.env):
+ missing.append(["LOCAL_V4", "REMOTE_V4"])
+ if missing:
+ raise Exception("Invalid environment, missing configuration:", missing,
+ "Please see tools/testing/selftests/drivers/net/README.rst")
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, ex_type, ex_value, ex_tb):
+ """
+ __exit__ gets called at the end of a "with" block.
+ """
+ self.__del__()
+
+ def __del__(self):
+ if self._ns:
+ self._ns.remove()
+ self._ns = None
+ if self._ns_peer:
+ self._ns_peer.remove()
+ self._ns_peer = None
+ if self._netns:
+ del self._netns
+ self._netns = None
+ if self.remote:
+ del self.remote
+ self.remote = None
+
+ def require_v4(self):
+ if not self.v4 or not self.remote_v4:
+ raise KsftSkipEx("Test requires IPv4 connectivity")
+
+ def require_v6(self):
+ if not self.v6 or not self.remote_v6:
+ raise KsftSkipEx("Test requires IPv6 connectivity")
+
+ def _require_cmd(self, comm, key, host=None):
+ cached = self._required_cmd.get(comm, {})
+ if cached.get(key) is None:
+ cached[key] = cmd("command -v -- " + comm, fail=False,
+ shell=True, host=host).ret == 0
+ self._required_cmd[comm] = cached
+ return cached[key]
+
+ def require_cmd(self, comm, local=True, remote=False):
+ if local:
+ if not self._require_cmd(comm, "local"):
+ raise KsftSkipEx("Test requires command: " + comm)
+ if remote:
+ if not self._require_cmd(comm, "remote"):
+ raise KsftSkipEx("Test requires (remote) command: " + comm)
+
+ def wait_hw_stats_settle(self):
+ """
+ Wait for HW stats to become consistent, some devices DMA HW stats
+ periodically so events won't be reflected until next sync.
+ Good drivers will tell us via ethtool what their sync period is.
+ """
+ if self._stats_settle_time is None:
+ data = ethtool("-c " + self.ifname, json=True)[0]
+
+ self._stats_settle_time = 0.025 + \
+ data.get('stats-block-usecs', 0) / 1000 / 1000
+
+ time.sleep(self._stats_settle_time)
diff --git a/tools/testing/selftests/drivers/net/lib/py/load.py b/tools/testing/selftests/drivers/net/lib/py/load.py
new file mode 100644
index 000000000000..d9c10613ae67
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/lib/py/load.py
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import time
+
+from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen
+
+class GenerateTraffic:
+ def __init__(self, env, port=None):
+ env.require_cmd("iperf3", remote=True)
+
+ self.env = env
+
+ if port is None:
+ port = rand_port()
+ self._iperf_server = cmd(f"iperf3 -s -1 -p {port}", background=True)
+ wait_port_listen(port)
+ time.sleep(0.1)
+ self._iperf_client = cmd(f"iperf3 -c {env.addr} -P 16 -p {port} -t 86400",
+ background=True, host=env.remote)
+
+ # Wait for traffic to ramp up
+ if not self._wait_pkts(pps=1000):
+ self.stop(verbose=True)
+ raise Exception("iperf3 traffic did not ramp up")
+
+ def _wait_pkts(self, pkt_cnt=None, pps=None):
+ """
+ Wait until we've seen pkt_cnt or until traffic ramps up to pps.
+ Only one of pkt_cnt or pss can be specified.
+ """
+ pkt_start = ip("-s link show dev " + self.env.ifname, json=True)[0]["stats64"]["rx"]["packets"]
+ for _ in range(50):
+ time.sleep(0.1)
+ pkt_now = ip("-s link show dev " + self.env.ifname, json=True)[0]["stats64"]["rx"]["packets"]
+ if pps:
+ if pkt_now - pkt_start > pps / 10:
+ return True
+ pkt_start = pkt_now
+ elif pkt_cnt:
+ if pkt_now - pkt_start > pkt_cnt:
+ return True
+ return False
+
+ def wait_pkts_and_stop(self, pkt_cnt):
+ failed = not self._wait_pkts(pkt_cnt=pkt_cnt)
+ self.stop(verbose=failed)
+
+ def stop(self, verbose=None):
+ self._iperf_client.process(terminate=True)
+ if verbose:
+ ksft_pr(">> Client:")
+ ksft_pr(self._iperf_client.stdout)
+ ksft_pr(self._iperf_client.stderr)
+ self._iperf_server.process(terminate=True)
+ if verbose:
+ ksft_pr(">> Server:")
+ ksft_pr(self._iperf_server.stdout)
+ ksft_pr(self._iperf_server.stderr)
diff --git a/tools/testing/selftests/drivers/net/lib/py/remote.py b/tools/testing/selftests/drivers/net/lib/py/remote.py
new file mode 100644
index 000000000000..b1780b987722
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/lib/py/remote.py
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import os
+import importlib
+
+_modules = {}
+
+def Remote(kind, args, src_path):
+ global _modules
+
+ if kind not in _modules:
+ _modules[kind] = importlib.import_module("..remote_" + kind, __name__)
+
+ dir_path = os.path.abspath(src_path + "/../")
+ return getattr(_modules[kind], "Remote")(args, dir_path)
diff --git a/tools/testing/selftests/drivers/net/lib/py/remote_netns.py b/tools/testing/selftests/drivers/net/lib/py/remote_netns.py
new file mode 100644
index 000000000000..7d5eeb0271bc
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/lib/py/remote_netns.py
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import os
+import subprocess
+
+from lib.py import cmd
+
+
+class Remote:
+ def __init__(self, name, dir_path):
+ self.name = name
+ self.dir_path = dir_path
+
+ def cmd(self, comm):
+ return subprocess.Popen(["ip", "netns", "exec", self.name, "bash", "-c", comm],
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+ def deploy(self, what):
+ if os.path.isabs(what):
+ return what
+ return os.path.abspath(self.dir_path + "/" + what)
diff --git a/tools/testing/selftests/drivers/net/lib/py/remote_ssh.py b/tools/testing/selftests/drivers/net/lib/py/remote_ssh.py
new file mode 100644
index 000000000000..924addde19a3
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/lib/py/remote_ssh.py
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import os
+import string
+import subprocess
+import random
+
+from lib.py import cmd
+
+
+class Remote:
+ def __init__(self, name, dir_path):
+ self.name = name
+ self.dir_path = dir_path
+ self._tmpdir = None
+
+ def __del__(self):
+ if self._tmpdir:
+ cmd("rm -rf " + self._tmpdir, host=self)
+ self._tmpdir = None
+
+ def cmd(self, comm):
+ return subprocess.Popen(["ssh", "-q", self.name, comm],
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+ def _mktmp(self):
+ return ''.join(random.choice(string.ascii_lowercase) for _ in range(8))
+
+ def deploy(self, what):
+ if not self._tmpdir:
+ self._tmpdir = "/tmp/" + self._mktmp()
+ cmd("mkdir " + self._tmpdir, host=self)
+ file_name = self._tmpdir + "/" + self._mktmp() + os.path.basename(what)
+
+ if not os.path.isabs(what):
+ what = os.path.abspath(self.dir_path + "/" + what)
+
+ cmd(f"scp {what} {self.name}:{file_name}")
+ return file_name
diff --git a/tools/testing/selftests/drivers/net/microchip/ksz9477_qos.sh b/tools/testing/selftests/drivers/net/microchip/ksz9477_qos.sh
new file mode 100755
index 000000000000..82be5d013330
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/microchip/ksz9477_qos.sh
@@ -0,0 +1,668 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2024 Pengutronix, Oleksij Rempel <kernel@pengutronix.de>
+
+# The script is adopted to work with the Microchip KSZ switch driver.
+
+ETH_FCS_LEN=4
+
+WAIT_TIME=1
+NUM_NETIFS=4
+REQUIRE_JQ="yes"
+REQUIRE_MZ="yes"
+STABLE_MAC_ADDRS=yes
+NETIF_CREATE=no
+lib_dir=$(dirname $0)/../../../net/forwarding
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+
+require_command dcb
+
+h1=${NETIFS[p1]}
+swp1=${NETIFS[p2]}
+swp2=${NETIFS[p3]}
+h2=${NETIFS[p4]}
+
+H1_IPV4="192.0.2.1"
+H2_IPV4="192.0.2.2"
+H1_IPV6="2001:db8:1::1"
+H2_IPV6="2001:db8:1::2"
+
+# On h1_ and h2_create do not set IP addresses to avoid interaction with the
+# system, to keep packet counters clean.
+h1_create()
+{
+ simple_if_init $h1
+ sysctl_set net.ipv6.conf.${h1}.disable_ipv6 1
+ # Get the MAC address of the interface to use it with mausezahn
+ h1_mac=$(ip -j link show dev ${h1} | jq -e '.[].address')
+}
+
+h1_destroy()
+{
+ sysctl_restore net.ipv6.conf.${h1}.disable_ipv6
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+ sysctl_set net.ipv6.conf.${h2}.disable_ipv6 1
+ h2_mac=$(ip -j link show dev ${h2} | jq -e '.[].address')
+}
+
+h2_destroy()
+{
+ sysctl_restore net.ipv6.conf.${h2}.disable_ipv6
+ simple_if_fini $h2
+}
+
+switch_create()
+{
+ ip link set ${swp1} up
+ ip link set ${swp2} up
+ sysctl_set net.ipv6.conf.${swp1}.disable_ipv6 1
+ sysctl_set net.ipv6.conf.${swp2}.disable_ipv6 1
+
+ # Ports should trust VLAN PCP even with vlan_filtering=0
+ ip link add br0 type bridge
+ ip link set ${swp1} master br0
+ ip link set ${swp2} master br0
+ ip link set br0 up
+ sysctl_set net.ipv6.conf.br0.disable_ipv6 1
+}
+
+switch_destroy()
+{
+ sysctl_restore net.ipv6.conf.${swp2}.disable_ipv6
+ sysctl_restore net.ipv6.conf.${swp1}.disable_ipv6
+
+ ip link del br0
+}
+
+setup_prepare()
+{
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ h2_destroy
+ h1_destroy
+ switch_destroy
+
+ vrf_cleanup
+}
+
+set_apptrust_order()
+{
+ local if_name=$1
+ local order=$2
+
+ dcb apptrust set dev ${if_name} order ${order}
+}
+
+# Function to extract a specified field from a given JSON stats string
+extract_network_stat() {
+ local stats_json=$1
+ local field_name=$2
+
+ echo $(echo "$stats_json" | jq -r "$field_name")
+}
+
+run_test()
+{
+ local test_name=$1;
+ local apptrust_order=$2;
+ local port_prio=$3;
+ local dscp_ipv=$4;
+ local dscp=$5;
+ local have_vlan=$6;
+ local pcp_ipv=$7;
+ local vlan_pcp=$8;
+ local ip_v6=$9
+
+ local rx_ipv
+ local tx_ipv
+
+ RET=0
+
+ # Send some packet to populate the switch MAC table
+ $MZ ${h2} -a ${h2_mac} -b ${h1_mac} -p 64 -t icmp echores -c 1
+
+ # Based on the apptrust order, set the expected Internal Priority values
+ # for the RX and TX paths.
+ if [ "${apptrust_order}" == "" ]; then
+ echo "Apptrust order not set."
+ rx_ipv=${port_prio}
+ tx_ipv=${port_prio}
+ elif [ "${apptrust_order}" == "dscp" ]; then
+ echo "Apptrust order is DSCP."
+ rx_ipv=${dscp_ipv}
+ tx_ipv=${dscp_ipv}
+ elif [ "${apptrust_order}" == "pcp" ]; then
+ echo "Apptrust order is PCP."
+ rx_ipv=${pcp_ipv}
+ tx_ipv=${pcp_ipv}
+ elif [ "${apptrust_order}" == "pcp dscp" ]; then
+ echo "Apptrust order is PCP DSCP."
+ if [ ${have_vlan} -eq 1 ]; then
+ rx_ipv=$((dscp_ipv > pcp_ipv ? dscp_ipv : pcp_ipv))
+ tx_ipv=${pcp_ipv}
+ else
+ rx_ipv=${dscp_ipv}
+ tx_ipv=${dscp_ipv}
+ fi
+ else
+ RET=1
+ echo "Error: Unknown apptrust order ${apptrust_order}"
+ log_test "${test_name}"
+ return
+ fi
+
+ # Most/all? of the KSZ switches do not provide per-TC counters. There
+ # are only tx_hi and rx_hi counters, which are used to count packets
+ # which are considered as high priority and most likely not assigned
+ # to the queue 0.
+ # On the ingress path, packets seem to get high priority status
+ # independently of the DSCP or PCP global mapping. On the egress path,
+ # the high priority status is assigned based on the DSCP or PCP global
+ # map configuration.
+ # The thresholds for the high priority status are not documented, but
+ # it seems that the switch considers packets as high priority on the
+ # ingress path if detected Internal Priority is greater than 0. On the
+ # egress path, the switch considers packets as high priority if
+ # detected Internal Priority is greater than 1.
+ if [ ${rx_ipv} -ge 1 ]; then
+ local expect_rx_high_prio=1
+ else
+ local expect_rx_high_prio=0
+ fi
+
+ if [ ${tx_ipv} -ge 2 ]; then
+ local expect_tx_high_prio=1
+ else
+ local expect_tx_high_prio=0
+ fi
+
+ # Use ip tool to get the current switch packet counters. ethool stats
+ # need to be recalculated to get the correct values.
+ local swp1_stats=$(ip -s -j link show dev ${swp1})
+ local swp2_stats=$(ip -s -j link show dev ${swp2})
+ local swp1_rx_packets_before=$(extract_network_stat "$swp1_stats" \
+ '.[0].stats64.rx.packets')
+ local swp1_rx_bytes_before=$(extract_network_stat "$swp1_stats" \
+ '.[0].stats64.rx.bytes')
+ local swp2_tx_packets_before=$(extract_network_stat "$swp2_stats" \
+ '.[0].stats64.tx.packets')
+ local swp2_tx_bytes_before=$(extract_network_stat "$swp2_stats" \
+ '.[0].stats64.tx.bytes')
+ local swp1_rx_hi_before=$(ethtool_stats_get ${swp1} "rx_hi")
+ local swp2_tx_hi_before=$(ethtool_stats_get ${swp2} "tx_hi")
+
+ # Assamble the mausezahn command based on the test parameters
+ # For the testis with ipv4 or ipv6, use icmp response packets,
+ # to avoid interaction with the system, to keep packet counters
+ # clean.
+ if [ ${ip_v6} -eq 0 ]; then
+ local ip="-a ${h1_mac} -b ${h2_mac} -A ${H1_IPV4} \
+ -B ${H2_IPV4} -t icmp unreach,code=1,dscp=${dscp}"
+ else
+ local ip="-6 -a ${h1_mac} -b ${h2_mac} -A ${H1_IPV6} \
+ -B ${H2_IPV6} -t icmp6 type=1,code=0,dscp=${dscp}"
+ fi
+
+ if [ ${have_vlan} -eq 1 ]; then
+ local vlan_pcp_opt="-Q ${vlan_pcp}:0"
+ else
+ local vlan_pcp_opt=""
+ fi
+ $MZ ${h1} ${ip} -c ${PING_COUNT} -d 10msec ${vlan_pcp_opt}
+
+ # Wait until the switch packet counters are updated
+ sleep 6
+
+ local swp1_stats=$(ip -s -j link show dev ${swp1})
+ local swp2_stats=$(ip -s -j link show dev ${swp2})
+
+ local swp1_rx_packets_after=$(extract_network_stat "$swp1_stats" \
+ '.[0].stats64.rx.packets')
+ local swp1_rx_bytes_after=$(extract_network_stat "$swp1_stats" \
+ '.[0].stats64.rx.bytes')
+ local swp2_tx_packets_after=$(extract_network_stat "$swp2_stats" \
+ '.[0].stats64.tx.packets')
+ local swp2_tx_bytes_after=$(extract_network_stat "$swp2_stats" \
+ '.[0].stats64.tx.bytes')
+
+ local swp1_rx_packets_diff=$((${swp1_rx_packets_after} - \
+ ${swp1_rx_packets_before}))
+ local swp2_tx_packets_diff=$((${swp2_tx_packets_after} - \
+ ${swp2_tx_packets_before}))
+
+ local swp1_rx_hi_after=$(ethtool_stats_get ${swp1} "rx_hi")
+ local swp2_tx_hi_after=$(ethtool_stats_get ${swp2} "tx_hi")
+
+ # Test if any packets were received on swp1, we will rx before and after
+ if [ ${swp1_rx_packets_diff} -lt ${PING_COUNT} ]; then
+ echo "Not expected amount of received packets on ${swp1}"
+ echo "before ${swp1_rx_packets_before} after ${swp1_rx_packets_after}"
+ RET=1
+ fi
+
+ # Test if any packets were transmitted on swp2, we will tx before and after
+ if [ ${swp2_tx_packets_diff} -lt ${PING_COUNT} ]; then
+ echo "Not expected amount of transmitted packets on ${swp2}"
+ echo "before ${swp2_tx_packets_before} after ${swp2_tx_packets_after}"
+ RET=1
+ fi
+
+ # tx/rx_hi counted in bytes. So, we need to compare the difference in bytes
+ local swp1_rx_bytes_diff=$(($swp1_rx_bytes_after - $swp1_rx_bytes_before))
+ local swp2_tx_bytes_diff=$(($swp2_tx_bytes_after - $swp2_tx_bytes_before))
+ local swp1_rx_hi_diff=$(($swp1_rx_hi_after - $swp1_rx_hi_before))
+ local swp2_tx_hi_diff=$(($swp2_tx_hi_after - $swp2_tx_hi_before))
+
+ if [ ${expect_rx_high_prio} -eq 1 ]; then
+ swp1_rx_hi_diff=$((${swp1_rx_hi_diff} - \
+ ${swp1_rx_packets_diff} * ${ETH_FCS_LEN}))
+ if [ ${swp1_rx_hi_diff} -ne ${swp1_rx_bytes_diff} ]; then
+ echo "Not expected amount of high priority packets received on ${swp1}"
+ echo "RX hi diff: ${swp1_rx_hi_diff}, expected RX bytes diff: ${swp1_rx_bytes_diff}"
+ RET=1
+ fi
+ else
+ if [ ${swp1_rx_hi_diff} -ne 0 ]; then
+ echo "Unexpected amount of high priority packets received on ${swp1}"
+ echo "RX hi diff: ${swp1_rx_hi_diff}, expected 0"
+ RET=1
+ fi
+ fi
+
+ if [ ${expect_tx_high_prio} -eq 1 ]; then
+ swp2_tx_hi_diff=$((${swp2_tx_hi_diff} - \
+ ${swp2_tx_packets_diff} * ${ETH_FCS_LEN}))
+ if [ ${swp2_tx_hi_diff} -ne ${swp2_tx_bytes_diff} ]; then
+ echo "Not expected amount of high priority packets transmitted on ${swp2}"
+ echo "TX hi diff: ${swp2_tx_hi_diff}, expected TX bytes diff: ${swp2_tx_bytes_diff}"
+ RET=1
+ fi
+ else
+ if [ ${swp2_tx_hi_diff} -ne 0 ]; then
+ echo "Unexpected amount of high priority packets transmitted on ${swp2}"
+ echo "TX hi diff: ${swp2_tx_hi_diff}, expected 0"
+ RET=1
+ fi
+ fi
+
+ log_test "${test_name}"
+}
+
+run_test_dscp()
+{
+ # IPv4 test
+ run_test "$1" "$2" "$3" "$4" "$5" 0 0 0 0
+ # IPv6 test
+ run_test "$1" "$2" "$3" "$4" "$5" 0 0 0 1
+}
+
+run_test_dscp_pcp()
+{
+ # IPv4 test
+ run_test "$1" "$2" "$3" "$4" "$5" 1 "$6" "$7" 0
+ # IPv6 test
+ run_test "$1" "$2" "$3" "$4" "$5" 1 "$6" "$7" 1
+}
+
+port_default_prio_get()
+{
+ local if_name=$1
+ local prio
+
+ prio="$(dcb -j app show dev ${if_name} default-prio | \
+ jq '.default_prio[]')"
+ if [ -z "${prio}" ]; then
+ prio=0
+ fi
+
+ echo ${prio}
+}
+
+test_port_default()
+{
+ local orig_apptrust=$(port_get_default_apptrust ${swp1})
+ local orig_prio=$(port_default_prio_get ${swp1})
+ local apptrust_order=""
+
+ RET=0
+
+ # Make sure no other priority sources will interfere with the test
+ set_apptrust_order ${swp1} "${apptrust_order}"
+
+ for val in $(seq 0 7); do
+ dcb app replace dev ${swp1} default-prio ${val}
+ if [ $val -ne $(port_default_prio_get ${swp1}) ]; then
+ RET=1
+ break
+ fi
+
+ run_test_dscp "Port-default QoS classification, prio: ${val}" \
+ "${apptrust_order}" ${val} 0 0
+ done
+
+ set_apptrust_order ${swp1} "${orig_apptrust}"
+ if [[ "$orig_apptrust" != "$(port_get_default_apptrust ${swp1})" ]]; then
+ RET=1
+ fi
+
+ dcb app replace dev ${swp1} default-prio ${orig_prio}
+ if [ $orig_prio -ne $(port_default_prio_get ${swp1}) ]; then
+ RET=1
+ fi
+
+ log_test "Port-default QoS classification"
+}
+
+port_get_default_apptrust()
+{
+ local if_name=$1
+
+ dcb -j apptrust show dev ${if_name} | jq -r '.order[]' | \
+ tr '\n' ' ' | xargs
+}
+
+test_port_apptrust()
+{
+ local original_dscp_prios_swp1=$(get_dscp_prios ${swp1})
+ local orig_apptrust=$(port_get_default_apptrust ${swp1})
+ local orig_port_prio=$(port_default_prio_get ${swp1})
+ local order_variants=("pcp dscp" "dscp" "pcp")
+ local apptrust_order
+ local port_prio
+ local dscp_prio
+ local pcp_prio
+ local dscp
+ local pcp
+
+ RET=0
+
+ # First, test if apptrust configuration as taken by the kernel
+ for order in "${order_variants[@]}"; do
+ set_apptrust_order ${swp1} "${order}"
+ if [[ "$order" != "$(port_get_default_apptrust ${swp1})" ]]; then
+ RET=1
+ break
+ fi
+ done
+
+ log_test "Apptrust, supported variants"
+
+ # To test if the apptrust configuration is working as expected, we need
+ # to set DSCP priorities for the switch port.
+ init_dscp_prios "${swp1}" "${original_dscp_prios_swp1}"
+
+ # Start with a simple test where all apptrust sources are disabled
+ # default port priority is 0, DSCP priority is mapped to 7.
+ # No high priority packets should be received or transmitted.
+ port_prio=0
+ dscp_prio=7
+ dscp=4
+
+ dcb app replace dev ${swp1} default-prio ${port_prio}
+ dcb app replace dev ${swp1} dscp-prio ${dscp}:${dscp_prio}
+
+ apptrust_order=""
+ set_apptrust_order ${swp1} "${apptrust_order}"
+ # Test with apptrust sources disabled, Packets should get port default
+ # priority which is 0
+ run_test_dscp "Apptrust, all disabled. DSCP-prio ${dscp}:${dscp_prio}" \
+ "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp}
+
+ apptrust_order="pcp"
+ set_apptrust_order ${swp1} "${apptrust_order}"
+ # If PCP is enabled, packets should get PCP priority, which is not
+ # set in this test (no VLAN tags are present in the packet). No high
+ # priority packets should be received or transmitted.
+ run_test_dscp "Apptrust, PCP enabled. DSCP-prio ${dscp}:${dscp_prio}" \
+ "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp}
+
+ apptrust_order="dscp"
+ set_apptrust_order ${swp1} "${apptrust_order}"
+ # If DSCP is enabled, packets should get DSCP priority which is set to 7
+ # in this test. High priority packets should be received and transmitted.
+ run_test_dscp "Apptrust, DSCP enabled. DSCP-prio ${dscp}:${dscp_prio}" \
+ "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp}
+
+ apptrust_order="pcp dscp"
+ set_apptrust_order ${swp1} "${apptrust_order}"
+ # If PCP and DSCP are enabled, PCP would have higher apptrust priority
+ # so packets should get PCP priority. But in this test VLAN PCP is not
+ # set, so it should get DSCP priority which is set to 7. High priority
+ # packets should be received and transmitted.
+ run_test_dscp "Apptrust, PCP and DSCP are enabled. DSCP-prio ${dscp}:${dscp_prio}" \
+ "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp}
+
+ # If VLAN PCP is set, it should have higher apptrust priority than DSCP
+ # so packets should get VLAN PCP priority. Send packets with VLAN PCP
+ # set to 0, DSCP set to 7. Packets should get VLAN PCP priority.
+ # No high priority packets should be transmitted. Due to nature of the
+ # switch, high priority packets will be received.
+ pcp_prio=0
+ pcp=0
+ run_test_dscp_pcp "Apptrust, PCP and DSCP are enabled. PCP ${pcp_prio}, DSCP-prio ${dscp}:${dscp_prio}" \
+ "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp} ${pcp_prio} ${pcp}
+
+ # If VLAN PCP is set to 7, it should have higher apptrust priority than
+ # DSCP so packets should get VLAN PCP priority. Send packets with VLAN
+ # PCP set to 7, DSCP set to 7. Packets should get VLAN PCP priority.
+ # High priority packets should be received and transmitted.
+ pcp_prio=7
+ pcp=7
+ run_test_dscp_pcp "Apptrust, PCP and DSCP are enabled. PCP ${pcp_prio}, DSCP-prio ${dscp}:${dscp_prio}" \
+ "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp} ${pcp_prio} ${pcp}
+ # Now make sure that the switch is able to handle the case where DSCP
+ # priority is set to 0 and PCP priority is set to 7. Packets should get
+ # PCP priority. High priority packets should be received and transmitted.
+ dscp_prio=0
+ dcb app replace dev ${swp1} dscp-prio ${dscp}:${dscp_prio}
+ run_test_dscp_pcp "Apptrust, PCP and DSCP are enabled. PCP ${pcp_prio}, DSCP-prio ${dscp}:${dscp_prio}" \
+ "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp} ${pcp_prio} ${pcp}
+ # If both VLAN PCP and DSCP are set to 0, packets should get 0 priority.
+ # No high priority packets should be received or transmitted.
+ pcp_prio=0
+ pcp=0
+ run_test_dscp_pcp "Apptrust, PCP and DSCP are enabled. PCP ${pcp_prio}, DSCP-prio ${dscp}:${dscp_prio}" \
+ "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp} ${pcp_prio} ${pcp}
+
+ # Restore original priorities
+ if ! restore_priorities "${swp1}" "${original_dscp_prios_swp1}"; then
+ RET=1
+ fi
+
+ set_apptrust_order ${swp1} "${orig_apptrust}"
+ if [ "$orig_apptrust" != "$(port_get_default_apptrust ${swp1})" ]; then
+ RET=1
+ fi
+
+ dcb app replace dev ${swp1} default-prio ${orig_port_prio}
+ if [ $orig_port_prio -ne $(port_default_prio_get ${swp1}) ]; then
+ RET=1
+ fi
+
+ log_test "Apptrust, restore original settings"
+}
+
+# Function to get current DSCP priorities
+get_dscp_prios() {
+ local if_name=$1
+ dcb -j app show dev ${if_name} | jq -c '.dscp_prio'
+}
+
+# Function to set a specific DSCP priority on a device
+replace_dscp_prio() {
+ local if_name=$1
+ local dscp=$2
+ local prio=$3
+ dcb app replace dev ${if_name} dscp-prio ${dscp}:${prio}
+}
+
+# Function to compare DSCP maps
+compare_dscp_maps() {
+ local old_json=$1
+ local new_json=$2
+ local dscp=$3
+ local prio=$4
+
+ # Create a modified old_json with the expected change for comparison
+ local modified_old_json=$(echo "$old_json" |
+ jq --argjson dscp $dscp --argjson prio $prio \
+ 'map(if .[0] == $dscp then [$dscp, $prio] else . end)' |
+ tr -d " \n")
+
+ # Compare new_json with the modified_old_json
+ if [[ "$modified_old_json" == "$new_json" ]]; then
+ return 0
+ else
+ return 1
+ fi
+}
+
+# Function to set DSCP priorities
+set_and_verify_dscp() {
+ local port=$1
+ local dscp=$2
+ local new_prio=$3
+
+ local old_prios=$(get_dscp_prios $port)
+
+ replace_dscp_prio "$port" $dscp $new_prio
+
+ # Fetch current settings and compare
+ local current_prios=$(get_dscp_prios $port)
+ if ! compare_dscp_maps "$old_prios" "$current_prios" $dscp $new_prio; then
+ echo "Error: Unintended changes detected in DSCP map for $port after setting DSCP $dscp to $new_prio."
+ return 1
+ fi
+ return 0
+}
+
+# Function to restore original priorities
+restore_priorities() {
+ local port=$1
+ local original_prios=$2
+
+ echo "Removing test artifacts for $port"
+ local current_prios=$(get_dscp_prios $port)
+ local prio_str=$(echo "$current_prios" |
+ jq -r 'map("\(.[0]):\(.[1])") | join(" ")')
+ dcb app del dev $port dscp-prio $prio_str
+
+ echo "Restoring original DSCP priorities for $port"
+ local restore_str=$(echo "$original_prios" |
+ jq -r 'map("\(.[0]):\(.[1])") | join(" ")')
+ dcb app add dev $port dscp-prio $restore_str
+
+ local current_prios=$(get_dscp_prios $port)
+ if [[ "$original_prios" != "$current_prios" ]]; then
+ echo "Error: Failed to restore original DSCP priorities for $port"
+ return 1
+ fi
+ return 0
+}
+
+# Initialize DSCP priorities. Set them to predictable values for testing.
+init_dscp_prios() {
+ local port=$1
+ local original_prios=$2
+
+ echo "Removing any existing DSCP priority mappins for $port"
+ local prio_str=$(echo "$original_prios" |
+ jq -r 'map("\(.[0]):\(.[1])") | join(" ")')
+ dcb app del dev $port dscp-prio $prio_str
+
+ # Initialize DSCP priorities list
+ local dscp_prios=""
+ for dscp in {0..63}; do
+ dscp_prios+=("$dscp:0")
+ done
+
+ echo "Setting initial DSCP priorities map to 0 for $port"
+ dcb app add dev $port dscp-prio ${dscp_prios[@]}
+}
+
+# Main function to test global DSCP map across specified ports
+test_global_dscp_map() {
+ local ports=("$swp1" "$swp2")
+ local original_dscp_prios_port0=$(get_dscp_prios ${ports[0]})
+ local orig_apptrust=$(port_get_default_apptrust ${swp1})
+ local orig_port_prio=$(port_default_prio_get ${swp1})
+ local apptrust_order="dscp"
+ local port_prio=0
+ local dscp_prio
+ local dscp
+
+ RET=0
+
+ set_apptrust_order ${swp1} "${apptrust_order}"
+ dcb app replace dev ${swp1} default-prio ${port_prio}
+
+ # Initialize DSCP priorities
+ init_dscp_prios "${ports[0]}" "$original_dscp_prios_port0"
+
+ # Loop over each DSCP index
+ for dscp in {0..63}; do
+ # and test each Internal Priority value
+ for dscp_prio in {0..7}; do
+ # do it for each port. This is to test if the global DSCP map
+ # is accessible from all ports.
+ for port in "${ports[@]}"; do
+ if ! set_and_verify_dscp "$port" $dscp $dscp_prio; then
+ RET=1
+ fi
+ done
+
+ # Test if the DSCP priority is correctly applied to the packets
+ run_test_dscp "DSCP (${dscp}) QoS classification, prio: ${dscp_prio}" \
+ "${apptrust_order}" ${port_prio} ${dscp_prio} ${dscp}
+ if [ ${RET} -eq 1 ]; then
+ break
+ fi
+ done
+ done
+
+ # Restore original priorities
+ if ! restore_priorities "${ports[0]}" "${original_dscp_prios_port0}"; then
+ RET=1
+ fi
+
+ set_apptrust_order ${swp1} "${orig_apptrust}"
+ if [[ "$orig_apptrust" != "$(port_get_default_apptrust ${swp1})" ]]; then
+ RET=1
+ fi
+
+ dcb app replace dev ${swp1} default-prio ${orig_port_prio}
+ if [ $orig_port_prio -ne $(port_default_prio_get ${swp1}) ]; then
+ RET=1
+ fi
+
+ log_test "DSCP global map"
+}
+
+trap cleanup EXIT
+
+ALL_TESTS="
+ test_port_default
+ test_port_apptrust
+ test_global_dscp_map
+"
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh b/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh
index 91891b9418d7..877cd6df94a1 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh
@@ -24,8 +24,8 @@ setup_prepare()
busywait "$TIMEOUT" wait_for_port_up ethtool $swp2
check_err $? "ports did not come up"
- local lanes_exist=$(ethtool $swp1 | grep 'Lanes:')
- if [[ -z $lanes_exist ]]; then
+ busywait $TIMEOUT sh -c "ethtool $swp1 | grep -q Lanes:"
+ if [[ $? -ne 0 ]]; then
log_test "SKIP: driver does not support lanes setting"
exit 1
fi
@@ -122,8 +122,9 @@ autoneg()
ethtool_set $swp1 speed $max_speed lanes $lanes
ip link set dev $swp1 up
ip link set dev $swp2 up
- busywait "$TIMEOUT" wait_for_port_up ethtool $swp2
- check_err $? "ports did not come up"
+
+ busywait $TIMEOUT sh -c "ethtool $swp1 | grep -q Lanes:"
+ check_err $? "Lanes parameter is not presented on time"
check_lanes $swp1 $lanes $max_speed
log_test "$lanes lanes is autonegotiated"
@@ -160,8 +161,9 @@ autoneg_force_mode()
ethtool_set $swp2 speed $max_speed lanes $lanes autoneg off
ip link set dev $swp1 up
ip link set dev $swp2 up
- busywait "$TIMEOUT" wait_for_port_up ethtool $swp2
- check_err $? "ports did not come up"
+
+ busywait $TIMEOUT sh -c "ethtool $swp1 | grep -q Lanes:"
+ check_err $? "Lanes parameter is not presented on time"
check_lanes $swp1 $lanes $max_speed
log_test "Autoneg off, $lanes lanes detected during force mode"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh
index 76f1ab4898d9..e1ad623146d7 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh
@@ -15,6 +15,13 @@ source $lib_dir/mirror_lib.sh
source $lib_dir/mirror_gre_lib.sh
source $lib_dir/mirror_gre_topo_lib.sh
+ALL_TESTS="
+ test_keyful
+ test_soft
+ test_tos_fixed
+ test_ttl_inherit
+"
+
setup_keyful()
{
tunnel_create gt6-key ip6gretap 2001:db8:3::1 2001:db8:3::2 \
@@ -118,15 +125,15 @@ test_span_gre_ttl_inherit()
RET=0
ip link set dev $tundev type $type ttl inherit
- mirror_install $swp1 ingress $tundev "matchall $tcflags"
- fail_test_span_gre_dir $tundev ingress
+ mirror_install $swp1 ingress $tundev "matchall"
+ fail_test_span_gre_dir $tundev
ip link set dev $tundev type $type ttl 100
- quick_test_span_gre_dir $tundev ingress
+ quick_test_span_gre_dir $tundev
mirror_uninstall $swp1 ingress
- log_test "$what: no offload on TTL of inherit ($tcflags)"
+ log_test "$what: no offload on TTL of inherit"
}
test_span_gre_tos_fixed()
@@ -138,61 +145,49 @@ test_span_gre_tos_fixed()
RET=0
ip link set dev $tundev type $type tos 0x10
- mirror_install $swp1 ingress $tundev "matchall $tcflags"
- fail_test_span_gre_dir $tundev ingress
+ mirror_install $swp1 ingress $tundev "matchall"
+ fail_test_span_gre_dir $tundev
ip link set dev $tundev type $type tos inherit
- quick_test_span_gre_dir $tundev ingress
+ quick_test_span_gre_dir $tundev
mirror_uninstall $swp1 ingress
- log_test "$what: no offload on a fixed TOS ($tcflags)"
+ log_test "$what: no offload on a fixed TOS"
}
test_span_failable()
{
- local should_fail=$1; shift
local tundev=$1; shift
local what=$1; shift
RET=0
- mirror_install $swp1 ingress $tundev "matchall $tcflags"
- if ((should_fail)); then
- fail_test_span_gre_dir $tundev ingress
- else
- quick_test_span_gre_dir $tundev ingress
- fi
+ mirror_install $swp1 ingress $tundev "matchall"
+ fail_test_span_gre_dir $tundev
mirror_uninstall $swp1 ingress
- log_test "$what: should_fail=$should_fail ($tcflags)"
+ log_test "fail $what"
}
-test_failable()
+test_keyful()
{
- local should_fail=$1; shift
-
- test_span_failable $should_fail gt6-key "mirror to keyful gretap"
- test_span_failable $should_fail gt6-soft "mirror to gretap w/ soft underlay"
+ test_span_failable gt6-key "mirror to keyful gretap"
}
-test_sw()
+test_soft()
{
- slow_path_trap_install $swp1 ingress
- slow_path_trap_install $swp1 egress
-
- test_failable 0
-
- slow_path_trap_uninstall $swp1 egress
- slow_path_trap_uninstall $swp1 ingress
+ test_span_failable gt6-soft "mirror to gretap w/ soft underlay"
}
-test_hw()
+test_tos_fixed()
{
- test_failable 1
-
test_span_gre_tos_fixed gt4 gretap "mirror to gretap"
test_span_gre_tos_fixed gt6 ip6gretap "mirror to ip6gretap"
+}
+
+test_ttl_inherit()
+{
test_span_gre_ttl_inherit gt4 gretap "mirror to gretap"
test_span_gre_ttl_inherit gt6 ip6gretap "mirror to ip6gretap"
}
@@ -202,16 +197,6 @@ trap cleanup EXIT
setup_prepare
setup_wait
-if ! tc_offload_check; then
- check_err 1 "Could not test offloaded functionality"
- log_test "mlxsw-specific tests for mirror to gretap"
- exit
-fi
-
-tcflags="skip_hw"
-test_sw
-
-tcflags="skip_sw"
-test_hw
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh
index e5589e2fca85..d43093310e23 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh
@@ -79,7 +79,7 @@ mirror_gre_tunnels_create()
cat >> $MIRROR_GRE_BATCH_FILE <<-EOF
filter add dev $swp1 ingress pref 1000 \
protocol ipv6 \
- flower $tcflags dst_ip $match_dip \
+ flower skip_sw dst_ip $match_dip \
action mirred egress mirror dev $tun
EOF
done
@@ -107,7 +107,7 @@ mirror_gre_tunnels_destroy()
done
}
-__mirror_gre_test()
+mirror_gre_test()
{
local count=$1; shift
local should_fail=$1; shift
@@ -131,20 +131,6 @@ __mirror_gre_test()
done
}
-mirror_gre_test()
-{
- local count=$1; shift
- local should_fail=$1; shift
-
- if ! tc_offload_check $TC_FLOWER_NUM_NETIFS; then
- check_err 1 "Could not test offloaded functionality"
- return
- fi
-
- tcflags="skip_sw"
- __mirror_gre_test $count $should_fail
-}
-
mirror_gre_setup_prepare()
{
h1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh
index 6369927e9c37..48395cfd4f95 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh
@@ -42,7 +42,7 @@ __mlxsw_only_on_spectrum()
local src=$1; shift
if ! mlxsw_on_spectrum "$rev"; then
- log_test_skip $src:$caller "(Spectrum-$rev only)"
+ log_test_xfail $src:$caller "(Spectrum-$rev only)"
return 1
fi
}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
index a88d8a8c85f2..899b6892603f 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
@@ -47,7 +47,6 @@ for current_test in ${TESTS:-$ALL_TESTS}; do
RET=0
target=$(${current_test}_get_target "$should_fail")
if ((target == 0)); then
- log_test_skip "'$current_test' should_fail=$should_fail test"
continue
fi
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh
index 31252bc8775e..4994bea5daf8 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh
@@ -11,7 +11,7 @@ ALL_TESTS="single_mask_test identical_filters_test two_masks_test \
multiple_masks_test ctcam_edge_cases_test delta_simple_test \
delta_two_masks_one_key_test delta_simple_rehash_test \
bloom_simple_test bloom_complex_test bloom_delta_test \
- max_erp_entries_test max_group_size_test"
+ max_erp_entries_test max_group_size_test collision_test"
NUM_NETIFS=2
source $lib_dir/lib.sh
source $lib_dir/tc_common.sh
@@ -457,7 +457,7 @@ delta_two_masks_one_key_test()
{
# If 2 keys are the same and only differ in mask in a way that
# they belong under the same ERP (second is delta of the first),
- # there should be no C-TCAM spill.
+ # there should be C-TCAM spill.
RET=0
@@ -474,8 +474,8 @@ delta_two_masks_one_key_test()
tp_record "mlxsw:*" "tc filter add dev $h2 ingress protocol ip \
pref 2 handle 102 flower $tcflags dst_ip 192.0.2.2 \
action drop"
- tp_check_hits "mlxsw:mlxsw_sp_acl_atcam_entry_add_ctcam_spill" 0
- check_err $? "incorrect C-TCAM spill while inserting the second rule"
+ tp_check_hits "mlxsw:mlxsw_sp_acl_atcam_entry_add_ctcam_spill" 1
+ check_err $? "C-TCAM spill did not happen while inserting the second rule"
$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
-t ip -q
@@ -1087,6 +1087,53 @@ max_group_size_test()
log_test "max ACL group size test ($tcflags). max size $max_size"
}
+collision_test()
+{
+ # Filters cannot share an eRP if in the common unmasked part (i.e.,
+ # without the delta bits) they have the same values. If the driver does
+ # not prevent such configuration (by spilling into the C-TCAM), then
+ # multiple entries will be present in the device with the same key,
+ # leading to collisions and a reduced scale.
+ #
+ # Create such a scenario and make sure all the filters are successfully
+ # added.
+
+ RET=0
+
+ local ret
+
+ if [[ "$tcflags" != "skip_sw" ]]; then
+ return 0;
+ fi
+
+ # Add a single dst_ip/24 filter and multiple dst_ip/32 filters that all
+ # have the same values in the common unmasked part (dst_ip/24).
+
+ tc filter add dev $h2 ingress pref 1 proto ipv4 handle 101 \
+ flower $tcflags dst_ip 198.51.100.0/24 \
+ action drop
+
+ for i in {0..255}; do
+ tc filter add dev $h2 ingress pref 2 proto ipv4 \
+ handle $((102 + i)) \
+ flower $tcflags dst_ip 198.51.100.${i}/32 \
+ action drop
+ ret=$?
+ [[ $ret -ne 0 ]] && break
+ done
+
+ check_err $ret "failed to add all the filters"
+
+ for i in {255..0}; do
+ tc filter del dev $h2 ingress pref 2 proto ipv4 \
+ handle $((102 + i)) flower
+ done
+
+ tc filter del dev $h2 ingress pref 1 proto ipv4 handle 101 flower
+
+ log_test "collision test ($tcflags)"
+}
+
setup_prepare()
{
h1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
index f981c957f097..482ebb744eba 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
@@ -52,7 +52,6 @@ for current_test in ${TESTS:-$ALL_TESTS}; do
RET=0
target=$(${current_test}_get_target "$should_fail")
if ((target == 0)); then
- log_test_skip "'$current_test' [$profile] should_fail=$should_fail test"
continue
fi
${current_test}_setup_prepare
diff --git a/tools/testing/selftests/drivers/net/ping.py b/tools/testing/selftests/drivers/net/ping.py
new file mode 100755
index 000000000000..eb83e7b48797
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/ping.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+from lib.py import ksft_run, ksft_exit
+from lib.py import ksft_eq
+from lib.py import NetDrvEpEnv
+from lib.py import bkg, cmd, wait_port_listen, rand_port
+
+
+def test_v4(cfg) -> None:
+ cfg.require_v4()
+
+ cmd(f"ping -c 1 -W0.5 {cfg.remote_v4}")
+ cmd(f"ping -c 1 -W0.5 {cfg.v4}", host=cfg.remote)
+
+
+def test_v6(cfg) -> None:
+ cfg.require_v6()
+
+ cmd(f"ping -c 1 -W0.5 {cfg.remote_v6}")
+ cmd(f"ping -c 1 -W0.5 {cfg.v6}", host=cfg.remote)
+
+
+def test_tcp(cfg) -> None:
+ cfg.require_cmd("socat", remote=True)
+
+ port = rand_port()
+ listen_cmd = f"socat -{cfg.addr_ipver} -t 2 -u TCP-LISTEN:{port},reuseport STDOUT"
+
+ with bkg(listen_cmd, exit_wait=True) as nc:
+ wait_port_listen(port)
+
+ cmd(f"echo ping | socat -t 2 -u STDIN TCP:{cfg.baddr}:{port}",
+ shell=True, host=cfg.remote)
+ ksft_eq(nc.stdout.strip(), "ping")
+
+ with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as nc:
+ wait_port_listen(port, host=cfg.remote)
+
+ cmd(f"echo ping | socat -t 2 -u STDIN TCP:{cfg.remote_baddr}:{port}", shell=True)
+ ksft_eq(nc.stdout.strip(), "ping")
+
+
+def main() -> None:
+ with NetDrvEpEnv(__file__) as cfg:
+ ksft_run(globs=globals(), case_pfx={"test_"}, args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/queues.py b/tools/testing/selftests/drivers/net/queues.py
new file mode 100755
index 000000000000..30f29096e27c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/queues.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+from lib.py import ksft_run, ksft_exit, ksft_eq, KsftSkipEx
+from lib.py import EthtoolFamily, NetdevFamily
+from lib.py import NetDrvEnv
+from lib.py import cmd
+import glob
+
+
+def sys_get_queues(ifname) -> int:
+ folders = glob.glob(f'/sys/class/net/{ifname}/queues/rx-*')
+ return len(folders)
+
+
+def nl_get_queues(cfg, nl):
+ queues = nl.queue_get({'ifindex': cfg.ifindex}, dump=True)
+ if queues:
+ return len([q for q in queues if q['type'] == 'rx'])
+ return None
+
+
+def get_queues(cfg, nl) -> None:
+ queues = nl_get_queues(cfg, nl)
+ if not queues:
+ raise KsftSkipEx('queue-get not supported by device')
+
+ expected = sys_get_queues(cfg.dev['ifname'])
+ ksft_eq(queues, expected)
+
+
+def addremove_queues(cfg, nl) -> None:
+ queues = nl_get_queues(cfg, nl)
+ if not queues:
+ raise KsftSkipEx('queue-get not supported by device')
+
+ curr_queues = sys_get_queues(cfg.dev['ifname'])
+ if curr_queues == 1:
+ raise KsftSkipEx('cannot decrement queue: already at 1')
+
+ netnl = EthtoolFamily()
+ channels = netnl.channels_get({'header': {'dev-index': cfg.ifindex}})
+ if channels['combined-count'] == 0:
+ rx_type = 'rx'
+ else:
+ rx_type = 'combined'
+
+ expected = curr_queues - 1
+ cmd(f"ethtool -L {cfg.dev['ifname']} {rx_type} {expected}", timeout=10)
+ queues = nl_get_queues(cfg, nl)
+ ksft_eq(queues, expected)
+
+ expected = curr_queues
+ cmd(f"ethtool -L {cfg.dev['ifname']} {rx_type} {expected}", timeout=10)
+ queues = nl_get_queues(cfg, nl)
+ ksft_eq(queues, expected)
+
+
+def main() -> None:
+ with NetDrvEnv(__file__, queue_count=3) as cfg:
+ ksft_run([get_queues, addremove_queues], args=(cfg, NetdevFamily()))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py
new file mode 100755
index 000000000000..820b8e0a22c6
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/stats.py
@@ -0,0 +1,144 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+from lib.py import ksft_run, ksft_exit, ksft_pr
+from lib.py import ksft_ge, ksft_eq, ksft_in, ksft_true, ksft_raises, KsftSkipEx, KsftXfailEx
+from lib.py import EthtoolFamily, NetdevFamily, RtnlFamily, NlError
+from lib.py import NetDrvEnv
+
+ethnl = EthtoolFamily()
+netfam = NetdevFamily()
+rtnl = RtnlFamily()
+
+
+def check_pause(cfg) -> None:
+ global ethnl
+
+ try:
+ ethnl.pause_get({"header": {"dev-index": cfg.ifindex}})
+ except NlError as e:
+ if e.error == 95:
+ raise KsftXfailEx("pause not supported by the device")
+ raise
+
+ data = ethnl.pause_get({"header": {"dev-index": cfg.ifindex,
+ "flags": {'stats'}}})
+ ksft_true(data['stats'], "driver does not report stats")
+
+
+def check_fec(cfg) -> None:
+ global ethnl
+
+ try:
+ ethnl.fec_get({"header": {"dev-index": cfg.ifindex}})
+ except NlError as e:
+ if e.error == 95:
+ raise KsftXfailEx("FEC not supported by the device")
+ raise
+
+ data = ethnl.fec_get({"header": {"dev-index": cfg.ifindex,
+ "flags": {'stats'}}})
+ ksft_true(data['stats'], "driver does not report stats")
+
+
+def pkt_byte_sum(cfg) -> None:
+ global netfam, rtnl
+
+ def get_qstat(test):
+ global netfam
+ stats = netfam.qstats_get({}, dump=True)
+ if stats:
+ for qs in stats:
+ if qs["ifindex"]== test.ifindex:
+ return qs
+
+ qstat = get_qstat(cfg)
+ if qstat is None:
+ raise KsftSkipEx("qstats not supported by the device")
+
+ for key in ['tx-packets', 'tx-bytes', 'rx-packets', 'rx-bytes']:
+ ksft_in(key, qstat, "Drivers should always report basic keys")
+
+ # Compare stats, rtnl stats and qstats must match,
+ # but the interface may be up, so do a series of dumps
+ # each time the more "recent" stats must be higher or same.
+ def stat_cmp(rstat, qstat):
+ for key in ['tx-packets', 'tx-bytes', 'rx-packets', 'rx-bytes']:
+ if rstat[key] != qstat[key]:
+ return rstat[key] - qstat[key]
+ return 0
+
+ for _ in range(10):
+ rtstat = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
+ if stat_cmp(rtstat, qstat) < 0:
+ raise Exception("RTNL stats are lower, fetched later")
+ qstat = get_qstat(cfg)
+ if stat_cmp(rtstat, qstat) > 0:
+ raise Exception("Qstats are lower, fetched later")
+
+
+def qstat_by_ifindex(cfg) -> None:
+ global netfam
+ global rtnl
+
+ # Construct a map ifindex -> [dump, by-index, dump]
+ ifindexes = {}
+ stats = netfam.qstats_get({}, dump=True)
+ for entry in stats:
+ ifindexes[entry['ifindex']] = [entry, None, None]
+
+ for ifindex in ifindexes.keys():
+ entry = netfam.qstats_get({"ifindex": ifindex}, dump=True)
+ ksft_eq(len(entry), 1)
+ ifindexes[entry[0]['ifindex']][1] = entry[0]
+
+ stats = netfam.qstats_get({}, dump=True)
+ for entry in stats:
+ ifindexes[entry['ifindex']][2] = entry
+
+ if len(ifindexes) == 0:
+ raise KsftSkipEx("No ifindex supports qstats")
+
+ # Now make sure the stats match/make sense
+ for ifindex, triple in ifindexes.items():
+ all_keys = triple[0].keys() | triple[1].keys() | triple[2].keys()
+
+ for key in all_keys:
+ ksft_ge(triple[1][key], triple[0][key], comment="bad key: " + key)
+ ksft_ge(triple[2][key], triple[1][key], comment="bad key: " + key)
+
+ # Test invalid dumps
+ # 0 is invalid
+ with ksft_raises(NlError) as cm:
+ netfam.qstats_get({"ifindex": 0}, dump=True)
+ ksft_eq(cm.exception.nl_msg.error, -34)
+ ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex')
+
+ # loopback has no stats
+ with ksft_raises(NlError) as cm:
+ netfam.qstats_get({"ifindex": 1}, dump=True)
+ ksft_eq(cm.exception.nl_msg.error, -95)
+ ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex')
+
+ # Try to get stats for lowest unused ifindex but not 0
+ devs = rtnl.getlink({}, dump=True)
+ all_ifindexes = set([dev["ifi-index"] for dev in devs])
+ lowest = 2
+ while lowest in all_ifindexes:
+ lowest += 1
+
+ with ksft_raises(NlError) as cm:
+ netfam.qstats_get({"ifindex": lowest}, dump=True)
+ ksft_eq(cm.exception.nl_msg.error, -19)
+ ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.ifindex')
+
+
+def main() -> None:
+ with NetDrvEnv(__file__) as cfg:
+ ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex],
+ args=(cfg, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/virtio_net/Makefile b/tools/testing/selftests/drivers/net/virtio_net/Makefile
new file mode 100644
index 000000000000..7ec7cd3ab2cc
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/virtio_net/Makefile
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0+ OR MIT
+
+TEST_PROGS = basic_features.sh \
+ #
+
+TEST_FILES = \
+ virtio_net_common.sh \
+ #
+
+TEST_INCLUDES = \
+ ../../../net/forwarding/lib.sh \
+ ../../../net/lib.sh \
+ #
+
+include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/virtio_net/basic_features.sh b/tools/testing/selftests/drivers/net/virtio_net/basic_features.sh
new file mode 100755
index 000000000000..cf8cf816ed48
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/virtio_net/basic_features.sh
@@ -0,0 +1,131 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# See virtio_net_common.sh comments for more details about assumed setup
+
+ALL_TESTS="
+ initial_ping_test
+ f_mac_test
+"
+
+source virtio_net_common.sh
+
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+
+h1=${NETIFS[p1]}
+h2=${NETIFS[p2]}
+
+h1_create()
+{
+ simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+initial_ping_test()
+{
+ setup_cleanup
+ setup_prepare
+ ping_test $h1 $H2_IPV4 " simple"
+}
+
+f_mac_test()
+{
+ RET=0
+ local test_name="mac feature filtered"
+
+ virtio_feature_present $h1 $VIRTIO_NET_F_MAC
+ if [ $? -ne 0 ]; then
+ log_test_skip "$test_name" "Device $h1 is missing feature $VIRTIO_NET_F_MAC."
+ return 0
+ fi
+ virtio_feature_present $h1 $VIRTIO_NET_F_MAC
+ if [ $? -ne 0 ]; then
+ log_test_skip "$test_name" "Device $h2 is missing feature $VIRTIO_NET_F_MAC."
+ return 0
+ fi
+
+ setup_cleanup
+ setup_prepare
+
+ grep -q 0 /sys/class/net/$h1/addr_assign_type
+ check_err $? "Permanent address assign type for $h1 is not set"
+ grep -q 0 /sys/class/net/$h2/addr_assign_type
+ check_err $? "Permanent address assign type for $h2 is not set"
+
+ setup_cleanup
+ virtio_filter_feature_add $h1 $VIRTIO_NET_F_MAC
+ virtio_filter_feature_add $h2 $VIRTIO_NET_F_MAC
+ setup_prepare
+
+ grep -q 0 /sys/class/net/$h1/addr_assign_type
+ check_fail $? "Permanent address assign type for $h1 is set when F_MAC feature is filtered"
+ grep -q 0 /sys/class/net/$h2/addr_assign_type
+ check_fail $? "Permanent address assign type for $h2 is set when F_MAC feature is filtered"
+
+ ping_do $h1 $H2_IPV4
+ check_err $? "Ping failed"
+
+ log_test "$test_name"
+}
+
+setup_prepare()
+{
+ virtio_device_rebind $h1
+ virtio_device_rebind $h2
+ wait_for_dev $h1
+ wait_for_dev $h2
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+}
+
+setup_cleanup()
+{
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+
+ virtio_filter_features_clear $h1
+ virtio_filter_features_clear $h2
+ virtio_device_rebind $h1
+ virtio_device_rebind $h2
+ wait_for_dev $h1
+ wait_for_dev $h2
+}
+
+cleanup()
+{
+ pre_cleanup
+ setup_cleanup
+}
+
+check_driver $h1 "virtio_net"
+check_driver $h2 "virtio_net"
+check_virtio_debugfs $h1
+check_virtio_debugfs $h2
+
+trap cleanup EXIT
+
+setup_prepare
+
+tests_run
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/drivers/net/virtio_net/config b/tools/testing/selftests/drivers/net/virtio_net/config
new file mode 100644
index 000000000000..bcf7555eaffe
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/virtio_net/config
@@ -0,0 +1,8 @@
+CONFIG_BPF_SYSCALL=y
+CONFIG_CGROUP_BPF=y
+CONFIG_IPV6=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_NET_VRF=m
+CONFIG_VIRTIO_DEBUG=y
+CONFIG_VIRTIO_NET=y
diff --git a/tools/testing/selftests/drivers/net/virtio_net/virtio_net_common.sh b/tools/testing/selftests/drivers/net/virtio_net/virtio_net_common.sh
new file mode 100644
index 000000000000..57bd8055e2e5
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/virtio_net/virtio_net_common.sh
@@ -0,0 +1,99 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This assumes running on a host with two virtio interfaces connected
+# back to back. Example script to do such wire-up of tap devices would
+# look like this:
+#
+# =======================================================================================================
+# #!/bin/bash
+#
+# DEV1="$1"
+# DEV2="$2"
+#
+# sudo tc qdisc add dev $DEV1 clsact
+# sudo tc qdisc add dev $DEV2 clsact
+# sudo tc filter add dev $DEV1 ingress protocol all pref 1 matchall action mirred egress redirect dev $DEV2
+# sudo tc filter add dev $DEV2 ingress protocol all pref 1 matchall action mirred egress redirect dev $DEV1
+# sudo ip link set $DEV1 up
+# sudo ip link set $DEV2 up
+# =======================================================================================================
+
+REQUIRE_MZ="no"
+NETIF_CREATE="no"
+NETIF_FIND_DRIVER="virtio_net"
+NUM_NETIFS=2
+
+H1_IPV4="192.0.2.1"
+H2_IPV4="192.0.2.2"
+H1_IPV6="2001:db8:1::1"
+H2_IPV6="2001:db8:1::2"
+
+VIRTIO_NET_F_MAC=5
+
+virtio_device_get()
+{
+ local dev=$1; shift
+ local device_path="/sys/class/net/$dev/device/"
+
+ basename `realpath $device_path`
+}
+
+virtio_device_rebind()
+{
+ local dev=$1; shift
+ local device=`virtio_device_get $dev`
+
+ echo "$device" > /sys/bus/virtio/drivers/virtio_net/unbind
+ echo "$device" > /sys/bus/virtio/drivers/virtio_net/bind
+}
+
+virtio_debugfs_get()
+{
+ local dev=$1; shift
+ local device=`virtio_device_get $dev`
+
+ echo /sys/kernel/debug/virtio/$device/
+}
+
+check_virtio_debugfs()
+{
+ local dev=$1; shift
+ local debugfs=`virtio_debugfs_get $dev`
+
+ if [ ! -f "$debugfs/device_features" ] ||
+ [ ! -f "$debugfs/filter_feature_add" ] ||
+ [ ! -f "$debugfs/filter_feature_del" ] ||
+ [ ! -f "$debugfs/filter_features" ] ||
+ [ ! -f "$debugfs/filter_features_clear" ]; then
+ echo "SKIP: not possible to access debugfs for $dev"
+ exit $ksft_skip
+ fi
+}
+
+virtio_feature_present()
+{
+ local dev=$1; shift
+ local feature=$1; shift
+ local debugfs=`virtio_debugfs_get $dev`
+
+ cat $debugfs/device_features |grep "^$feature$" &> /dev/null
+ return $?
+}
+
+virtio_filter_features_clear()
+{
+ local dev=$1; shift
+ local debugfs=`virtio_debugfs_get $dev`
+
+ echo "1" > $debugfs/filter_features_clear
+}
+
+virtio_filter_feature_add()
+{
+ local dev=$1; shift
+ local feature=$1; shift
+ local debugfs=`virtio_debugfs_get $dev`
+
+ echo "$feature" > $debugfs/filter_feature_add
+}
diff --git a/tools/testing/selftests/drivers/platform/x86/intel/ifs/Makefile b/tools/testing/selftests/drivers/platform/x86/intel/ifs/Makefile
new file mode 100644
index 000000000000..03d0449d307c
--- /dev/null
+++ b/tools/testing/selftests/drivers/platform/x86/intel/ifs/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for ifs(In Field Scan) selftests
+
+TEST_PROGS := test_ifs.sh
+
+include ../../../../../lib.mk
diff --git a/tools/testing/selftests/drivers/platform/x86/intel/ifs/test_ifs.sh b/tools/testing/selftests/drivers/platform/x86/intel/ifs/test_ifs.sh
new file mode 100755
index 000000000000..8b68964b29f4
--- /dev/null
+++ b/tools/testing/selftests/drivers/platform/x86/intel/ifs/test_ifs.sh
@@ -0,0 +1,494 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test the functionality of the Intel IFS(In Field Scan) driver.
+#
+
+# Matched with kselftest framework: tools/testing/selftests/kselftest.h
+readonly KSFT_PASS=0
+readonly KSFT_FAIL=1
+readonly KSFT_XFAIL=2
+readonly KSFT_SKIP=4
+
+readonly CPU_SYSFS="/sys/devices/system/cpu"
+readonly CPU_OFFLINE_SYSFS="${CPU_SYSFS}/offline"
+readonly IMG_PATH="/lib/firmware/intel/ifs_0"
+readonly IFS_SCAN_MODE="0"
+readonly IFS_ARRAY_BIST_SCAN_MODE="1"
+readonly IFS_PATH="/sys/devices/virtual/misc/intel_ifs"
+readonly IFS_SCAN_SYSFS_PATH="${IFS_PATH}_${IFS_SCAN_MODE}"
+readonly IFS_ARRAY_BIST_SYSFS_PATH="${IFS_PATH}_${IFS_ARRAY_BIST_SCAN_MODE}"
+readonly RUN_TEST="run_test"
+readonly STATUS="status"
+readonly DETAILS="details"
+readonly STATUS_PASS="pass"
+readonly PASS="PASS"
+readonly FAIL="FAIL"
+readonly INFO="INFO"
+readonly XFAIL="XFAIL"
+readonly SKIP="SKIP"
+readonly IFS_NAME="intel_ifs"
+readonly ALL="all"
+readonly SIBLINGS="siblings"
+
+# Matches arch/x86/include/asm/intel-family.h and
+# drivers/platform/x86/intel/ifs/core.c requirement as follows
+readonly SAPPHIRERAPIDS_X="8f"
+readonly EMERALDRAPIDS_X="cf"
+
+readonly INTEL_FAM6="06"
+
+LOOP_TIMES=3
+FML=""
+MODEL=""
+STEPPING=""
+CPU_FMS=""
+TRUE="true"
+FALSE="false"
+RESULT=$KSFT_PASS
+IMAGE_NAME=""
+INTERVAL_TIME=1
+OFFLINE_CPUS=""
+# For IFS cleanup tags
+ORIGIN_IFS_LOADED=""
+IFS_IMAGE_NEED_RESTORE=$FALSE
+IFS_LOG="/tmp/ifs_logs.$$"
+RANDOM_CPU=""
+DEFAULT_IMG_ID=""
+
+append_log()
+{
+ echo -e "$1" | tee -a "$IFS_LOG"
+}
+
+online_offline_cpu_list()
+{
+ local on_off=$1
+ local target_cpus=$2
+ local cpu=""
+ local cpu_start=""
+ local cpu_end=""
+ local i=""
+
+ if [[ -n "$target_cpus" ]]; then
+ for cpu in $(echo "$target_cpus" | tr ',' ' '); do
+ if [[ "$cpu" == *"-"* ]]; then
+ cpu_start=""
+ cpu_end=""
+ i=""
+ cpu_start=$(echo "$cpu" | cut -d "-" -f 1)
+ cpu_end=$(echo "$cpu" | cut -d "-" -f 2)
+ for((i=cpu_start;i<=cpu_end;i++)); do
+ append_log "[$INFO] echo $on_off > \
+${CPU_SYSFS}/cpu${i}/online"
+ echo "$on_off" > "$CPU_SYSFS"/cpu"$i"/online
+ done
+ else
+ set_target_cpu "$on_off" "$cpu"
+ fi
+ done
+ fi
+}
+
+ifs_scan_result_summary()
+{
+ local failed_info pass_num skip_num fail_num
+
+ if [[ -e "$IFS_LOG" ]]; then
+ failed_info=$(grep ^"\[${FAIL}\]" "$IFS_LOG")
+ fail_num=$(grep -c ^"\[${FAIL}\]" "$IFS_LOG")
+ skip_num=$(grep -c ^"\[${SKIP}\]" "$IFS_LOG")
+ pass_num=$(grep -c ^"\[${PASS}\]" "$IFS_LOG")
+
+ if [[ "$fail_num" -ne 0 ]]; then
+ RESULT=$KSFT_FAIL
+ echo "[$INFO] IFS test failure summary:"
+ echo "$failed_info"
+ elif [[ "$skip_num" -ne 0 ]]; then
+ RESULT=$KSFT_SKIP
+ fi
+ echo "[$INFO] IFS test pass:$pass_num, skip:$skip_num, fail:$fail_num"
+ else
+ echo "[$INFO] No file $IFS_LOG for IFS scan summary"
+ fi
+}
+
+ifs_cleanup()
+{
+ echo "[$INFO] Restore environment after IFS test"
+
+ # Restore ifs origin image if origin image backup step is needed
+ [[ "$IFS_IMAGE_NEED_RESTORE" == "$TRUE" ]] && {
+ mv -f "$IMG_PATH"/"$IMAGE_NAME"_origin "$IMG_PATH"/"$IMAGE_NAME"
+ }
+
+ # Restore the CPUs to the state before testing
+ [[ -z "$OFFLINE_CPUS" ]] || online_offline_cpu_list "0" "$OFFLINE_CPUS"
+
+ lsmod | grep -q "$IFS_NAME" && [[ "$ORIGIN_IFS_LOADED" == "$FALSE" ]] && {
+ echo "[$INFO] modprobe -r $IFS_NAME"
+ modprobe -r "$IFS_NAME"
+ }
+
+ ifs_scan_result_summary
+ [[ -e "$IFS_LOG" ]] && rm -rf "$IFS_LOG"
+
+ echo "[RESULT] IFS test exit with $RESULT"
+ exit "$RESULT"
+}
+
+do_cmd()
+{
+ local cmd=$*
+ local ret=""
+
+ append_log "[$INFO] $cmd"
+ eval "$cmd"
+ ret=$?
+ if [[ $ret -ne 0 ]]; then
+ append_log "[$FAIL] $cmd failed. Return code is $ret"
+ RESULT=$KSFT_XFAIL
+ ifs_cleanup
+ fi
+}
+
+test_exit()
+{
+ local info=$1
+ RESULT=$2
+
+ declare -A EXIT_MAP
+ EXIT_MAP[$KSFT_PASS]=$PASS
+ EXIT_MAP[$KSFT_FAIL]=$FAIL
+ EXIT_MAP[$KSFT_XFAIL]=$XFAIL
+ EXIT_MAP[$KSFT_SKIP]=$SKIP
+
+ append_log "[${EXIT_MAP[$RESULT]}] $info"
+ ifs_cleanup
+}
+
+online_all_cpus()
+{
+ local off_cpus=""
+
+ OFFLINE_CPUS=$(cat "$CPU_OFFLINE_SYSFS")
+ online_offline_cpu_list "1" "$OFFLINE_CPUS"
+
+ off_cpus=$(cat "$CPU_OFFLINE_SYSFS")
+ if [[ -z "$off_cpus" ]]; then
+ append_log "[$INFO] All CPUs are online."
+ else
+ append_log "[$XFAIL] There is offline cpu:$off_cpus after online all cpu!"
+ RESULT=$KSFT_XFAIL
+ ifs_cleanup
+ fi
+}
+
+get_cpu_fms()
+{
+ FML=$(grep -m 1 "family" /proc/cpuinfo | awk -F ":" '{printf "%02x",$2;}')
+ MODEL=$(grep -m 1 "model" /proc/cpuinfo | awk -F ":" '{printf "%02x",$2;}')
+ STEPPING=$(grep -m 1 "stepping" /proc/cpuinfo | awk -F ":" '{printf "%02x",$2;}')
+ CPU_FMS="${FML}-${MODEL}-${STEPPING}"
+}
+
+check_cpu_ifs_support_interval_time()
+{
+ get_cpu_fms
+
+ if [[ "$FML" != "$INTEL_FAM6" ]]; then
+ test_exit "CPU family:$FML does not support IFS" "$KSFT_SKIP"
+ fi
+
+ # Ucode has time interval requirement for IFS scan on same CPU as follows:
+ case $MODEL in
+ "$SAPPHIRERAPIDS_X")
+ INTERVAL_TIME=180;
+ ;;
+ "$EMERALDRAPIDS_X")
+ INTERVAL_TIME=30;
+ ;;
+ *)
+ # Set default interval time for other platforms
+ INTERVAL_TIME=1;
+ append_log "[$INFO] CPU FML:$FML model:0x$MODEL, default: 1s interval time"
+ ;;
+ esac
+}
+
+check_ifs_loaded()
+{
+ local ifs_info=""
+
+ ifs_info=$(lsmod | grep "$IFS_NAME")
+ if [[ -z "$ifs_info" ]]; then
+ append_log "[$INFO] modprobe $IFS_NAME"
+ modprobe "$IFS_NAME" || {
+ test_exit "Check if CONFIG_INTEL_IFS is set to m or \
+platform doesn't support ifs" "$KSFT_SKIP"
+ }
+ ifs_info=$(lsmod | grep "$IFS_NAME")
+ [[ -n "$ifs_info" ]] || test_exit "No ifs module listed by lsmod" "$KSFT_FAIL"
+ fi
+}
+
+test_ifs_scan_entry()
+{
+ local ifs_info=""
+
+ ifs_info=$(lsmod | grep "$IFS_NAME")
+
+ if [[ -z "$ifs_info" ]]; then
+ ORIGIN_IFS_LOADED="$FALSE"
+ check_ifs_loaded
+ else
+ ORIGIN_IFS_LOADED="$TRUE"
+ append_log "[$INFO] Module $IFS_NAME is already loaded"
+ fi
+
+ if [[ -d "$IFS_SCAN_SYSFS_PATH" ]]; then
+ append_log "[$PASS] IFS sysfs $IFS_SCAN_SYSFS_PATH entry is created\n"
+ else
+ test_exit "No sysfs entry in $IFS_SCAN_SYSFS_PATH" "$KSFT_FAIL"
+ fi
+}
+
+load_image()
+{
+ local image_id=$1
+ local image_info=""
+ local ret=""
+
+ check_ifs_loaded
+ if [[ -e "${IMG_PATH}/${IMAGE_NAME}" ]]; then
+ append_log "[$INFO] echo 0x$image_id > ${IFS_SCAN_SYSFS_PATH}/current_batch"
+ echo "0x$image_id" > "$IFS_SCAN_SYSFS_PATH"/current_batch 2>/dev/null
+ ret=$?
+ [[ "$ret" -eq 0 ]] || {
+ append_log "[$FAIL] Load ifs image $image_id failed with ret:$ret\n"
+ return "$ret"
+ }
+ image_info=$(cat ${IFS_SCAN_SYSFS_PATH}/current_batch)
+ if [[ "$image_info" == 0x"$image_id" ]]; then
+ append_log "[$PASS] load IFS current_batch:$image_info"
+ else
+ append_log "[$FAIL] current_batch:$image_info is not expected:$image_id"
+ return "$KSFT_FAIL"
+ fi
+ else
+ append_log "[$FAIL] No IFS image file ${IMG_PATH}/${IMAGE_NAME}"\
+ return "$KSFT_FAIL"
+ fi
+ return 0
+}
+
+test_load_origin_ifs_image()
+{
+ local image_id=$1
+
+ IMAGE_NAME="${CPU_FMS}-${image_id}.scan"
+
+ load_image "$image_id" || return $?
+ return 0
+}
+
+test_load_bad_ifs_image()
+{
+ local image_id=$1
+
+ IMAGE_NAME="${CPU_FMS}-${image_id}.scan"
+
+ do_cmd "mv -f ${IMG_PATH}/${IMAGE_NAME} ${IMG_PATH}/${IMAGE_NAME}_origin"
+
+ # Set IFS_IMAGE_NEED_RESTORE to true before corrupt the origin ifs image file
+ IFS_IMAGE_NEED_RESTORE=$TRUE
+ do_cmd "dd if=/dev/urandom of=${IMG_PATH}/${IMAGE_NAME} bs=1K count=6 2>/dev/null"
+
+ # Use the specified judgment for negative testing
+ append_log "[$INFO] echo 0x$image_id > ${IFS_SCAN_SYSFS_PATH}/current_batch"
+ echo "0x$image_id" > "$IFS_SCAN_SYSFS_PATH"/current_batch 2>/dev/null
+ ret=$?
+ if [[ "$ret" -ne 0 ]]; then
+ append_log "[$PASS] Load invalid ifs image failed with ret:$ret not 0 as expected"
+ else
+ append_log "[$FAIL] Load invalid ifs image ret:$ret unexpectedly"
+ fi
+
+ do_cmd "mv -f ${IMG_PATH}/${IMAGE_NAME}_origin ${IMG_PATH}/${IMAGE_NAME}"
+ IFS_IMAGE_NEED_RESTORE=$FALSE
+}
+
+test_bad_and_origin_ifs_image()
+{
+ local image_id=$1
+
+ append_log "[$INFO] Test loading bad and then loading original IFS image:"
+ test_load_origin_ifs_image "$image_id" || return $?
+ test_load_bad_ifs_image "$image_id"
+ # Load origin image again and make sure it's worked
+ test_load_origin_ifs_image "$image_id" || return $?
+ append_log "[$INFO] Loading invalid IFS image and then loading initial image passed.\n"
+}
+
+ifs_test_cpu()
+{
+ local ifs_mode=$1
+ local cpu_num=$2
+ local image_id status details ret result result_info
+
+ echo "$cpu_num" > "$IFS_PATH"_"$ifs_mode"/"$RUN_TEST"
+ ret=$?
+
+ status=$(cat "${IFS_PATH}_${ifs_mode}/${STATUS}")
+ details=$(cat "${IFS_PATH}_${ifs_mode}/${DETAILS}")
+
+ if [[ "$ret" -eq 0 && "$status" == "$STATUS_PASS" ]]; then
+ result="$PASS"
+ else
+ result="$FAIL"
+ fi
+
+ cpu_num=$(cat "${CPU_SYSFS}/cpu${cpu_num}/topology/thread_siblings_list")
+
+ # There is no image file for IFS ARRAY BIST scan
+ if [[ -e "${IFS_PATH}_${ifs_mode}/current_batch" ]]; then
+ image_id=$(cat "${IFS_PATH}_${ifs_mode}/current_batch")
+ result_info=$(printf "[%s] ifs_%1d cpu(s):%s, current_batch:0x%02x, \
+ret:%2d, status:%s, details:0x%016x" \
+ "$result" "$ifs_mode" "$cpu_num" "$image_id" "$ret" \
+ "$status" "$details")
+ else
+ result_info=$(printf "[%s] ifs_%1d cpu(s):%s, ret:%2d, status:%s, details:0x%016x" \
+ "$result" "$ifs_mode" "$cpu_num" "$ret" "$status" "$details")
+ fi
+
+ append_log "$result_info"
+}
+
+ifs_test_cpus()
+{
+ local cpus_type=$1
+ local ifs_mode=$2
+ local image_id=$3
+ local cpu_max_num=""
+ local cpu_num=""
+
+ case "$cpus_type" in
+ "$ALL")
+ cpu_max_num=$(($(nproc) - 1))
+ cpus=$(seq 0 $cpu_max_num)
+ ;;
+ "$SIBLINGS")
+ cpus=$(cat ${CPU_SYSFS}/cpu*/topology/thread_siblings_list \
+ | sed -e 's/,.*//' \
+ | sed -e 's/-.*//' \
+ | sort -n \
+ | uniq)
+ ;;
+ *)
+ test_exit "Invalid cpus_type:$cpus_type" "$KSFT_XFAIL"
+ ;;
+ esac
+
+ for cpu_num in $cpus; do
+ ifs_test_cpu "$ifs_mode" "$cpu_num"
+ done
+
+ if [[ -z "$image_id" ]]; then
+ append_log "[$INFO] ifs_$ifs_mode test $cpus_type cpus completed\n"
+ else
+ append_log "[$INFO] ifs_$ifs_mode $cpus_type cpus with $CPU_FMS-$image_id.scan \
+completed\n"
+ fi
+}
+
+test_ifs_same_cpu_loop()
+{
+ local ifs_mode=$1
+ local cpu_num=$2
+ local loop_times=$3
+
+ append_log "[$INFO] Test ifs mode $ifs_mode on CPU:$cpu_num for $loop_times rounds:"
+ [[ "$ifs_mode" == "$IFS_SCAN_MODE" ]] && {
+ load_image "$DEFAULT_IMG_ID" || return $?
+ }
+ for (( i=1; i<=loop_times; i++ )); do
+ append_log "[$INFO] Loop iteration: $i in total of $loop_times"
+ # Only IFS scan needs the interval time
+ if [[ "$ifs_mode" == "$IFS_SCAN_MODE" ]]; then
+ do_cmd "sleep $INTERVAL_TIME"
+ elif [[ "$ifs_mode" == "$IFS_ARRAY_BIST_SCAN_MODE" ]]; then
+ true
+ else
+ test_exit "Invalid ifs_mode:$ifs_mode" "$KSFT_XFAIL"
+ fi
+
+ ifs_test_cpu "$ifs_mode" "$cpu_num"
+ done
+ append_log "[$INFO] $loop_times rounds of ifs_$ifs_mode test on CPU:$cpu_num completed.\n"
+}
+
+test_ifs_scan_available_imgs()
+{
+ local image_ids=""
+ local image_id=""
+
+ append_log "[$INFO] Test ifs scan with available images:"
+ image_ids=$(find "$IMG_PATH" -maxdepth 1 -name "${CPU_FMS}-[0-9a-fA-F][0-9a-fA-F].scan" \
+ 2>/dev/null \
+ | sort \
+ | awk -F "-" '{print $NF}' \
+ | cut -d "." -f 1)
+
+ for image_id in $image_ids; do
+ load_image "$image_id" || return $?
+
+ ifs_test_cpus "$SIBLINGS" "$IFS_SCAN_MODE" "$image_id"
+ # IFS scan requires time interval for the scan on the same CPU
+ do_cmd "sleep $INTERVAL_TIME"
+ done
+}
+
+prepare_ifs_test_env()
+{
+ local max_cpu=""
+
+ check_cpu_ifs_support_interval_time
+
+ online_all_cpus
+ max_cpu=$(($(nproc) - 1))
+ RANDOM_CPU=$(shuf -i 0-$max_cpu -n 1)
+
+ DEFAULT_IMG_ID=$(find $IMG_PATH -maxdepth 1 -name "${CPU_FMS}-[0-9a-fA-F][0-9a-fA-F].scan" \
+ 2>/dev/null \
+ | sort \
+ | head -n 1 \
+ | awk -F "-" '{print $NF}' \
+ | cut -d "." -f 1)
+}
+
+test_ifs()
+{
+ prepare_ifs_test_env
+
+ test_ifs_scan_entry
+
+ if [[ -z "$DEFAULT_IMG_ID" ]]; then
+ append_log "[$SKIP] No proper ${IMG_PATH}/${CPU_FMS}-*.scan, skip ifs_0 scan"
+ else
+ test_bad_and_origin_ifs_image "$DEFAULT_IMG_ID"
+ test_ifs_scan_available_imgs
+ test_ifs_same_cpu_loop "$IFS_SCAN_MODE" "$RANDOM_CPU" "$LOOP_TIMES"
+ fi
+
+ if [[ -d "$IFS_ARRAY_BIST_SYSFS_PATH" ]]; then
+ ifs_test_cpus "$SIBLINGS" "$IFS_ARRAY_BIST_SCAN_MODE"
+ test_ifs_same_cpu_loop "$IFS_ARRAY_BIST_SCAN_MODE" "$RANDOM_CPU" "$LOOP_TIMES"
+ else
+ append_log "[$SKIP] No $IFS_ARRAY_BIST_SYSFS_PATH, skip IFS ARRAY BIST scan"
+ fi
+}
+
+trap ifs_cleanup SIGTERM SIGINT
+test_ifs
+ifs_cleanup
diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile
index fb4472ddffd8..ba012bc5aab9 100644
--- a/tools/testing/selftests/exec/Makefile
+++ b/tools/testing/selftests/exec/Makefile
@@ -1,10 +1,14 @@
# SPDX-License-Identifier: GPL-2.0
CFLAGS = -Wall
CFLAGS += -Wno-nonnull
-CFLAGS += -D_GNU_SOURCE
+
+ALIGNS := 0x1000 0x200000 0x1000000
+ALIGN_PIES := $(patsubst %,load_address.%,$(ALIGNS))
+ALIGN_STATIC_PIES := $(patsubst %,load_address.static.%,$(ALIGNS))
+ALIGNMENT_TESTS := $(ALIGN_PIES) $(ALIGN_STATIC_PIES)
TEST_PROGS := binfmt_script.py
-TEST_GEN_PROGS := execveat load_address_4096 load_address_2097152 load_address_16777216 non-regular
+TEST_GEN_PROGS := execveat non-regular $(ALIGNMENT_TESTS)
TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir
# Makefile is a run-time dependency, since it's accessed by the execveat test
TEST_FILES := Makefile
@@ -28,9 +32,9 @@ $(OUTPUT)/execveat.symlink: $(OUTPUT)/execveat
$(OUTPUT)/execveat.denatured: $(OUTPUT)/execveat
cp $< $@
chmod -x $@
-$(OUTPUT)/load_address_4096: load_address.c
- $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000 -pie -static $< -o $@
-$(OUTPUT)/load_address_2097152: load_address.c
- $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x200000 -pie -static $< -o $@
-$(OUTPUT)/load_address_16777216: load_address.c
- $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000000 -pie -static $< -o $@
+$(OUTPUT)/load_address.0x%: load_address.c
+ $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=$(lastword $(subst ., ,$@)) \
+ -fPIE -pie $< -o $@
+$(OUTPUT)/load_address.static.0x%: load_address.c
+ $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=$(lastword $(subst ., ,$@)) \
+ -fPIE -static-pie $< -o $@
diff --git a/tools/testing/selftests/exec/load_address.c b/tools/testing/selftests/exec/load_address.c
index 17e3207d34ae..8257fddba8c8 100644
--- a/tools/testing/selftests/exec/load_address.c
+++ b/tools/testing/selftests/exec/load_address.c
@@ -5,11 +5,13 @@
#include <link.h>
#include <stdio.h>
#include <stdlib.h>
+#include <stdbool.h>
#include "../kselftest.h"
struct Statistics {
unsigned long long load_address;
unsigned long long alignment;
+ bool interp;
};
int ExtractStatistics(struct dl_phdr_info *info, size_t size, void *data)
@@ -26,11 +28,20 @@ int ExtractStatistics(struct dl_phdr_info *info, size_t size, void *data)
stats->alignment = 0;
for (i = 0; i < info->dlpi_phnum; i++) {
+ unsigned long long align;
+
+ if (info->dlpi_phdr[i].p_type == PT_INTERP) {
+ stats->interp = true;
+ continue;
+ }
+
if (info->dlpi_phdr[i].p_type != PT_LOAD)
continue;
- if (info->dlpi_phdr[i].p_align > stats->alignment)
- stats->alignment = info->dlpi_phdr[i].p_align;
+ align = info->dlpi_phdr[i].p_align;
+
+ if (align > stats->alignment)
+ stats->alignment = align;
}
return 1; // Terminate dl_iterate_phdr.
@@ -38,27 +49,57 @@ int ExtractStatistics(struct dl_phdr_info *info, size_t size, void *data)
int main(int argc, char **argv)
{
- struct Statistics extracted;
- unsigned long long misalign;
+ struct Statistics extracted = { };
+ unsigned long long misalign, pow2;
+ bool interp_needed;
+ char buf[1024];
+ FILE *maps;
int ret;
ksft_print_header();
- ksft_set_plan(1);
+ ksft_set_plan(4);
+
+ /* Dump maps file for debugging reference. */
+ maps = fopen("/proc/self/maps", "r");
+ if (!maps)
+ ksft_exit_fail_msg("FAILED: /proc/self/maps: %s\n", strerror(errno));
+ while (fgets(buf, sizeof(buf), maps)) {
+ ksft_print_msg("%s", buf);
+ }
+ fclose(maps);
+ /* Walk the program headers. */
ret = dl_iterate_phdr(ExtractStatistics, &extracted);
if (ret != 1)
ksft_exit_fail_msg("FAILED: dl_iterate_phdr\n");
- if (extracted.alignment == 0)
- ksft_exit_fail_msg("FAILED: No alignment found\n");
- else if (extracted.alignment & (extracted.alignment - 1))
- ksft_exit_fail_msg("FAILED: Alignment is not a power of 2\n");
+ /* Report our findings. */
+ ksft_print_msg("load_address=%#llx alignment=%#llx\n",
+ extracted.load_address, extracted.alignment);
+
+ /* If we're named with ".static." we expect no INTERP. */
+ interp_needed = strstr(argv[0], ".static.") == NULL;
+
+ /* Were we built as expected? */
+ ksft_test_result(interp_needed == extracted.interp,
+ "%s INTERP program header %s\n",
+ interp_needed ? "Wanted" : "Unwanted",
+ extracted.interp ? "seen" : "missing");
+
+ /* Did we find an alignment? */
+ ksft_test_result(extracted.alignment != 0,
+ "Alignment%s found\n", extracted.alignment ? "" : " NOT");
+
+ /* Is the alignment sane? */
+ pow2 = extracted.alignment & (extracted.alignment - 1);
+ ksft_test_result(pow2 == 0,
+ "Alignment is%s a power of 2: %#llx\n",
+ pow2 == 0 ? "" : " NOT", extracted.alignment);
+ /* Is the load address aligned? */
misalign = extracted.load_address & (extracted.alignment - 1);
- if (misalign)
- ksft_exit_fail_msg("FAILED: alignment = %llu, load_address = %llu\n",
- extracted.alignment, extracted.load_address);
+ ksft_test_result(misalign == 0, "Load Address is %saligned (%#llx)\n",
+ misalign ? "MIS" : "", misalign);
- ksft_test_result_pass("Completed\n");
ksft_finished();
}
diff --git a/tools/testing/selftests/exec/recursion-depth.c b/tools/testing/selftests/exec/recursion-depth.c
index b2f37d86a5f6..438c8ff2fd26 100644
--- a/tools/testing/selftests/exec/recursion-depth.c
+++ b/tools/testing/selftests/exec/recursion-depth.c
@@ -37,25 +37,25 @@ int main(void)
ksft_test_result_skip("error: unshare, errno %d\n", errno);
ksft_finished();
}
- ksft_exit_fail_msg("error: unshare, errno %d\n", errno);
+ ksft_exit_fail_perror("error: unshare");
}
if (mount(NULL, "/", NULL, MS_PRIVATE | MS_REC, NULL) == -1)
- ksft_exit_fail_msg("error: mount '/', errno %d\n", errno);
+ ksft_exit_fail_perror("error: mount '/'");
/* Require "exec" filesystem. */
if (mount(NULL, "/tmp", "ramfs", 0, NULL) == -1)
- ksft_exit_fail_msg("error: mount ramfs, errno %d\n", errno);
+ ksft_exit_fail_perror("error: mount ramfs");
#define FILENAME "/tmp/1"
fd = creat(FILENAME, 0700);
if (fd == -1)
- ksft_exit_fail_msg("error: creat, errno %d\n", errno);
+ ksft_exit_fail_perror("error: creat");
#define S "#!" FILENAME "\n"
if (write(fd, S, strlen(S)) != strlen(S))
- ksft_exit_fail_msg("error: write, errno %d\n", errno);
+ ksft_exit_fail_perror("error: write");
close(fd);
diff --git a/tools/testing/selftests/fchmodat2/Makefile b/tools/testing/selftests/fchmodat2/Makefile
index 71ec34bf1501..4373cea79b79 100644
--- a/tools/testing/selftests/fchmodat2/Makefile
+++ b/tools/testing/selftests/fchmodat2/Makefile
@@ -1,6 +1,15 @@
# SPDX-License-Identifier: GPL-2.0-or-later
-CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined -static-libasan $(KHDR_INCLUDES)
+CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined $(KHDR_INCLUDES)
+
+# gcc requires -static-libasan in order to ensure that Address Sanitizer's
+# library is the first one loaded. However, clang already statically links the
+# Address Sanitizer if -fsanitize is specified. Therefore, simply omit
+# -static-libasan for clang builds.
+ifeq ($(LLVM),)
+ CFLAGS += -static-libasan
+endif
+
TEST_GEN_PROGS := fchmodat2_test
include ../lib.mk
diff --git a/tools/testing/selftests/filesystems/binderfs/Makefile b/tools/testing/selftests/filesystems/binderfs/Makefile
index c2f7cef919c0..eb4c3b411934 100644
--- a/tools/testing/selftests/filesystems/binderfs/Makefile
+++ b/tools/testing/selftests/filesystems/binderfs/Makefile
@@ -3,6 +3,4 @@
CFLAGS += $(KHDR_INCLUDES) -pthread
TEST_GEN_PROGS := binderfs_test
-binderfs_test: binderfs_test.c ../../kselftest.h ../../kselftest_harness.h
-
include ../../lib.mk
diff --git a/tools/testing/selftests/filesystems/eventfd/eventfd_test.c b/tools/testing/selftests/filesystems/eventfd/eventfd_test.c
index f142a137526c..85acb4e3ef00 100644
--- a/tools/testing/selftests/filesystems/eventfd/eventfd_test.c
+++ b/tools/testing/selftests/filesystems/eventfd/eventfd_test.c
@@ -13,6 +13,8 @@
#include <sys/eventfd.h>
#include "../../kselftest_harness.h"
+#define EVENTFD_TEST_ITERATIONS 100000UL
+
struct error {
int code;
char msg[512];
@@ -40,7 +42,7 @@ static inline int sys_eventfd2(unsigned int count, int flags)
return syscall(__NR_eventfd2, count, flags);
}
-TEST(eventfd01)
+TEST(eventfd_check_flag_rdwr)
{
int fd, flags;
@@ -54,7 +56,7 @@ TEST(eventfd01)
close(fd);
}
-TEST(eventfd02)
+TEST(eventfd_check_flag_cloexec)
{
int fd, flags;
@@ -68,7 +70,7 @@ TEST(eventfd02)
close(fd);
}
-TEST(eventfd03)
+TEST(eventfd_check_flag_nonblock)
{
int fd, flags;
@@ -83,7 +85,7 @@ TEST(eventfd03)
close(fd);
}
-TEST(eventfd04)
+TEST(eventfd_chek_flag_cloexec_and_nonblock)
{
int fd, flags;
@@ -161,7 +163,7 @@ static int verify_fdinfo(int fd, struct error *err, const char *prefix,
return 0;
}
-TEST(eventfd05)
+TEST(eventfd_check_flag_semaphore)
{
struct error err = {0};
int fd, ret;
@@ -183,4 +185,128 @@ TEST(eventfd05)
close(fd);
}
+/*
+ * A write(2) fails with the error EINVAL if the size of the supplied buffer
+ * is less than 8 bytes, or if an attempt is made to write the value
+ * 0xffffffffffffffff.
+ */
+TEST(eventfd_check_write)
+{
+ uint64_t value = 1;
+ ssize_t size;
+ int fd;
+
+ fd = sys_eventfd2(0, 0);
+ ASSERT_GE(fd, 0);
+
+ size = write(fd, &value, sizeof(int));
+ EXPECT_EQ(size, -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ size = write(fd, &value, sizeof(value));
+ EXPECT_EQ(size, sizeof(value));
+
+ value = (uint64_t)-1;
+ size = write(fd, &value, sizeof(value));
+ EXPECT_EQ(size, -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ close(fd);
+}
+
+/*
+ * A read(2) fails with the error EINVAL if the size of the supplied buffer is
+ * less than 8 bytes.
+ */
+TEST(eventfd_check_read)
+{
+ uint64_t value;
+ ssize_t size;
+ int fd;
+
+ fd = sys_eventfd2(1, 0);
+ ASSERT_GE(fd, 0);
+
+ size = read(fd, &value, sizeof(int));
+ EXPECT_EQ(size, -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ size = read(fd, &value, sizeof(value));
+ EXPECT_EQ(size, sizeof(value));
+ EXPECT_EQ(value, 1);
+
+ close(fd);
+}
+
+
+/*
+ * If EFD_SEMAPHORE was not specified and the eventfd counter has a nonzero
+ * value, then a read(2) returns 8 bytes containing that value, and the
+ * counter's value is reset to zero.
+ * If the eventfd counter is zero at the time of the call to read(2), then the
+ * call fails with the error EAGAIN if the file descriptor has been made nonblocking.
+ */
+TEST(eventfd_check_read_with_nonsemaphore)
+{
+ uint64_t value;
+ ssize_t size;
+ int fd;
+ int i;
+
+ fd = sys_eventfd2(0, EFD_NONBLOCK);
+ ASSERT_GE(fd, 0);
+
+ value = 1;
+ for (i = 0; i < EVENTFD_TEST_ITERATIONS; i++) {
+ size = write(fd, &value, sizeof(value));
+ EXPECT_EQ(size, sizeof(value));
+ }
+
+ size = read(fd, &value, sizeof(value));
+ EXPECT_EQ(size, sizeof(uint64_t));
+ EXPECT_EQ(value, EVENTFD_TEST_ITERATIONS);
+
+ size = read(fd, &value, sizeof(value));
+ EXPECT_EQ(size, -1);
+ EXPECT_EQ(errno, EAGAIN);
+
+ close(fd);
+}
+
+/*
+ * If EFD_SEMAPHORE was specified and the eventfd counter has a nonzero value,
+ * then a read(2) returns 8 bytes containing the value 1, and the counter's
+ * value is decremented by 1.
+ * If the eventfd counter is zero at the time of the call to read(2), then the
+ * call fails with the error EAGAIN if the file descriptor has been made nonblocking.
+ */
+TEST(eventfd_check_read_with_semaphore)
+{
+ uint64_t value;
+ ssize_t size;
+ int fd;
+ int i;
+
+ fd = sys_eventfd2(0, EFD_SEMAPHORE|EFD_NONBLOCK);
+ ASSERT_GE(fd, 0);
+
+ value = 1;
+ for (i = 0; i < EVENTFD_TEST_ITERATIONS; i++) {
+ size = write(fd, &value, sizeof(value));
+ EXPECT_EQ(size, sizeof(value));
+ }
+
+ for (i = 0; i < EVENTFD_TEST_ITERATIONS; i++) {
+ size = read(fd, &value, sizeof(value));
+ EXPECT_EQ(size, sizeof(value));
+ EXPECT_EQ(value, 1);
+ }
+
+ size = read(fd, &value, sizeof(value));
+ EXPECT_EQ(size, -1);
+ EXPECT_EQ(errno, EAGAIN);
+
+ close(fd);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c b/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c
index 759f86e7d263..2862aae58b79 100644
--- a/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c
+++ b/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE
+#define __SANE_USERSPACE_TYPES__ // Use ll64
#include <inttypes.h>
#include <unistd.h>
diff --git a/tools/testing/selftests/filesystems/statmount/Makefile b/tools/testing/selftests/filesystems/statmount/Makefile
index 07a0d5b545ca..3af3136e35a4 100644
--- a/tools/testing/selftests/filesystems/statmount/Makefile
+++ b/tools/testing/selftests/filesystems/statmount/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-or-later
CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES)
-TEST_GEN_PROGS := statmount_test
+TEST_GEN_PROGS := statmount_test statmount_test_ns
include ../../lib.mk
diff --git a/tools/testing/selftests/filesystems/statmount/statmount.h b/tools/testing/selftests/filesystems/statmount/statmount.h
new file mode 100644
index 000000000000..f4294bab9d73
--- /dev/null
+++ b/tools/testing/selftests/filesystems/statmount/statmount.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __STATMOUNT_H
+#define __STATMOUNT_H
+
+#include <stdint.h>
+#include <linux/mount.h>
+#include <asm/unistd.h>
+
+static inline int statmount(uint64_t mnt_id, uint64_t mnt_ns_id, uint64_t mask,
+ struct statmount *buf, size_t bufsize,
+ unsigned int flags)
+{
+ struct mnt_id_req req = {
+ .size = MNT_ID_REQ_SIZE_VER0,
+ .mnt_id = mnt_id,
+ .param = mask,
+ };
+
+ if (mnt_ns_id) {
+ req.size = MNT_ID_REQ_SIZE_VER1;
+ req.mnt_ns_id = mnt_ns_id;
+ }
+
+ return syscall(__NR_statmount, &req, buf, bufsize, flags);
+}
+
+static ssize_t listmount(uint64_t mnt_id, uint64_t mnt_ns_id,
+ uint64_t last_mnt_id, uint64_t list[], size_t num,
+ unsigned int flags)
+{
+ struct mnt_id_req req = {
+ .size = MNT_ID_REQ_SIZE_VER0,
+ .mnt_id = mnt_id,
+ .param = last_mnt_id,
+ };
+
+ if (mnt_ns_id) {
+ req.size = MNT_ID_REQ_SIZE_VER1;
+ req.mnt_ns_id = mnt_ns_id;
+ }
+
+ return syscall(__NR_listmount, &req, list, num, flags);
+}
+
+#endif /* __STATMOUNT_H */
diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test.c b/tools/testing/selftests/filesystems/statmount/statmount_test.c
index 3eafd7da58e2..c773334bbcc9 100644
--- a/tools/testing/selftests/filesystems/statmount/statmount_test.c
+++ b/tools/testing/selftests/filesystems/statmount/statmount_test.c
@@ -3,17 +3,16 @@
#define _GNU_SOURCE
#include <assert.h>
-#include <stdint.h>
+#include <stddef.h>
#include <sched.h>
#include <fcntl.h>
#include <sys/param.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/statfs.h>
-#include <linux/mount.h>
#include <linux/stat.h>
-#include <asm/unistd.h>
+#include "statmount.h"
#include "../../kselftest.h"
static const char *const known_fs[] = {
@@ -35,18 +34,6 @@ static const char *const known_fs[] = {
"ufs", "v7", "vboxsf", "vfat", "virtiofs", "vxfs", "xenfs", "xfs",
"zonefs", NULL };
-static int statmount(uint64_t mnt_id, uint64_t mask, struct statmount *buf,
- size_t bufsize, unsigned int flags)
-{
- struct mnt_id_req req = {
- .size = MNT_ID_REQ_SIZE_VER0,
- .mnt_id = mnt_id,
- .param = mask,
- };
-
- return syscall(__NR_statmount, &req, buf, bufsize, flags);
-}
-
static struct statmount *statmount_alloc(uint64_t mnt_id, uint64_t mask, unsigned int flags)
{
size_t bufsize = 1 << 15;
@@ -55,7 +42,7 @@ static struct statmount *statmount_alloc(uint64_t mnt_id, uint64_t mask, unsigne
int ret;
for (;;) {
- ret = statmount(mnt_id, mask, tmp, bufsize, flags);
+ ret = statmount(mnt_id, 0, mask, tmp, bufsize, flags);
if (ret != -1)
break;
if (tofree)
@@ -120,12 +107,20 @@ static char root_mntpoint[] = "/tmp/statmount_test_root.XXXXXX";
static int orig_root;
static uint64_t root_id, parent_id;
static uint32_t old_root_id, old_parent_id;
-
+static FILE *f_mountinfo;
static void cleanup_namespace(void)
{
- fchdir(orig_root);
- chroot(".");
+ int ret;
+
+ ret = fchdir(orig_root);
+ if (ret == -1)
+ ksft_perror("fchdir to original root");
+
+ ret = chroot(".");
+ if (ret == -1)
+ ksft_perror("chroot to original root");
+
umount2(root_mntpoint, MNT_DETACH);
rmdir(root_mntpoint);
}
@@ -137,7 +132,7 @@ static void setup_namespace(void)
uid_t uid = getuid();
gid_t gid = getgid();
- ret = unshare(CLONE_NEWNS|CLONE_NEWUSER);
+ ret = unshare(CLONE_NEWNS|CLONE_NEWUSER|CLONE_NEWPID);
if (ret == -1)
ksft_exit_fail_msg("unsharing mountns and userns: %s\n",
strerror(errno));
@@ -148,6 +143,11 @@ static void setup_namespace(void)
sprintf(buf, "0 %d 1", gid);
write_file("/proc/self/gid_map", buf);
+ f_mountinfo = fopen("/proc/self/mountinfo", "re");
+ if (!f_mountinfo)
+ ksft_exit_fail_msg("failed to open mountinfo: %s\n",
+ strerror(errno));
+
ret = mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL);
if (ret == -1)
ksft_exit_fail_msg("making mount tree private: %s\n",
@@ -207,25 +207,13 @@ static int setup_mount_tree(int log2_num)
return 0;
}
-static ssize_t listmount(uint64_t mnt_id, uint64_t last_mnt_id,
- uint64_t list[], size_t num, unsigned int flags)
-{
- struct mnt_id_req req = {
- .size = MNT_ID_REQ_SIZE_VER0,
- .mnt_id = mnt_id,
- .param = last_mnt_id,
- };
-
- return syscall(__NR_listmount, &req, list, num, flags);
-}
-
static void test_listmount_empty_root(void)
{
ssize_t res;
const unsigned int size = 32;
uint64_t list[size];
- res = listmount(LSMT_ROOT, 0, list, size, 0);
+ res = listmount(LSMT_ROOT, 0, 0, list, size, 0);
if (res == -1) {
ksft_test_result_fail("listmount: %s\n", strerror(errno));
return;
@@ -250,7 +238,7 @@ static void test_statmount_zero_mask(void)
struct statmount sm;
int ret;
- ret = statmount(root_id, 0, &sm, sizeof(sm), 0);
+ ret = statmount(root_id, 0, 0, &sm, sizeof(sm), 0);
if (ret == -1) {
ksft_test_result_fail("statmount zero mask: %s\n",
strerror(errno));
@@ -276,7 +264,7 @@ static void test_statmount_mnt_basic(void)
int ret;
uint64_t mask = STATMOUNT_MNT_BASIC;
- ret = statmount(root_id, mask, &sm, sizeof(sm), 0);
+ ret = statmount(root_id, 0, mask, &sm, sizeof(sm), 0);
if (ret == -1) {
ksft_test_result_fail("statmount mnt basic: %s\n",
strerror(errno));
@@ -336,7 +324,7 @@ static void test_statmount_sb_basic(void)
struct statx sx;
struct statfs sf;
- ret = statmount(root_id, mask, &sm, sizeof(sm), 0);
+ ret = statmount(root_id, 0, mask, &sm, sizeof(sm), 0);
if (ret == -1) {
ksft_test_result_fail("statmount sb basic: %s\n",
strerror(errno));
@@ -461,6 +449,88 @@ static void test_statmount_fs_type(void)
free(sm);
}
+static void test_statmount_mnt_opts(void)
+{
+ struct statmount *sm;
+ const char *statmount_opts;
+ char *line = NULL;
+ size_t len = 0;
+
+ sm = statmount_alloc(root_id, STATMOUNT_MNT_BASIC | STATMOUNT_MNT_OPTS,
+ 0);
+ if (!sm) {
+ ksft_test_result_fail("statmount mnt opts: %s\n",
+ strerror(errno));
+ return;
+ }
+
+ while (getline(&line, &len, f_mountinfo) != -1) {
+ int i;
+ char *p, *p2;
+ unsigned int old_mnt_id;
+
+ old_mnt_id = atoi(line);
+ if (old_mnt_id != sm->mnt_id_old)
+ continue;
+
+ for (p = line, i = 0; p && i < 5; i++)
+ p = strchr(p + 1, ' ');
+ if (!p)
+ continue;
+
+ p2 = strchr(p + 1, ' ');
+ if (!p2)
+ continue;
+ *p2 = '\0';
+ p = strchr(p2 + 1, '-');
+ if (!p)
+ continue;
+ for (p++, i = 0; p && i < 2; i++)
+ p = strchr(p + 1, ' ');
+ if (!p)
+ continue;
+ p++;
+
+ /* skip generic superblock options */
+ if (strncmp(p, "ro", 2) == 0)
+ p += 2;
+ else if (strncmp(p, "rw", 2) == 0)
+ p += 2;
+ if (*p == ',')
+ p++;
+ if (strncmp(p, "sync", 4) == 0)
+ p += 4;
+ if (*p == ',')
+ p++;
+ if (strncmp(p, "dirsync", 7) == 0)
+ p += 7;
+ if (*p == ',')
+ p++;
+ if (strncmp(p, "lazytime", 8) == 0)
+ p += 8;
+ if (*p == ',')
+ p++;
+ p2 = strrchr(p, '\n');
+ if (p2)
+ *p2 = '\0';
+
+ statmount_opts = sm->str + sm->mnt_opts;
+ if (strcmp(statmount_opts, p) != 0)
+ ksft_test_result_fail(
+ "unexpected mount options: '%s' != '%s'\n",
+ statmount_opts, p);
+ else
+ ksft_test_result_pass("statmount mount options\n");
+ free(sm);
+ free(line);
+ return;
+ }
+
+ ksft_test_result_fail("didnt't find mount entry\n");
+ free(sm);
+ free(line);
+}
+
static void test_statmount_string(uint64_t mask, size_t off, const char *name)
{
struct statmount *sm;
@@ -497,14 +567,14 @@ static void test_statmount_string(uint64_t mask, size_t off, const char *name)
exactsize = sm->size;
shortsize = sizeof(*sm) + i;
- ret = statmount(root_id, mask, sm, exactsize, 0);
+ ret = statmount(root_id, 0, mask, sm, exactsize, 0);
if (ret == -1) {
ksft_test_result_fail("statmount exact size: %s\n",
strerror(errno));
goto out;
}
errno = 0;
- ret = statmount(root_id, mask, sm, shortsize, 0);
+ ret = statmount(root_id, 0, mask, sm, shortsize, 0);
if (ret != -1 || errno != EOVERFLOW) {
ksft_test_result_fail("should have failed with EOVERFLOW: %s\n",
strerror(errno));
@@ -532,7 +602,7 @@ static void test_listmount_tree(void)
if (res == -1)
return;
- num = res = listmount(LSMT_ROOT, 0, list, size, 0);
+ num = res = listmount(LSMT_ROOT, 0, 0, list, size, 0);
if (res == -1) {
ksft_test_result_fail("listmount: %s\n", strerror(errno));
return;
@@ -544,7 +614,7 @@ static void test_listmount_tree(void)
}
for (i = 0; i < size - step;) {
- res = listmount(LSMT_ROOT, i ? list2[i - 1] : 0, list2 + i, step, 0);
+ res = listmount(LSMT_ROOT, 0, i ? list2[i - 1] : 0, list2 + i, step, 0);
if (res == -1)
ksft_test_result_fail("short listmount: %s\n",
strerror(errno));
@@ -576,18 +646,18 @@ int main(void)
int ret;
uint64_t all_mask = STATMOUNT_SB_BASIC | STATMOUNT_MNT_BASIC |
STATMOUNT_PROPAGATE_FROM | STATMOUNT_MNT_ROOT |
- STATMOUNT_MNT_POINT | STATMOUNT_FS_TYPE;
+ STATMOUNT_MNT_POINT | STATMOUNT_FS_TYPE | STATMOUNT_MNT_NS_ID;
ksft_print_header();
- ret = statmount(0, 0, NULL, 0, 0);
+ ret = statmount(0, 0, 0, NULL, 0, 0);
assert(ret == -1);
if (errno == ENOSYS)
ksft_exit_skip("statmount() syscall not supported\n");
setup_namespace();
- ksft_set_plan(14);
+ ksft_set_plan(15);
test_listmount_empty_root();
test_statmount_zero_mask();
test_statmount_mnt_basic();
@@ -595,6 +665,7 @@ int main(void)
test_statmount_mnt_root();
test_statmount_mnt_point();
test_statmount_fs_type();
+ test_statmount_mnt_opts();
test_statmount_string(STATMOUNT_MNT_ROOT, str_off(mnt_root), "mount root");
test_statmount_string(STATMOUNT_MNT_POINT, str_off(mnt_point), "mount point");
test_statmount_string(STATMOUNT_FS_TYPE, str_off(fs_type), "fs type");
diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c
new file mode 100644
index 000000000000..e044f5fc57fd
--- /dev/null
+++ b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c
@@ -0,0 +1,364 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#define _GNU_SOURCE
+
+#include <assert.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sched.h>
+#include <stdlib.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <linux/nsfs.h>
+#include <linux/stat.h>
+
+#include "statmount.h"
+#include "../../kselftest.h"
+
+#define NSID_PASS 0
+#define NSID_FAIL 1
+#define NSID_SKIP 2
+#define NSID_ERROR 3
+
+static void handle_result(int ret, const char *testname)
+{
+ if (ret == NSID_PASS)
+ ksft_test_result_pass("%s\n", testname);
+ else if (ret == NSID_FAIL)
+ ksft_test_result_fail("%s\n", testname);
+ else if (ret == NSID_ERROR)
+ ksft_exit_fail_msg("%s\n", testname);
+ else
+ ksft_test_result_skip("%s\n", testname);
+}
+
+static inline int wait_for_pid(pid_t pid)
+{
+ int status, ret;
+
+again:
+ ret = waitpid(pid, &status, 0);
+ if (ret == -1) {
+ if (errno == EINTR)
+ goto again;
+
+ ksft_print_msg("waitpid returned -1, errno=%d\n", errno);
+ return -1;
+ }
+
+ if (!WIFEXITED(status)) {
+ ksft_print_msg(
+ "waitpid !WIFEXITED, WIFSIGNALED=%d, WTERMSIG=%d\n",
+ WIFSIGNALED(status), WTERMSIG(status));
+ return -1;
+ }
+
+ ret = WEXITSTATUS(status);
+ return ret;
+}
+
+static int get_mnt_ns_id(const char *mnt_ns, uint64_t *mnt_ns_id)
+{
+ int fd = open(mnt_ns, O_RDONLY);
+
+ if (fd < 0) {
+ ksft_print_msg("failed to open for ns %s: %s\n",
+ mnt_ns, strerror(errno));
+ sleep(60);
+ return NSID_ERROR;
+ }
+
+ if (ioctl(fd, NS_GET_MNTNS_ID, mnt_ns_id) < 0) {
+ ksft_print_msg("failed to get the nsid for ns %s: %s\n",
+ mnt_ns, strerror(errno));
+ return NSID_ERROR;
+ }
+ close(fd);
+ return NSID_PASS;
+}
+
+static int get_mnt_id(const char *path, uint64_t *mnt_id)
+{
+ struct statx sx;
+ int ret;
+
+ ret = statx(AT_FDCWD, path, 0, STATX_MNT_ID_UNIQUE, &sx);
+ if (ret == -1) {
+ ksft_print_msg("retrieving unique mount ID for %s: %s\n", path,
+ strerror(errno));
+ return NSID_ERROR;
+ }
+
+ if (!(sx.stx_mask & STATX_MNT_ID_UNIQUE)) {
+ ksft_print_msg("no unique mount ID available for %s\n", path);
+ return NSID_ERROR;
+ }
+
+ *mnt_id = sx.stx_mnt_id;
+ return NSID_PASS;
+}
+
+static int write_file(const char *path, const char *val)
+{
+ int fd = open(path, O_WRONLY);
+ size_t len = strlen(val);
+ int ret;
+
+ if (fd == -1) {
+ ksft_print_msg("opening %s for write: %s\n", path, strerror(errno));
+ return NSID_ERROR;
+ }
+
+ ret = write(fd, val, len);
+ if (ret == -1) {
+ ksft_print_msg("writing to %s: %s\n", path, strerror(errno));
+ return NSID_ERROR;
+ }
+ if (ret != len) {
+ ksft_print_msg("short write to %s\n", path);
+ return NSID_ERROR;
+ }
+
+ ret = close(fd);
+ if (ret == -1) {
+ ksft_print_msg("closing %s\n", path);
+ return NSID_ERROR;
+ }
+
+ return NSID_PASS;
+}
+
+static int setup_namespace(void)
+{
+ int ret;
+ char buf[32];
+ uid_t uid = getuid();
+ gid_t gid = getgid();
+
+ ret = unshare(CLONE_NEWNS|CLONE_NEWUSER|CLONE_NEWPID);
+ if (ret == -1)
+ ksft_exit_fail_msg("unsharing mountns and userns: %s\n",
+ strerror(errno));
+
+ sprintf(buf, "0 %d 1", uid);
+ ret = write_file("/proc/self/uid_map", buf);
+ if (ret != NSID_PASS)
+ return ret;
+ ret = write_file("/proc/self/setgroups", "deny");
+ if (ret != NSID_PASS)
+ return ret;
+ sprintf(buf, "0 %d 1", gid);
+ ret = write_file("/proc/self/gid_map", buf);
+ if (ret != NSID_PASS)
+ return ret;
+
+ ret = mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL);
+ if (ret == -1) {
+ ksft_print_msg("making mount tree private: %s\n",
+ strerror(errno));
+ return NSID_ERROR;
+ }
+
+ return NSID_PASS;
+}
+
+static int _test_statmount_mnt_ns_id(void)
+{
+ struct statmount sm;
+ uint64_t mnt_ns_id;
+ uint64_t root_id;
+ int ret;
+
+ ret = get_mnt_ns_id("/proc/self/ns/mnt", &mnt_ns_id);
+ if (ret != NSID_PASS)
+ return ret;
+
+ ret = get_mnt_id("/", &root_id);
+ if (ret != NSID_PASS)
+ return ret;
+
+ ret = statmount(root_id, 0, STATMOUNT_MNT_NS_ID, &sm, sizeof(sm), 0);
+ if (ret == -1) {
+ ksft_print_msg("statmount mnt ns id: %s\n", strerror(errno));
+ return NSID_ERROR;
+ }
+
+ if (sm.size != sizeof(sm)) {
+ ksft_print_msg("unexpected size: %u != %u\n", sm.size,
+ (uint32_t)sizeof(sm));
+ return NSID_FAIL;
+ }
+ if (sm.mask != STATMOUNT_MNT_NS_ID) {
+ ksft_print_msg("statmount mnt ns id unavailable\n");
+ return NSID_SKIP;
+ }
+
+ if (sm.mnt_ns_id != mnt_ns_id) {
+ ksft_print_msg("unexpected mnt ns ID: 0x%llx != 0x%llx\n",
+ (unsigned long long)sm.mnt_ns_id,
+ (unsigned long long)mnt_ns_id);
+ return NSID_FAIL;
+ }
+
+ return NSID_PASS;
+}
+
+static void test_statmount_mnt_ns_id(void)
+{
+ pid_t pid;
+ int ret;
+
+ pid = fork();
+ if (pid < 0)
+ ksft_exit_fail_msg("failed to fork: %s\n", strerror(errno));
+
+ /* We're the original pid, wait for the result. */
+ if (pid != 0) {
+ ret = wait_for_pid(pid);
+ handle_result(ret, "test statmount ns id");
+ return;
+ }
+
+ ret = setup_namespace();
+ if (ret != NSID_PASS)
+ exit(ret);
+ ret = _test_statmount_mnt_ns_id();
+ exit(ret);
+}
+
+static int validate_external_listmount(pid_t pid, uint64_t child_nr_mounts)
+{
+ uint64_t list[256];
+ uint64_t mnt_ns_id;
+ uint64_t nr_mounts;
+ char buf[256];
+ int ret;
+
+ /* Get the mount ns id for our child. */
+ snprintf(buf, sizeof(buf), "/proc/%lu/ns/mnt", (unsigned long)pid);
+ ret = get_mnt_ns_id(buf, &mnt_ns_id);
+
+ nr_mounts = listmount(LSMT_ROOT, mnt_ns_id, 0, list, 256, 0);
+ if (nr_mounts == (uint64_t)-1) {
+ ksft_print_msg("listmount: %s\n", strerror(errno));
+ return NSID_ERROR;
+ }
+
+ if (nr_mounts != child_nr_mounts) {
+ ksft_print_msg("listmount results is %zi != %zi\n", nr_mounts,
+ child_nr_mounts);
+ return NSID_FAIL;
+ }
+
+ /* Validate that all of our entries match our mnt_ns_id. */
+ for (int i = 0; i < nr_mounts; i++) {
+ struct statmount sm;
+
+ ret = statmount(list[i], mnt_ns_id, STATMOUNT_MNT_NS_ID, &sm,
+ sizeof(sm), 0);
+ if (ret < 0) {
+ ksft_print_msg("statmount mnt ns id: %s\n", strerror(errno));
+ return NSID_ERROR;
+ }
+
+ if (sm.mask != STATMOUNT_MNT_NS_ID) {
+ ksft_print_msg("statmount mnt ns id unavailable\n");
+ return NSID_SKIP;
+ }
+
+ if (sm.mnt_ns_id != mnt_ns_id) {
+ ksft_print_msg("listmount gave us the wrong ns id: 0x%llx != 0x%llx\n",
+ (unsigned long long)sm.mnt_ns_id,
+ (unsigned long long)mnt_ns_id);
+ return NSID_FAIL;
+ }
+ }
+
+ return NSID_PASS;
+}
+
+static void test_listmount_ns(void)
+{
+ uint64_t nr_mounts;
+ char pval;
+ int child_ready_pipe[2];
+ int parent_ready_pipe[2];
+ pid_t pid;
+ int ret, child_ret;
+
+ if (pipe(child_ready_pipe) < 0)
+ ksft_exit_fail_msg("failed to create the child pipe: %s\n",
+ strerror(errno));
+ if (pipe(parent_ready_pipe) < 0)
+ ksft_exit_fail_msg("failed to create the parent pipe: %s\n",
+ strerror(errno));
+
+ pid = fork();
+ if (pid < 0)
+ ksft_exit_fail_msg("failed to fork: %s\n", strerror(errno));
+
+ if (pid == 0) {
+ char cval;
+ uint64_t list[256];
+
+ close(child_ready_pipe[0]);
+ close(parent_ready_pipe[1]);
+
+ ret = setup_namespace();
+ if (ret != NSID_PASS)
+ exit(ret);
+
+ nr_mounts = listmount(LSMT_ROOT, 0, 0, list, 256, 0);
+ if (nr_mounts == (uint64_t)-1) {
+ ksft_print_msg("listmount: %s\n", strerror(errno));
+ exit(NSID_FAIL);
+ }
+
+ /*
+ * Tell our parent how many mounts we have, and then wait for it
+ * to tell us we're done.
+ */
+ write(child_ready_pipe[1], &nr_mounts, sizeof(nr_mounts));
+ read(parent_ready_pipe[0], &cval, sizeof(cval));
+ exit(NSID_PASS);
+ }
+
+ close(child_ready_pipe[1]);
+ close(parent_ready_pipe[0]);
+
+ /* Wait until the child has created everything. */
+ if (read(child_ready_pipe[0], &nr_mounts, sizeof(nr_mounts)) !=
+ sizeof(nr_mounts))
+ ret = NSID_ERROR;
+
+ ret = validate_external_listmount(pid, nr_mounts);
+
+ if (write(parent_ready_pipe[1], &pval, sizeof(pval)) != sizeof(pval))
+ ret = NSID_ERROR;
+
+ child_ret = wait_for_pid(pid);
+ if (child_ret != NSID_PASS)
+ ret = child_ret;
+ handle_result(ret, "test listmount ns id");
+}
+
+int main(void)
+{
+ int ret;
+
+ ksft_print_header();
+ ret = statmount(0, 0, 0, NULL, 0, 0);
+ assert(ret == -1);
+ if (errno == ENOSYS)
+ ksft_exit_skip("statmount() syscall not supported\n");
+
+ ksft_set_plan(2);
+ test_statmount_mnt_ns_id();
+ test_listmount_ns();
+
+ if (ksft_get_fail_cnt() + ksft_get_error_cnt() > 0)
+ ksft_exit_fail();
+ else
+ ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/ftrace/config b/tools/testing/selftests/ftrace/config
index e59d985eeff0..048a312abf40 100644
--- a/tools/testing/selftests/ftrace/config
+++ b/tools/testing/selftests/ftrace/config
@@ -1,16 +1,28 @@
-CONFIG_KPROBES=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_DEBUG_INFO_BTF=y
+CONFIG_DEBUG_INFO_DWARF4=y
+CONFIG_EPROBE_EVENTS=y
+CONFIG_FPROBE=y
+CONFIG_FPROBE_EVENTS=y
CONFIG_FTRACE=y
+CONFIG_FTRACE_SYSCALLS=y
+CONFIG_FUNCTION_GRAPH_RETVAL=y
CONFIG_FUNCTION_PROFILER=y
-CONFIG_TRACER_SNAPSHOT=y
-CONFIG_STACK_TRACER=y
CONFIG_HIST_TRIGGERS=y
-CONFIG_SCHED_TRACER=y
-CONFIG_PREEMPT_TRACER=y
CONFIG_IRQSOFF_TRACER=y
-CONFIG_PREEMPTIRQ_DELAY_TEST=m
+CONFIG_KALLSYMS_ALL=y
+CONFIG_KPROBES=y
+CONFIG_KPROBE_EVENTS=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
+CONFIG_PREEMPTIRQ_DELAY_TEST=m
+CONFIG_PREEMPT_TRACER=y
+CONFIG_PROBE_EVENTS_BTF_ARGS=y
CONFIG_SAMPLES=y
CONFIG_SAMPLE_FTRACE_DIRECT=m
CONFIG_SAMPLE_TRACE_PRINTK=m
-CONFIG_KALLSYMS_ALL=y
+CONFIG_SCHED_TRACER=y
+CONFIG_STACK_TRACER=y
+CONFIG_TRACER_SNAPSHOT=y
+CONFIG_UPROBES=y
+CONFIG_UPROBE_EVENTS=y
diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest
index 25d4e0fca385..cce72f8b03dc 100755
--- a/tools/testing/selftests/ftrace/ftracetest
+++ b/tools/testing/selftests/ftrace/ftracetest
@@ -255,7 +255,13 @@ prlog() { # messages
[ "$LOG_FILE" ] && printf "$*$newline" | strip_esc >> $LOG_FILE
}
catlog() { #file
- cat $1
+ if [ "${KTAP}" = "1" ]; then
+ cat $1 | while read line ; do
+ echo "# $line"
+ done
+ else
+ cat $1
+ fi
[ "$LOG_FILE" ] && cat $1 | strip_esc >> $LOG_FILE
}
prlog "=== Ftrace unit tests ==="
diff --git a/tools/testing/selftests/ftrace/ftracetest-ktap b/tools/testing/selftests/ftrace/ftracetest-ktap
index b3284679ef3a..14e62ef3f3b9 100755
--- a/tools/testing/selftests/ftrace/ftracetest-ktap
+++ b/tools/testing/selftests/ftrace/ftracetest-ktap
@@ -5,4 +5,4 @@
#
# Copyright (C) Arm Ltd., 2023
-./ftracetest -K
+./ftracetest -K -v
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_btfarg.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_btfarg.tc
index b9c21a81d248..c0cdad4c400e 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_btfarg.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_btfarg.tc
@@ -53,7 +53,7 @@ fi
echo > dynamic_events
-if [ "$FIELDS" ] ; then
+if [ "$FIELDS" -a "$FPROBES" ] ; then
echo "t:tpevent ${TP2} obj_size=s->object_size" >> dynamic_events
echo "f:fpevent ${TP3}%return path=\$retval->name:string" >> dynamic_events
echo "t:tpevent2 ${TP4} p->se.group_node.next->prev" >> dynamic_events
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_args_vfs.tc b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_args_vfs.tc
new file mode 100644
index 000000000000..c6a9d2466a71
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_args_vfs.tc
@@ -0,0 +1,41 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Fprobe event VFS type argument
+# requires: dynamic_events "%pd/%pD":README "f[:[<group>/][<event>]] <func-name>[%return] [<args>]":README
+
+
+: "Test argument %pd with name for fprobe"
+echo 'f:testprobe dput name=$arg1:%pd' > dynamic_events
+echo 1 > events/fprobes/testprobe/enable
+grep -q "1" events/fprobes/testprobe/enable
+echo 0 > events/fprobes/testprobe/enable
+grep "dput" trace | grep -q "enable"
+echo "" > dynamic_events
+echo "" > trace
+
+: "Test argument %pd without name for fprobe"
+echo 'f:testprobe dput $arg1:%pd' > dynamic_events
+echo 1 > events/fprobes/testprobe/enable
+grep -q "1" events/fprobes/testprobe/enable
+echo 0 > events/fprobes/testprobe/enable
+grep "dput" trace | grep -q "enable"
+echo "" > dynamic_events
+echo "" > trace
+
+: "Test argument %pD with name for fprobe"
+echo 'f:testprobe vfs_read name=$arg1:%pD' > dynamic_events
+echo 1 > events/fprobes/testprobe/enable
+grep -q "1" events/fprobes/testprobe/enable
+echo 0 > events/fprobes/testprobe/enable
+grep "vfs_read" trace | grep -q "enable"
+echo "" > dynamic_events
+echo "" > trace
+
+: "Test argument %pD without name for fprobe"
+echo 'f:testprobe vfs_read $arg1:%pD' > dynamic_events
+echo 1 > events/fprobes/testprobe/enable
+grep -q "1" events/fprobes/testprobe/enable
+echo 0 > events/fprobes/testprobe/enable
+grep "vfs_read" trace | grep -q "enable"
+echo "" > dynamic_events
+echo "" > trace
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_entry_arg.tc b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_entry_arg.tc
index d183b8a8ecf8..1e251ce2998e 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_entry_arg.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_entry_arg.tc
@@ -11,7 +11,7 @@ echo 1 > events/tests/enable
echo > trace
cat trace > /dev/null
-function streq() {
+streq() {
test $1 = $2
}
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc b/tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc
index d3a79da215c8..5f72abe6fa79 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc
@@ -1,7 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Generic dynamic event - check if duplicate events are caught
-# requires: dynamic_events "e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>]":README
+# requires: dynamic_events "e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>]":README events/syscalls/sys_enter_openat
echo 0 > events/enable
diff --git a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc
index 3f74c09c56b6..118247b8dd84 100644
--- a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc
+++ b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc
@@ -10,7 +10,6 @@ fail() { #msg
}
sample_events() {
- echo > trace
echo 1 > events/kmem/kmem_cache_free/enable
echo 1 > tracing_on
ls > /dev/null
@@ -22,6 +21,7 @@ echo 0 > tracing_on
echo 0 > events/enable
echo "Get the most frequently calling function"
+echo > trace
sample_events
target_func=`cat trace | grep -o 'call_site=\([^+]*\)' | sed 's/call_site=//' | sort | uniq -c | sort | tail -n 1 | sed 's/^[ 0-9]*//'`
@@ -32,7 +32,16 @@ echo > trace
echo "Test event filter function name"
echo "call_site.function == $target_func" > events/kmem/kmem_cache_free/filter
+
+sample_events
+max_retry=10
+while [ `grep kmem_cache_free trace| wc -l` -eq 0 ]; do
sample_events
+max_retry=$((max_retry - 1))
+if [ $max_retry -eq 0 ]; then
+ exit_fail
+fi
+done
hitcnt=`grep kmem_cache_free trace| grep $target_func | wc -l`
misscnt=`grep kmem_cache_free trace| grep -v $target_func | wc -l`
@@ -49,7 +58,16 @@ address=`grep " ${target_func}\$" /proc/kallsyms | cut -d' ' -f1`
echo "Test event filter function address"
echo "call_site.function == 0x$address" > events/kmem/kmem_cache_free/filter
+echo > trace
+sample_events
+max_retry=10
+while [ `grep kmem_cache_free trace| wc -l` -eq 0 ]; do
sample_events
+max_retry=$((max_retry - 1))
+if [ $max_retry -eq 0 ]; then
+ exit_fail
+fi
+done
hitcnt=`grep kmem_cache_free trace| grep $target_func | wc -l`
misscnt=`grep kmem_cache_free trace| grep -v $target_func | wc -l`
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi.tc
new file mode 100644
index 000000000000..ff88f97e41fb
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-multi.tc
@@ -0,0 +1,103 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: ftrace - function graph filters
+# requires: set_ftrace_filter function_graph:tracer
+
+# Make sure that function graph filtering works
+
+INSTANCE1="instances/test1_$$"
+INSTANCE2="instances/test2_$$"
+INSTANCE3="instances/test3_$$"
+
+WD=`pwd`
+
+do_reset() {
+ cd $WD
+ if [ -d $INSTANCE1 ]; then
+ echo nop > $INSTANCE1/current_tracer
+ rmdir $INSTANCE1
+ fi
+ if [ -d $INSTANCE2 ]; then
+ echo nop > $INSTANCE2/current_tracer
+ rmdir $INSTANCE2
+ fi
+ if [ -d $INSTANCE3 ]; then
+ echo nop > $INSTANCE3/current_tracer
+ rmdir $INSTANCE3
+ fi
+}
+
+mkdir $INSTANCE1
+if ! grep -q function_graph $INSTANCE1/available_tracers; then
+ echo "function_graph not allowed with instances"
+ rmdir $INSTANCE1
+ exit_unsupported
+fi
+
+mkdir $INSTANCE2
+mkdir $INSTANCE3
+
+fail() { # msg
+ do_reset
+ echo $1
+ exit_fail
+}
+
+disable_tracing
+clear_trace
+
+do_test() {
+ REGEX=$1
+ TEST=$2
+
+ # filter something, schedule is always good
+ if ! echo "$REGEX" > set_ftrace_filter; then
+ fail "can not enable filter $REGEX"
+ fi
+
+ echo > trace
+ echo function_graph > current_tracer
+ enable_tracing
+ sleep 1
+ # search for functions (has "{" or ";" on the line)
+ echo 0 > tracing_on
+ count=`cat trace | grep -v '^#' | grep -e '{' -e ';' | grep -v "$TEST" | wc -l`
+ echo 1 > tracing_on
+ if [ $count -ne 0 ]; then
+ fail "Graph filtering not working by itself against $TEST?"
+ fi
+
+ # Make sure we did find something
+ echo 0 > tracing_on
+ count=`cat trace | grep -v '^#' | grep -e '{' -e ';' | grep "$TEST" | wc -l`
+ echo 1 > tracing_on
+ if [ $count -eq 0 ]; then
+ fail "No traces found with $TEST?"
+ fi
+}
+
+do_test '*sched*' 'sched'
+cd $INSTANCE1
+do_test '*lock*' 'lock'
+cd $WD
+cd $INSTANCE2
+do_test '*rcu*' 'rcu'
+cd $WD
+cd $INSTANCE3
+echo function_graph > current_tracer
+
+sleep 1
+count=`cat trace | grep -v '^#' | grep -e '{' -e ';' | grep "$TEST" | wc -l`
+if [ $count -eq 0 ]; then
+ fail "No traces found with all tracing?"
+fi
+
+cd $WD
+echo nop > current_tracer
+echo nop > $INSTANCE1/current_tracer
+echo nop > $INSTANCE2/current_tracer
+echo nop > $INSTANCE3/current_tracer
+
+do_reset
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
index 2f7211254529..8dcce001881d 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
@@ -8,12 +8,18 @@
# Also test it on an instance directory
do_function_fork=1
+do_funcgraph_proc=1
if [ ! -f options/function-fork ]; then
do_function_fork=0
echo "no option for function-fork found. Option will not be tested."
fi
+if [ ! -f options/funcgraph-proc ]; then
+ do_funcgraph_proc=0
+ echo "no option for function-fork found. Option will not be tested."
+fi
+
read PID _ < /proc/self/stat
if [ $do_function_fork -eq 1 ]; then
@@ -21,12 +27,19 @@ if [ $do_function_fork -eq 1 ]; then
orig_value=`grep function-fork trace_options`
fi
+if [ $do_funcgraph_proc -eq 1 ]; then
+ orig_value2=`cat options/funcgraph-proc`
+ echo 1 > options/funcgraph-proc
+fi
+
do_reset() {
- if [ $do_function_fork -eq 0 ]; then
- return
+ if [ $do_function_fork -eq 1 ]; then
+ echo $orig_value > trace_options
fi
- echo $orig_value > trace_options
+ if [ $do_funcgraph_proc -eq 1 ]; then
+ echo $orig_value2 > options/funcgraph-proc
+ fi
}
fail() { # msg
@@ -36,13 +49,15 @@ fail() { # msg
}
do_test() {
+ TRACER=$1
+
disable_tracing
echo do_execve* > set_ftrace_filter
echo $FUNCTION_FORK >> set_ftrace_filter
echo $PID > set_ftrace_pid
- echo function > current_tracer
+ echo $TRACER > current_tracer
if [ $do_function_fork -eq 1 ]; then
# don't allow children to be traced
@@ -82,7 +97,11 @@ do_test() {
fi
}
-do_test
+do_test function
+if grep -s function_graph available_tracers; then
+ do_test function_graph
+fi
+
do_reset
exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
index 25432b8cd5bd..073a748b9380 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
@@ -19,7 +19,7 @@ fail() { # mesg
FILTER=set_ftrace_filter
FUNC1="schedule"
-FUNC2="scheduler_tick"
+FUNC2="sched_tick"
ALL_FUNCS="#### all functions enabled ####"
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc
new file mode 100644
index 000000000000..21a54be6894c
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_vfs.tc
@@ -0,0 +1,40 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe event VFS type argument
+# requires: kprobe_events "%pd/%pD":README
+
+: "Test argument %pd with name"
+echo 'p:testprobe dput name=$arg1:%pd' > kprobe_events
+echo 1 > events/kprobes/testprobe/enable
+grep -q "1" events/kprobes/testprobe/enable
+echo 0 > events/kprobes/testprobe/enable
+grep "dput" trace | grep -q "enable"
+echo "" > kprobe_events
+echo "" > trace
+
+: "Test argument %pd without name"
+echo 'p:testprobe dput $arg1:%pd' > kprobe_events
+echo 1 > events/kprobes/testprobe/enable
+grep -q "1" events/kprobes/testprobe/enable
+echo 0 > events/kprobes/testprobe/enable
+grep "dput" trace | grep -q "enable"
+echo "" > kprobe_events
+echo "" > trace
+
+: "Test argument %pD with name"
+echo 'p:testprobe vfs_read name=$arg1:%pD' > kprobe_events
+echo 1 > events/kprobes/testprobe/enable
+grep -q "1" events/kprobes/testprobe/enable
+echo 0 > events/kprobes/testprobe/enable
+grep "vfs_read" trace | grep -q "enable"
+echo "" > kprobe_events
+echo "" > trace
+
+: "Test argument %pD without name"
+echo 'p:testprobe vfs_read $arg1:%pD' > kprobe_events
+echo 1 > events/kprobes/testprobe/enable
+grep -q "1" events/kprobes/testprobe/enable
+echo 0 > events/kprobes/testprobe/enable
+grep "vfs_read" trace | grep -q "enable"
+echo "" > kprobe_events
+echo "" > trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc
index 1f6981ef7afa..ba19b81cef39 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_eventname.tc
@@ -30,7 +30,8 @@ find_dot_func() {
fi
grep " [tT] .*\.isra\..*" /proc/kallsyms | cut -f 3 -d " " | while read f; do
- if grep -s $f available_filter_functions; then
+ cnt=`grep -s $f available_filter_functions | wc -l`;
+ if [ $cnt -eq 1 ]; then
echo $f
break
fi
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_entry_arg.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_entry_arg.tc
index 53b82f36a1d0..e50470b53164 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_entry_arg.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_entry_arg.tc
@@ -11,7 +11,7 @@ echo 1 > events/kprobes/enable
echo > trace
cat trace > /dev/null
-function streq() {
+streq() {
test $1 = $2
}
diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile
index 11e157d7533b..78ab2cd111f6 100644
--- a/tools/testing/selftests/futex/Makefile
+++ b/tools/testing/selftests/futex/Makefile
@@ -3,8 +3,6 @@ SUBDIRS := functional
TEST_PROGS := run.sh
-.PHONY: all clean
-
include ../lib.mk
all:
diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile
index a392d0917b4e..f79f9bac7918 100644
--- a/tools/testing/selftests/futex/functional/Makefile
+++ b/tools/testing/selftests/futex/functional/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
INCLUDES := -I../include -I../../ $(KHDR_INCLUDES)
-CFLAGS := $(CFLAGS) -g -O2 -Wall -D_GNU_SOURCE -pthread $(INCLUDES) $(KHDR_INCLUDES)
+CFLAGS := $(CFLAGS) -g -O2 -Wall -pthread $(INCLUDES) $(KHDR_INCLUDES)
LDLIBS := -lpthread -lrt
LOCAL_HDRS := \
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi.c b/tools/testing/selftests/futex/functional/futex_requeue_pi.c
index 7f3ca5c78df1..215c6cb539b4 100644
--- a/tools/testing/selftests/futex/functional/futex_requeue_pi.c
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi.c
@@ -360,7 +360,7 @@ out:
int main(int argc, char *argv[])
{
- const char *test_name;
+ char *test_name;
int c, ret;
while ((c = getopt(argc, argv, "bchlot:v:")) != -1) {
diff --git a/tools/testing/selftests/hid/config.common b/tools/testing/selftests/hid/config.common
index 0f456dbab62f..45b5570441ce 100644
--- a/tools/testing/selftests/hid/config.common
+++ b/tools/testing/selftests/hid/config.common
@@ -238,3 +238,4 @@ CONFIG_VLAN_8021Q=y
CONFIG_XFRM_SUB_POLICY=y
CONFIG_XFRM_USER=y
CONFIG_ZEROPLUS_FF=y
+CONFIG_KASAN=y
diff --git a/tools/testing/selftests/hid/hid_bpf.c b/tools/testing/selftests/hid/hid_bpf.c
index 2cf96f818f25..75b7b4ef6cfa 100644
--- a/tools/testing/selftests/hid/hid_bpf.c
+++ b/tools/testing/selftests/hid/hid_bpf.c
@@ -16,6 +16,11 @@
#define SHOW_UHID_DEBUG 0
+#define min(a, b) \
+ ({ __typeof__(a) _a = (a); \
+ __typeof__(b) _b = (b); \
+ _a < _b ? _a : _b; })
+
static unsigned char rdesc[] = {
0x06, 0x00, 0xff, /* Usage Page (Vendor Defined Page 1) */
0x09, 0x21, /* Usage (Vendor Usage 0x21) */
@@ -111,6 +116,10 @@ struct hid_hw_request_syscall_args {
static pthread_mutex_t uhid_started_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t uhid_started = PTHREAD_COND_INITIALIZER;
+static pthread_mutex_t uhid_output_mtx = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t uhid_output_cond = PTHREAD_COND_INITIALIZER;
+static unsigned char output_report[10];
+
/* no need to protect uhid_stopped, only one thread accesses it */
static bool uhid_stopped;
@@ -205,6 +214,13 @@ static int uhid_event(struct __test_metadata *_metadata, int fd)
break;
case UHID_OUTPUT:
UHID_LOG("UHID_OUTPUT from uhid-dev");
+
+ pthread_mutex_lock(&uhid_output_mtx);
+ memcpy(output_report,
+ ev.u.output.data,
+ min(ev.u.output.size, sizeof(output_report)));
+ pthread_cond_signal(&uhid_output_cond);
+ pthread_mutex_unlock(&uhid_output_mtx);
break;
case UHID_GET_REPORT:
UHID_LOG("UHID_GET_REPORT from uhid-dev");
@@ -444,7 +460,7 @@ FIXTURE(hid_bpf) {
int hid_id;
pthread_t tid;
struct hid *skel;
- int hid_links[3]; /* max number of programs loaded in a single test */
+ struct bpf_link *hid_links[3]; /* max number of programs loaded in a single test */
};
static void detach_bpf(FIXTURE_DATA(hid_bpf) * self)
{
@@ -454,9 +470,14 @@ static void detach_bpf(FIXTURE_DATA(hid_bpf) * self)
close(self->hidraw_fd);
self->hidraw_fd = 0;
+ if (!self->skel)
+ return;
+
+ hid__detach(self->skel);
+
for (i = 0; i < ARRAY_SIZE(self->hid_links); i++) {
if (self->hid_links[i])
- close(self->hid_links[i]);
+ bpf_link__destroy(self->hid_links[i]);
}
hid__destroy(self->skel);
@@ -511,14 +532,8 @@ static void load_programs(const struct test_program programs[],
FIXTURE_DATA(hid_bpf) * self,
const FIXTURE_VARIANT(hid_bpf) * variant)
{
- int attach_fd, err = -EINVAL;
- struct attach_prog_args args = {
- .retval = -1,
- };
- DECLARE_LIBBPF_OPTS(bpf_test_run_opts, tattr,
- .ctx_in = &args,
- .ctx_size_in = sizeof(args),
- );
+ struct bpf_map *iter_map;
+ int err = -EINVAL;
ASSERT_LE(progs_count, ARRAY_SIZE(self->hid_links))
TH_LOG("too many programs are to be loaded");
@@ -529,37 +544,52 @@ static void load_programs(const struct test_program programs[],
for (int i = 0; i < progs_count; i++) {
struct bpf_program *prog;
+ struct bpf_map *map;
+ int *ops_hid_id;
prog = bpf_object__find_program_by_name(*self->skel->skeleton->obj,
programs[i].name);
ASSERT_OK_PTR(prog) TH_LOG("can not find program by name '%s'", programs[i].name);
bpf_program__set_autoload(prog, true);
+
+ map = bpf_object__find_map_by_name(*self->skel->skeleton->obj,
+ programs[i].name + 4);
+ ASSERT_OK_PTR(map) TH_LOG("can not find struct_ops by name '%s'",
+ programs[i].name + 4);
+
+ /* hid_id is the first field of struct hid_bpf_ops */
+ ops_hid_id = bpf_map__initial_value(map, NULL);
+ ASSERT_OK_PTR(ops_hid_id) TH_LOG("unable to retrieve struct_ops data");
+
+ *ops_hid_id = self->hid_id;
}
+ /* we disable the auto-attach feature of all maps because we
+ * only want the tested one to be manually attached in the next
+ * call to bpf_map__attach_struct_ops()
+ */
+ bpf_object__for_each_map(iter_map, *self->skel->skeleton->obj)
+ bpf_map__set_autoattach(iter_map, false);
+
err = hid__load(self->skel);
ASSERT_OK(err) TH_LOG("hid_skel_load failed: %d", err);
- attach_fd = bpf_program__fd(self->skel->progs.attach_prog);
- ASSERT_GE(attach_fd, 0) TH_LOG("locate attach_prog: %d", attach_fd);
-
for (int i = 0; i < progs_count; i++) {
- struct bpf_program *prog;
+ struct bpf_map *map;
- prog = bpf_object__find_program_by_name(*self->skel->skeleton->obj,
- programs[i].name);
- ASSERT_OK_PTR(prog) TH_LOG("can not find program by name '%s'", programs[i].name);
-
- args.prog_fd = bpf_program__fd(prog);
- args.hid = self->hid_id;
- args.insert_head = programs[i].insert_head;
- err = bpf_prog_test_run_opts(attach_fd, &tattr);
- ASSERT_GE(args.retval, 0)
- TH_LOG("attach_hid(%s): %d", programs[i].name, args.retval);
+ map = bpf_object__find_map_by_name(*self->skel->skeleton->obj,
+ programs[i].name + 4);
+ ASSERT_OK_PTR(map) TH_LOG("can not find struct_ops by name '%s'",
+ programs[i].name + 4);
- self->hid_links[i] = args.retval;
+ self->hid_links[i] = bpf_map__attach_struct_ops(map);
+ ASSERT_OK_PTR(self->hid_links[i]) TH_LOG("failed to attach struct ops '%s'",
+ programs[i].name + 4);
}
+ hid__attach(self->skel);
+
self->hidraw_fd = open_hidraw(self->dev_id);
ASSERT_GE(self->hidraw_fd, 0) TH_LOG("open_hidraw");
}
@@ -624,6 +654,65 @@ TEST_F(hid_bpf, raw_event)
}
/*
+ * Attach hid_first_event to the given uhid device,
+ * retrieve and open the matching hidraw node,
+ * inject one event in the uhid device,
+ * check that the program sees it and can change the data
+ */
+TEST_F(hid_bpf, subprog_raw_event)
+{
+ const struct test_program progs[] = {
+ { .name = "hid_subprog_first_event" },
+ };
+ __u8 buf[10] = {0};
+ int err;
+
+ LOAD_PROGRAMS(progs);
+
+ /* inject one event */
+ buf[0] = 1;
+ buf[1] = 42;
+ uhid_send_event(_metadata, self->uhid_fd, buf, 6);
+
+ /* read the data from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, 6) TH_LOG("read_hidraw");
+ ASSERT_EQ(buf[0], 1);
+ ASSERT_EQ(buf[2], 47);
+
+ /* inject another event */
+ memset(buf, 0, sizeof(buf));
+ buf[0] = 1;
+ buf[1] = 47;
+ uhid_send_event(_metadata, self->uhid_fd, buf, 6);
+
+ /* read the data from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, 6) TH_LOG("read_hidraw");
+ ASSERT_EQ(buf[2], 52);
+}
+
+/*
+ * Attach hid_first_event to the given uhid device,
+ * attempt at re-attaching it, we should not lock and
+ * return an invalid struct bpf_link
+ */
+TEST_F(hid_bpf, multiple_attach)
+{
+ const struct test_program progs[] = {
+ { .name = "hid_first_event" },
+ };
+ struct bpf_link *link;
+
+ LOAD_PROGRAMS(progs);
+
+ link = bpf_map__attach_struct_ops(self->skel->maps.first_event);
+ ASSERT_NULL(link) TH_LOG("unexpected return value when re-attaching the struct_ops");
+}
+
+/*
* Ensures that we can attach/detach programs
*/
TEST_F(hid_bpf, test_attach_detach)
@@ -632,13 +721,17 @@ TEST_F(hid_bpf, test_attach_detach)
{ .name = "hid_first_event" },
{ .name = "hid_second_event" },
};
+ struct bpf_link *link;
__u8 buf[10] = {0};
- int err, link;
+ int err, link_fd;
LOAD_PROGRAMS(progs);
link = self->hid_links[0];
- ASSERT_GT(link, 0) TH_LOG("HID-BPF link not created");
+ ASSERT_OK_PTR(link) TH_LOG("HID-BPF link not created");
+
+ link_fd = bpf_link__fd(link);
+ ASSERT_GE(link_fd, 0) TH_LOG("HID-BPF link FD not valid");
/* inject one event */
buf[0] = 1;
@@ -657,7 +750,7 @@ TEST_F(hid_bpf, test_attach_detach)
/* pin the first program and immediately unpin it */
#define PIN_PATH "/sys/fs/bpf/hid_first_event"
- err = bpf_obj_pin(link, PIN_PATH);
+ err = bpf_obj_pin(link_fd, PIN_PATH);
ASSERT_OK(err) TH_LOG("error while calling bpf_obj_pin");
remove(PIN_PATH);
#undef PIN_PATH
@@ -734,8 +827,100 @@ TEST_F(hid_bpf, test_hid_change_report)
}
/*
- * Attach hid_user_raw_request to the given uhid device,
- * call the bpf program from userspace
+ * Call hid_bpf_input_report against the given uhid device,
+ * check that the program is called and does the expected.
+ */
+TEST_F(hid_bpf, test_hid_user_input_report_call)
+{
+ struct hid_hw_request_syscall_args args = {
+ .retval = -1,
+ .size = 10,
+ };
+ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, tattrs,
+ .ctx_in = &args,
+ .ctx_size_in = sizeof(args),
+ );
+ __u8 buf[10] = {0};
+ int err, prog_fd;
+
+ LOAD_BPF;
+
+ args.hid = self->hid_id;
+ args.data[0] = 1; /* report ID */
+ args.data[1] = 2; /* report ID */
+ args.data[2] = 42; /* report ID */
+
+ prog_fd = bpf_program__fd(self->skel->progs.hid_user_input_report);
+
+ /* check that there is no data to read from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, -1) TH_LOG("read_hidraw");
+
+ err = bpf_prog_test_run_opts(prog_fd, &tattrs);
+
+ ASSERT_OK(err) TH_LOG("error while calling bpf_prog_test_run_opts");
+
+ ASSERT_EQ(args.retval, 0);
+
+ /* read the data from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, 6) TH_LOG("read_hidraw");
+ ASSERT_EQ(buf[0], 1);
+ ASSERT_EQ(buf[1], 2);
+ ASSERT_EQ(buf[2], 42);
+}
+
+/*
+ * Call hid_bpf_hw_output_report against the given uhid device,
+ * check that the program is called and does the expected.
+ */
+TEST_F(hid_bpf, test_hid_user_output_report_call)
+{
+ struct hid_hw_request_syscall_args args = {
+ .retval = -1,
+ .size = 10,
+ };
+ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, tattrs,
+ .ctx_in = &args,
+ .ctx_size_in = sizeof(args),
+ );
+ int err, cond_err, prog_fd;
+ struct timespec time_to_wait;
+
+ LOAD_BPF;
+
+ args.hid = self->hid_id;
+ args.data[0] = 1; /* report ID */
+ args.data[1] = 2; /* report ID */
+ args.data[2] = 42; /* report ID */
+
+ prog_fd = bpf_program__fd(self->skel->progs.hid_user_output_report);
+
+ pthread_mutex_lock(&uhid_output_mtx);
+
+ memset(output_report, 0, sizeof(output_report));
+ clock_gettime(CLOCK_REALTIME, &time_to_wait);
+ time_to_wait.tv_sec += 2;
+
+ err = bpf_prog_test_run_opts(prog_fd, &tattrs);
+ cond_err = pthread_cond_timedwait(&uhid_output_cond, &uhid_output_mtx, &time_to_wait);
+
+ ASSERT_OK(err) TH_LOG("error while calling bpf_prog_test_run_opts");
+ ASSERT_OK(cond_err) TH_LOG("error while calling waiting for the condition");
+
+ ASSERT_EQ(args.retval, 3);
+
+ ASSERT_EQ(output_report[0], 1);
+ ASSERT_EQ(output_report[1], 2);
+ ASSERT_EQ(output_report[2], 42);
+
+ pthread_mutex_unlock(&uhid_output_mtx);
+}
+
+/*
+ * Call hid_hw_raw_request against the given uhid device,
* check that the program is called and does the expected.
*/
TEST_F(hid_bpf, test_hid_user_raw_request_call)
@@ -768,6 +953,325 @@ TEST_F(hid_bpf, test_hid_user_raw_request_call)
}
/*
+ * Call hid_hw_raw_request against the given uhid device,
+ * check that the program is called and prevents the
+ * call to uhid.
+ */
+TEST_F(hid_bpf, test_hid_filter_raw_request_call)
+{
+ const struct test_program progs[] = {
+ { .name = "hid_test_filter_raw_request" },
+ };
+ __u8 buf[10] = {0};
+ int err;
+
+ LOAD_PROGRAMS(progs);
+
+ /* first check that we did not attach to device_event */
+
+ /* inject one event */
+ buf[0] = 1;
+ buf[1] = 42;
+ uhid_send_event(_metadata, self->uhid_fd, buf, 6);
+
+ /* read the data from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, 6) TH_LOG("read_hidraw");
+ ASSERT_EQ(buf[0], 1);
+ ASSERT_EQ(buf[1], 42);
+ ASSERT_EQ(buf[2], 0) TH_LOG("leftovers_from_previous_test");
+
+ /* now check that our program is preventing hid_hw_raw_request() */
+
+ /* emit hid_hw_raw_request from hidraw */
+ /* Get Feature */
+ memset(buf, 0, sizeof(buf));
+ buf[0] = 0x1; /* Report Number */
+ err = ioctl(self->hidraw_fd, HIDIOCGFEATURE(sizeof(buf)), buf);
+ ASSERT_LT(err, 0) TH_LOG("unexpected success while reading HIDIOCGFEATURE: %d", err);
+ ASSERT_EQ(errno, 20) TH_LOG("unexpected error code while reading HIDIOCGFEATURE: %d",
+ errno);
+
+ /* remove our bpf program and check that we can now emit commands */
+
+ /* detach the program */
+ detach_bpf(self);
+
+ self->hidraw_fd = open_hidraw(self->dev_id);
+ ASSERT_GE(self->hidraw_fd, 0) TH_LOG("open_hidraw");
+
+ err = ioctl(self->hidraw_fd, HIDIOCGFEATURE(sizeof(buf)), buf);
+ ASSERT_GE(err, 0) TH_LOG("error while reading HIDIOCGFEATURE: %d", err);
+}
+
+/*
+ * Call hid_hw_raw_request against the given uhid device,
+ * check that the program is called and can issue the call
+ * to uhid and transform the answer.
+ */
+TEST_F(hid_bpf, test_hid_change_raw_request_call)
+{
+ const struct test_program progs[] = {
+ { .name = "hid_test_hidraw_raw_request" },
+ };
+ __u8 buf[10] = {0};
+ int err;
+
+ LOAD_PROGRAMS(progs);
+
+ /* emit hid_hw_raw_request from hidraw */
+ /* Get Feature */
+ memset(buf, 0, sizeof(buf));
+ buf[0] = 0x1; /* Report Number */
+ err = ioctl(self->hidraw_fd, HIDIOCGFEATURE(sizeof(buf)), buf);
+ ASSERT_EQ(err, 3) TH_LOG("unexpected returned size while reading HIDIOCGFEATURE: %d", err);
+
+ ASSERT_EQ(buf[0], 2);
+ ASSERT_EQ(buf[1], 3);
+ ASSERT_EQ(buf[2], 4);
+}
+
+/*
+ * Call hid_hw_raw_request against the given uhid device,
+ * check that the program is not making infinite loops.
+ */
+TEST_F(hid_bpf, test_hid_infinite_loop_raw_request_call)
+{
+ const struct test_program progs[] = {
+ { .name = "hid_test_infinite_loop_raw_request" },
+ };
+ __u8 buf[10] = {0};
+ int err;
+
+ LOAD_PROGRAMS(progs);
+
+ /* emit hid_hw_raw_request from hidraw */
+ /* Get Feature */
+ memset(buf, 0, sizeof(buf));
+ buf[0] = 0x1; /* Report Number */
+ err = ioctl(self->hidraw_fd, HIDIOCGFEATURE(sizeof(buf)), buf);
+ ASSERT_EQ(err, 3) TH_LOG("unexpected returned size while reading HIDIOCGFEATURE: %d", err);
+}
+
+/*
+ * Call hid_hw_output_report against the given uhid device,
+ * check that the program is called and prevents the
+ * call to uhid.
+ */
+TEST_F(hid_bpf, test_hid_filter_output_report_call)
+{
+ const struct test_program progs[] = {
+ { .name = "hid_test_filter_output_report" },
+ };
+ __u8 buf[10] = {0};
+ int err;
+
+ LOAD_PROGRAMS(progs);
+
+ /* first check that we did not attach to device_event */
+
+ /* inject one event */
+ buf[0] = 1;
+ buf[1] = 42;
+ uhid_send_event(_metadata, self->uhid_fd, buf, 6);
+
+ /* read the data from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, 6) TH_LOG("read_hidraw");
+ ASSERT_EQ(buf[0], 1);
+ ASSERT_EQ(buf[1], 42);
+ ASSERT_EQ(buf[2], 0) TH_LOG("leftovers_from_previous_test");
+
+ /* now check that our program is preventing hid_hw_output_report() */
+
+ buf[0] = 1; /* report ID */
+ buf[1] = 2;
+ buf[2] = 42;
+
+ err = write(self->hidraw_fd, buf, 3);
+ ASSERT_LT(err, 0) TH_LOG("unexpected success while sending hid_hw_output_report: %d", err);
+ ASSERT_EQ(errno, 25) TH_LOG("unexpected error code while sending hid_hw_output_report: %d",
+ errno);
+
+ /* remove our bpf program and check that we can now emit commands */
+
+ /* detach the program */
+ detach_bpf(self);
+
+ self->hidraw_fd = open_hidraw(self->dev_id);
+ ASSERT_GE(self->hidraw_fd, 0) TH_LOG("open_hidraw");
+
+ err = write(self->hidraw_fd, buf, 3);
+ ASSERT_GE(err, 0) TH_LOG("error while sending hid_hw_output_report: %d", err);
+}
+
+/*
+ * Call hid_hw_output_report against the given uhid device,
+ * check that the program is called and can issue the call
+ * to uhid and transform the answer.
+ */
+TEST_F(hid_bpf, test_hid_change_output_report_call)
+{
+ const struct test_program progs[] = {
+ { .name = "hid_test_hidraw_output_report" },
+ };
+ __u8 buf[10] = {0};
+ int err;
+
+ LOAD_PROGRAMS(progs);
+
+ /* emit hid_hw_output_report from hidraw */
+ buf[0] = 1; /* report ID */
+ buf[1] = 2;
+ buf[2] = 42;
+
+ err = write(self->hidraw_fd, buf, 10);
+ ASSERT_EQ(err, 2) TH_LOG("unexpected returned size while sending hid_hw_output_report: %d",
+ err);
+}
+
+/*
+ * Call hid_hw_output_report against the given uhid device,
+ * check that the program is not making infinite loops.
+ */
+TEST_F(hid_bpf, test_hid_infinite_loop_output_report_call)
+{
+ const struct test_program progs[] = {
+ { .name = "hid_test_infinite_loop_output_report" },
+ };
+ __u8 buf[10] = {0};
+ int err;
+
+ LOAD_PROGRAMS(progs);
+
+ /* emit hid_hw_output_report from hidraw */
+ buf[0] = 1; /* report ID */
+ buf[1] = 2;
+ buf[2] = 42;
+
+ err = write(self->hidraw_fd, buf, 8);
+ ASSERT_EQ(err, 2) TH_LOG("unexpected returned size while sending hid_hw_output_report: %d",
+ err);
+}
+
+/*
+ * Attach hid_multiply_event_wq to the given uhid device,
+ * retrieve and open the matching hidraw node,
+ * inject one event in the uhid device,
+ * check that the program sees it and can add extra data
+ */
+TEST_F(hid_bpf, test_multiply_events_wq)
+{
+ const struct test_program progs[] = {
+ { .name = "hid_test_multiply_events_wq" },
+ };
+ __u8 buf[10] = {0};
+ int err;
+
+ LOAD_PROGRAMS(progs);
+
+ /* inject one event */
+ buf[0] = 1;
+ buf[1] = 42;
+ uhid_send_event(_metadata, self->uhid_fd, buf, 6);
+
+ /* read the data from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, 6) TH_LOG("read_hidraw");
+ ASSERT_EQ(buf[0], 1);
+ ASSERT_EQ(buf[1], 47);
+
+ usleep(100000);
+
+ /* read the data from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, 9) TH_LOG("read_hidraw");
+ ASSERT_EQ(buf[0], 2);
+ ASSERT_EQ(buf[1], 3);
+}
+
+/*
+ * Attach hid_multiply_event to the given uhid device,
+ * retrieve and open the matching hidraw node,
+ * inject one event in the uhid device,
+ * check that the program sees it and can add extra data
+ */
+TEST_F(hid_bpf, test_multiply_events)
+{
+ const struct test_program progs[] = {
+ { .name = "hid_test_multiply_events" },
+ };
+ __u8 buf[10] = {0};
+ int err;
+
+ LOAD_PROGRAMS(progs);
+
+ /* inject one event */
+ buf[0] = 1;
+ buf[1] = 42;
+ uhid_send_event(_metadata, self->uhid_fd, buf, 6);
+
+ /* read the data from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, 9) TH_LOG("read_hidraw");
+ ASSERT_EQ(buf[0], 2);
+ ASSERT_EQ(buf[1], 47);
+
+ /* read the data from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, 9) TH_LOG("read_hidraw");
+ ASSERT_EQ(buf[0], 2);
+ ASSERT_EQ(buf[1], 52);
+}
+
+/*
+ * Call hid_bpf_input_report against the given uhid device,
+ * check that the program is not making infinite loops.
+ */
+TEST_F(hid_bpf, test_hid_infinite_loop_input_report_call)
+{
+ const struct test_program progs[] = {
+ { .name = "hid_test_infinite_loop_input_report" },
+ };
+ __u8 buf[10] = {0};
+ int err;
+
+ LOAD_PROGRAMS(progs);
+
+ /* emit hid_hw_output_report from hidraw */
+ buf[0] = 1; /* report ID */
+ buf[1] = 2;
+ buf[2] = 42;
+
+ uhid_send_event(_metadata, self->uhid_fd, buf, 6);
+
+ /* read the data from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, 6) TH_LOG("read_hidraw");
+ ASSERT_EQ(buf[0], 1);
+ ASSERT_EQ(buf[1], 3);
+
+ /* read the data from hidraw: hid_bpf_try_input_report should work exactly one time */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, 6) TH_LOG("read_hidraw");
+ ASSERT_EQ(buf[0], 1);
+ ASSERT_EQ(buf[1], 4);
+
+ /* read the data from hidraw: there should be none */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, -1) TH_LOG("read_hidraw");
+}
+
+/*
* Attach hid_insert{0,1,2} to the given uhid device,
* retrieve and open the matching hidraw node,
* inject one event in the uhid device,
diff --git a/tools/testing/selftests/hid/progs/hid.c b/tools/testing/selftests/hid/progs/hid.c
index 1e558826b809..5ecc845ef792 100644
--- a/tools/testing/selftests/hid/progs/hid.c
+++ b/tools/testing/selftests/hid/progs/hid.c
@@ -14,8 +14,8 @@ struct attach_prog_args {
__u64 callback_check = 52;
__u64 callback2_check = 52;
-SEC("?fmod_ret/hid_bpf_device_event")
-int BPF_PROG(hid_first_event, struct hid_bpf_ctx *hid_ctx)
+SEC("?struct_ops/hid_device_event")
+int BPF_PROG(hid_first_event, struct hid_bpf_ctx *hid_ctx, enum hid_report_type type)
{
__u8 *rw_data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 3 /* size */);
@@ -29,8 +29,38 @@ int BPF_PROG(hid_first_event, struct hid_bpf_ctx *hid_ctx)
return hid_ctx->size;
}
-SEC("?fmod_ret/hid_bpf_device_event")
-int BPF_PROG(hid_second_event, struct hid_bpf_ctx *hid_ctx)
+SEC(".struct_ops.link")
+struct hid_bpf_ops first_event = {
+ .hid_device_event = (void *)hid_first_event,
+ .hid_id = 2,
+};
+
+int __hid_subprog_first_event(struct hid_bpf_ctx *hid_ctx, enum hid_report_type type)
+{
+ __u8 *rw_data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 3 /* size */);
+
+ if (!rw_data)
+ return 0; /* EPERM check */
+
+ rw_data[2] = rw_data[1] + 5;
+
+ return hid_ctx->size;
+}
+
+SEC("?struct_ops/hid_device_event")
+int BPF_PROG(hid_subprog_first_event, struct hid_bpf_ctx *hid_ctx, enum hid_report_type type)
+{
+ return __hid_subprog_first_event(hid_ctx, type);
+}
+
+SEC(".struct_ops.link")
+struct hid_bpf_ops subprog_first_event = {
+ .hid_device_event = (void *)hid_subprog_first_event,
+ .hid_id = 2,
+};
+
+SEC("?struct_ops/hid_device_event")
+int BPF_PROG(hid_second_event, struct hid_bpf_ctx *hid_ctx, enum hid_report_type type)
{
__u8 *rw_data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 4 /* size */);
@@ -42,8 +72,13 @@ int BPF_PROG(hid_second_event, struct hid_bpf_ctx *hid_ctx)
return hid_ctx->size;
}
-SEC("?fmod_ret/hid_bpf_device_event")
-int BPF_PROG(hid_change_report_id, struct hid_bpf_ctx *hid_ctx)
+SEC(".struct_ops.link")
+struct hid_bpf_ops second_event = {
+ .hid_device_event = (void *)hid_second_event,
+};
+
+SEC("?struct_ops/hid_device_event")
+int BPF_PROG(hid_change_report_id, struct hid_bpf_ctx *hid_ctx, enum hid_report_type type)
{
__u8 *rw_data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 3 /* size */);
@@ -55,15 +90,10 @@ int BPF_PROG(hid_change_report_id, struct hid_bpf_ctx *hid_ctx)
return 9;
}
-SEC("syscall")
-int attach_prog(struct attach_prog_args *ctx)
-{
- ctx->retval = hid_bpf_attach_prog(ctx->hid,
- ctx->prog_fd,
- ctx->insert_head ? HID_BPF_FLAG_INSERT_HEAD :
- HID_BPF_FLAG_NONE);
- return 0;
-}
+SEC(".struct_ops.link")
+struct hid_bpf_ops change_report_id = {
+ .hid_device_event = (void *)hid_change_report_id,
+};
struct hid_hw_request_syscall_args {
/* data needs to come at offset 0 so we can use it in calls */
@@ -101,6 +131,52 @@ int hid_user_raw_request(struct hid_hw_request_syscall_args *args)
return 0;
}
+SEC("syscall")
+int hid_user_output_report(struct hid_hw_request_syscall_args *args)
+{
+ struct hid_bpf_ctx *ctx;
+ const size_t size = args->size;
+ int i, ret = 0;
+
+ if (size > sizeof(args->data))
+ return -7; /* -E2BIG */
+
+ ctx = hid_bpf_allocate_context(args->hid);
+ if (!ctx)
+ return -1; /* EPERM check */
+
+ ret = hid_bpf_hw_output_report(ctx,
+ args->data,
+ size);
+ args->retval = ret;
+
+ hid_bpf_release_context(ctx);
+
+ return 0;
+}
+
+SEC("syscall")
+int hid_user_input_report(struct hid_hw_request_syscall_args *args)
+{
+ struct hid_bpf_ctx *ctx;
+ const size_t size = args->size;
+ int i, ret = 0;
+
+ if (size > sizeof(args->data))
+ return -7; /* -E2BIG */
+
+ ctx = hid_bpf_allocate_context(args->hid);
+ if (!ctx)
+ return -1; /* EPERM check */
+
+ ret = hid_bpf_input_report(ctx, HID_INPUT_REPORT, args->data, size);
+ args->retval = ret;
+
+ hid_bpf_release_context(ctx);
+
+ return 0;
+}
+
static const __u8 rdesc[] = {
0x05, 0x01, /* USAGE_PAGE (Generic Desktop) */
0x09, 0x32, /* USAGE (Z) */
@@ -135,7 +211,12 @@ static const __u8 rdesc[] = {
0xc0, /* END_COLLECTION */
};
-SEC("?fmod_ret/hid_bpf_rdesc_fixup")
+/*
+ * the following program is marked as sleepable (struct_ops.s).
+ * This is not strictly mandatory but is a nice test for
+ * sleepable struct_ops
+ */
+SEC("?struct_ops.s/hid_rdesc_fixup")
int BPF_PROG(hid_rdesc_fixup, struct hid_bpf_ctx *hid_ctx)
{
__u8 *data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 4096 /* size */);
@@ -154,8 +235,13 @@ int BPF_PROG(hid_rdesc_fixup, struct hid_bpf_ctx *hid_ctx)
return sizeof(rdesc) + 73;
}
-SEC("?fmod_ret/hid_bpf_device_event")
-int BPF_PROG(hid_test_insert1, struct hid_bpf_ctx *hid_ctx)
+SEC(".struct_ops.link")
+struct hid_bpf_ops rdesc_fixup = {
+ .hid_rdesc_fixup = (void *)hid_rdesc_fixup,
+};
+
+SEC("?struct_ops/hid_device_event")
+int BPF_PROG(hid_test_insert1, struct hid_bpf_ctx *hid_ctx, enum hid_report_type type)
{
__u8 *data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 4 /* size */);
@@ -171,8 +257,14 @@ int BPF_PROG(hid_test_insert1, struct hid_bpf_ctx *hid_ctx)
return 0;
}
-SEC("?fmod_ret/hid_bpf_device_event")
-int BPF_PROG(hid_test_insert2, struct hid_bpf_ctx *hid_ctx)
+SEC(".struct_ops.link")
+struct hid_bpf_ops test_insert1 = {
+ .hid_device_event = (void *)hid_test_insert1,
+ .flags = BPF_F_BEFORE,
+};
+
+SEC("?struct_ops/hid_device_event")
+int BPF_PROG(hid_test_insert2, struct hid_bpf_ctx *hid_ctx, enum hid_report_type type)
{
__u8 *data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 4 /* size */);
@@ -188,8 +280,13 @@ int BPF_PROG(hid_test_insert2, struct hid_bpf_ctx *hid_ctx)
return 0;
}
-SEC("?fmod_ret/hid_bpf_device_event")
-int BPF_PROG(hid_test_insert3, struct hid_bpf_ctx *hid_ctx)
+SEC(".struct_ops.link")
+struct hid_bpf_ops test_insert2 = {
+ .hid_device_event = (void *)hid_test_insert2,
+};
+
+SEC("?struct_ops/hid_device_event")
+int BPF_PROG(hid_test_insert3, struct hid_bpf_ctx *hid_ctx, enum hid_report_type type)
{
__u8 *data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 4 /* size */);
@@ -204,3 +301,300 @@ int BPF_PROG(hid_test_insert3, struct hid_bpf_ctx *hid_ctx)
return 0;
}
+
+SEC(".struct_ops.link")
+struct hid_bpf_ops test_insert3 = {
+ .hid_device_event = (void *)hid_test_insert3,
+};
+
+SEC("?struct_ops/hid_hw_request")
+int BPF_PROG(hid_test_filter_raw_request, struct hid_bpf_ctx *hctx, unsigned char reportnum,
+ enum hid_report_type rtype, enum hid_class_request reqtype, __u64 source)
+{
+ return -20;
+}
+
+SEC(".struct_ops.link")
+struct hid_bpf_ops test_filter_raw_request = {
+ .hid_hw_request = (void *)hid_test_filter_raw_request,
+};
+
+static struct file *current_file;
+
+SEC("fentry/hidraw_open")
+int BPF_PROG(hidraw_open, struct inode *inode, struct file *file)
+{
+ current_file = file;
+ return 0;
+}
+
+SEC("?struct_ops.s/hid_hw_request")
+int BPF_PROG(hid_test_hidraw_raw_request, struct hid_bpf_ctx *hctx, unsigned char reportnum,
+ enum hid_report_type rtype, enum hid_class_request reqtype, __u64 source)
+{
+ __u8 *data = hid_bpf_get_data(hctx, 0 /* offset */, 3 /* size */);
+ int ret;
+
+ if (!data)
+ return 0; /* EPERM check */
+
+ /* check if the incoming request comes from our hidraw operation */
+ if (source == (__u64)current_file) {
+ data[0] = reportnum;
+
+ ret = hid_bpf_hw_request(hctx, data, 2, rtype, reqtype);
+ if (ret != 2)
+ return -1;
+ data[0] = reportnum + 1;
+ data[1] = reportnum + 2;
+ data[2] = reportnum + 3;
+ return 3;
+ }
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct hid_bpf_ops test_hidraw_raw_request = {
+ .hid_hw_request = (void *)hid_test_hidraw_raw_request,
+};
+
+SEC("?struct_ops.s/hid_hw_request")
+int BPF_PROG(hid_test_infinite_loop_raw_request, struct hid_bpf_ctx *hctx, unsigned char reportnum,
+ enum hid_report_type rtype, enum hid_class_request reqtype, __u64 source)
+{
+ __u8 *data = hid_bpf_get_data(hctx, 0 /* offset */, 3 /* size */);
+ int ret;
+
+ if (!data)
+ return 0; /* EPERM check */
+
+ /* always forward the request as-is to the device, hid-bpf should prevent
+ * infinite loops.
+ */
+ data[0] = reportnum;
+
+ ret = hid_bpf_hw_request(hctx, data, 2, rtype, reqtype);
+ if (ret == 2)
+ return 3;
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct hid_bpf_ops test_infinite_loop_raw_request = {
+ .hid_hw_request = (void *)hid_test_infinite_loop_raw_request,
+};
+
+SEC("?struct_ops/hid_hw_output_report")
+int BPF_PROG(hid_test_filter_output_report, struct hid_bpf_ctx *hctx, unsigned char reportnum,
+ enum hid_report_type rtype, enum hid_class_request reqtype, __u64 source)
+{
+ return -25;
+}
+
+SEC(".struct_ops.link")
+struct hid_bpf_ops test_filter_output_report = {
+ .hid_hw_output_report = (void *)hid_test_filter_output_report,
+};
+
+SEC("?struct_ops.s/hid_hw_output_report")
+int BPF_PROG(hid_test_hidraw_output_report, struct hid_bpf_ctx *hctx, __u64 source)
+{
+ __u8 *data = hid_bpf_get_data(hctx, 0 /* offset */, 3 /* size */);
+ int ret;
+
+ if (!data)
+ return 0; /* EPERM check */
+
+ /* check if the incoming request comes from our hidraw operation */
+ if (source == (__u64)current_file)
+ return hid_bpf_hw_output_report(hctx, data, 2);
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct hid_bpf_ops test_hidraw_output_report = {
+ .hid_hw_output_report = (void *)hid_test_hidraw_output_report,
+};
+
+SEC("?struct_ops.s/hid_hw_output_report")
+int BPF_PROG(hid_test_infinite_loop_output_report, struct hid_bpf_ctx *hctx, __u64 source)
+{
+ __u8 *data = hid_bpf_get_data(hctx, 0 /* offset */, 3 /* size */);
+ int ret;
+
+ if (!data)
+ return 0; /* EPERM check */
+
+ /* always forward the request as-is to the device, hid-bpf should prevent
+ * infinite loops.
+ */
+
+ ret = hid_bpf_hw_output_report(hctx, data, 2);
+ if (ret == 2)
+ return 2;
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct hid_bpf_ops test_infinite_loop_output_report = {
+ .hid_hw_output_report = (void *)hid_test_infinite_loop_output_report,
+};
+
+struct elem {
+ struct bpf_wq work;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} hmap SEC(".maps");
+
+static int wq_cb_sleepable(void *map, int *key, void *work)
+{
+ __u8 buf[9] = {2, 3, 4, 5, 6, 7, 8, 9, 10};
+ struct hid_bpf_ctx *hid_ctx;
+
+ hid_ctx = hid_bpf_allocate_context(*key);
+ if (!hid_ctx)
+ return 0; /* EPERM check */
+
+ hid_bpf_input_report(hid_ctx, HID_INPUT_REPORT, buf, sizeof(buf));
+
+ hid_bpf_release_context(hid_ctx);
+
+ return 0;
+}
+
+static int test_inject_input_report_callback(int *key)
+{
+ struct elem init = {}, *val;
+ struct bpf_wq *wq;
+
+ if (bpf_map_update_elem(&hmap, key, &init, 0))
+ return -1;
+
+ val = bpf_map_lookup_elem(&hmap, key);
+ if (!val)
+ return -2;
+
+ wq = &val->work;
+ if (bpf_wq_init(wq, &hmap, 0) != 0)
+ return -3;
+
+ if (bpf_wq_set_callback(wq, wq_cb_sleepable, 0))
+ return -4;
+
+ if (bpf_wq_start(wq, 0))
+ return -5;
+
+ return 0;
+}
+
+SEC("?struct_ops/hid_device_event")
+int BPF_PROG(hid_test_multiply_events_wq, struct hid_bpf_ctx *hid_ctx, enum hid_report_type type)
+{
+ __u8 *data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 9 /* size */);
+ int hid = hid_ctx->hid->id;
+ int ret;
+
+ if (!data)
+ return 0; /* EPERM check */
+
+ if (data[0] != 1)
+ return 0;
+
+ ret = test_inject_input_report_callback(&hid);
+ if (ret)
+ return ret;
+
+ data[1] += 5;
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct hid_bpf_ops test_multiply_events_wq = {
+ .hid_device_event = (void *)hid_test_multiply_events_wq,
+};
+
+SEC("?struct_ops/hid_device_event")
+int BPF_PROG(hid_test_multiply_events, struct hid_bpf_ctx *hid_ctx, enum hid_report_type type)
+{
+ __u8 *data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 9 /* size */);
+ __u8 buf[9];
+ int ret;
+
+ if (!data)
+ return 0; /* EPERM check */
+
+ if (data[0] != 1)
+ return 0;
+
+ /*
+ * we have to use an intermediate buffer as hid_bpf_input_report
+ * will memset data to \0
+ */
+ __builtin_memcpy(buf, data, sizeof(buf));
+
+ buf[0] = 2;
+ buf[1] += 5;
+ ret = hid_bpf_try_input_report(hid_ctx, HID_INPUT_REPORT, buf, sizeof(buf));
+ if (ret < 0)
+ return ret;
+
+ /*
+ * In real world we should reset the original buffer as data might be garbage now,
+ * but it actually now has the content of 'buf'
+ */
+ data[1] += 5;
+
+ return 9;
+}
+
+SEC(".struct_ops.link")
+struct hid_bpf_ops test_multiply_events = {
+ .hid_device_event = (void *)hid_test_multiply_events,
+};
+
+SEC("?struct_ops/hid_device_event")
+int BPF_PROG(hid_test_infinite_loop_input_report, struct hid_bpf_ctx *hctx,
+ enum hid_report_type report_type, __u64 source)
+{
+ __u8 *data = hid_bpf_get_data(hctx, 0 /* offset */, 6 /* size */);
+ __u8 buf[6];
+
+ if (!data)
+ return 0; /* EPERM check */
+
+ /*
+ * we have to use an intermediate buffer as hid_bpf_input_report
+ * will memset data to \0
+ */
+ __builtin_memcpy(buf, data, sizeof(buf));
+
+ /* always forward the request as-is to the device, hid-bpf should prevent
+ * infinite loops.
+ * the return value is ignored so the event is passing to userspace.
+ */
+
+ hid_bpf_try_input_report(hctx, report_type, buf, sizeof(buf));
+
+ /* each time we process the event, we increment by one data[1]:
+ * after each successful call to hid_bpf_try_input_report, buf
+ * has been memcopied into data by the kernel.
+ */
+ data[1] += 1;
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct hid_bpf_ops test_infinite_loop_input_report = {
+ .hid_device_event = (void *)hid_test_infinite_loop_input_report,
+};
diff --git a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h
index 65e657ac1198..e5db897586bb 100644
--- a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h
+++ b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h
@@ -7,6 +7,7 @@
/* "undefine" structs and enums in vmlinux.h, because we "override" them below */
#define hid_bpf_ctx hid_bpf_ctx___not_used
+#define hid_bpf_ops hid_bpf_ops___not_used
#define hid_report_type hid_report_type___not_used
#define hid_class_request hid_class_request___not_used
#define hid_bpf_attach_flags hid_bpf_attach_flags___not_used
@@ -20,13 +21,11 @@
#define HID_REQ_SET_REPORT HID_REQ_SET_REPORT___not_used
#define HID_REQ_SET_IDLE HID_REQ_SET_IDLE___not_used
#define HID_REQ_SET_PROTOCOL HID_REQ_SET_PROTOCOL___not_used
-#define HID_BPF_FLAG_NONE HID_BPF_FLAG_NONE___not_used
-#define HID_BPF_FLAG_INSERT_HEAD HID_BPF_FLAG_INSERT_HEAD___not_used
-#define HID_BPF_FLAG_MAX HID_BPF_FLAG_MAX___not_used
#include "vmlinux.h"
#undef hid_bpf_ctx
+#undef hid_bpf_ops
#undef hid_report_type
#undef hid_class_request
#undef hid_bpf_attach_flags
@@ -40,9 +39,6 @@
#undef HID_REQ_SET_REPORT
#undef HID_REQ_SET_IDLE
#undef HID_REQ_SET_PROTOCOL
-#undef HID_BPF_FLAG_NONE
-#undef HID_BPF_FLAG_INSERT_HEAD
-#undef HID_BPF_FLAG_MAX
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
@@ -57,10 +53,8 @@ enum hid_report_type {
};
struct hid_bpf_ctx {
- __u32 index;
- const struct hid_device *hid;
+ struct hid_device *hid;
__u32 allocated_size;
- enum hid_report_type report_type;
union {
__s32 retval;
__s32 size;
@@ -76,17 +70,28 @@ enum hid_class_request {
HID_REQ_SET_PROTOCOL = 0x0B,
};
-enum hid_bpf_attach_flags {
- HID_BPF_FLAG_NONE = 0,
- HID_BPF_FLAG_INSERT_HEAD = _BITUL(0),
- HID_BPF_FLAG_MAX,
+struct hid_bpf_ops {
+ int hid_id;
+ u32 flags;
+ struct list_head list;
+ int (*hid_device_event)(struct hid_bpf_ctx *ctx, enum hid_report_type report_type,
+ u64 source);
+ int (*hid_rdesc_fixup)(struct hid_bpf_ctx *ctx);
+ int (*hid_hw_request)(struct hid_bpf_ctx *ctx, unsigned char reportnum,
+ enum hid_report_type rtype, enum hid_class_request reqtype,
+ u64 source);
+ int (*hid_hw_output_report)(struct hid_bpf_ctx *ctx, u64 source);
+ struct hid_device *hdev;
};
+#ifndef BPF_F_BEFORE
+#define BPF_F_BEFORE (1U << 3)
+#endif
+
/* following are kfuncs exported by HID for HID-BPF */
extern __u8 *hid_bpf_get_data(struct hid_bpf_ctx *ctx,
unsigned int offset,
const size_t __sz) __ksym;
-extern int hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, u32 flags) __ksym;
extern struct hid_bpf_ctx *hid_bpf_allocate_context(unsigned int hid_id) __ksym;
extern void hid_bpf_release_context(struct hid_bpf_ctx *ctx) __ksym;
extern int hid_bpf_hw_request(struct hid_bpf_ctx *ctx,
@@ -94,5 +99,24 @@ extern int hid_bpf_hw_request(struct hid_bpf_ctx *ctx,
size_t buf__sz,
enum hid_report_type type,
enum hid_class_request reqtype) __ksym;
+extern int hid_bpf_hw_output_report(struct hid_bpf_ctx *ctx,
+ __u8 *buf, size_t buf__sz) __ksym;
+extern int hid_bpf_input_report(struct hid_bpf_ctx *ctx,
+ enum hid_report_type type,
+ __u8 *data,
+ size_t buf__sz) __ksym;
+extern int hid_bpf_try_input_report(struct hid_bpf_ctx *ctx,
+ enum hid_report_type type,
+ __u8 *data,
+ size_t buf__sz) __ksym;
+
+/* bpf_wq implementation */
+extern int bpf_wq_init(struct bpf_wq *wq, void *p__map, unsigned int flags) __weak __ksym;
+extern int bpf_wq_start(struct bpf_wq *wq, unsigned int flags) __weak __ksym;
+extern int bpf_wq_set_callback_impl(struct bpf_wq *wq,
+ int (callback_fn)(void *map, int *key, void *wq),
+ unsigned int flags__k, void *aux__ign) __ksym;
+#define bpf_wq_set_callback(timer, cb, flags) \
+ bpf_wq_set_callback_impl(timer, cb, flags, NULL)
#endif /* __HID_BPF_HELPERS_H */
diff --git a/tools/testing/selftests/hid/tests/base.py b/tools/testing/selftests/hid/tests/base.py
index 51433063b227..3a465768e507 100644
--- a/tools/testing/selftests/hid/tests/base.py
+++ b/tools/testing/selftests/hid/tests/base.py
@@ -8,11 +8,13 @@
import libevdev
import os
import pytest
+import shutil
+import subprocess
import time
import logging
-from hidtools.device.base_device import BaseDevice, EvdevMatch, SysfsFile
+from .base_device import BaseDevice, EvdevMatch, SysfsFile
from pathlib import Path
from typing import Final, List, Tuple
@@ -157,6 +159,17 @@ class BaseTestCase:
# for example ("playstation", "hid-playstation")
kernel_modules: List[Tuple[str, str]] = []
+ # List of in kernel HID-BPF object files to load
+ # before starting the test
+ # Any existing pre-loaded HID-BPF module will be removed
+ # before the ones in this list will be manually loaded.
+ # Each Element is a tuple '(hid_bpf_object, rdesc_fixup_present)',
+ # for example '("xppen-ArtistPro16Gen2.bpf.o", True)'
+ # If 'rdesc_fixup_present' is True, the test needs to wait
+ # for one unbind and rebind before it can be sure the kernel is
+ # ready
+ hid_bpfs: List[Tuple[str, bool]] = []
+
def assertInputEventsIn(self, expected_events, effective_events):
effective_events = effective_events.copy()
for ev in expected_events:
@@ -211,8 +224,6 @@ class BaseTestCase:
# we don't know beforehand the name of the module from modinfo
sysfs_path = Path("/sys/module") / kernel_module.replace("-", "_")
if not sysfs_path.exists():
- import subprocess
-
ret = subprocess.run(["/usr/sbin/modprobe", kernel_module])
if ret.returncode != 0:
pytest.skip(
@@ -225,6 +236,64 @@ class BaseTestCase:
self._load_kernel_module(kernel_driver, kernel_module)
yield
+ def load_hid_bpfs(self):
+ script_dir = Path(os.path.dirname(os.path.realpath(__file__)))
+ root_dir = (script_dir / "../../../../..").resolve()
+ bpf_dir = root_dir / "drivers/hid/bpf/progs"
+
+ udev_hid_bpf = shutil.which("udev-hid-bpf")
+ if not udev_hid_bpf:
+ pytest.skip("udev-hid-bpf not found in $PATH, skipping")
+
+ wait = False
+ for _, rdesc_fixup in self.hid_bpfs:
+ if rdesc_fixup:
+ wait = True
+
+ for hid_bpf, _ in self.hid_bpfs:
+ # We need to start `udev-hid-bpf` in the background
+ # and dispatch uhid events in case the kernel needs
+ # to fetch features on the device
+ process = subprocess.Popen(
+ [
+ "udev-hid-bpf",
+ "--verbose",
+ "add",
+ str(self.uhdev.sys_path),
+ str(bpf_dir / hid_bpf),
+ ],
+ )
+ while process.poll() is None:
+ self.uhdev.dispatch(1)
+
+ if process.poll() != 0:
+ pytest.fail(
+ f"Couldn't insert hid-bpf program '{hid_bpf}', marking the test as failed"
+ )
+
+ if wait:
+ # the HID-BPF program exports a rdesc fixup, so it needs to be
+ # unbound by the kernel and then rebound.
+ # Ensure we get the bound event exactly 2 times (one for the normal
+ # uhid loading, and then the reload from HID-BPF)
+ now = time.time()
+ while self.uhdev.kernel_ready_count < 2 and time.time() - now < 2:
+ self.uhdev.dispatch(1)
+
+ if self.uhdev.kernel_ready_count < 2:
+ pytest.fail(
+ f"Couldn't insert hid-bpf programs, marking the test as failed"
+ )
+
+ def unload_hid_bpfs(self):
+ ret = subprocess.run(
+ ["udev-hid-bpf", "--verbose", "remove", str(self.uhdev.sys_path)],
+ )
+ if ret.returncode != 0:
+ pytest.fail(
+ f"Couldn't unload hid-bpf programs, marking the test as failed"
+ )
+
@pytest.fixture()
def new_uhdev(self, load_kernel_module):
return self.create_device()
@@ -248,12 +317,18 @@ class BaseTestCase:
now = time.time()
while not self.uhdev.is_ready() and time.time() - now < 5:
self.uhdev.dispatch(1)
+
+ if self.hid_bpfs:
+ self.load_hid_bpfs()
+
if self.uhdev.get_evdev() is None:
logger.warning(
f"available list of input nodes: (default application is '{self.uhdev.application}')"
)
logger.warning(self.uhdev.input_nodes)
yield
+ if self.hid_bpfs:
+ self.unload_hid_bpfs()
self.uhdev = None
except PermissionError:
pytest.skip("Insufficient permissions, run me as root")
@@ -313,8 +388,6 @@ class HIDTestUdevRule(object):
self.reload_udev_rules()
def reload_udev_rules(self):
- import subprocess
-
subprocess.run("udevadm control --reload-rules".split())
subprocess.run("systemd-hwdb update".split())
@@ -330,10 +403,11 @@ class HIDTestUdevRule(object):
delete=False,
) as f:
f.write(
- 'KERNELS=="*input*", ATTRS{name}=="*uhid test *", ENV{LIBINPUT_IGNORE_DEVICE}="1"\n'
- )
- f.write(
- 'KERNELS=="*input*", ATTRS{name}=="*uhid test * System Multi Axis", ENV{ID_INPUT_TOUCHSCREEN}="", ENV{ID_INPUT_SYSTEM_MULTIAXIS}="1"\n'
+ """
+KERNELS=="*input*", ATTRS{name}=="*uhid test *", ENV{LIBINPUT_IGNORE_DEVICE}="1"
+KERNELS=="*hid*", ENV{HID_NAME}=="*uhid test *", ENV{HID_BPF_IGNORE_DEVICE}="1"
+KERNELS=="*input*", ATTRS{name}=="*uhid test * System Multi Axis", ENV{ID_INPUT_TOUCHSCREEN}="", ENV{ID_INPUT_SYSTEM_MULTIAXIS}="1"
+"""
)
self.rulesfile = f
diff --git a/tools/testing/selftests/hid/tests/base_device.py b/tools/testing/selftests/hid/tests/base_device.py
new file mode 100644
index 000000000000..e0515be97f83
--- /dev/null
+++ b/tools/testing/selftests/hid/tests/base_device.py
@@ -0,0 +1,421 @@
+#!/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2017 Benjamin Tissoires <benjamin.tissoires@gmail.com>
+# Copyright (c) 2017 Red Hat, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import fcntl
+import functools
+import libevdev
+import os
+
+try:
+ import pyudev
+except ImportError:
+ raise ImportError("UHID is not supported due to missing pyudev dependency")
+
+import logging
+
+import hidtools.hid as hid
+from hidtools.uhid import UHIDDevice
+from hidtools.util import BusType
+
+from pathlib import Path
+from typing import Any, ClassVar, Dict, List, Optional, Tuple, Type, Union
+
+logger = logging.getLogger("hidtools.device.base_device")
+
+
+class SysfsFile(object):
+ def __init__(self, path):
+ self.path = path
+
+ def __set_value(self, value):
+ with open(self.path, "w") as f:
+ return f.write(f"{value}\n")
+
+ def __get_value(self):
+ with open(self.path) as f:
+ return f.read().strip()
+
+ @property
+ def int_value(self) -> int:
+ return int(self.__get_value())
+
+ @int_value.setter
+ def int_value(self, v: int) -> None:
+ self.__set_value(v)
+
+ @property
+ def str_value(self) -> str:
+ return self.__get_value()
+
+ @str_value.setter
+ def str_value(self, v: str) -> None:
+ self.__set_value(v)
+
+
+class LED(object):
+ def __init__(self, sys_path):
+ self.max_brightness = SysfsFile(sys_path / "max_brightness").int_value
+ self.__brightness = SysfsFile(sys_path / "brightness")
+
+ @property
+ def brightness(self) -> int:
+ return self.__brightness.int_value
+
+ @brightness.setter
+ def brightness(self, value: int) -> None:
+ self.__brightness.int_value = value
+
+
+class PowerSupply(object):
+ """Represents Linux power_supply_class sysfs nodes."""
+
+ def __init__(self, sys_path):
+ self._capacity = SysfsFile(sys_path / "capacity")
+ self._status = SysfsFile(sys_path / "status")
+ self._type = SysfsFile(sys_path / "type")
+
+ @property
+ def capacity(self) -> int:
+ return self._capacity.int_value
+
+ @property
+ def status(self) -> str:
+ return self._status.str_value
+
+ @property
+ def type(self) -> str:
+ return self._type.str_value
+
+
+class HIDIsReady(object):
+ """
+ Companion class that binds to a kernel mechanism
+ and that allows to know when a uhid device is ready or not.
+
+ See :meth:`is_ready` for details.
+ """
+
+ def __init__(self: "HIDIsReady", uhid: UHIDDevice) -> None:
+ self.uhid = uhid
+
+ def is_ready(self: "HIDIsReady") -> bool:
+ """
+ Overwrite in subclasses: should return True or False whether
+ the attached uhid device is ready or not.
+ """
+ return False
+
+
+class UdevHIDIsReady(HIDIsReady):
+ _pyudev_context: ClassVar[Optional[pyudev.Context]] = None
+ _pyudev_monitor: ClassVar[Optional[pyudev.Monitor]] = None
+ _uhid_devices: ClassVar[Dict[int, Tuple[bool, int]]] = {}
+
+ def __init__(self: "UdevHIDIsReady", uhid: UHIDDevice) -> None:
+ super().__init__(uhid)
+ self._init_pyudev()
+
+ @classmethod
+ def _init_pyudev(cls: Type["UdevHIDIsReady"]) -> None:
+ if cls._pyudev_context is None:
+ cls._pyudev_context = pyudev.Context()
+ cls._pyudev_monitor = pyudev.Monitor.from_netlink(cls._pyudev_context)
+ cls._pyudev_monitor.filter_by("hid")
+ cls._pyudev_monitor.start()
+
+ UHIDDevice._append_fd_to_poll(
+ cls._pyudev_monitor.fileno(), cls._cls_udev_event_callback
+ )
+
+ @classmethod
+ def _cls_udev_event_callback(cls: Type["UdevHIDIsReady"]) -> None:
+ if cls._pyudev_monitor is None:
+ return
+ event: pyudev.Device
+ for event in iter(functools.partial(cls._pyudev_monitor.poll, 0.02), None):
+ if event.action not in ["bind", "remove", "unbind"]:
+ return
+
+ logger.debug(f"udev event: {event.action} -> {event}")
+
+ id = int(event.sys_path.strip().split(".")[-1], 16)
+
+ device_ready, count = cls._uhid_devices.get(id, (False, 0))
+
+ ready = event.action == "bind"
+ if not device_ready and ready:
+ count += 1
+ cls._uhid_devices[id] = (ready, count)
+
+ def is_ready(self: "UdevHIDIsReady") -> Tuple[bool, int]:
+ try:
+ return self._uhid_devices[self.uhid.hid_id]
+ except KeyError:
+ return (False, 0)
+
+
+class EvdevMatch(object):
+ def __init__(
+ self: "EvdevMatch",
+ *,
+ requires: List[Any] = [],
+ excludes: List[Any] = [],
+ req_properties: List[Any] = [],
+ excl_properties: List[Any] = [],
+ ) -> None:
+ self.requires = requires
+ self.excludes = excludes
+ self.req_properties = req_properties
+ self.excl_properties = excl_properties
+
+ def is_a_match(self: "EvdevMatch", evdev: libevdev.Device) -> bool:
+ for m in self.requires:
+ if not evdev.has(m):
+ return False
+ for m in self.excludes:
+ if evdev.has(m):
+ return False
+ for p in self.req_properties:
+ if not evdev.has_property(p):
+ return False
+ for p in self.excl_properties:
+ if evdev.has_property(p):
+ return False
+ return True
+
+
+class EvdevDevice(object):
+ """
+ Represents an Evdev node and its properties.
+ This is a stub for the libevdev devices, as they are relying on
+ uevent to get the data, saving us some ioctls to fetch the names
+ and properties.
+ """
+
+ def __init__(self: "EvdevDevice", sysfs: Path) -> None:
+ self.sysfs = sysfs
+ self.event_node: Any = None
+ self.libevdev: Optional[libevdev.Device] = None
+
+ self.uevents = {}
+ # all of the interesting properties are stored in the input uevent, so in the parent
+ # so convert the uevent file of the parent input node into a dict
+ with open(sysfs.parent / "uevent") as f:
+ for line in f.readlines():
+ key, value = line.strip().split("=")
+ self.uevents[key] = value.strip('"')
+
+ # we open all evdev nodes in order to not miss any event
+ self.open()
+
+ @property
+ def name(self: "EvdevDevice") -> str:
+ assert "NAME" in self.uevents
+
+ return self.uevents["NAME"]
+
+ @property
+ def evdev(self: "EvdevDevice") -> Path:
+ return Path("/dev/input") / self.sysfs.name
+
+ def matches_application(
+ self: "EvdevDevice", application: str, matches: Dict[str, EvdevMatch]
+ ) -> bool:
+ if self.libevdev is None:
+ return False
+
+ if application in matches:
+ return matches[application].is_a_match(self.libevdev)
+
+ logger.error(
+ f"application '{application}' is unknown, please update/fix hid-tools"
+ )
+ assert False # hid-tools likely needs an update
+
+ def open(self: "EvdevDevice") -> libevdev.Device:
+ self.event_node = open(self.evdev, "rb")
+ self.libevdev = libevdev.Device(self.event_node)
+
+ assert self.libevdev.fd is not None
+
+ fd = self.libevdev.fd.fileno()
+ flag = fcntl.fcntl(fd, fcntl.F_GETFD)
+ fcntl.fcntl(fd, fcntl.F_SETFL, flag | os.O_NONBLOCK)
+
+ return self.libevdev
+
+ def close(self: "EvdevDevice") -> None:
+ if self.libevdev is not None and self.libevdev.fd is not None:
+ self.libevdev.fd.close()
+ self.libevdev = None
+ if self.event_node is not None:
+ self.event_node.close()
+ self.event_node = None
+
+
+class BaseDevice(UHIDDevice):
+ # default _application_matches that matches nothing. This needs
+ # to be set in the subclasses to have get_evdev() working
+ _application_matches: Dict[str, EvdevMatch] = {}
+
+ def __init__(
+ self,
+ name,
+ application,
+ rdesc_str: Optional[str] = None,
+ rdesc: Optional[Union[hid.ReportDescriptor, str, bytes]] = None,
+ input_info=None,
+ ) -> None:
+ self._kernel_is_ready: HIDIsReady = UdevHIDIsReady(self)
+ if rdesc_str is None and rdesc is None:
+ raise Exception("Please provide at least a rdesc or rdesc_str")
+ super().__init__()
+ if name is None:
+ name = f"uhid gamepad test {self.__class__.__name__}"
+ if input_info is None:
+ input_info = (BusType.USB, 1, 2)
+ self.name = name
+ self.info = input_info
+ self.default_reportID = None
+ self.opened = False
+ self.started = False
+ self.application = application
+ self._input_nodes: Optional[list[EvdevDevice]] = None
+ if rdesc is None:
+ assert rdesc_str is not None
+ self.rdesc = hid.ReportDescriptor.from_human_descr(rdesc_str) # type: ignore
+ else:
+ self.rdesc = rdesc # type: ignore
+
+ @property
+ def power_supply_class(self: "BaseDevice") -> Optional[PowerSupply]:
+ ps = self.walk_sysfs("power_supply", "power_supply/*")
+ if ps is None or len(ps) < 1:
+ return None
+
+ return PowerSupply(ps[0])
+
+ @property
+ def led_classes(self: "BaseDevice") -> List[LED]:
+ leds = self.walk_sysfs("led", "**/max_brightness")
+ if leds is None:
+ return []
+
+ return [LED(led.parent) for led in leds]
+
+ @property
+ def kernel_is_ready(self: "BaseDevice") -> bool:
+ return self._kernel_is_ready.is_ready()[0] and self.started
+
+ @property
+ def kernel_ready_count(self: "BaseDevice") -> int:
+ return self._kernel_is_ready.is_ready()[1]
+
+ @property
+ def input_nodes(self: "BaseDevice") -> List[EvdevDevice]:
+ if self._input_nodes is not None:
+ return self._input_nodes
+
+ if not self.kernel_is_ready or not self.started:
+ return []
+
+ self._input_nodes = [
+ EvdevDevice(path)
+ for path in self.walk_sysfs("input", "input/input*/event*")
+ ]
+ return self._input_nodes
+
+ def match_evdev_rule(self, application, evdev):
+ """Replace this in subclasses if the device has multiple reports
+ of the same type and we need to filter based on the actual evdev
+ node.
+
+ returning True will append the corresponding report to
+ `self.input_nodes[type]`
+ returning False will ignore this report / type combination
+ for the device.
+ """
+ return True
+
+ def open(self):
+ self.opened = True
+
+ def _close_all_opened_evdev(self):
+ if self._input_nodes is not None:
+ for e in self._input_nodes:
+ e.close()
+
+ def __del__(self):
+ self._close_all_opened_evdev()
+
+ def close(self):
+ self.opened = False
+
+ def start(self, flags):
+ self.started = True
+
+ def stop(self):
+ self.started = False
+ self._close_all_opened_evdev()
+
+ def next_sync_events(self, application=None):
+ evdev = self.get_evdev(application)
+ if evdev is not None:
+ return list(evdev.events())
+ return []
+
+ @property
+ def application_matches(self: "BaseDevice") -> Dict[str, EvdevMatch]:
+ return self._application_matches
+
+ @application_matches.setter
+ def application_matches(self: "BaseDevice", data: Dict[str, EvdevMatch]) -> None:
+ self._application_matches = data
+
+ def get_evdev(self, application=None):
+ if application is None:
+ application = self.application
+
+ if len(self.input_nodes) == 0:
+ return None
+
+ assert self._input_nodes is not None
+
+ if len(self._input_nodes) == 1:
+ evdev = self._input_nodes[0]
+ if self.match_evdev_rule(application, evdev.libevdev):
+ return evdev.libevdev
+ else:
+ for _evdev in self._input_nodes:
+ if _evdev.matches_application(application, self.application_matches):
+ if self.match_evdev_rule(application, _evdev.libevdev):
+ return _evdev.libevdev
+
+ def is_ready(self):
+ """Returns whether a UHID device is ready. Can be overwritten in
+ subclasses to add extra conditions on when to consider a UHID
+ device ready. This can be:
+
+ - we need to wait on different types of input devices to be ready
+ (Touch Screen and Pen for example)
+ - we need to have at least 4 LEDs present
+ (len(self.uhdev.leds_classes) == 4)
+ - or any other combinations"""
+ return self.kernel_is_ready
diff --git a/tools/testing/selftests/hid/tests/base_gamepad.py b/tools/testing/selftests/hid/tests/base_gamepad.py
new file mode 100644
index 000000000000..ec74d75767a2
--- /dev/null
+++ b/tools/testing/selftests/hid/tests/base_gamepad.py
@@ -0,0 +1,238 @@
+# SPDX-License-Identifier: GPL-2.0
+import libevdev
+
+from .base_device import BaseDevice
+from hidtools.util import BusType
+
+
+class InvalidHIDCommunication(Exception):
+ pass
+
+
+class GamepadData(object):
+ pass
+
+
+class AxisMapping(object):
+ """Represents a mapping between a HID type
+ and an evdev event"""
+
+ def __init__(self, hid, evdev=None):
+ self.hid = hid.lower()
+
+ if evdev is None:
+ evdev = f"ABS_{hid.upper()}"
+
+ self.evdev = libevdev.evbit("EV_ABS", evdev)
+
+
+class BaseGamepad(BaseDevice):
+ buttons_map = {
+ 1: "BTN_SOUTH",
+ 2: "BTN_EAST",
+ 3: "BTN_C",
+ 4: "BTN_NORTH",
+ 5: "BTN_WEST",
+ 6: "BTN_Z",
+ 7: "BTN_TL",
+ 8: "BTN_TR",
+ 9: "BTN_TL2",
+ 10: "BTN_TR2",
+ 11: "BTN_SELECT",
+ 12: "BTN_START",
+ 13: "BTN_MODE",
+ 14: "BTN_THUMBL",
+ 15: "BTN_THUMBR",
+ }
+
+ axes_map = {
+ "left_stick": {
+ "x": AxisMapping("x"),
+ "y": AxisMapping("y"),
+ },
+ "right_stick": {
+ "x": AxisMapping("z"),
+ "y": AxisMapping("Rz"),
+ },
+ }
+
+ def __init__(self, rdesc, application="Game Pad", name=None, input_info=None):
+ assert rdesc is not None
+ super().__init__(name, application, input_info=input_info, rdesc=rdesc)
+ self.buttons = (1, 2, 3)
+ self._buttons = {}
+ self.left = (127, 127)
+ self.right = (127, 127)
+ self.hat_switch = 15
+ assert self.parsed_rdesc is not None
+
+ self.fields = []
+ for r in self.parsed_rdesc.input_reports.values():
+ if r.application_name == self.application:
+ self.fields.extend([f.usage_name for f in r])
+
+ def store_axes(self, which, gamepad, data):
+ amap = self.axes_map[which]
+ x, y = data
+ setattr(gamepad, amap["x"].hid, x)
+ setattr(gamepad, amap["y"].hid, y)
+
+ def create_report(
+ self,
+ *,
+ left=(None, None),
+ right=(None, None),
+ hat_switch=None,
+ buttons=None,
+ reportID=None,
+ application="Game Pad",
+ ):
+ """
+ Return an input report for this device.
+
+ :param left: a tuple of absolute (x, y) value of the left joypad
+ where ``None`` is "leave unchanged"
+ :param right: a tuple of absolute (x, y) value of the right joypad
+ where ``None`` is "leave unchanged"
+ :param hat_switch: an absolute angular value of the hat switch
+ (expressed in 1/8 of circle, 0 being North, 2 East)
+ where ``None`` is "leave unchanged"
+ :param buttons: a dict of index/bool for the button states,
+ where ``None`` is "leave unchanged"
+ :param reportID: the numeric report ID for this report, if needed
+ :param application: the application used to report the values
+ """
+ if buttons is not None:
+ for i, b in buttons.items():
+ if i not in self.buttons:
+ raise InvalidHIDCommunication(
+ f"button {i} is not part of this {self.application}"
+ )
+ if b is not None:
+ self._buttons[i] = b
+
+ def replace_none_in_tuple(item, default):
+ if item is None:
+ item = (None, None)
+
+ if None in item:
+ if item[0] is None:
+ item = (default[0], item[1])
+ if item[1] is None:
+ item = (item[0], default[1])
+
+ return item
+
+ right = replace_none_in_tuple(right, self.right)
+ self.right = right
+ left = replace_none_in_tuple(left, self.left)
+ self.left = left
+
+ if hat_switch is None:
+ hat_switch = self.hat_switch
+ else:
+ self.hat_switch = hat_switch
+
+ reportID = reportID or self.default_reportID
+
+ gamepad = GamepadData()
+ for i, b in self._buttons.items():
+ gamepad.__setattr__(f"b{i}", int(b) if b is not None else 0)
+
+ self.store_axes("left_stick", gamepad, left)
+ self.store_axes("right_stick", gamepad, right)
+ gamepad.hatswitch = hat_switch # type: ignore ### gamepad is by default empty
+ return super().create_report(
+ gamepad, reportID=reportID, application=application
+ )
+
+ def event(
+ self, *, left=(None, None), right=(None, None), hat_switch=None, buttons=None
+ ):
+ """
+ Send an input event on the default report ID.
+
+ :param left: a tuple of absolute (x, y) value of the left joypad
+ where ``None`` is "leave unchanged"
+ :param right: a tuple of absolute (x, y) value of the right joypad
+ where ``None`` is "leave unchanged"
+ :param hat_switch: an absolute angular value of the hat switch
+ where ``None`` is "leave unchanged"
+ :param buttons: a dict of index/bool for the button states,
+ where ``None`` is "leave unchanged"
+ """
+ r = self.create_report(
+ left=left, right=right, hat_switch=hat_switch, buttons=buttons
+ )
+ self.call_input_event(r)
+ return [r]
+
+
+class JoystickGamepad(BaseGamepad):
+ buttons_map = {
+ 1: "BTN_TRIGGER",
+ 2: "BTN_THUMB",
+ 3: "BTN_THUMB2",
+ 4: "BTN_TOP",
+ 5: "BTN_TOP2",
+ 6: "BTN_PINKIE",
+ 7: "BTN_BASE",
+ 8: "BTN_BASE2",
+ 9: "BTN_BASE3",
+ 10: "BTN_BASE4",
+ 11: "BTN_BASE5",
+ 12: "BTN_BASE6",
+ 13: "BTN_DEAD",
+ }
+
+ axes_map = {
+ "left_stick": {
+ "x": AxisMapping("x"),
+ "y": AxisMapping("y"),
+ },
+ "right_stick": {
+ "x": AxisMapping("rudder"),
+ "y": AxisMapping("throttle"),
+ },
+ }
+
+ def __init__(self, rdesc, application="Joystick", name=None, input_info=None):
+ super().__init__(rdesc, application, name, input_info)
+
+ def create_report(
+ self,
+ *,
+ left=(None, None),
+ right=(None, None),
+ hat_switch=None,
+ buttons=None,
+ reportID=None,
+ application=None,
+ ):
+ """
+ Return an input report for this device.
+
+ :param left: a tuple of absolute (x, y) value of the left joypad
+ where ``None`` is "leave unchanged"
+ :param right: a tuple of absolute (x, y) value of the right joypad
+ where ``None`` is "leave unchanged"
+ :param hat_switch: an absolute angular value of the hat switch
+ where ``None`` is "leave unchanged"
+ :param buttons: a dict of index/bool for the button states,
+ where ``None`` is "leave unchanged"
+ :param reportID: the numeric report ID for this report, if needed
+ :param application: the application for this report, if needed
+ """
+ if application is None:
+ application = "Joystick"
+ return super().create_report(
+ left=left,
+ right=right,
+ hat_switch=hat_switch,
+ buttons=buttons,
+ reportID=reportID,
+ application=application,
+ )
+
+ def store_right_joystick(self, gamepad, data):
+ gamepad.rudder, gamepad.throttle = data
diff --git a/tools/testing/selftests/hid/tests/test_gamepad.py b/tools/testing/selftests/hid/tests/test_gamepad.py
index 26c74040b796..8d5b5ffdae49 100644
--- a/tools/testing/selftests/hid/tests/test_gamepad.py
+++ b/tools/testing/selftests/hid/tests/test_gamepad.py
@@ -10,7 +10,8 @@ from . import base
import libevdev
import pytest
-from hidtools.device.base_gamepad import AsusGamepad, SaitekGamepad
+from .base_gamepad import BaseGamepad, JoystickGamepad, AxisMapping
+from hidtools.util import BusType
import logging
@@ -199,6 +200,449 @@ class BaseTest:
)
+class SaitekGamepad(JoystickGamepad):
+ # fmt: off
+ report_descriptor = [
+ 0x05, 0x01, # Usage Page (Generic Desktop) 0
+ 0x09, 0x04, # Usage (Joystick) 2
+ 0xa1, 0x01, # Collection (Application) 4
+ 0x09, 0x01, # .Usage (Pointer) 6
+ 0xa1, 0x00, # .Collection (Physical) 8
+ 0x85, 0x01, # ..Report ID (1) 10
+ 0x09, 0x30, # ..Usage (X) 12
+ 0x15, 0x00, # ..Logical Minimum (0) 14
+ 0x26, 0xff, 0x00, # ..Logical Maximum (255) 16
+ 0x35, 0x00, # ..Physical Minimum (0) 19
+ 0x46, 0xff, 0x00, # ..Physical Maximum (255) 21
+ 0x75, 0x08, # ..Report Size (8) 24
+ 0x95, 0x01, # ..Report Count (1) 26
+ 0x81, 0x02, # ..Input (Data,Var,Abs) 28
+ 0x09, 0x31, # ..Usage (Y) 30
+ 0x81, 0x02, # ..Input (Data,Var,Abs) 32
+ 0x05, 0x02, # ..Usage Page (Simulation Controls) 34
+ 0x09, 0xba, # ..Usage (Rudder) 36
+ 0x81, 0x02, # ..Input (Data,Var,Abs) 38
+ 0x09, 0xbb, # ..Usage (Throttle) 40
+ 0x81, 0x02, # ..Input (Data,Var,Abs) 42
+ 0x05, 0x09, # ..Usage Page (Button) 44
+ 0x19, 0x01, # ..Usage Minimum (1) 46
+ 0x29, 0x0c, # ..Usage Maximum (12) 48
+ 0x25, 0x01, # ..Logical Maximum (1) 50
+ 0x45, 0x01, # ..Physical Maximum (1) 52
+ 0x75, 0x01, # ..Report Size (1) 54
+ 0x95, 0x0c, # ..Report Count (12) 56
+ 0x81, 0x02, # ..Input (Data,Var,Abs) 58
+ 0x95, 0x01, # ..Report Count (1) 60
+ 0x75, 0x00, # ..Report Size (0) 62
+ 0x81, 0x03, # ..Input (Cnst,Var,Abs) 64
+ 0x05, 0x01, # ..Usage Page (Generic Desktop) 66
+ 0x09, 0x39, # ..Usage (Hat switch) 68
+ 0x25, 0x07, # ..Logical Maximum (7) 70
+ 0x46, 0x3b, 0x01, # ..Physical Maximum (315) 72
+ 0x55, 0x00, # ..Unit Exponent (0) 75
+ 0x65, 0x44, # ..Unit (Degrees^4,EngRotation) 77
+ 0x75, 0x04, # ..Report Size (4) 79
+ 0x81, 0x42, # ..Input (Data,Var,Abs,Null) 81
+ 0x65, 0x00, # ..Unit (None) 83
+ 0xc0, # .End Collection 85
+ 0x05, 0x0f, # .Usage Page (Vendor Usage Page 0x0f) 86
+ 0x09, 0x92, # .Usage (Vendor Usage 0x92) 88
+ 0xa1, 0x02, # .Collection (Logical) 90
+ 0x85, 0x02, # ..Report ID (2) 92
+ 0x09, 0xa0, # ..Usage (Vendor Usage 0xa0) 94
+ 0x09, 0x9f, # ..Usage (Vendor Usage 0x9f) 96
+ 0x25, 0x01, # ..Logical Maximum (1) 98
+ 0x45, 0x00, # ..Physical Maximum (0) 100
+ 0x75, 0x01, # ..Report Size (1) 102
+ 0x95, 0x02, # ..Report Count (2) 104
+ 0x81, 0x02, # ..Input (Data,Var,Abs) 106
+ 0x75, 0x06, # ..Report Size (6) 108
+ 0x95, 0x01, # ..Report Count (1) 110
+ 0x81, 0x03, # ..Input (Cnst,Var,Abs) 112
+ 0x09, 0x22, # ..Usage (Vendor Usage 0x22) 114
+ 0x75, 0x07, # ..Report Size (7) 116
+ 0x25, 0x7f, # ..Logical Maximum (127) 118
+ 0x81, 0x02, # ..Input (Data,Var,Abs) 120
+ 0x09, 0x94, # ..Usage (Vendor Usage 0x94) 122
+ 0x75, 0x01, # ..Report Size (1) 124
+ 0x25, 0x01, # ..Logical Maximum (1) 126
+ 0x81, 0x02, # ..Input (Data,Var,Abs) 128
+ 0xc0, # .End Collection 130
+ 0x09, 0x21, # .Usage (Vendor Usage 0x21) 131
+ 0xa1, 0x02, # .Collection (Logical) 133
+ 0x85, 0x0b, # ..Report ID (11) 135
+ 0x09, 0x22, # ..Usage (Vendor Usage 0x22) 137
+ 0x26, 0xff, 0x00, # ..Logical Maximum (255) 139
+ 0x75, 0x08, # ..Report Size (8) 142
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 144
+ 0x09, 0x53, # ..Usage (Vendor Usage 0x53) 146
+ 0x25, 0x0a, # ..Logical Maximum (10) 148
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 150
+ 0x09, 0x50, # ..Usage (Vendor Usage 0x50) 152
+ 0x27, 0xfe, 0xff, 0x00, 0x00, # ..Logical Maximum (65534) 154
+ 0x47, 0xfe, 0xff, 0x00, 0x00, # ..Physical Maximum (65534) 159
+ 0x75, 0x10, # ..Report Size (16) 164
+ 0x55, 0xfd, # ..Unit Exponent (237) 166
+ 0x66, 0x01, 0x10, # ..Unit (Seconds,SILinear) 168
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 171
+ 0x55, 0x00, # ..Unit Exponent (0) 173
+ 0x65, 0x00, # ..Unit (None) 175
+ 0x09, 0x54, # ..Usage (Vendor Usage 0x54) 177
+ 0x55, 0xfd, # ..Unit Exponent (237) 179
+ 0x66, 0x01, 0x10, # ..Unit (Seconds,SILinear) 181
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 184
+ 0x55, 0x00, # ..Unit Exponent (0) 186
+ 0x65, 0x00, # ..Unit (None) 188
+ 0x09, 0xa7, # ..Usage (Vendor Usage 0xa7) 190
+ 0x55, 0xfd, # ..Unit Exponent (237) 192
+ 0x66, 0x01, 0x10, # ..Unit (Seconds,SILinear) 194
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 197
+ 0x55, 0x00, # ..Unit Exponent (0) 199
+ 0x65, 0x00, # ..Unit (None) 201
+ 0xc0, # .End Collection 203
+ 0x09, 0x5a, # .Usage (Vendor Usage 0x5a) 204
+ 0xa1, 0x02, # .Collection (Logical) 206
+ 0x85, 0x0c, # ..Report ID (12) 208
+ 0x09, 0x22, # ..Usage (Vendor Usage 0x22) 210
+ 0x26, 0xff, 0x00, # ..Logical Maximum (255) 212
+ 0x45, 0x00, # ..Physical Maximum (0) 215
+ 0x75, 0x08, # ..Report Size (8) 217
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 219
+ 0x09, 0x5c, # ..Usage (Vendor Usage 0x5c) 221
+ 0x26, 0x10, 0x27, # ..Logical Maximum (10000) 223
+ 0x46, 0x10, 0x27, # ..Physical Maximum (10000) 226
+ 0x75, 0x10, # ..Report Size (16) 229
+ 0x55, 0xfd, # ..Unit Exponent (237) 231
+ 0x66, 0x01, 0x10, # ..Unit (Seconds,SILinear) 233
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 236
+ 0x55, 0x00, # ..Unit Exponent (0) 238
+ 0x65, 0x00, # ..Unit (None) 240
+ 0x09, 0x5b, # ..Usage (Vendor Usage 0x5b) 242
+ 0x25, 0x7f, # ..Logical Maximum (127) 244
+ 0x75, 0x08, # ..Report Size (8) 246
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 248
+ 0x09, 0x5e, # ..Usage (Vendor Usage 0x5e) 250
+ 0x26, 0x10, 0x27, # ..Logical Maximum (10000) 252
+ 0x75, 0x10, # ..Report Size (16) 255
+ 0x55, 0xfd, # ..Unit Exponent (237) 257
+ 0x66, 0x01, 0x10, # ..Unit (Seconds,SILinear) 259
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 262
+ 0x55, 0x00, # ..Unit Exponent (0) 264
+ 0x65, 0x00, # ..Unit (None) 266
+ 0x09, 0x5d, # ..Usage (Vendor Usage 0x5d) 268
+ 0x25, 0x7f, # ..Logical Maximum (127) 270
+ 0x75, 0x08, # ..Report Size (8) 272
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 274
+ 0xc0, # .End Collection 276
+ 0x09, 0x73, # .Usage (Vendor Usage 0x73) 277
+ 0xa1, 0x02, # .Collection (Logical) 279
+ 0x85, 0x0d, # ..Report ID (13) 281
+ 0x09, 0x22, # ..Usage (Vendor Usage 0x22) 283
+ 0x26, 0xff, 0x00, # ..Logical Maximum (255) 285
+ 0x45, 0x00, # ..Physical Maximum (0) 288
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 290
+ 0x09, 0x70, # ..Usage (Vendor Usage 0x70) 292
+ 0x15, 0x81, # ..Logical Minimum (-127) 294
+ 0x25, 0x7f, # ..Logical Maximum (127) 296
+ 0x36, 0xf0, 0xd8, # ..Physical Minimum (-10000) 298
+ 0x46, 0x10, 0x27, # ..Physical Maximum (10000) 301
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 304
+ 0xc0, # .End Collection 306
+ 0x09, 0x6e, # .Usage (Vendor Usage 0x6e) 307
+ 0xa1, 0x02, # .Collection (Logical) 309
+ 0x85, 0x0e, # ..Report ID (14) 311
+ 0x09, 0x22, # ..Usage (Vendor Usage 0x22) 313
+ 0x15, 0x00, # ..Logical Minimum (0) 315
+ 0x26, 0xff, 0x00, # ..Logical Maximum (255) 317
+ 0x35, 0x00, # ..Physical Minimum (0) 320
+ 0x45, 0x00, # ..Physical Maximum (0) 322
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 324
+ 0x09, 0x70, # ..Usage (Vendor Usage 0x70) 326
+ 0x25, 0x7f, # ..Logical Maximum (127) 328
+ 0x46, 0x10, 0x27, # ..Physical Maximum (10000) 330
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 333
+ 0x09, 0x6f, # ..Usage (Vendor Usage 0x6f) 335
+ 0x15, 0x81, # ..Logical Minimum (-127) 337
+ 0x36, 0xf0, 0xd8, # ..Physical Minimum (-10000) 339
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 342
+ 0x09, 0x71, # ..Usage (Vendor Usage 0x71) 344
+ 0x15, 0x00, # ..Logical Minimum (0) 346
+ 0x26, 0xff, 0x00, # ..Logical Maximum (255) 348
+ 0x35, 0x00, # ..Physical Minimum (0) 351
+ 0x46, 0x68, 0x01, # ..Physical Maximum (360) 353
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 356
+ 0x09, 0x72, # ..Usage (Vendor Usage 0x72) 358
+ 0x75, 0x10, # ..Report Size (16) 360
+ 0x26, 0x10, 0x27, # ..Logical Maximum (10000) 362
+ 0x46, 0x10, 0x27, # ..Physical Maximum (10000) 365
+ 0x55, 0xfd, # ..Unit Exponent (237) 368
+ 0x66, 0x01, 0x10, # ..Unit (Seconds,SILinear) 370
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 373
+ 0x55, 0x00, # ..Unit Exponent (0) 375
+ 0x65, 0x00, # ..Unit (None) 377
+ 0xc0, # .End Collection 379
+ 0x09, 0x77, # .Usage (Vendor Usage 0x77) 380
+ 0xa1, 0x02, # .Collection (Logical) 382
+ 0x85, 0x51, # ..Report ID (81) 384
+ 0x09, 0x22, # ..Usage (Vendor Usage 0x22) 386
+ 0x25, 0x7f, # ..Logical Maximum (127) 388
+ 0x45, 0x00, # ..Physical Maximum (0) 390
+ 0x75, 0x08, # ..Report Size (8) 392
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 394
+ 0x09, 0x78, # ..Usage (Vendor Usage 0x78) 396
+ 0xa1, 0x02, # ..Collection (Logical) 398
+ 0x09, 0x7b, # ...Usage (Vendor Usage 0x7b) 400
+ 0x09, 0x79, # ...Usage (Vendor Usage 0x79) 402
+ 0x09, 0x7a, # ...Usage (Vendor Usage 0x7a) 404
+ 0x15, 0x01, # ...Logical Minimum (1) 406
+ 0x25, 0x03, # ...Logical Maximum (3) 408
+ 0x91, 0x00, # ...Output (Data,Arr,Abs) 410
+ 0xc0, # ..End Collection 412
+ 0x09, 0x7c, # ..Usage (Vendor Usage 0x7c) 413
+ 0x15, 0x00, # ..Logical Minimum (0) 415
+ 0x26, 0xfe, 0x00, # ..Logical Maximum (254) 417
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 420
+ 0xc0, # .End Collection 422
+ 0x09, 0x92, # .Usage (Vendor Usage 0x92) 423
+ 0xa1, 0x02, # .Collection (Logical) 425
+ 0x85, 0x52, # ..Report ID (82) 427
+ 0x09, 0x96, # ..Usage (Vendor Usage 0x96) 429
+ 0xa1, 0x02, # ..Collection (Logical) 431
+ 0x09, 0x9a, # ...Usage (Vendor Usage 0x9a) 433
+ 0x09, 0x99, # ...Usage (Vendor Usage 0x99) 435
+ 0x09, 0x97, # ...Usage (Vendor Usage 0x97) 437
+ 0x09, 0x98, # ...Usage (Vendor Usage 0x98) 439
+ 0x09, 0x9b, # ...Usage (Vendor Usage 0x9b) 441
+ 0x09, 0x9c, # ...Usage (Vendor Usage 0x9c) 443
+ 0x15, 0x01, # ...Logical Minimum (1) 445
+ 0x25, 0x06, # ...Logical Maximum (6) 447
+ 0x91, 0x00, # ...Output (Data,Arr,Abs) 449
+ 0xc0, # ..End Collection 451
+ 0xc0, # .End Collection 452
+ 0x05, 0xff, # .Usage Page (Vendor Usage Page 0xff) 453
+ 0x0a, 0x01, 0x03, # .Usage (Vendor Usage 0x301) 455
+ 0xa1, 0x02, # .Collection (Logical) 458
+ 0x85, 0x40, # ..Report ID (64) 460
+ 0x0a, 0x02, 0x03, # ..Usage (Vendor Usage 0x302) 462
+ 0xa1, 0x02, # ..Collection (Logical) 465
+ 0x1a, 0x11, 0x03, # ...Usage Minimum (785) 467
+ 0x2a, 0x20, 0x03, # ...Usage Maximum (800) 470
+ 0x25, 0x10, # ...Logical Maximum (16) 473
+ 0x91, 0x00, # ...Output (Data,Arr,Abs) 475
+ 0xc0, # ..End Collection 477
+ 0x0a, 0x03, 0x03, # ..Usage (Vendor Usage 0x303) 478
+ 0x15, 0x00, # ..Logical Minimum (0) 481
+ 0x27, 0xff, 0xff, 0x00, 0x00, # ..Logical Maximum (65535) 483
+ 0x75, 0x10, # ..Report Size (16) 488
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 490
+ 0xc0, # .End Collection 492
+ 0x05, 0x0f, # .Usage Page (Vendor Usage Page 0x0f) 493
+ 0x09, 0x7d, # .Usage (Vendor Usage 0x7d) 495
+ 0xa1, 0x02, # .Collection (Logical) 497
+ 0x85, 0x43, # ..Report ID (67) 499
+ 0x09, 0x7e, # ..Usage (Vendor Usage 0x7e) 501
+ 0x26, 0x80, 0x00, # ..Logical Maximum (128) 503
+ 0x46, 0x10, 0x27, # ..Physical Maximum (10000) 506
+ 0x75, 0x08, # ..Report Size (8) 509
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 511
+ 0xc0, # .End Collection 513
+ 0x09, 0x7f, # .Usage (Vendor Usage 0x7f) 514
+ 0xa1, 0x02, # .Collection (Logical) 516
+ 0x85, 0x0b, # ..Report ID (11) 518
+ 0x09, 0x80, # ..Usage (Vendor Usage 0x80) 520
+ 0x26, 0xff, 0x7f, # ..Logical Maximum (32767) 522
+ 0x45, 0x00, # ..Physical Maximum (0) 525
+ 0x75, 0x0f, # ..Report Size (15) 527
+ 0xb1, 0x03, # ..Feature (Cnst,Var,Abs) 529
+ 0x09, 0xa9, # ..Usage (Vendor Usage 0xa9) 531
+ 0x25, 0x01, # ..Logical Maximum (1) 533
+ 0x75, 0x01, # ..Report Size (1) 535
+ 0xb1, 0x03, # ..Feature (Cnst,Var,Abs) 537
+ 0x09, 0x83, # ..Usage (Vendor Usage 0x83) 539
+ 0x26, 0xff, 0x00, # ..Logical Maximum (255) 541
+ 0x75, 0x08, # ..Report Size (8) 544
+ 0xb1, 0x03, # ..Feature (Cnst,Var,Abs) 546
+ 0xc0, # .End Collection 548
+ 0x09, 0xab, # .Usage (Vendor Usage 0xab) 549
+ 0xa1, 0x03, # .Collection (Report) 551
+ 0x85, 0x15, # ..Report ID (21) 553
+ 0x09, 0x25, # ..Usage (Vendor Usage 0x25) 555
+ 0xa1, 0x02, # ..Collection (Logical) 557
+ 0x09, 0x26, # ...Usage (Vendor Usage 0x26) 559
+ 0x09, 0x30, # ...Usage (Vendor Usage 0x30) 561
+ 0x09, 0x32, # ...Usage (Vendor Usage 0x32) 563
+ 0x09, 0x31, # ...Usage (Vendor Usage 0x31) 565
+ 0x09, 0x33, # ...Usage (Vendor Usage 0x33) 567
+ 0x09, 0x34, # ...Usage (Vendor Usage 0x34) 569
+ 0x15, 0x01, # ...Logical Minimum (1) 571
+ 0x25, 0x06, # ...Logical Maximum (6) 573
+ 0xb1, 0x00, # ...Feature (Data,Arr,Abs) 575
+ 0xc0, # ..End Collection 577
+ 0xc0, # .End Collection 578
+ 0x09, 0x89, # .Usage (Vendor Usage 0x89) 579
+ 0xa1, 0x03, # .Collection (Report) 581
+ 0x85, 0x16, # ..Report ID (22) 583
+ 0x09, 0x8b, # ..Usage (Vendor Usage 0x8b) 585
+ 0xa1, 0x02, # ..Collection (Logical) 587
+ 0x09, 0x8c, # ...Usage (Vendor Usage 0x8c) 589
+ 0x09, 0x8d, # ...Usage (Vendor Usage 0x8d) 591
+ 0x09, 0x8e, # ...Usage (Vendor Usage 0x8e) 593
+ 0x25, 0x03, # ...Logical Maximum (3) 595
+ 0xb1, 0x00, # ...Feature (Data,Arr,Abs) 597
+ 0xc0, # ..End Collection 599
+ 0x09, 0x22, # ..Usage (Vendor Usage 0x22) 600
+ 0x15, 0x00, # ..Logical Minimum (0) 602
+ 0x26, 0xfe, 0x00, # ..Logical Maximum (254) 604
+ 0xb1, 0x02, # ..Feature (Data,Var,Abs) 607
+ 0xc0, # .End Collection 609
+ 0x09, 0x90, # .Usage (Vendor Usage 0x90) 610
+ 0xa1, 0x03, # .Collection (Report) 612
+ 0x85, 0x50, # ..Report ID (80) 614
+ 0x09, 0x22, # ..Usage (Vendor Usage 0x22) 616
+ 0x26, 0xff, 0x00, # ..Logical Maximum (255) 618
+ 0x91, 0x02, # ..Output (Data,Var,Abs) 621
+ 0xc0, # .End Collection 623
+ 0xc0, # End Collection 624
+ ]
+ # fmt: on
+
+ def __init__(self, rdesc=report_descriptor, name=None):
+ super().__init__(rdesc, name=name, input_info=(BusType.USB, 0x06A3, 0xFF0D))
+ self.buttons = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)
+
+
+class AsusGamepad(BaseGamepad):
+ # fmt: off
+ report_descriptor = [
+ 0x05, 0x01, # Usage Page (Generic Desktop) 0
+ 0x09, 0x05, # Usage (Game Pad) 2
+ 0xa1, 0x01, # Collection (Application) 4
+ 0x85, 0x01, # .Report ID (1) 6
+ 0x05, 0x09, # .Usage Page (Button) 8
+ 0x0a, 0x01, 0x00, # .Usage (Vendor Usage 0x01) 10
+ 0x0a, 0x02, 0x00, # .Usage (Vendor Usage 0x02) 13
+ 0x0a, 0x04, 0x00, # .Usage (Vendor Usage 0x04) 16
+ 0x0a, 0x05, 0x00, # .Usage (Vendor Usage 0x05) 19
+ 0x0a, 0x07, 0x00, # .Usage (Vendor Usage 0x07) 22
+ 0x0a, 0x08, 0x00, # .Usage (Vendor Usage 0x08) 25
+ 0x0a, 0x0e, 0x00, # .Usage (Vendor Usage 0x0e) 28
+ 0x0a, 0x0f, 0x00, # .Usage (Vendor Usage 0x0f) 31
+ 0x0a, 0x0d, 0x00, # .Usage (Vendor Usage 0x0d) 34
+ 0x05, 0x0c, # .Usage Page (Consumer Devices) 37
+ 0x0a, 0x24, 0x02, # .Usage (AC Back) 39
+ 0x0a, 0x23, 0x02, # .Usage (AC Home) 42
+ 0x15, 0x00, # .Logical Minimum (0) 45
+ 0x25, 0x01, # .Logical Maximum (1) 47
+ 0x75, 0x01, # .Report Size (1) 49
+ 0x95, 0x0b, # .Report Count (11) 51
+ 0x81, 0x02, # .Input (Data,Var,Abs) 53
+ 0x75, 0x01, # .Report Size (1) 55
+ 0x95, 0x01, # .Report Count (1) 57
+ 0x81, 0x03, # .Input (Cnst,Var,Abs) 59
+ 0x05, 0x01, # .Usage Page (Generic Desktop) 61
+ 0x75, 0x04, # .Report Size (4) 63
+ 0x95, 0x01, # .Report Count (1) 65
+ 0x25, 0x07, # .Logical Maximum (7) 67
+ 0x46, 0x3b, 0x01, # .Physical Maximum (315) 69
+ 0x66, 0x14, 0x00, # .Unit (Degrees,EngRotation) 72
+ 0x09, 0x39, # .Usage (Hat switch) 75
+ 0x81, 0x42, # .Input (Data,Var,Abs,Null) 77
+ 0x66, 0x00, 0x00, # .Unit (None) 79
+ 0x09, 0x01, # .Usage (Pointer) 82
+ 0xa1, 0x00, # .Collection (Physical) 84
+ 0x09, 0x30, # ..Usage (X) 86
+ 0x09, 0x31, # ..Usage (Y) 88
+ 0x09, 0x32, # ..Usage (Z) 90
+ 0x09, 0x35, # ..Usage (Rz) 92
+ 0x05, 0x02, # ..Usage Page (Simulation Controls) 94
+ 0x09, 0xc5, # ..Usage (Brake) 96
+ 0x09, 0xc4, # ..Usage (Accelerator) 98
+ 0x15, 0x00, # ..Logical Minimum (0) 100
+ 0x26, 0xff, 0x00, # ..Logical Maximum (255) 102
+ 0x35, 0x00, # ..Physical Minimum (0) 105
+ 0x46, 0xff, 0x00, # ..Physical Maximum (255) 107
+ 0x75, 0x08, # ..Report Size (8) 110
+ 0x95, 0x06, # ..Report Count (6) 112
+ 0x81, 0x02, # ..Input (Data,Var,Abs) 114
+ 0xc0, # .End Collection 116
+ 0x85, 0x02, # .Report ID (2) 117
+ 0x05, 0x08, # .Usage Page (LEDs) 119
+ 0x0a, 0x01, 0x00, # .Usage (Num Lock) 121
+ 0x0a, 0x02, 0x00, # .Usage (Caps Lock) 124
+ 0x0a, 0x03, 0x00, # .Usage (Scroll Lock) 127
+ 0x0a, 0x04, 0x00, # .Usage (Compose) 130
+ 0x15, 0x00, # .Logical Minimum (0) 133
+ 0x25, 0x01, # .Logical Maximum (1) 135
+ 0x75, 0x01, # .Report Size (1) 137
+ 0x95, 0x04, # .Report Count (4) 139
+ 0x91, 0x02, # .Output (Data,Var,Abs) 141
+ 0x75, 0x04, # .Report Size (4) 143
+ 0x95, 0x01, # .Report Count (1) 145
+ 0x91, 0x03, # .Output (Cnst,Var,Abs) 147
+ 0xc0, # End Collection 149
+ 0x05, 0x0c, # Usage Page (Consumer Devices) 150
+ 0x09, 0x01, # Usage (Consumer Control) 152
+ 0xa1, 0x01, # Collection (Application) 154
+ 0x85, 0x03, # .Report ID (3) 156
+ 0x05, 0x01, # .Usage Page (Generic Desktop) 158
+ 0x09, 0x06, # .Usage (Keyboard) 160
+ 0xa1, 0x02, # .Collection (Logical) 162
+ 0x05, 0x06, # ..Usage Page (Generic Device Controls) 164
+ 0x09, 0x20, # ..Usage (Battery Strength) 166
+ 0x15, 0x00, # ..Logical Minimum (0) 168
+ 0x26, 0xff, 0x00, # ..Logical Maximum (255) 170
+ 0x75, 0x08, # ..Report Size (8) 173
+ 0x95, 0x01, # ..Report Count (1) 175
+ 0x81, 0x02, # ..Input (Data,Var,Abs) 177
+ 0x06, 0xbc, 0xff, # ..Usage Page (Vendor Usage Page 0xffbc) 179
+ 0x0a, 0xad, 0xbd, # ..Usage (Vendor Usage 0xbdad) 182
+ 0x75, 0x08, # ..Report Size (8) 185
+ 0x95, 0x06, # ..Report Count (6) 187
+ 0x81, 0x02, # ..Input (Data,Var,Abs) 189
+ 0xc0, # .End Collection 191
+ 0xc0, # End Collection 192
+ ]
+ # fmt: on
+
+ def __init__(self, rdesc=report_descriptor, name=None):
+ super().__init__(rdesc, name=name, input_info=(BusType.USB, 0x18D1, 0x2C40))
+ self.buttons = (1, 2, 4, 5, 7, 8, 14, 15, 13)
+
+
+class RaptorMach2Joystick(JoystickGamepad):
+ axes_map = {
+ "left_stick": {
+ "x": AxisMapping("x"),
+ "y": AxisMapping("y"),
+ },
+ "right_stick": {
+ "x": AxisMapping("z"),
+ "y": AxisMapping("Rz"),
+ },
+ }
+
+ def __init__(
+ self,
+ name,
+ rdesc=None,
+ application="Joystick",
+ input_info=(BusType.USB, 0x11C0, 0x5606),
+ ):
+ super().__init__(rdesc, application, name, input_info)
+ self.buttons = (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12)
+ self.hat_switch = 240 # null value is 240 as max is 239
+
+ def event(
+ self, *, left=(None, None), right=(None, None), hat_switch=None, buttons=None
+ ):
+ if hat_switch is not None:
+ hat_switch *= 30
+
+ return super().event(
+ left=left, right=right, hat_switch=hat_switch, buttons=buttons
+ )
+
+
class TestSaitekGamepad(BaseTest.TestGamepad):
def create_device(self):
return SaitekGamepad()
@@ -207,3 +651,14 @@ class TestSaitekGamepad(BaseTest.TestGamepad):
class TestAsusGamepad(BaseTest.TestGamepad):
def create_device(self):
return AsusGamepad()
+
+
+class TestRaptorMach2Joystick(BaseTest.TestGamepad):
+ hid_bpfs = [("FR-TEC__Raptor-Mach-2.bpf.o", True)]
+
+ def create_device(self):
+ return RaptorMach2Joystick(
+ "uhid test Sanmos Group FR-TEC Raptor MACH 2",
+ rdesc="05 01 09 04 a1 01 05 01 85 01 05 01 09 30 75 10 95 01 15 00 26 ff 07 46 ff 07 81 02 05 01 09 31 75 10 95 01 15 00 26 ff 07 46 ff 07 81 02 05 01 09 33 75 10 95 01 15 00 26 ff 03 46 ff 03 81 02 05 00 09 00 75 10 95 01 15 00 26 ff 03 46 ff 03 81 02 05 01 09 32 75 10 95 01 15 00 26 ff 03 46 ff 03 81 02 05 01 09 35 75 10 95 01 15 00 26 ff 03 46 ff 03 81 02 05 01 09 34 75 10 95 01 15 00 26 ff 07 46 ff 07 81 02 05 01 09 36 75 10 95 01 15 00 26 ff 03 46 ff 03 81 02 05 09 19 01 2a 1d 00 15 00 25 01 75 01 96 80 00 81 02 05 01 09 39 26 ef 00 46 68 01 65 14 75 10 95 01 81 42 05 01 09 00 75 08 95 1d 81 01 15 00 26 ef 00 85 58 26 ff 00 46 ff 00 75 08 95 3f 09 00 91 02 85 59 75 08 95 80 09 00 b1 02 c0",
+ input_info=(BusType.USB, 0x11C0, 0x5606),
+ )
diff --git a/tools/testing/selftests/hid/tests/test_tablet.py b/tools/testing/selftests/hid/tests/test_tablet.py
index 903f19f7cbe9..a9e2de1e8861 100644
--- a/tools/testing/selftests/hid/tests/test_tablet.py
+++ b/tools/testing/selftests/hid/tests/test_tablet.py
@@ -35,6 +35,7 @@ class BtnPressed(Enum):
PRIMARY_PRESSED = libevdev.EV_KEY.BTN_STYLUS
SECONDARY_PRESSED = libevdev.EV_KEY.BTN_STYLUS2
+ THIRD_PRESSED = libevdev.EV_KEY.BTN_STYLUS3
class PenState(Enum):
@@ -44,58 +45,28 @@ class PenState(Enum):
We extend it with the various buttons when we need to check them.
"""
- PEN_IS_OUT_OF_RANGE = BtnTouch.UP, None, None
- PEN_IS_IN_RANGE = BtnTouch.UP, ToolType.PEN, None
- PEN_IS_IN_RANGE_WITH_BUTTON = BtnTouch.UP, ToolType.PEN, BtnPressed.PRIMARY_PRESSED
- PEN_IS_IN_RANGE_WITH_SECOND_BUTTON = (
- BtnTouch.UP,
- ToolType.PEN,
- BtnPressed.SECONDARY_PRESSED,
- )
- PEN_IS_IN_CONTACT = BtnTouch.DOWN, ToolType.PEN, None
- PEN_IS_IN_CONTACT_WITH_BUTTON = (
- BtnTouch.DOWN,
- ToolType.PEN,
- BtnPressed.PRIMARY_PRESSED,
- )
- PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON = (
- BtnTouch.DOWN,
- ToolType.PEN,
- BtnPressed.SECONDARY_PRESSED,
- )
- PEN_IS_IN_RANGE_WITH_ERASING_INTENT = BtnTouch.UP, ToolType.RUBBER, None
- PEN_IS_IN_RANGE_WITH_ERASING_INTENT_WITH_BUTTON = (
- BtnTouch.UP,
- ToolType.RUBBER,
- BtnPressed.PRIMARY_PRESSED,
- )
- PEN_IS_IN_RANGE_WITH_ERASING_INTENT_WITH_SECOND_BUTTON = (
- BtnTouch.UP,
- ToolType.RUBBER,
- BtnPressed.SECONDARY_PRESSED,
- )
- PEN_IS_ERASING = BtnTouch.DOWN, ToolType.RUBBER, None
- PEN_IS_ERASING_WITH_BUTTON = (
- BtnTouch.DOWN,
- ToolType.RUBBER,
- BtnPressed.PRIMARY_PRESSED,
- )
- PEN_IS_ERASING_WITH_SECOND_BUTTON = (
- BtnTouch.DOWN,
- ToolType.RUBBER,
- BtnPressed.SECONDARY_PRESSED,
- )
-
- def __init__(self, touch: BtnTouch, tool: Optional[ToolType], button: Optional[BtnPressed]):
+ PEN_IS_OUT_OF_RANGE = BtnTouch.UP, None, False
+ PEN_IS_IN_RANGE = BtnTouch.UP, ToolType.PEN, False
+ PEN_IS_IN_RANGE_WITH_BUTTON = BtnTouch.UP, ToolType.PEN, True
+ PEN_IS_IN_CONTACT = BtnTouch.DOWN, ToolType.PEN, False
+ PEN_IS_IN_CONTACT_WITH_BUTTON = BtnTouch.DOWN, ToolType.PEN, True
+ PEN_IS_IN_RANGE_WITH_ERASING_INTENT = BtnTouch.UP, ToolType.RUBBER, False
+ PEN_IS_IN_RANGE_WITH_ERASING_INTENT_WITH_BUTTON = BtnTouch.UP, ToolType.RUBBER, True
+ PEN_IS_ERASING = BtnTouch.DOWN, ToolType.RUBBER, False
+ PEN_IS_ERASING_WITH_BUTTON = BtnTouch.DOWN, ToolType.RUBBER, True
+
+ def __init__(
+ self, touch: BtnTouch, tool: Optional[ToolType], button: Optional[bool]
+ ):
self.touch = touch # type: ignore
self.tool = tool # type: ignore
self.button = button # type: ignore
@classmethod
- def from_evdev(cls, evdev) -> "PenState":
+ def from_evdev(cls, evdev, test_button) -> "PenState":
touch = BtnTouch(evdev.value[libevdev.EV_KEY.BTN_TOUCH])
tool = None
- button = None
+ button = False
if (
evdev.value[libevdev.EV_KEY.BTN_TOOL_RUBBER]
and not evdev.value[libevdev.EV_KEY.BTN_TOOL_PEN]
@@ -112,19 +83,20 @@ class PenState(Enum):
):
raise ValueError("2 tools are not allowed")
- # we take only the highest button in account
- for b in [libevdev.EV_KEY.BTN_STYLUS, libevdev.EV_KEY.BTN_STYLUS2]:
- if bool(evdev.value[b]):
- button = BtnPressed(b)
+ # we take only the provided button into account
+ if test_button is not None:
+ button = bool(evdev.value[test_button.value])
# the kernel tends to insert an EV_SYN once removing the tool, so
# the button will be released after
if tool is None:
- button = None
+ button = False
return cls((touch, tool, button)) # type: ignore
- def apply(self, events: List[libevdev.InputEvent], strict: bool) -> "PenState":
+ def apply(
+ self, events: List[libevdev.InputEvent], strict: bool, test_button: BtnPressed
+ ) -> "PenState":
if libevdev.EV_SYN.SYN_REPORT in events:
raise ValueError("EV_SYN is in the event sequence")
touch = self.touch
@@ -148,19 +120,16 @@ class PenState(Enum):
raise ValueError(f"duplicated BTN_TOOL_* in {events}")
tool_found = True
tool = ToolType(ev.code) if ev.value else None
- elif ev in (
- libevdev.InputEvent(libevdev.EV_KEY.BTN_STYLUS),
- libevdev.InputEvent(libevdev.EV_KEY.BTN_STYLUS2),
- ):
+ elif test_button is not None and ev in (test_button.value,):
if button_found:
raise ValueError(f"duplicated BTN_STYLUS* in {events}")
button_found = True
- button = BtnPressed(ev.code) if ev.value else None
+ button = bool(ev.value)
# the kernel tends to insert an EV_SYN once removing the tool, so
# the button will be released after
if tool is None:
- button = None
+ button = False
new_state = PenState((touch, tool, button)) # type: ignore
if strict:
@@ -183,11 +152,9 @@ class PenState(Enum):
PenState.PEN_IS_OUT_OF_RANGE,
PenState.PEN_IS_IN_RANGE,
PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
- PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
PenState.PEN_IS_IN_CONTACT,
PenState.PEN_IS_IN_CONTACT_WITH_BUTTON,
- PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON,
PenState.PEN_IS_ERASING,
)
@@ -195,7 +162,6 @@ class PenState(Enum):
return (
PenState.PEN_IS_IN_RANGE,
PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
- PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
PenState.PEN_IS_OUT_OF_RANGE,
PenState.PEN_IS_IN_CONTACT,
)
@@ -204,7 +170,6 @@ class PenState(Enum):
return (
PenState.PEN_IS_IN_CONTACT,
PenState.PEN_IS_IN_CONTACT_WITH_BUTTON,
- PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON,
PenState.PEN_IS_IN_RANGE,
)
@@ -236,21 +201,6 @@ class PenState(Enum):
PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
)
- if self == PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON:
- return (
- PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
- PenState.PEN_IS_IN_RANGE,
- PenState.PEN_IS_OUT_OF_RANGE,
- PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON,
- )
-
- if self == PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON:
- return (
- PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON,
- PenState.PEN_IS_IN_CONTACT,
- PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
- )
-
return tuple()
def historically_tolerated_transitions(self) -> Tuple["PenState", ...]:
@@ -263,11 +213,9 @@ class PenState(Enum):
PenState.PEN_IS_OUT_OF_RANGE,
PenState.PEN_IS_IN_RANGE,
PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
- PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
PenState.PEN_IS_IN_CONTACT,
PenState.PEN_IS_IN_CONTACT_WITH_BUTTON,
- PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON,
PenState.PEN_IS_ERASING,
)
@@ -275,7 +223,6 @@ class PenState(Enum):
return (
PenState.PEN_IS_IN_RANGE,
PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
- PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
PenState.PEN_IS_OUT_OF_RANGE,
PenState.PEN_IS_IN_CONTACT,
)
@@ -284,7 +231,6 @@ class PenState(Enum):
return (
PenState.PEN_IS_IN_CONTACT,
PenState.PEN_IS_IN_CONTACT_WITH_BUTTON,
- PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON,
PenState.PEN_IS_IN_RANGE,
PenState.PEN_IS_OUT_OF_RANGE,
)
@@ -319,22 +265,6 @@ class PenState(Enum):
PenState.PEN_IS_OUT_OF_RANGE,
)
- if self == PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON:
- return (
- PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
- PenState.PEN_IS_IN_RANGE,
- PenState.PEN_IS_OUT_OF_RANGE,
- PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON,
- )
-
- if self == PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON:
- return (
- PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON,
- PenState.PEN_IS_IN_CONTACT,
- PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
- PenState.PEN_IS_OUT_OF_RANGE,
- )
-
return tuple()
@staticmethod
@@ -402,9 +332,9 @@ class PenState(Enum):
}
@staticmethod
- def legal_transitions_with_primary_button() -> Dict[str, Tuple["PenState", ...]]:
+ def legal_transitions_with_button() -> Dict[str, Tuple["PenState", ...]]:
"""We revisit the Windows Pen Implementation state machine:
- we now have a primary button.
+ we now have a button.
"""
return {
"hover-button": (PenState.PEN_IS_IN_RANGE_WITH_BUTTON,),
@@ -451,56 +381,6 @@ class PenState(Enum):
}
@staticmethod
- def legal_transitions_with_secondary_button() -> Dict[str, Tuple["PenState", ...]]:
- """We revisit the Windows Pen Implementation state machine:
- we now have a secondary button.
- Note: we don't looks for 2 buttons interactions.
- """
- return {
- "hover-button": (PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,),
- "hover-button -> out-of-range": (
- PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
- PenState.PEN_IS_OUT_OF_RANGE,
- ),
- "in-range -> button-press": (
- PenState.PEN_IS_IN_RANGE,
- PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
- ),
- "in-range -> button-press -> button-release": (
- PenState.PEN_IS_IN_RANGE,
- PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
- PenState.PEN_IS_IN_RANGE,
- ),
- "in-range -> touch -> button-press -> button-release": (
- PenState.PEN_IS_IN_RANGE,
- PenState.PEN_IS_IN_CONTACT,
- PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON,
- PenState.PEN_IS_IN_CONTACT,
- ),
- "in-range -> touch -> button-press -> release -> button-release": (
- PenState.PEN_IS_IN_RANGE,
- PenState.PEN_IS_IN_CONTACT,
- PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON,
- PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
- PenState.PEN_IS_IN_RANGE,
- ),
- "in-range -> button-press -> touch -> release -> button-release": (
- PenState.PEN_IS_IN_RANGE,
- PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
- PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON,
- PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
- PenState.PEN_IS_IN_RANGE,
- ),
- "in-range -> button-press -> touch -> button-release -> release": (
- PenState.PEN_IS_IN_RANGE,
- PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
- PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON,
- PenState.PEN_IS_IN_CONTACT,
- PenState.PEN_IS_IN_RANGE,
- ),
- }
-
- @staticmethod
def tolerated_transitions() -> Dict[str, Tuple["PenState", ...]]:
"""This is not adhering to the Windows Pen Implementation state machine
but we should expect the kernel to behave properly, mostly for historical
@@ -616,10 +496,22 @@ class Pen(object):
evdev.value[axis] == value
), f"assert evdev.value[{axis}] ({evdev.value[axis]}) != {value}"
- def assert_expected_input_events(self, evdev):
+ def assert_expected_input_events(self, evdev, button):
assert evdev.value[libevdev.EV_ABS.ABS_X] == self.x
assert evdev.value[libevdev.EV_ABS.ABS_Y] == self.y
- assert self.current_state == PenState.from_evdev(evdev)
+
+ # assert no other buttons than the tested ones are set
+ buttons = [
+ BtnPressed.PRIMARY_PRESSED,
+ BtnPressed.SECONDARY_PRESSED,
+ BtnPressed.THIRD_PRESSED,
+ ]
+ if button is not None:
+ buttons.remove(button)
+ for b in buttons:
+ assert evdev.value[b.value] is None or evdev.value[b.value] == False
+
+ assert self.current_state == PenState.from_evdev(evdev, button)
class PenDigitizer(base.UHIDTestDevice):
@@ -647,7 +539,7 @@ class PenDigitizer(base.UHIDTestDevice):
continue
self.fields = [f.usage_name for f in r]
- def move_to(self, pen, state):
+ def move_to(self, pen, state, button):
# fill in the previous values
if pen.current_state == PenState.PEN_IS_OUT_OF_RANGE:
pen.restore()
@@ -690,29 +582,17 @@ class PenDigitizer(base.UHIDTestDevice):
pen.inrange = True
pen.invert = False
pen.eraser = False
- pen.barrelswitch = True
- pen.secondarybarrelswitch = False
+ assert button is not None
+ pen.barrelswitch = button == BtnPressed.PRIMARY_PRESSED
+ pen.secondarybarrelswitch = button == BtnPressed.SECONDARY_PRESSED
elif state == PenState.PEN_IS_IN_CONTACT_WITH_BUTTON:
pen.tipswitch = True
pen.inrange = True
pen.invert = False
pen.eraser = False
- pen.barrelswitch = True
- pen.secondarybarrelswitch = False
- elif state == PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON:
- pen.tipswitch = False
- pen.inrange = True
- pen.invert = False
- pen.eraser = False
- pen.barrelswitch = False
- pen.secondarybarrelswitch = True
- elif state == PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON:
- pen.tipswitch = True
- pen.inrange = True
- pen.invert = False
- pen.eraser = False
- pen.barrelswitch = False
- pen.secondarybarrelswitch = True
+ assert button is not None
+ pen.barrelswitch = button == BtnPressed.PRIMARY_PRESSED
+ pen.secondarybarrelswitch = button == BtnPressed.SECONDARY_PRESSED
elif state == PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT:
pen.tipswitch = False
pen.inrange = True
@@ -730,7 +610,7 @@ class PenDigitizer(base.UHIDTestDevice):
pen.current_state = state
- def event(self, pen):
+ def event(self, pen, button):
rs = []
r = self.create_report(application=self.cur_application, data=pen)
self.call_input_event(r)
@@ -771,17 +651,17 @@ class BaseTest:
def create_device(self):
raise Exception("please reimplement me in subclasses")
- def post(self, uhdev, pen):
- r = uhdev.event(pen)
+ def post(self, uhdev, pen, test_button):
+ r = uhdev.event(pen, test_button)
events = uhdev.next_sync_events()
self.debug_reports(r, uhdev, events)
return events
def validate_transitions(
- self, from_state, pen, evdev, events, allow_intermediate_states
+ self, from_state, pen, evdev, events, allow_intermediate_states, button
):
# check that the final state is correct
- pen.assert_expected_input_events(evdev)
+ pen.assert_expected_input_events(evdev, button)
state = from_state
@@ -794,12 +674,14 @@ class BaseTest:
events = events[idx + 1 :]
# now check for a valid transition
- state = state.apply(sync_events, not allow_intermediate_states)
+ state = state.apply(sync_events, not allow_intermediate_states, button)
if events:
- state = state.apply(sync_events, not allow_intermediate_states)
+ state = state.apply(sync_events, not allow_intermediate_states, button)
- def _test_states(self, state_list, scribble, allow_intermediate_states):
+ def _test_states(
+ self, state_list, scribble, allow_intermediate_states, button=None
+ ):
"""Internal method to test against a list of
transition between states.
state_list is a list of PenState objects
@@ -812,10 +694,10 @@ class BaseTest:
cur_state = PenState.PEN_IS_OUT_OF_RANGE
p = Pen(50, 60)
- uhdev.move_to(p, PenState.PEN_IS_OUT_OF_RANGE)
- events = self.post(uhdev, p)
+ uhdev.move_to(p, PenState.PEN_IS_OUT_OF_RANGE, button)
+ events = self.post(uhdev, p, button)
self.validate_transitions(
- cur_state, p, evdev, events, allow_intermediate_states
+ cur_state, p, evdev, events, allow_intermediate_states, button
)
cur_state = p.current_state
@@ -824,18 +706,18 @@ class BaseTest:
if scribble and cur_state != PenState.PEN_IS_OUT_OF_RANGE:
p.x += 1
p.y -= 1
- events = self.post(uhdev, p)
+ events = self.post(uhdev, p, button)
self.validate_transitions(
- cur_state, p, evdev, events, allow_intermediate_states
+ cur_state, p, evdev, events, allow_intermediate_states, button
)
assert len(events) >= 3 # X, Y, SYN
- uhdev.move_to(p, state)
+ uhdev.move_to(p, state, button)
if scribble and state != PenState.PEN_IS_OUT_OF_RANGE:
p.x += 1
p.y -= 1
- events = self.post(uhdev, p)
+ events = self.post(uhdev, p, button)
self.validate_transitions(
- cur_state, p, evdev, events, allow_intermediate_states
+ cur_state, p, evdev, events, allow_intermediate_states, button
)
cur_state = p.current_state
@@ -874,12 +756,17 @@ class BaseTest:
"state_list",
[
pytest.param(v, id=k)
- for k, v in PenState.legal_transitions_with_primary_button().items()
+ for k, v in PenState.legal_transitions_with_button().items()
],
)
def test_valid_primary_button_pen_states(self, state_list, scribble):
"""Rework the transition state machine by adding the primary button."""
- self._test_states(state_list, scribble, allow_intermediate_states=False)
+ self._test_states(
+ state_list,
+ scribble,
+ allow_intermediate_states=False,
+ button=BtnPressed.PRIMARY_PRESSED,
+ )
@pytest.mark.skip_if_uhdev(
lambda uhdev: "Secondary Barrel Switch" not in uhdev.fields,
@@ -890,12 +777,38 @@ class BaseTest:
"state_list",
[
pytest.param(v, id=k)
- for k, v in PenState.legal_transitions_with_secondary_button().items()
+ for k, v in PenState.legal_transitions_with_button().items()
],
)
def test_valid_secondary_button_pen_states(self, state_list, scribble):
"""Rework the transition state machine by adding the secondary button."""
- self._test_states(state_list, scribble, allow_intermediate_states=False)
+ self._test_states(
+ state_list,
+ scribble,
+ allow_intermediate_states=False,
+ button=BtnPressed.SECONDARY_PRESSED,
+ )
+
+ @pytest.mark.skip_if_uhdev(
+ lambda uhdev: "Third Barrel Switch" not in uhdev.fields,
+ "Device not compatible, missing Third Barrel Switch usage",
+ )
+ @pytest.mark.parametrize("scribble", [True, False], ids=["scribble", "static"])
+ @pytest.mark.parametrize(
+ "state_list",
+ [
+ pytest.param(v, id=k)
+ for k, v in PenState.legal_transitions_with_button().items()
+ ],
+ )
+ def test_valid_third_button_pen_states(self, state_list, scribble):
+ """Rework the transition state machine by adding the secondary button."""
+ self._test_states(
+ state_list,
+ scribble,
+ allow_intermediate_states=False,
+ button=BtnPressed.THIRD_PRESSED,
+ )
@pytest.mark.skip_if_uhdev(
lambda uhdev: "Invert" not in uhdev.fields,
@@ -956,7 +869,7 @@ class BaseTest:
class GXTP_pen(PenDigitizer):
- def event(self, pen):
+ def event(self, pen, test_button):
if not hasattr(self, "prev_tip_state"):
self.prev_tip_state = False
@@ -977,13 +890,407 @@ class GXTP_pen(PenDigitizer):
if pen.eraser:
internal_pen.invert = False
- return super().event(internal_pen)
+ return super().event(internal_pen, test_button)
class USIPen(PenDigitizer):
pass
+class XPPen_ArtistPro16Gen2_28bd_095b(PenDigitizer):
+ """
+ Pen with two buttons and a rubber end, but which reports
+ the second button as an eraser
+ """
+
+ def __init__(
+ self,
+ name,
+ rdesc_str=None,
+ rdesc=None,
+ application="Pen",
+ physical="Stylus",
+ input_info=(BusType.USB, 0x28BD, 0x095B),
+ evdev_name_suffix=None,
+ ):
+ super().__init__(
+ name, rdesc_str, rdesc, application, physical, input_info, evdev_name_suffix
+ )
+ self.fields.append("Secondary Barrel Switch")
+
+ def move_to(self, pen, state, button):
+ # fill in the previous values
+ if pen.current_state == PenState.PEN_IS_OUT_OF_RANGE:
+ pen.restore()
+
+ print(f"\n *** pen is moving to {state} ***")
+
+ if state == PenState.PEN_IS_OUT_OF_RANGE:
+ pen.backup()
+ pen.x = 0
+ pen.y = 0
+ pen.tipswitch = False
+ pen.tippressure = 0
+ pen.azimuth = 0
+ pen.inrange = False
+ pen.width = 0
+ pen.height = 0
+ pen.invert = False
+ pen.eraser = False
+ pen.xtilt = 0
+ pen.ytilt = 0
+ pen.twist = 0
+ pen.barrelswitch = False
+ elif state == PenState.PEN_IS_IN_RANGE:
+ pen.tipswitch = False
+ pen.inrange = True
+ pen.invert = False
+ pen.eraser = False
+ pen.barrelswitch = False
+ elif state == PenState.PEN_IS_IN_CONTACT:
+ pen.tipswitch = True
+ pen.inrange = True
+ pen.invert = False
+ pen.eraser = False
+ pen.barrelswitch = False
+ elif state == PenState.PEN_IS_IN_RANGE_WITH_BUTTON:
+ pen.tipswitch = False
+ pen.inrange = True
+ pen.invert = False
+ assert button is not None
+ pen.barrelswitch = button == BtnPressed.PRIMARY_PRESSED
+ pen.eraser = button == BtnPressed.SECONDARY_PRESSED
+ elif state == PenState.PEN_IS_IN_CONTACT_WITH_BUTTON:
+ pen.tipswitch = True
+ pen.inrange = True
+ pen.invert = False
+ assert button is not None
+ pen.barrelswitch = button == BtnPressed.PRIMARY_PRESSED
+ pen.eraser = button == BtnPressed.SECONDARY_PRESSED
+ elif state == PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT:
+ pen.tipswitch = False
+ pen.inrange = True
+ pen.invert = True
+ pen.eraser = False
+ pen.barrelswitch = False
+ elif state == PenState.PEN_IS_ERASING:
+ pen.tipswitch = True
+ pen.inrange = True
+ pen.invert = True
+ pen.eraser = False
+ pen.barrelswitch = False
+
+ pen.current_state = state
+
+ def event(self, pen, test_button):
+ import math
+
+ pen_copy = copy.copy(pen)
+ width = 13.567
+ height = 8.480
+ tip_height = 0.055677699
+ hx = tip_height * (32767 / width)
+ hy = tip_height * (32767 / height)
+ if pen_copy.xtilt != 0:
+ pen_copy.x += round(hx * math.sin(math.radians(pen_copy.xtilt)))
+ if pen_copy.ytilt != 0:
+ pen_copy.y += round(hy * math.sin(math.radians(pen_copy.ytilt)))
+
+ return super().event(pen_copy, test_button)
+
+
+class XPPen_Artist24_28bd_093a(PenDigitizer):
+ """
+ Pen that reports secondary barrel switch through eraser
+ """
+
+ def __init__(
+ self,
+ name,
+ rdesc_str=None,
+ rdesc=None,
+ application="Pen",
+ physical="Stylus",
+ input_info=(BusType.USB, 0x28BD, 0x093A),
+ evdev_name_suffix=None,
+ ):
+ super().__init__(
+ name, rdesc_str, rdesc, application, physical, input_info, evdev_name_suffix
+ )
+ self.fields.append("Secondary Barrel Switch")
+ self.previous_state = PenState.PEN_IS_OUT_OF_RANGE
+
+ def move_to(self, pen, state, button, debug=True):
+ # fill in the previous values
+ if pen.current_state == PenState.PEN_IS_OUT_OF_RANGE:
+ pen.restore()
+
+ if debug:
+ print(f"\n *** pen is moving to {state} ***")
+
+ if state == PenState.PEN_IS_OUT_OF_RANGE:
+ pen.backup()
+ pen.tipswitch = False
+ pen.tippressure = 0
+ pen.azimuth = 0
+ pen.inrange = False
+ pen.width = 0
+ pen.height = 0
+ pen.invert = False
+ pen.eraser = False
+ pen.xtilt = 0
+ pen.ytilt = 0
+ pen.twist = 0
+ pen.barrelswitch = False
+ elif state == PenState.PEN_IS_IN_RANGE:
+ pen.tipswitch = False
+ pen.inrange = True
+ pen.invert = False
+ pen.eraser = False
+ pen.barrelswitch = False
+ elif state == PenState.PEN_IS_IN_CONTACT:
+ pen.tipswitch = True
+ pen.inrange = True
+ pen.invert = False
+ pen.eraser = False
+ pen.barrelswitch = False
+ elif state == PenState.PEN_IS_IN_RANGE_WITH_BUTTON:
+ pen.tipswitch = False
+ pen.inrange = True
+ pen.invert = False
+ assert button is not None
+ pen.barrelswitch = button == BtnPressed.PRIMARY_PRESSED
+ pen.eraser = button == BtnPressed.SECONDARY_PRESSED
+ elif state == PenState.PEN_IS_IN_CONTACT_WITH_BUTTON:
+ pen.tipswitch = True
+ pen.inrange = True
+ pen.invert = False
+ assert button is not None
+ pen.barrelswitch = button == BtnPressed.PRIMARY_PRESSED
+ pen.eraser = button == BtnPressed.SECONDARY_PRESSED
+
+ pen.current_state = state
+
+ def send_intermediate_state(self, pen, state, button):
+ intermediate_pen = copy.copy(pen)
+ self.move_to(intermediate_pen, state, button, debug=False)
+ return super().event(intermediate_pen, button)
+
+ def event(self, pen, button):
+ rs = []
+
+ # the pen reliably sends in-range events in a normal case (non emulation of eraser mode)
+ if self.previous_state == PenState.PEN_IS_IN_CONTACT:
+ if pen.current_state == PenState.PEN_IS_OUT_OF_RANGE:
+ rs.extend(
+ self.send_intermediate_state(pen, PenState.PEN_IS_IN_RANGE, button)
+ )
+
+ if button == BtnPressed.SECONDARY_PRESSED:
+ if self.previous_state == PenState.PEN_IS_IN_RANGE:
+ if pen.current_state == PenState.PEN_IS_IN_RANGE_WITH_BUTTON:
+ rs.extend(
+ self.send_intermediate_state(
+ pen, PenState.PEN_IS_OUT_OF_RANGE, button
+ )
+ )
+
+ if self.previous_state == PenState.PEN_IS_IN_RANGE_WITH_BUTTON:
+ if pen.current_state == PenState.PEN_IS_IN_RANGE:
+ rs.extend(
+ self.send_intermediate_state(
+ pen, PenState.PEN_IS_OUT_OF_RANGE, button
+ )
+ )
+
+ if self.previous_state == PenState.PEN_IS_IN_CONTACT:
+ if pen.current_state == PenState.PEN_IS_IN_CONTACT_WITH_BUTTON:
+ rs.extend(
+ self.send_intermediate_state(
+ pen, PenState.PEN_IS_OUT_OF_RANGE, button
+ )
+ )
+ rs.extend(
+ self.send_intermediate_state(
+ pen, PenState.PEN_IS_IN_RANGE_WITH_BUTTON, button
+ )
+ )
+
+ if self.previous_state == PenState.PEN_IS_IN_CONTACT_WITH_BUTTON:
+ if pen.current_state == PenState.PEN_IS_IN_CONTACT:
+ rs.extend(
+ self.send_intermediate_state(
+ pen, PenState.PEN_IS_OUT_OF_RANGE, button
+ )
+ )
+ rs.extend(
+ self.send_intermediate_state(
+ pen, PenState.PEN_IS_IN_RANGE, button
+ )
+ )
+
+ rs.extend(super().event(pen, button))
+ self.previous_state = pen.current_state
+ return rs
+
+
+class Huion_Kamvas_Pro_19_256c_006b(PenDigitizer):
+ """
+ Pen that reports secondary barrel switch through secondary TipSwtich
+ and 3rd button through Invert
+ """
+
+ def __init__(
+ self,
+ name,
+ rdesc_str=None,
+ rdesc=None,
+ application="Stylus",
+ physical=None,
+ input_info=(BusType.USB, 0x256C, 0x006B),
+ evdev_name_suffix=None,
+ ):
+ super().__init__(
+ name, rdesc_str, rdesc, application, physical, input_info, evdev_name_suffix
+ )
+ self.fields.append("Secondary Barrel Switch")
+ self.fields.append("Third Barrel Switch")
+ self.previous_state = PenState.PEN_IS_OUT_OF_RANGE
+
+ def move_to(self, pen, state, button, debug=True):
+ # fill in the previous values
+ if pen.current_state == PenState.PEN_IS_OUT_OF_RANGE:
+ pen.restore()
+
+ if debug:
+ print(f"\n *** pen is moving to {state} ***")
+
+ if state == PenState.PEN_IS_OUT_OF_RANGE:
+ pen.backup()
+ pen.tipswitch = False
+ pen.tippressure = 0
+ pen.azimuth = 0
+ pen.inrange = False
+ pen.width = 0
+ pen.height = 0
+ pen.invert = False
+ pen.eraser = False
+ pen.xtilt = 0
+ pen.ytilt = 0
+ pen.twist = 0
+ pen.barrelswitch = False
+ pen.secondarytipswitch = False
+ elif state == PenState.PEN_IS_IN_RANGE:
+ pen.tipswitch = False
+ pen.inrange = True
+ pen.invert = False
+ pen.eraser = False
+ pen.barrelswitch = False
+ pen.secondarytipswitch = False
+ elif state == PenState.PEN_IS_IN_CONTACT:
+ pen.tipswitch = True
+ pen.inrange = True
+ pen.invert = False
+ pen.eraser = False
+ pen.barrelswitch = False
+ pen.secondarytipswitch = False
+ elif state == PenState.PEN_IS_IN_RANGE_WITH_BUTTON:
+ pen.tipswitch = False
+ pen.inrange = True
+ pen.eraser = False
+ assert button is not None
+ pen.barrelswitch = button == BtnPressed.PRIMARY_PRESSED
+ pen.secondarytipswitch = button == BtnPressed.SECONDARY_PRESSED
+ pen.invert = button == BtnPressed.THIRD_PRESSED
+ elif state == PenState.PEN_IS_IN_CONTACT_WITH_BUTTON:
+ pen.tipswitch = True
+ pen.inrange = True
+ pen.eraser = False
+ assert button is not None
+ pen.barrelswitch = button == BtnPressed.PRIMARY_PRESSED
+ pen.secondarytipswitch = button == BtnPressed.SECONDARY_PRESSED
+ pen.invert = button == BtnPressed.THIRD_PRESSED
+ elif state == PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT:
+ pen.tipswitch = False
+ pen.inrange = True
+ pen.invert = True
+ pen.eraser = False
+ pen.barrelswitch = False
+ pen.secondarytipswitch = False
+ elif state == PenState.PEN_IS_ERASING:
+ pen.tipswitch = False
+ pen.inrange = True
+ pen.invert = False
+ pen.eraser = True
+ pen.barrelswitch = False
+ pen.secondarytipswitch = False
+
+ pen.current_state = state
+
+ def call_input_event(self, report):
+ if report[0] == 0x0a:
+ # ensures the original second Eraser usage is null
+ report[1] &= 0xdf
+
+ # ensures the original last bit is equal to bit 6 (In Range)
+ if report[1] & 0x40:
+ report[1] |= 0x80
+
+ super().call_input_event(report)
+
+ def send_intermediate_state(self, pen, state, test_button):
+ intermediate_pen = copy.copy(pen)
+ self.move_to(intermediate_pen, state, test_button, debug=False)
+ return super().event(intermediate_pen, test_button)
+
+ def event(self, pen, button):
+ rs = []
+
+ # it's not possible to go between eraser mode or not without
+ # going out-of-prox: the eraser mode is activated by presenting
+ # the tail of the pen
+ if self.previous_state in (
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_IN_CONTACT_WITH_BUTTON,
+ ) and pen.current_state in (
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT_WITH_BUTTON,
+ PenState.PEN_IS_ERASING,
+ PenState.PEN_IS_ERASING_WITH_BUTTON,
+ ):
+ rs.extend(
+ self.send_intermediate_state(pen, PenState.PEN_IS_OUT_OF_RANGE, button)
+ )
+
+ # same than above except from eraser to normal
+ if self.previous_state in (
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT_WITH_BUTTON,
+ PenState.PEN_IS_ERASING,
+ PenState.PEN_IS_ERASING_WITH_BUTTON,
+ ) and pen.current_state in (
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_IN_CONTACT_WITH_BUTTON,
+ ):
+ rs.extend(
+ self.send_intermediate_state(pen, PenState.PEN_IS_OUT_OF_RANGE, button)
+ )
+
+ if self.previous_state == PenState.PEN_IS_OUT_OF_RANGE:
+ if pen.current_state == PenState.PEN_IS_IN_RANGE_WITH_BUTTON:
+ rs.extend(
+ self.send_intermediate_state(pen, PenState.PEN_IS_IN_RANGE, button)
+ )
+
+ rs.extend(super().event(pen, button))
+ self.previous_state = pen.current_state
+ return rs
+
+
################################################################################
#
# Windows 7 compatible devices
@@ -1162,3 +1469,37 @@ class TestGoodix_27c6_0e00(BaseTest.TestTablet):
rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 55 0e 65 11 35 00 15 00 09 42 25 01 75 01 95 01 81 02 25 7f 09 30 75 07 81 42 95 01 75 08 09 51 81 02 75 10 05 01 26 04 20 46 e6 09 09 30 81 02 26 60 15 46 9a 06 09 31 81 02 05 0d 55 0f 75 08 25 ff 45 ff 09 48 81 42 09 49 81 42 55 0e c0 09 22 a1 02 09 42 25 01 75 01 95 01 81 02 25 7f 09 30 75 07 81 42 95 01 75 08 09 51 81 02 75 10 05 01 26 04 20 46 e6 09 09 30 81 02 26 60 15 46 9a 06 09 31 81 02 05 0d 55 0f 75 08 25 ff 45 ff 09 48 81 42 09 49 81 42 55 0e c0 09 22 a1 02 09 42 25 01 75 01 95 01 81 02 25 7f 09 30 75 07 81 42 95 01 75 08 09 51 81 02 75 10 05 01 26 04 20 46 e6 09 09 30 81 02 26 60 15 46 9a 06 09 31 81 02 05 0d 55 0f 75 08 25 ff 45 ff 09 48 81 42 09 49 81 42 55 0e c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 25 7f 09 30 75 07 81 42 75 08 09 51 95 01 81 02 05 01 26 04 20 75 10 55 0e 65 11 09 30 35 00 46 e6 09 81 02 26 60 15 46 9a 06 09 31 81 02 05 0d 55 0f 75 08 25 ff 45 ff 09 48 81 42 09 49 81 42 55 0e c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 25 7f 09 30 75 07 81 42 75 08 09 51 95 01 81 02 05 01 26 04 20 75 10 55 0e 65 11 09 30 35 00 46 e6 09 81 02 26 60 15 46 9a 06 09 31 81 02 05 0d 55 0f 75 08 25 ff 45 ff 09 48 81 42 09 49 81 42 55 0e c0 09 54 15 00 25 7f 75 08 95 01 81 02 85 02 09 55 95 01 25 0a b1 02 85 03 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 02 a1 01 09 20 a1 00 85 08 05 01 a4 09 30 35 00 46 e6 09 15 00 26 04 20 55 0d 65 13 75 10 95 01 81 02 09 31 46 9a 06 26 60 15 81 02 b4 05 0d 09 38 95 01 75 08 15 00 25 01 81 02 09 30 75 10 26 ff 0f 81 02 09 31 81 02 09 42 09 44 09 5a 09 3c 09 45 09 32 75 01 95 06 25 01 81 02 95 02 81 03 09 3d 55 0e 65 14 36 d8 dc 46 28 23 16 d8 dc 26 28 23 95 01 75 10 81 02 09 3e 81 02 09 41 15 00 27 a0 8c 00 00 35 00 47 a0 8c 00 00 81 02 05 20 0a 53 04 65 00 16 01 f8 26 ff 07 75 10 95 01 81 02 0a 54 04 81 02 0a 55 04 81 02 0a 57 04 81 02 0a 58 04 81 02 0a 59 04 81 02 0a 72 04 81 02 0a 73 04 81 02 0a 74 04 81 02 05 0d 09 3b 15 00 25 64 75 08 81 02 09 5b 25 ff 75 40 81 02 06 00 ff 09 5b 75 20 81 02 05 0d 09 5c 26 ff 00 75 08 81 02 09 5e 81 02 09 70 a1 02 15 01 25 06 09 72 09 73 09 74 09 75 09 76 09 77 81 20 c0 06 00 ff 09 01 15 00 27 ff ff 00 00 75 10 95 01 81 02 85 09 09 81 a1 02 09 81 15 01 25 04 09 82 09 83 09 84 09 85 81 20 c0 85 10 09 5c a1 02 15 00 25 01 75 08 95 01 09 38 b1 02 09 5c 26 ff 00 b1 02 09 5d 75 01 95 01 25 01 b1 02 95 07 b1 03 c0 85 11 09 5e a1 02 09 38 15 00 25 01 75 08 95 01 b1 02 09 5e 26 ff 00 b1 02 09 5f 75 01 25 01 b1 02 75 07 b1 03 c0 85 12 09 70 a1 02 75 08 95 01 15 00 25 01 09 38 b1 02 09 70 a1 02 25 06 09 72 09 73 09 74 09 75 09 76 09 77 b1 20 c0 09 71 75 01 25 01 b1 02 75 07 b1 03 c0 85 13 09 80 15 00 25 ff 75 40 95 01 b1 02 85 14 09 44 a1 02 09 38 75 08 95 01 25 01 b1 02 15 01 25 03 09 44 a1 02 09 a4 09 44 09 5a 09 45 09 a3 b1 20 c0 09 5a a1 02 09 a4 09 44 09 5a 09 45 09 a3 b1 20 c0 09 45 a1 02 09 a4 09 44 09 5a 09 45 09 a3 b1 20 c0 c0 85 15 75 08 95 01 05 0d 09 90 a1 02 09 38 25 01 b1 02 09 91 75 10 26 ff 0f b1 02 09 92 75 40 25 ff b1 02 05 06 09 2a 75 08 26 ff 00 a1 02 09 2d b1 02 09 2e b1 02 c0 c0 85 16 05 06 09 2b a1 02 05 0d 25 01 09 38 b1 02 05 06 09 2b a1 02 09 2d 26 ff 00 b1 02 09 2e b1 02 c0 c0 85 17 06 00 ff 09 01 a1 02 05 0d 09 38 75 08 95 01 25 01 b1 02 06 00 ff 09 01 75 10 27 ff ff 00 00 b1 02 c0 85 18 05 0d 09 38 75 08 95 01 15 00 25 01 b1 02 c0 c0 06 f0 ff 09 01 a1 01 85 0e 09 01 15 00 25 ff 75 08 95 40 91 02 09 01 15 00 25 ff 75 08 95 40 81 02 c0",
input_info=(BusType.I2C, 0x27C6, 0x0E00),
)
+
+
+class TestXPPen_ArtistPro16Gen2_28bd_095b(BaseTest.TestTablet):
+ hid_bpfs = [("XPPen__ArtistPro16Gen2.bpf.o", True)]
+
+ def create_device(self):
+ dev = XPPen_ArtistPro16Gen2_28bd_095b(
+ "uhid test XPPen Artist Pro 16 Gen2 28bd 095b",
+ rdesc="05 0d 09 02 a1 01 85 07 09 20 a1 00 09 42 09 44 09 45 09 3c 15 00 25 01 75 01 95 04 81 02 95 01 81 03 09 32 15 00 25 01 95 01 81 02 95 02 81 03 75 10 95 01 35 00 a4 05 01 09 30 65 13 55 0d 46 ff 34 26 ff 7f 81 02 09 31 46 20 21 26 ff 7f 81 02 b4 09 30 45 00 26 ff 3f 81 42 09 3d 15 81 25 7f 75 08 95 01 81 02 09 3e 15 81 25 7f 81 02 c0 c0",
+ input_info=(BusType.USB, 0x28BD, 0x095B),
+ )
+ return dev
+
+
+class TestXPPen_Artist24_28bd_093a(BaseTest.TestTablet):
+ hid_bpfs = [("XPPen__Artist24.bpf.o", True)]
+
+ def create_device(self):
+ return XPPen_Artist24_28bd_093a(
+ "uhid test XPPen Artist 24 28bd 093a",
+ rdesc="05 0d 09 02 a1 01 85 07 09 20 a1 00 09 42 09 44 09 45 15 00 25 01 75 01 95 03 81 02 95 02 81 03 09 32 95 01 81 02 95 02 81 03 75 10 95 01 35 00 a4 05 01 09 30 65 13 55 0d 46 f0 50 26 ff 7f 81 02 09 31 46 91 2d 26 ff 7f 81 02 b4 09 30 45 00 26 ff 1f 81 42 09 3d 15 81 25 7f 75 08 95 01 81 02 09 3e 15 81 25 7f 81 02 c0 c0",
+ input_info=(BusType.USB, 0x28BD, 0x093A),
+ )
+
+
+class TestHuion_Kamvas_Pro_19_256c_006b(BaseTest.TestTablet):
+ hid_bpfs = [("Huion__Kamvas-Pro-19.bpf.o", True)]
+
+ def create_device(self):
+ return Huion_Kamvas_Pro_19_256c_006b(
+ "uhid test HUION Huion Tablet_GT1902",
+ rdesc="05 0d 09 02 a1 01 85 0a 09 20 a1 01 09 42 09 44 09 43 09 3c 09 45 15 00 25 01 75 01 95 06 81 02 09 32 75 01 95 01 81 02 81 03 05 01 09 30 09 31 55 0d 65 33 26 ff 7f 35 00 46 00 08 75 10 95 02 81 02 05 0d 09 30 26 ff 3f 75 10 95 01 81 02 09 3d 09 3e 15 a6 25 5a 75 08 95 02 81 02 c0 c0 05 0d 09 04 a1 01 85 04 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 ff 7f 35 00 46 15 0c 81 42 09 31 26 ff 7f 46 cb 06 81 42 05 0d 09 30 26 ff 1f 75 10 95 01 81 02 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 ff 7f 35 00 46 15 0c 81 42 09 31 26 ff 7f 46 cb 06 81 42 05 0d 09 30 26 ff 1f 75 10 95 01 81 02 c0 05 0d 09 56 55 00 65 00 27 ff ff ff 7f 95 01 75 20 81 02 09 54 25 7f 95 01 75 08 81 02 75 08 95 08 81 03 85 05 09 55 25 0a 75 08 95 01 b1 02 06 00 ff 09 c5 85 06 15 00 26 ff 00 75 08 96 00 01 b1 02 c0",
+ input_info=(BusType.USB, 0x256C, 0x006B),
+ )
diff --git a/tools/testing/selftests/intel_pstate/Makefile b/tools/testing/selftests/intel_pstate/Makefile
index 05d66ef50c97..f45372cb00fe 100644
--- a/tools/testing/selftests/intel_pstate/Makefile
+++ b/tools/testing/selftests/intel_pstate/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-CFLAGS := $(CFLAGS) -Wall -D_GNU_SOURCE
+CFLAGS := $(CFLAGS) -Wall
LDLIBS += -lm
ARCH ?= $(shell uname -m 2>/dev/null || echo not)
diff --git a/tools/testing/selftests/iommu/Makefile b/tools/testing/selftests/iommu/Makefile
index 32c5fdfd0eef..fd6477911f24 100644
--- a/tools/testing/selftests/iommu/Makefile
+++ b/tools/testing/selftests/iommu/Makefile
@@ -2,8 +2,6 @@
CFLAGS += -Wall -O2 -Wno-unused-function
CFLAGS += $(KHDR_INCLUDES)
-CFLAGS += -D_GNU_SOURCE
-
TEST_GEN_PROGS :=
TEST_GEN_PROGS += iommufd
TEST_GEN_PROGS += iommufd_fail_nth
diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c
index edf1c99c9936..6343f4053bd4 100644
--- a/tools/testing/selftests/iommu/iommufd.c
+++ b/tools/testing/selftests/iommu/iommufd.c
@@ -279,6 +279,9 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested)
uint32_t parent_hwpt_id = 0;
uint32_t parent_hwpt_id_not_work = 0;
uint32_t test_hwpt_id = 0;
+ uint32_t iopf_hwpt_id;
+ uint32_t fault_id;
+ uint32_t fault_fd;
if (self->device_id) {
/* Negative tests */
@@ -326,6 +329,7 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested)
sizeof(data));
/* Allocate two nested hwpts sharing one common parent hwpt */
+ test_ioctl_fault_alloc(&fault_id, &fault_fd);
test_cmd_hwpt_alloc_nested(self->device_id, parent_hwpt_id, 0,
&nested_hwpt_id[0],
IOMMU_HWPT_DATA_SELFTEST, &data,
@@ -334,6 +338,14 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested)
&nested_hwpt_id[1],
IOMMU_HWPT_DATA_SELFTEST, &data,
sizeof(data));
+ test_err_hwpt_alloc_iopf(ENOENT, self->device_id, parent_hwpt_id,
+ UINT32_MAX, IOMMU_HWPT_FAULT_ID_VALID,
+ &iopf_hwpt_id, IOMMU_HWPT_DATA_SELFTEST,
+ &data, sizeof(data));
+ test_cmd_hwpt_alloc_iopf(self->device_id, parent_hwpt_id, fault_id,
+ IOMMU_HWPT_FAULT_ID_VALID, &iopf_hwpt_id,
+ IOMMU_HWPT_DATA_SELFTEST, &data,
+ sizeof(data));
test_cmd_hwpt_check_iotlb_all(nested_hwpt_id[0],
IOMMU_TEST_IOTLB_DEFAULT);
test_cmd_hwpt_check_iotlb_all(nested_hwpt_id[1],
@@ -504,14 +516,24 @@ TEST_F(iommufd_ioas, alloc_hwpt_nested)
_test_ioctl_destroy(self->fd, nested_hwpt_id[1]));
test_ioctl_destroy(nested_hwpt_id[0]);
+ /* Switch from nested_hwpt_id[1] to iopf_hwpt_id */
+ test_cmd_mock_domain_replace(self->stdev_id, iopf_hwpt_id);
+ EXPECT_ERRNO(EBUSY,
+ _test_ioctl_destroy(self->fd, iopf_hwpt_id));
+ /* Trigger an IOPF on the device */
+ test_cmd_trigger_iopf(self->device_id, fault_fd);
+
/* Detach from nested_hwpt_id[1] and destroy it */
test_cmd_mock_domain_replace(self->stdev_id, parent_hwpt_id);
test_ioctl_destroy(nested_hwpt_id[1]);
+ test_ioctl_destroy(iopf_hwpt_id);
/* Detach from the parent hw_pagetable and destroy it */
test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id);
test_ioctl_destroy(parent_hwpt_id);
test_ioctl_destroy(parent_hwpt_id_not_work);
+ close(fault_fd);
+ test_ioctl_destroy(fault_id);
} else {
test_err_hwpt_alloc(ENOENT, self->device_id, self->ioas_id, 0,
&parent_hwpt_id);
@@ -1722,10 +1744,17 @@ FIXTURE_VARIANT(iommufd_dirty_tracking)
FIXTURE_SETUP(iommufd_dirty_tracking)
{
+ unsigned long size;
int mmap_flags;
void *vrc;
int rc;
+ if (variant->buffer_size < MOCK_PAGE_SIZE) {
+ SKIP(return,
+ "Skipping buffer_size=%lu, less than MOCK_PAGE_SIZE=%lu",
+ variant->buffer_size, MOCK_PAGE_SIZE);
+ }
+
self->fd = open("/dev/iommu", O_RDWR);
ASSERT_NE(-1, self->fd);
@@ -1749,12 +1778,11 @@ FIXTURE_SETUP(iommufd_dirty_tracking)
assert(vrc == self->buffer);
self->page_size = MOCK_PAGE_SIZE;
- self->bitmap_size =
- variant->buffer_size / self->page_size / BITS_PER_BYTE;
+ self->bitmap_size = variant->buffer_size / self->page_size;
/* Provision with an extra (PAGE_SIZE) for the unaligned case */
- rc = posix_memalign(&self->bitmap, PAGE_SIZE,
- self->bitmap_size + PAGE_SIZE);
+ size = DIV_ROUND_UP(self->bitmap_size, BITS_PER_BYTE);
+ rc = posix_memalign(&self->bitmap, PAGE_SIZE, size + PAGE_SIZE);
assert(!rc);
assert(self->bitmap);
assert((uintptr_t)self->bitmap % PAGE_SIZE == 0);
@@ -1775,51 +1803,63 @@ FIXTURE_SETUP(iommufd_dirty_tracking)
FIXTURE_TEARDOWN(iommufd_dirty_tracking)
{
munmap(self->buffer, variant->buffer_size);
- munmap(self->bitmap, self->bitmap_size);
+ munmap(self->bitmap, DIV_ROUND_UP(self->bitmap_size, BITS_PER_BYTE));
teardown_iommufd(self->fd, _metadata);
}
-FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128k)
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty8k)
+{
+ /* half of an u8 index bitmap */
+ .buffer_size = 8UL * 1024UL,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty16k)
+{
+ /* one u8 index bitmap */
+ .buffer_size = 16UL * 1024UL,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty64k)
{
/* one u32 index bitmap */
- .buffer_size = 128UL * 1024UL,
+ .buffer_size = 64UL * 1024UL,
};
-FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256k)
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128k)
{
/* one u64 index bitmap */
- .buffer_size = 256UL * 1024UL,
+ .buffer_size = 128UL * 1024UL,
};
-FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty640k)
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty320k)
{
/* two u64 index and trailing end bitmap */
- .buffer_size = 640UL * 1024UL,
+ .buffer_size = 320UL * 1024UL,
};
-FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M)
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty64M)
{
- /* 4K bitmap (128M IOVA range) */
- .buffer_size = 128UL * 1024UL * 1024UL,
+ /* 4K bitmap (64M IOVA range) */
+ .buffer_size = 64UL * 1024UL * 1024UL,
};
-FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M_huge)
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty64M_huge)
{
- /* 4K bitmap (128M IOVA range) */
- .buffer_size = 128UL * 1024UL * 1024UL,
+ /* 4K bitmap (64M IOVA range) */
+ .buffer_size = 64UL * 1024UL * 1024UL,
.hugepages = true,
};
-FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256M)
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M)
{
- /* 8K bitmap (256M IOVA range) */
- .buffer_size = 256UL * 1024UL * 1024UL,
+ /* 8K bitmap (128M IOVA range) */
+ .buffer_size = 128UL * 1024UL * 1024UL,
};
-FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256M_huge)
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M_huge)
{
- /* 8K bitmap (256M IOVA range) */
- .buffer_size = 256UL * 1024UL * 1024UL,
+ /* 8K bitmap (128M IOVA range) */
+ .buffer_size = 128UL * 1024UL * 1024UL,
.hugepages = true,
};
diff --git a/tools/testing/selftests/iommu/iommufd_fail_nth.c b/tools/testing/selftests/iommu/iommufd_fail_nth.c
index f590417cd67a..c5d5e69452b0 100644
--- a/tools/testing/selftests/iommu/iommufd_fail_nth.c
+++ b/tools/testing/selftests/iommu/iommufd_fail_nth.c
@@ -615,7 +615,7 @@ TEST_FAIL_NTH(basic_fail_nth, device)
if (_test_cmd_get_hw_info(self->fd, idev_id, &info, sizeof(info), NULL))
return -1;
- if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0, &hwpt_id,
+ if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0, 0, &hwpt_id,
IOMMU_HWPT_DATA_NONE, 0, 0))
return -1;
diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h
index 8d2b46b2114d..40f6f14ce136 100644
--- a/tools/testing/selftests/iommu/iommufd_utils.h
+++ b/tools/testing/selftests/iommu/iommufd_utils.h
@@ -22,6 +22,8 @@
#define BIT_MASK(nr) (1UL << ((nr) % __BITS_PER_LONG))
#define BIT_WORD(nr) ((nr) / __BITS_PER_LONG)
+#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))
+
static inline void set_bit(unsigned int nr, unsigned long *addr)
{
unsigned long mask = BIT_MASK(nr);
@@ -153,7 +155,7 @@ static int _test_cmd_mock_domain_replace(int fd, __u32 stdev_id, __u32 pt_id,
EXPECT_ERRNO(_errno, _test_cmd_mock_domain_replace(self->fd, stdev_id, \
pt_id, NULL))
-static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id,
+static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id, __u32 ft_id,
__u32 flags, __u32 *hwpt_id, __u32 data_type,
void *data, size_t data_len)
{
@@ -165,6 +167,7 @@ static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id,
.data_type = data_type,
.data_len = data_len,
.data_uptr = (uint64_t)data,
+ .fault_id = ft_id,
};
int ret;
@@ -177,24 +180,36 @@ static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id,
}
#define test_cmd_hwpt_alloc(device_id, pt_id, flags, hwpt_id) \
- ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, flags, \
+ ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, 0, flags, \
hwpt_id, IOMMU_HWPT_DATA_NONE, NULL, \
0))
#define test_err_hwpt_alloc(_errno, device_id, pt_id, flags, hwpt_id) \
EXPECT_ERRNO(_errno, _test_cmd_hwpt_alloc( \
- self->fd, device_id, pt_id, flags, \
+ self->fd, device_id, pt_id, 0, flags, \
hwpt_id, IOMMU_HWPT_DATA_NONE, NULL, 0))
#define test_cmd_hwpt_alloc_nested(device_id, pt_id, flags, hwpt_id, \
data_type, data, data_len) \
- ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, flags, \
+ ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, 0, flags, \
hwpt_id, data_type, data, data_len))
#define test_err_hwpt_alloc_nested(_errno, device_id, pt_id, flags, hwpt_id, \
data_type, data, data_len) \
EXPECT_ERRNO(_errno, \
- _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, flags, \
+ _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, 0, flags, \
hwpt_id, data_type, data, data_len))
+#define test_cmd_hwpt_alloc_iopf(device_id, pt_id, fault_id, flags, hwpt_id, \
+ data_type, data, data_len) \
+ ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, fault_id, \
+ flags, hwpt_id, data_type, data, \
+ data_len))
+#define test_err_hwpt_alloc_iopf(_errno, device_id, pt_id, fault_id, flags, \
+ hwpt_id, data_type, data, data_len) \
+ EXPECT_ERRNO(_errno, \
+ _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, fault_id, \
+ flags, hwpt_id, data_type, data, \
+ data_len))
+
#define test_cmd_hwpt_check_iotlb(hwpt_id, iotlb_id, expected) \
({ \
struct iommu_test_cmd test_cmd = { \
@@ -346,12 +361,12 @@ static int _test_cmd_mock_domain_set_dirty(int fd, __u32 hwpt_id, size_t length,
static int _test_mock_dirty_bitmaps(int fd, __u32 hwpt_id, size_t length,
__u64 iova, size_t page_size,
size_t pte_page_size, __u64 *bitmap,
- __u64 bitmap_size, __u32 flags,
+ __u64 nbits, __u32 flags,
struct __test_metadata *_metadata)
{
unsigned long npte = pte_page_size / page_size, pteset = 2 * npte;
- unsigned long nbits = bitmap_size * BITS_PER_BYTE;
unsigned long j, i, nr = nbits / pteset ?: 1;
+ unsigned long bitmap_size = DIV_ROUND_UP(nbits, BITS_PER_BYTE);
__u64 out_dirty = 0;
/* Mark all even bits as dirty in the mock domain */
@@ -684,3 +699,66 @@ static int _test_cmd_get_hw_info(int fd, __u32 device_id, void *data,
#define test_cmd_get_hw_capabilities(device_id, caps, mask) \
ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, NULL, 0, &caps))
+
+static int _test_ioctl_fault_alloc(int fd, __u32 *fault_id, __u32 *fault_fd)
+{
+ struct iommu_fault_alloc cmd = {
+ .size = sizeof(cmd),
+ };
+ int ret;
+
+ ret = ioctl(fd, IOMMU_FAULT_QUEUE_ALLOC, &cmd);
+ if (ret)
+ return ret;
+ *fault_id = cmd.out_fault_id;
+ *fault_fd = cmd.out_fault_fd;
+ return 0;
+}
+
+#define test_ioctl_fault_alloc(fault_id, fault_fd) \
+ ({ \
+ ASSERT_EQ(0, _test_ioctl_fault_alloc(self->fd, fault_id, \
+ fault_fd)); \
+ ASSERT_NE(0, *(fault_id)); \
+ ASSERT_NE(0, *(fault_fd)); \
+ })
+
+static int _test_cmd_trigger_iopf(int fd, __u32 device_id, __u32 fault_fd)
+{
+ struct iommu_test_cmd trigger_iopf_cmd = {
+ .size = sizeof(trigger_iopf_cmd),
+ .op = IOMMU_TEST_OP_TRIGGER_IOPF,
+ .trigger_iopf = {
+ .dev_id = device_id,
+ .pasid = 0x1,
+ .grpid = 0x2,
+ .perm = IOMMU_PGFAULT_PERM_READ | IOMMU_PGFAULT_PERM_WRITE,
+ .addr = 0xdeadbeaf,
+ },
+ };
+ struct iommu_hwpt_page_response response = {
+ .code = IOMMUFD_PAGE_RESP_SUCCESS,
+ };
+ struct iommu_hwpt_pgfault fault = {};
+ ssize_t bytes;
+ int ret;
+
+ ret = ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_TRIGGER_IOPF), &trigger_iopf_cmd);
+ if (ret)
+ return ret;
+
+ bytes = read(fault_fd, &fault, sizeof(fault));
+ if (bytes <= 0)
+ return -EIO;
+
+ response.cookie = fault.cookie;
+
+ bytes = write(fault_fd, &response, sizeof(response));
+ if (bytes <= 0)
+ return -EIO;
+
+ return 0;
+}
+
+#define test_cmd_trigger_iopf(device_id, fault_fd) \
+ ASSERT_EQ(0, _test_cmd_trigger_iopf(self->fd, device_id, fault_fd))
diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c
index 656c43c24044..c75ea4094870 100644
--- a/tools/testing/selftests/ipc/msgque.c
+++ b/tools/testing/selftests/ipc/msgque.c
@@ -198,13 +198,12 @@ int main(int argc, char **argv)
struct msgque_data msgque;
if (getuid() != 0)
- return ksft_exit_skip(
- "Please run the test as root - Exiting.\n");
+ ksft_exit_skip("Please run the test as root - Exiting.\n");
msgque.key = ftok(argv[0], 822155650);
if (msgque.key == -1) {
printf("Can't make key: %d\n", -errno);
- return ksft_exit_fail();
+ ksft_exit_fail();
}
msgque.msq_id = msgget(msgque.key, IPC_CREAT | IPC_EXCL | 0666);
@@ -243,13 +242,13 @@ int main(int argc, char **argv)
printf("Failed to test queue: %d\n", err);
goto err_out;
}
- return ksft_exit_pass();
+ ksft_exit_pass();
err_destroy:
if (msgctl(msgque.msq_id, IPC_RMID, NULL)) {
printf("Failed to destroy queue: %d\n", -errno);
- return ksft_exit_fail();
+ ksft_exit_fail();
}
err_out:
- return ksft_exit_fail();
+ ksft_exit_fail();
}
diff --git a/tools/testing/selftests/kcmp/kcmp_test.c b/tools/testing/selftests/kcmp/kcmp_test.c
index 25110c7c0b3e..d7a8e321bb16 100644
--- a/tools/testing/selftests/kcmp/kcmp_test.c
+++ b/tools/testing/selftests/kcmp/kcmp_test.c
@@ -91,7 +91,7 @@ int main(int argc, char **argv)
ksft_print_header();
ksft_set_plan(3);
- fd2 = open(kpath, O_RDWR, 0644);
+ fd2 = open(kpath, O_RDWR);
if (fd2 < 0) {
perror("Can't open file");
ksft_exit_fail();
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
index 14bbab0cce13..b8967b6e29d5 100644
--- a/tools/testing/selftests/kselftest.h
+++ b/tools/testing/selftests/kselftest.h
@@ -16,10 +16,12 @@
* For each test, report any progress, debugging, etc with:
*
* ksft_print_msg(fmt, ...);
+ * ksft_perror(msg);
*
* and finally report the pass/fail/skip/xfail state of the test with one of:
*
* ksft_test_result(condition, fmt, ...);
+ * ksft_test_result_report(result, fmt, ...);
* ksft_test_result_pass(fmt, ...);
* ksft_test_result_fail(fmt, ...);
* ksft_test_result_skip(fmt, ...);
@@ -39,6 +41,7 @@
* the program is aborting before finishing all tests):
*
* ksft_exit_fail_msg(fmt, ...);
+ * ksft_exit_fail_perror(msg);
*
*/
#ifndef __KSELFTEST_H
@@ -165,15 +168,7 @@ static inline __printf(1, 2) void ksft_print_msg(const char *msg, ...)
static inline void ksft_perror(const char *msg)
{
-#ifndef NOLIBC
ksft_print_msg("%s: %s (%d)\n", msg, strerror(errno), errno);
-#else
- /*
- * nolibc doesn't provide strerror() and it seems
- * inappropriate to add one, just print the errno.
- */
- ksft_print_msg("%s: %d)\n", msg, errno);
-#endif
}
static inline __printf(1, 2) void ksft_test_result_pass(const char *msg, ...)
@@ -305,13 +300,34 @@ void ksft_test_result_code(int exit_code, const char *test_name,
printf("\n");
}
-static inline __noreturn int ksft_exit_pass(void)
+/**
+ * ksft_test_result() - Report test success based on truth of condition
+ *
+ * @condition: if true, report test success, otherwise failure.
+ */
+#define ksft_test_result_report(result, fmt, ...) do { \
+ switch (result) { \
+ case KSFT_PASS: \
+ ksft_test_result_pass(fmt, ##__VA_ARGS__); \
+ break; \
+ case KSFT_FAIL: \
+ ksft_test_result_fail(fmt, ##__VA_ARGS__); \
+ break; \
+ case KSFT_XFAIL: \
+ ksft_test_result_xfail(fmt, ##__VA_ARGS__); \
+ break; \
+ case KSFT_SKIP: \
+ ksft_test_result_skip(fmt, ##__VA_ARGS__); \
+ break; \
+ } } while (0)
+
+static inline __noreturn void ksft_exit_pass(void)
{
ksft_print_cnts();
exit(KSFT_PASS);
}
-static inline __noreturn int ksft_exit_fail(void)
+static inline __noreturn void ksft_exit_fail(void)
{
ksft_print_cnts();
exit(KSFT_FAIL);
@@ -338,7 +354,7 @@ static inline __noreturn int ksft_exit_fail(void)
ksft_cnt.ksft_xfail + \
ksft_cnt.ksft_xskip)
-static inline __noreturn __printf(1, 2) int ksft_exit_fail_msg(const char *msg, ...)
+static inline __noreturn __printf(1, 2) void ksft_exit_fail_msg(const char *msg, ...)
{
int saved_errno = errno;
va_list args;
@@ -353,19 +369,32 @@ static inline __noreturn __printf(1, 2) int ksft_exit_fail_msg(const char *msg,
exit(KSFT_FAIL);
}
-static inline __noreturn int ksft_exit_xfail(void)
+static inline __noreturn void ksft_exit_fail_perror(const char *msg)
+{
+#ifndef NOLIBC
+ ksft_exit_fail_msg("%s: %s (%d)\n", msg, strerror(errno), errno);
+#else
+ /*
+ * nolibc doesn't provide strerror() and it seems
+ * inappropriate to add one, just print the errno.
+ */
+ ksft_exit_fail_msg("%s: %d)\n", msg, errno);
+#endif
+}
+
+static inline __noreturn void ksft_exit_xfail(void)
{
ksft_print_cnts();
exit(KSFT_XFAIL);
}
-static inline __noreturn int ksft_exit_xpass(void)
+static inline __noreturn void ksft_exit_xpass(void)
{
ksft_print_cnts();
exit(KSFT_XPASS);
}
-static inline __noreturn __printf(1, 2) int ksft_exit_skip(const char *msg, ...)
+static inline __noreturn __printf(1, 2) void ksft_exit_skip(const char *msg, ...)
{
int saved_errno = errno;
va_list args;
diff --git a/tools/testing/selftests/devices/ksft.py b/tools/testing/selftests/kselftest/ksft.py
index cd89fb2bc10e..cd89fb2bc10e 100644
--- a/tools/testing/selftests/devices/ksft.py
+++ b/tools/testing/selftests/kselftest/ksft.py
diff --git a/tools/testing/selftests/kselftest/ktap_helpers.sh b/tools/testing/selftests/kselftest/ktap_helpers.sh
index f2fbb914e058..79a125eb24c2 100644
--- a/tools/testing/selftests/kselftest/ktap_helpers.sh
+++ b/tools/testing/selftests/kselftest/ktap_helpers.sh
@@ -43,7 +43,7 @@ __ktap_test() {
directive="$3" # optional
local directive_str=
- [[ ! -z "$directive" ]] && directive_str="# $directive"
+ [ ! -z "$directive" ] && directive_str="# $directive"
echo $result $KTAP_TESTNO $description $directive_str
@@ -99,7 +99,7 @@ ktap_exit_fail_msg() {
ktap_finished() {
ktap_print_totals
- if [ $(("$KTAP_CNT_PASS" + "$KTAP_CNT_SKIP")) -eq "$KSFT_NUM_TESTS" ]; then
+ if [ $((KTAP_CNT_PASS + KTAP_CNT_SKIP)) -eq "$KSFT_NUM_TESTS" ]; then
exit "$KSFT_PASS"
else
exit "$KSFT_FAIL"
diff --git a/tools/testing/selftests/kselftest_deps.sh b/tools/testing/selftests/kselftest_deps.sh
index de59cc8f03c3..487e49fdf2a6 100755
--- a/tools/testing/selftests/kselftest_deps.sh
+++ b/tools/testing/selftests/kselftest_deps.sh
@@ -244,6 +244,7 @@ l4_test()
l5_test()
{
tests=$(find $(dirname "$test") -type f -name "*.mk")
+ [[ -z "${tests// }" ]] && return
test_libs=$(grep "^IOURING_EXTRA_LIBS +\?=" $tests | \
cut -d "=" -f 2)
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index d98702b6955d..40723a6a083f 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -281,6 +281,32 @@
* A bare "return;" statement may be used to return early.
*/
#define FIXTURE_TEARDOWN(fixture_name) \
+ static const bool fixture_name##_teardown_parent; \
+ __FIXTURE_TEARDOWN(fixture_name)
+
+/**
+ * FIXTURE_TEARDOWN_PARENT()
+ * *_metadata* is included so that EXPECT_*, ASSERT_* etc. work correctly.
+ *
+ * @fixture_name: fixture name
+ *
+ * .. code-block:: c
+ *
+ * FIXTURE_TEARDOWN_PARENT(fixture_name) { implementation }
+ *
+ * Same as FIXTURE_TEARDOWN() but run this code in a parent process. This
+ * enables the test process to drop its privileges without impacting the
+ * related FIXTURE_TEARDOWN_PARENT() (e.g. to remove files from a directory
+ * where write access was dropped).
+ *
+ * To make it possible for the parent process to use *self*, share (MAP_SHARED)
+ * the fixture data between all forked processes.
+ */
+#define FIXTURE_TEARDOWN_PARENT(fixture_name) \
+ static const bool fixture_name##_teardown_parent = true; \
+ __FIXTURE_TEARDOWN(fixture_name)
+
+#define __FIXTURE_TEARDOWN(fixture_name) \
void fixture_name##_teardown( \
struct __test_metadata __attribute__((unused)) *_metadata, \
FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \
@@ -325,7 +351,7 @@
* variant.
*/
#define FIXTURE_VARIANT_ADD(fixture_name, variant_name) \
- extern FIXTURE_VARIANT(fixture_name) \
+ extern const FIXTURE_VARIANT(fixture_name) \
_##fixture_name##_##variant_name##_variant; \
static struct __fixture_variant_metadata \
_##fixture_name##_##variant_name##_object = \
@@ -337,7 +363,7 @@
__register_fixture_variant(&_##fixture_name##_fixture_object, \
&_##fixture_name##_##variant_name##_object); \
} \
- FIXTURE_VARIANT(fixture_name) \
+ const FIXTURE_VARIANT(fixture_name) \
_##fixture_name##_##variant_name##_variant =
/**
@@ -355,10 +381,11 @@
* Very similar to TEST() except that *self* is the setup instance of fixture's
* datatype exposed for use by the implementation.
*
- * The @test_name code is run in a separate process sharing the same memory
- * (i.e. vfork), which means that the test process can update its privileges
- * without impacting the related FIXTURE_TEARDOWN() (e.g. to remove files from
- * a directory where write access was dropped).
+ * The _metadata object is shared (MAP_SHARED) with all the potential forked
+ * processes, which enables them to use EXCEPT_*() and ASSERT_*().
+ *
+ * The *self* object is only shared with the potential forked processes if
+ * FIXTURE_TEARDOWN_PARENT() is used instead of FIXTURE_TEARDOWN().
*/
#define TEST_F(fixture_name, test_name) \
__TEST_F_IMPL(fixture_name, test_name, -1, TEST_TIMEOUT_DEFAULT)
@@ -379,53 +406,71 @@
struct __fixture_variant_metadata *variant) \
{ \
/* fixture data is alloced, setup, and torn down per call. */ \
- FIXTURE_DATA(fixture_name) self; \
+ FIXTURE_DATA(fixture_name) self_private, *self = NULL; \
pid_t child = 1; \
int status = 0; \
- bool jmp = false; \
- memset(&self, 0, sizeof(FIXTURE_DATA(fixture_name))); \
+ /* Makes sure there is only one teardown, even when child forks again. */ \
+ bool *teardown = mmap(NULL, sizeof(*teardown), \
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); \
+ *teardown = false; \
+ if (sizeof(*self) > 0) { \
+ if (fixture_name##_teardown_parent) { \
+ self = mmap(NULL, sizeof(*self), PROT_READ | PROT_WRITE, \
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0); \
+ } else { \
+ memset(&self_private, 0, sizeof(self_private)); \
+ self = &self_private; \
+ } \
+ } \
if (setjmp(_metadata->env) == 0) { \
- /* Use the same _metadata. */ \
- child = vfork(); \
+ /* _metadata and potentially self are shared with all forks. */ \
+ child = fork(); \
if (child == 0) { \
- fixture_name##_setup(_metadata, &self, variant->data); \
+ fixture_name##_setup(_metadata, self, variant->data); \
/* Let setup failure terminate early. */ \
if (_metadata->exit_code) \
_exit(0); \
_metadata->setup_completed = true; \
- fixture_name##_##test_name(_metadata, &self, variant->data); \
+ fixture_name##_##test_name(_metadata, self, variant->data); \
} else if (child < 0 || child != waitpid(child, &status, 0)) { \
ksft_print_msg("ERROR SPAWNING TEST GRANDCHILD\n"); \
_metadata->exit_code = KSFT_FAIL; \
} \
} \
- else \
- jmp = true; \
if (child == 0) { \
- if (_metadata->setup_completed && !_metadata->teardown_parent && !jmp) \
- fixture_name##_teardown(_metadata, &self, variant->data); \
+ if (_metadata->setup_completed && !fixture_name##_teardown_parent && \
+ __sync_bool_compare_and_swap(teardown, false, true)) \
+ fixture_name##_teardown(_metadata, self, variant->data); \
_exit(0); \
} \
- if (_metadata->setup_completed && _metadata->teardown_parent) \
- fixture_name##_teardown(_metadata, &self, variant->data); \
- if (!WIFEXITED(status) && WIFSIGNALED(status)) \
+ if (_metadata->setup_completed && fixture_name##_teardown_parent && \
+ __sync_bool_compare_and_swap(teardown, false, true)) \
+ fixture_name##_teardown(_metadata, self, variant->data); \
+ munmap(teardown, sizeof(*teardown)); \
+ if (self && fixture_name##_teardown_parent) \
+ munmap(self, sizeof(*self)); \
+ if (WIFEXITED(status)) { \
+ if (WEXITSTATUS(status)) \
+ _metadata->exit_code = WEXITSTATUS(status); \
+ } else if (WIFSIGNALED(status)) { \
/* Forward signal to __wait_for_test(). */ \
kill(getpid(), WTERMSIG(status)); \
+ } \
__test_check_assert(_metadata); \
} \
- static struct __test_metadata \
- _##fixture_name##_##test_name##_object = { \
- .name = #test_name, \
- .fn = &wrapper_##fixture_name##_##test_name, \
- .fixture = &_##fixture_name##_fixture_object, \
- .termsig = signal, \
- .timeout = tmout, \
- .teardown_parent = false, \
- }; \
+ static struct __test_metadata *_##fixture_name##_##test_name##_object; \
static void __attribute__((constructor)) \
_register_##fixture_name##_##test_name(void) \
{ \
- __register_test(&_##fixture_name##_##test_name##_object); \
+ struct __test_metadata *object = mmap(NULL, sizeof(*object), \
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0); \
+ object->name = #test_name; \
+ object->fn = &wrapper_##fixture_name##_##test_name; \
+ object->fixture = &_##fixture_name##_fixture_object; \
+ object->termsig = signal; \
+ object->timeout = tmout; \
+ _##fixture_name##_##test_name##_object = object; \
+ __register_test(object); \
} \
static void fixture_name##_##test_name( \
struct __test_metadata __attribute__((unused)) *_metadata, \
@@ -833,11 +878,12 @@ struct __test_xfail {
{ \
.fixture = &_##fixture_name##_fixture_object, \
.variant = &_##fixture_name##_##variant_name##_object, \
- .test = &_##fixture_name##_##test_name##_object, \
}; \
static void __attribute__((constructor)) \
_register_##fixture_name##_##variant_name##_##test_name##_xfail(void) \
{ \
+ _##fixture_name##_##variant_name##_##test_name##_xfail.test = \
+ _##fixture_name##_##test_name##_object; \
__register_xfail(&_##fixture_name##_##variant_name##_##test_name##_xfail); \
}
@@ -880,7 +926,6 @@ struct __test_metadata {
bool timed_out; /* did this test timeout instead of exiting? */
bool aborted; /* stopped test due to failed ASSERT */
bool setup_completed; /* did setup finish? */
- bool teardown_parent; /* run teardown in a parent process */
jmp_buf env; /* for exiting out of test early */
struct __test_results *results;
struct __test_metadata *prev, *next;
@@ -958,7 +1003,14 @@ void __wait_for_test(struct __test_metadata *t)
.sa_flags = SA_SIGINFO,
};
struct sigaction saved_action;
- int status;
+ /*
+ * Sets status so that WIFEXITED(status) returns true and
+ * WEXITSTATUS(status) returns KSFT_FAIL. This safe default value
+ * should never be evaluated because of the waitpid(2) check and
+ * SIGALRM handling.
+ */
+ int status = KSFT_FAIL << 8;
+ int child;
if (sigaction(SIGALRM, &action, &saved_action)) {
t->exit_code = KSFT_FAIL;
@@ -970,7 +1022,15 @@ void __wait_for_test(struct __test_metadata *t)
__active_test = t;
t->timed_out = false;
alarm(t->timeout);
- waitpid(t->pid, &status, 0);
+ child = waitpid(t->pid, &status, 0);
+ if (child == -1 && errno != EINTR) {
+ t->exit_code = KSFT_FAIL;
+ fprintf(TH_LOG_STREAM,
+ "# %s: Failed to wait for PID %d (errno: %d)\n",
+ t->name, t->pid, errno);
+ return;
+ }
+
alarm(0);
if (sigaction(SIGALRM, &saved_action, NULL)) {
t->exit_code = KSFT_FAIL;
@@ -1025,6 +1085,7 @@ void __wait_for_test(struct __test_metadata *t)
WTERMSIG(status));
}
} else {
+ t->exit_code = KSFT_FAIL;
fprintf(TH_LOG_STREAM,
"# %s: Test ended in some other way [%u]\n",
t->name,
@@ -1158,20 +1219,20 @@ void __run_test(struct __fixture_metadata *f,
struct __test_metadata *t)
{
struct __test_xfail *xfail;
- char *test_name;
+ char test_name[1024];
const char *diagnostic;
+ int child;
/* reset test struct */
t->exit_code = KSFT_PASS;
t->trigger = 0;
+ t->aborted = false;
+ t->setup_completed = false;
+ memset(t->env, 0, sizeof(t->env));
memset(t->results->reason, 0, sizeof(t->results->reason));
- if (asprintf(&test_name, "%s%s%s.%s", f->name,
- variant->name[0] ? "." : "", variant->name, t->name) == -1) {
- ksft_print_msg("ERROR ALLOCATING MEMORY\n");
- t->exit_code = KSFT_FAIL;
- _exit(t->exit_code);
- }
+ snprintf(test_name, sizeof(test_name), "%s%s%s.%s",
+ f->name, variant->name[0] ? "." : "", variant->name, t->name);
ksft_print_msg(" RUN %s ...\n", test_name);
@@ -1179,15 +1240,16 @@ void __run_test(struct __fixture_metadata *f,
fflush(stdout);
fflush(stderr);
- t->pid = fork();
- if (t->pid < 0) {
+ child = fork();
+ if (child < 0) {
ksft_print_msg("ERROR SPAWNING TEST CHILD\n");
t->exit_code = KSFT_FAIL;
- } else if (t->pid == 0) {
+ } else if (child == 0) {
setpgrp();
t->fn(t, variant);
_exit(t->exit_code);
} else {
+ t->pid = child;
__wait_for_test(t);
}
ksft_print_msg(" %4s %s\n",
@@ -1209,7 +1271,6 @@ void __run_test(struct __fixture_metadata *f,
ksft_test_result_code(t->exit_code, test_name,
diagnostic ? "%s" : NULL, diagnostic);
- free(test_name);
}
static int test_harness_run(int argc, char **argv)
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 741c7dc16afc..48d32c5aa3eb 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -45,6 +45,7 @@ LIBKVM_x86_64 += lib/x86_64/vmx.c
LIBKVM_aarch64 += lib/aarch64/gic.c
LIBKVM_aarch64 += lib/aarch64/gic_v3.c
+LIBKVM_aarch64 += lib/aarch64/gic_v3_its.c
LIBKVM_aarch64 += lib/aarch64/handlers.S
LIBKVM_aarch64 += lib/aarch64/processor.c
LIBKVM_aarch64 += lib/aarch64/spinlock.c
@@ -111,6 +112,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_invalid_nested_guest_state
TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_nested_tsc_scaling_test
+TEST_GEN_PROGS_x86_64 += x86_64/apic_bus_clock_test
TEST_GEN_PROGS_x86_64 += x86_64/xapic_ipi_test
TEST_GEN_PROGS_x86_64 += x86_64/xapic_state_test
TEST_GEN_PROGS_x86_64 += x86_64/xcr0_cpuid_test
@@ -120,6 +122,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_caps_test
TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test
TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test
+TEST_GEN_PROGS_x86_64 += x86_64/sev_init2_tests
TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests
TEST_GEN_PROGS_x86_64 += x86_64/sev_smoke_test
TEST_GEN_PROGS_x86_64 += x86_64/amx_test
@@ -143,6 +146,7 @@ TEST_GEN_PROGS_x86_64 += set_memory_region_test
TEST_GEN_PROGS_x86_64 += steal_time
TEST_GEN_PROGS_x86_64 += kvm_binary_stats_test
TEST_GEN_PROGS_x86_64 += system_counter_offset_test
+TEST_GEN_PROGS_x86_64 += pre_fault_memory_test
# Compiled outputs used by test targets
TEST_GEN_PROGS_EXTENDED_x86_64 += x86_64/nx_huge_pages_test
@@ -157,6 +161,7 @@ TEST_GEN_PROGS_aarch64 += aarch64/smccc_filter
TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config
TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq
+TEST_GEN_PROGS_aarch64 += aarch64/vgic_lpi_stress
TEST_GEN_PROGS_aarch64 += aarch64/vpmu_counter_access
TEST_GEN_PROGS_aarch64 += access_tracking_perf_test
TEST_GEN_PROGS_aarch64 += arch_timer
@@ -180,6 +185,7 @@ TEST_GEN_PROGS_s390x += s390x/sync_regs_test
TEST_GEN_PROGS_s390x += s390x/tprot
TEST_GEN_PROGS_s390x += s390x/cmma_test
TEST_GEN_PROGS_s390x += s390x/debug_test
+TEST_GEN_PROGS_s390x += s390x/shared_zeropage_test
TEST_GEN_PROGS_s390x += demand_paging_test
TEST_GEN_PROGS_s390x += dirty_log_test
TEST_GEN_PROGS_s390x += guest_print_test
@@ -189,6 +195,8 @@ TEST_GEN_PROGS_s390x += rseq_test
TEST_GEN_PROGS_s390x += set_memory_region_test
TEST_GEN_PROGS_s390x += kvm_binary_stats_test
+TEST_GEN_PROGS_riscv += riscv/sbi_pmu_test
+TEST_GEN_PROGS_riscv += riscv/ebreak_test
TEST_GEN_PROGS_riscv += arch_timer
TEST_GEN_PROGS_riscv += demand_paging_test
TEST_GEN_PROGS_riscv += dirty_log_test
@@ -225,8 +233,8 @@ LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include
endif
CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
-Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \
- -fno-builtin-memcmp -fno-builtin-memcpy -fno-builtin-memset \
- -fno-builtin-strnlen \
+ -fno-builtin-memcmp -fno-builtin-memcpy \
+ -fno-builtin-memset -fno-builtin-strnlen \
-fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \
-I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \
-I$(<D) -Iinclude/$(ARCH_DIR) -I ../rseq -I.. $(EXTRA_CFLAGS) \
diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c
index 4eaba83cdcf3..eeba1cc87ff8 100644
--- a/tools/testing/selftests/kvm/aarch64/arch_timer.c
+++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c
@@ -5,18 +5,14 @@
*
* Copyright (c) 2021, Google LLC.
*/
-#define _GNU_SOURCE
-
#include "arch_timer.h"
#include "delay.h"
#include "gic.h"
#include "processor.h"
#include "timer_test.h"
+#include "ucall_common.h"
#include "vgic.h"
-#define GICD_BASE_GPA 0x8000000ULL
-#define GICR_BASE_GPA 0x80A0000ULL
-
enum guest_stage {
GUEST_STAGE_VTIMER_CVAL = 1,
GUEST_STAGE_VTIMER_TVAL,
@@ -149,8 +145,7 @@ static void guest_code(void)
local_irq_disable();
- gic_init(GIC_V3, test_args.nr_vcpus,
- (void *)GICD_BASE_GPA, (void *)GICR_BASE_GPA);
+ gic_init(GIC_V3, test_args.nr_vcpus);
timer_set_ctl(VIRTUAL, CTL_IMASK);
timer_set_ctl(PHYSICAL, CTL_IMASK);
@@ -209,7 +204,7 @@ struct kvm_vm *test_vm_create(void)
vcpu_init_descriptor_tables(vcpus[i]);
test_init_timer_irq(vm);
- gic_fd = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA);
+ gic_fd = vgic_v3_setup(vm, nr_vcpus, 64);
__TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3");
/* Make all the test's cmdline args visible to the guest */
diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
index 5972905275cf..d29b08198b42 100644
--- a/tools/testing/selftests/kvm/aarch64/page_fault_test.c
+++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
@@ -7,7 +7,6 @@
* hugetlbfs with a hole). It checks that the expected handling method is
* called (e.g., uffd faults with the right address and write/read flag).
*/
-#define _GNU_SOURCE
#include <linux/bitmap.h>
#include <fcntl.h>
#include <test_util.h>
@@ -375,14 +374,14 @@ static void setup_uffd(struct kvm_vm *vm, struct test_params *p,
*pt_uffd = uffd_setup_demand_paging(uffd_mode, 0,
pt_args.hva,
pt_args.paging_size,
- test->uffd_pt_handler);
+ 1, test->uffd_pt_handler);
*data_uffd = NULL;
if (test->uffd_data_handler)
*data_uffd = uffd_setup_demand_paging(uffd_mode, 0,
data_args.hva,
data_args.paging_size,
- test->uffd_data_handler);
+ 1, test->uffd_data_handler);
}
static void free_uffd(struct test_desc *test, struct uffd_desc *pt_uffd,
diff --git a/tools/testing/selftests/kvm/aarch64/psci_test.c b/tools/testing/selftests/kvm/aarch64/psci_test.c
index 9b004905d1d3..61731a950def 100644
--- a/tools/testing/selftests/kvm/aarch64/psci_test.c
+++ b/tools/testing/selftests/kvm/aarch64/psci_test.c
@@ -11,9 +11,9 @@
* KVM_SYSTEM_EVENT_SUSPEND UAPI.
*/
-#define _GNU_SOURCE
-
+#include <linux/kernel.h>
#include <linux/psci.h>
+#include <asm/cputype.h>
#include "kvm_util.h"
#include "processor.h"
diff --git a/tools/testing/selftests/kvm/aarch64/set_id_regs.c b/tools/testing/selftests/kvm/aarch64/set_id_regs.c
index 16e2338686c1..d20981663831 100644
--- a/tools/testing/selftests/kvm/aarch64/set_id_regs.c
+++ b/tools/testing/selftests/kvm/aarch64/set_id_regs.c
@@ -219,6 +219,7 @@ static void guest_code(void)
GUEST_REG_SYNC(SYS_ID_AA64MMFR1_EL1);
GUEST_REG_SYNC(SYS_ID_AA64MMFR2_EL1);
GUEST_REG_SYNC(SYS_ID_AA64ZFR0_EL1);
+ GUEST_REG_SYNC(SYS_CTR_EL0);
GUEST_DONE();
}
@@ -327,8 +328,8 @@ uint64_t get_invalid_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
return ftr;
}
-static void test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg,
- const struct reg_ftr_bits *ftr_bits)
+static uint64_t test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg,
+ const struct reg_ftr_bits *ftr_bits)
{
uint8_t shift = ftr_bits->shift;
uint64_t mask = ftr_bits->mask;
@@ -346,6 +347,8 @@ static void test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg,
vcpu_set_reg(vcpu, reg, val);
vcpu_get_reg(vcpu, reg, &new_val);
TEST_ASSERT_EQ(new_val, val);
+
+ return new_val;
}
static void test_reg_set_fail(struct kvm_vcpu *vcpu, uint64_t reg,
@@ -374,7 +377,15 @@ static void test_reg_set_fail(struct kvm_vcpu *vcpu, uint64_t reg,
TEST_ASSERT_EQ(val, old_val);
}
-static void test_user_set_reg(struct kvm_vcpu *vcpu, bool aarch64_only)
+static uint64_t test_reg_vals[KVM_ARM_FEATURE_ID_RANGE_SIZE];
+
+#define encoding_to_range_idx(encoding) \
+ KVM_ARM_FEATURE_ID_RANGE_IDX(sys_reg_Op0(encoding), sys_reg_Op1(encoding), \
+ sys_reg_CRn(encoding), sys_reg_CRm(encoding), \
+ sys_reg_Op2(encoding))
+
+
+static void test_vm_ftr_id_regs(struct kvm_vcpu *vcpu, bool aarch64_only)
{
uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE];
struct reg_mask_range range = {
@@ -398,9 +409,7 @@ static void test_user_set_reg(struct kvm_vcpu *vcpu, bool aarch64_only)
int idx;
/* Get the index to masks array for the idreg */
- idx = KVM_ARM_FEATURE_ID_RANGE_IDX(sys_reg_Op0(reg_id), sys_reg_Op1(reg_id),
- sys_reg_CRn(reg_id), sys_reg_CRm(reg_id),
- sys_reg_Op2(reg_id));
+ idx = encoding_to_range_idx(reg_id);
for (int j = 0; ftr_bits[j].type != FTR_END; j++) {
/* Skip aarch32 reg on aarch64 only system, since they are RAZ/WI. */
@@ -414,7 +423,9 @@ static void test_user_set_reg(struct kvm_vcpu *vcpu, bool aarch64_only)
TEST_ASSERT_EQ(masks[idx] & ftr_bits[j].mask, ftr_bits[j].mask);
test_reg_set_fail(vcpu, reg, &ftr_bits[j]);
- test_reg_set_success(vcpu, reg, &ftr_bits[j]);
+
+ test_reg_vals[idx] = test_reg_set_success(vcpu, reg,
+ &ftr_bits[j]);
ksft_test_result_pass("%s\n", ftr_bits[j].name);
}
@@ -425,7 +436,6 @@ static void test_guest_reg_read(struct kvm_vcpu *vcpu)
{
bool done = false;
struct ucall uc;
- uint64_t val;
while (!done) {
vcpu_run(vcpu);
@@ -436,8 +446,8 @@ static void test_guest_reg_read(struct kvm_vcpu *vcpu)
break;
case UCALL_SYNC:
/* Make sure the written values are seen by guest */
- vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(uc.args[2]), &val);
- TEST_ASSERT_EQ(val, uc.args[3]);
+ TEST_ASSERT_EQ(test_reg_vals[encoding_to_range_idx(uc.args[2])],
+ uc.args[3]);
break;
case UCALL_DONE:
done = true;
@@ -448,13 +458,101 @@ static void test_guest_reg_read(struct kvm_vcpu *vcpu)
}
}
+/* Politely lifted from arch/arm64/include/asm/cache.h */
+/* Ctypen, bits[3(n - 1) + 2 : 3(n - 1)], for n = 1 to 7 */
+#define CLIDR_CTYPE_SHIFT(level) (3 * (level - 1))
+#define CLIDR_CTYPE_MASK(level) (7 << CLIDR_CTYPE_SHIFT(level))
+#define CLIDR_CTYPE(clidr, level) \
+ (((clidr) & CLIDR_CTYPE_MASK(level)) >> CLIDR_CTYPE_SHIFT(level))
+
+static void test_clidr(struct kvm_vcpu *vcpu)
+{
+ uint64_t clidr;
+ int level;
+
+ vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1), &clidr);
+
+ /* find the first empty level in the cache hierarchy */
+ for (level = 1; level < 7; level++) {
+ if (!CLIDR_CTYPE(clidr, level))
+ break;
+ }
+
+ /*
+ * If you have a mind-boggling 7 levels of cache, congratulations, you
+ * get to fix this.
+ */
+ TEST_ASSERT(level <= 7, "can't find an empty level in cache hierarchy");
+
+ /* stick in a unified cache level */
+ clidr |= BIT(2) << CLIDR_CTYPE_SHIFT(level);
+
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CLIDR_EL1), clidr);
+ test_reg_vals[encoding_to_range_idx(SYS_CLIDR_EL1)] = clidr;
+}
+
+static void test_ctr(struct kvm_vcpu *vcpu)
+{
+ u64 ctr;
+
+ vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CTR_EL0), &ctr);
+ ctr &= ~CTR_EL0_DIC_MASK;
+ if (ctr & CTR_EL0_IminLine_MASK)
+ ctr--;
+
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CTR_EL0), ctr);
+ test_reg_vals[encoding_to_range_idx(SYS_CTR_EL0)] = ctr;
+}
+
+static void test_vcpu_ftr_id_regs(struct kvm_vcpu *vcpu)
+{
+ u64 val;
+
+ test_clidr(vcpu);
+ test_ctr(vcpu);
+
+ vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), &val);
+ val++;
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), val);
+
+ test_reg_vals[encoding_to_range_idx(SYS_MPIDR_EL1)] = val;
+ ksft_test_result_pass("%s\n", __func__);
+}
+
+static void test_assert_id_reg_unchanged(struct kvm_vcpu *vcpu, uint32_t encoding)
+{
+ size_t idx = encoding_to_range_idx(encoding);
+ uint64_t observed;
+
+ vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(encoding), &observed);
+ TEST_ASSERT_EQ(test_reg_vals[idx], observed);
+}
+
+static void test_reset_preserves_id_regs(struct kvm_vcpu *vcpu)
+{
+ /*
+ * Calls KVM_ARM_VCPU_INIT behind the scenes, which will do an
+ * architectural reset of the vCPU.
+ */
+ aarch64_vcpu_setup(vcpu, NULL);
+
+ for (int i = 0; i < ARRAY_SIZE(test_regs); i++)
+ test_assert_id_reg_unchanged(vcpu, test_regs[i].reg);
+
+ test_assert_id_reg_unchanged(vcpu, SYS_MPIDR_EL1);
+ test_assert_id_reg_unchanged(vcpu, SYS_CLIDR_EL1);
+ test_assert_id_reg_unchanged(vcpu, SYS_CTR_EL0);
+
+ ksft_test_result_pass("%s\n", __func__);
+}
+
int main(void)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
bool aarch64_only;
uint64_t val, el0;
- int ftr_cnt;
+ int test_cnt;
TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES));
@@ -467,18 +565,22 @@ int main(void)
ksft_print_header();
- ftr_cnt = ARRAY_SIZE(ftr_id_aa64dfr0_el1) + ARRAY_SIZE(ftr_id_dfr0_el1) +
- ARRAY_SIZE(ftr_id_aa64isar0_el1) + ARRAY_SIZE(ftr_id_aa64isar1_el1) +
- ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) +
- ARRAY_SIZE(ftr_id_aa64mmfr0_el1) + ARRAY_SIZE(ftr_id_aa64mmfr1_el1) +
- ARRAY_SIZE(ftr_id_aa64mmfr2_el1) + ARRAY_SIZE(ftr_id_aa64zfr0_el1) -
- ARRAY_SIZE(test_regs);
+ test_cnt = ARRAY_SIZE(ftr_id_aa64dfr0_el1) + ARRAY_SIZE(ftr_id_dfr0_el1) +
+ ARRAY_SIZE(ftr_id_aa64isar0_el1) + ARRAY_SIZE(ftr_id_aa64isar1_el1) +
+ ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) +
+ ARRAY_SIZE(ftr_id_aa64mmfr0_el1) + ARRAY_SIZE(ftr_id_aa64mmfr1_el1) +
+ ARRAY_SIZE(ftr_id_aa64mmfr2_el1) + ARRAY_SIZE(ftr_id_aa64zfr0_el1) -
+ ARRAY_SIZE(test_regs) + 2;
- ksft_set_plan(ftr_cnt);
+ ksft_set_plan(test_cnt);
+
+ test_vm_ftr_id_regs(vcpu, aarch64_only);
+ test_vcpu_ftr_id_regs(vcpu);
- test_user_set_reg(vcpu, aarch64_only);
test_guest_reg_read(vcpu);
+ test_reset_preserves_id_regs(vcpu);
+
kvm_vm_free(vm);
ksft_finished();
diff --git a/tools/testing/selftests/kvm/aarch64/vgic_init.c b/tools/testing/selftests/kvm/aarch64/vgic_init.c
index ca917c71ff60..b3b5fb0ff0a9 100644
--- a/tools/testing/selftests/kvm/aarch64/vgic_init.c
+++ b/tools/testing/selftests/kvm/aarch64/vgic_init.c
@@ -4,7 +4,6 @@
*
* Copyright (C) 2020, Red Hat, Inc.
*/
-#define _GNU_SOURCE
#include <linux/kernel.h>
#include <sys/syscall.h>
#include <asm/kvm.h>
diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c
index 2e64b4856e38..a51dbd2a5f84 100644
--- a/tools/testing/selftests/kvm/aarch64/vgic_irq.c
+++ b/tools/testing/selftests/kvm/aarch64/vgic_irq.c
@@ -19,9 +19,6 @@
#include "gic_v3.h"
#include "vgic.h"
-#define GICD_BASE_GPA 0x08000000ULL
-#define GICR_BASE_GPA 0x080A0000ULL
-
/*
* Stores the user specified args; it's passed to the guest and to every test
* function.
@@ -49,9 +46,6 @@ struct test_args {
#define IRQ_DEFAULT_PRIO (LOWEST_PRIO - 1)
#define IRQ_DEFAULT_PRIO_REG (IRQ_DEFAULT_PRIO << KVM_PRIO_SHIFT) /* 0xf0 */
-static void *dist = (void *)GICD_BASE_GPA;
-static void *redist = (void *)GICR_BASE_GPA;
-
/*
* The kvm_inject_* utilities are used by the guest to ask the host to inject
* interrupts (e.g., using the KVM_IRQ_LINE ioctl).
@@ -152,7 +146,7 @@ static void reset_stats(void)
static uint64_t gic_read_ap1r0(void)
{
- uint64_t reg = read_sysreg_s(SYS_ICV_AP1R0_EL1);
+ uint64_t reg = read_sysreg_s(SYS_ICC_AP1R0_EL1);
dsb(sy);
return reg;
@@ -160,7 +154,7 @@ static uint64_t gic_read_ap1r0(void)
static void gic_write_ap1r0(uint64_t val)
{
- write_sysreg_s(val, SYS_ICV_AP1R0_EL1);
+ write_sysreg_s(val, SYS_ICC_AP1R0_EL1);
isb();
}
@@ -478,7 +472,7 @@ static void guest_code(struct test_args *args)
bool level_sensitive = args->level_sensitive;
struct kvm_inject_desc *f, *inject_fns;
- gic_init(GIC_V3, 1, dist, redist);
+ gic_init(GIC_V3, 1);
for (i = 0; i < nr_irqs; i++)
gic_irq_enable(i);
@@ -764,8 +758,7 @@ static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split)
memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args));
vcpu_args_set(vcpu, 1, args_gva);
- gic_fd = vgic_v3_setup(vm, 1, nr_irqs,
- GICD_BASE_GPA, GICR_BASE_GPA);
+ gic_fd = vgic_v3_setup(vm, 1, nr_irqs);
__TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3, skipping");
vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT,
diff --git a/tools/testing/selftests/kvm/aarch64/vgic_lpi_stress.c b/tools/testing/selftests/kvm/aarch64/vgic_lpi_stress.c
new file mode 100644
index 000000000000..fc4fe52fb6f8
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/vgic_lpi_stress.c
@@ -0,0 +1,410 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * vgic_lpi_stress - Stress test for KVM's ITS emulation
+ *
+ * Copyright (c) 2024 Google LLC
+ */
+
+#include <linux/sizes.h>
+#include <pthread.h>
+#include <stdatomic.h>
+#include <sys/sysinfo.h>
+
+#include "kvm_util.h"
+#include "gic.h"
+#include "gic_v3.h"
+#include "gic_v3_its.h"
+#include "processor.h"
+#include "ucall.h"
+#include "vgic.h"
+
+#define TEST_MEMSLOT_INDEX 1
+
+#define GIC_LPI_OFFSET 8192
+
+static size_t nr_iterations = 1000;
+static vm_paddr_t gpa_base;
+
+static struct kvm_vm *vm;
+static struct kvm_vcpu **vcpus;
+static int gic_fd, its_fd;
+
+static struct test_data {
+ bool request_vcpus_stop;
+ u32 nr_cpus;
+ u32 nr_devices;
+ u32 nr_event_ids;
+
+ vm_paddr_t device_table;
+ vm_paddr_t collection_table;
+ vm_paddr_t cmdq_base;
+ void *cmdq_base_va;
+ vm_paddr_t itt_tables;
+
+ vm_paddr_t lpi_prop_table;
+ vm_paddr_t lpi_pend_tables;
+} test_data = {
+ .nr_cpus = 1,
+ .nr_devices = 1,
+ .nr_event_ids = 16,
+};
+
+static void guest_irq_handler(struct ex_regs *regs)
+{
+ u32 intid = gic_get_and_ack_irq();
+
+ if (intid == IAR_SPURIOUS)
+ return;
+
+ GUEST_ASSERT(intid >= GIC_LPI_OFFSET);
+ gic_set_eoi(intid);
+}
+
+static void guest_setup_its_mappings(void)
+{
+ u32 coll_id, device_id, event_id, intid = GIC_LPI_OFFSET;
+ u32 nr_events = test_data.nr_event_ids;
+ u32 nr_devices = test_data.nr_devices;
+ u32 nr_cpus = test_data.nr_cpus;
+
+ for (coll_id = 0; coll_id < nr_cpus; coll_id++)
+ its_send_mapc_cmd(test_data.cmdq_base_va, coll_id, coll_id, true);
+
+ /* Round-robin the LPIs to all of the vCPUs in the VM */
+ coll_id = 0;
+ for (device_id = 0; device_id < nr_devices; device_id++) {
+ vm_paddr_t itt_base = test_data.itt_tables + (device_id * SZ_64K);
+
+ its_send_mapd_cmd(test_data.cmdq_base_va, device_id,
+ itt_base, SZ_64K, true);
+
+ for (event_id = 0; event_id < nr_events; event_id++) {
+ its_send_mapti_cmd(test_data.cmdq_base_va, device_id,
+ event_id, coll_id, intid++);
+
+ coll_id = (coll_id + 1) % test_data.nr_cpus;
+ }
+ }
+}
+
+static void guest_invalidate_all_rdists(void)
+{
+ int i;
+
+ for (i = 0; i < test_data.nr_cpus; i++)
+ its_send_invall_cmd(test_data.cmdq_base_va, i);
+}
+
+static void guest_setup_gic(void)
+{
+ static atomic_int nr_cpus_ready = 0;
+ u32 cpuid = guest_get_vcpuid();
+
+ gic_init(GIC_V3, test_data.nr_cpus);
+ gic_rdist_enable_lpis(test_data.lpi_prop_table, SZ_64K,
+ test_data.lpi_pend_tables + (cpuid * SZ_64K));
+
+ atomic_fetch_add(&nr_cpus_ready, 1);
+
+ if (cpuid > 0)
+ return;
+
+ while (atomic_load(&nr_cpus_ready) < test_data.nr_cpus)
+ cpu_relax();
+
+ its_init(test_data.collection_table, SZ_64K,
+ test_data.device_table, SZ_64K,
+ test_data.cmdq_base, SZ_64K);
+
+ guest_setup_its_mappings();
+ guest_invalidate_all_rdists();
+}
+
+static void guest_code(size_t nr_lpis)
+{
+ guest_setup_gic();
+
+ GUEST_SYNC(0);
+
+ /*
+ * Don't use WFI here to avoid blocking the vCPU thread indefinitely and
+ * never getting the stop signal.
+ */
+ while (!READ_ONCE(test_data.request_vcpus_stop))
+ cpu_relax();
+
+ GUEST_DONE();
+}
+
+static void setup_memslot(void)
+{
+ size_t pages;
+ size_t sz;
+
+ /*
+ * For the ITS:
+ * - A single level device table
+ * - A single level collection table
+ * - The command queue
+ * - An ITT for each device
+ */
+ sz = (3 + test_data.nr_devices) * SZ_64K;
+
+ /*
+ * For the redistributors:
+ * - A shared LPI configuration table
+ * - An LPI pending table for each vCPU
+ */
+ sz += (1 + test_data.nr_cpus) * SZ_64K;
+
+ pages = sz / vm->page_size;
+ gpa_base = ((vm_compute_max_gfn(vm) + 1) * vm->page_size) - sz;
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa_base,
+ TEST_MEMSLOT_INDEX, pages, 0);
+}
+
+#define LPI_PROP_DEFAULT_PRIO 0xa0
+
+static void configure_lpis(void)
+{
+ size_t nr_lpis = test_data.nr_devices * test_data.nr_event_ids;
+ u8 *tbl = addr_gpa2hva(vm, test_data.lpi_prop_table);
+ size_t i;
+
+ for (i = 0; i < nr_lpis; i++) {
+ tbl[i] = LPI_PROP_DEFAULT_PRIO |
+ LPI_PROP_GROUP1 |
+ LPI_PROP_ENABLED;
+ }
+}
+
+static void setup_test_data(void)
+{
+ size_t pages_per_64k = vm_calc_num_guest_pages(vm->mode, SZ_64K);
+ u32 nr_devices = test_data.nr_devices;
+ u32 nr_cpus = test_data.nr_cpus;
+ vm_paddr_t cmdq_base;
+
+ test_data.device_table = vm_phy_pages_alloc(vm, pages_per_64k,
+ gpa_base,
+ TEST_MEMSLOT_INDEX);
+
+ test_data.collection_table = vm_phy_pages_alloc(vm, pages_per_64k,
+ gpa_base,
+ TEST_MEMSLOT_INDEX);
+
+ cmdq_base = vm_phy_pages_alloc(vm, pages_per_64k, gpa_base,
+ TEST_MEMSLOT_INDEX);
+ virt_map(vm, cmdq_base, cmdq_base, pages_per_64k);
+ test_data.cmdq_base = cmdq_base;
+ test_data.cmdq_base_va = (void *)cmdq_base;
+
+ test_data.itt_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_devices,
+ gpa_base, TEST_MEMSLOT_INDEX);
+
+ test_data.lpi_prop_table = vm_phy_pages_alloc(vm, pages_per_64k,
+ gpa_base, TEST_MEMSLOT_INDEX);
+ configure_lpis();
+
+ test_data.lpi_pend_tables = vm_phy_pages_alloc(vm, pages_per_64k * nr_cpus,
+ gpa_base, TEST_MEMSLOT_INDEX);
+
+ sync_global_to_guest(vm, test_data);
+}
+
+static void setup_gic(void)
+{
+ gic_fd = vgic_v3_setup(vm, test_data.nr_cpus, 64);
+ __TEST_REQUIRE(gic_fd >= 0, "Failed to create GICv3");
+
+ its_fd = vgic_its_setup(vm);
+}
+
+static void signal_lpi(u32 device_id, u32 event_id)
+{
+ vm_paddr_t db_addr = GITS_BASE_GPA + GITS_TRANSLATER;
+
+ struct kvm_msi msi = {
+ .address_lo = db_addr,
+ .address_hi = db_addr >> 32,
+ .data = event_id,
+ .devid = device_id,
+ .flags = KVM_MSI_VALID_DEVID,
+ };
+
+ /*
+ * KVM_SIGNAL_MSI returns 1 if the MSI wasn't 'blocked' by the VM,
+ * which for arm64 implies having a valid translation in the ITS.
+ */
+ TEST_ASSERT(__vm_ioctl(vm, KVM_SIGNAL_MSI, &msi) == 1,
+ "KVM_SIGNAL_MSI ioctl failed");
+}
+
+static pthread_barrier_t test_setup_barrier;
+
+static void *lpi_worker_thread(void *data)
+{
+ u32 device_id = (size_t)data;
+ u32 event_id;
+ size_t i;
+
+ pthread_barrier_wait(&test_setup_barrier);
+
+ for (i = 0; i < nr_iterations; i++)
+ for (event_id = 0; event_id < test_data.nr_event_ids; event_id++)
+ signal_lpi(device_id, event_id);
+
+ return NULL;
+}
+
+static void *vcpu_worker_thread(void *data)
+{
+ struct kvm_vcpu *vcpu = data;
+ struct ucall uc;
+
+ while (true) {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ pthread_barrier_wait(&test_setup_barrier);
+ continue;
+ case UCALL_DONE:
+ return NULL;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ default:
+ TEST_FAIL("Unknown ucall: %lu", uc.cmd);
+ }
+ }
+
+ return NULL;
+}
+
+static void report_stats(struct timespec delta)
+{
+ double nr_lpis;
+ double time;
+
+ nr_lpis = test_data.nr_devices * test_data.nr_event_ids * nr_iterations;
+
+ time = delta.tv_sec;
+ time += ((double)delta.tv_nsec) / NSEC_PER_SEC;
+
+ pr_info("Rate: %.2f LPIs/sec\n", nr_lpis / time);
+}
+
+static void run_test(void)
+{
+ u32 nr_devices = test_data.nr_devices;
+ u32 nr_vcpus = test_data.nr_cpus;
+ pthread_t *lpi_threads = malloc(nr_devices * sizeof(pthread_t));
+ pthread_t *vcpu_threads = malloc(nr_vcpus * sizeof(pthread_t));
+ struct timespec start, delta;
+ size_t i;
+
+ TEST_ASSERT(lpi_threads && vcpu_threads, "Failed to allocate pthread arrays");
+
+ pthread_barrier_init(&test_setup_barrier, NULL, nr_vcpus + nr_devices + 1);
+
+ for (i = 0; i < nr_vcpus; i++)
+ pthread_create(&vcpu_threads[i], NULL, vcpu_worker_thread, vcpus[i]);
+
+ for (i = 0; i < nr_devices; i++)
+ pthread_create(&lpi_threads[i], NULL, lpi_worker_thread, (void *)i);
+
+ pthread_barrier_wait(&test_setup_barrier);
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+
+ for (i = 0; i < nr_devices; i++)
+ pthread_join(lpi_threads[i], NULL);
+
+ delta = timespec_elapsed(start);
+ write_guest_global(vm, test_data.request_vcpus_stop, true);
+
+ for (i = 0; i < nr_vcpus; i++)
+ pthread_join(vcpu_threads[i], NULL);
+
+ report_stats(delta);
+}
+
+static void setup_vm(void)
+{
+ int i;
+
+ vcpus = malloc(test_data.nr_cpus * sizeof(struct kvm_vcpu));
+ TEST_ASSERT(vcpus, "Failed to allocate vCPU array");
+
+ vm = vm_create_with_vcpus(test_data.nr_cpus, guest_code, vcpus);
+
+ vm_init_descriptor_tables(vm);
+ for (i = 0; i < test_data.nr_cpus; i++)
+ vcpu_init_descriptor_tables(vcpus[i]);
+
+ vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler);
+
+ setup_memslot();
+
+ setup_gic();
+
+ setup_test_data();
+}
+
+static void destroy_vm(void)
+{
+ close(its_fd);
+ close(gic_fd);
+ kvm_vm_free(vm);
+ free(vcpus);
+}
+
+static void pr_usage(const char *name)
+{
+ pr_info("%s [-v NR_VCPUS] [-d NR_DEVICES] [-e NR_EVENTS] [-i ITERS] -h\n", name);
+ pr_info(" -v:\tnumber of vCPUs (default: %u)\n", test_data.nr_cpus);
+ pr_info(" -d:\tnumber of devices (default: %u)\n", test_data.nr_devices);
+ pr_info(" -e:\tnumber of event IDs per device (default: %u)\n", test_data.nr_event_ids);
+ pr_info(" -i:\tnumber of iterations (default: %lu)\n", nr_iterations);
+}
+
+int main(int argc, char **argv)
+{
+ u32 nr_threads;
+ int c;
+
+ while ((c = getopt(argc, argv, "hv:d:e:i:")) != -1) {
+ switch (c) {
+ case 'v':
+ test_data.nr_cpus = atoi(optarg);
+ break;
+ case 'd':
+ test_data.nr_devices = atoi(optarg);
+ break;
+ case 'e':
+ test_data.nr_event_ids = atoi(optarg);
+ break;
+ case 'i':
+ nr_iterations = strtoul(optarg, NULL, 0);
+ break;
+ case 'h':
+ default:
+ pr_usage(argv[0]);
+ return 1;
+ }
+ }
+
+ nr_threads = test_data.nr_cpus + test_data.nr_devices;
+ if (nr_threads > get_nprocs())
+ pr_info("WARNING: running %u threads on %d CPUs; performance is degraded.\n",
+ nr_threads, get_nprocs());
+
+ setup_vm();
+
+ run_test();
+
+ destroy_vm();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c
index f2fb0e3f14bc..d31b9f64ba14 100644
--- a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c
+++ b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c
@@ -404,9 +404,6 @@ static void guest_code(uint64_t expected_pmcr_n)
GUEST_DONE();
}
-#define GICD_BASE_GPA 0x8000000ULL
-#define GICR_BASE_GPA 0x80A0000ULL
-
/* Create a VM that has one vCPU with PMUv3 configured. */
static void create_vpmu_vm(void *guest_code)
{
@@ -438,8 +435,7 @@ static void create_vpmu_vm(void *guest_code)
init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3);
vpmu_vm.vcpu = aarch64_vcpu_add(vpmu_vm.vm, 0, &init, guest_code);
vcpu_init_descriptor_tables(vpmu_vm.vcpu);
- vpmu_vm.gic_fd = vgic_v3_setup(vpmu_vm.vm, 1, 64,
- GICD_BASE_GPA, GICR_BASE_GPA);
+ vpmu_vm.gic_fd = vgic_v3_setup(vpmu_vm.vm, 1, 64);
__TEST_REQUIRE(vpmu_vm.gic_fd >= 0,
"Failed to create vgic-v3, skipping");
diff --git a/tools/testing/selftests/kvm/arch_timer.c b/tools/testing/selftests/kvm/arch_timer.c
index ae1f1a6d8312..acb2cb596332 100644
--- a/tools/testing/selftests/kvm/arch_timer.c
+++ b/tools/testing/selftests/kvm/arch_timer.c
@@ -19,9 +19,6 @@
*
* Copyright (c) 2021, Google LLC.
*/
-
-#define _GNU_SOURCE
-
#include <stdlib.h>
#include <pthread.h>
#include <linux/sizes.h>
@@ -29,6 +26,7 @@
#include <sys/sysinfo.h>
#include "timer_test.h"
+#include "ucall_common.h"
struct test_args test_args = {
.nr_vcpus = NR_VCPUS_DEF,
diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index bf3609f71854..0202b78f8680 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -6,14 +6,10 @@
* Copyright (C) 2018, Red Hat, Inc.
* Copyright (C) 2019, Google, Inc.
*/
-
-#define _GNU_SOURCE /* for pipe2 */
-
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
-#include <poll.h>
#include <pthread.h>
#include <linux/userfaultfd.h>
#include <sys/syscall.h>
@@ -22,6 +18,7 @@
#include "test_util.h"
#include "memstress.h"
#include "guest_modes.h"
+#include "ucall_common.h"
#include "userfaultfd_util.h"
#ifdef __NR_userfaultfd
@@ -77,8 +74,20 @@ static int handle_uffd_page_request(int uffd_mode, int uffd,
copy.mode = 0;
r = ioctl(uffd, UFFDIO_COPY, &copy);
- if (r == -1) {
- pr_info("Failed UFFDIO_COPY in 0x%lx from thread %d with errno: %d\n",
+ /*
+ * With multiple vCPU threads fault on a single page and there are
+ * multiple readers for the UFFD, at least one of the UFFDIO_COPYs
+ * will fail with EEXIST: handle that case without signaling an
+ * error.
+ *
+ * Note that this also suppress any EEXISTs occurring from,
+ * e.g., the first UFFDIO_COPY/CONTINUEs on a page. That never
+ * happens here, but a realistic VMM might potentially maintain
+ * some external state to correctly surface EEXISTs to userspace
+ * (or prevent duplicate COPY/CONTINUEs in the first place).
+ */
+ if (r == -1 && errno != EEXIST) {
+ pr_info("Failed UFFDIO_COPY in 0x%lx from thread %d, errno = %d\n",
addr, tid, errno);
return r;
}
@@ -89,8 +98,20 @@ static int handle_uffd_page_request(int uffd_mode, int uffd,
cont.range.len = demand_paging_size;
r = ioctl(uffd, UFFDIO_CONTINUE, &cont);
- if (r == -1) {
- pr_info("Failed UFFDIO_CONTINUE in 0x%lx from thread %d with errno: %d\n",
+ /*
+ * With multiple vCPU threads fault on a single page and there are
+ * multiple readers for the UFFD, at least one of the UFFDIO_COPYs
+ * will fail with EEXIST: handle that case without signaling an
+ * error.
+ *
+ * Note that this also suppress any EEXISTs occurring from,
+ * e.g., the first UFFDIO_COPY/CONTINUEs on a page. That never
+ * happens here, but a realistic VMM might potentially maintain
+ * some external state to correctly surface EEXISTs to userspace
+ * (or prevent duplicate COPY/CONTINUEs in the first place).
+ */
+ if (r == -1 && errno != EEXIST) {
+ pr_info("Failed UFFDIO_CONTINUE in 0x%lx, thread %d, errno = %d\n",
addr, tid, errno);
return r;
}
@@ -110,7 +131,9 @@ static int handle_uffd_page_request(int uffd_mode, int uffd,
struct test_params {
int uffd_mode;
+ bool single_uffd;
useconds_t uffd_delay;
+ int readers_per_uffd;
enum vm_mem_backing_src_type src_type;
bool partition_vcpu_memory_access;
};
@@ -131,10 +154,12 @@ static void run_test(enum vm_guest_mode mode, void *arg)
struct memstress_vcpu_args *vcpu_args;
struct test_params *p = arg;
struct uffd_desc **uffd_descs = NULL;
+ uint64_t uffd_region_size;
struct timespec start;
struct timespec ts_diff;
+ double vcpu_paging_rate;
struct kvm_vm *vm;
- int i;
+ int i, num_uffds = 0;
vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1,
p->src_type, p->partition_vcpu_memory_access);
@@ -147,7 +172,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
memset(guest_data_prototype, 0xAB, demand_paging_size);
if (p->uffd_mode == UFFDIO_REGISTER_MODE_MINOR) {
- for (i = 0; i < nr_vcpus; i++) {
+ num_uffds = p->single_uffd ? 1 : nr_vcpus;
+ for (i = 0; i < num_uffds; i++) {
vcpu_args = &memstress_args.vcpu_args[i];
prefault_mem(addr_gpa2alias(vm, vcpu_args->gpa),
vcpu_args->pages * memstress_args.guest_page_size);
@@ -155,9 +181,13 @@ static void run_test(enum vm_guest_mode mode, void *arg)
}
if (p->uffd_mode) {
- uffd_descs = malloc(nr_vcpus * sizeof(struct uffd_desc *));
+ num_uffds = p->single_uffd ? 1 : nr_vcpus;
+ uffd_region_size = nr_vcpus * guest_percpu_mem_size / num_uffds;
+
+ uffd_descs = malloc(num_uffds * sizeof(struct uffd_desc *));
TEST_ASSERT(uffd_descs, "Memory allocation failed");
- for (i = 0; i < nr_vcpus; i++) {
+ for (i = 0; i < num_uffds; i++) {
+ struct memstress_vcpu_args *vcpu_args;
void *vcpu_hva;
vcpu_args = &memstress_args.vcpu_args[i];
@@ -170,7 +200,8 @@ static void run_test(enum vm_guest_mode mode, void *arg)
*/
uffd_descs[i] = uffd_setup_demand_paging(
p->uffd_mode, p->uffd_delay, vcpu_hva,
- vcpu_args->pages * memstress_args.guest_page_size,
+ uffd_region_size,
+ p->readers_per_uffd,
&handle_uffd_page_request);
}
}
@@ -187,15 +218,19 @@ static void run_test(enum vm_guest_mode mode, void *arg)
if (p->uffd_mode) {
/* Tell the user fault fd handler threads to quit */
- for (i = 0; i < nr_vcpus; i++)
+ for (i = 0; i < num_uffds; i++)
uffd_stop_demand_paging(uffd_descs[i]);
}
- pr_info("Total guest execution time: %ld.%.9lds\n",
+ pr_info("Total guest execution time:\t%ld.%.9lds\n",
ts_diff.tv_sec, ts_diff.tv_nsec);
- pr_info("Overall demand paging rate: %f pgs/sec\n",
- memstress_args.vcpu_args[0].pages * nr_vcpus /
- ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / NSEC_PER_SEC));
+
+ vcpu_paging_rate = memstress_args.vcpu_args[0].pages /
+ ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / NSEC_PER_SEC);
+ pr_info("Per-vcpu demand paging rate:\t%f pgs/sec/vcpu\n",
+ vcpu_paging_rate);
+ pr_info("Overall demand paging rate:\t%f pgs/sec\n",
+ vcpu_paging_rate * nr_vcpus);
memstress_destroy_vm(vm);
@@ -207,15 +242,20 @@ static void run_test(enum vm_guest_mode mode, void *arg)
static void help(char *name)
{
puts("");
- printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-d uffd_delay_usec]\n"
- " [-b memory] [-s type] [-v vcpus] [-c cpu_list] [-o]\n", name);
+ printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-a]\n"
+ " [-d uffd_delay_usec] [-r readers_per_uffd] [-b memory]\n"
+ " [-s type] [-v vcpus] [-c cpu_list] [-o]\n", name);
guest_modes_help();
printf(" -u: use userfaultfd to handle vCPU page faults. Mode is a\n"
" UFFD registration mode: 'MISSING' or 'MINOR'.\n");
kvm_print_vcpu_pinning_help();
+ printf(" -a: Use a single userfaultfd for all of guest memory, instead of\n"
+ " creating one for each region paged by a unique vCPU\n"
+ " Set implicitly with -o, and no effect without -u.\n");
printf(" -d: add a delay in usec to the User Fault\n"
" FD handler to simulate demand paging\n"
" overheads. Ignored without -u.\n");
+ printf(" -r: Set the number of reader threads per uffd.\n");
printf(" -b: specify the size of the memory region which should be\n"
" demand paged by each vCPU. e.g. 10M or 3G.\n"
" Default: 1G\n");
@@ -234,12 +274,14 @@ int main(int argc, char *argv[])
struct test_params p = {
.src_type = DEFAULT_VM_MEM_SRC,
.partition_vcpu_memory_access = true,
+ .readers_per_uffd = 1,
+ .single_uffd = false,
};
int opt;
guest_modes_append_default();
- while ((opt = getopt(argc, argv, "hm:u:d:b:s:v:c:o")) != -1) {
+ while ((opt = getopt(argc, argv, "ahom:u:d:b:s:v:c:r:")) != -1) {
switch (opt) {
case 'm':
guest_modes_cmdline(optarg);
@@ -251,6 +293,9 @@ int main(int argc, char *argv[])
p.uffd_mode = UFFDIO_REGISTER_MODE_MINOR;
TEST_ASSERT(p.uffd_mode, "UFFD mode must be 'MISSING' or 'MINOR'.");
break;
+ case 'a':
+ p.single_uffd = true;
+ break;
case 'd':
p.uffd_delay = strtoul(optarg, NULL, 0);
TEST_ASSERT(p.uffd_delay >= 0, "A negative UFFD delay is not supported.");
@@ -271,6 +316,13 @@ int main(int argc, char *argv[])
break;
case 'o':
p.partition_vcpu_memory_access = false;
+ p.single_uffd = true;
+ break;
+ case 'r':
+ p.readers_per_uffd = atoi(optarg);
+ TEST_ASSERT(p.readers_per_uffd >= 1,
+ "Invalid number of readers per uffd %d: must be >=1",
+ p.readers_per_uffd);
break;
case 'h':
default:
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index 504f6fe980e8..9f24303acb8c 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -18,13 +18,11 @@
#include "test_util.h"
#include "memstress.h"
#include "guest_modes.h"
+#include "ucall_common.h"
#ifdef __aarch64__
#include "aarch64/vgic.h"
-#define GICD_BASE_GPA 0x8000000ULL
-#define GICR_BASE_GPA 0x80A0000ULL
-
static int gic_fd;
static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus)
@@ -33,7 +31,7 @@ static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus)
* The test can still run even if hardware does not support GICv3, as it
* is only an optimization to reduce guest exits.
*/
- gic_fd = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA);
+ gic_fd = vgic_v3_setup(vm, nr_vcpus, 64);
}
static void arch_cleanup_vm(struct kvm_vm *vm)
@@ -132,7 +130,6 @@ struct test_params {
enum vm_mem_backing_src_type backing_src;
int slots;
uint32_t write_percent;
- uint32_t random_seed;
bool random_access;
};
@@ -156,8 +153,6 @@ static void run_test(enum vm_guest_mode mode, void *arg)
p->slots, p->backing_src,
p->partition_vcpu_memory_access);
- pr_info("Random seed: %u\n", p->random_seed);
- memstress_set_random_seed(vm, p->random_seed);
memstress_set_write_percent(vm, p->write_percent);
guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm->page_shift;
@@ -346,11 +341,13 @@ int main(int argc, char *argv[])
.partition_vcpu_memory_access = true,
.backing_src = DEFAULT_VM_MEM_SRC,
.slots = 1,
- .random_seed = 1,
.write_percent = 100,
};
int opt;
+ /* Override the seed to be deterministic by default. */
+ guest_random_seed = 1;
+
dirty_log_manual_caps =
kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
@@ -395,7 +392,7 @@ int main(int argc, char *argv[])
p.phys_offset = strtoull(optarg, NULL, 0);
break;
case 'r':
- p.random_seed = atoi_positive("Random seed", optarg);
+ guest_random_seed = atoi_positive("Random seed", optarg);
break;
case 's':
p.backing_src = parse_backing_src_type(optarg);
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index eaad5b20854c..aacf80f57439 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -4,9 +4,6 @@
*
* Copyright (C) 2018, Red Hat, Inc.
*/
-
-#define _GNU_SOURCE /* for program_invocation_name */
-
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
@@ -23,6 +20,7 @@
#include "test_util.h"
#include "guest_modes.h"
#include "processor.h"
+#include "ucall_common.h"
#define DIRTY_MEM_BITS 30 /* 1G */
#define PAGE_SHIFT_4K 12
@@ -76,7 +74,6 @@
static uint64_t host_page_size;
static uint64_t guest_page_size;
static uint64_t guest_num_pages;
-static uint64_t random_array[TEST_PAGES_PER_LOOP];
static uint64_t iteration;
/*
@@ -109,19 +106,19 @@ static void guest_code(void)
*/
for (i = 0; i < guest_num_pages; i++) {
addr = guest_test_virt_mem + i * guest_page_size;
- *(uint64_t *)addr = READ_ONCE(iteration);
+ vcpu_arch_put_guest(*(uint64_t *)addr, READ_ONCE(iteration));
}
while (true) {
for (i = 0; i < TEST_PAGES_PER_LOOP; i++) {
addr = guest_test_virt_mem;
- addr += (READ_ONCE(random_array[i]) % guest_num_pages)
+ addr += (guest_random_u64(&guest_rng) % guest_num_pages)
* guest_page_size;
addr = align_down(addr, host_page_size);
- *(uint64_t *)addr = READ_ONCE(iteration);
+
+ vcpu_arch_put_guest(*(uint64_t *)addr, READ_ONCE(iteration));
}
- /* Tell the host that we need more random numbers */
GUEST_SYNC(1);
}
}
@@ -508,20 +505,10 @@ static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
mode->after_vcpu_run(vcpu, ret, err);
}
-static void generate_random_array(uint64_t *guest_array, uint64_t size)
-{
- uint64_t i;
-
- for (i = 0; i < size; i++)
- guest_array[i] = random();
-}
-
static void *vcpu_worker(void *data)
{
int ret;
struct kvm_vcpu *vcpu = data;
- struct kvm_vm *vm = vcpu->vm;
- uint64_t *guest_array;
uint64_t pages_count = 0;
struct kvm_signal_mask *sigmask = alloca(offsetof(struct kvm_signal_mask, sigset)
+ sizeof(sigset_t));
@@ -540,11 +527,8 @@ static void *vcpu_worker(void *data)
sigemptyset(sigset);
sigaddset(sigset, SIG_IPI);
- guest_array = addr_gva2hva(vm, (vm_vaddr_t)random_array);
-
while (!READ_ONCE(host_quit)) {
/* Clear any existing kick signals */
- generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
pages_count += TEST_PAGES_PER_LOOP;
/* Let the guest dirty the random pages */
ret = __vcpu_run(vcpu);
diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c
index 92eae206baa6..ba0c8e996035 100644
--- a/tools/testing/selftests/kvm/guest_memfd_test.c
+++ b/tools/testing/selftests/kvm/guest_memfd_test.c
@@ -4,8 +4,6 @@
*
* Author: Chao Peng <chao.p.peng@linux.intel.com>
*/
-
-#define _GNU_SOURCE
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
@@ -19,8 +17,8 @@
#include <sys/types.h>
#include <sys/stat.h>
+#include "kvm_util.h"
#include "test_util.h"
-#include "kvm_util_base.h"
static void test_file_read_write(int fd)
{
diff --git a/tools/testing/selftests/kvm/guest_print_test.c b/tools/testing/selftests/kvm/guest_print_test.c
index 3502caa3590c..8092c2d0f5d6 100644
--- a/tools/testing/selftests/kvm/guest_print_test.c
+++ b/tools/testing/selftests/kvm/guest_print_test.c
@@ -13,6 +13,7 @@
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
+#include "ucall_common.h"
struct guest_vals {
uint64_t a;
diff --git a/tools/testing/selftests/kvm/hardware_disable_test.c b/tools/testing/selftests/kvm/hardware_disable_test.c
index decc521fc760..bce73bcb973c 100644
--- a/tools/testing/selftests/kvm/hardware_disable_test.c
+++ b/tools/testing/selftests/kvm/hardware_disable_test.c
@@ -4,9 +4,6 @@
* kvm_arch_hardware_disable is called and it attempts to unregister the user
* return notifiers.
*/
-
-#define _GNU_SOURCE
-
#include <fcntl.h>
#include <pthread.h>
#include <semaphore.h>
diff --git a/tools/testing/selftests/kvm/include/aarch64/gic.h b/tools/testing/selftests/kvm/include/aarch64/gic.h
index b217ea17cac5..baeb3c859389 100644
--- a/tools/testing/selftests/kvm/include/aarch64/gic.h
+++ b/tools/testing/selftests/kvm/include/aarch64/gic.h
@@ -6,11 +6,26 @@
#ifndef SELFTEST_KVM_GIC_H
#define SELFTEST_KVM_GIC_H
+#include <asm/kvm.h>
+
enum gic_type {
GIC_V3,
GIC_TYPE_MAX,
};
+/*
+ * Note that the redistributor frames are at the end, as the range scales
+ * with the number of vCPUs in the VM.
+ */
+#define GITS_BASE_GPA 0x8000000ULL
+#define GICD_BASE_GPA (GITS_BASE_GPA + KVM_VGIC_V3_ITS_SIZE)
+#define GICR_BASE_GPA (GICD_BASE_GPA + KVM_VGIC_V3_DIST_SIZE)
+
+/* The GIC is identity-mapped into the guest at the time of setup. */
+#define GITS_BASE_GVA ((volatile void *)GITS_BASE_GPA)
+#define GICD_BASE_GVA ((volatile void *)GICD_BASE_GPA)
+#define GICR_BASE_GVA ((volatile void *)GICR_BASE_GPA)
+
#define MIN_SGI 0
#define MIN_PPI 16
#define MIN_SPI 32
@@ -21,8 +36,7 @@ enum gic_type {
#define INTID_IS_PPI(intid) (MIN_PPI <= (intid) && (intid) < MIN_SPI)
#define INTID_IS_SPI(intid) (MIN_SPI <= (intid) && (intid) <= MAX_SPI)
-void gic_init(enum gic_type type, unsigned int nr_cpus,
- void *dist_base, void *redist_base);
+void gic_init(enum gic_type type, unsigned int nr_cpus);
void gic_irq_enable(unsigned int intid);
void gic_irq_disable(unsigned int intid);
unsigned int gic_get_and_ack_irq(void);
@@ -44,4 +58,7 @@ void gic_irq_clear_pending(unsigned int intid);
bool gic_irq_get_pending(unsigned int intid);
void gic_irq_set_config(unsigned int intid, bool is_edge);
+void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size,
+ vm_paddr_t pend_table);
+
#endif /* SELFTEST_KVM_GIC_H */
diff --git a/tools/testing/selftests/kvm/include/aarch64/gic_v3.h b/tools/testing/selftests/kvm/include/aarch64/gic_v3.h
index ba0886e8a2bb..a76615fa39a1 100644
--- a/tools/testing/selftests/kvm/include/aarch64/gic_v3.h
+++ b/tools/testing/selftests/kvm/include/aarch64/gic_v3.h
@@ -1,82 +1,604 @@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * ARM Generic Interrupt Controller (GIC) v3 specific defines
+ * Copyright (C) 2013, 2014 ARM Limited, All Rights Reserved.
+ * Author: Marc Zyngier <marc.zyngier@arm.com>
*/
-
-#ifndef SELFTEST_KVM_GICV3_H
-#define SELFTEST_KVM_GICV3_H
-
-#include <asm/sysreg.h>
+#ifndef __SELFTESTS_GIC_V3_H
+#define __SELFTESTS_GIC_V3_H
/*
- * Distributor registers
+ * Distributor registers. We assume we're running non-secure, with ARE
+ * being set. Secure-only and non-ARE registers are not described.
*/
#define GICD_CTLR 0x0000
#define GICD_TYPER 0x0004
+#define GICD_IIDR 0x0008
+#define GICD_TYPER2 0x000C
+#define GICD_STATUSR 0x0010
+#define GICD_SETSPI_NSR 0x0040
+#define GICD_CLRSPI_NSR 0x0048
+#define GICD_SETSPI_SR 0x0050
+#define GICD_CLRSPI_SR 0x0058
#define GICD_IGROUPR 0x0080
#define GICD_ISENABLER 0x0100
#define GICD_ICENABLER 0x0180
#define GICD_ISPENDR 0x0200
#define GICD_ICPENDR 0x0280
-#define GICD_ICACTIVER 0x0380
#define GICD_ISACTIVER 0x0300
+#define GICD_ICACTIVER 0x0380
#define GICD_IPRIORITYR 0x0400
#define GICD_ICFGR 0x0C00
+#define GICD_IGRPMODR 0x0D00
+#define GICD_NSACR 0x0E00
+#define GICD_IGROUPRnE 0x1000
+#define GICD_ISENABLERnE 0x1200
+#define GICD_ICENABLERnE 0x1400
+#define GICD_ISPENDRnE 0x1600
+#define GICD_ICPENDRnE 0x1800
+#define GICD_ISACTIVERnE 0x1A00
+#define GICD_ICACTIVERnE 0x1C00
+#define GICD_IPRIORITYRnE 0x2000
+#define GICD_ICFGRnE 0x3000
+#define GICD_IROUTER 0x6000
+#define GICD_IROUTERnE 0x8000
+#define GICD_IDREGS 0xFFD0
+#define GICD_PIDR2 0xFFE8
+
+#define ESPI_BASE_INTID 4096
/*
- * The assumption is that the guest runs in a non-secure mode.
- * The following bits of GICD_CTLR are defined accordingly.
+ * Those registers are actually from GICv2, but the spec demands that they
+ * are implemented as RES0 if ARE is 1 (which we do in KVM's emulated GICv3).
*/
+#define GICD_ITARGETSR 0x0800
+#define GICD_SGIR 0x0F00
+#define GICD_CPENDSGIR 0x0F10
+#define GICD_SPENDSGIR 0x0F20
+
#define GICD_CTLR_RWP (1U << 31)
#define GICD_CTLR_nASSGIreq (1U << 8)
+#define GICD_CTLR_DS (1U << 6)
#define GICD_CTLR_ARE_NS (1U << 4)
#define GICD_CTLR_ENABLE_G1A (1U << 1)
#define GICD_CTLR_ENABLE_G1 (1U << 0)
+#define GICD_IIDR_IMPLEMENTER_SHIFT 0
+#define GICD_IIDR_IMPLEMENTER_MASK (0xfff << GICD_IIDR_IMPLEMENTER_SHIFT)
+#define GICD_IIDR_REVISION_SHIFT 12
+#define GICD_IIDR_REVISION_MASK (0xf << GICD_IIDR_REVISION_SHIFT)
+#define GICD_IIDR_VARIANT_SHIFT 16
+#define GICD_IIDR_VARIANT_MASK (0xf << GICD_IIDR_VARIANT_SHIFT)
+#define GICD_IIDR_PRODUCT_ID_SHIFT 24
+#define GICD_IIDR_PRODUCT_ID_MASK (0xff << GICD_IIDR_PRODUCT_ID_SHIFT)
+
+
+/*
+ * In systems with a single security state (what we emulate in KVM)
+ * the meaning of the interrupt group enable bits is slightly different
+ */
+#define GICD_CTLR_ENABLE_SS_G1 (1U << 1)
+#define GICD_CTLR_ENABLE_SS_G0 (1U << 0)
+
+#define GICD_TYPER_RSS (1U << 26)
+#define GICD_TYPER_LPIS (1U << 17)
+#define GICD_TYPER_MBIS (1U << 16)
+#define GICD_TYPER_ESPI (1U << 8)
+
+#define GICD_TYPER_ID_BITS(typer) ((((typer) >> 19) & 0x1f) + 1)
+#define GICD_TYPER_NUM_LPIS(typer) ((((typer) >> 11) & 0x1f) + 1)
#define GICD_TYPER_SPIS(typer) ((((typer) & 0x1f) + 1) * 32)
-#define GICD_INT_DEF_PRI_X4 0xa0a0a0a0
+#define GICD_TYPER_ESPIS(typer) \
+ (((typer) & GICD_TYPER_ESPI) ? GICD_TYPER_SPIS((typer) >> 27) : 0)
+
+#define GICD_TYPER2_nASSGIcap (1U << 8)
+#define GICD_TYPER2_VIL (1U << 7)
+#define GICD_TYPER2_VID GENMASK(4, 0)
+
+#define GICD_IROUTER_SPI_MODE_ONE (0U << 31)
+#define GICD_IROUTER_SPI_MODE_ANY (1U << 31)
+
+#define GIC_PIDR2_ARCH_MASK 0xf0
+#define GIC_PIDR2_ARCH_GICv3 0x30
+#define GIC_PIDR2_ARCH_GICv4 0x40
+
+#define GIC_V3_DIST_SIZE 0x10000
+
+#define GIC_PAGE_SIZE_4K 0ULL
+#define GIC_PAGE_SIZE_16K 1ULL
+#define GIC_PAGE_SIZE_64K 2ULL
+#define GIC_PAGE_SIZE_MASK 3ULL
/*
- * Redistributor registers
+ * Re-Distributor registers, offsets from RD_base
*/
-#define GICR_CTLR 0x000
-#define GICR_WAKER 0x014
+#define GICR_CTLR GICD_CTLR
+#define GICR_IIDR 0x0004
+#define GICR_TYPER 0x0008
+#define GICR_STATUSR GICD_STATUSR
+#define GICR_WAKER 0x0014
+#define GICR_SETLPIR 0x0040
+#define GICR_CLRLPIR 0x0048
+#define GICR_PROPBASER 0x0070
+#define GICR_PENDBASER 0x0078
+#define GICR_INVLPIR 0x00A0
+#define GICR_INVALLR 0x00B0
+#define GICR_SYNCR 0x00C0
+#define GICR_IDREGS GICD_IDREGS
+#define GICR_PIDR2 GICD_PIDR2
+
+#define GICR_CTLR_ENABLE_LPIS (1UL << 0)
+#define GICR_CTLR_CES (1UL << 1)
+#define GICR_CTLR_IR (1UL << 2)
+#define GICR_CTLR_RWP (1UL << 3)
-#define GICR_CTLR_RWP (1U << 3)
+#define GICR_TYPER_CPU_NUMBER(r) (((r) >> 8) & 0xffff)
+
+#define EPPI_BASE_INTID 1056
+
+#define GICR_TYPER_NR_PPIS(r) \
+ ({ \
+ unsigned int __ppinum = ((r) >> 27) & 0x1f; \
+ unsigned int __nr_ppis = 16; \
+ if (__ppinum == 1 || __ppinum == 2) \
+ __nr_ppis += __ppinum * 32; \
+ \
+ __nr_ppis; \
+ })
#define GICR_WAKER_ProcessorSleep (1U << 1)
#define GICR_WAKER_ChildrenAsleep (1U << 2)
+#define GIC_BASER_CACHE_nCnB 0ULL
+#define GIC_BASER_CACHE_SameAsInner 0ULL
+#define GIC_BASER_CACHE_nC 1ULL
+#define GIC_BASER_CACHE_RaWt 2ULL
+#define GIC_BASER_CACHE_RaWb 3ULL
+#define GIC_BASER_CACHE_WaWt 4ULL
+#define GIC_BASER_CACHE_WaWb 5ULL
+#define GIC_BASER_CACHE_RaWaWt 6ULL
+#define GIC_BASER_CACHE_RaWaWb 7ULL
+#define GIC_BASER_CACHE_MASK 7ULL
+#define GIC_BASER_NonShareable 0ULL
+#define GIC_BASER_InnerShareable 1ULL
+#define GIC_BASER_OuterShareable 2ULL
+#define GIC_BASER_SHAREABILITY_MASK 3ULL
+
+#define GIC_BASER_CACHEABILITY(reg, inner_outer, type) \
+ (GIC_BASER_CACHE_##type << reg##_##inner_outer##_CACHEABILITY_SHIFT)
+
+#define GIC_BASER_SHAREABILITY(reg, type) \
+ (GIC_BASER_##type << reg##_SHAREABILITY_SHIFT)
+
+/* encode a size field of width @w containing @n - 1 units */
+#define GIC_ENCODE_SZ(n, w) (((unsigned long)(n) - 1) & GENMASK_ULL(((w) - 1), 0))
+
+#define GICR_PROPBASER_SHAREABILITY_SHIFT (10)
+#define GICR_PROPBASER_INNER_CACHEABILITY_SHIFT (7)
+#define GICR_PROPBASER_OUTER_CACHEABILITY_SHIFT (56)
+#define GICR_PROPBASER_SHAREABILITY_MASK \
+ GIC_BASER_SHAREABILITY(GICR_PROPBASER, SHAREABILITY_MASK)
+#define GICR_PROPBASER_INNER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, MASK)
+#define GICR_PROPBASER_OUTER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GICR_PROPBASER, OUTER, MASK)
+#define GICR_PROPBASER_CACHEABILITY_MASK GICR_PROPBASER_INNER_CACHEABILITY_MASK
+
+#define GICR_PROPBASER_InnerShareable \
+ GIC_BASER_SHAREABILITY(GICR_PROPBASER, InnerShareable)
+
+#define GICR_PROPBASER_nCnB GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, nCnB)
+#define GICR_PROPBASER_nC GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, nC)
+#define GICR_PROPBASER_RaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWt)
+#define GICR_PROPBASER_RaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWb)
+#define GICR_PROPBASER_WaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, WaWt)
+#define GICR_PROPBASER_WaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, WaWb)
+#define GICR_PROPBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWaWt)
+#define GICR_PROPBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWaWb)
+
+#define GICR_PROPBASER_IDBITS_MASK (0x1f)
+#define GICR_PROPBASER_ADDRESS(x) ((x) & GENMASK_ULL(51, 12))
+#define GICR_PENDBASER_ADDRESS(x) ((x) & GENMASK_ULL(51, 16))
+
+#define GICR_PENDBASER_SHAREABILITY_SHIFT (10)
+#define GICR_PENDBASER_INNER_CACHEABILITY_SHIFT (7)
+#define GICR_PENDBASER_OUTER_CACHEABILITY_SHIFT (56)
+#define GICR_PENDBASER_SHAREABILITY_MASK \
+ GIC_BASER_SHAREABILITY(GICR_PENDBASER, SHAREABILITY_MASK)
+#define GICR_PENDBASER_INNER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, MASK)
+#define GICR_PENDBASER_OUTER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GICR_PENDBASER, OUTER, MASK)
+#define GICR_PENDBASER_CACHEABILITY_MASK GICR_PENDBASER_INNER_CACHEABILITY_MASK
+
+#define GICR_PENDBASER_InnerShareable \
+ GIC_BASER_SHAREABILITY(GICR_PENDBASER, InnerShareable)
+
+#define GICR_PENDBASER_nCnB GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, nCnB)
+#define GICR_PENDBASER_nC GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, nC)
+#define GICR_PENDBASER_RaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWt)
+#define GICR_PENDBASER_RaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWb)
+#define GICR_PENDBASER_WaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, WaWt)
+#define GICR_PENDBASER_WaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, WaWb)
+#define GICR_PENDBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWaWt)
+#define GICR_PENDBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_PENDBASER, INNER, RaWaWb)
+
+#define GICR_PENDBASER_PTZ BIT_ULL(62)
+
/*
- * Redistributor registers, offsets from SGI base
+ * Re-Distributor registers, offsets from SGI_base
*/
#define GICR_IGROUPR0 GICD_IGROUPR
#define GICR_ISENABLER0 GICD_ISENABLER
#define GICR_ICENABLER0 GICD_ICENABLER
#define GICR_ISPENDR0 GICD_ISPENDR
+#define GICR_ICPENDR0 GICD_ICPENDR
#define GICR_ISACTIVER0 GICD_ISACTIVER
#define GICR_ICACTIVER0 GICD_ICACTIVER
-#define GICR_ICENABLER GICD_ICENABLER
-#define GICR_ICACTIVER GICD_ICACTIVER
#define GICR_IPRIORITYR0 GICD_IPRIORITYR
+#define GICR_ICFGR0 GICD_ICFGR
+#define GICR_IGRPMODR0 GICD_IGRPMODR
+#define GICR_NSACR GICD_NSACR
+
+#define GICR_TYPER_PLPIS (1U << 0)
+#define GICR_TYPER_VLPIS (1U << 1)
+#define GICR_TYPER_DIRTY (1U << 2)
+#define GICR_TYPER_DirectLPIS (1U << 3)
+#define GICR_TYPER_LAST (1U << 4)
+#define GICR_TYPER_RVPEID (1U << 7)
+#define GICR_TYPER_COMMON_LPI_AFF GENMASK_ULL(25, 24)
+#define GICR_TYPER_AFFINITY GENMASK_ULL(63, 32)
+
+#define GICR_INVLPIR_INTID GENMASK_ULL(31, 0)
+#define GICR_INVLPIR_VPEID GENMASK_ULL(47, 32)
+#define GICR_INVLPIR_V GENMASK_ULL(63, 63)
+
+#define GICR_INVALLR_VPEID GICR_INVLPIR_VPEID
+#define GICR_INVALLR_V GICR_INVLPIR_V
+
+#define GIC_V3_REDIST_SIZE 0x20000
+
+#define LPI_PROP_GROUP1 (1 << 1)
+#define LPI_PROP_ENABLED (1 << 0)
+
+/*
+ * Re-Distributor registers, offsets from VLPI_base
+ */
+#define GICR_VPROPBASER 0x0070
+
+#define GICR_VPROPBASER_IDBITS_MASK 0x1f
+
+#define GICR_VPROPBASER_SHAREABILITY_SHIFT (10)
+#define GICR_VPROPBASER_INNER_CACHEABILITY_SHIFT (7)
+#define GICR_VPROPBASER_OUTER_CACHEABILITY_SHIFT (56)
+
+#define GICR_VPROPBASER_SHAREABILITY_MASK \
+ GIC_BASER_SHAREABILITY(GICR_VPROPBASER, SHAREABILITY_MASK)
+#define GICR_VPROPBASER_INNER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, MASK)
+#define GICR_VPROPBASER_OUTER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GICR_VPROPBASER, OUTER, MASK)
+#define GICR_VPROPBASER_CACHEABILITY_MASK \
+ GICR_VPROPBASER_INNER_CACHEABILITY_MASK
+
+#define GICR_VPROPBASER_InnerShareable \
+ GIC_BASER_SHAREABILITY(GICR_VPROPBASER, InnerShareable)
+
+#define GICR_VPROPBASER_nCnB GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, nCnB)
+#define GICR_VPROPBASER_nC GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, nC)
+#define GICR_VPROPBASER_RaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWt)
+#define GICR_VPROPBASER_RaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWb)
+#define GICR_VPROPBASER_WaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, WaWt)
+#define GICR_VPROPBASER_WaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, WaWb)
+#define GICR_VPROPBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWaWt)
+#define GICR_VPROPBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_VPROPBASER, INNER, RaWaWb)
+
+/*
+ * GICv4.1 VPROPBASER reinvention. A subtle mix between the old
+ * VPROPBASER and ITS_BASER. Just not quite any of the two.
+ */
+#define GICR_VPROPBASER_4_1_VALID (1ULL << 63)
+#define GICR_VPROPBASER_4_1_ENTRY_SIZE GENMASK_ULL(61, 59)
+#define GICR_VPROPBASER_4_1_INDIRECT (1ULL << 55)
+#define GICR_VPROPBASER_4_1_PAGE_SIZE GENMASK_ULL(54, 53)
+#define GICR_VPROPBASER_4_1_Z (1ULL << 52)
+#define GICR_VPROPBASER_4_1_ADDR GENMASK_ULL(51, 12)
+#define GICR_VPROPBASER_4_1_SIZE GENMASK_ULL(6, 0)
+
+#define GICR_VPENDBASER 0x0078
+
+#define GICR_VPENDBASER_SHAREABILITY_SHIFT (10)
+#define GICR_VPENDBASER_INNER_CACHEABILITY_SHIFT (7)
+#define GICR_VPENDBASER_OUTER_CACHEABILITY_SHIFT (56)
+#define GICR_VPENDBASER_SHAREABILITY_MASK \
+ GIC_BASER_SHAREABILITY(GICR_VPENDBASER, SHAREABILITY_MASK)
+#define GICR_VPENDBASER_INNER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, MASK)
+#define GICR_VPENDBASER_OUTER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GICR_VPENDBASER, OUTER, MASK)
+#define GICR_VPENDBASER_CACHEABILITY_MASK \
+ GICR_VPENDBASER_INNER_CACHEABILITY_MASK
+
+#define GICR_VPENDBASER_NonShareable \
+ GIC_BASER_SHAREABILITY(GICR_VPENDBASER, NonShareable)
+
+#define GICR_VPENDBASER_InnerShareable \
+ GIC_BASER_SHAREABILITY(GICR_VPENDBASER, InnerShareable)
+
+#define GICR_VPENDBASER_nCnB GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nCnB)
+#define GICR_VPENDBASER_nC GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, nC)
+#define GICR_VPENDBASER_RaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWt)
+#define GICR_VPENDBASER_RaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWb)
+#define GICR_VPENDBASER_WaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, WaWt)
+#define GICR_VPENDBASER_WaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, WaWb)
+#define GICR_VPENDBASER_RaWaWt GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWaWt)
+#define GICR_VPENDBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_VPENDBASER, INNER, RaWaWb)
+
+#define GICR_VPENDBASER_Dirty (1ULL << 60)
+#define GICR_VPENDBASER_PendingLast (1ULL << 61)
+#define GICR_VPENDBASER_IDAI (1ULL << 62)
+#define GICR_VPENDBASER_Valid (1ULL << 63)
+
+/*
+ * GICv4.1 VPENDBASER, used for VPE residency. On top of these fields,
+ * also use the above Valid, PendingLast and Dirty.
+ */
+#define GICR_VPENDBASER_4_1_DB (1ULL << 62)
+#define GICR_VPENDBASER_4_1_VGRP0EN (1ULL << 59)
+#define GICR_VPENDBASER_4_1_VGRP1EN (1ULL << 58)
+#define GICR_VPENDBASER_4_1_VPEID GENMASK_ULL(15, 0)
+
+#define GICR_VSGIR 0x0080
+
+#define GICR_VSGIR_VPEID GENMASK(15, 0)
+
+#define GICR_VSGIPENDR 0x0088
+
+#define GICR_VSGIPENDR_BUSY (1U << 31)
+#define GICR_VSGIPENDR_PENDING GENMASK(15, 0)
+
+/*
+ * ITS registers, offsets from ITS_base
+ */
+#define GITS_CTLR 0x0000
+#define GITS_IIDR 0x0004
+#define GITS_TYPER 0x0008
+#define GITS_MPIDR 0x0018
+#define GITS_CBASER 0x0080
+#define GITS_CWRITER 0x0088
+#define GITS_CREADR 0x0090
+#define GITS_BASER 0x0100
+#define GITS_IDREGS_BASE 0xffd0
+#define GITS_PIDR0 0xffe0
+#define GITS_PIDR1 0xffe4
+#define GITS_PIDR2 GICR_PIDR2
+#define GITS_PIDR4 0xffd0
+#define GITS_CIDR0 0xfff0
+#define GITS_CIDR1 0xfff4
+#define GITS_CIDR2 0xfff8
+#define GITS_CIDR3 0xfffc
+
+#define GITS_TRANSLATER 0x10040
+
+#define GITS_SGIR 0x20020
+
+#define GITS_SGIR_VPEID GENMASK_ULL(47, 32)
+#define GITS_SGIR_VINTID GENMASK_ULL(3, 0)
+
+#define GITS_CTLR_ENABLE (1U << 0)
+#define GITS_CTLR_ImDe (1U << 1)
+#define GITS_CTLR_ITS_NUMBER_SHIFT 4
+#define GITS_CTLR_ITS_NUMBER (0xFU << GITS_CTLR_ITS_NUMBER_SHIFT)
+#define GITS_CTLR_QUIESCENT (1U << 31)
+
+#define GITS_TYPER_PLPIS (1UL << 0)
+#define GITS_TYPER_VLPIS (1UL << 1)
+#define GITS_TYPER_ITT_ENTRY_SIZE_SHIFT 4
+#define GITS_TYPER_ITT_ENTRY_SIZE GENMASK_ULL(7, 4)
+#define GITS_TYPER_IDBITS_SHIFT 8
+#define GITS_TYPER_DEVBITS_SHIFT 13
+#define GITS_TYPER_DEVBITS GENMASK_ULL(17, 13)
+#define GITS_TYPER_PTA (1UL << 19)
+#define GITS_TYPER_HCC_SHIFT 24
+#define GITS_TYPER_HCC(r) (((r) >> GITS_TYPER_HCC_SHIFT) & 0xff)
+#define GITS_TYPER_VMOVP (1ULL << 37)
+#define GITS_TYPER_VMAPP (1ULL << 40)
+#define GITS_TYPER_SVPET GENMASK_ULL(42, 41)
-/* CPU interface registers */
-#define SYS_ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0)
-#define SYS_ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0)
-#define SYS_ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1)
-#define SYS_ICC_DIR_EL1 sys_reg(3, 0, 12, 11, 1)
-#define SYS_ICC_CTLR_EL1 sys_reg(3, 0, 12, 12, 4)
-#define SYS_ICC_SRE_EL1 sys_reg(3, 0, 12, 12, 5)
-#define SYS_ICC_GRPEN1_EL1 sys_reg(3, 0, 12, 12, 7)
+#define GITS_IIDR_REV_SHIFT 12
+#define GITS_IIDR_REV_MASK (0xf << GITS_IIDR_REV_SHIFT)
+#define GITS_IIDR_REV(r) (((r) >> GITS_IIDR_REV_SHIFT) & 0xf)
+#define GITS_IIDR_PRODUCTID_SHIFT 24
-#define SYS_ICV_AP1R0_EL1 sys_reg(3, 0, 12, 9, 0)
+#define GITS_CBASER_VALID (1ULL << 63)
+#define GITS_CBASER_SHAREABILITY_SHIFT (10)
+#define GITS_CBASER_INNER_CACHEABILITY_SHIFT (59)
+#define GITS_CBASER_OUTER_CACHEABILITY_SHIFT (53)
+#define GITS_CBASER_SHAREABILITY_MASK \
+ GIC_BASER_SHAREABILITY(GITS_CBASER, SHAREABILITY_MASK)
+#define GITS_CBASER_INNER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, MASK)
+#define GITS_CBASER_OUTER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GITS_CBASER, OUTER, MASK)
+#define GITS_CBASER_CACHEABILITY_MASK GITS_CBASER_INNER_CACHEABILITY_MASK
-#define ICC_PMR_DEF_PRIO 0xf0
+#define GITS_CBASER_InnerShareable \
+ GIC_BASER_SHAREABILITY(GITS_CBASER, InnerShareable)
+#define GITS_CBASER_nCnB GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, nCnB)
+#define GITS_CBASER_nC GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, nC)
+#define GITS_CBASER_RaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWt)
+#define GITS_CBASER_RaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWb)
+#define GITS_CBASER_WaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, WaWt)
+#define GITS_CBASER_WaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, WaWb)
+#define GITS_CBASER_RaWaWt GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWt)
+#define GITS_CBASER_RaWaWb GIC_BASER_CACHEABILITY(GITS_CBASER, INNER, RaWaWb)
+
+#define GITS_CBASER_ADDRESS(cbaser) ((cbaser) & GENMASK_ULL(51, 12))
+
+#define GITS_BASER_NR_REGS 8
+
+#define GITS_BASER_VALID (1ULL << 63)
+#define GITS_BASER_INDIRECT (1ULL << 62)
+
+#define GITS_BASER_INNER_CACHEABILITY_SHIFT (59)
+#define GITS_BASER_OUTER_CACHEABILITY_SHIFT (53)
+#define GITS_BASER_INNER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GITS_BASER, INNER, MASK)
+#define GITS_BASER_CACHEABILITY_MASK GITS_BASER_INNER_CACHEABILITY_MASK
+#define GITS_BASER_OUTER_CACHEABILITY_MASK \
+ GIC_BASER_CACHEABILITY(GITS_BASER, OUTER, MASK)
+#define GITS_BASER_SHAREABILITY_MASK \
+ GIC_BASER_SHAREABILITY(GITS_BASER, SHAREABILITY_MASK)
+
+#define GITS_BASER_nCnB GIC_BASER_CACHEABILITY(GITS_BASER, INNER, nCnB)
+#define GITS_BASER_nC GIC_BASER_CACHEABILITY(GITS_BASER, INNER, nC)
+#define GITS_BASER_RaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWt)
+#define GITS_BASER_RaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWb)
+#define GITS_BASER_WaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, WaWt)
+#define GITS_BASER_WaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, WaWb)
+#define GITS_BASER_RaWaWt GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWaWt)
+#define GITS_BASER_RaWaWb GIC_BASER_CACHEABILITY(GITS_BASER, INNER, RaWaWb)
+
+#define GITS_BASER_TYPE_SHIFT (56)
+#define GITS_BASER_TYPE(r) (((r) >> GITS_BASER_TYPE_SHIFT) & 7)
+#define GITS_BASER_ENTRY_SIZE_SHIFT (48)
+#define GITS_BASER_ENTRY_SIZE(r) ((((r) >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0x1f) + 1)
+#define GITS_BASER_ENTRY_SIZE_MASK GENMASK_ULL(52, 48)
+#define GITS_BASER_PHYS_52_to_48(phys) \
+ (((phys) & GENMASK_ULL(47, 16)) | (((phys) >> 48) & 0xf) << 12)
+#define GITS_BASER_ADDR_48_to_52(baser) \
+ (((baser) & GENMASK_ULL(47, 16)) | (((baser) >> 12) & 0xf) << 48)
+
+#define GITS_BASER_SHAREABILITY_SHIFT (10)
+#define GITS_BASER_InnerShareable \
+ GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable)
+#define GITS_BASER_PAGE_SIZE_SHIFT (8)
+#define __GITS_BASER_PSZ(sz) (GIC_PAGE_SIZE_ ## sz << GITS_BASER_PAGE_SIZE_SHIFT)
+#define GITS_BASER_PAGE_SIZE_4K __GITS_BASER_PSZ(4K)
+#define GITS_BASER_PAGE_SIZE_16K __GITS_BASER_PSZ(16K)
+#define GITS_BASER_PAGE_SIZE_64K __GITS_BASER_PSZ(64K)
+#define GITS_BASER_PAGE_SIZE_MASK __GITS_BASER_PSZ(MASK)
+#define GITS_BASER_PAGES_MAX 256
+#define GITS_BASER_PAGES_SHIFT (0)
+#define GITS_BASER_NR_PAGES(r) (((r) & 0xff) + 1)
+
+#define GITS_BASER_TYPE_NONE 0
+#define GITS_BASER_TYPE_DEVICE 1
+#define GITS_BASER_TYPE_VCPU 2
+#define GITS_BASER_TYPE_RESERVED3 3
+#define GITS_BASER_TYPE_COLLECTION 4
+#define GITS_BASER_TYPE_RESERVED5 5
+#define GITS_BASER_TYPE_RESERVED6 6
+#define GITS_BASER_TYPE_RESERVED7 7
+
+#define GITS_LVL1_ENTRY_SIZE (8UL)
+
+/*
+ * ITS commands
+ */
+#define GITS_CMD_MAPD 0x08
+#define GITS_CMD_MAPC 0x09
+#define GITS_CMD_MAPTI 0x0a
+#define GITS_CMD_MAPI 0x0b
+#define GITS_CMD_MOVI 0x01
+#define GITS_CMD_DISCARD 0x0f
+#define GITS_CMD_INV 0x0c
+#define GITS_CMD_MOVALL 0x0e
+#define GITS_CMD_INVALL 0x0d
+#define GITS_CMD_INT 0x03
+#define GITS_CMD_CLEAR 0x04
+#define GITS_CMD_SYNC 0x05
+
+/*
+ * GICv4 ITS specific commands
+ */
+#define GITS_CMD_GICv4(x) ((x) | 0x20)
+#define GITS_CMD_VINVALL GITS_CMD_GICv4(GITS_CMD_INVALL)
+#define GITS_CMD_VMAPP GITS_CMD_GICv4(GITS_CMD_MAPC)
+#define GITS_CMD_VMAPTI GITS_CMD_GICv4(GITS_CMD_MAPTI)
+#define GITS_CMD_VMOVI GITS_CMD_GICv4(GITS_CMD_MOVI)
+#define GITS_CMD_VSYNC GITS_CMD_GICv4(GITS_CMD_SYNC)
+/* VMOVP, VSGI and INVDB are the odd ones, as they dont have a physical counterpart */
+#define GITS_CMD_VMOVP GITS_CMD_GICv4(2)
+#define GITS_CMD_VSGI GITS_CMD_GICv4(3)
+#define GITS_CMD_INVDB GITS_CMD_GICv4(0xe)
+
+/*
+ * ITS error numbers
+ */
+#define E_ITS_MOVI_UNMAPPED_INTERRUPT 0x010107
+#define E_ITS_MOVI_UNMAPPED_COLLECTION 0x010109
+#define E_ITS_INT_UNMAPPED_INTERRUPT 0x010307
+#define E_ITS_CLEAR_UNMAPPED_INTERRUPT 0x010507
+#define E_ITS_MAPD_DEVICE_OOR 0x010801
+#define E_ITS_MAPD_ITTSIZE_OOR 0x010802
+#define E_ITS_MAPC_PROCNUM_OOR 0x010902
+#define E_ITS_MAPC_COLLECTION_OOR 0x010903
+#define E_ITS_MAPTI_UNMAPPED_DEVICE 0x010a04
+#define E_ITS_MAPTI_ID_OOR 0x010a05
+#define E_ITS_MAPTI_PHYSICALID_OOR 0x010a06
+#define E_ITS_INV_UNMAPPED_INTERRUPT 0x010c07
+#define E_ITS_INVALL_UNMAPPED_COLLECTION 0x010d09
+#define E_ITS_MOVALL_PROCNUM_OOR 0x010e01
+#define E_ITS_DISCARD_UNMAPPED_INTERRUPT 0x010f07
+
+/*
+ * CPU interface registers
+ */
+#define ICC_CTLR_EL1_EOImode_SHIFT (1)
+#define ICC_CTLR_EL1_EOImode_drop_dir (0U << ICC_CTLR_EL1_EOImode_SHIFT)
+#define ICC_CTLR_EL1_EOImode_drop (1U << ICC_CTLR_EL1_EOImode_SHIFT)
+#define ICC_CTLR_EL1_EOImode_MASK (1 << ICC_CTLR_EL1_EOImode_SHIFT)
+#define ICC_CTLR_EL1_CBPR_SHIFT 0
+#define ICC_CTLR_EL1_CBPR_MASK (1 << ICC_CTLR_EL1_CBPR_SHIFT)
+#define ICC_CTLR_EL1_PMHE_SHIFT 6
+#define ICC_CTLR_EL1_PMHE_MASK (1 << ICC_CTLR_EL1_PMHE_SHIFT)
+#define ICC_CTLR_EL1_PRI_BITS_SHIFT 8
+#define ICC_CTLR_EL1_PRI_BITS_MASK (0x7 << ICC_CTLR_EL1_PRI_BITS_SHIFT)
+#define ICC_CTLR_EL1_ID_BITS_SHIFT 11
+#define ICC_CTLR_EL1_ID_BITS_MASK (0x7 << ICC_CTLR_EL1_ID_BITS_SHIFT)
+#define ICC_CTLR_EL1_SEIS_SHIFT 14
+#define ICC_CTLR_EL1_SEIS_MASK (0x1 << ICC_CTLR_EL1_SEIS_SHIFT)
+#define ICC_CTLR_EL1_A3V_SHIFT 15
+#define ICC_CTLR_EL1_A3V_MASK (0x1 << ICC_CTLR_EL1_A3V_SHIFT)
+#define ICC_CTLR_EL1_RSS (0x1 << 18)
+#define ICC_CTLR_EL1_ExtRange (0x1 << 19)
+#define ICC_PMR_EL1_SHIFT 0
+#define ICC_PMR_EL1_MASK (0xff << ICC_PMR_EL1_SHIFT)
+#define ICC_BPR0_EL1_SHIFT 0
+#define ICC_BPR0_EL1_MASK (0x7 << ICC_BPR0_EL1_SHIFT)
+#define ICC_BPR1_EL1_SHIFT 0
+#define ICC_BPR1_EL1_MASK (0x7 << ICC_BPR1_EL1_SHIFT)
+#define ICC_IGRPEN0_EL1_SHIFT 0
+#define ICC_IGRPEN0_EL1_MASK (1 << ICC_IGRPEN0_EL1_SHIFT)
+#define ICC_IGRPEN1_EL1_SHIFT 0
+#define ICC_IGRPEN1_EL1_MASK (1 << ICC_IGRPEN1_EL1_SHIFT)
+#define ICC_SRE_EL1_DIB (1U << 2)
+#define ICC_SRE_EL1_DFB (1U << 1)
#define ICC_SRE_EL1_SRE (1U << 0)
-#define ICC_IGRPEN1_EL1_ENABLE (1U << 0)
+/* These are for GICv2 emulation only */
+#define GICH_LR_VIRTUALID (0x3ffUL << 0)
+#define GICH_LR_PHYSID_CPUID_SHIFT (10)
+#define GICH_LR_PHYSID_CPUID (7UL << GICH_LR_PHYSID_CPUID_SHIFT)
+
+#define ICC_IAR1_EL1_SPURIOUS 0x3ff
+
+#define ICC_SRE_EL2_SRE (1 << 0)
+#define ICC_SRE_EL2_ENABLE (1 << 3)
-#define GICV3_MAX_CPUS 512
+#define ICC_SGI1R_TARGET_LIST_SHIFT 0
+#define ICC_SGI1R_TARGET_LIST_MASK (0xffff << ICC_SGI1R_TARGET_LIST_SHIFT)
+#define ICC_SGI1R_AFFINITY_1_SHIFT 16
+#define ICC_SGI1R_AFFINITY_1_MASK (0xff << ICC_SGI1R_AFFINITY_1_SHIFT)
+#define ICC_SGI1R_SGI_ID_SHIFT 24
+#define ICC_SGI1R_SGI_ID_MASK (0xfULL << ICC_SGI1R_SGI_ID_SHIFT)
+#define ICC_SGI1R_AFFINITY_2_SHIFT 32
+#define ICC_SGI1R_AFFINITY_2_MASK (0xffULL << ICC_SGI1R_AFFINITY_2_SHIFT)
+#define ICC_SGI1R_IRQ_ROUTING_MODE_BIT 40
+#define ICC_SGI1R_RS_SHIFT 44
+#define ICC_SGI1R_RS_MASK (0xfULL << ICC_SGI1R_RS_SHIFT)
+#define ICC_SGI1R_AFFINITY_3_SHIFT 48
+#define ICC_SGI1R_AFFINITY_3_MASK (0xffULL << ICC_SGI1R_AFFINITY_3_SHIFT)
-#endif /* SELFTEST_KVM_GICV3_H */
+#endif
diff --git a/tools/testing/selftests/kvm/include/aarch64/gic_v3_its.h b/tools/testing/selftests/kvm/include/aarch64/gic_v3_its.h
new file mode 100644
index 000000000000..3722ed9c8f96
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/aarch64/gic_v3_its.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __SELFTESTS_GIC_V3_ITS_H__
+#define __SELFTESTS_GIC_V3_ITS_H__
+
+#include <linux/sizes.h>
+
+void its_init(vm_paddr_t coll_tbl, size_t coll_tbl_sz,
+ vm_paddr_t device_tbl, size_t device_tbl_sz,
+ vm_paddr_t cmdq, size_t cmdq_size);
+
+void its_send_mapd_cmd(void *cmdq_base, u32 device_id, vm_paddr_t itt_base,
+ size_t itt_size, bool valid);
+void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool valid);
+void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id,
+ u32 collection_id, u32 intid);
+void its_send_invall_cmd(void *cmdq_base, u32 collection_id);
+
+#endif // __SELFTESTS_GIC_V3_ITS_H__
diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h
index 9e518b562827..9b20a355d81a 100644
--- a/tools/testing/selftests/kvm/include/aarch64/processor.h
+++ b/tools/testing/selftests/kvm/include/aarch64/processor.h
@@ -8,6 +8,8 @@
#define SELFTEST_KVM_PROCESSOR_H
#include "kvm_util.h"
+#include "ucall_common.h"
+
#include <linux/stringify.h>
#include <linux/types.h>
#include <asm/sysreg.h>
@@ -58,8 +60,6 @@
MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) | \
MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT))
-#define MPIDR_HWID_BITMASK (0xff00fffffful)
-
void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init);
struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
struct kvm_vcpu_init *init, void *guest_code);
@@ -177,11 +177,28 @@ static __always_inline u32 __raw_readl(const volatile void *addr)
return val;
}
+static __always_inline void __raw_writeq(u64 val, volatile void *addr)
+{
+ asm volatile("str %0, [%1]" : : "rZ" (val), "r" (addr));
+}
+
+static __always_inline u64 __raw_readq(const volatile void *addr)
+{
+ u64 val;
+ asm volatile("ldr %0, [%1]" : "=r" (val) : "r" (addr));
+ return val;
+}
+
#define writel_relaxed(v,c) ((void)__raw_writel((__force u32)cpu_to_le32(v),(c)))
#define readl_relaxed(c) ({ u32 __r = le32_to_cpu((__force __le32)__raw_readl(c)); __r; })
+#define writeq_relaxed(v,c) ((void)__raw_writeq((__force u64)cpu_to_le64(v),(c)))
+#define readq_relaxed(c) ({ u64 __r = le64_to_cpu((__force __le64)__raw_readq(c)); __r; })
#define writel(v,c) ({ __iowmb(); writel_relaxed((v),(c));})
#define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(__v); __v; })
+#define writeq(v,c) ({ __iowmb(); writeq_relaxed((v),(c));})
+#define readq(c) ({ u64 __v = readq_relaxed(c); __iormb(__v); __v; })
+
static inline void local_irq_enable(void)
{
diff --git a/tools/testing/selftests/kvm/include/aarch64/ucall.h b/tools/testing/selftests/kvm/include/aarch64/ucall.h
index 4b68f37efd36..4ec801f37f00 100644
--- a/tools/testing/selftests/kvm/include/aarch64/ucall.h
+++ b/tools/testing/selftests/kvm/include/aarch64/ucall.h
@@ -2,7 +2,7 @@
#ifndef SELFTEST_KVM_UCALL_H
#define SELFTEST_KVM_UCALL_H
-#include "kvm_util_base.h"
+#include "kvm_util.h"
#define UCALL_EXIT_REASON KVM_EXIT_MMIO
diff --git a/tools/testing/selftests/kvm/include/aarch64/vgic.h b/tools/testing/selftests/kvm/include/aarch64/vgic.h
index 0ac6f05c63f9..c481d0c00a5d 100644
--- a/tools/testing/selftests/kvm/include/aarch64/vgic.h
+++ b/tools/testing/selftests/kvm/include/aarch64/vgic.h
@@ -16,8 +16,7 @@
((uint64_t)(flags) << 12) | \
index)
-int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs,
- uint64_t gicd_base_gpa, uint64_t gicr_base_gpa);
+int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs);
#define VGIC_MAX_RESERVED 1023
@@ -33,4 +32,6 @@ void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu);
#define KVM_IRQCHIP_NUM_PINS (1020 - 32)
+int vgic_its_setup(struct kvm_vm *vm);
+
#endif // SELFTEST_KVM_VGIC_H
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index c9286811a4cb..63c2aaae51f3 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -1,13 +1,1116 @@
/* SPDX-License-Identifier: GPL-2.0-only */
/*
- * tools/testing/selftests/kvm/include/kvm_util.h
- *
* Copyright (C) 2018, Google LLC.
*/
#ifndef SELFTEST_KVM_UTIL_H
#define SELFTEST_KVM_UTIL_H
-#include "kvm_util_base.h"
-#include "ucall_common.h"
+#include "test_util.h"
+
+#include <linux/compiler.h>
+#include "linux/hashtable.h"
+#include "linux/list.h"
+#include <linux/kernel.h>
+#include <linux/kvm.h>
+#include "linux/rbtree.h"
+#include <linux/types.h>
+
+#include <asm/atomic.h>
+#include <asm/kvm.h>
+
+#include <sys/ioctl.h>
+
+#include "kvm_util_arch.h"
+#include "kvm_util_types.h"
+#include "sparsebit.h"
+
+#define KVM_DEV_PATH "/dev/kvm"
+#define KVM_MAX_VCPUS 512
+
+#define NSEC_PER_SEC 1000000000L
+
+struct userspace_mem_region {
+ struct kvm_userspace_memory_region2 region;
+ struct sparsebit *unused_phy_pages;
+ struct sparsebit *protected_phy_pages;
+ int fd;
+ off_t offset;
+ enum vm_mem_backing_src_type backing_src_type;
+ void *host_mem;
+ void *host_alias;
+ void *mmap_start;
+ void *mmap_alias;
+ size_t mmap_size;
+ struct rb_node gpa_node;
+ struct rb_node hva_node;
+ struct hlist_node slot_node;
+};
+
+struct kvm_vcpu {
+ struct list_head list;
+ uint32_t id;
+ int fd;
+ struct kvm_vm *vm;
+ struct kvm_run *run;
+#ifdef __x86_64__
+ struct kvm_cpuid2 *cpuid;
+#endif
+ struct kvm_dirty_gfn *dirty_gfns;
+ uint32_t fetch_index;
+ uint32_t dirty_gfns_count;
+};
+
+struct userspace_mem_regions {
+ struct rb_root gpa_tree;
+ struct rb_root hva_tree;
+ DECLARE_HASHTABLE(slot_hash, 9);
+};
+
+enum kvm_mem_region_type {
+ MEM_REGION_CODE,
+ MEM_REGION_DATA,
+ MEM_REGION_PT,
+ MEM_REGION_TEST_DATA,
+ NR_MEM_REGIONS,
+};
+
+struct kvm_vm {
+ int mode;
+ unsigned long type;
+ int kvm_fd;
+ int fd;
+ unsigned int pgtable_levels;
+ unsigned int page_size;
+ unsigned int page_shift;
+ unsigned int pa_bits;
+ unsigned int va_bits;
+ uint64_t max_gfn;
+ struct list_head vcpus;
+ struct userspace_mem_regions regions;
+ struct sparsebit *vpages_valid;
+ struct sparsebit *vpages_mapped;
+ bool has_irqchip;
+ bool pgd_created;
+ vm_paddr_t ucall_mmio_addr;
+ vm_paddr_t pgd;
+ vm_vaddr_t handlers;
+ uint32_t dirty_ring_size;
+ uint64_t gpa_tag_mask;
+
+ struct kvm_vm_arch arch;
+
+ /* Cache of information for binary stats interface */
+ int stats_fd;
+ struct kvm_stats_header stats_header;
+ struct kvm_stats_desc *stats_desc;
+
+ /*
+ * KVM region slots. These are the default memslots used by page
+ * allocators, e.g., lib/elf uses the memslots[MEM_REGION_CODE]
+ * memslot.
+ */
+ uint32_t memslots[NR_MEM_REGIONS];
+};
+
+struct vcpu_reg_sublist {
+ const char *name;
+ long capability;
+ int feature;
+ int feature_type;
+ bool finalize;
+ __u64 *regs;
+ __u64 regs_n;
+ __u64 *rejects_set;
+ __u64 rejects_set_n;
+ __u64 *skips_set;
+ __u64 skips_set_n;
+};
+
+struct vcpu_reg_list {
+ char *name;
+ struct vcpu_reg_sublist sublists[];
+};
+
+#define for_each_sublist(c, s) \
+ for ((s) = &(c)->sublists[0]; (s)->regs; ++(s))
+
+#define kvm_for_each_vcpu(vm, i, vcpu) \
+ for ((i) = 0; (i) <= (vm)->last_vcpu_id; (i)++) \
+ if (!((vcpu) = vm->vcpus[i])) \
+ continue; \
+ else
+
+struct userspace_mem_region *
+memslot2region(struct kvm_vm *vm, uint32_t memslot);
+
+static inline struct userspace_mem_region *vm_get_mem_region(struct kvm_vm *vm,
+ enum kvm_mem_region_type type)
+{
+ assert(type < NR_MEM_REGIONS);
+ return memslot2region(vm, vm->memslots[type]);
+}
+
+/* Minimum allocated guest virtual and physical addresses */
+#define KVM_UTIL_MIN_VADDR 0x2000
+#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
+
+#define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000
+#define DEFAULT_STACK_PGS 5
+
+enum vm_guest_mode {
+ VM_MODE_P52V48_4K,
+ VM_MODE_P52V48_16K,
+ VM_MODE_P52V48_64K,
+ VM_MODE_P48V48_4K,
+ VM_MODE_P48V48_16K,
+ VM_MODE_P48V48_64K,
+ VM_MODE_P40V48_4K,
+ VM_MODE_P40V48_16K,
+ VM_MODE_P40V48_64K,
+ VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */
+ VM_MODE_P47V64_4K,
+ VM_MODE_P44V64_4K,
+ VM_MODE_P36V48_4K,
+ VM_MODE_P36V48_16K,
+ VM_MODE_P36V48_64K,
+ VM_MODE_P36V47_16K,
+ NUM_VM_MODES,
+};
+
+struct vm_shape {
+ uint32_t type;
+ uint8_t mode;
+ uint8_t pad0;
+ uint16_t pad1;
+};
+
+kvm_static_assert(sizeof(struct vm_shape) == sizeof(uint64_t));
+
+#define VM_TYPE_DEFAULT 0
+
+#define VM_SHAPE(__mode) \
+({ \
+ struct vm_shape shape = { \
+ .mode = (__mode), \
+ .type = VM_TYPE_DEFAULT \
+ }; \
+ \
+ shape; \
+})
+
+#if defined(__aarch64__)
+
+extern enum vm_guest_mode vm_mode_default;
+
+#define VM_MODE_DEFAULT vm_mode_default
+#define MIN_PAGE_SHIFT 12U
+#define ptes_per_page(page_size) ((page_size) / 8)
+
+#elif defined(__x86_64__)
+
+#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K
+#define MIN_PAGE_SHIFT 12U
+#define ptes_per_page(page_size) ((page_size) / 8)
+
+#elif defined(__s390x__)
+
+#define VM_MODE_DEFAULT VM_MODE_P44V64_4K
+#define MIN_PAGE_SHIFT 12U
+#define ptes_per_page(page_size) ((page_size) / 16)
+
+#elif defined(__riscv)
+
+#if __riscv_xlen == 32
+#error "RISC-V 32-bit kvm selftests not supported"
+#endif
+
+#define VM_MODE_DEFAULT VM_MODE_P40V48_4K
+#define MIN_PAGE_SHIFT 12U
+#define ptes_per_page(page_size) ((page_size) / 8)
+
+#endif
+
+#define VM_SHAPE_DEFAULT VM_SHAPE(VM_MODE_DEFAULT)
+
+#define MIN_PAGE_SIZE (1U << MIN_PAGE_SHIFT)
+#define PTES_PER_MIN_PAGE ptes_per_page(MIN_PAGE_SIZE)
+
+struct vm_guest_mode_params {
+ unsigned int pa_bits;
+ unsigned int va_bits;
+ unsigned int page_size;
+ unsigned int page_shift;
+};
+extern const struct vm_guest_mode_params vm_guest_mode_params[];
+
+int open_path_or_exit(const char *path, int flags);
+int open_kvm_dev_path_or_exit(void);
+
+bool get_kvm_param_bool(const char *param);
+bool get_kvm_intel_param_bool(const char *param);
+bool get_kvm_amd_param_bool(const char *param);
+
+int get_kvm_param_integer(const char *param);
+int get_kvm_intel_param_integer(const char *param);
+int get_kvm_amd_param_integer(const char *param);
+
+unsigned int kvm_check_cap(long cap);
+
+static inline bool kvm_has_cap(long cap)
+{
+ return kvm_check_cap(cap);
+}
+
+#define __KVM_SYSCALL_ERROR(_name, _ret) \
+ "%s failed, rc: %i errno: %i (%s)", (_name), (_ret), errno, strerror(errno)
+
+/*
+ * Use the "inner", double-underscore macro when reporting errors from within
+ * other macros so that the name of ioctl() and not its literal numeric value
+ * is printed on error. The "outer" macro is strongly preferred when reporting
+ * errors "directly", i.e. without an additional layer of macros, as it reduces
+ * the probability of passing in the wrong string.
+ */
+#define __KVM_IOCTL_ERROR(_name, _ret) __KVM_SYSCALL_ERROR(_name, _ret)
+#define KVM_IOCTL_ERROR(_ioctl, _ret) __KVM_IOCTL_ERROR(#_ioctl, _ret)
+
+#define kvm_do_ioctl(fd, cmd, arg) \
+({ \
+ kvm_static_assert(!_IOC_SIZE(cmd) || sizeof(*arg) == _IOC_SIZE(cmd)); \
+ ioctl(fd, cmd, arg); \
+})
+
+#define __kvm_ioctl(kvm_fd, cmd, arg) \
+ kvm_do_ioctl(kvm_fd, cmd, arg)
+
+#define kvm_ioctl(kvm_fd, cmd, arg) \
+({ \
+ int ret = __kvm_ioctl(kvm_fd, cmd, arg); \
+ \
+ TEST_ASSERT(!ret, __KVM_IOCTL_ERROR(#cmd, ret)); \
+})
+
+static __always_inline void static_assert_is_vm(struct kvm_vm *vm) { }
+
+#define __vm_ioctl(vm, cmd, arg) \
+({ \
+ static_assert_is_vm(vm); \
+ kvm_do_ioctl((vm)->fd, cmd, arg); \
+})
+
+/*
+ * Assert that a VM or vCPU ioctl() succeeded, with extra magic to detect if
+ * the ioctl() failed because KVM killed/bugged the VM. To detect a dead VM,
+ * probe KVM_CAP_USER_MEMORY, which (a) has been supported by KVM since before
+ * selftests existed and (b) should never outright fail, i.e. is supposed to
+ * return 0 or 1. If KVM kills a VM, KVM returns -EIO for all ioctl()s for the
+ * VM and its vCPUs, including KVM_CHECK_EXTENSION.
+ */
+#define __TEST_ASSERT_VM_VCPU_IOCTL(cond, name, ret, vm) \
+do { \
+ int __errno = errno; \
+ \
+ static_assert_is_vm(vm); \
+ \
+ if (cond) \
+ break; \
+ \
+ if (errno == EIO && \
+ __vm_ioctl(vm, KVM_CHECK_EXTENSION, (void *)KVM_CAP_USER_MEMORY) < 0) { \
+ TEST_ASSERT(errno == EIO, "KVM killed the VM, should return -EIO"); \
+ TEST_FAIL("KVM killed/bugged the VM, check the kernel log for clues"); \
+ } \
+ errno = __errno; \
+ TEST_ASSERT(cond, __KVM_IOCTL_ERROR(name, ret)); \
+} while (0)
+
+#define TEST_ASSERT_VM_VCPU_IOCTL(cond, cmd, ret, vm) \
+ __TEST_ASSERT_VM_VCPU_IOCTL(cond, #cmd, ret, vm)
+
+#define vm_ioctl(vm, cmd, arg) \
+({ \
+ int ret = __vm_ioctl(vm, cmd, arg); \
+ \
+ __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, vm); \
+})
+
+static __always_inline void static_assert_is_vcpu(struct kvm_vcpu *vcpu) { }
+
+#define __vcpu_ioctl(vcpu, cmd, arg) \
+({ \
+ static_assert_is_vcpu(vcpu); \
+ kvm_do_ioctl((vcpu)->fd, cmd, arg); \
+})
+
+#define vcpu_ioctl(vcpu, cmd, arg) \
+({ \
+ int ret = __vcpu_ioctl(vcpu, cmd, arg); \
+ \
+ __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, (vcpu)->vm); \
+})
+
+/*
+ * Looks up and returns the value corresponding to the capability
+ * (KVM_CAP_*) given by cap.
+ */
+static inline int vm_check_cap(struct kvm_vm *vm, long cap)
+{
+ int ret = __vm_ioctl(vm, KVM_CHECK_EXTENSION, (void *)cap);
+
+ TEST_ASSERT_VM_VCPU_IOCTL(ret >= 0, KVM_CHECK_EXTENSION, ret, vm);
+ return ret;
+}
+
+static inline int __vm_enable_cap(struct kvm_vm *vm, uint32_t cap, uint64_t arg0)
+{
+ struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } };
+
+ return __vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap);
+}
+static inline void vm_enable_cap(struct kvm_vm *vm, uint32_t cap, uint64_t arg0)
+{
+ struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } };
+
+ vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap);
+}
+
+static inline void vm_set_memory_attributes(struct kvm_vm *vm, uint64_t gpa,
+ uint64_t size, uint64_t attributes)
+{
+ struct kvm_memory_attributes attr = {
+ .attributes = attributes,
+ .address = gpa,
+ .size = size,
+ .flags = 0,
+ };
+
+ /*
+ * KVM_SET_MEMORY_ATTRIBUTES overwrites _all_ attributes. These flows
+ * need significant enhancements to support multiple attributes.
+ */
+ TEST_ASSERT(!attributes || attributes == KVM_MEMORY_ATTRIBUTE_PRIVATE,
+ "Update me to support multiple attributes!");
+
+ vm_ioctl(vm, KVM_SET_MEMORY_ATTRIBUTES, &attr);
+}
+
+
+static inline void vm_mem_set_private(struct kvm_vm *vm, uint64_t gpa,
+ uint64_t size)
+{
+ vm_set_memory_attributes(vm, gpa, size, KVM_MEMORY_ATTRIBUTE_PRIVATE);
+}
+
+static inline void vm_mem_set_shared(struct kvm_vm *vm, uint64_t gpa,
+ uint64_t size)
+{
+ vm_set_memory_attributes(vm, gpa, size, 0);
+}
+
+void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t gpa, uint64_t size,
+ bool punch_hole);
+
+static inline void vm_guest_mem_punch_hole(struct kvm_vm *vm, uint64_t gpa,
+ uint64_t size)
+{
+ vm_guest_mem_fallocate(vm, gpa, size, true);
+}
+
+static inline void vm_guest_mem_allocate(struct kvm_vm *vm, uint64_t gpa,
+ uint64_t size)
+{
+ vm_guest_mem_fallocate(vm, gpa, size, false);
+}
+
+void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size);
+const char *vm_guest_mode_string(uint32_t i);
+
+void kvm_vm_free(struct kvm_vm *vmp);
+void kvm_vm_restart(struct kvm_vm *vmp);
+void kvm_vm_release(struct kvm_vm *vmp);
+int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva,
+ size_t len);
+void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename);
+int kvm_memfd_alloc(size_t size, bool hugepages);
+
+void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
+
+static inline void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
+{
+ struct kvm_dirty_log args = { .dirty_bitmap = log, .slot = slot };
+
+ vm_ioctl(vm, KVM_GET_DIRTY_LOG, &args);
+}
+
+static inline void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
+ uint64_t first_page, uint32_t num_pages)
+{
+ struct kvm_clear_dirty_log args = {
+ .dirty_bitmap = log,
+ .slot = slot,
+ .first_page = first_page,
+ .num_pages = num_pages
+ };
+
+ vm_ioctl(vm, KVM_CLEAR_DIRTY_LOG, &args);
+}
+
+static inline uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm)
+{
+ return __vm_ioctl(vm, KVM_RESET_DIRTY_RINGS, NULL);
+}
+
+static inline int vm_get_stats_fd(struct kvm_vm *vm)
+{
+ int fd = __vm_ioctl(vm, KVM_GET_STATS_FD, NULL);
+
+ TEST_ASSERT_VM_VCPU_IOCTL(fd >= 0, KVM_GET_STATS_FD, fd, vm);
+ return fd;
+}
+
+static inline void read_stats_header(int stats_fd, struct kvm_stats_header *header)
+{
+ ssize_t ret;
+
+ ret = pread(stats_fd, header, sizeof(*header), 0);
+ TEST_ASSERT(ret == sizeof(*header),
+ "Failed to read '%lu' header bytes, ret = '%ld'",
+ sizeof(*header), ret);
+}
+
+struct kvm_stats_desc *read_stats_descriptors(int stats_fd,
+ struct kvm_stats_header *header);
+
+static inline ssize_t get_stats_descriptor_size(struct kvm_stats_header *header)
+{
+ /*
+ * The base size of the descriptor is defined by KVM's ABI, but the
+ * size of the name field is variable, as far as KVM's ABI is
+ * concerned. For a given instance of KVM, the name field is the same
+ * size for all stats and is provided in the overall stats header.
+ */
+ return sizeof(struct kvm_stats_desc) + header->name_size;
+}
+
+static inline struct kvm_stats_desc *get_stats_descriptor(struct kvm_stats_desc *stats,
+ int index,
+ struct kvm_stats_header *header)
+{
+ /*
+ * Note, size_desc includes the size of the name field, which is
+ * variable. i.e. this is NOT equivalent to &stats_desc[i].
+ */
+ return (void *)stats + index * get_stats_descriptor_size(header);
+}
+
+void read_stat_data(int stats_fd, struct kvm_stats_header *header,
+ struct kvm_stats_desc *desc, uint64_t *data,
+ size_t max_elements);
+
+void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data,
+ size_t max_elements);
+
+static inline uint64_t vm_get_stat(struct kvm_vm *vm, const char *stat_name)
+{
+ uint64_t data;
+
+ __vm_get_stat(vm, stat_name, &data, 1);
+ return data;
+}
+
+void vm_create_irqchip(struct kvm_vm *vm);
+
+static inline int __vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size,
+ uint64_t flags)
+{
+ struct kvm_create_guest_memfd guest_memfd = {
+ .size = size,
+ .flags = flags,
+ };
+
+ return __vm_ioctl(vm, KVM_CREATE_GUEST_MEMFD, &guest_memfd);
+}
+
+static inline int vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size,
+ uint64_t flags)
+{
+ int fd = __vm_create_guest_memfd(vm, size, flags);
+
+ TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_GUEST_MEMFD, fd));
+ return fd;
+}
+
+void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
+ uint64_t gpa, uint64_t size, void *hva);
+int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
+ uint64_t gpa, uint64_t size, void *hva);
+void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
+ uint64_t gpa, uint64_t size, void *hva,
+ uint32_t guest_memfd, uint64_t guest_memfd_offset);
+int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
+ uint64_t gpa, uint64_t size, void *hva,
+ uint32_t guest_memfd, uint64_t guest_memfd_offset);
+
+void vm_userspace_mem_region_add(struct kvm_vm *vm,
+ enum vm_mem_backing_src_type src_type,
+ uint64_t guest_paddr, uint32_t slot, uint64_t npages,
+ uint32_t flags);
+void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
+ uint64_t guest_paddr, uint32_t slot, uint64_t npages,
+ uint32_t flags, int guest_memfd_fd, uint64_t guest_memfd_offset);
+
+#ifndef vm_arch_has_protected_memory
+static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm)
+{
+ return false;
+}
+#endif
+
+void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
+void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
+void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
+struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id);
+void vm_populate_vaddr_bitmap(struct kvm_vm *vm);
+vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
+vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
+vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
+ enum kvm_mem_region_type type);
+vm_vaddr_t vm_vaddr_alloc_shared(struct kvm_vm *vm, size_t sz,
+ vm_vaddr_t vaddr_min,
+ enum kvm_mem_region_type type);
+vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages);
+vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm,
+ enum kvm_mem_region_type type);
+vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm);
+
+void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ unsigned int npages);
+void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
+void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva);
+vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
+void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa);
+
+#ifndef vcpu_arch_put_guest
+#define vcpu_arch_put_guest(mem, val) do { (mem) = (val); } while (0)
+#endif
+
+static inline vm_paddr_t vm_untag_gpa(struct kvm_vm *vm, vm_paddr_t gpa)
+{
+ return gpa & ~vm->gpa_tag_mask;
+}
+
+void vcpu_run(struct kvm_vcpu *vcpu);
+int _vcpu_run(struct kvm_vcpu *vcpu);
+
+static inline int __vcpu_run(struct kvm_vcpu *vcpu)
+{
+ return __vcpu_ioctl(vcpu, KVM_RUN, NULL);
+}
+
+void vcpu_run_complete_io(struct kvm_vcpu *vcpu);
+struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu);
+
+static inline void vcpu_enable_cap(struct kvm_vcpu *vcpu, uint32_t cap,
+ uint64_t arg0)
+{
+ struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } };
+
+ vcpu_ioctl(vcpu, KVM_ENABLE_CAP, &enable_cap);
+}
+
+static inline void vcpu_guest_debug_set(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *debug)
+{
+ vcpu_ioctl(vcpu, KVM_SET_GUEST_DEBUG, debug);
+}
+
+static inline void vcpu_mp_state_get(struct kvm_vcpu *vcpu,
+ struct kvm_mp_state *mp_state)
+{
+ vcpu_ioctl(vcpu, KVM_GET_MP_STATE, mp_state);
+}
+static inline void vcpu_mp_state_set(struct kvm_vcpu *vcpu,
+ struct kvm_mp_state *mp_state)
+{
+ vcpu_ioctl(vcpu, KVM_SET_MP_STATE, mp_state);
+}
+
+static inline void vcpu_regs_get(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ vcpu_ioctl(vcpu, KVM_GET_REGS, regs);
+}
+
+static inline void vcpu_regs_set(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ vcpu_ioctl(vcpu, KVM_SET_REGS, regs);
+}
+static inline void vcpu_sregs_get(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ vcpu_ioctl(vcpu, KVM_GET_SREGS, sregs);
+
+}
+static inline void vcpu_sregs_set(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ vcpu_ioctl(vcpu, KVM_SET_SREGS, sregs);
+}
+static inline int _vcpu_sregs_set(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ return __vcpu_ioctl(vcpu, KVM_SET_SREGS, sregs);
+}
+static inline void vcpu_fpu_get(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ vcpu_ioctl(vcpu, KVM_GET_FPU, fpu);
+}
+static inline void vcpu_fpu_set(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ vcpu_ioctl(vcpu, KVM_SET_FPU, fpu);
+}
+
+static inline int __vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id, void *addr)
+{
+ struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)addr };
+
+ return __vcpu_ioctl(vcpu, KVM_GET_ONE_REG, &reg);
+}
+static inline int __vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val)
+{
+ struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val };
+
+ return __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
+}
+static inline void vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id, void *addr)
+{
+ struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)addr };
+
+ vcpu_ioctl(vcpu, KVM_GET_ONE_REG, &reg);
+}
+static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val)
+{
+ struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val };
+
+ vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
+}
+
+#ifdef __KVM_HAVE_VCPU_EVENTS
+static inline void vcpu_events_get(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu_events *events)
+{
+ vcpu_ioctl(vcpu, KVM_GET_VCPU_EVENTS, events);
+}
+static inline void vcpu_events_set(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu_events *events)
+{
+ vcpu_ioctl(vcpu, KVM_SET_VCPU_EVENTS, events);
+}
+#endif
+#ifdef __x86_64__
+static inline void vcpu_nested_state_get(struct kvm_vcpu *vcpu,
+ struct kvm_nested_state *state)
+{
+ vcpu_ioctl(vcpu, KVM_GET_NESTED_STATE, state);
+}
+static inline int __vcpu_nested_state_set(struct kvm_vcpu *vcpu,
+ struct kvm_nested_state *state)
+{
+ return __vcpu_ioctl(vcpu, KVM_SET_NESTED_STATE, state);
+}
+
+static inline void vcpu_nested_state_set(struct kvm_vcpu *vcpu,
+ struct kvm_nested_state *state)
+{
+ vcpu_ioctl(vcpu, KVM_SET_NESTED_STATE, state);
+}
+#endif
+static inline int vcpu_get_stats_fd(struct kvm_vcpu *vcpu)
+{
+ int fd = __vcpu_ioctl(vcpu, KVM_GET_STATS_FD, NULL);
+
+ TEST_ASSERT_VM_VCPU_IOCTL(fd >= 0, KVM_CHECK_EXTENSION, fd, vcpu->vm);
+ return fd;
+}
+
+int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr);
+
+static inline void kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr)
+{
+ int ret = __kvm_has_device_attr(dev_fd, group, attr);
+
+ TEST_ASSERT(!ret, "KVM_HAS_DEVICE_ATTR failed, rc: %i errno: %i", ret, errno);
+}
+
+int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val);
+
+static inline void kvm_device_attr_get(int dev_fd, uint32_t group,
+ uint64_t attr, void *val)
+{
+ int ret = __kvm_device_attr_get(dev_fd, group, attr, val);
+
+ TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_GET_DEVICE_ATTR, ret));
+}
+
+int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val);
+
+static inline void kvm_device_attr_set(int dev_fd, uint32_t group,
+ uint64_t attr, void *val)
+{
+ int ret = __kvm_device_attr_set(dev_fd, group, attr, val);
+
+ TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_DEVICE_ATTR, ret));
+}
+
+static inline int __vcpu_has_device_attr(struct kvm_vcpu *vcpu, uint32_t group,
+ uint64_t attr)
+{
+ return __kvm_has_device_attr(vcpu->fd, group, attr);
+}
+
+static inline void vcpu_has_device_attr(struct kvm_vcpu *vcpu, uint32_t group,
+ uint64_t attr)
+{
+ kvm_has_device_attr(vcpu->fd, group, attr);
+}
+
+static inline int __vcpu_device_attr_get(struct kvm_vcpu *vcpu, uint32_t group,
+ uint64_t attr, void *val)
+{
+ return __kvm_device_attr_get(vcpu->fd, group, attr, val);
+}
+
+static inline void vcpu_device_attr_get(struct kvm_vcpu *vcpu, uint32_t group,
+ uint64_t attr, void *val)
+{
+ kvm_device_attr_get(vcpu->fd, group, attr, val);
+}
+
+static inline int __vcpu_device_attr_set(struct kvm_vcpu *vcpu, uint32_t group,
+ uint64_t attr, void *val)
+{
+ return __kvm_device_attr_set(vcpu->fd, group, attr, val);
+}
+
+static inline void vcpu_device_attr_set(struct kvm_vcpu *vcpu, uint32_t group,
+ uint64_t attr, void *val)
+{
+ kvm_device_attr_set(vcpu->fd, group, attr, val);
+}
+
+int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type);
+int __kvm_create_device(struct kvm_vm *vm, uint64_t type);
+
+static inline int kvm_create_device(struct kvm_vm *vm, uint64_t type)
+{
+ int fd = __kvm_create_device(vm, type);
+
+ TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_DEVICE, fd));
+ return fd;
+}
+
+void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu);
+
+/*
+ * VM VCPU Args Set
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * num - number of arguments
+ * ... - arguments, each of type uint64_t
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the first @num input parameters for the function at @vcpu's entry point,
+ * per the C calling convention of the architecture, to the values given as
+ * variable args. Each of the variable args is expected to be of type uint64_t.
+ * The maximum @num can be is specific to the architecture.
+ */
+void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...);
+
+void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level);
+int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level);
+
+#define KVM_MAX_IRQ_ROUTES 4096
+
+struct kvm_irq_routing *kvm_gsi_routing_create(void);
+void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing,
+ uint32_t gsi, uint32_t pin);
+int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing);
+void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing);
+
+const char *exit_reason_str(unsigned int exit_reason);
+
+vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
+ uint32_t memslot);
+vm_paddr_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
+ vm_paddr_t paddr_min, uint32_t memslot,
+ bool protected);
+vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm);
+
+static inline vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
+ vm_paddr_t paddr_min, uint32_t memslot)
+{
+ /*
+ * By default, allocate memory as protected for VMs that support
+ * protected memory, as the majority of memory for such VMs is
+ * protected, i.e. using shared memory is effectively opt-in.
+ */
+ return __vm_phy_pages_alloc(vm, num, paddr_min, memslot,
+ vm_arch_has_protected_memory(vm));
+}
+
+/*
+ * ____vm_create() does KVM_CREATE_VM and little else. __vm_create() also
+ * loads the test binary into guest memory and creates an IRQ chip (x86 only).
+ * __vm_create() does NOT create vCPUs, @nr_runnable_vcpus is used purely to
+ * calculate the amount of memory needed for per-vCPU data, e.g. stacks.
+ */
+struct kvm_vm *____vm_create(struct vm_shape shape);
+struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
+ uint64_t nr_extra_pages);
+
+static inline struct kvm_vm *vm_create_barebones(void)
+{
+ return ____vm_create(VM_SHAPE_DEFAULT);
+}
+
+static inline struct kvm_vm *vm_create_barebones_type(unsigned long type)
+{
+ const struct vm_shape shape = {
+ .mode = VM_MODE_DEFAULT,
+ .type = type,
+ };
+
+ return ____vm_create(shape);
+}
+
+static inline struct kvm_vm *vm_create(uint32_t nr_runnable_vcpus)
+{
+ return __vm_create(VM_SHAPE_DEFAULT, nr_runnable_vcpus, 0);
+}
+
+struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus,
+ uint64_t extra_mem_pages,
+ void *guest_code, struct kvm_vcpu *vcpus[]);
+
+static inline struct kvm_vm *vm_create_with_vcpus(uint32_t nr_vcpus,
+ void *guest_code,
+ struct kvm_vcpu *vcpus[])
+{
+ return __vm_create_with_vcpus(VM_SHAPE_DEFAULT, nr_vcpus, 0,
+ guest_code, vcpus);
+}
+
+
+struct kvm_vm *__vm_create_shape_with_one_vcpu(struct vm_shape shape,
+ struct kvm_vcpu **vcpu,
+ uint64_t extra_mem_pages,
+ void *guest_code);
+
+/*
+ * Create a VM with a single vCPU with reasonable defaults and @extra_mem_pages
+ * additional pages of guest memory. Returns the VM and vCPU (via out param).
+ */
+static inline struct kvm_vm *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
+ uint64_t extra_mem_pages,
+ void *guest_code)
+{
+ return __vm_create_shape_with_one_vcpu(VM_SHAPE_DEFAULT, vcpu,
+ extra_mem_pages, guest_code);
+}
+
+static inline struct kvm_vm *vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
+ void *guest_code)
+{
+ return __vm_create_with_one_vcpu(vcpu, 0, guest_code);
+}
+
+static inline struct kvm_vm *vm_create_shape_with_one_vcpu(struct vm_shape shape,
+ struct kvm_vcpu **vcpu,
+ void *guest_code)
+{
+ return __vm_create_shape_with_one_vcpu(shape, vcpu, 0, guest_code);
+}
+
+struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm);
+
+void kvm_pin_this_task_to_pcpu(uint32_t pcpu);
+void kvm_print_vcpu_pinning_help(void);
+void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
+ int nr_vcpus);
+
+unsigned long vm_compute_max_gfn(struct kvm_vm *vm);
+unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size);
+unsigned int vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages);
+unsigned int vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages);
+static inline unsigned int
+vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
+{
+ unsigned int n;
+ n = vm_num_guest_pages(mode, vm_num_host_pages(mode, num_guest_pages));
+#ifdef __s390x__
+ /* s390 requires 1M aligned guest sizes */
+ n = (n + 255) & ~255;
+#endif
+ return n;
+}
+
+#define sync_global_to_guest(vm, g) ({ \
+ typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \
+ memcpy(_p, &(g), sizeof(g)); \
+})
+
+#define sync_global_from_guest(vm, g) ({ \
+ typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \
+ memcpy(&(g), _p, sizeof(g)); \
+})
+
+/*
+ * Write a global value, but only in the VM's (guest's) domain. Primarily used
+ * for "globals" that hold per-VM values (VMs always duplicate code and global
+ * data into their own region of physical memory), but can be used anytime it's
+ * undesirable to change the host's copy of the global.
+ */
+#define write_guest_global(vm, g, val) ({ \
+ typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \
+ typeof(g) _val = val; \
+ \
+ memcpy(_p, &(_val), sizeof(g)); \
+})
+
+void assert_on_unhandled_exception(struct kvm_vcpu *vcpu);
+
+void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu,
+ uint8_t indent);
+
+static inline void vcpu_dump(FILE *stream, struct kvm_vcpu *vcpu,
+ uint8_t indent)
+{
+ vcpu_arch_dump(stream, vcpu, indent);
+}
+
+/*
+ * Adds a vCPU with reasonable defaults (e.g. a stack)
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpu_id - The id of the VCPU to add to the VM.
+ */
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id);
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code);
+
+static inline struct kvm_vcpu *vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
+ void *guest_code)
+{
+ struct kvm_vcpu *vcpu = vm_arch_vcpu_add(vm, vcpu_id);
+
+ vcpu_arch_set_entry_point(vcpu, guest_code);
+
+ return vcpu;
+}
+
+/* Re-create a vCPU after restarting a VM, e.g. for state save/restore tests. */
+struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, uint32_t vcpu_id);
+
+static inline struct kvm_vcpu *vm_vcpu_recreate(struct kvm_vm *vm,
+ uint32_t vcpu_id)
+{
+ return vm_arch_vcpu_recreate(vm, vcpu_id);
+}
+
+void vcpu_arch_free(struct kvm_vcpu *vcpu);
+
+void virt_arch_pgd_alloc(struct kvm_vm *vm);
+
+static inline void virt_pgd_alloc(struct kvm_vm *vm)
+{
+ virt_arch_pgd_alloc(vm);
+}
+
+/*
+ * VM Virtual Page Map
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vaddr - VM Virtual Address
+ * paddr - VM Physical Address
+ * memslot - Memory region slot for new virtual translation tables
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Within @vm, creates a virtual translation for the page starting
+ * at @vaddr to the page starting at @paddr.
+ */
+void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr);
+
+static inline void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+{
+ virt_arch_pg_map(vm, vaddr, paddr);
+}
+
+
+/*
+ * Address Guest Virtual to Guest Physical
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * gva - VM virtual address
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Equivalent VM physical address
+ *
+ * Returns the VM physical address of the translated VM virtual
+ * address given by @gva.
+ */
+vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva);
+
+static inline vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ return addr_arch_gva2gpa(vm, gva);
+}
+
+/*
+ * Virtual Translation Tables Dump
+ *
+ * Input Args:
+ * stream - Output FILE stream
+ * vm - Virtual Machine
+ * indent - Left margin indent amount
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Dumps to the FILE stream given by @stream, the contents of all the
+ * virtual translation tables for the VM given by @vm.
+ */
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
+
+static inline void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+ virt_arch_dump(stream, vm, indent);
+}
+
+
+static inline int __vm_disable_nx_huge_pages(struct kvm_vm *vm)
+{
+ return __vm_enable_cap(vm, KVM_CAP_VM_DISABLE_NX_HUGE_PAGES, 0);
+}
+
+/*
+ * Arch hook that is invoked via a constructor, i.e. before exeucting main(),
+ * to allow for arch-specific setup that is common to all tests, e.g. computing
+ * the default guest "mode".
+ */
+void kvm_selftest_arch_init(void);
+
+void kvm_arch_vm_post_create(struct kvm_vm *vm);
+
+bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr);
+
+uint32_t guest_get_vcpuid(void);
#endif /* SELFTEST_KVM_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
deleted file mode 100644
index 3e0db283a46a..000000000000
--- a/tools/testing/selftests/kvm/include/kvm_util_base.h
+++ /dev/null
@@ -1,1135 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * tools/testing/selftests/kvm/include/kvm_util_base.h
- *
- * Copyright (C) 2018, Google LLC.
- */
-#ifndef SELFTEST_KVM_UTIL_BASE_H
-#define SELFTEST_KVM_UTIL_BASE_H
-
-#include "test_util.h"
-
-#include <linux/compiler.h>
-#include "linux/hashtable.h"
-#include "linux/list.h"
-#include <linux/kernel.h>
-#include <linux/kvm.h>
-#include "linux/rbtree.h"
-#include <linux/types.h>
-
-#include <asm/atomic.h>
-#include <asm/kvm.h>
-
-#include <sys/ioctl.h>
-
-#include "kvm_util_arch.h"
-#include "sparsebit.h"
-
-/*
- * Provide a version of static_assert() that is guaranteed to have an optional
- * message param. If _ISOC11_SOURCE is defined, glibc (/usr/include/assert.h)
- * #undefs and #defines static_assert() as a direct alias to _Static_assert(),
- * i.e. effectively makes the message mandatory. Many KVM selftests #define
- * _GNU_SOURCE for various reasons, and _GNU_SOURCE implies _ISOC11_SOURCE. As
- * a result, static_assert() behavior is non-deterministic and may or may not
- * require a message depending on #include order.
- */
-#define __kvm_static_assert(expr, msg, ...) _Static_assert(expr, msg)
-#define kvm_static_assert(expr, ...) __kvm_static_assert(expr, ##__VA_ARGS__, #expr)
-
-#define KVM_DEV_PATH "/dev/kvm"
-#define KVM_MAX_VCPUS 512
-
-#define NSEC_PER_SEC 1000000000L
-
-typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
-typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
-
-struct userspace_mem_region {
- struct kvm_userspace_memory_region2 region;
- struct sparsebit *unused_phy_pages;
- struct sparsebit *protected_phy_pages;
- int fd;
- off_t offset;
- enum vm_mem_backing_src_type backing_src_type;
- void *host_mem;
- void *host_alias;
- void *mmap_start;
- void *mmap_alias;
- size_t mmap_size;
- struct rb_node gpa_node;
- struct rb_node hva_node;
- struct hlist_node slot_node;
-};
-
-struct kvm_vcpu {
- struct list_head list;
- uint32_t id;
- int fd;
- struct kvm_vm *vm;
- struct kvm_run *run;
-#ifdef __x86_64__
- struct kvm_cpuid2 *cpuid;
-#endif
- struct kvm_dirty_gfn *dirty_gfns;
- uint32_t fetch_index;
- uint32_t dirty_gfns_count;
-};
-
-struct userspace_mem_regions {
- struct rb_root gpa_tree;
- struct rb_root hva_tree;
- DECLARE_HASHTABLE(slot_hash, 9);
-};
-
-enum kvm_mem_region_type {
- MEM_REGION_CODE,
- MEM_REGION_DATA,
- MEM_REGION_PT,
- MEM_REGION_TEST_DATA,
- NR_MEM_REGIONS,
-};
-
-struct kvm_vm {
- int mode;
- unsigned long type;
- uint8_t subtype;
- int kvm_fd;
- int fd;
- unsigned int pgtable_levels;
- unsigned int page_size;
- unsigned int page_shift;
- unsigned int pa_bits;
- unsigned int va_bits;
- uint64_t max_gfn;
- struct list_head vcpus;
- struct userspace_mem_regions regions;
- struct sparsebit *vpages_valid;
- struct sparsebit *vpages_mapped;
- bool has_irqchip;
- bool pgd_created;
- vm_paddr_t ucall_mmio_addr;
- vm_paddr_t pgd;
- vm_vaddr_t gdt;
- vm_vaddr_t tss;
- vm_vaddr_t idt;
- vm_vaddr_t handlers;
- uint32_t dirty_ring_size;
- uint64_t gpa_tag_mask;
-
- struct kvm_vm_arch arch;
-
- /* Cache of information for binary stats interface */
- int stats_fd;
- struct kvm_stats_header stats_header;
- struct kvm_stats_desc *stats_desc;
-
- /*
- * KVM region slots. These are the default memslots used by page
- * allocators, e.g., lib/elf uses the memslots[MEM_REGION_CODE]
- * memslot.
- */
- uint32_t memslots[NR_MEM_REGIONS];
-};
-
-struct vcpu_reg_sublist {
- const char *name;
- long capability;
- int feature;
- int feature_type;
- bool finalize;
- __u64 *regs;
- __u64 regs_n;
- __u64 *rejects_set;
- __u64 rejects_set_n;
- __u64 *skips_set;
- __u64 skips_set_n;
-};
-
-struct vcpu_reg_list {
- char *name;
- struct vcpu_reg_sublist sublists[];
-};
-
-#define for_each_sublist(c, s) \
- for ((s) = &(c)->sublists[0]; (s)->regs; ++(s))
-
-#define kvm_for_each_vcpu(vm, i, vcpu) \
- for ((i) = 0; (i) <= (vm)->last_vcpu_id; (i)++) \
- if (!((vcpu) = vm->vcpus[i])) \
- continue; \
- else
-
-struct userspace_mem_region *
-memslot2region(struct kvm_vm *vm, uint32_t memslot);
-
-static inline struct userspace_mem_region *vm_get_mem_region(struct kvm_vm *vm,
- enum kvm_mem_region_type type)
-{
- assert(type < NR_MEM_REGIONS);
- return memslot2region(vm, vm->memslots[type]);
-}
-
-/* Minimum allocated guest virtual and physical addresses */
-#define KVM_UTIL_MIN_VADDR 0x2000
-#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
-
-#define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000
-#define DEFAULT_STACK_PGS 5
-
-enum vm_guest_mode {
- VM_MODE_P52V48_4K,
- VM_MODE_P52V48_16K,
- VM_MODE_P52V48_64K,
- VM_MODE_P48V48_4K,
- VM_MODE_P48V48_16K,
- VM_MODE_P48V48_64K,
- VM_MODE_P40V48_4K,
- VM_MODE_P40V48_16K,
- VM_MODE_P40V48_64K,
- VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */
- VM_MODE_P47V64_4K,
- VM_MODE_P44V64_4K,
- VM_MODE_P36V48_4K,
- VM_MODE_P36V48_16K,
- VM_MODE_P36V48_64K,
- VM_MODE_P36V47_16K,
- NUM_VM_MODES,
-};
-
-struct vm_shape {
- uint32_t type;
- uint8_t mode;
- uint8_t subtype;
- uint16_t padding;
-};
-
-kvm_static_assert(sizeof(struct vm_shape) == sizeof(uint64_t));
-
-#define VM_TYPE_DEFAULT 0
-
-#define VM_SHAPE(__mode) \
-({ \
- struct vm_shape shape = { \
- .mode = (__mode), \
- .type = VM_TYPE_DEFAULT \
- }; \
- \
- shape; \
-})
-
-#if defined(__aarch64__)
-
-extern enum vm_guest_mode vm_mode_default;
-
-#define VM_MODE_DEFAULT vm_mode_default
-#define MIN_PAGE_SHIFT 12U
-#define ptes_per_page(page_size) ((page_size) / 8)
-
-#elif defined(__x86_64__)
-
-#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K
-#define MIN_PAGE_SHIFT 12U
-#define ptes_per_page(page_size) ((page_size) / 8)
-
-#elif defined(__s390x__)
-
-#define VM_MODE_DEFAULT VM_MODE_P44V64_4K
-#define MIN_PAGE_SHIFT 12U
-#define ptes_per_page(page_size) ((page_size) / 16)
-
-#elif defined(__riscv)
-
-#if __riscv_xlen == 32
-#error "RISC-V 32-bit kvm selftests not supported"
-#endif
-
-#define VM_MODE_DEFAULT VM_MODE_P40V48_4K
-#define MIN_PAGE_SHIFT 12U
-#define ptes_per_page(page_size) ((page_size) / 8)
-
-#endif
-
-#define VM_SHAPE_DEFAULT VM_SHAPE(VM_MODE_DEFAULT)
-
-#define MIN_PAGE_SIZE (1U << MIN_PAGE_SHIFT)
-#define PTES_PER_MIN_PAGE ptes_per_page(MIN_PAGE_SIZE)
-
-struct vm_guest_mode_params {
- unsigned int pa_bits;
- unsigned int va_bits;
- unsigned int page_size;
- unsigned int page_shift;
-};
-extern const struct vm_guest_mode_params vm_guest_mode_params[];
-
-int open_path_or_exit(const char *path, int flags);
-int open_kvm_dev_path_or_exit(void);
-
-bool get_kvm_param_bool(const char *param);
-bool get_kvm_intel_param_bool(const char *param);
-bool get_kvm_amd_param_bool(const char *param);
-
-int get_kvm_param_integer(const char *param);
-int get_kvm_intel_param_integer(const char *param);
-int get_kvm_amd_param_integer(const char *param);
-
-unsigned int kvm_check_cap(long cap);
-
-static inline bool kvm_has_cap(long cap)
-{
- return kvm_check_cap(cap);
-}
-
-#define __KVM_SYSCALL_ERROR(_name, _ret) \
- "%s failed, rc: %i errno: %i (%s)", (_name), (_ret), errno, strerror(errno)
-
-/*
- * Use the "inner", double-underscore macro when reporting errors from within
- * other macros so that the name of ioctl() and not its literal numeric value
- * is printed on error. The "outer" macro is strongly preferred when reporting
- * errors "directly", i.e. without an additional layer of macros, as it reduces
- * the probability of passing in the wrong string.
- */
-#define __KVM_IOCTL_ERROR(_name, _ret) __KVM_SYSCALL_ERROR(_name, _ret)
-#define KVM_IOCTL_ERROR(_ioctl, _ret) __KVM_IOCTL_ERROR(#_ioctl, _ret)
-
-#define kvm_do_ioctl(fd, cmd, arg) \
-({ \
- kvm_static_assert(!_IOC_SIZE(cmd) || sizeof(*arg) == _IOC_SIZE(cmd)); \
- ioctl(fd, cmd, arg); \
-})
-
-#define __kvm_ioctl(kvm_fd, cmd, arg) \
- kvm_do_ioctl(kvm_fd, cmd, arg)
-
-#define kvm_ioctl(kvm_fd, cmd, arg) \
-({ \
- int ret = __kvm_ioctl(kvm_fd, cmd, arg); \
- \
- TEST_ASSERT(!ret, __KVM_IOCTL_ERROR(#cmd, ret)); \
-})
-
-static __always_inline void static_assert_is_vm(struct kvm_vm *vm) { }
-
-#define __vm_ioctl(vm, cmd, arg) \
-({ \
- static_assert_is_vm(vm); \
- kvm_do_ioctl((vm)->fd, cmd, arg); \
-})
-
-/*
- * Assert that a VM or vCPU ioctl() succeeded, with extra magic to detect if
- * the ioctl() failed because KVM killed/bugged the VM. To detect a dead VM,
- * probe KVM_CAP_USER_MEMORY, which (a) has been supported by KVM since before
- * selftests existed and (b) should never outright fail, i.e. is supposed to
- * return 0 or 1. If KVM kills a VM, KVM returns -EIO for all ioctl()s for the
- * VM and its vCPUs, including KVM_CHECK_EXTENSION.
- */
-#define __TEST_ASSERT_VM_VCPU_IOCTL(cond, name, ret, vm) \
-do { \
- int __errno = errno; \
- \
- static_assert_is_vm(vm); \
- \
- if (cond) \
- break; \
- \
- if (errno == EIO && \
- __vm_ioctl(vm, KVM_CHECK_EXTENSION, (void *)KVM_CAP_USER_MEMORY) < 0) { \
- TEST_ASSERT(errno == EIO, "KVM killed the VM, should return -EIO"); \
- TEST_FAIL("KVM killed/bugged the VM, check the kernel log for clues"); \
- } \
- errno = __errno; \
- TEST_ASSERT(cond, __KVM_IOCTL_ERROR(name, ret)); \
-} while (0)
-
-#define TEST_ASSERT_VM_VCPU_IOCTL(cond, cmd, ret, vm) \
- __TEST_ASSERT_VM_VCPU_IOCTL(cond, #cmd, ret, vm)
-
-#define vm_ioctl(vm, cmd, arg) \
-({ \
- int ret = __vm_ioctl(vm, cmd, arg); \
- \
- __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, vm); \
-})
-
-static __always_inline void static_assert_is_vcpu(struct kvm_vcpu *vcpu) { }
-
-#define __vcpu_ioctl(vcpu, cmd, arg) \
-({ \
- static_assert_is_vcpu(vcpu); \
- kvm_do_ioctl((vcpu)->fd, cmd, arg); \
-})
-
-#define vcpu_ioctl(vcpu, cmd, arg) \
-({ \
- int ret = __vcpu_ioctl(vcpu, cmd, arg); \
- \
- __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, (vcpu)->vm); \
-})
-
-/*
- * Looks up and returns the value corresponding to the capability
- * (KVM_CAP_*) given by cap.
- */
-static inline int vm_check_cap(struct kvm_vm *vm, long cap)
-{
- int ret = __vm_ioctl(vm, KVM_CHECK_EXTENSION, (void *)cap);
-
- TEST_ASSERT_VM_VCPU_IOCTL(ret >= 0, KVM_CHECK_EXTENSION, ret, vm);
- return ret;
-}
-
-static inline int __vm_enable_cap(struct kvm_vm *vm, uint32_t cap, uint64_t arg0)
-{
- struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } };
-
- return __vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap);
-}
-static inline void vm_enable_cap(struct kvm_vm *vm, uint32_t cap, uint64_t arg0)
-{
- struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } };
-
- vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap);
-}
-
-static inline void vm_set_memory_attributes(struct kvm_vm *vm, uint64_t gpa,
- uint64_t size, uint64_t attributes)
-{
- struct kvm_memory_attributes attr = {
- .attributes = attributes,
- .address = gpa,
- .size = size,
- .flags = 0,
- };
-
- /*
- * KVM_SET_MEMORY_ATTRIBUTES overwrites _all_ attributes. These flows
- * need significant enhancements to support multiple attributes.
- */
- TEST_ASSERT(!attributes || attributes == KVM_MEMORY_ATTRIBUTE_PRIVATE,
- "Update me to support multiple attributes!");
-
- vm_ioctl(vm, KVM_SET_MEMORY_ATTRIBUTES, &attr);
-}
-
-
-static inline void vm_mem_set_private(struct kvm_vm *vm, uint64_t gpa,
- uint64_t size)
-{
- vm_set_memory_attributes(vm, gpa, size, KVM_MEMORY_ATTRIBUTE_PRIVATE);
-}
-
-static inline void vm_mem_set_shared(struct kvm_vm *vm, uint64_t gpa,
- uint64_t size)
-{
- vm_set_memory_attributes(vm, gpa, size, 0);
-}
-
-void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t gpa, uint64_t size,
- bool punch_hole);
-
-static inline void vm_guest_mem_punch_hole(struct kvm_vm *vm, uint64_t gpa,
- uint64_t size)
-{
- vm_guest_mem_fallocate(vm, gpa, size, true);
-}
-
-static inline void vm_guest_mem_allocate(struct kvm_vm *vm, uint64_t gpa,
- uint64_t size)
-{
- vm_guest_mem_fallocate(vm, gpa, size, false);
-}
-
-void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size);
-const char *vm_guest_mode_string(uint32_t i);
-
-void kvm_vm_free(struct kvm_vm *vmp);
-void kvm_vm_restart(struct kvm_vm *vmp);
-void kvm_vm_release(struct kvm_vm *vmp);
-int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva,
- size_t len);
-void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename);
-int kvm_memfd_alloc(size_t size, bool hugepages);
-
-void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
-
-static inline void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
-{
- struct kvm_dirty_log args = { .dirty_bitmap = log, .slot = slot };
-
- vm_ioctl(vm, KVM_GET_DIRTY_LOG, &args);
-}
-
-static inline void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
- uint64_t first_page, uint32_t num_pages)
-{
- struct kvm_clear_dirty_log args = {
- .dirty_bitmap = log,
- .slot = slot,
- .first_page = first_page,
- .num_pages = num_pages
- };
-
- vm_ioctl(vm, KVM_CLEAR_DIRTY_LOG, &args);
-}
-
-static inline uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm)
-{
- return __vm_ioctl(vm, KVM_RESET_DIRTY_RINGS, NULL);
-}
-
-static inline int vm_get_stats_fd(struct kvm_vm *vm)
-{
- int fd = __vm_ioctl(vm, KVM_GET_STATS_FD, NULL);
-
- TEST_ASSERT_VM_VCPU_IOCTL(fd >= 0, KVM_GET_STATS_FD, fd, vm);
- return fd;
-}
-
-static inline void read_stats_header(int stats_fd, struct kvm_stats_header *header)
-{
- ssize_t ret;
-
- ret = pread(stats_fd, header, sizeof(*header), 0);
- TEST_ASSERT(ret == sizeof(*header),
- "Failed to read '%lu' header bytes, ret = '%ld'",
- sizeof(*header), ret);
-}
-
-struct kvm_stats_desc *read_stats_descriptors(int stats_fd,
- struct kvm_stats_header *header);
-
-static inline ssize_t get_stats_descriptor_size(struct kvm_stats_header *header)
-{
- /*
- * The base size of the descriptor is defined by KVM's ABI, but the
- * size of the name field is variable, as far as KVM's ABI is
- * concerned. For a given instance of KVM, the name field is the same
- * size for all stats and is provided in the overall stats header.
- */
- return sizeof(struct kvm_stats_desc) + header->name_size;
-}
-
-static inline struct kvm_stats_desc *get_stats_descriptor(struct kvm_stats_desc *stats,
- int index,
- struct kvm_stats_header *header)
-{
- /*
- * Note, size_desc includes the size of the name field, which is
- * variable. i.e. this is NOT equivalent to &stats_desc[i].
- */
- return (void *)stats + index * get_stats_descriptor_size(header);
-}
-
-void read_stat_data(int stats_fd, struct kvm_stats_header *header,
- struct kvm_stats_desc *desc, uint64_t *data,
- size_t max_elements);
-
-void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data,
- size_t max_elements);
-
-static inline uint64_t vm_get_stat(struct kvm_vm *vm, const char *stat_name)
-{
- uint64_t data;
-
- __vm_get_stat(vm, stat_name, &data, 1);
- return data;
-}
-
-void vm_create_irqchip(struct kvm_vm *vm);
-
-static inline int __vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size,
- uint64_t flags)
-{
- struct kvm_create_guest_memfd guest_memfd = {
- .size = size,
- .flags = flags,
- };
-
- return __vm_ioctl(vm, KVM_CREATE_GUEST_MEMFD, &guest_memfd);
-}
-
-static inline int vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size,
- uint64_t flags)
-{
- int fd = __vm_create_guest_memfd(vm, size, flags);
-
- TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_GUEST_MEMFD, fd));
- return fd;
-}
-
-void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
- uint64_t gpa, uint64_t size, void *hva);
-int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
- uint64_t gpa, uint64_t size, void *hva);
-void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
- uint64_t gpa, uint64_t size, void *hva,
- uint32_t guest_memfd, uint64_t guest_memfd_offset);
-int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
- uint64_t gpa, uint64_t size, void *hva,
- uint32_t guest_memfd, uint64_t guest_memfd_offset);
-
-void vm_userspace_mem_region_add(struct kvm_vm *vm,
- enum vm_mem_backing_src_type src_type,
- uint64_t guest_paddr, uint32_t slot, uint64_t npages,
- uint32_t flags);
-void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
- uint64_t guest_paddr, uint32_t slot, uint64_t npages,
- uint32_t flags, int guest_memfd_fd, uint64_t guest_memfd_offset);
-
-#ifndef vm_arch_has_protected_memory
-static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm)
-{
- return false;
-}
-#endif
-
-void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
-void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
-void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
-struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id);
-void vm_populate_vaddr_bitmap(struct kvm_vm *vm);
-vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
-vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
-vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
- enum kvm_mem_region_type type);
-vm_vaddr_t vm_vaddr_alloc_shared(struct kvm_vm *vm, size_t sz,
- vm_vaddr_t vaddr_min,
- enum kvm_mem_region_type type);
-vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages);
-vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm,
- enum kvm_mem_region_type type);
-vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm);
-
-void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- unsigned int npages);
-void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
-void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva);
-vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
-void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa);
-
-
-static inline vm_paddr_t vm_untag_gpa(struct kvm_vm *vm, vm_paddr_t gpa)
-{
- return gpa & ~vm->gpa_tag_mask;
-}
-
-void vcpu_run(struct kvm_vcpu *vcpu);
-int _vcpu_run(struct kvm_vcpu *vcpu);
-
-static inline int __vcpu_run(struct kvm_vcpu *vcpu)
-{
- return __vcpu_ioctl(vcpu, KVM_RUN, NULL);
-}
-
-void vcpu_run_complete_io(struct kvm_vcpu *vcpu);
-struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu);
-
-static inline void vcpu_enable_cap(struct kvm_vcpu *vcpu, uint32_t cap,
- uint64_t arg0)
-{
- struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } };
-
- vcpu_ioctl(vcpu, KVM_ENABLE_CAP, &enable_cap);
-}
-
-static inline void vcpu_guest_debug_set(struct kvm_vcpu *vcpu,
- struct kvm_guest_debug *debug)
-{
- vcpu_ioctl(vcpu, KVM_SET_GUEST_DEBUG, debug);
-}
-
-static inline void vcpu_mp_state_get(struct kvm_vcpu *vcpu,
- struct kvm_mp_state *mp_state)
-{
- vcpu_ioctl(vcpu, KVM_GET_MP_STATE, mp_state);
-}
-static inline void vcpu_mp_state_set(struct kvm_vcpu *vcpu,
- struct kvm_mp_state *mp_state)
-{
- vcpu_ioctl(vcpu, KVM_SET_MP_STATE, mp_state);
-}
-
-static inline void vcpu_regs_get(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
-{
- vcpu_ioctl(vcpu, KVM_GET_REGS, regs);
-}
-
-static inline void vcpu_regs_set(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
-{
- vcpu_ioctl(vcpu, KVM_SET_REGS, regs);
-}
-static inline void vcpu_sregs_get(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
-{
- vcpu_ioctl(vcpu, KVM_GET_SREGS, sregs);
-
-}
-static inline void vcpu_sregs_set(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
-{
- vcpu_ioctl(vcpu, KVM_SET_SREGS, sregs);
-}
-static inline int _vcpu_sregs_set(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
-{
- return __vcpu_ioctl(vcpu, KVM_SET_SREGS, sregs);
-}
-static inline void vcpu_fpu_get(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
-{
- vcpu_ioctl(vcpu, KVM_GET_FPU, fpu);
-}
-static inline void vcpu_fpu_set(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
-{
- vcpu_ioctl(vcpu, KVM_SET_FPU, fpu);
-}
-
-static inline int __vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id, void *addr)
-{
- struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)addr };
-
- return __vcpu_ioctl(vcpu, KVM_GET_ONE_REG, &reg);
-}
-static inline int __vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val)
-{
- struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val };
-
- return __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
-}
-static inline void vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id, void *addr)
-{
- struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)addr };
-
- vcpu_ioctl(vcpu, KVM_GET_ONE_REG, &reg);
-}
-static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val)
-{
- struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val };
-
- vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
-}
-
-#ifdef __KVM_HAVE_VCPU_EVENTS
-static inline void vcpu_events_get(struct kvm_vcpu *vcpu,
- struct kvm_vcpu_events *events)
-{
- vcpu_ioctl(vcpu, KVM_GET_VCPU_EVENTS, events);
-}
-static inline void vcpu_events_set(struct kvm_vcpu *vcpu,
- struct kvm_vcpu_events *events)
-{
- vcpu_ioctl(vcpu, KVM_SET_VCPU_EVENTS, events);
-}
-#endif
-#ifdef __x86_64__
-static inline void vcpu_nested_state_get(struct kvm_vcpu *vcpu,
- struct kvm_nested_state *state)
-{
- vcpu_ioctl(vcpu, KVM_GET_NESTED_STATE, state);
-}
-static inline int __vcpu_nested_state_set(struct kvm_vcpu *vcpu,
- struct kvm_nested_state *state)
-{
- return __vcpu_ioctl(vcpu, KVM_SET_NESTED_STATE, state);
-}
-
-static inline void vcpu_nested_state_set(struct kvm_vcpu *vcpu,
- struct kvm_nested_state *state)
-{
- vcpu_ioctl(vcpu, KVM_SET_NESTED_STATE, state);
-}
-#endif
-static inline int vcpu_get_stats_fd(struct kvm_vcpu *vcpu)
-{
- int fd = __vcpu_ioctl(vcpu, KVM_GET_STATS_FD, NULL);
-
- TEST_ASSERT_VM_VCPU_IOCTL(fd >= 0, KVM_CHECK_EXTENSION, fd, vcpu->vm);
- return fd;
-}
-
-int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr);
-
-static inline void kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr)
-{
- int ret = __kvm_has_device_attr(dev_fd, group, attr);
-
- TEST_ASSERT(!ret, "KVM_HAS_DEVICE_ATTR failed, rc: %i errno: %i", ret, errno);
-}
-
-int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val);
-
-static inline void kvm_device_attr_get(int dev_fd, uint32_t group,
- uint64_t attr, void *val)
-{
- int ret = __kvm_device_attr_get(dev_fd, group, attr, val);
-
- TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_GET_DEVICE_ATTR, ret));
-}
-
-int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val);
-
-static inline void kvm_device_attr_set(int dev_fd, uint32_t group,
- uint64_t attr, void *val)
-{
- int ret = __kvm_device_attr_set(dev_fd, group, attr, val);
-
- TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_DEVICE_ATTR, ret));
-}
-
-static inline int __vcpu_has_device_attr(struct kvm_vcpu *vcpu, uint32_t group,
- uint64_t attr)
-{
- return __kvm_has_device_attr(vcpu->fd, group, attr);
-}
-
-static inline void vcpu_has_device_attr(struct kvm_vcpu *vcpu, uint32_t group,
- uint64_t attr)
-{
- kvm_has_device_attr(vcpu->fd, group, attr);
-}
-
-static inline int __vcpu_device_attr_get(struct kvm_vcpu *vcpu, uint32_t group,
- uint64_t attr, void *val)
-{
- return __kvm_device_attr_get(vcpu->fd, group, attr, val);
-}
-
-static inline void vcpu_device_attr_get(struct kvm_vcpu *vcpu, uint32_t group,
- uint64_t attr, void *val)
-{
- kvm_device_attr_get(vcpu->fd, group, attr, val);
-}
-
-static inline int __vcpu_device_attr_set(struct kvm_vcpu *vcpu, uint32_t group,
- uint64_t attr, void *val)
-{
- return __kvm_device_attr_set(vcpu->fd, group, attr, val);
-}
-
-static inline void vcpu_device_attr_set(struct kvm_vcpu *vcpu, uint32_t group,
- uint64_t attr, void *val)
-{
- kvm_device_attr_set(vcpu->fd, group, attr, val);
-}
-
-int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type);
-int __kvm_create_device(struct kvm_vm *vm, uint64_t type);
-
-static inline int kvm_create_device(struct kvm_vm *vm, uint64_t type)
-{
- int fd = __kvm_create_device(vm, type);
-
- TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_DEVICE, fd));
- return fd;
-}
-
-void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu);
-
-/*
- * VM VCPU Args Set
- *
- * Input Args:
- * vm - Virtual Machine
- * num - number of arguments
- * ... - arguments, each of type uint64_t
- *
- * Output Args: None
- *
- * Return: None
- *
- * Sets the first @num input parameters for the function at @vcpu's entry point,
- * per the C calling convention of the architecture, to the values given as
- * variable args. Each of the variable args is expected to be of type uint64_t.
- * The maximum @num can be is specific to the architecture.
- */
-void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...);
-
-void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level);
-int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level);
-
-#define KVM_MAX_IRQ_ROUTES 4096
-
-struct kvm_irq_routing *kvm_gsi_routing_create(void);
-void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing,
- uint32_t gsi, uint32_t pin);
-int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing);
-void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing);
-
-const char *exit_reason_str(unsigned int exit_reason);
-
-vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
- uint32_t memslot);
-vm_paddr_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
- vm_paddr_t paddr_min, uint32_t memslot,
- bool protected);
-vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm);
-
-static inline vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
- vm_paddr_t paddr_min, uint32_t memslot)
-{
- /*
- * By default, allocate memory as protected for VMs that support
- * protected memory, as the majority of memory for such VMs is
- * protected, i.e. using shared memory is effectively opt-in.
- */
- return __vm_phy_pages_alloc(vm, num, paddr_min, memslot,
- vm_arch_has_protected_memory(vm));
-}
-
-/*
- * ____vm_create() does KVM_CREATE_VM and little else. __vm_create() also
- * loads the test binary into guest memory and creates an IRQ chip (x86 only).
- * __vm_create() does NOT create vCPUs, @nr_runnable_vcpus is used purely to
- * calculate the amount of memory needed for per-vCPU data, e.g. stacks.
- */
-struct kvm_vm *____vm_create(struct vm_shape shape);
-struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
- uint64_t nr_extra_pages);
-
-static inline struct kvm_vm *vm_create_barebones(void)
-{
- return ____vm_create(VM_SHAPE_DEFAULT);
-}
-
-#ifdef __x86_64__
-static inline struct kvm_vm *vm_create_barebones_protected_vm(void)
-{
- const struct vm_shape shape = {
- .mode = VM_MODE_DEFAULT,
- .type = KVM_X86_SW_PROTECTED_VM,
- };
-
- return ____vm_create(shape);
-}
-#endif
-
-static inline struct kvm_vm *vm_create(uint32_t nr_runnable_vcpus)
-{
- return __vm_create(VM_SHAPE_DEFAULT, nr_runnable_vcpus, 0);
-}
-
-struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus,
- uint64_t extra_mem_pages,
- void *guest_code, struct kvm_vcpu *vcpus[]);
-
-static inline struct kvm_vm *vm_create_with_vcpus(uint32_t nr_vcpus,
- void *guest_code,
- struct kvm_vcpu *vcpus[])
-{
- return __vm_create_with_vcpus(VM_SHAPE_DEFAULT, nr_vcpus, 0,
- guest_code, vcpus);
-}
-
-
-struct kvm_vm *__vm_create_shape_with_one_vcpu(struct vm_shape shape,
- struct kvm_vcpu **vcpu,
- uint64_t extra_mem_pages,
- void *guest_code);
-
-/*
- * Create a VM with a single vCPU with reasonable defaults and @extra_mem_pages
- * additional pages of guest memory. Returns the VM and vCPU (via out param).
- */
-static inline struct kvm_vm *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
- uint64_t extra_mem_pages,
- void *guest_code)
-{
- return __vm_create_shape_with_one_vcpu(VM_SHAPE_DEFAULT, vcpu,
- extra_mem_pages, guest_code);
-}
-
-static inline struct kvm_vm *vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
- void *guest_code)
-{
- return __vm_create_with_one_vcpu(vcpu, 0, guest_code);
-}
-
-static inline struct kvm_vm *vm_create_shape_with_one_vcpu(struct vm_shape shape,
- struct kvm_vcpu **vcpu,
- void *guest_code)
-{
- return __vm_create_shape_with_one_vcpu(shape, vcpu, 0, guest_code);
-}
-
-struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm);
-
-void kvm_pin_this_task_to_pcpu(uint32_t pcpu);
-void kvm_print_vcpu_pinning_help(void);
-void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
- int nr_vcpus);
-
-unsigned long vm_compute_max_gfn(struct kvm_vm *vm);
-unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size);
-unsigned int vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages);
-unsigned int vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages);
-static inline unsigned int
-vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
-{
- unsigned int n;
- n = vm_num_guest_pages(mode, vm_num_host_pages(mode, num_guest_pages));
-#ifdef __s390x__
- /* s390 requires 1M aligned guest sizes */
- n = (n + 255) & ~255;
-#endif
- return n;
-}
-
-#define sync_global_to_guest(vm, g) ({ \
- typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \
- memcpy(_p, &(g), sizeof(g)); \
-})
-
-#define sync_global_from_guest(vm, g) ({ \
- typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \
- memcpy(&(g), _p, sizeof(g)); \
-})
-
-/*
- * Write a global value, but only in the VM's (guest's) domain. Primarily used
- * for "globals" that hold per-VM values (VMs always duplicate code and global
- * data into their own region of physical memory), but can be used anytime it's
- * undesirable to change the host's copy of the global.
- */
-#define write_guest_global(vm, g, val) ({ \
- typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \
- typeof(g) _val = val; \
- \
- memcpy(_p, &(_val), sizeof(g)); \
-})
-
-void assert_on_unhandled_exception(struct kvm_vcpu *vcpu);
-
-void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu,
- uint8_t indent);
-
-static inline void vcpu_dump(FILE *stream, struct kvm_vcpu *vcpu,
- uint8_t indent)
-{
- vcpu_arch_dump(stream, vcpu, indent);
-}
-
-/*
- * Adds a vCPU with reasonable defaults (e.g. a stack)
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpu_id - The id of the VCPU to add to the VM.
- */
-struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id);
-void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code);
-
-static inline struct kvm_vcpu *vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
- void *guest_code)
-{
- struct kvm_vcpu *vcpu = vm_arch_vcpu_add(vm, vcpu_id);
-
- vcpu_arch_set_entry_point(vcpu, guest_code);
-
- return vcpu;
-}
-
-/* Re-create a vCPU after restarting a VM, e.g. for state save/restore tests. */
-struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, uint32_t vcpu_id);
-
-static inline struct kvm_vcpu *vm_vcpu_recreate(struct kvm_vm *vm,
- uint32_t vcpu_id)
-{
- return vm_arch_vcpu_recreate(vm, vcpu_id);
-}
-
-void vcpu_arch_free(struct kvm_vcpu *vcpu);
-
-void virt_arch_pgd_alloc(struct kvm_vm *vm);
-
-static inline void virt_pgd_alloc(struct kvm_vm *vm)
-{
- virt_arch_pgd_alloc(vm);
-}
-
-/*
- * VM Virtual Page Map
- *
- * Input Args:
- * vm - Virtual Machine
- * vaddr - VM Virtual Address
- * paddr - VM Physical Address
- * memslot - Memory region slot for new virtual translation tables
- *
- * Output Args: None
- *
- * Return: None
- *
- * Within @vm, creates a virtual translation for the page starting
- * at @vaddr to the page starting at @paddr.
- */
-void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr);
-
-static inline void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
-{
- virt_arch_pg_map(vm, vaddr, paddr);
-}
-
-
-/*
- * Address Guest Virtual to Guest Physical
- *
- * Input Args:
- * vm - Virtual Machine
- * gva - VM virtual address
- *
- * Output Args: None
- *
- * Return:
- * Equivalent VM physical address
- *
- * Returns the VM physical address of the translated VM virtual
- * address given by @gva.
- */
-vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva);
-
-static inline vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
-{
- return addr_arch_gva2gpa(vm, gva);
-}
-
-/*
- * Virtual Translation Tables Dump
- *
- * Input Args:
- * stream - Output FILE stream
- * vm - Virtual Machine
- * indent - Left margin indent amount
- *
- * Output Args: None
- *
- * Return: None
- *
- * Dumps to the FILE stream given by @stream, the contents of all the
- * virtual translation tables for the VM given by @vm.
- */
-void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
-
-static inline void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
-{
- virt_arch_dump(stream, vm, indent);
-}
-
-
-static inline int __vm_disable_nx_huge_pages(struct kvm_vm *vm)
-{
- return __vm_enable_cap(vm, KVM_CAP_VM_DISABLE_NX_HUGE_PAGES, 0);
-}
-
-/*
- * Arch hook that is invoked via a constructor, i.e. before exeucting main(),
- * to allow for arch-specific setup that is common to all tests, e.g. computing
- * the default guest "mode".
- */
-void kvm_selftest_arch_init(void);
-
-void kvm_arch_vm_post_create(struct kvm_vm *vm);
-
-bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr);
-
-uint32_t guest_get_vcpuid(void);
-
-#endif /* SELFTEST_KVM_UTIL_BASE_H */
diff --git a/tools/testing/selftests/kvm/include/kvm_util_types.h b/tools/testing/selftests/kvm/include/kvm_util_types.h
new file mode 100644
index 000000000000..ec787b97cf18
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/kvm_util_types.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UTIL_TYPES_H
+#define SELFTEST_KVM_UTIL_TYPES_H
+
+/*
+ * Provide a version of static_assert() that is guaranteed to have an optional
+ * message param. _GNU_SOURCE is defined for all KVM selftests, _GNU_SOURCE
+ * implies _ISOC11_SOURCE, and if _ISOC11_SOURCE is defined, glibc #undefs and
+ * #defines static_assert() as a direct alias to _Static_assert() (see
+ * usr/include/assert.h). Define a custom macro instead of redefining
+ * static_assert() to avoid creating non-deterministic behavior that is
+ * dependent on include order.
+ */
+#define __kvm_static_assert(expr, msg, ...) _Static_assert(expr, msg)
+#define kvm_static_assert(expr, ...) __kvm_static_assert(expr, ##__VA_ARGS__, #expr)
+
+typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
+typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
+
+#endif /* SELFTEST_KVM_UTIL_TYPES_H */
diff --git a/tools/testing/selftests/kvm/include/memstress.h b/tools/testing/selftests/kvm/include/memstress.h
index ce4e603050ea..9071eb6dea60 100644
--- a/tools/testing/selftests/kvm/include/memstress.h
+++ b/tools/testing/selftests/kvm/include/memstress.h
@@ -62,7 +62,6 @@ struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus,
void memstress_destroy_vm(struct kvm_vm *vm);
void memstress_set_write_percent(struct kvm_vm *vm, uint32_t write_percent);
-void memstress_set_random_seed(struct kvm_vm *vm, uint32_t random_seed);
void memstress_set_random_access(struct kvm_vm *vm, bool random_access);
void memstress_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct memstress_vcpu_args *));
diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h
index ce473fe251dd..5f389166338c 100644
--- a/tools/testing/selftests/kvm/include/riscv/processor.h
+++ b/tools/testing/selftests/kvm/include/riscv/processor.h
@@ -50,6 +50,16 @@ static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t subtype,
bool __vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext);
+static inline bool __vcpu_has_isa_ext(struct kvm_vcpu *vcpu, uint64_t isa_ext)
+{
+ return __vcpu_has_ext(vcpu, RISCV_ISA_EXT_REG(isa_ext));
+}
+
+static inline bool __vcpu_has_sbi_ext(struct kvm_vcpu *vcpu, uint64_t sbi_ext)
+{
+ return __vcpu_has_ext(vcpu, RISCV_SBI_EXT_REG(sbi_ext));
+}
+
struct ex_regs {
unsigned long ra;
unsigned long sp;
@@ -154,45 +164,6 @@ void vm_install_interrupt_handler(struct kvm_vm *vm, exception_handler_fn handle
#define PGTBL_PAGE_SIZE PGTBL_L0_BLOCK_SIZE
#define PGTBL_PAGE_SIZE_SHIFT PGTBL_L0_BLOCK_SHIFT
-/* SBI return error codes */
-#define SBI_SUCCESS 0
-#define SBI_ERR_FAILURE -1
-#define SBI_ERR_NOT_SUPPORTED -2
-#define SBI_ERR_INVALID_PARAM -3
-#define SBI_ERR_DENIED -4
-#define SBI_ERR_INVALID_ADDRESS -5
-#define SBI_ERR_ALREADY_AVAILABLE -6
-#define SBI_ERR_ALREADY_STARTED -7
-#define SBI_ERR_ALREADY_STOPPED -8
-
-#define SBI_EXT_EXPERIMENTAL_START 0x08000000
-#define SBI_EXT_EXPERIMENTAL_END 0x08FFFFFF
-
-#define KVM_RISCV_SELFTESTS_SBI_EXT SBI_EXT_EXPERIMENTAL_END
-#define KVM_RISCV_SELFTESTS_SBI_UCALL 0
-#define KVM_RISCV_SELFTESTS_SBI_UNEXP 1
-
-enum sbi_ext_id {
- SBI_EXT_BASE = 0x10,
- SBI_EXT_STA = 0x535441,
-};
-
-enum sbi_ext_base_fid {
- SBI_EXT_BASE_PROBE_EXT = 3,
-};
-
-struct sbiret {
- long error;
- long value;
-};
-
-struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
- unsigned long arg1, unsigned long arg2,
- unsigned long arg3, unsigned long arg4,
- unsigned long arg5);
-
-bool guest_sbi_probe_extension(int extid, long *out_val);
-
static inline void local_irq_enable(void)
{
csr_set(CSR_SSTATUS, SR_SIE);
diff --git a/tools/testing/selftests/kvm/include/riscv/sbi.h b/tools/testing/selftests/kvm/include/riscv/sbi.h
new file mode 100644
index 000000000000..046b432ae896
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/riscv/sbi.h
@@ -0,0 +1,141 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * RISC-V SBI specific definitions
+ *
+ * Copyright (C) 2024 Rivos Inc.
+ */
+
+#ifndef SELFTEST_KVM_SBI_H
+#define SELFTEST_KVM_SBI_H
+
+/* SBI spec version fields */
+#define SBI_SPEC_VERSION_DEFAULT 0x1
+#define SBI_SPEC_VERSION_MAJOR_SHIFT 24
+#define SBI_SPEC_VERSION_MAJOR_MASK 0x7f
+#define SBI_SPEC_VERSION_MINOR_MASK 0xffffff
+
+/* SBI return error codes */
+#define SBI_SUCCESS 0
+#define SBI_ERR_FAILURE -1
+#define SBI_ERR_NOT_SUPPORTED -2
+#define SBI_ERR_INVALID_PARAM -3
+#define SBI_ERR_DENIED -4
+#define SBI_ERR_INVALID_ADDRESS -5
+#define SBI_ERR_ALREADY_AVAILABLE -6
+#define SBI_ERR_ALREADY_STARTED -7
+#define SBI_ERR_ALREADY_STOPPED -8
+
+#define SBI_EXT_EXPERIMENTAL_START 0x08000000
+#define SBI_EXT_EXPERIMENTAL_END 0x08FFFFFF
+
+#define KVM_RISCV_SELFTESTS_SBI_EXT SBI_EXT_EXPERIMENTAL_END
+#define KVM_RISCV_SELFTESTS_SBI_UCALL 0
+#define KVM_RISCV_SELFTESTS_SBI_UNEXP 1
+
+enum sbi_ext_id {
+ SBI_EXT_BASE = 0x10,
+ SBI_EXT_STA = 0x535441,
+ SBI_EXT_PMU = 0x504D55,
+};
+
+enum sbi_ext_base_fid {
+ SBI_EXT_BASE_GET_SPEC_VERSION = 0,
+ SBI_EXT_BASE_GET_IMP_ID,
+ SBI_EXT_BASE_GET_IMP_VERSION,
+ SBI_EXT_BASE_PROBE_EXT = 3,
+};
+enum sbi_ext_pmu_fid {
+ SBI_EXT_PMU_NUM_COUNTERS = 0,
+ SBI_EXT_PMU_COUNTER_GET_INFO,
+ SBI_EXT_PMU_COUNTER_CFG_MATCH,
+ SBI_EXT_PMU_COUNTER_START,
+ SBI_EXT_PMU_COUNTER_STOP,
+ SBI_EXT_PMU_COUNTER_FW_READ,
+ SBI_EXT_PMU_COUNTER_FW_READ_HI,
+ SBI_EXT_PMU_SNAPSHOT_SET_SHMEM,
+};
+
+union sbi_pmu_ctr_info {
+ unsigned long value;
+ struct {
+ unsigned long csr:12;
+ unsigned long width:6;
+#if __riscv_xlen == 32
+ unsigned long reserved:13;
+#else
+ unsigned long reserved:45;
+#endif
+ unsigned long type:1;
+ };
+};
+
+struct riscv_pmu_snapshot_data {
+ u64 ctr_overflow_mask;
+ u64 ctr_values[64];
+ u64 reserved[447];
+};
+
+struct sbiret {
+ long error;
+ long value;
+};
+
+/** General pmu event codes specified in SBI PMU extension */
+enum sbi_pmu_hw_generic_events_t {
+ SBI_PMU_HW_NO_EVENT = 0,
+ SBI_PMU_HW_CPU_CYCLES = 1,
+ SBI_PMU_HW_INSTRUCTIONS = 2,
+ SBI_PMU_HW_CACHE_REFERENCES = 3,
+ SBI_PMU_HW_CACHE_MISSES = 4,
+ SBI_PMU_HW_BRANCH_INSTRUCTIONS = 5,
+ SBI_PMU_HW_BRANCH_MISSES = 6,
+ SBI_PMU_HW_BUS_CYCLES = 7,
+ SBI_PMU_HW_STALLED_CYCLES_FRONTEND = 8,
+ SBI_PMU_HW_STALLED_CYCLES_BACKEND = 9,
+ SBI_PMU_HW_REF_CPU_CYCLES = 10,
+
+ SBI_PMU_HW_GENERAL_MAX,
+};
+
+/* SBI PMU counter types */
+enum sbi_pmu_ctr_type {
+ SBI_PMU_CTR_TYPE_HW = 0x0,
+ SBI_PMU_CTR_TYPE_FW,
+};
+
+/* Flags defined for config matching function */
+#define SBI_PMU_CFG_FLAG_SKIP_MATCH BIT(0)
+#define SBI_PMU_CFG_FLAG_CLEAR_VALUE BIT(1)
+#define SBI_PMU_CFG_FLAG_AUTO_START BIT(2)
+#define SBI_PMU_CFG_FLAG_SET_VUINH BIT(3)
+#define SBI_PMU_CFG_FLAG_SET_VSINH BIT(4)
+#define SBI_PMU_CFG_FLAG_SET_UINH BIT(5)
+#define SBI_PMU_CFG_FLAG_SET_SINH BIT(6)
+#define SBI_PMU_CFG_FLAG_SET_MINH BIT(7)
+
+/* Flags defined for counter start function */
+#define SBI_PMU_START_FLAG_SET_INIT_VALUE BIT(0)
+#define SBI_PMU_START_FLAG_INIT_SNAPSHOT BIT(1)
+
+/* Flags defined for counter stop function */
+#define SBI_PMU_STOP_FLAG_RESET BIT(0)
+#define SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT BIT(1)
+
+struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
+ unsigned long arg1, unsigned long arg2,
+ unsigned long arg3, unsigned long arg4,
+ unsigned long arg5);
+
+bool guest_sbi_probe_extension(int extid, long *out_val);
+
+/* Make SBI version */
+static inline unsigned long sbi_mk_version(unsigned long major,
+ unsigned long minor)
+{
+ return ((major & SBI_SPEC_VERSION_MAJOR_MASK) << SBI_SPEC_VERSION_MAJOR_SHIFT)
+ | (minor & SBI_SPEC_VERSION_MINOR_MASK);
+}
+
+unsigned long get_host_sbi_spec_version(void);
+
+#endif /* SELFTEST_KVM_SBI_H */
diff --git a/tools/testing/selftests/kvm/include/riscv/ucall.h b/tools/testing/selftests/kvm/include/riscv/ucall.h
index be46eb32ec27..a695ae36f3e0 100644
--- a/tools/testing/selftests/kvm/include/riscv/ucall.h
+++ b/tools/testing/selftests/kvm/include/riscv/ucall.h
@@ -3,6 +3,7 @@
#define SELFTEST_KVM_UCALL_H
#include "processor.h"
+#include "sbi.h"
#define UCALL_EXIT_REASON KVM_EXIT_RISCV_SBI
diff --git a/tools/testing/selftests/kvm/include/s390x/ucall.h b/tools/testing/selftests/kvm/include/s390x/ucall.h
index b231bf2e49d6..8035a872a351 100644
--- a/tools/testing/selftests/kvm/include/s390x/ucall.h
+++ b/tools/testing/selftests/kvm/include/s390x/ucall.h
@@ -2,7 +2,7 @@
#ifndef SELFTEST_KVM_UCALL_H
#define SELFTEST_KVM_UCALL_H
-#include "kvm_util_base.h"
+#include "kvm_util.h"
#define UCALL_EXIT_REASON KVM_EXIT_S390_SIEIC
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index 8a6e30612c86..3e473058849f 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -91,9 +91,28 @@ struct guest_random_state {
uint32_t seed;
};
+extern uint32_t guest_random_seed;
+extern struct guest_random_state guest_rng;
+
struct guest_random_state new_guest_random_state(uint32_t seed);
uint32_t guest_random_u32(struct guest_random_state *state);
+static inline bool __guest_random_bool(struct guest_random_state *state,
+ uint8_t percent)
+{
+ return (guest_random_u32(state) % 100) < percent;
+}
+
+static inline bool guest_random_bool(struct guest_random_state *state)
+{
+ return __guest_random_bool(state, 50);
+}
+
+static inline uint64_t guest_random_u64(struct guest_random_state *state)
+{
+ return ((uint64_t)guest_random_u32(state) << 32) | guest_random_u32(state);
+}
+
enum vm_mem_backing_src_type {
VM_MEM_SRC_ANONYMOUS,
VM_MEM_SRC_ANONYMOUS_THP,
diff --git a/tools/testing/selftests/kvm/include/userfaultfd_util.h b/tools/testing/selftests/kvm/include/userfaultfd_util.h
index 877449c34592..60f7f9d435dc 100644
--- a/tools/testing/selftests/kvm/include/userfaultfd_util.h
+++ b/tools/testing/selftests/kvm/include/userfaultfd_util.h
@@ -5,9 +5,6 @@
* Copyright (C) 2018, Red Hat, Inc.
* Copyright (C) 2019-2022 Google LLC
*/
-
-#define _GNU_SOURCE /* for pipe2 */
-
#include <inttypes.h>
#include <time.h>
#include <pthread.h>
@@ -17,17 +14,27 @@
typedef int (*uffd_handler_t)(int uffd_mode, int uffd, struct uffd_msg *msg);
-struct uffd_desc {
+struct uffd_reader_args {
int uffd_mode;
int uffd;
- int pipefds[2];
useconds_t delay;
uffd_handler_t handler;
- pthread_t thread;
+ /* Holds the read end of the pipe for killing the reader. */
+ int pipe;
+};
+
+struct uffd_desc {
+ int uffd;
+ uint64_t num_readers;
+ /* Holds the write ends of the pipes for killing the readers. */
+ int *pipefds;
+ pthread_t *readers;
+ struct uffd_reader_args *reader_args;
};
struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
void *hva, uint64_t len,
+ uint64_t num_readers,
uffd_handler_t handler);
void uffd_stop_demand_paging(struct uffd_desc *uffd);
diff --git a/tools/testing/selftests/kvm/include/x86_64/apic.h b/tools/testing/selftests/kvm/include/x86_64/apic.h
index bed316fdecd5..0f268b55fa06 100644
--- a/tools/testing/selftests/kvm/include/x86_64/apic.h
+++ b/tools/testing/selftests/kvm/include/x86_64/apic.h
@@ -60,6 +60,14 @@
#define APIC_VECTOR_MASK 0x000FF
#define APIC_ICR2 0x310
#define SET_APIC_DEST_FIELD(x) ((x) << 24)
+#define APIC_LVTT 0x320
+#define APIC_LVT_TIMER_ONESHOT (0 << 17)
+#define APIC_LVT_TIMER_PERIODIC (1 << 17)
+#define APIC_LVT_TIMER_TSCDEADLINE (2 << 17)
+#define APIC_LVT_MASKED (1 << 16)
+#define APIC_TMICT 0x380
+#define APIC_TMCCT 0x390
+#define APIC_TDCR 0x3E0
void apic_disable(void);
void xapic_enable(void);
diff --git a/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h b/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h
index 9f1725192aa2..972bb1c4ab4c 100644
--- a/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h
+++ b/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h
@@ -5,7 +5,16 @@
#include <stdbool.h>
#include <stdint.h>
+#include "kvm_util_types.h"
+#include "test_util.h"
+
+extern bool is_forced_emulation_enabled;
+
struct kvm_vm_arch {
+ vm_vaddr_t gdt;
+ vm_vaddr_t tss;
+ vm_vaddr_t idt;
+
uint64_t c_bit;
uint64_t s_bit;
int sev_fd;
@@ -20,4 +29,23 @@ static inline bool __vm_arch_has_protected_memory(struct kvm_vm_arch *arch)
#define vm_arch_has_protected_memory(vm) \
__vm_arch_has_protected_memory(&(vm)->arch)
+#define vcpu_arch_put_guest(mem, __val) \
+do { \
+ const typeof(mem) val = (__val); \
+ \
+ if (!is_forced_emulation_enabled || guest_random_bool(&guest_rng)) { \
+ (mem) = val; \
+ } else if (guest_random_bool(&guest_rng)) { \
+ __asm__ __volatile__(KVM_FEP "mov %1, %0" \
+ : "+m" (mem) \
+ : "r" (val) : "memory"); \
+ } else { \
+ uint64_t __old = READ_ONCE(mem); \
+ \
+ __asm__ __volatile__(KVM_FEP LOCK_PREFIX "cmpxchg %[new], %[ptr]" \
+ : [ptr] "+m" (mem), [old] "+a" (__old) \
+ : [new]"r" (val) : "memory", "cc"); \
+ } \
+} while (0)
+
#endif // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 81ce37ec407d..a0c1440017bb 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -18,16 +18,12 @@
#include <linux/kvm_para.h>
#include <linux/stringify.h>
-#include "../kvm_util.h"
+#include "kvm_util.h"
+#include "ucall_common.h"
extern bool host_cpu_is_intel;
extern bool host_cpu_is_amd;
-
-enum vm_guest_x86_subtype {
- VM_SUBTYPE_NONE = 0,
- VM_SUBTYPE_SEV,
- VM_SUBTYPE_SEV_ES,
-};
+extern uint64_t guest_tsc_khz;
/* Forced emulation prefix, used to invoke the emulator unconditionally. */
#define KVM_FEP "ud2; .byte 'k', 'v', 'm';"
@@ -282,6 +278,7 @@ struct kvm_x86_cpu_property {
#define X86_PROPERTY_MAX_EXT_LEAF KVM_X86_CPU_PROPERTY(0x80000000, 0, EAX, 0, 31)
#define X86_PROPERTY_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 0, 7)
#define X86_PROPERTY_MAX_VIRT_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 8, 15)
+#define X86_PROPERTY_GUEST_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 16, 23)
#define X86_PROPERTY_SEV_C_BIT KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 0, 5)
#define X86_PROPERTY_PHYS_ADDR_REDUCTION KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11)
@@ -820,6 +817,23 @@ static inline void cpu_relax(void)
asm volatile("rep; nop" ::: "memory");
}
+static inline void udelay(unsigned long usec)
+{
+ uint64_t start, now, cycles;
+
+ GUEST_ASSERT(guest_tsc_khz);
+ cycles = guest_tsc_khz / 1000 * usec;
+
+ /*
+ * Deliberately don't PAUSE, a.k.a. cpu_relax(), so that the delay is
+ * as accurate as possible, e.g. doesn't trigger PAUSE-Loop VM-Exits.
+ */
+ start = rdtsc();
+ do {
+ now = rdtsc();
+ } while (now - start < cycles);
+}
+
#define ud2() \
__asm__ __volatile__( \
"ud2\n" \
@@ -1139,8 +1153,6 @@ struct idt_entry {
uint32_t offset2; uint32_t reserved;
};
-void vm_init_descriptor_tables(struct kvm_vm *vm);
-void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu);
void vm_install_exception_handler(struct kvm_vm *vm, int vector,
void (*handler)(struct ex_regs *));
diff --git a/tools/testing/selftests/kvm/include/x86_64/sev.h b/tools/testing/selftests/kvm/include/x86_64/sev.h
index 8a1bf88474c9..82c11c81a956 100644
--- a/tools/testing/selftests/kvm/include/x86_64/sev.h
+++ b/tools/testing/selftests/kvm/include/x86_64/sev.h
@@ -31,8 +31,9 @@ void sev_vm_launch(struct kvm_vm *vm, uint32_t policy);
void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement);
void sev_vm_launch_finish(struct kvm_vm *vm);
-struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t policy, void *guest_code,
+struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code,
struct kvm_vcpu **cpu);
+void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement);
kvm_static_assert(SEV_RET_SUCCESS == 0);
@@ -67,20 +68,8 @@ kvm_static_assert(SEV_RET_SUCCESS == 0);
__TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, vm); \
})
-static inline void sev_vm_init(struct kvm_vm *vm)
-{
- vm->arch.sev_fd = open_sev_dev_path_or_exit();
-
- vm_sev_ioctl(vm, KVM_SEV_INIT, NULL);
-}
-
-
-static inline void sev_es_vm_init(struct kvm_vm *vm)
-{
- vm->arch.sev_fd = open_sev_dev_path_or_exit();
-
- vm_sev_ioctl(vm, KVM_SEV_ES_INIT, NULL);
-}
+void sev_vm_init(struct kvm_vm *vm);
+void sev_es_vm_init(struct kvm_vm *vm);
static inline void sev_register_encrypted_memory(struct kvm_vm *vm,
struct userspace_mem_region *region)
diff --git a/tools/testing/selftests/kvm/include/x86_64/ucall.h b/tools/testing/selftests/kvm/include/x86_64/ucall.h
index 06b244bd06ee..d3825dcc3cd9 100644
--- a/tools/testing/selftests/kvm/include/x86_64/ucall.h
+++ b/tools/testing/selftests/kvm/include/x86_64/ucall.h
@@ -2,7 +2,7 @@
#ifndef SELFTEST_KVM_UCALL_H
#define SELFTEST_KVM_UCALL_H
-#include "kvm_util_base.h"
+#include "kvm_util.h"
#define UCALL_EXIT_REASON KVM_EXIT_IO
diff --git a/tools/testing/selftests/kvm/kvm_binary_stats_test.c b/tools/testing/selftests/kvm/kvm_binary_stats_test.c
index 698c1cfa3111..f02355c3c4c2 100644
--- a/tools/testing/selftests/kvm/kvm_binary_stats_test.c
+++ b/tools/testing/selftests/kvm/kvm_binary_stats_test.c
@@ -6,8 +6,6 @@
*
* Test the fd-based interface for KVM statistics.
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
index b9e23265e4b3..c78f34699f73 100644
--- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
+++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
@@ -6,8 +6,6 @@
*
* Test for KVM_CAP_MAX_VCPUS and KVM_CAP_MAX_VCPU_ID.
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c
index e0ba97ac1c56..dd8b12f626d3 100644
--- a/tools/testing/selftests/kvm/kvm_page_table_test.c
+++ b/tools/testing/selftests/kvm/kvm_page_table_test.c
@@ -8,9 +8,6 @@
* page size have been pre-allocated on your system, if you are planning to
* use hugepages to back the guest memory for testing.
*/
-
-#define _GNU_SOURCE /* for program_invocation_name */
-
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
@@ -21,6 +18,7 @@
#include "kvm_util.h"
#include "processor.h"
#include "guest_modes.h"
+#include "ucall_common.h"
#define TEST_MEM_SLOT_INDEX 1
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic.c b/tools/testing/selftests/kvm/lib/aarch64/gic.c
index 55668631d546..7abbf8866512 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/gic.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/gic.c
@@ -17,13 +17,12 @@
static const struct gic_common_ops *gic_common_ops;
static struct spinlock gic_lock;
-static void gic_cpu_init(unsigned int cpu, void *redist_base)
+static void gic_cpu_init(unsigned int cpu)
{
- gic_common_ops->gic_cpu_init(cpu, redist_base);
+ gic_common_ops->gic_cpu_init(cpu);
}
-static void
-gic_dist_init(enum gic_type type, unsigned int nr_cpus, void *dist_base)
+static void gic_dist_init(enum gic_type type, unsigned int nr_cpus)
{
const struct gic_common_ops *gic_ops = NULL;
@@ -40,7 +39,7 @@ gic_dist_init(enum gic_type type, unsigned int nr_cpus, void *dist_base)
GUEST_ASSERT(gic_ops);
- gic_ops->gic_init(nr_cpus, dist_base);
+ gic_ops->gic_init(nr_cpus);
gic_common_ops = gic_ops;
/* Make sure that the initialized data is visible to all the vCPUs */
@@ -49,18 +48,15 @@ gic_dist_init(enum gic_type type, unsigned int nr_cpus, void *dist_base)
spin_unlock(&gic_lock);
}
-void gic_init(enum gic_type type, unsigned int nr_cpus,
- void *dist_base, void *redist_base)
+void gic_init(enum gic_type type, unsigned int nr_cpus)
{
uint32_t cpu = guest_get_vcpuid();
GUEST_ASSERT(type < GIC_TYPE_MAX);
- GUEST_ASSERT(dist_base);
- GUEST_ASSERT(redist_base);
GUEST_ASSERT(nr_cpus);
- gic_dist_init(type, nr_cpus, dist_base);
- gic_cpu_init(cpu, redist_base);
+ gic_dist_init(type, nr_cpus);
+ gic_cpu_init(cpu);
}
void gic_irq_enable(unsigned int intid)
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_private.h b/tools/testing/selftests/kvm/lib/aarch64/gic_private.h
index 75d07313c893..d24e9ecc96c6 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/gic_private.h
+++ b/tools/testing/selftests/kvm/lib/aarch64/gic_private.h
@@ -8,8 +8,8 @@
#define SELFTEST_KVM_GIC_PRIVATE_H
struct gic_common_ops {
- void (*gic_init)(unsigned int nr_cpus, void *dist_base);
- void (*gic_cpu_init)(unsigned int cpu, void *redist_base);
+ void (*gic_init)(unsigned int nr_cpus);
+ void (*gic_cpu_init)(unsigned int cpu);
void (*gic_irq_enable)(unsigned int intid);
void (*gic_irq_disable)(unsigned int intid);
uint64_t (*gic_read_iar)(void);
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c
index 263bf3ed8fd5..66d05506f78b 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c
@@ -9,12 +9,21 @@
#include "processor.h"
#include "delay.h"
+#include "gic.h"
#include "gic_v3.h"
#include "gic_private.h"
+#define GICV3_MAX_CPUS 512
+
+#define GICD_INT_DEF_PRI 0xa0
+#define GICD_INT_DEF_PRI_X4 ((GICD_INT_DEF_PRI << 24) |\
+ (GICD_INT_DEF_PRI << 16) |\
+ (GICD_INT_DEF_PRI << 8) |\
+ GICD_INT_DEF_PRI)
+
+#define ICC_PMR_DEF_PRIO 0xf0
+
struct gicv3_data {
- void *dist_base;
- void *redist_base[GICV3_MAX_CPUS];
unsigned int nr_cpus;
unsigned int nr_spis;
};
@@ -35,17 +44,23 @@ static void gicv3_gicd_wait_for_rwp(void)
{
unsigned int count = 100000; /* 1s */
- while (readl(gicv3_data.dist_base + GICD_CTLR) & GICD_CTLR_RWP) {
+ while (readl(GICD_BASE_GVA + GICD_CTLR) & GICD_CTLR_RWP) {
GUEST_ASSERT(count--);
udelay(10);
}
}
-static void gicv3_gicr_wait_for_rwp(void *redist_base)
+static inline volatile void *gicr_base_cpu(uint32_t cpu)
+{
+ /* Align all the redistributors sequentially */
+ return GICR_BASE_GVA + cpu * SZ_64K * 2;
+}
+
+static void gicv3_gicr_wait_for_rwp(uint32_t cpu)
{
unsigned int count = 100000; /* 1s */
- while (readl(redist_base + GICR_CTLR) & GICR_CTLR_RWP) {
+ while (readl(gicr_base_cpu(cpu) + GICR_CTLR) & GICR_CTLR_RWP) {
GUEST_ASSERT(count--);
udelay(10);
}
@@ -56,7 +71,7 @@ static void gicv3_wait_for_rwp(uint32_t cpu_or_dist)
if (cpu_or_dist & DIST_BIT)
gicv3_gicd_wait_for_rwp();
else
- gicv3_gicr_wait_for_rwp(gicv3_data.redist_base[cpu_or_dist]);
+ gicv3_gicr_wait_for_rwp(cpu_or_dist);
}
static enum gicv3_intid_range get_intid_range(unsigned int intid)
@@ -116,15 +131,15 @@ static void gicv3_set_eoi_split(bool split)
uint32_t gicv3_reg_readl(uint32_t cpu_or_dist, uint64_t offset)
{
- void *base = cpu_or_dist & DIST_BIT ? gicv3_data.dist_base
- : sgi_base_from_redist(gicv3_data.redist_base[cpu_or_dist]);
+ volatile void *base = cpu_or_dist & DIST_BIT ? GICD_BASE_GVA
+ : sgi_base_from_redist(gicr_base_cpu(cpu_or_dist));
return readl(base + offset);
}
void gicv3_reg_writel(uint32_t cpu_or_dist, uint64_t offset, uint32_t reg_val)
{
- void *base = cpu_or_dist & DIST_BIT ? gicv3_data.dist_base
- : sgi_base_from_redist(gicv3_data.redist_base[cpu_or_dist]);
+ volatile void *base = cpu_or_dist & DIST_BIT ? GICD_BASE_GVA
+ : sgi_base_from_redist(gicr_base_cpu(cpu_or_dist));
writel(reg_val, base + offset);
}
@@ -263,7 +278,7 @@ static bool gicv3_irq_get_pending(uint32_t intid)
return gicv3_read_reg(intid, GICD_ISPENDR, 32, 1);
}
-static void gicv3_enable_redist(void *redist_base)
+static void gicv3_enable_redist(volatile void *redist_base)
{
uint32_t val = readl(redist_base + GICR_WAKER);
unsigned int count = 100000; /* 1s */
@@ -278,21 +293,15 @@ static void gicv3_enable_redist(void *redist_base)
}
}
-static inline void *gicr_base_cpu(void *redist_base, uint32_t cpu)
+static void gicv3_cpu_init(unsigned int cpu)
{
- /* Align all the redistributors sequentially */
- return redist_base + cpu * SZ_64K * 2;
-}
-
-static void gicv3_cpu_init(unsigned int cpu, void *redist_base)
-{
- void *sgi_base;
+ volatile void *sgi_base;
unsigned int i;
- void *redist_base_cpu;
+ volatile void *redist_base_cpu;
GUEST_ASSERT(cpu < gicv3_data.nr_cpus);
- redist_base_cpu = gicr_base_cpu(redist_base, cpu);
+ redist_base_cpu = gicr_base_cpu(cpu);
sgi_base = sgi_base_from_redist(redist_base_cpu);
gicv3_enable_redist(redist_base_cpu);
@@ -310,7 +319,7 @@ static void gicv3_cpu_init(unsigned int cpu, void *redist_base)
writel(GICD_INT_DEF_PRI_X4,
sgi_base + GICR_IPRIORITYR0 + i);
- gicv3_gicr_wait_for_rwp(redist_base_cpu);
+ gicv3_gicr_wait_for_rwp(cpu);
/* Enable the GIC system register (ICC_*) access */
write_sysreg_s(read_sysreg_s(SYS_ICC_SRE_EL1) | ICC_SRE_EL1_SRE,
@@ -320,18 +329,15 @@ static void gicv3_cpu_init(unsigned int cpu, void *redist_base)
write_sysreg_s(ICC_PMR_DEF_PRIO, SYS_ICC_PMR_EL1);
/* Enable non-secure Group-1 interrupts */
- write_sysreg_s(ICC_IGRPEN1_EL1_ENABLE, SYS_ICC_GRPEN1_EL1);
-
- gicv3_data.redist_base[cpu] = redist_base_cpu;
+ write_sysreg_s(ICC_IGRPEN1_EL1_MASK, SYS_ICC_IGRPEN1_EL1);
}
static void gicv3_dist_init(void)
{
- void *dist_base = gicv3_data.dist_base;
unsigned int i;
/* Disable the distributor until we set things up */
- writel(0, dist_base + GICD_CTLR);
+ writel(0, GICD_BASE_GVA + GICD_CTLR);
gicv3_gicd_wait_for_rwp();
/*
@@ -339,33 +345,32 @@ static void gicv3_dist_init(void)
* Also, deactivate and disable them.
*/
for (i = 32; i < gicv3_data.nr_spis; i += 32) {
- writel(~0, dist_base + GICD_IGROUPR + i / 8);
- writel(~0, dist_base + GICD_ICACTIVER + i / 8);
- writel(~0, dist_base + GICD_ICENABLER + i / 8);
+ writel(~0, GICD_BASE_GVA + GICD_IGROUPR + i / 8);
+ writel(~0, GICD_BASE_GVA + GICD_ICACTIVER + i / 8);
+ writel(~0, GICD_BASE_GVA + GICD_ICENABLER + i / 8);
}
/* Set a default priority for all the SPIs */
for (i = 32; i < gicv3_data.nr_spis; i += 4)
writel(GICD_INT_DEF_PRI_X4,
- dist_base + GICD_IPRIORITYR + i);
+ GICD_BASE_GVA + GICD_IPRIORITYR + i);
/* Wait for the settings to sync-in */
gicv3_gicd_wait_for_rwp();
/* Finally, enable the distributor globally with ARE */
writel(GICD_CTLR_ARE_NS | GICD_CTLR_ENABLE_G1A |
- GICD_CTLR_ENABLE_G1, dist_base + GICD_CTLR);
+ GICD_CTLR_ENABLE_G1, GICD_BASE_GVA + GICD_CTLR);
gicv3_gicd_wait_for_rwp();
}
-static void gicv3_init(unsigned int nr_cpus, void *dist_base)
+static void gicv3_init(unsigned int nr_cpus)
{
GUEST_ASSERT(nr_cpus <= GICV3_MAX_CPUS);
gicv3_data.nr_cpus = nr_cpus;
- gicv3_data.dist_base = dist_base;
gicv3_data.nr_spis = GICD_TYPER_SPIS(
- readl(gicv3_data.dist_base + GICD_TYPER));
+ readl(GICD_BASE_GVA + GICD_TYPER));
if (gicv3_data.nr_spis > 1020)
gicv3_data.nr_spis = 1020;
@@ -396,3 +401,27 @@ const struct gic_common_ops gicv3_ops = {
.gic_irq_get_pending = gicv3_irq_get_pending,
.gic_irq_set_config = gicv3_irq_set_config,
};
+
+void gic_rdist_enable_lpis(vm_paddr_t cfg_table, size_t cfg_table_size,
+ vm_paddr_t pend_table)
+{
+ volatile void *rdist_base = gicr_base_cpu(guest_get_vcpuid());
+
+ u32 ctlr;
+ u64 val;
+
+ val = (cfg_table |
+ GICR_PROPBASER_InnerShareable |
+ GICR_PROPBASER_RaWaWb |
+ ((ilog2(cfg_table_size) - 1) & GICR_PROPBASER_IDBITS_MASK));
+ writeq_relaxed(val, rdist_base + GICR_PROPBASER);
+
+ val = (pend_table |
+ GICR_PENDBASER_InnerShareable |
+ GICR_PENDBASER_RaWaWb);
+ writeq_relaxed(val, rdist_base + GICR_PENDBASER);
+
+ ctlr = readl_relaxed(rdist_base + GICR_CTLR);
+ ctlr |= GICR_CTLR_ENABLE_LPIS;
+ writel_relaxed(ctlr, rdist_base + GICR_CTLR);
+}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3_its.c b/tools/testing/selftests/kvm/lib/aarch64/gic_v3_its.c
new file mode 100644
index 000000000000..09f270545646
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/aarch64/gic_v3_its.c
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Guest ITS library, generously donated by drivers/irqchip/irq-gic-v3-its.c
+ * over in the kernel tree.
+ */
+
+#include <linux/kvm.h>
+#include <linux/sizes.h>
+#include <asm/kvm_para.h>
+#include <asm/kvm.h>
+
+#include "kvm_util.h"
+#include "vgic.h"
+#include "gic.h"
+#include "gic_v3.h"
+#include "processor.h"
+
+static u64 its_read_u64(unsigned long offset)
+{
+ return readq_relaxed(GITS_BASE_GVA + offset);
+}
+
+static void its_write_u64(unsigned long offset, u64 val)
+{
+ writeq_relaxed(val, GITS_BASE_GVA + offset);
+}
+
+static u32 its_read_u32(unsigned long offset)
+{
+ return readl_relaxed(GITS_BASE_GVA + offset);
+}
+
+static void its_write_u32(unsigned long offset, u32 val)
+{
+ writel_relaxed(val, GITS_BASE_GVA + offset);
+}
+
+static unsigned long its_find_baser(unsigned int type)
+{
+ int i;
+
+ for (i = 0; i < GITS_BASER_NR_REGS; i++) {
+ u64 baser;
+ unsigned long offset = GITS_BASER + (i * sizeof(baser));
+
+ baser = its_read_u64(offset);
+ if (GITS_BASER_TYPE(baser) == type)
+ return offset;
+ }
+
+ GUEST_FAIL("Couldn't find an ITS BASER of type %u", type);
+ return -1;
+}
+
+static void its_install_table(unsigned int type, vm_paddr_t base, size_t size)
+{
+ unsigned long offset = its_find_baser(type);
+ u64 baser;
+
+ baser = ((size / SZ_64K) - 1) |
+ GITS_BASER_PAGE_SIZE_64K |
+ GITS_BASER_InnerShareable |
+ base |
+ GITS_BASER_RaWaWb |
+ GITS_BASER_VALID;
+
+ its_write_u64(offset, baser);
+}
+
+static void its_install_cmdq(vm_paddr_t base, size_t size)
+{
+ u64 cbaser;
+
+ cbaser = ((size / SZ_4K) - 1) |
+ GITS_CBASER_InnerShareable |
+ base |
+ GITS_CBASER_RaWaWb |
+ GITS_CBASER_VALID;
+
+ its_write_u64(GITS_CBASER, cbaser);
+}
+
+void its_init(vm_paddr_t coll_tbl, size_t coll_tbl_sz,
+ vm_paddr_t device_tbl, size_t device_tbl_sz,
+ vm_paddr_t cmdq, size_t cmdq_size)
+{
+ u32 ctlr;
+
+ its_install_table(GITS_BASER_TYPE_COLLECTION, coll_tbl, coll_tbl_sz);
+ its_install_table(GITS_BASER_TYPE_DEVICE, device_tbl, device_tbl_sz);
+ its_install_cmdq(cmdq, cmdq_size);
+
+ ctlr = its_read_u32(GITS_CTLR);
+ ctlr |= GITS_CTLR_ENABLE;
+ its_write_u32(GITS_CTLR, ctlr);
+}
+
+struct its_cmd_block {
+ union {
+ u64 raw_cmd[4];
+ __le64 raw_cmd_le[4];
+ };
+};
+
+static inline void its_fixup_cmd(struct its_cmd_block *cmd)
+{
+ /* Let's fixup BE commands */
+ cmd->raw_cmd_le[0] = cpu_to_le64(cmd->raw_cmd[0]);
+ cmd->raw_cmd_le[1] = cpu_to_le64(cmd->raw_cmd[1]);
+ cmd->raw_cmd_le[2] = cpu_to_le64(cmd->raw_cmd[2]);
+ cmd->raw_cmd_le[3] = cpu_to_le64(cmd->raw_cmd[3]);
+}
+
+static void its_mask_encode(u64 *raw_cmd, u64 val, int h, int l)
+{
+ u64 mask = GENMASK_ULL(h, l);
+ *raw_cmd &= ~mask;
+ *raw_cmd |= (val << l) & mask;
+}
+
+static void its_encode_cmd(struct its_cmd_block *cmd, u8 cmd_nr)
+{
+ its_mask_encode(&cmd->raw_cmd[0], cmd_nr, 7, 0);
+}
+
+static void its_encode_devid(struct its_cmd_block *cmd, u32 devid)
+{
+ its_mask_encode(&cmd->raw_cmd[0], devid, 63, 32);
+}
+
+static void its_encode_event_id(struct its_cmd_block *cmd, u32 id)
+{
+ its_mask_encode(&cmd->raw_cmd[1], id, 31, 0);
+}
+
+static void its_encode_phys_id(struct its_cmd_block *cmd, u32 phys_id)
+{
+ its_mask_encode(&cmd->raw_cmd[1], phys_id, 63, 32);
+}
+
+static void its_encode_size(struct its_cmd_block *cmd, u8 size)
+{
+ its_mask_encode(&cmd->raw_cmd[1], size, 4, 0);
+}
+
+static void its_encode_itt(struct its_cmd_block *cmd, u64 itt_addr)
+{
+ its_mask_encode(&cmd->raw_cmd[2], itt_addr >> 8, 51, 8);
+}
+
+static void its_encode_valid(struct its_cmd_block *cmd, int valid)
+{
+ its_mask_encode(&cmd->raw_cmd[2], !!valid, 63, 63);
+}
+
+static void its_encode_target(struct its_cmd_block *cmd, u64 target_addr)
+{
+ its_mask_encode(&cmd->raw_cmd[2], target_addr >> 16, 51, 16);
+}
+
+static void its_encode_collection(struct its_cmd_block *cmd, u16 col)
+{
+ its_mask_encode(&cmd->raw_cmd[2], col, 15, 0);
+}
+
+#define GITS_CMDQ_POLL_ITERATIONS 0
+
+static void its_send_cmd(void *cmdq_base, struct its_cmd_block *cmd)
+{
+ u64 cwriter = its_read_u64(GITS_CWRITER);
+ struct its_cmd_block *dst = cmdq_base + cwriter;
+ u64 cbaser = its_read_u64(GITS_CBASER);
+ size_t cmdq_size;
+ u64 next;
+ int i;
+
+ cmdq_size = ((cbaser & 0xFF) + 1) * SZ_4K;
+
+ its_fixup_cmd(cmd);
+
+ WRITE_ONCE(*dst, *cmd);
+ dsb(ishst);
+ next = (cwriter + sizeof(*cmd)) % cmdq_size;
+ its_write_u64(GITS_CWRITER, next);
+
+ /*
+ * Polling isn't necessary considering KVM's ITS emulation at the time
+ * of writing this, as the CMDQ is processed synchronously after a write
+ * to CWRITER.
+ */
+ for (i = 0; its_read_u64(GITS_CREADR) != next; i++) {
+ __GUEST_ASSERT(i < GITS_CMDQ_POLL_ITERATIONS,
+ "ITS didn't process command at offset %lu after %d iterations\n",
+ cwriter, i);
+
+ cpu_relax();
+ }
+}
+
+void its_send_mapd_cmd(void *cmdq_base, u32 device_id, vm_paddr_t itt_base,
+ size_t itt_size, bool valid)
+{
+ struct its_cmd_block cmd = {};
+
+ its_encode_cmd(&cmd, GITS_CMD_MAPD);
+ its_encode_devid(&cmd, device_id);
+ its_encode_size(&cmd, ilog2(itt_size) - 1);
+ its_encode_itt(&cmd, itt_base);
+ its_encode_valid(&cmd, valid);
+
+ its_send_cmd(cmdq_base, &cmd);
+}
+
+void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool valid)
+{
+ struct its_cmd_block cmd = {};
+
+ its_encode_cmd(&cmd, GITS_CMD_MAPC);
+ its_encode_collection(&cmd, collection_id);
+ its_encode_target(&cmd, vcpu_id);
+ its_encode_valid(&cmd, valid);
+
+ its_send_cmd(cmdq_base, &cmd);
+}
+
+void its_send_mapti_cmd(void *cmdq_base, u32 device_id, u32 event_id,
+ u32 collection_id, u32 intid)
+{
+ struct its_cmd_block cmd = {};
+
+ its_encode_cmd(&cmd, GITS_CMD_MAPTI);
+ its_encode_devid(&cmd, device_id);
+ its_encode_event_id(&cmd, event_id);
+ its_encode_phys_id(&cmd, intid);
+ its_encode_collection(&cmd, collection_id);
+
+ its_send_cmd(cmdq_base, &cmd);
+}
+
+void its_send_invall_cmd(void *cmdq_base, u32 collection_id)
+{
+ struct its_cmd_block cmd = {};
+
+ its_encode_cmd(&cmd, GITS_CMD_INVALL);
+ its_encode_collection(&cmd, collection_id);
+
+ its_send_cmd(cmdq_base, &cmd);
+}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index a9eb17295be4..0ac7cc89f38c 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -11,6 +11,8 @@
#include "guest_modes.h"
#include "kvm_util.h"
#include "processor.h"
+#include "ucall_common.h"
+
#include <linux/bitfield.h>
#include <linux/sizes.h>
diff --git a/tools/testing/selftests/kvm/lib/aarch64/vgic.c b/tools/testing/selftests/kvm/lib/aarch64/vgic.c
index 184378d593e9..4427f43f73ea 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/vgic.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/vgic.c
@@ -3,8 +3,10 @@
* ARM Generic Interrupt Controller (GIC) v3 host support
*/
+#include <linux/kernel.h>
#include <linux/kvm.h>
#include <linux/sizes.h>
+#include <asm/cputype.h>
#include <asm/kvm_para.h>
#include <asm/kvm.h>
@@ -19,8 +21,6 @@
* Input args:
* vm - KVM VM
* nr_vcpus - Number of vCPUs supported by this VM
- * gicd_base_gpa - Guest Physical Address of the Distributor region
- * gicr_base_gpa - Guest Physical Address of the Redistributor region
*
* Output args: None
*
@@ -30,11 +30,10 @@
* redistributor regions of the guest. Since it depends on the number of
* vCPUs for the VM, it must be called after all the vCPUs have been created.
*/
-int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs,
- uint64_t gicd_base_gpa, uint64_t gicr_base_gpa)
+int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs)
{
int gic_fd;
- uint64_t redist_attr;
+ uint64_t attr;
struct list_head *iter;
unsigned int nr_gic_pages, nr_vcpus_created = 0;
@@ -60,18 +59,19 @@ int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs,
kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+ attr = GICD_BASE_GPA;
kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_DIST, &gicd_base_gpa);
+ KVM_VGIC_V3_ADDR_TYPE_DIST, &attr);
nr_gic_pages = vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_DIST_SIZE);
- virt_map(vm, gicd_base_gpa, gicd_base_gpa, nr_gic_pages);
+ virt_map(vm, GICD_BASE_GPA, GICD_BASE_GPA, nr_gic_pages);
/* Redistributor setup */
- redist_attr = REDIST_REGION_ATTR_ADDR(nr_vcpus, gicr_base_gpa, 0, 0);
+ attr = REDIST_REGION_ATTR_ADDR(nr_vcpus, GICR_BASE_GPA, 0, 0);
kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
- KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &redist_attr);
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &attr);
nr_gic_pages = vm_calc_num_guest_pages(vm->mode,
KVM_VGIC_V3_REDIST_SIZE * nr_vcpus);
- virt_map(vm, gicr_base_gpa, gicr_base_gpa, nr_gic_pages);
+ virt_map(vm, GICR_BASE_GPA, GICR_BASE_GPA, nr_gic_pages);
kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
@@ -168,3 +168,21 @@ void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu)
{
vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISACTIVER);
}
+
+int vgic_its_setup(struct kvm_vm *vm)
+{
+ int its_fd = kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_ITS);
+ u64 attr;
+
+ attr = GITS_BASE_GPA;
+ kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_ITS_ADDR_TYPE, &attr);
+
+ kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ virt_map(vm, GITS_BASE_GPA, GITS_BASE_GPA,
+ vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_ITS_SIZE));
+
+ return its_fd;
+}
diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c
index 2bd25b191d15..b49690658c60 100644
--- a/tools/testing/selftests/kvm/lib/assert.c
+++ b/tools/testing/selftests/kvm/lib/assert.c
@@ -4,9 +4,6 @@
*
* Copyright (C) 2018, Google LLC.
*/
-
-#define _GNU_SOURCE /* for getline(3) and strchrnul(3)*/
-
#include "test_util.h"
#include <execinfo.h>
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index b2262b5fad9e..56b170b725b3 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -4,11 +4,10 @@
*
* Copyright (C) 2018, Google LLC.
*/
-
-#define _GNU_SOURCE /* for program_invocation_name */
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
+#include "ucall_common.h"
#include <assert.h>
#include <sched.h>
@@ -20,6 +19,10 @@
#define KVM_UTIL_MIN_PFN 2
+uint32_t guest_random_seed;
+struct guest_random_state guest_rng;
+static uint32_t last_guest_seed;
+
static int vcpu_mmap_sz(void);
int open_path_or_exit(const char *path, int flags)
@@ -276,7 +279,6 @@ struct kvm_vm *____vm_create(struct vm_shape shape)
vm->mode = shape.mode;
vm->type = shape.type;
- vm->subtype = shape.subtype;
vm->pa_bits = vm_guest_mode_params[vm->mode].pa_bits;
vm->va_bits = vm_guest_mode_params[vm->mode].va_bits;
@@ -433,6 +435,13 @@ struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
slot0 = memslot2region(vm, 0);
ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size);
+ if (guest_random_seed != last_guest_seed) {
+ pr_info("Random seed: 0x%x\n", guest_random_seed);
+ last_guest_seed = guest_random_seed;
+ }
+ guest_rng = new_guest_random_state(guest_random_seed);
+ sync_global_to_guest(vm, guest_rng);
+
kvm_arch_vm_post_create(vm);
return vm;
@@ -930,6 +939,10 @@ void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
errno, strerror(errno));
}
+#define TEST_REQUIRE_SET_USER_MEMORY_REGION2() \
+ __TEST_REQUIRE(kvm_has_cap(KVM_CAP_USER_MEMORY2), \
+ "KVM selftests now require KVM_SET_USER_MEMORY_REGION2 (introduced in v6.8)")
+
int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
uint64_t gpa, uint64_t size, void *hva,
uint32_t guest_memfd, uint64_t guest_memfd_offset)
@@ -944,6 +957,8 @@ int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flag
.guest_memfd_offset = guest_memfd_offset,
};
+ TEST_REQUIRE_SET_USER_MEMORY_REGION2();
+
return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION2, &region);
}
@@ -970,6 +985,8 @@ void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
size_t mem_size = npages * vm->page_size;
size_t alignment;
+ TEST_REQUIRE_SET_USER_MEMORY_REGION2();
+
TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages,
"Number of guest pages is not compatible with the host. "
"Try npages=%d", vm_adjust_num_guest_pages(vm->mode, npages));
@@ -2306,6 +2323,9 @@ void __attribute((constructor)) kvm_selftest_init(void)
/* Tell stdout not to buffer its content. */
setbuf(stdout, NULL);
+ guest_random_seed = last_guest_seed = random();
+ pr_info("Random seed: 0x%x\n", guest_random_seed);
+
kvm_selftest_arch_init();
}
diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c
index cf2c73971308..313277486a1d 100644
--- a/tools/testing/selftests/kvm/lib/memstress.c
+++ b/tools/testing/selftests/kvm/lib/memstress.c
@@ -2,14 +2,13 @@
/*
* Copyright (C) 2020, Google LLC.
*/
-#define _GNU_SOURCE
-
#include <inttypes.h>
#include <linux/bitmap.h>
#include "kvm_util.h"
#include "memstress.h"
#include "processor.h"
+#include "ucall_common.h"
struct memstress_args memstress_args;
@@ -56,7 +55,7 @@ void memstress_guest_code(uint32_t vcpu_idx)
uint64_t page;
int i;
- rand_state = new_guest_random_state(args->random_seed + vcpu_idx);
+ rand_state = new_guest_random_state(guest_random_seed + vcpu_idx);
gva = vcpu_args->gva;
pages = vcpu_args->pages;
@@ -76,7 +75,7 @@ void memstress_guest_code(uint32_t vcpu_idx)
addr = gva + (page * args->guest_page_size);
- if (guest_random_u32(&rand_state) % 100 < args->write_percent)
+ if (__guest_random_bool(&rand_state, args->write_percent))
*(uint64_t *)addr = 0x0123456789ABCDEF;
else
READ_ONCE(*(uint64_t *)addr);
@@ -243,12 +242,6 @@ void memstress_set_write_percent(struct kvm_vm *vm, uint32_t write_percent)
sync_global_to_guest(vm, memstress_args.write_percent);
}
-void memstress_set_random_seed(struct kvm_vm *vm, uint32_t random_seed)
-{
- memstress_args.random_seed = random_seed;
- sync_global_to_guest(vm, memstress_args.random_seed);
-}
-
void memstress_set_random_access(struct kvm_vm *vm, bool random_access)
{
memstress_args.random_access = random_access;
diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c
index e8211f5d6863..6ae47b3d6b25 100644
--- a/tools/testing/selftests/kvm/lib/riscv/processor.c
+++ b/tools/testing/selftests/kvm/lib/riscv/processor.c
@@ -10,6 +10,7 @@
#include "kvm_util.h"
#include "processor.h"
+#include "ucall_common.h"
#define DEFAULT_RISCV_GUEST_STACK_VADDR_MIN 0xac0000
@@ -502,3 +503,15 @@ bool guest_sbi_probe_extension(int extid, long *out_val)
return true;
}
+
+unsigned long get_host_sbi_spec_version(void)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_BASE, SBI_EXT_BASE_GET_SPEC_VERSION, 0,
+ 0, 0, 0, 0, 0);
+
+ GUEST_ASSERT(!ret.error);
+
+ return ret.value;
+}
diff --git a/tools/testing/selftests/kvm/lib/riscv/ucall.c b/tools/testing/selftests/kvm/lib/riscv/ucall.c
index 14ee17151a59..b5035c63d516 100644
--- a/tools/testing/selftests/kvm/lib/riscv/ucall.c
+++ b/tools/testing/selftests/kvm/lib/riscv/ucall.c
@@ -9,6 +9,7 @@
#include "kvm_util.h"
#include "processor.h"
+#include "sbi.h"
void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
{
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
index 5a8f8becb129..8ed0b74ae837 100644
--- a/tools/testing/selftests/kvm/lib/test_util.c
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -4,8 +4,6 @@
*
* Copyright (C) 2020, Google LLC.
*/
-
-#define _GNU_SOURCE
#include <stdio.h>
#include <stdarg.h>
#include <assert.h>
diff --git a/tools/testing/selftests/kvm/lib/ucall_common.c b/tools/testing/selftests/kvm/lib/ucall_common.c
index f5af65a41c29..42151e571953 100644
--- a/tools/testing/selftests/kvm/lib/ucall_common.c
+++ b/tools/testing/selftests/kvm/lib/ucall_common.c
@@ -1,9 +1,12 @@
// SPDX-License-Identifier: GPL-2.0-only
-#include "kvm_util.h"
#include "linux/types.h"
#include "linux/bitmap.h"
#include "linux/atomic.h"
+#include "kvm_util.h"
+#include "ucall_common.h"
+
+
#define GUEST_UCALL_FAILED -1
struct ucall_header {
diff --git a/tools/testing/selftests/kvm/lib/userfaultfd_util.c b/tools/testing/selftests/kvm/lib/userfaultfd_util.c
index f4eef6eb2dc2..7c9de8414462 100644
--- a/tools/testing/selftests/kvm/lib/userfaultfd_util.c
+++ b/tools/testing/selftests/kvm/lib/userfaultfd_util.c
@@ -6,9 +6,6 @@
* Copyright (C) 2018, Red Hat, Inc.
* Copyright (C) 2019-2022 Google LLC
*/
-
-#define _GNU_SOURCE /* for pipe2 */
-
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
@@ -16,6 +13,7 @@
#include <poll.h>
#include <pthread.h>
#include <linux/userfaultfd.h>
+#include <sys/epoll.h>
#include <sys/syscall.h>
#include "kvm_util.h"
@@ -27,76 +25,69 @@
static void *uffd_handler_thread_fn(void *arg)
{
- struct uffd_desc *uffd_desc = (struct uffd_desc *)arg;
- int uffd = uffd_desc->uffd;
- int pipefd = uffd_desc->pipefds[0];
- useconds_t delay = uffd_desc->delay;
+ struct uffd_reader_args *reader_args = (struct uffd_reader_args *)arg;
+ int uffd = reader_args->uffd;
int64_t pages = 0;
struct timespec start;
struct timespec ts_diff;
+ struct epoll_event evt;
+ int epollfd;
+
+ epollfd = epoll_create(1);
+ TEST_ASSERT(epollfd >= 0, "Failed to create epollfd.");
+
+ evt.events = EPOLLIN | EPOLLEXCLUSIVE;
+ evt.data.u32 = 0;
+ TEST_ASSERT(!epoll_ctl(epollfd, EPOLL_CTL_ADD, uffd, &evt),
+ "Failed to add uffd to epollfd");
+
+ evt.events = EPOLLIN;
+ evt.data.u32 = 1;
+ TEST_ASSERT(!epoll_ctl(epollfd, EPOLL_CTL_ADD, reader_args->pipe, &evt),
+ "Failed to add pipe to epollfd");
clock_gettime(CLOCK_MONOTONIC, &start);
while (1) {
struct uffd_msg msg;
- struct pollfd pollfd[2];
- char tmp_chr;
int r;
- pollfd[0].fd = uffd;
- pollfd[0].events = POLLIN;
- pollfd[1].fd = pipefd;
- pollfd[1].events = POLLIN;
+ r = epoll_wait(epollfd, &evt, 1, -1);
+ TEST_ASSERT(r == 1,
+ "Unexpected number of events (%d) from epoll, errno = %d",
+ r, errno);
- r = poll(pollfd, 2, -1);
- switch (r) {
- case -1:
- pr_info("poll err");
- continue;
- case 0:
- continue;
- case 1:
- break;
- default:
- pr_info("Polling uffd returned %d", r);
- return NULL;
- }
-
- if (pollfd[0].revents & POLLERR) {
- pr_info("uffd revents has POLLERR");
- return NULL;
- }
+ if (evt.data.u32 == 1) {
+ char tmp_chr;
- if (pollfd[1].revents & POLLIN) {
- r = read(pollfd[1].fd, &tmp_chr, 1);
+ TEST_ASSERT(!(evt.events & (EPOLLERR | EPOLLHUP)),
+ "Reader thread received EPOLLERR or EPOLLHUP on pipe.");
+ r = read(reader_args->pipe, &tmp_chr, 1);
TEST_ASSERT(r == 1,
- "Error reading pipefd in UFFD thread");
+ "Error reading pipefd in uffd reader thread");
break;
}
- if (!(pollfd[0].revents & POLLIN))
- continue;
+ TEST_ASSERT(!(evt.events & (EPOLLERR | EPOLLHUP)),
+ "Reader thread received EPOLLERR or EPOLLHUP on uffd.");
r = read(uffd, &msg, sizeof(msg));
if (r == -1) {
- if (errno == EAGAIN)
- continue;
- pr_info("Read of uffd got errno %d\n", errno);
- return NULL;
+ TEST_ASSERT(errno == EAGAIN,
+ "Error reading from UFFD: errno = %d", errno);
+ continue;
}
- if (r != sizeof(msg)) {
- pr_info("Read on uffd returned unexpected size: %d bytes", r);
- return NULL;
- }
+ TEST_ASSERT(r == sizeof(msg),
+ "Read on uffd returned unexpected number of bytes (%d)", r);
if (!(msg.event & UFFD_EVENT_PAGEFAULT))
continue;
- if (delay)
- usleep(delay);
- r = uffd_desc->handler(uffd_desc->uffd_mode, uffd, &msg);
- if (r < 0)
- return NULL;
+ if (reader_args->delay)
+ usleep(reader_args->delay);
+ r = reader_args->handler(reader_args->uffd_mode, uffd, &msg);
+ TEST_ASSERT(r >= 0,
+ "Reader thread handler fn returned negative value %d", r);
pages++;
}
@@ -110,6 +101,7 @@ static void *uffd_handler_thread_fn(void *arg)
struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
void *hva, uint64_t len,
+ uint64_t num_readers,
uffd_handler_t handler)
{
struct uffd_desc *uffd_desc;
@@ -118,14 +110,25 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
struct uffdio_api uffdio_api;
struct uffdio_register uffdio_register;
uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY;
- int ret;
+ int ret, i;
PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n",
is_minor ? "MINOR" : "MISSING",
is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY");
uffd_desc = malloc(sizeof(struct uffd_desc));
- TEST_ASSERT(uffd_desc, "malloc failed");
+ TEST_ASSERT(uffd_desc, "Failed to malloc uffd descriptor");
+
+ uffd_desc->pipefds = calloc(sizeof(int), num_readers);
+ TEST_ASSERT(uffd_desc->pipefds, "Failed to alloc pipes");
+
+ uffd_desc->readers = calloc(sizeof(pthread_t), num_readers);
+ TEST_ASSERT(uffd_desc->readers, "Failed to alloc reader threads");
+
+ uffd_desc->reader_args = calloc(sizeof(struct uffd_reader_args), num_readers);
+ TEST_ASSERT(uffd_desc->reader_args, "Failed to alloc reader_args");
+
+ uffd_desc->num_readers = num_readers;
/* In order to get minor faults, prefault via the alias. */
if (is_minor)
@@ -148,18 +151,28 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) ==
expected_ioctls, "missing userfaultfd ioctls");
- ret = pipe2(uffd_desc->pipefds, O_CLOEXEC | O_NONBLOCK);
- TEST_ASSERT(!ret, "Failed to set up pipefd");
-
- uffd_desc->uffd_mode = uffd_mode;
uffd_desc->uffd = uffd;
- uffd_desc->delay = delay;
- uffd_desc->handler = handler;
- pthread_create(&uffd_desc->thread, NULL, uffd_handler_thread_fn,
- uffd_desc);
+ for (i = 0; i < uffd_desc->num_readers; ++i) {
+ int pipes[2];
- PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n",
- hva, hva + len);
+ ret = pipe2((int *) &pipes, O_CLOEXEC | O_NONBLOCK);
+ TEST_ASSERT(!ret, "Failed to set up pipefd %i for uffd_desc %p",
+ i, uffd_desc);
+
+ uffd_desc->pipefds[i] = pipes[1];
+
+ uffd_desc->reader_args[i].uffd_mode = uffd_mode;
+ uffd_desc->reader_args[i].uffd = uffd;
+ uffd_desc->reader_args[i].delay = delay;
+ uffd_desc->reader_args[i].handler = handler;
+ uffd_desc->reader_args[i].pipe = pipes[0];
+
+ pthread_create(&uffd_desc->readers[i], NULL, uffd_handler_thread_fn,
+ &uffd_desc->reader_args[i]);
+
+ PER_VCPU_DEBUG("Created uffd thread %i for HVA range [%p, %p)\n",
+ i, hva, hva + len);
+ }
return uffd_desc;
}
@@ -167,19 +180,26 @@ struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
void uffd_stop_demand_paging(struct uffd_desc *uffd)
{
char c = 0;
- int ret;
+ int i;
- ret = write(uffd->pipefds[1], &c, 1);
- TEST_ASSERT(ret == 1, "Unable to write to pipefd");
+ for (i = 0; i < uffd->num_readers; ++i)
+ TEST_ASSERT(write(uffd->pipefds[i], &c, 1) == 1,
+ "Unable to write to pipefd %i for uffd_desc %p", i, uffd);
- ret = pthread_join(uffd->thread, NULL);
- TEST_ASSERT(ret == 0, "Pthread_join failed.");
+ for (i = 0; i < uffd->num_readers; ++i)
+ TEST_ASSERT(!pthread_join(uffd->readers[i], NULL),
+ "Pthread_join failed on reader %i for uffd_desc %p", i, uffd);
close(uffd->uffd);
- close(uffd->pipefds[1]);
- close(uffd->pipefds[0]);
+ for (i = 0; i < uffd->num_readers; ++i) {
+ close(uffd->pipefds[i]);
+ close(uffd->reader_args[i].pipe);
+ }
+ free(uffd->pipefds);
+ free(uffd->readers);
+ free(uffd->reader_args);
free(uffd);
}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 74a4c736c9ae..153739f2e201 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -15,14 +15,17 @@
#define NUM_INTERRUPTS 256
#endif
-#define DEFAULT_CODE_SELECTOR 0x8
-#define DEFAULT_DATA_SELECTOR 0x10
+#define KERNEL_CS 0x8
+#define KERNEL_DS 0x10
+#define KERNEL_TSS 0x18
#define MAX_NR_CPUID_ENTRIES 100
vm_vaddr_t exception_handlers;
bool host_cpu_is_amd;
bool host_cpu_is_intel;
+bool is_forced_emulation_enabled;
+uint64_t guest_tsc_khz;
static void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent)
{
@@ -417,7 +420,7 @@ static void kvm_seg_set_unusable(struct kvm_segment *segp)
static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp)
{
- void *gdt = addr_gva2hva(vm, vm->gdt);
+ void *gdt = addr_gva2hva(vm, vm->arch.gdt);
struct desc64 *desc = gdt + (segp->selector >> 3) * 8;
desc->limit0 = segp->limit & 0xFFFF;
@@ -437,27 +440,10 @@ static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp)
desc->base3 = segp->base >> 32;
}
-
-/*
- * Set Long Mode Flat Kernel Code Segment
- *
- * Input Args:
- * vm - VM whose GDT is being filled, or NULL to only write segp
- * selector - selector value
- *
- * Output Args:
- * segp - Pointer to KVM segment
- *
- * Return: None
- *
- * Sets up the KVM segment pointed to by @segp, to be a code segment
- * with the selector value given by @selector.
- */
-static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector,
- struct kvm_segment *segp)
+static void kvm_seg_set_kernel_code_64bit(struct kvm_segment *segp)
{
memset(segp, 0, sizeof(*segp));
- segp->selector = selector;
+ segp->selector = KERNEL_CS;
segp->limit = 0xFFFFFFFFu;
segp->s = 0x1; /* kTypeCodeData */
segp->type = 0x08 | 0x01 | 0x02; /* kFlagCode | kFlagCodeAccessed
@@ -466,30 +452,12 @@ static void kvm_seg_set_kernel_code_64bit(struct kvm_vm *vm, uint16_t selector,
segp->g = true;
segp->l = true;
segp->present = 1;
- if (vm)
- kvm_seg_fill_gdt_64bit(vm, segp);
}
-/*
- * Set Long Mode Flat Kernel Data Segment
- *
- * Input Args:
- * vm - VM whose GDT is being filled, or NULL to only write segp
- * selector - selector value
- *
- * Output Args:
- * segp - Pointer to KVM segment
- *
- * Return: None
- *
- * Sets up the KVM segment pointed to by @segp, to be a data segment
- * with the selector value given by @selector.
- */
-static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
- struct kvm_segment *segp)
+static void kvm_seg_set_kernel_data_64bit(struct kvm_segment *segp)
{
memset(segp, 0, sizeof(*segp));
- segp->selector = selector;
+ segp->selector = KERNEL_DS;
segp->limit = 0xFFFFFFFFu;
segp->s = 0x1; /* kTypeCodeData */
segp->type = 0x00 | 0x01 | 0x02; /* kFlagData | kFlagDataAccessed
@@ -497,8 +465,6 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
*/
segp->g = true;
segp->present = true;
- if (vm)
- kvm_seg_fill_gdt_64bit(vm, segp);
}
vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
@@ -516,72 +482,163 @@ vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
return vm_untag_gpa(vm, PTE_GET_PA(*pte)) | (gva & ~HUGEPAGE_MASK(level));
}
-static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt)
+static void kvm_seg_set_tss_64bit(vm_vaddr_t base, struct kvm_segment *segp)
{
- if (!vm->gdt)
- vm->gdt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
-
- dt->base = vm->gdt;
- dt->limit = getpagesize();
-}
-
-static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
- int selector)
-{
- if (!vm->tss)
- vm->tss = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
-
memset(segp, 0, sizeof(*segp));
- segp->base = vm->tss;
+ segp->base = base;
segp->limit = 0x67;
- segp->selector = selector;
+ segp->selector = KERNEL_TSS;
segp->type = 0xb;
segp->present = 1;
- kvm_seg_fill_gdt_64bit(vm, segp);
}
-static void vcpu_setup(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+static void vcpu_init_sregs(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
struct kvm_sregs sregs;
+ TEST_ASSERT_EQ(vm->mode, VM_MODE_PXXV48_4K);
+
/* Set mode specific system register values. */
vcpu_sregs_get(vcpu, &sregs);
- sregs.idt.limit = 0;
+ sregs.idt.base = vm->arch.idt;
+ sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1;
+ sregs.gdt.base = vm->arch.gdt;
+ sregs.gdt.limit = getpagesize() - 1;
+
+ sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
+ sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR;
+ sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
+
+ kvm_seg_set_unusable(&sregs.ldt);
+ kvm_seg_set_kernel_code_64bit(&sregs.cs);
+ kvm_seg_set_kernel_data_64bit(&sregs.ds);
+ kvm_seg_set_kernel_data_64bit(&sregs.es);
+ kvm_seg_set_kernel_data_64bit(&sregs.gs);
+ kvm_seg_set_tss_64bit(vm->arch.tss, &sregs.tr);
+
+ sregs.cr3 = vm->pgd;
+ vcpu_sregs_set(vcpu, &sregs);
+}
+
+static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr,
+ int dpl, unsigned short selector)
+{
+ struct idt_entry *base =
+ (struct idt_entry *)addr_gva2hva(vm, vm->arch.idt);
+ struct idt_entry *e = &base[vector];
+
+ memset(e, 0, sizeof(*e));
+ e->offset0 = addr;
+ e->selector = selector;
+ e->ist = 0;
+ e->type = 14;
+ e->dpl = dpl;
+ e->p = 1;
+ e->offset1 = addr >> 16;
+ e->offset2 = addr >> 32;
+}
+
+static bool kvm_fixup_exception(struct ex_regs *regs)
+{
+ if (regs->r9 != KVM_EXCEPTION_MAGIC || regs->rip != regs->r10)
+ return false;
- kvm_setup_gdt(vm, &sregs.gdt);
+ if (regs->vector == DE_VECTOR)
+ return false;
- switch (vm->mode) {
- case VM_MODE_PXXV48_4K:
- sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
- sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR;
- sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
+ regs->rip = regs->r11;
+ regs->r9 = regs->vector;
+ regs->r10 = regs->error_code;
+ return true;
+}
- kvm_seg_set_unusable(&sregs.ldt);
- kvm_seg_set_kernel_code_64bit(vm, DEFAULT_CODE_SELECTOR, &sregs.cs);
- kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.ds);
- kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.es);
- kvm_setup_tss_64bit(vm, &sregs.tr, 0x18);
- break;
+void route_exception(struct ex_regs *regs)
+{
+ typedef void(*handler)(struct ex_regs *);
+ handler *handlers = (handler *)exception_handlers;
- default:
- TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
+ if (handlers && handlers[regs->vector]) {
+ handlers[regs->vector](regs);
+ return;
}
- sregs.cr3 = vm->pgd;
- vcpu_sregs_set(vcpu, &sregs);
+ if (kvm_fixup_exception(regs))
+ return;
+
+ ucall_assert(UCALL_UNHANDLED,
+ "Unhandled exception in guest", __FILE__, __LINE__,
+ "Unhandled exception '0x%lx' at guest RIP '0x%lx'",
+ regs->vector, regs->rip);
+}
+
+static void vm_init_descriptor_tables(struct kvm_vm *vm)
+{
+ extern void *idt_handlers;
+ struct kvm_segment seg;
+ int i;
+
+ vm->arch.gdt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
+ vm->arch.idt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
+ vm->handlers = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
+ vm->arch.tss = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
+
+ /* Handlers have the same address in both address spaces.*/
+ for (i = 0; i < NUM_INTERRUPTS; i++)
+ set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0, KERNEL_CS);
+
+ *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
+
+ kvm_seg_set_kernel_code_64bit(&seg);
+ kvm_seg_fill_gdt_64bit(vm, &seg);
+
+ kvm_seg_set_kernel_data_64bit(&seg);
+ kvm_seg_fill_gdt_64bit(vm, &seg);
+
+ kvm_seg_set_tss_64bit(vm->arch.tss, &seg);
+ kvm_seg_fill_gdt_64bit(vm, &seg);
+}
+
+void vm_install_exception_handler(struct kvm_vm *vm, int vector,
+ void (*handler)(struct ex_regs *))
+{
+ vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers);
+
+ handlers[vector] = (vm_vaddr_t)handler;
+}
+
+void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED)
+ REPORT_GUEST_ASSERT(uc);
}
void kvm_arch_vm_post_create(struct kvm_vm *vm)
{
+ int r;
+
+ TEST_ASSERT(kvm_has_cap(KVM_CAP_GET_TSC_KHZ),
+ "Require KVM_GET_TSC_KHZ to provide udelay() to guest.");
+
vm_create_irqchip(vm);
+ vm_init_descriptor_tables(vm);
+
sync_global_to_guest(vm, host_cpu_is_intel);
sync_global_to_guest(vm, host_cpu_is_amd);
+ sync_global_to_guest(vm, is_forced_emulation_enabled);
+
+ if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) {
+ struct kvm_sev_init init = { 0 };
+
+ vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
+ }
- if (vm->subtype == VM_SUBTYPE_SEV)
- sev_vm_init(vm);
- else if (vm->subtype == VM_SUBTYPE_SEV_ES)
- sev_es_vm_init(vm);
+ r = __vm_ioctl(vm, KVM_GET_TSC_KHZ, NULL);
+ TEST_ASSERT(r > 0, "KVM_GET_TSC_KHZ did not provide a valid TSC frequency.");
+ guest_tsc_khz = r;
+ sync_global_to_guest(vm, guest_tsc_khz);
}
void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
@@ -621,7 +678,7 @@ struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
vcpu = __vm_vcpu_add(vm, vcpu_id);
vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid());
- vcpu_setup(vm, vcpu);
+ vcpu_init_sregs(vm, vcpu);
/* Setup guest general purpose registers */
vcpu_regs_get(vcpu, &regs);
@@ -1081,108 +1138,15 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
void kvm_init_vm_address_properties(struct kvm_vm *vm)
{
- if (vm->subtype == VM_SUBTYPE_SEV || vm->subtype == VM_SUBTYPE_SEV_ES) {
+ if (vm->type == KVM_X86_SEV_VM || vm->type == KVM_X86_SEV_ES_VM) {
+ vm->arch.sev_fd = open_sev_dev_path_or_exit();
vm->arch.c_bit = BIT_ULL(this_cpu_property(X86_PROPERTY_SEV_C_BIT));
vm->gpa_tag_mask = vm->arch.c_bit;
+ } else {
+ vm->arch.sev_fd = -1;
}
}
-static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr,
- int dpl, unsigned short selector)
-{
- struct idt_entry *base =
- (struct idt_entry *)addr_gva2hva(vm, vm->idt);
- struct idt_entry *e = &base[vector];
-
- memset(e, 0, sizeof(*e));
- e->offset0 = addr;
- e->selector = selector;
- e->ist = 0;
- e->type = 14;
- e->dpl = dpl;
- e->p = 1;
- e->offset1 = addr >> 16;
- e->offset2 = addr >> 32;
-}
-
-
-static bool kvm_fixup_exception(struct ex_regs *regs)
-{
- if (regs->r9 != KVM_EXCEPTION_MAGIC || regs->rip != regs->r10)
- return false;
-
- if (regs->vector == DE_VECTOR)
- return false;
-
- regs->rip = regs->r11;
- regs->r9 = regs->vector;
- regs->r10 = regs->error_code;
- return true;
-}
-
-void route_exception(struct ex_regs *regs)
-{
- typedef void(*handler)(struct ex_regs *);
- handler *handlers = (handler *)exception_handlers;
-
- if (handlers && handlers[regs->vector]) {
- handlers[regs->vector](regs);
- return;
- }
-
- if (kvm_fixup_exception(regs))
- return;
-
- ucall_assert(UCALL_UNHANDLED,
- "Unhandled exception in guest", __FILE__, __LINE__,
- "Unhandled exception '0x%lx' at guest RIP '0x%lx'",
- regs->vector, regs->rip);
-}
-
-void vm_init_descriptor_tables(struct kvm_vm *vm)
-{
- extern void *idt_handlers;
- int i;
-
- vm->idt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
- vm->handlers = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
- /* Handlers have the same address in both address spaces.*/
- for (i = 0; i < NUM_INTERRUPTS; i++)
- set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0,
- DEFAULT_CODE_SELECTOR);
-}
-
-void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu)
-{
- struct kvm_vm *vm = vcpu->vm;
- struct kvm_sregs sregs;
-
- vcpu_sregs_get(vcpu, &sregs);
- sregs.idt.base = vm->idt;
- sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1;
- sregs.gdt.base = vm->gdt;
- sregs.gdt.limit = getpagesize() - 1;
- kvm_seg_set_kernel_data_64bit(NULL, DEFAULT_DATA_SELECTOR, &sregs.gs);
- vcpu_sregs_set(vcpu, &sregs);
- *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
-}
-
-void vm_install_exception_handler(struct kvm_vm *vm, int vector,
- void (*handler)(struct ex_regs *))
-{
- vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers);
-
- handlers[vector] = (vm_vaddr_t)handler;
-}
-
-void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
-{
- struct ucall uc;
-
- if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED)
- REPORT_GUEST_ASSERT(uc);
-}
-
const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid,
uint32_t function, uint32_t index)
{
@@ -1294,9 +1258,20 @@ unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
{
const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */
unsigned long ht_gfn, max_gfn, max_pfn;
- uint8_t maxphyaddr;
+ uint8_t maxphyaddr, guest_maxphyaddr;
+
+ /*
+ * Use "guest MAXPHYADDR" from KVM if it's available. Guest MAXPHYADDR
+ * enumerates the max _mappable_ GPA, which can be less than the raw
+ * MAXPHYADDR, e.g. if MAXPHYADDR=52, KVM is using TDP, and the CPU
+ * doesn't support 5-level TDP.
+ */
+ guest_maxphyaddr = kvm_cpu_property(X86_PROPERTY_GUEST_MAX_PHY_ADDR);
+ guest_maxphyaddr = guest_maxphyaddr ?: vm->pa_bits;
+ TEST_ASSERT(guest_maxphyaddr <= vm->pa_bits,
+ "Guest MAXPHYADDR should never be greater than raw MAXPHYADDR");
- max_gfn = (1ULL << (vm->pa_bits - vm->page_shift)) - 1;
+ max_gfn = (1ULL << (guest_maxphyaddr - vm->page_shift)) - 1;
/* Avoid reserved HyperTransport region on AMD processors. */
if (!host_cpu_is_amd)
@@ -1344,6 +1319,7 @@ void kvm_selftest_arch_init(void)
{
host_cpu_is_intel = this_cpu_is_intel();
host_cpu_is_amd = this_cpu_is_amd();
+ is_forced_emulation_enabled = kvm_is_forced_emulation_enabled();
}
bool sys_clocksource_is_based_on_tsc(void)
diff --git a/tools/testing/selftests/kvm/lib/x86_64/sev.c b/tools/testing/selftests/kvm/lib/x86_64/sev.c
index e248d3364b9c..e9535ee20b7f 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/sev.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/sev.c
@@ -1,5 +1,4 @@
// SPDX-License-Identifier: GPL-2.0-only
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <stdint.h>
#include <stdbool.h>
@@ -35,6 +34,32 @@ static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *regio
}
}
+void sev_vm_init(struct kvm_vm *vm)
+{
+ if (vm->type == KVM_X86_DEFAULT_VM) {
+ assert(vm->arch.sev_fd == -1);
+ vm->arch.sev_fd = open_sev_dev_path_or_exit();
+ vm_sev_ioctl(vm, KVM_SEV_INIT, NULL);
+ } else {
+ struct kvm_sev_init init = { 0 };
+ assert(vm->type == KVM_X86_SEV_VM);
+ vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
+ }
+}
+
+void sev_es_vm_init(struct kvm_vm *vm)
+{
+ if (vm->type == KVM_X86_DEFAULT_VM) {
+ assert(vm->arch.sev_fd == -1);
+ vm->arch.sev_fd = open_sev_dev_path_or_exit();
+ vm_sev_ioctl(vm, KVM_SEV_ES_INIT, NULL);
+ } else {
+ struct kvm_sev_init init = { 0 };
+ assert(vm->type == KVM_X86_SEV_ES_VM);
+ vm_sev_ioctl(vm, KVM_SEV_INIT2, &init);
+ }
+}
+
void sev_vm_launch(struct kvm_vm *vm, uint32_t policy)
{
struct kvm_sev_launch_start launch_start = {
@@ -87,28 +112,30 @@ void sev_vm_launch_finish(struct kvm_vm *vm)
TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_RUNNING);
}
-struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t policy, void *guest_code,
+struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t type, void *guest_code,
struct kvm_vcpu **cpu)
{
struct vm_shape shape = {
- .type = VM_TYPE_DEFAULT,
.mode = VM_MODE_DEFAULT,
- .subtype = policy & SEV_POLICY_ES ? VM_SUBTYPE_SEV_ES :
- VM_SUBTYPE_SEV,
+ .type = type,
};
struct kvm_vm *vm;
struct kvm_vcpu *cpus[1];
- uint8_t measurement[512];
vm = __vm_create_with_vcpus(shape, 1, 0, guest_code, cpus);
*cpu = cpus[0];
+ return vm;
+}
+
+void vm_sev_launch(struct kvm_vm *vm, uint32_t policy, uint8_t *measurement)
+{
sev_vm_launch(vm, policy);
- /* TODO: Validate the measurement is as expected. */
+ if (!measurement)
+ measurement = alloca(256);
+
sev_vm_launch_measure(vm, measurement);
sev_vm_launch_finish(vm);
-
- return vm;
}
diff --git a/tools/testing/selftests/kvm/max_guest_memory_test.c b/tools/testing/selftests/kvm/max_guest_memory_test.c
index 1a6da7389bf1..0b9678858b6d 100644
--- a/tools/testing/selftests/kvm/max_guest_memory_test.c
+++ b/tools/testing/selftests/kvm/max_guest_memory_test.c
@@ -1,6 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
-#define _GNU_SOURCE
-
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
index 156361966612..49f162573126 100644
--- a/tools/testing/selftests/kvm/memslot_modification_stress_test.c
+++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
@@ -6,9 +6,6 @@
* Copyright (C) 2018, Red Hat, Inc.
* Copyright (C) 2020, Google, Inc.
*/
-
-#define _GNU_SOURCE /* for program_invocation_name */
-
#include <stdio.h>
#include <stdlib.h>
#include <sys/syscall.h>
@@ -56,12 +53,6 @@ static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
}
}
-struct memslot_antagonist_args {
- struct kvm_vm *vm;
- useconds_t delay;
- uint64_t nr_modifications;
-};
-
static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay,
uint64_t nr_modifications)
{
diff --git a/tools/testing/selftests/kvm/pre_fault_memory_test.c b/tools/testing/selftests/kvm/pre_fault_memory_test.c
new file mode 100644
index 000000000000..0350a8896a2f
--- /dev/null
+++ b/tools/testing/selftests/kvm/pre_fault_memory_test.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024, Intel, Inc
+ *
+ * Author:
+ * Isaku Yamahata <isaku.yamahata at gmail.com>
+ */
+#include <linux/sizes.h>
+
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+
+/* Arbitrarily chosen values */
+#define TEST_SIZE (SZ_2M + PAGE_SIZE)
+#define TEST_NPAGES (TEST_SIZE / PAGE_SIZE)
+#define TEST_SLOT 10
+
+static void guest_code(uint64_t base_gpa)
+{
+ volatile uint64_t val __used;
+ int i;
+
+ for (i = 0; i < TEST_NPAGES; i++) {
+ uint64_t *src = (uint64_t *)(base_gpa + i * PAGE_SIZE);
+
+ val = *src;
+ }
+
+ GUEST_DONE();
+}
+
+static void pre_fault_memory(struct kvm_vcpu *vcpu, u64 gpa, u64 size,
+ u64 left)
+{
+ struct kvm_pre_fault_memory range = {
+ .gpa = gpa,
+ .size = size,
+ .flags = 0,
+ };
+ u64 prev;
+ int ret, save_errno;
+
+ do {
+ prev = range.size;
+ ret = __vcpu_ioctl(vcpu, KVM_PRE_FAULT_MEMORY, &range);
+ save_errno = errno;
+ TEST_ASSERT((range.size < prev) ^ (ret < 0),
+ "%sexpecting range.size to change on %s",
+ ret < 0 ? "not " : "",
+ ret < 0 ? "failure" : "success");
+ } while (ret >= 0 ? range.size : save_errno == EINTR);
+
+ TEST_ASSERT(range.size == left,
+ "Completed with %lld bytes left, expected %" PRId64,
+ range.size, left);
+
+ if (left == 0)
+ __TEST_ASSERT_VM_VCPU_IOCTL(!ret, "KVM_PRE_FAULT_MEMORY", ret, vcpu->vm);
+ else
+ /* No memory slot causes RET_PF_EMULATE. it results in -ENOENT. */
+ __TEST_ASSERT_VM_VCPU_IOCTL(ret && save_errno == ENOENT,
+ "KVM_PRE_FAULT_MEMORY", ret, vcpu->vm);
+}
+
+static void __test_pre_fault_memory(unsigned long vm_type, bool private)
+{
+ const struct vm_shape shape = {
+ .mode = VM_MODE_DEFAULT,
+ .type = vm_type,
+ };
+ struct kvm_vcpu *vcpu;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ uint64_t guest_test_phys_mem;
+ uint64_t guest_test_virt_mem;
+ uint64_t alignment, guest_page_size;
+
+ vm = vm_create_shape_with_one_vcpu(shape, &vcpu, guest_code);
+
+ alignment = guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size;
+ guest_test_phys_mem = (vm->max_gfn - TEST_NPAGES) * guest_page_size;
+#ifdef __s390x__
+ alignment = max(0x100000UL, guest_page_size);
+#else
+ alignment = SZ_2M;
+#endif
+ guest_test_phys_mem = align_down(guest_test_phys_mem, alignment);
+ guest_test_virt_mem = guest_test_phys_mem & ((1ULL << (vm->va_bits - 1)) - 1);
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ guest_test_phys_mem, TEST_SLOT, TEST_NPAGES,
+ private ? KVM_MEM_GUEST_MEMFD : 0);
+ virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, TEST_NPAGES);
+
+ if (private)
+ vm_mem_set_private(vm, guest_test_phys_mem, TEST_SIZE);
+ pre_fault_memory(vcpu, guest_test_phys_mem, SZ_2M, 0);
+ pre_fault_memory(vcpu, guest_test_phys_mem + SZ_2M, PAGE_SIZE * 2, PAGE_SIZE);
+ pre_fault_memory(vcpu, guest_test_phys_mem + TEST_SIZE, PAGE_SIZE, PAGE_SIZE);
+
+ vcpu_args_set(vcpu, 1, guest_test_virt_mem);
+ vcpu_run(vcpu);
+
+ run = vcpu->run;
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Wanted KVM_EXIT_IO, got exit reason: %u (%s)",
+ run->exit_reason, exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+ break;
+ }
+
+ kvm_vm_free(vm);
+}
+
+static void test_pre_fault_memory(unsigned long vm_type, bool private)
+{
+ if (vm_type && !(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(vm_type))) {
+ pr_info("Skipping tests for vm_type 0x%lx\n", vm_type);
+ return;
+ }
+
+ __test_pre_fault_memory(vm_type, private);
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_check_cap(KVM_CAP_PRE_FAULT_MEMORY));
+
+ test_pre_fault_memory(0, false);
+#ifdef __x86_64__
+ test_pre_fault_memory(KVM_X86_SW_PROTECTED_VM, false);
+ test_pre_fault_memory(KVM_X86_SW_PROTECTED_VM, true);
+#endif
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/riscv/arch_timer.c b/tools/testing/selftests/kvm/riscv/arch_timer.c
index 0f9cabd99fd4..2c792228ac0b 100644
--- a/tools/testing/selftests/kvm/riscv/arch_timer.c
+++ b/tools/testing/selftests/kvm/riscv/arch_timer.c
@@ -7,13 +7,11 @@
*
* Copyright (c) 2024, Intel Corporation.
*/
-
-#define _GNU_SOURCE
-
#include "arch_timer.h"
#include "kvm_util.h"
#include "processor.h"
#include "timer_test.h"
+#include "ucall_common.h"
static int timer_irq = IRQ_S_TIMER;
@@ -85,7 +83,7 @@ struct kvm_vm *test_vm_create(void)
int nr_vcpus = test_args.nr_vcpus;
vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
- __TEST_REQUIRE(__vcpu_has_ext(vcpus[0], RISCV_ISA_EXT_REG(KVM_RISCV_ISA_EXT_SSTC)),
+ __TEST_REQUIRE(__vcpu_has_isa_ext(vcpus[0], KVM_RISCV_ISA_EXT_SSTC),
"SSTC not available, skipping test\n");
vm_init_vector_tables(vm);
diff --git a/tools/testing/selftests/kvm/riscv/ebreak_test.c b/tools/testing/selftests/kvm/riscv/ebreak_test.c
new file mode 100644
index 000000000000..0e0712854953
--- /dev/null
+++ b/tools/testing/selftests/kvm/riscv/ebreak_test.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * RISC-V KVM ebreak test.
+ *
+ * Copyright 2024 Beijing ESWIN Computing Technology Co., Ltd.
+ *
+ */
+#include "kvm_util.h"
+#include "ucall_common.h"
+
+#define LABEL_ADDRESS(v) ((uint64_t)&(v))
+
+extern unsigned char sw_bp_1, sw_bp_2;
+static uint64_t sw_bp_addr;
+
+static void guest_code(void)
+{
+ asm volatile(
+ ".option push\n"
+ ".option norvc\n"
+ "sw_bp_1: ebreak\n"
+ "sw_bp_2: ebreak\n"
+ ".option pop\n"
+ );
+ GUEST_ASSERT_EQ(READ_ONCE(sw_bp_addr), LABEL_ADDRESS(sw_bp_2));
+
+ GUEST_DONE();
+}
+
+static void guest_breakpoint_handler(struct ex_regs *regs)
+{
+ WRITE_ONCE(sw_bp_addr, regs->epc);
+ regs->epc += 4;
+}
+
+int main(void)
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ uint64_t pc;
+ struct kvm_guest_debug debug = {
+ .control = KVM_GUESTDBG_ENABLE,
+ };
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_SET_GUEST_DEBUG));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ vm_init_vector_tables(vm);
+ vcpu_init_vector_tables(vcpu);
+ vm_install_exception_handler(vm, EXC_BREAKPOINT,
+ guest_breakpoint_handler);
+
+ /*
+ * Enable the guest debug.
+ * ebreak should exit to the VMM with KVM_EXIT_DEBUG reason.
+ */
+ vcpu_guest_debug_set(vcpu, &debug);
+ vcpu_run(vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG);
+
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc), &pc);
+ TEST_ASSERT_EQ(pc, LABEL_ADDRESS(sw_bp_1));
+
+ /* skip sw_bp_1 */
+ vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.pc), pc + 4);
+
+ /*
+ * Disable all debug controls.
+ * Guest should handle the ebreak without exiting to the VMM.
+ */
+ memset(&debug, 0, sizeof(debug));
+ vcpu_guest_debug_set(vcpu, &debug);
+
+ vcpu_run(vcpu);
+
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c
index b882b7b9b785..8e34f7fa44e9 100644
--- a/tools/testing/selftests/kvm/riscv/get-reg-list.c
+++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c
@@ -43,11 +43,13 @@ bool filter_reg(__u64 reg)
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_V:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SMSTATEEN:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSAIA:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSCOFPMF:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSTC:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVINVAL:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVNAPOT:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVPBMT:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZACAS:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZAWRS:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBA:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBB:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBC:
@@ -55,6 +57,11 @@ bool filter_reg(__u64 reg)
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBKC:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBKX:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBS:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCA:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCB:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCD:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCF:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZCMOP:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFA:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFH:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFHMIN:
@@ -67,6 +74,7 @@ bool filter_reg(__u64 reg)
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHINTNTL:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHINTPAUSE:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHPM:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIMOP:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKND:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKNE:
case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKNH:
@@ -408,11 +416,13 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off)
KVM_ISA_EXT_ARR(V),
KVM_ISA_EXT_ARR(SMSTATEEN),
KVM_ISA_EXT_ARR(SSAIA),
+ KVM_ISA_EXT_ARR(SSCOFPMF),
KVM_ISA_EXT_ARR(SSTC),
KVM_ISA_EXT_ARR(SVINVAL),
KVM_ISA_EXT_ARR(SVNAPOT),
KVM_ISA_EXT_ARR(SVPBMT),
KVM_ISA_EXT_ARR(ZACAS),
+ KVM_ISA_EXT_ARR(ZAWRS),
KVM_ISA_EXT_ARR(ZBA),
KVM_ISA_EXT_ARR(ZBB),
KVM_ISA_EXT_ARR(ZBC),
@@ -420,6 +430,11 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off)
KVM_ISA_EXT_ARR(ZBKC),
KVM_ISA_EXT_ARR(ZBKX),
KVM_ISA_EXT_ARR(ZBS),
+ KVM_ISA_EXT_ARR(ZCA),
+ KVM_ISA_EXT_ARR(ZCB),
+ KVM_ISA_EXT_ARR(ZCD),
+ KVM_ISA_EXT_ARR(ZCF),
+ KVM_ISA_EXT_ARR(ZCMOP),
KVM_ISA_EXT_ARR(ZFA),
KVM_ISA_EXT_ARR(ZFH),
KVM_ISA_EXT_ARR(ZFHMIN),
@@ -432,6 +447,7 @@ static const char *isa_ext_single_id_to_str(__u64 reg_off)
KVM_ISA_EXT_ARR(ZIHINTNTL),
KVM_ISA_EXT_ARR(ZIHINTPAUSE),
KVM_ISA_EXT_ARR(ZIHPM),
+ KVM_ISA_EXT_ARR(ZIMOP),
KVM_ISA_EXT_ARR(ZKND),
KVM_ISA_EXT_ARR(ZKNE),
KVM_ISA_EXT_ARR(ZKNH),
@@ -931,11 +947,13 @@ KVM_ISA_EXT_SUBLIST_CONFIG(fp_f, FP_F);
KVM_ISA_EXT_SUBLIST_CONFIG(fp_d, FP_D);
KVM_ISA_EXT_SIMPLE_CONFIG(h, H);
KVM_ISA_EXT_SUBLIST_CONFIG(smstateen, SMSTATEEN);
+KVM_ISA_EXT_SIMPLE_CONFIG(sscofpmf, SSCOFPMF);
KVM_ISA_EXT_SIMPLE_CONFIG(sstc, SSTC);
KVM_ISA_EXT_SIMPLE_CONFIG(svinval, SVINVAL);
KVM_ISA_EXT_SIMPLE_CONFIG(svnapot, SVNAPOT);
KVM_ISA_EXT_SIMPLE_CONFIG(svpbmt, SVPBMT);
KVM_ISA_EXT_SIMPLE_CONFIG(zacas, ZACAS);
+KVM_ISA_EXT_SIMPLE_CONFIG(zawrs, ZAWRS);
KVM_ISA_EXT_SIMPLE_CONFIG(zba, ZBA);
KVM_ISA_EXT_SIMPLE_CONFIG(zbb, ZBB);
KVM_ISA_EXT_SIMPLE_CONFIG(zbc, ZBC);
@@ -943,6 +961,11 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zbkb, ZBKB);
KVM_ISA_EXT_SIMPLE_CONFIG(zbkc, ZBKC);
KVM_ISA_EXT_SIMPLE_CONFIG(zbkx, ZBKX);
KVM_ISA_EXT_SIMPLE_CONFIG(zbs, ZBS);
+KVM_ISA_EXT_SIMPLE_CONFIG(zca, ZCA);
+KVM_ISA_EXT_SIMPLE_CONFIG(zcb, ZCB);
+KVM_ISA_EXT_SIMPLE_CONFIG(zcd, ZCD);
+KVM_ISA_EXT_SIMPLE_CONFIG(zcf, ZCF);
+KVM_ISA_EXT_SIMPLE_CONFIG(zcmop, ZCMOP);
KVM_ISA_EXT_SIMPLE_CONFIG(zfa, ZFA);
KVM_ISA_EXT_SIMPLE_CONFIG(zfh, ZFH);
KVM_ISA_EXT_SIMPLE_CONFIG(zfhmin, ZFHMIN);
@@ -955,6 +978,7 @@ KVM_ISA_EXT_SIMPLE_CONFIG(zifencei, ZIFENCEI);
KVM_ISA_EXT_SIMPLE_CONFIG(zihintntl, ZIHINTNTL);
KVM_ISA_EXT_SIMPLE_CONFIG(zihintpause, ZIHINTPAUSE);
KVM_ISA_EXT_SIMPLE_CONFIG(zihpm, ZIHPM);
+KVM_ISA_EXT_SIMPLE_CONFIG(zimop, ZIMOP);
KVM_ISA_EXT_SIMPLE_CONFIG(zknd, ZKND);
KVM_ISA_EXT_SIMPLE_CONFIG(zkne, ZKNE);
KVM_ISA_EXT_SIMPLE_CONFIG(zknh, ZKNH);
@@ -986,11 +1010,13 @@ struct vcpu_reg_list *vcpu_configs[] = {
&config_fp_d,
&config_h,
&config_smstateen,
+ &config_sscofpmf,
&config_sstc,
&config_svinval,
&config_svnapot,
&config_svpbmt,
&config_zacas,
+ &config_zawrs,
&config_zba,
&config_zbb,
&config_zbc,
@@ -998,6 +1024,11 @@ struct vcpu_reg_list *vcpu_configs[] = {
&config_zbkc,
&config_zbkx,
&config_zbs,
+ &config_zca,
+ &config_zcb,
+ &config_zcd,
+ &config_zcf,
+ &config_zcmop,
&config_zfa,
&config_zfh,
&config_zfhmin,
@@ -1010,6 +1041,7 @@ struct vcpu_reg_list *vcpu_configs[] = {
&config_zihintntl,
&config_zihintpause,
&config_zihpm,
+ &config_zimop,
&config_zknd,
&config_zkne,
&config_zknh,
diff --git a/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
new file mode 100644
index 000000000000..f299cbfd23ca
--- /dev/null
+++ b/tools/testing/selftests/kvm/riscv/sbi_pmu_test.c
@@ -0,0 +1,682 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * sbi_pmu_test.c - Tests the riscv64 SBI PMU functionality.
+ *
+ * Copyright (c) 2024, Rivos Inc.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include "kvm_util.h"
+#include "test_util.h"
+#include "processor.h"
+#include "sbi.h"
+#include "arch_timer.h"
+#include "ucall_common.h"
+
+/* Maximum counters(firmware + hardware) */
+#define RISCV_MAX_PMU_COUNTERS 64
+union sbi_pmu_ctr_info ctrinfo_arr[RISCV_MAX_PMU_COUNTERS];
+
+/* Snapshot shared memory data */
+#define PMU_SNAPSHOT_GPA_BASE BIT(30)
+static void *snapshot_gva;
+static vm_paddr_t snapshot_gpa;
+
+static int vcpu_shared_irq_count;
+static int counter_in_use;
+
+/* Cache the available counters in a bitmask */
+static unsigned long counter_mask_available;
+
+static bool illegal_handler_invoked;
+
+#define SBI_PMU_TEST_BASIC BIT(0)
+#define SBI_PMU_TEST_EVENTS BIT(1)
+#define SBI_PMU_TEST_SNAPSHOT BIT(2)
+#define SBI_PMU_TEST_OVERFLOW BIT(3)
+
+static int disabled_tests;
+
+unsigned long pmu_csr_read_num(int csr_num)
+{
+#define switchcase_csr_read(__csr_num, __val) {\
+ case __csr_num: \
+ __val = csr_read(__csr_num); \
+ break; }
+#define switchcase_csr_read_2(__csr_num, __val) {\
+ switchcase_csr_read(__csr_num + 0, __val) \
+ switchcase_csr_read(__csr_num + 1, __val)}
+#define switchcase_csr_read_4(__csr_num, __val) {\
+ switchcase_csr_read_2(__csr_num + 0, __val) \
+ switchcase_csr_read_2(__csr_num + 2, __val)}
+#define switchcase_csr_read_8(__csr_num, __val) {\
+ switchcase_csr_read_4(__csr_num + 0, __val) \
+ switchcase_csr_read_4(__csr_num + 4, __val)}
+#define switchcase_csr_read_16(__csr_num, __val) {\
+ switchcase_csr_read_8(__csr_num + 0, __val) \
+ switchcase_csr_read_8(__csr_num + 8, __val)}
+#define switchcase_csr_read_32(__csr_num, __val) {\
+ switchcase_csr_read_16(__csr_num + 0, __val) \
+ switchcase_csr_read_16(__csr_num + 16, __val)}
+
+ unsigned long ret = 0;
+
+ switch (csr_num) {
+ switchcase_csr_read_32(CSR_CYCLE, ret)
+ switchcase_csr_read_32(CSR_CYCLEH, ret)
+ default :
+ break;
+ }
+
+ return ret;
+#undef switchcase_csr_read_32
+#undef switchcase_csr_read_16
+#undef switchcase_csr_read_8
+#undef switchcase_csr_read_4
+#undef switchcase_csr_read_2
+#undef switchcase_csr_read
+}
+
+static inline void dummy_func_loop(uint64_t iter)
+{
+ int i = 0;
+
+ while (i < iter) {
+ asm volatile("nop");
+ i++;
+ }
+}
+
+static void start_counter(unsigned long counter, unsigned long start_flags,
+ unsigned long ival)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_START, counter, 1, start_flags,
+ ival, 0, 0);
+ __GUEST_ASSERT(ret.error == 0, "Unable to start counter %ld\n", counter);
+}
+
+/* This should be invoked only for reset counter use case */
+static void stop_reset_counter(unsigned long counter, unsigned long stop_flags)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, counter, 1,
+ stop_flags | SBI_PMU_STOP_FLAG_RESET, 0, 0, 0);
+ __GUEST_ASSERT(ret.error == SBI_ERR_ALREADY_STOPPED,
+ "Unable to stop counter %ld\n", counter);
+}
+
+static void stop_counter(unsigned long counter, unsigned long stop_flags)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_STOP, counter, 1, stop_flags,
+ 0, 0, 0);
+ __GUEST_ASSERT(ret.error == 0, "Unable to stop counter %ld error %ld\n",
+ counter, ret.error);
+}
+
+static void guest_illegal_exception_handler(struct ex_regs *regs)
+{
+ __GUEST_ASSERT(regs->cause == EXC_INST_ILLEGAL,
+ "Unexpected exception handler %lx\n", regs->cause);
+
+ illegal_handler_invoked = true;
+ /* skip the trapping instruction */
+ regs->epc += 4;
+}
+
+static void guest_irq_handler(struct ex_regs *regs)
+{
+ unsigned int irq_num = regs->cause & ~CAUSE_IRQ_FLAG;
+ struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva;
+ unsigned long overflown_mask;
+ unsigned long counter_val = 0;
+
+ /* Validate that we are in the correct irq handler */
+ GUEST_ASSERT_EQ(irq_num, IRQ_PMU_OVF);
+
+ /* Stop all counters first to avoid further interrupts */
+ stop_counter(counter_in_use, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT);
+
+ csr_clear(CSR_SIP, BIT(IRQ_PMU_OVF));
+
+ overflown_mask = READ_ONCE(snapshot_data->ctr_overflow_mask);
+ GUEST_ASSERT(overflown_mask & 0x01);
+
+ WRITE_ONCE(vcpu_shared_irq_count, vcpu_shared_irq_count+1);
+
+ counter_val = READ_ONCE(snapshot_data->ctr_values[0]);
+ /* Now start the counter to mimick the real driver behavior */
+ start_counter(counter_in_use, SBI_PMU_START_FLAG_SET_INIT_VALUE, counter_val);
+}
+
+static unsigned long get_counter_index(unsigned long cbase, unsigned long cmask,
+ unsigned long cflags,
+ unsigned long event)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, cbase, cmask,
+ cflags, event, 0, 0);
+ __GUEST_ASSERT(ret.error == 0, "config matching failed %ld\n", ret.error);
+ GUEST_ASSERT(ret.value < RISCV_MAX_PMU_COUNTERS);
+ GUEST_ASSERT(BIT(ret.value) & counter_mask_available);
+
+ return ret.value;
+}
+
+static unsigned long get_num_counters(void)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_NUM_COUNTERS, 0, 0, 0, 0, 0, 0);
+
+ __GUEST_ASSERT(ret.error == 0, "Unable to retrieve number of counters from SBI PMU");
+ __GUEST_ASSERT(ret.value < RISCV_MAX_PMU_COUNTERS,
+ "Invalid number of counters %ld\n", ret.value);
+
+ return ret.value;
+}
+
+static void update_counter_info(int num_counters)
+{
+ int i = 0;
+ struct sbiret ret;
+
+ for (i = 0; i < num_counters; i++) {
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_GET_INFO, i, 0, 0, 0, 0, 0);
+
+ /* There can be gaps in logical counter indicies*/
+ if (ret.error)
+ continue;
+ GUEST_ASSERT_NE(ret.value, 0);
+
+ ctrinfo_arr[i].value = ret.value;
+ counter_mask_available |= BIT(i);
+ }
+
+ GUEST_ASSERT(counter_mask_available > 0);
+}
+
+static unsigned long read_fw_counter(int idx, union sbi_pmu_ctr_info ctrinfo)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_FW_READ, idx, 0, 0, 0, 0, 0);
+ GUEST_ASSERT(ret.error == 0);
+ return ret.value;
+}
+
+static unsigned long read_counter(int idx, union sbi_pmu_ctr_info ctrinfo)
+{
+ unsigned long counter_val = 0;
+
+ __GUEST_ASSERT(ctrinfo.type < 2, "Invalid counter type %d", ctrinfo.type);
+
+ if (ctrinfo.type == SBI_PMU_CTR_TYPE_HW)
+ counter_val = pmu_csr_read_num(ctrinfo.csr);
+ else if (ctrinfo.type == SBI_PMU_CTR_TYPE_FW)
+ counter_val = read_fw_counter(idx, ctrinfo);
+
+ return counter_val;
+}
+
+static inline void verify_sbi_requirement_assert(void)
+{
+ long out_val = 0;
+ bool probe;
+
+ probe = guest_sbi_probe_extension(SBI_EXT_PMU, &out_val);
+ GUEST_ASSERT(probe && out_val == 1);
+
+ if (get_host_sbi_spec_version() < sbi_mk_version(2, 0))
+ __GUEST_ASSERT(0, "SBI implementation version doesn't support PMU Snapshot");
+}
+
+static void snapshot_set_shmem(vm_paddr_t gpa, unsigned long flags)
+{
+ unsigned long lo = (unsigned long)gpa;
+#if __riscv_xlen == 32
+ unsigned long hi = (unsigned long)(gpa >> 32);
+#else
+ unsigned long hi = gpa == -1 ? -1 : 0;
+#endif
+ struct sbiret ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_SNAPSHOT_SET_SHMEM,
+ lo, hi, flags, 0, 0, 0);
+
+ GUEST_ASSERT(ret.value == 0 && ret.error == 0);
+}
+
+static void test_pmu_event(unsigned long event)
+{
+ unsigned long counter;
+ unsigned long counter_value_pre, counter_value_post;
+ unsigned long counter_init_value = 100;
+
+ counter = get_counter_index(0, counter_mask_available, 0, event);
+ counter_value_pre = read_counter(counter, ctrinfo_arr[counter]);
+
+ /* Do not set the initial value */
+ start_counter(counter, 0, 0);
+ dummy_func_loop(10000);
+ stop_counter(counter, 0);
+
+ counter_value_post = read_counter(counter, ctrinfo_arr[counter]);
+ __GUEST_ASSERT(counter_value_post > counter_value_pre,
+ "Event update verification failed: post [%lx] pre [%lx]\n",
+ counter_value_post, counter_value_pre);
+
+ /*
+ * We can't just update the counter without starting it.
+ * Do start/stop twice to simulate that by first initializing to a very
+ * high value and a low value after that.
+ */
+ start_counter(counter, SBI_PMU_START_FLAG_SET_INIT_VALUE, ULONG_MAX/2);
+ stop_counter(counter, 0);
+ counter_value_pre = read_counter(counter, ctrinfo_arr[counter]);
+
+ start_counter(counter, SBI_PMU_START_FLAG_SET_INIT_VALUE, counter_init_value);
+ stop_counter(counter, 0);
+ counter_value_post = read_counter(counter, ctrinfo_arr[counter]);
+ __GUEST_ASSERT(counter_value_pre > counter_value_post,
+ "Counter reinitialization verification failed : post [%lx] pre [%lx]\n",
+ counter_value_post, counter_value_pre);
+
+ /* Now set the initial value and compare */
+ start_counter(counter, SBI_PMU_START_FLAG_SET_INIT_VALUE, counter_init_value);
+ dummy_func_loop(10000);
+ stop_counter(counter, 0);
+
+ counter_value_post = read_counter(counter, ctrinfo_arr[counter]);
+ __GUEST_ASSERT(counter_value_post > counter_init_value,
+ "Event update verification failed: post [%lx] pre [%lx]\n",
+ counter_value_post, counter_init_value);
+
+ stop_reset_counter(counter, 0);
+}
+
+static void test_pmu_event_snapshot(unsigned long event)
+{
+ unsigned long counter;
+ unsigned long counter_value_pre, counter_value_post;
+ unsigned long counter_init_value = 100;
+ struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva;
+
+ counter = get_counter_index(0, counter_mask_available, 0, event);
+ counter_value_pre = read_counter(counter, ctrinfo_arr[counter]);
+
+ /* Do not set the initial value */
+ start_counter(counter, 0, 0);
+ dummy_func_loop(10000);
+ stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT);
+
+ /* The counter value is updated w.r.t relative index of cbase */
+ counter_value_post = READ_ONCE(snapshot_data->ctr_values[0]);
+ __GUEST_ASSERT(counter_value_post > counter_value_pre,
+ "Event update verification failed: post [%lx] pre [%lx]\n",
+ counter_value_post, counter_value_pre);
+
+ /*
+ * We can't just update the counter without starting it.
+ * Do start/stop twice to simulate that by first initializing to a very
+ * high value and a low value after that.
+ */
+ WRITE_ONCE(snapshot_data->ctr_values[0], ULONG_MAX/2);
+ start_counter(counter, SBI_PMU_START_FLAG_INIT_SNAPSHOT, 0);
+ stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT);
+ counter_value_pre = READ_ONCE(snapshot_data->ctr_values[0]);
+
+ WRITE_ONCE(snapshot_data->ctr_values[0], counter_init_value);
+ start_counter(counter, SBI_PMU_START_FLAG_INIT_SNAPSHOT, 0);
+ stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT);
+ counter_value_post = READ_ONCE(snapshot_data->ctr_values[0]);
+ __GUEST_ASSERT(counter_value_pre > counter_value_post,
+ "Counter reinitialization verification failed : post [%lx] pre [%lx]\n",
+ counter_value_post, counter_value_pre);
+
+ /* Now set the initial value and compare */
+ WRITE_ONCE(snapshot_data->ctr_values[0], counter_init_value);
+ start_counter(counter, SBI_PMU_START_FLAG_INIT_SNAPSHOT, 0);
+ dummy_func_loop(10000);
+ stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT);
+
+ counter_value_post = READ_ONCE(snapshot_data->ctr_values[0]);
+ __GUEST_ASSERT(counter_value_post > counter_init_value,
+ "Event update verification failed: post [%lx] pre [%lx]\n",
+ counter_value_post, counter_init_value);
+
+ stop_reset_counter(counter, 0);
+}
+
+static void test_pmu_event_overflow(unsigned long event)
+{
+ unsigned long counter;
+ unsigned long counter_value_post;
+ unsigned long counter_init_value = ULONG_MAX - 10000;
+ struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva;
+
+ counter = get_counter_index(0, counter_mask_available, 0, event);
+ counter_in_use = counter;
+
+ /* The counter value is updated w.r.t relative index of cbase passed to start/stop */
+ WRITE_ONCE(snapshot_data->ctr_values[0], counter_init_value);
+ start_counter(counter, SBI_PMU_START_FLAG_INIT_SNAPSHOT, 0);
+ dummy_func_loop(10000);
+ udelay(msecs_to_usecs(2000));
+ /* irq handler should have stopped the counter */
+ stop_counter(counter, SBI_PMU_STOP_FLAG_TAKE_SNAPSHOT);
+
+ counter_value_post = READ_ONCE(snapshot_data->ctr_values[0]);
+ /* The counter value after stopping should be less the init value due to overflow */
+ __GUEST_ASSERT(counter_value_post < counter_init_value,
+ "counter_value_post %lx counter_init_value %lx for counter\n",
+ counter_value_post, counter_init_value);
+
+ stop_reset_counter(counter, 0);
+}
+
+static void test_invalid_event(void)
+{
+ struct sbiret ret;
+ unsigned long event = 0x1234; /* A random event */
+
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_CFG_MATCH, 0,
+ counter_mask_available, 0, event, 0, 0);
+ GUEST_ASSERT_EQ(ret.error, SBI_ERR_NOT_SUPPORTED);
+}
+
+static void test_pmu_events(void)
+{
+ int num_counters = 0;
+
+ /* Get the counter details */
+ num_counters = get_num_counters();
+ update_counter_info(num_counters);
+
+ /* Sanity testing for any random invalid event */
+ test_invalid_event();
+
+ /* Only these two events are guaranteed to be present */
+ test_pmu_event(SBI_PMU_HW_CPU_CYCLES);
+ test_pmu_event(SBI_PMU_HW_INSTRUCTIONS);
+
+ GUEST_DONE();
+}
+
+static void test_pmu_basic_sanity(void)
+{
+ long out_val = 0;
+ bool probe;
+ struct sbiret ret;
+ int num_counters = 0, i;
+ union sbi_pmu_ctr_info ctrinfo;
+
+ probe = guest_sbi_probe_extension(SBI_EXT_PMU, &out_val);
+ GUEST_ASSERT(probe && out_val == 1);
+
+ num_counters = get_num_counters();
+
+ for (i = 0; i < num_counters; i++) {
+ ret = sbi_ecall(SBI_EXT_PMU, SBI_EXT_PMU_COUNTER_GET_INFO, i,
+ 0, 0, 0, 0, 0);
+
+ /* There can be gaps in logical counter indicies*/
+ if (ret.error)
+ continue;
+ GUEST_ASSERT_NE(ret.value, 0);
+
+ ctrinfo.value = ret.value;
+
+ /**
+ * Accessibility check of hardware and read capability of firmware counters.
+ * The spec doesn't mandate any initial value. No need to check any value.
+ */
+ if (ctrinfo.type == SBI_PMU_CTR_TYPE_HW) {
+ pmu_csr_read_num(ctrinfo.csr);
+ GUEST_ASSERT(illegal_handler_invoked);
+ } else if (ctrinfo.type == SBI_PMU_CTR_TYPE_FW) {
+ read_fw_counter(i, ctrinfo);
+ }
+ }
+
+ GUEST_DONE();
+}
+
+static void test_pmu_events_snaphost(void)
+{
+ int num_counters = 0;
+ struct riscv_pmu_snapshot_data *snapshot_data = snapshot_gva;
+ int i;
+
+ /* Verify presence of SBI PMU and minimum requrired SBI version */
+ verify_sbi_requirement_assert();
+
+ snapshot_set_shmem(snapshot_gpa, 0);
+
+ /* Get the counter details */
+ num_counters = get_num_counters();
+ update_counter_info(num_counters);
+
+ /* Validate shared memory access */
+ GUEST_ASSERT_EQ(READ_ONCE(snapshot_data->ctr_overflow_mask), 0);
+ for (i = 0; i < num_counters; i++) {
+ if (counter_mask_available & (BIT(i)))
+ GUEST_ASSERT_EQ(READ_ONCE(snapshot_data->ctr_values[i]), 0);
+ }
+ /* Only these two events are guranteed to be present */
+ test_pmu_event_snapshot(SBI_PMU_HW_CPU_CYCLES);
+ test_pmu_event_snapshot(SBI_PMU_HW_INSTRUCTIONS);
+
+ GUEST_DONE();
+}
+
+static void test_pmu_events_overflow(void)
+{
+ int num_counters = 0;
+
+ /* Verify presence of SBI PMU and minimum requrired SBI version */
+ verify_sbi_requirement_assert();
+
+ snapshot_set_shmem(snapshot_gpa, 0);
+ csr_set(CSR_IE, BIT(IRQ_PMU_OVF));
+ local_irq_enable();
+
+ /* Get the counter details */
+ num_counters = get_num_counters();
+ update_counter_info(num_counters);
+
+ /*
+ * Qemu supports overflow for cycle/instruction.
+ * This test may fail on any platform that do not support overflow for these two events.
+ */
+ test_pmu_event_overflow(SBI_PMU_HW_CPU_CYCLES);
+ GUEST_ASSERT_EQ(vcpu_shared_irq_count, 1);
+
+ test_pmu_event_overflow(SBI_PMU_HW_INSTRUCTIONS);
+ GUEST_ASSERT_EQ(vcpu_shared_irq_count, 2);
+
+ GUEST_DONE();
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ case UCALL_SYNC:
+ break;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ break;
+ }
+}
+
+void test_vm_destroy(struct kvm_vm *vm)
+{
+ memset(ctrinfo_arr, 0, sizeof(union sbi_pmu_ctr_info) * RISCV_MAX_PMU_COUNTERS);
+ counter_mask_available = 0;
+ kvm_vm_free(vm);
+}
+
+static void test_vm_basic_test(void *guest_code)
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ __TEST_REQUIRE(__vcpu_has_sbi_ext(vcpu, KVM_RISCV_SBI_EXT_PMU),
+ "SBI PMU not available, skipping test");
+ vm_init_vector_tables(vm);
+ /* Illegal instruction handler is required to verify read access without configuration */
+ vm_install_exception_handler(vm, EXC_INST_ILLEGAL, guest_illegal_exception_handler);
+
+ vcpu_init_vector_tables(vcpu);
+ run_vcpu(vcpu);
+
+ test_vm_destroy(vm);
+}
+
+static void test_vm_events_test(void *guest_code)
+{
+ struct kvm_vm *vm = NULL;
+ struct kvm_vcpu *vcpu = NULL;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ __TEST_REQUIRE(__vcpu_has_sbi_ext(vcpu, KVM_RISCV_SBI_EXT_PMU),
+ "SBI PMU not available, skipping test");
+ run_vcpu(vcpu);
+
+ test_vm_destroy(vm);
+}
+
+static void test_vm_setup_snapshot_mem(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
+{
+ /* PMU Snapshot requires single page only */
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, PMU_SNAPSHOT_GPA_BASE, 1, 1, 0);
+ /* PMU_SNAPSHOT_GPA_BASE is identity mapped */
+ virt_map(vm, PMU_SNAPSHOT_GPA_BASE, PMU_SNAPSHOT_GPA_BASE, 1);
+
+ snapshot_gva = (void *)(PMU_SNAPSHOT_GPA_BASE);
+ snapshot_gpa = addr_gva2gpa(vcpu->vm, (vm_vaddr_t)snapshot_gva);
+ sync_global_to_guest(vcpu->vm, snapshot_gva);
+ sync_global_to_guest(vcpu->vm, snapshot_gpa);
+}
+
+static void test_vm_events_snapshot_test(void *guest_code)
+{
+ struct kvm_vm *vm = NULL;
+ struct kvm_vcpu *vcpu;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ __TEST_REQUIRE(__vcpu_has_sbi_ext(vcpu, KVM_RISCV_SBI_EXT_PMU),
+ "SBI PMU not available, skipping test");
+
+ test_vm_setup_snapshot_mem(vm, vcpu);
+
+ run_vcpu(vcpu);
+
+ test_vm_destroy(vm);
+}
+
+static void test_vm_events_overflow(void *guest_code)
+{
+ struct kvm_vm *vm = NULL;
+ struct kvm_vcpu *vcpu;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ __TEST_REQUIRE(__vcpu_has_sbi_ext(vcpu, KVM_RISCV_SBI_EXT_PMU),
+ "SBI PMU not available, skipping test");
+
+ __TEST_REQUIRE(__vcpu_has_isa_ext(vcpu, KVM_RISCV_ISA_EXT_SSCOFPMF),
+ "Sscofpmf is not available, skipping overflow test");
+
+ test_vm_setup_snapshot_mem(vm, vcpu);
+ vm_init_vector_tables(vm);
+ vm_install_interrupt_handler(vm, guest_irq_handler);
+
+ vcpu_init_vector_tables(vcpu);
+ /* Initialize guest timer frequency. */
+ vcpu_get_reg(vcpu, RISCV_TIMER_REG(frequency), &timer_freq);
+ sync_global_to_guest(vm, timer_freq);
+
+ run_vcpu(vcpu);
+
+ test_vm_destroy(vm);
+}
+
+static void test_print_help(char *name)
+{
+ pr_info("Usage: %s [-h] [-d <test name>]\n", name);
+ pr_info("\t-d: Test to disable. Available tests are 'basic', 'events', 'snapshot', 'overflow'\n");
+ pr_info("\t-h: print this help screen\n");
+}
+
+static bool parse_args(int argc, char *argv[])
+{
+ int opt;
+
+ while ((opt = getopt(argc, argv, "hd:")) != -1) {
+ switch (opt) {
+ case 'd':
+ if (!strncmp("basic", optarg, 5))
+ disabled_tests |= SBI_PMU_TEST_BASIC;
+ else if (!strncmp("events", optarg, 6))
+ disabled_tests |= SBI_PMU_TEST_EVENTS;
+ else if (!strncmp("snapshot", optarg, 8))
+ disabled_tests |= SBI_PMU_TEST_SNAPSHOT;
+ else if (!strncmp("overflow", optarg, 8))
+ disabled_tests |= SBI_PMU_TEST_OVERFLOW;
+ else
+ goto done;
+ break;
+ case 'h':
+ default:
+ goto done;
+ }
+ }
+
+ return true;
+done:
+ test_print_help(argv[0]);
+ return false;
+}
+
+int main(int argc, char *argv[])
+{
+ if (!parse_args(argc, argv))
+ exit(KSFT_SKIP);
+
+ if (!(disabled_tests & SBI_PMU_TEST_BASIC)) {
+ test_vm_basic_test(test_pmu_basic_sanity);
+ pr_info("SBI PMU basic test : PASS\n");
+ }
+
+ if (!(disabled_tests & SBI_PMU_TEST_EVENTS)) {
+ test_vm_events_test(test_pmu_events);
+ pr_info("SBI PMU event verification test : PASS\n");
+ }
+
+ if (!(disabled_tests & SBI_PMU_TEST_SNAPSHOT)) {
+ test_vm_events_snapshot_test(test_pmu_events_snaphost);
+ pr_info("SBI PMU event verification with snapshot test : PASS\n");
+ }
+
+ if (!(disabled_tests & SBI_PMU_TEST_OVERFLOW)) {
+ test_vm_events_overflow(test_pmu_events_overflow);
+ pr_info("SBI PMU event verification with overflow test : PASS\n");
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c
index 28f97fb52044..e5898678bfab 100644
--- a/tools/testing/selftests/kvm/rseq_test.c
+++ b/tools/testing/selftests/kvm/rseq_test.c
@@ -1,5 +1,13 @@
// SPDX-License-Identifier: GPL-2.0-only
-#define _GNU_SOURCE /* for program_invocation_short_name */
+
+/*
+ * Include rseq.c without _GNU_SOURCE defined, before including any headers, so
+ * that rseq.c is compiled with its configuration, not KVM selftests' config.
+ */
+#undef _GNU_SOURCE
+#include "../rseq/rseq.c"
+#define _GNU_SOURCE
+
#include <errno.h>
#include <fcntl.h>
#include <pthread.h>
@@ -19,8 +27,7 @@
#include "kvm_util.h"
#include "processor.h"
#include "test_util.h"
-
-#include "../rseq/rseq.c"
+#include "ucall_common.h"
/*
* Any bug related to task migration is likely to be timing-dependent; perform
@@ -186,12 +193,35 @@ static void calc_min_max_cpu(void)
"Only one usable CPU, task migration not possible");
}
+static void help(const char *name)
+{
+ puts("");
+ printf("usage: %s [-h] [-u]\n", name);
+ printf(" -u: Don't sanity check the number of successful KVM_RUNs\n");
+ puts("");
+ exit(0);
+}
+
int main(int argc, char *argv[])
{
+ bool skip_sanity_check = false;
int r, i, snapshot;
struct kvm_vm *vm;
struct kvm_vcpu *vcpu;
u32 cpu, rseq_cpu;
+ int opt;
+
+ while ((opt = getopt(argc, argv, "hu")) != -1) {
+ switch (opt) {
+ case 'u':
+ skip_sanity_check = true;
+ break;
+ case 'h':
+ default:
+ help(argv[0]);
+ break;
+ }
+ }
r = sched_getaffinity(0, sizeof(possible_mask), &possible_mask);
TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno,
@@ -254,9 +284,17 @@ int main(int argc, char *argv[])
* getcpu() to stabilize. A 2:1 migration:KVM_RUN ratio is a fairly
* conservative ratio on x86-64, which can do _more_ KVM_RUNs than
* migrations given the 1us+ delay in the migration task.
+ *
+ * Another reason why it may have small migration:KVM_RUN ratio is that,
+ * on systems with large low power mode wakeup latency, it may happen
+ * quite often that the scheduler is not able to wake up the target CPU
+ * before the vCPU thread is scheduled to another CPU.
*/
- TEST_ASSERT(i > (NR_TASK_MIGRATIONS / 2),
- "Only performed %d KVM_RUNs, task stalled too much?", i);
+ TEST_ASSERT(skip_sanity_check || i > (NR_TASK_MIGRATIONS / 2),
+ "Only performed %d KVM_RUNs, task stalled too much?\n\n"
+ " Try disabling deep sleep states to reduce CPU wakeup latency,\n"
+ " e.g. via cpuidle.off=1 or setting /dev/cpu_dma_latency to '0',\n"
+ " or run with -u to disable this sanity check.", i);
pthread_join(migration_thread, NULL);
diff --git a/tools/testing/selftests/kvm/s390x/cmma_test.c b/tools/testing/selftests/kvm/s390x/cmma_test.c
index 626a2b8a2037..b39033844756 100644
--- a/tools/testing/selftests/kvm/s390x/cmma_test.c
+++ b/tools/testing/selftests/kvm/s390x/cmma_test.c
@@ -7,8 +7,6 @@
* Authors:
* Nico Boehr <nrb@linux.ibm.com>
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
@@ -18,6 +16,7 @@
#include "test_util.h"
#include "kvm_util.h"
#include "kselftest.h"
+#include "ucall_common.h"
#define MAIN_PAGE_COUNT 512
diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c
index 48cb910e660d..f2df7416be84 100644
--- a/tools/testing/selftests/kvm/s390x/memop.c
+++ b/tools/testing/selftests/kvm/s390x/memop.c
@@ -15,6 +15,7 @@
#include "test_util.h"
#include "kvm_util.h"
#include "kselftest.h"
+#include "ucall_common.h"
enum mop_target {
LOGICAL,
diff --git a/tools/testing/selftests/kvm/s390x/shared_zeropage_test.c b/tools/testing/selftests/kvm/s390x/shared_zeropage_test.c
new file mode 100644
index 000000000000..bba0d9a6dcc8
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390x/shared_zeropage_test.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test shared zeropage handling (with/without storage keys)
+ *
+ * Copyright (C) 2024, Red Hat, Inc.
+ */
+#include <sys/mman.h>
+
+#include <linux/fs.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kselftest.h"
+#include "ucall_common.h"
+
+static void set_storage_key(void *addr, uint8_t skey)
+{
+ asm volatile("sske %0,%1" : : "d" (skey), "a" (addr));
+}
+
+static void guest_code(void)
+{
+ /* Issue some storage key instruction. */
+ set_storage_key((void *)0, 0x98);
+ GUEST_DONE();
+}
+
+/*
+ * Returns 1 if the shared zeropage is mapped, 0 if something else is mapped.
+ * Returns < 0 on error or if nothing is mapped.
+ */
+static int maps_shared_zeropage(int pagemap_fd, void *addr)
+{
+ struct page_region region;
+ struct pm_scan_arg arg = {
+ .start = (uintptr_t)addr,
+ .end = (uintptr_t)addr + 4096,
+ .vec = (uintptr_t)&region,
+ .vec_len = 1,
+ .size = sizeof(struct pm_scan_arg),
+ .category_mask = PAGE_IS_PFNZERO,
+ .category_anyof_mask = PAGE_IS_PRESENT,
+ .return_mask = PAGE_IS_PFNZERO,
+ };
+ return ioctl(pagemap_fd, PAGEMAP_SCAN, &arg);
+}
+
+int main(int argc, char *argv[])
+{
+ char *mem, *page0, *page1, *page2, tmp;
+ const size_t pagesize = getpagesize();
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ int pagemap_fd;
+
+ ksft_print_header();
+ ksft_set_plan(3);
+
+ /*
+ * We'll use memory that is not mapped into the VM for simplicity.
+ * Shared zeropages are enabled/disabled per-process.
+ */
+ mem = mmap(0, 3 * pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0);
+ TEST_ASSERT(mem != MAP_FAILED, "mmap() failed");
+
+ /* Disable THP. Ignore errors on older kernels. */
+ madvise(mem, 3 * pagesize, MADV_NOHUGEPAGE);
+
+ page0 = mem;
+ page1 = page0 + pagesize;
+ page2 = page1 + pagesize;
+
+ /* Can we even detect shared zeropages? */
+ pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+ TEST_REQUIRE(pagemap_fd >= 0);
+
+ tmp = *page0;
+ asm volatile("" : "+r" (tmp));
+ TEST_REQUIRE(maps_shared_zeropage(pagemap_fd, page0) == 1);
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ /* Verify that we get the shared zeropage after VM creation. */
+ tmp = *page1;
+ asm volatile("" : "+r" (tmp));
+ ksft_test_result(maps_shared_zeropage(pagemap_fd, page1) == 1,
+ "Shared zeropages should be enabled\n");
+
+ /*
+ * Let our VM execute a storage key instruction that should
+ * unshare all shared zeropages.
+ */
+ vcpu_run(vcpu);
+ get_ucall(vcpu, &uc);
+ TEST_ASSERT_EQ(uc.cmd, UCALL_DONE);
+
+ /* Verify that we don't have a shared zeropage anymore. */
+ ksft_test_result(!maps_shared_zeropage(pagemap_fd, page1),
+ "Shared zeropage should be gone\n");
+
+ /* Verify that we don't get any new shared zeropages. */
+ tmp = *page2;
+ asm volatile("" : "+r" (tmp));
+ ksft_test_result(!maps_shared_zeropage(pagemap_fd, page2),
+ "Shared zeropages should be disabled\n");
+
+ kvm_vm_free(vm);
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390x/sync_regs_test.c
index 43fb25ddc3ec..53def355ccba 100644
--- a/tools/testing/selftests/kvm/s390x/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/s390x/sync_regs_test.c
@@ -10,8 +10,6 @@
*
* Test expected behavior of the KVM_CAP_SYNC_REGS functionality.
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/testing/selftests/kvm/s390x/tprot.c b/tools/testing/selftests/kvm/s390x/tprot.c
index c73f948c9b63..7a742a673b7c 100644
--- a/tools/testing/selftests/kvm/s390x/tprot.c
+++ b/tools/testing/selftests/kvm/s390x/tprot.c
@@ -8,6 +8,7 @@
#include "test_util.h"
#include "kvm_util.h"
#include "kselftest.h"
+#include "ucall_common.h"
#define PAGE_SHIFT 12
#define PAGE_SIZE (1 << PAGE_SHIFT)
diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
index bd57d991e27d..bb8002084f52 100644
--- a/tools/testing/selftests/kvm/set_memory_region_test.c
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -1,5 +1,4 @@
// SPDX-License-Identifier: GPL-2.0
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <pthread.h>
#include <sched.h>
@@ -221,8 +220,20 @@ static void test_move_memory_region(void)
static void guest_code_delete_memory_region(void)
{
+ struct desc_ptr idt;
uint64_t val;
+ /*
+ * Clobber the IDT so that a #PF due to the memory region being deleted
+ * escalates to triple-fault shutdown. Because the memory region is
+ * deleted, there will be no valid mappings. As a result, KVM will
+ * repeatedly intercepts the state-2 page fault that occurs when trying
+ * to vector the guest's #PF. I.e. trying to actually handle the #PF
+ * in the guest will never succeed, and so isn't an option.
+ */
+ memset(&idt, 0, sizeof(idt));
+ __asm__ __volatile__("lidt %0" :: "m"(idt));
+
GUEST_SYNC(0);
/* Spin until the memory region is deleted. */
@@ -339,7 +350,7 @@ static void test_invalid_memory_region_flags(void)
#ifdef __x86_64__
if (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM))
- vm = vm_create_barebones_protected_vm();
+ vm = vm_create_barebones_type(KVM_X86_SW_PROTECTED_VM);
else
#endif
vm = vm_create_barebones();
@@ -462,7 +473,7 @@ static void test_add_private_memory_region(void)
pr_info("Testing ADD of KVM_MEM_GUEST_MEMFD memory regions\n");
- vm = vm_create_barebones_protected_vm();
+ vm = vm_create_barebones_type(KVM_X86_SW_PROTECTED_VM);
test_invalid_guest_memfd(vm, vm->kvm_fd, 0, "KVM fd should fail");
test_invalid_guest_memfd(vm, vm->fd, 0, "VM's fd should fail");
@@ -471,7 +482,7 @@ static void test_add_private_memory_region(void)
test_invalid_guest_memfd(vm, memfd, 0, "Regular memfd() should fail");
close(memfd);
- vm2 = vm_create_barebones_protected_vm();
+ vm2 = vm_create_barebones_type(KVM_X86_SW_PROTECTED_VM);
memfd = vm_create_guest_memfd(vm2, MEM_REGION_SIZE, 0);
test_invalid_guest_memfd(vm, memfd, 0, "Other VM's guest_memfd() should fail");
@@ -499,7 +510,7 @@ static void test_add_overlapping_private_memory_regions(void)
pr_info("Testing ADD of overlapping KVM_MEM_GUEST_MEMFD memory regions\n");
- vm = vm_create_barebones_protected_vm();
+ vm = vm_create_barebones_type(KVM_X86_SW_PROTECTED_VM);
memfd = vm_create_guest_memfd(vm, MEM_REGION_SIZE * 4, 0);
diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c
index bae0c5026f82..a8d3afa0b86b 100644
--- a/tools/testing/selftests/kvm/steal_time.c
+++ b/tools/testing/selftests/kvm/steal_time.c
@@ -4,20 +4,22 @@
*
* Copyright (C) 2020, Red Hat, Inc.
*/
-#define _GNU_SOURCE
#include <stdio.h>
#include <time.h>
#include <sched.h>
#include <pthread.h>
#include <linux/kernel.h>
#include <asm/kvm.h>
-#ifndef __riscv
+#ifdef __riscv
+#include "sbi.h"
+#else
#include <asm/kvm_para.h>
#endif
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
+#include "ucall_common.h"
#define NR_VCPUS 4
#define ST_GPA_BASE (1 << 30)
@@ -83,20 +85,18 @@ static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i)
static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx)
{
struct kvm_steal_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpu_idx]);
- int i;
- pr_info("VCPU%d:\n", vcpu_idx);
- pr_info(" steal: %lld\n", st->steal);
- pr_info(" version: %d\n", st->version);
- pr_info(" flags: %d\n", st->flags);
- pr_info(" preempted: %d\n", st->preempted);
- pr_info(" u8_pad: ");
- for (i = 0; i < 3; ++i)
- pr_info("%d", st->u8_pad[i]);
- pr_info("\n pad: ");
- for (i = 0; i < 11; ++i)
- pr_info("%d", st->pad[i]);
- pr_info("\n");
+ ksft_print_msg("VCPU%d:\n", vcpu_idx);
+ ksft_print_msg(" steal: %lld\n", st->steal);
+ ksft_print_msg(" version: %d\n", st->version);
+ ksft_print_msg(" flags: %d\n", st->flags);
+ ksft_print_msg(" preempted: %d\n", st->preempted);
+ ksft_print_msg(" u8_pad: %d %d %d\n",
+ st->u8_pad[0], st->u8_pad[1], st->u8_pad[2]);
+ ksft_print_msg(" pad: %d %d %d %d %d %d %d %d %d %d %d\n",
+ st->pad[0], st->pad[1], st->pad[2], st->pad[3],
+ st->pad[4], st->pad[5], st->pad[6], st->pad[7],
+ st->pad[8], st->pad[9], st->pad[10]);
}
#elif defined(__aarch64__)
@@ -199,10 +199,10 @@ static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx)
{
struct st_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpu_idx]);
- pr_info("VCPU%d:\n", vcpu_idx);
- pr_info(" rev: %d\n", st->rev);
- pr_info(" attr: %d\n", st->attr);
- pr_info(" st_time: %ld\n", st->st_time);
+ ksft_print_msg("VCPU%d:\n", vcpu_idx);
+ ksft_print_msg(" rev: %d\n", st->rev);
+ ksft_print_msg(" attr: %d\n", st->attr);
+ ksft_print_msg(" st_time: %ld\n", st->st_time);
}
#elif defined(__riscv)
@@ -366,7 +366,9 @@ int main(int ac, char **av)
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, gpages, 0);
virt_map(vm, ST_GPA_BASE, ST_GPA_BASE, gpages);
+ ksft_print_header();
TEST_REQUIRE(is_steal_time_supported(vcpus[0]));
+ ksft_set_plan(NR_VCPUS);
/* Run test on each VCPU */
for (i = 0; i < NR_VCPUS; ++i) {
@@ -407,14 +409,15 @@ int main(int ac, char **av)
run_delay, stolen_time);
if (verbose) {
- pr_info("VCPU%d: total-stolen-time=%ld test-stolen-time=%ld", i,
- guest_stolen_time[i], stolen_time);
- if (stolen_time == run_delay)
- pr_info(" (BONUS: guest test-stolen-time even exactly matches test-run_delay)");
- pr_info("\n");
+ ksft_print_msg("VCPU%d: total-stolen-time=%ld test-stolen-time=%ld%s\n",
+ i, guest_stolen_time[i], stolen_time,
+ stolen_time == run_delay ?
+ " (BONUS: guest test-stolen-time even exactly matches test-run_delay)" : "");
steal_time_dump(vm, i);
}
+ ksft_test_result_pass("vcpu%d\n", i);
}
- return 0;
+ /* Print results and exit() accordingly */
+ ksft_finished();
}
diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c
index eae521f050e0..903940c54d2d 100644
--- a/tools/testing/selftests/kvm/x86_64/amx_test.c
+++ b/tools/testing/selftests/kvm/x86_64/amx_test.c
@@ -6,8 +6,6 @@
*
* Tests for amx #NM exception and save/restore.
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
@@ -246,8 +244,6 @@ int main(int argc, char *argv[])
vcpu_regs_get(vcpu, &regs1);
/* Register #NM handler */
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
vm_install_exception_handler(vm, NM_VECTOR, guest_nm_handler);
/* amx cfg for guest_code */
diff --git a/tools/testing/selftests/kvm/x86_64/apic_bus_clock_test.c b/tools/testing/selftests/kvm/x86_64/apic_bus_clock_test.c
new file mode 100644
index 000000000000..f8916bb34405
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/apic_bus_clock_test.c
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2024 Intel Corporation
+ *
+ * Verify KVM correctly emulates the APIC bus frequency when the VMM configures
+ * the frequency via KVM_CAP_X86_APIC_BUS_CYCLES_NS. Start the APIC timer by
+ * programming TMICT (timer initial count) to the largest value possible (so
+ * that the timer will not expire during the test). Then, after an arbitrary
+ * amount of time has elapsed, verify TMCCT (timer current count) is within 1%
+ * of the expected value based on the time elapsed, the APIC bus frequency, and
+ * the programmed TDCR (timer divide configuration register).
+ */
+
+#include "apic.h"
+#include "test_util.h"
+
+/*
+ * Possible TDCR values with matching divide count. Used to modify APIC
+ * timer frequency.
+ */
+static const struct {
+ const uint32_t tdcr;
+ const uint32_t divide_count;
+} tdcrs[] = {
+ {0x0, 2},
+ {0x1, 4},
+ {0x2, 8},
+ {0x3, 16},
+ {0x8, 32},
+ {0x9, 64},
+ {0xa, 128},
+ {0xb, 1},
+};
+
+static bool is_x2apic;
+
+static void apic_enable(void)
+{
+ if (is_x2apic)
+ x2apic_enable();
+ else
+ xapic_enable();
+}
+
+static uint32_t apic_read_reg(unsigned int reg)
+{
+ return is_x2apic ? x2apic_read_reg(reg) : xapic_read_reg(reg);
+}
+
+static void apic_write_reg(unsigned int reg, uint32_t val)
+{
+ if (is_x2apic)
+ x2apic_write_reg(reg, val);
+ else
+ xapic_write_reg(reg, val);
+}
+
+static void apic_guest_code(uint64_t apic_hz, uint64_t delay_ms)
+{
+ uint64_t tsc_hz = guest_tsc_khz * 1000;
+ const uint32_t tmict = ~0u;
+ uint64_t tsc0, tsc1, freq;
+ uint32_t tmcct;
+ int i;
+
+ apic_enable();
+
+ /*
+ * Setup one-shot timer. The vector does not matter because the
+ * interrupt should not fire.
+ */
+ apic_write_reg(APIC_LVTT, APIC_LVT_TIMER_ONESHOT | APIC_LVT_MASKED);
+
+ for (i = 0; i < ARRAY_SIZE(tdcrs); i++) {
+ apic_write_reg(APIC_TDCR, tdcrs[i].tdcr);
+ apic_write_reg(APIC_TMICT, tmict);
+
+ tsc0 = rdtsc();
+ udelay(delay_ms * 1000);
+ tmcct = apic_read_reg(APIC_TMCCT);
+ tsc1 = rdtsc();
+
+ /*
+ * Stop the timer _after_ reading the current, final count, as
+ * writing the initial counter also modifies the current count.
+ */
+ apic_write_reg(APIC_TMICT, 0);
+
+ freq = (tmict - tmcct) * tdcrs[i].divide_count * tsc_hz / (tsc1 - tsc0);
+ /* Check if measured frequency is within 5% of configured frequency. */
+ __GUEST_ASSERT(freq < apic_hz * 105 / 100 && freq > apic_hz * 95 / 100,
+ "Frequency = %lu (wanted %lu - %lu), bus = %lu, div = %u, tsc = %lu",
+ freq, apic_hz * 95 / 100, apic_hz * 105 / 100,
+ apic_hz, tdcrs[i].divide_count, tsc_hz);
+ }
+
+ GUEST_DONE();
+}
+
+static void test_apic_bus_clock(struct kvm_vcpu *vcpu)
+{
+ bool done = false;
+ struct ucall uc;
+
+ while (!done) {
+ vcpu_run(vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_DONE:
+ done = true;
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ break;
+ }
+ }
+}
+
+static void run_apic_bus_clock_test(uint64_t apic_hz, uint64_t delay_ms,
+ bool x2apic)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int ret;
+
+ is_x2apic = x2apic;
+
+ vm = vm_create(1);
+
+ sync_global_to_guest(vm, is_x2apic);
+
+ vm_enable_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS,
+ NSEC_PER_SEC / apic_hz);
+
+ vcpu = vm_vcpu_add(vm, 0, apic_guest_code);
+ vcpu_args_set(vcpu, 2, apic_hz, delay_ms);
+
+ ret = __vm_enable_cap(vm, KVM_CAP_X86_APIC_BUS_CYCLES_NS,
+ NSEC_PER_SEC / apic_hz);
+ TEST_ASSERT(ret < 0 && errno == EINVAL,
+ "Setting of APIC bus frequency after vCPU is created should fail.");
+
+ if (!is_x2apic)
+ virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+
+ test_apic_bus_clock(vcpu);
+ kvm_vm_free(vm);
+}
+
+static void help(char *name)
+{
+ puts("");
+ printf("usage: %s [-h] [-d delay] [-f APIC bus freq]\n", name);
+ puts("");
+ printf("-d: Delay (in msec) guest uses to measure APIC bus frequency.\n");
+ printf("-f: The APIC bus frequency (in MHz) to be configured for the guest.\n");
+ puts("");
+}
+
+int main(int argc, char *argv[])
+{
+ /*
+ * Arbitrarilty default to 25MHz for the APIC bus frequency, which is
+ * different enough from the default 1GHz to be interesting.
+ */
+ uint64_t apic_hz = 25 * 1000 * 1000;
+ uint64_t delay_ms = 100;
+ int opt;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_APIC_BUS_CYCLES_NS));
+
+ while ((opt = getopt(argc, argv, "d:f:h")) != -1) {
+ switch (opt) {
+ case 'f':
+ apic_hz = atoi_positive("APIC bus frequency", optarg) * 1000 * 1000;
+ break;
+ case 'd':
+ delay_ms = atoi_positive("Delay in milliseconds", optarg);
+ break;
+ case 'h':
+ default:
+ help(argv[0]);
+ exit(KSFT_SKIP);
+ }
+ }
+
+ run_apic_bus_clock_test(apic_hz, delay_ms, false);
+ run_apic_bus_clock_test(apic_hz, delay_ms, true);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c
index ee3b384b991c..2929c067c207 100644
--- a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c
+++ b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c
@@ -17,6 +17,7 @@
#include "test_util.h"
#include "memstress.h"
#include "guest_modes.h"
+#include "ucall_common.h"
#define VCPUS 2
#define SLOTS 2
diff --git a/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c b/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c
index 6c2e5e0ceb1f..81055476d394 100644
--- a/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c
+++ b/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c
@@ -4,12 +4,9 @@
*
* Test for KVM_CAP_EXIT_ON_EMULATION_FAILURE.
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
-
#include "flds_emulation.h"
-
#include "test_util.h"
+#include "ucall_common.h"
#define MMIO_GPA 0x700000000
#define MMIO_GVA MMIO_GPA
diff --git a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c b/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c
index f3c2239228b1..762628f7d4ba 100644
--- a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c
+++ b/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c
@@ -110,8 +110,6 @@ static void test_fix_hypercall(struct kvm_vcpu *vcpu, bool disable_quirk)
{
struct kvm_vm *vm = vcpu->vm;
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
vm_install_exception_handler(vcpu->vm, UD_VECTOR, guest_ud_handler);
if (disable_quirk)
diff --git a/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c b/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c
index df351ae17029..10b1b0ba374e 100644
--- a/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c
+++ b/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c
@@ -2,8 +2,6 @@
/*
* Copyright (C) 2023, Google LLC.
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <sys/ioctl.h>
#include "test_util.h"
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
index 5c27efbf405e..4f5881d4ef66 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
@@ -7,8 +7,6 @@
* This work is licensed under the terms of the GNU GPL, version 2.
*
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c b/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c
index 4c7257ecd2a6..e192720bfe14 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c
@@ -4,7 +4,6 @@
*
* Tests for Enlightened VMCS, including nested guest state.
*/
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
@@ -258,8 +257,6 @@ int main(int argc, char *argv[])
vcpu_args_set(vcpu, 3, vmx_pages_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page));
vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id);
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler);
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
index b923a285e96f..068e9c69710d 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_features.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
@@ -156,9 +156,6 @@ static void guest_test_msrs_access(void)
vcpu_init_cpuid(vcpu, prev_cpuid);
}
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
-
/* TODO: Make this entire test easier to maintain. */
if (stage >= 21)
vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_SYNIC2, 0);
@@ -532,9 +529,6 @@ static void guest_test_hcalls_access(void)
while (true) {
vm = vm_create_with_one_vcpu(&vcpu, guest_hcall);
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
-
/* Hypercall input/output */
hcall_page = vm_vaddr_alloc_pages(vm, 2);
memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c b/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c
index f1617762c22f..22c0c124582f 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c
@@ -5,8 +5,6 @@
* Copyright (C) 2022, Red Hat, Inc.
*
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <pthread.h>
#include <inttypes.h>
@@ -256,16 +254,13 @@ int main(int argc, char *argv[])
hcall_page = vm_vaddr_alloc_pages(vm, 2);
memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
- vm_init_descriptor_tables(vm);
vcpu[1] = vm_vcpu_add(vm, RECEIVER_VCPU_ID_1, receiver_code);
- vcpu_init_descriptor_tables(vcpu[1]);
vcpu_args_set(vcpu[1], 2, hcall_page, addr_gva2gpa(vm, hcall_page));
vcpu_set_msr(vcpu[1], HV_X64_MSR_VP_INDEX, RECEIVER_VCPU_ID_1);
vcpu_set_hv_cpuid(vcpu[1]);
vcpu[2] = vm_vcpu_add(vm, RECEIVER_VCPU_ID_2, receiver_code);
- vcpu_init_descriptor_tables(vcpu[2]);
vcpu_args_set(vcpu[2], 2, hcall_page, addr_gva2gpa(vm, hcall_page));
vcpu_set_msr(vcpu[2], HV_X64_MSR_VP_INDEX, RECEIVER_VCPU_ID_2);
vcpu_set_hv_cpuid(vcpu[2]);
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
index c9b18707edc0..b987a3d79715 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
@@ -4,7 +4,6 @@
*
* Tests for Hyper-V extensions to SVM.
*/
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c
index 05b56095cf76..077cd0ec3040 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c
@@ -5,8 +5,6 @@
* Copyright (C) 2022, Red Hat, Inc.
*
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <asm/barrier.h>
#include <pthread.h>
#include <inttypes.h>
diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
index 40cc59f4e650..78878b3a2725 100644
--- a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
+++ b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
@@ -183,9 +183,6 @@ int main(void)
vcpu_clear_cpuid_entry(vcpu, KVM_CPUID_FEATURES);
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
-
enter_guest(vcpu);
kvm_vm_free(vm);
diff --git a/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c b/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c
index 3cc4b86832fe..7e2bfb3c3f3b 100644
--- a/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c
+++ b/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c
@@ -26,19 +26,37 @@ int main(int argc, char *argv[])
TEST_ASSERT(ret < 0,
"Setting KVM_CAP_MAX_VCPU_ID beyond KVM cap should fail");
+ /* Test BOOT_CPU_ID interaction (MAX_VCPU_ID cannot be lower) */
+ if (kvm_has_cap(KVM_CAP_SET_BOOT_CPU_ID)) {
+ vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)MAX_VCPU_ID);
+
+ /* Try setting KVM_CAP_MAX_VCPU_ID below BOOT_CPU_ID */
+ ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID - 1);
+ TEST_ASSERT(ret < 0,
+ "Setting KVM_CAP_MAX_VCPU_ID below BOOT_CPU_ID should fail");
+ }
+
/* Set KVM_CAP_MAX_VCPU_ID */
vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID);
-
/* Try to set KVM_CAP_MAX_VCPU_ID again */
ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID + 1);
TEST_ASSERT(ret < 0,
"Setting KVM_CAP_MAX_VCPU_ID multiple times should fail");
- /* Create vCPU with id beyond KVM_CAP_MAX_VCPU_ID cap*/
+ /* Create vCPU with id beyond KVM_CAP_MAX_VCPU_ID cap */
ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)MAX_VCPU_ID);
TEST_ASSERT(ret < 0, "Creating vCPU with ID > MAX_VCPU_ID should fail");
+ /* Create vCPU with bits 63:32 != 0, but an otherwise valid id */
+ ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(1L << 32));
+ TEST_ASSERT(ret < 0, "Creating vCPU with ID[63:32] != 0 should fail");
+
+ /* Create vCPU with id within bounds */
+ ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)0);
+ TEST_ASSERT(ret >= 0, "Creating vCPU with ID 0 should succeed");
+
+ close(ret);
kvm_vm_free(vm);
return 0;
}
diff --git a/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c
index 853802641e1e..2b550eff35f1 100644
--- a/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c
+++ b/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c
@@ -75,14 +75,12 @@ int main(int argc, char *argv[])
struct ucall uc;
int testcase;
+ TEST_REQUIRE(this_cpu_has(X86_FEATURE_MWAIT));
TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2));
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT);
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
-
while (1) {
vcpu_run(vcpu);
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
diff --git a/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c b/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c
index 3670331adf21..3eb0313ffa39 100644
--- a/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c
+++ b/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c
@@ -1,6 +1,4 @@
// SPDX-License-Identifier: GPL-2.0-only
-#define _GNU_SOURCE /* for program_invocation_short_name */
-
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
index 17bbb96fc4df..e7efb2b35f8b 100644
--- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
+++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
@@ -5,9 +5,6 @@
*
* Copyright (C) 2022, Google LLC.
*/
-
-#define _GNU_SOURCE
-
#include <fcntl.h>
#include <stdint.h>
#include <time.h>
diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh
index 7cbb409801ee..caad084b8bfd 100755
--- a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh
+++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh
@@ -13,10 +13,21 @@ NX_HUGE_PAGES_RECOVERY_RATIO=$(cat /sys/module/kvm/parameters/nx_huge_pages_reco
NX_HUGE_PAGES_RECOVERY_PERIOD=$(cat /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms)
HUGE_PAGES=$(cat /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages)
+# If we're already root, the host might not have sudo.
+if [ $(whoami) == "root" ]; then
+ function do_sudo () {
+ "$@"
+ }
+else
+ function do_sudo () {
+ sudo "$@"
+ }
+fi
+
set +e
function sudo_echo () {
- echo "$1" | sudo tee -a "$2" > /dev/null
+ echo "$1" | do_sudo tee -a "$2" > /dev/null
}
NXECUTABLE="$(dirname $0)/nx_huge_pages_test"
diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c
index 87011965dc41..eda88080c186 100644
--- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c
+++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c
@@ -9,8 +9,6 @@
* Verifies expected behavior of controlling guest access to
* MSR_PLATFORM_INFO.
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
@@ -26,36 +24,18 @@
static void guest_code(void)
{
uint64_t msr_platform_info;
+ uint8_t vector;
- for (;;) {
- msr_platform_info = rdmsr(MSR_PLATFORM_INFO);
- GUEST_SYNC(msr_platform_info);
- asm volatile ("inc %r11");
- }
-}
-
-static void test_msr_platform_info_enabled(struct kvm_vcpu *vcpu)
-{
- struct ucall uc;
-
- vm_enable_cap(vcpu->vm, KVM_CAP_MSR_PLATFORM_INFO, true);
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+ GUEST_SYNC(true);
+ msr_platform_info = rdmsr(MSR_PLATFORM_INFO);
+ GUEST_ASSERT_EQ(msr_platform_info & MSR_PLATFORM_INFO_MAX_TURBO_RATIO,
+ MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
- get_ucall(vcpu, &uc);
- TEST_ASSERT(uc.cmd == UCALL_SYNC,
- "Received ucall other than UCALL_SYNC: %lu", uc.cmd);
- TEST_ASSERT((uc.args[1] & MSR_PLATFORM_INFO_MAX_TURBO_RATIO) ==
- MSR_PLATFORM_INFO_MAX_TURBO_RATIO,
- "Expected MSR_PLATFORM_INFO to have max turbo ratio mask: %i.",
- MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
-}
+ GUEST_SYNC(false);
+ vector = rdmsr_safe(MSR_PLATFORM_INFO, &msr_platform_info);
+ GUEST_ASSERT_EQ(vector, GP_VECTOR);
-static void test_msr_platform_info_disabled(struct kvm_vcpu *vcpu)
-{
- vm_enable_cap(vcpu->vm, KVM_CAP_MSR_PLATFORM_INFO, false);
- vcpu_run(vcpu);
- TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN);
+ GUEST_DONE();
}
int main(int argc, char *argv[])
@@ -63,6 +43,7 @@ int main(int argc, char *argv[])
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
uint64_t msr_platform_info;
+ struct ucall uc;
TEST_REQUIRE(kvm_has_cap(KVM_CAP_MSR_PLATFORM_INFO));
@@ -71,8 +52,26 @@ int main(int argc, char *argv[])
msr_platform_info = vcpu_get_msr(vcpu, MSR_PLATFORM_INFO);
vcpu_set_msr(vcpu, MSR_PLATFORM_INFO,
msr_platform_info | MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
- test_msr_platform_info_enabled(vcpu);
- test_msr_platform_info_disabled(vcpu);
+
+ for (;;) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ vm_enable_cap(vm, KVM_CAP_MSR_PLATFORM_INFO, uc.args[1]);
+ break;
+ case UCALL_DONE:
+ goto done;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ default:
+ TEST_FAIL("Unexpected ucall %lu", uc.cmd);
+ break;
+ }
+ }
+
+done:
vcpu_set_msr(vcpu, MSR_PLATFORM_INFO, msr_platform_info);
kvm_vm_free(vm);
diff --git a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
index 26c85815f7e9..698cb36989db 100644
--- a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
+++ b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
@@ -2,26 +2,36 @@
/*
* Copyright (C) 2023, Tencent, Inc.
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <x86intrin.h>
#include "pmu.h"
#include "processor.h"
-/* Number of LOOP instructions for the guest measurement payload. */
-#define NUM_BRANCHES 10
+/* Number of iterations of the loop for the guest measurement payload. */
+#define NUM_LOOPS 10
+
+/* Each iteration of the loop retires one branch instruction. */
+#define NUM_BRANCH_INSNS_RETIRED (NUM_LOOPS)
+
+/*
+ * Number of instructions in each loop. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE,
+ * 1 LOOP.
+ */
+#define NUM_INSNS_PER_LOOP 3
+
/*
* Number of "extra" instructions that will be counted, i.e. the number of
- * instructions that are needed to set up the loop and then disabled the
- * counter. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE, 2 MOV, 2 XOR, 1 WRMSR.
+ * instructions that are needed to set up the loop and then disable the
+ * counter. 2 MOV, 2 XOR, 1 WRMSR.
*/
-#define NUM_EXTRA_INSNS 7
-#define NUM_INSNS_RETIRED (NUM_BRANCHES + NUM_EXTRA_INSNS)
+#define NUM_EXTRA_INSNS 5
+
+/* Total number of instructions retired within the measured section. */
+#define NUM_INSNS_RETIRED (NUM_LOOPS * NUM_INSNS_PER_LOOP + NUM_EXTRA_INSNS)
+
static uint8_t kvm_pmu_version;
static bool kvm_has_perf_caps;
-static bool is_forced_emulation_enabled;
static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
void *guest_code,
@@ -31,11 +41,7 @@ static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
struct kvm_vm *vm;
vm = vm_create_with_one_vcpu(vcpu, guest_code);
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(*vcpu);
-
sync_global_to_guest(vm, kvm_pmu_version);
- sync_global_to_guest(vm, is_forced_emulation_enabled);
/*
* Set PERF_CAPABILITIES before PMU version as KVM disallows enabling
@@ -107,7 +113,7 @@ static void guest_assert_event_count(uint8_t idx,
GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
break;
case INTEL_ARCH_BRANCHES_RETIRED_INDEX:
- GUEST_ASSERT_EQ(count, NUM_BRANCHES);
+ GUEST_ASSERT_EQ(count, NUM_BRANCH_INSNS_RETIRED);
break;
case INTEL_ARCH_LLC_REFERENCES_INDEX:
case INTEL_ARCH_LLC_MISSES_INDEX:
@@ -127,7 +133,7 @@ static void guest_assert_event_count(uint8_t idx,
}
sanity_checks:
- __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
+ __asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
GUEST_ASSERT_EQ(_rdpmc(pmc), count);
wrmsr(pmc_msr, 0xdead);
@@ -141,8 +147,8 @@ sanity_checks:
* before the end of the sequence.
*
* If CLFUSH{,OPT} is supported, flush the cacheline containing (at least) the
- * start of the loop to force LLC references and misses, i.e. to allow testing
- * that those events actually count.
+ * CLFUSH{,OPT} instruction on each loop iteration to force LLC references and
+ * misses, i.e. to allow testing that those events actually count.
*
* If forced emulation is enabled (and specified), force emulation on a subset
* of the measured code to verify that KVM correctly emulates instructions and
@@ -152,10 +158,11 @@ sanity_checks:
#define GUEST_MEASURE_EVENT(_msr, _value, clflush, FEP) \
do { \
__asm__ __volatile__("wrmsr\n\t" \
+ " mov $" __stringify(NUM_LOOPS) ", %%ecx\n\t" \
+ "1:\n\t" \
clflush "\n\t" \
"mfence\n\t" \
- "1: mov $" __stringify(NUM_BRANCHES) ", %%ecx\n\t" \
- FEP "loop .\n\t" \
+ FEP "loop 1b\n\t" \
FEP "mov %%edi, %%ecx\n\t" \
FEP "xor %%eax, %%eax\n\t" \
FEP "xor %%edx, %%edx\n\t" \
@@ -170,9 +177,9 @@ do { \
wrmsr(pmc_msr, 0); \
\
if (this_cpu_has(X86_FEATURE_CLFLUSHOPT)) \
- GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt 1f", FEP); \
+ GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt .", FEP); \
else if (this_cpu_has(X86_FEATURE_CLFLUSH)) \
- GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush 1f", FEP); \
+ GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush .", FEP); \
else \
GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP); \
\
@@ -507,7 +514,7 @@ static void guest_test_fixed_counters(void)
wrmsr(MSR_CORE_PERF_FIXED_CTR0 + i, 0);
wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
- __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
+ __asm__ __volatile__("loop ." : "+c"((int){NUM_LOOPS}));
wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
val = rdmsr(MSR_CORE_PERF_FIXED_CTR0 + i);
@@ -630,7 +637,6 @@ int main(int argc, char *argv[])
kvm_pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
kvm_has_perf_caps = kvm_cpu_has(X86_FEATURE_PDCM);
- is_forced_emulation_enabled = kvm_is_forced_emulation_enabled();
test_intel_counters();
diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
index 3c85d1ae9893..c15513cd74d1 100644
--- a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
+++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
@@ -9,9 +9,6 @@
* Verifies the expected behavior of allow lists and deny lists for
* virtual PMU events.
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
-
#include "kvm_util.h"
#include "pmu.h"
#include "processor.h"
@@ -35,8 +32,8 @@ struct __kvm_pmu_event_filter {
/*
* This event list comprises Intel's known architectural events, plus AMD's
- * "retired branch instructions" for Zen1-Zen3 (and* possibly other AMD CPUs).
- * Note, AMD and Intel use the same encoding for instructions retired.
+ * Branch Instructions Retired for Zen CPUs. Note, AMD and Intel use the
+ * same encoding for Instructions Retired.
*/
kvm_static_assert(INTEL_ARCH_INSTRUCTIONS_RETIRED == AMD_ZEN_INSTRUCTIONS_RETIRED);
@@ -337,9 +334,6 @@ static void test_pmu_config_disable(void (*guest_code)(void))
vm_enable_cap(vm, KVM_CAP_PMU_CAPABILITY, KVM_PMU_CAP_DISABLE);
vcpu = vm_vcpu_add(vm, 0, guest_code);
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
-
TEST_ASSERT(!sanity_check_pmu(vcpu),
"Guest should not be able to use disabled PMU.");
@@ -359,38 +353,13 @@ static bool use_intel_pmu(void)
kvm_pmu_has(X86_PMU_FEATURE_BRANCH_INSNS_RETIRED);
}
-static bool is_zen1(uint32_t family, uint32_t model)
-{
- return family == 0x17 && model <= 0x0f;
-}
-
-static bool is_zen2(uint32_t family, uint32_t model)
-{
- return family == 0x17 && model >= 0x30 && model <= 0x3f;
-}
-
-static bool is_zen3(uint32_t family, uint32_t model)
-{
- return family == 0x19 && model <= 0x0f;
-}
-
/*
- * Determining AMD support for a PMU event requires consulting the AMD
- * PPR for the CPU or reference material derived therefrom. The AMD
- * test code herein has been verified to work on Zen1, Zen2, and Zen3.
- *
- * Feel free to add more AMD CPUs that are documented to support event
- * select 0xc2 umask 0 as "retired branch instructions."
+ * On AMD, all Family 17h+ CPUs (Zen and its successors) use event encoding
+ * 0xc2,0 for Branch Instructions Retired.
*/
static bool use_amd_pmu(void)
{
- uint32_t family = kvm_cpu_family();
- uint32_t model = kvm_cpu_model();
-
- return host_cpu_is_amd &&
- (is_zen1(family, model) ||
- is_zen2(family, model) ||
- is_zen3(family, model));
+ return host_cpu_is_amd && kvm_cpu_family() >= 0x17;
}
/*
@@ -876,9 +845,6 @@ int main(int argc, char *argv[])
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
-
TEST_REQUIRE(sanity_check_pmu(vcpu));
if (use_amd_pmu())
diff --git a/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c b/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c
index e0f642d2a3c4..82a8d88b5338 100644
--- a/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c
+++ b/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c
@@ -2,7 +2,6 @@
/*
* Copyright (C) 2022, Google LLC.
*/
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <limits.h>
#include <pthread.h>
diff --git a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
index 366cf18600bc..49913784bc82 100644
--- a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
+++ b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
@@ -4,7 +4,6 @@
*
* Copyright (C) 2020, Red Hat, Inc.
*/
-#define _GNU_SOURCE /* for program_invocation_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
@@ -34,6 +33,20 @@ static void guest_not_bsp_vcpu(void *arg)
GUEST_DONE();
}
+static void test_set_invalid_bsp(struct kvm_vm *vm)
+{
+ unsigned long max_vcpu_id = vm_check_cap(vm, KVM_CAP_MAX_VCPU_ID);
+ int r;
+
+ if (max_vcpu_id) {
+ r = __vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(max_vcpu_id + 1));
+ TEST_ASSERT(r == -1 && errno == EINVAL, "BSP with ID > MAX should fail");
+ }
+
+ r = __vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(1L << 32));
+ TEST_ASSERT(r == -1 && errno == EINVAL, "BSP with ID[63:32]!=0 should fail");
+}
+
static void test_set_bsp_busy(struct kvm_vcpu *vcpu, const char *msg)
{
int r = __vm_ioctl(vcpu->vm, KVM_SET_BOOT_CPU_ID,
@@ -81,6 +94,8 @@ static struct kvm_vm *create_vm(uint32_t nr_vcpus, uint32_t bsp_vcpu_id,
vm = vm_create(nr_vcpus);
+ test_set_invalid_bsp(vm);
+
vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(unsigned long)bsp_vcpu_id);
for (i = 0; i < nr_vcpus; i++)
diff --git a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
index 3610981d9162..c021c0795a96 100644
--- a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
@@ -10,7 +10,6 @@
* That bug allowed a user-mode program that called the KVM_SET_SREGS
* ioctl to put a VCPU's local APIC into an invalid state.
*/
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c b/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c
new file mode 100644
index 000000000000..3fb967f40c6a
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/sev_init2_tests.c
@@ -0,0 +1,152 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kvm.h>
+#include <linux/psp-sev.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <pthread.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "kselftest.h"
+
+#define SVM_SEV_FEAT_DEBUG_SWAP 32u
+
+/*
+ * Some features may have hidden dependencies, or may only work
+ * for certain VM types. Err on the side of safety and don't
+ * expect that all supported features can be passed one by one
+ * to KVM_SEV_INIT2.
+ *
+ * (Well, right now there's only one...)
+ */
+#define KNOWN_FEATURES SVM_SEV_FEAT_DEBUG_SWAP
+
+int kvm_fd;
+u64 supported_vmsa_features;
+bool have_sev_es;
+
+static int __sev_ioctl(int vm_fd, int cmd_id, void *data)
+{
+ struct kvm_sev_cmd cmd = {
+ .id = cmd_id,
+ .data = (uint64_t)data,
+ .sev_fd = open_sev_dev_path_or_exit(),
+ };
+ int ret;
+
+ ret = ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
+ TEST_ASSERT(ret < 0 || cmd.error == SEV_RET_SUCCESS,
+ "%d failed: fw error: %d\n",
+ cmd_id, cmd.error);
+
+ return ret;
+}
+
+static void test_init2(unsigned long vm_type, struct kvm_sev_init *init)
+{
+ struct kvm_vm *vm;
+ int ret;
+
+ vm = vm_create_barebones_type(vm_type);
+ ret = __sev_ioctl(vm->fd, KVM_SEV_INIT2, init);
+ TEST_ASSERT(ret == 0,
+ "KVM_SEV_INIT2 return code is %d (expected 0), errno: %d",
+ ret, errno);
+ kvm_vm_free(vm);
+}
+
+static void test_init2_invalid(unsigned long vm_type, struct kvm_sev_init *init, const char *msg)
+{
+ struct kvm_vm *vm;
+ int ret;
+
+ vm = vm_create_barebones_type(vm_type);
+ ret = __sev_ioctl(vm->fd, KVM_SEV_INIT2, init);
+ TEST_ASSERT(ret == -1 && errno == EINVAL,
+ "KVM_SEV_INIT2 should fail, %s.",
+ msg);
+ kvm_vm_free(vm);
+}
+
+void test_vm_types(void)
+{
+ test_init2(KVM_X86_SEV_VM, &(struct kvm_sev_init){});
+
+ /*
+ * TODO: check that unsupported types cannot be created. Probably
+ * a separate selftest.
+ */
+ if (have_sev_es)
+ test_init2(KVM_X86_SEV_ES_VM, &(struct kvm_sev_init){});
+
+ test_init2_invalid(0, &(struct kvm_sev_init){},
+ "VM type is KVM_X86_DEFAULT_VM");
+ if (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM))
+ test_init2_invalid(KVM_X86_SW_PROTECTED_VM, &(struct kvm_sev_init){},
+ "VM type is KVM_X86_SW_PROTECTED_VM");
+}
+
+void test_flags(uint32_t vm_type)
+{
+ int i;
+
+ for (i = 0; i < 32; i++)
+ test_init2_invalid(vm_type,
+ &(struct kvm_sev_init){ .flags = BIT(i) },
+ "invalid flag");
+}
+
+void test_features(uint32_t vm_type, uint64_t supported_features)
+{
+ int i;
+
+ for (i = 0; i < 64; i++) {
+ if (!(supported_features & BIT_ULL(i)))
+ test_init2_invalid(vm_type,
+ &(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) },
+ "unknown feature");
+ else if (KNOWN_FEATURES & BIT_ULL(i))
+ test_init2(vm_type,
+ &(struct kvm_sev_init){ .vmsa_features = BIT_ULL(i) });
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ int kvm_fd = open_kvm_dev_path_or_exit();
+ bool have_sev;
+
+ TEST_REQUIRE(__kvm_has_device_attr(kvm_fd, KVM_X86_GRP_SEV,
+ KVM_X86_SEV_VMSA_FEATURES) == 0);
+ kvm_device_attr_get(kvm_fd, KVM_X86_GRP_SEV,
+ KVM_X86_SEV_VMSA_FEATURES,
+ &supported_vmsa_features);
+
+ have_sev = kvm_cpu_has(X86_FEATURE_SEV);
+ TEST_ASSERT(have_sev == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_VM)),
+ "sev: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)",
+ kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_VM);
+
+ TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_VM));
+ have_sev_es = kvm_cpu_has(X86_FEATURE_SEV_ES);
+
+ TEST_ASSERT(have_sev_es == !!(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SEV_ES_VM)),
+ "sev-es: KVM_CAP_VM_TYPES (%x) does not match cpuid (checking %x)",
+ kvm_check_cap(KVM_CAP_VM_TYPES), 1 << KVM_X86_SEV_ES_VM);
+
+ test_vm_types();
+
+ test_flags(KVM_X86_SEV_VM);
+ if (have_sev_es)
+ test_flags(KVM_X86_SEV_ES_VM);
+
+ test_features(KVM_X86_SEV_VM, 0);
+ if (have_sev_es)
+ test_features(KVM_X86_SEV_ES_VM, supported_vmsa_features);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c b/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c
index 026779f3ed06..7c70c0da4fb7 100644
--- a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c
+++ b/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c
@@ -4,6 +4,7 @@
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
+#include <math.h>
#include "test_util.h"
#include "kvm_util.h"
@@ -13,6 +14,8 @@
#include "sev.h"
+#define XFEATURE_MASK_X87_AVX (XFEATURE_MASK_FP | XFEATURE_MASK_SSE | XFEATURE_MASK_YMM)
+
static void guest_sev_es_code(void)
{
/* TODO: Check CPUID after GHCB-based hypercall support is added. */
@@ -35,13 +38,98 @@ static void guest_sev_code(void)
GUEST_DONE();
}
+/* Stash state passed via VMSA before any compiled code runs. */
+extern void guest_code_xsave(void);
+asm("guest_code_xsave:\n"
+ "mov $-1, %eax\n"
+ "mov $-1, %edx\n"
+ "xsave (%rdi)\n"
+ "jmp guest_sev_es_code");
+
+static void compare_xsave(u8 *from_host, u8 *from_guest)
+{
+ int i;
+ bool bad = false;
+ for (i = 0; i < 4095; i++) {
+ if (from_host[i] != from_guest[i]) {
+ printf("mismatch at %02hhx | %02hhx %02hhx\n", i, from_host[i], from_guest[i]);
+ bad = true;
+ }
+ }
+
+ if (bad)
+ abort();
+}
+
+static void test_sync_vmsa(uint32_t policy)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ vm_vaddr_t gva;
+ void *hva;
+
+ double x87val = M_PI;
+ struct kvm_xsave __attribute__((aligned(64))) xsave = { 0 };
+ struct kvm_sregs sregs;
+ struct kvm_xcrs xcrs = {
+ .nr_xcrs = 1,
+ .xcrs[0].xcr = 0,
+ .xcrs[0].value = XFEATURE_MASK_X87_AVX,
+ };
+
+ vm = vm_sev_create_with_one_vcpu(KVM_X86_SEV_ES_VM, guest_code_xsave, &vcpu);
+ gva = vm_vaddr_alloc_shared(vm, PAGE_SIZE, KVM_UTIL_MIN_VADDR,
+ MEM_REGION_TEST_DATA);
+ hva = addr_gva2hva(vm, gva);
+
+ vcpu_args_set(vcpu, 1, gva);
+
+ vcpu_sregs_get(vcpu, &sregs);
+ sregs.cr4 |= X86_CR4_OSFXSR | X86_CR4_OSXSAVE;
+ vcpu_sregs_set(vcpu, &sregs);
+
+ vcpu_xcrs_set(vcpu, &xcrs);
+ asm("fninit\n"
+ "vpcmpeqb %%ymm4, %%ymm4, %%ymm4\n"
+ "fldl %3\n"
+ "xsave (%2)\n"
+ "fstp %%st\n"
+ : "=m"(xsave)
+ : "A"(XFEATURE_MASK_X87_AVX), "r"(&xsave), "m" (x87val)
+ : "ymm4", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)");
+ vcpu_xsave_set(vcpu, &xsave);
+
+ vm_sev_launch(vm, SEV_POLICY_ES | policy, NULL);
+
+ /* This page is shared, so make it decrypted. */
+ memset(hva, 0, 4096);
+
+ vcpu_run(vcpu);
+
+ TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SYSTEM_EVENT,
+ "Wanted SYSTEM_EVENT, got %s",
+ exit_reason_str(vcpu->run->exit_reason));
+ TEST_ASSERT_EQ(vcpu->run->system_event.type, KVM_SYSTEM_EVENT_SEV_TERM);
+ TEST_ASSERT_EQ(vcpu->run->system_event.ndata, 1);
+ TEST_ASSERT_EQ(vcpu->run->system_event.data[0], GHCB_MSR_TERM_REQ);
+
+ compare_xsave((u8 *)&xsave, (u8 *)hva);
+
+ kvm_vm_free(vm);
+}
+
static void test_sev(void *guest_code, uint64_t policy)
{
struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
struct ucall uc;
- vm = vm_sev_create_with_one_vcpu(policy, guest_code, &vcpu);
+ uint32_t type = policy & SEV_POLICY_ES ? KVM_X86_SEV_ES_VM : KVM_X86_SEV_VM;
+
+ vm = vm_sev_create_with_one_vcpu(type, guest_code, &vcpu);
+
+ /* TODO: Validate the measurement is as expected. */
+ vm_sev_launch(vm, policy, NULL);
for (;;) {
vcpu_run(vcpu);
@@ -82,6 +170,12 @@ int main(int argc, char *argv[])
if (kvm_cpu_has(X86_FEATURE_SEV_ES)) {
test_sev(guest_sev_es_code, SEV_POLICY_ES | SEV_POLICY_NO_DBG);
test_sev(guest_sev_es_code, SEV_POLICY_ES);
+
+ if (kvm_has_cap(KVM_CAP_XCRS) &&
+ (xgetbv(0) & XFEATURE_MASK_X87_AVX) == XFEATURE_MASK_X87_AVX) {
+ test_sync_vmsa(0);
+ test_sync_vmsa(SEV_POLICY_NO_DBG);
+ }
}
return 0;
diff --git a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c
index 416207c38a17..fabeeaddfb3a 100644
--- a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c
+++ b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c
@@ -5,9 +5,6 @@
* Test that KVM emulates instructions in response to EPT violations when
* allow_smaller_maxphyaddr is enabled and guest.MAXPHYADDR < host.MAXPHYADDR.
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
-
#include "flds_emulation.h"
#include "test_util.h"
@@ -60,9 +57,6 @@ int main(int argc, char *argv[])
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
vcpu_args_set(vcpu, 1, kvm_is_tdp_enabled());
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
-
vcpu_set_cpuid_property(vcpu, X86_PROPERTY_MAX_PHY_ADDR, MAXPHYADDR);
rc = kvm_check_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE);
diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c
index e18b86666e1f..55c88d664a94 100644
--- a/tools/testing/selftests/kvm/x86_64/smm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/smm_test.c
@@ -4,7 +4,6 @@
*
* Tests for SMM.
*/
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c
index 88b58aab7207..1c756db329e5 100644
--- a/tools/testing/selftests/kvm/x86_64/state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/state_test.c
@@ -6,7 +6,6 @@
*
* Tests for vCPU state save/restore, including nested guest state.
*/
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c
index 32bef39bec21..916e04248fbb 100644
--- a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c
+++ b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c
@@ -93,9 +93,6 @@ int main(int argc, char *argv[])
vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
-
vm_install_exception_handler(vm, VINTR_IRQ_NUMBER, vintr_irq_handler);
vm_install_exception_handler(vm, INTR_IRQ_NUMBER, intr_irq_handler);
diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c b/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c
index d6fcdcc3af31..00135cbba35e 100644
--- a/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c
+++ b/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c
@@ -48,12 +48,9 @@ int main(int argc, char *argv[])
TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
-
vcpu_alloc_svm(vm, &svm_gva);
- vcpu_args_set(vcpu, 2, svm_gva, vm->idt);
+ vcpu_args_set(vcpu, 2, svm_gva, vm->arch.idt);
vcpu_run(vcpu);
TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN);
diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c
index 0c7ce3d4e83a..7b6481d6c0d3 100644
--- a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c
+++ b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c
@@ -152,9 +152,6 @@ static void run_test(bool is_nmi)
vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
-
vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler);
vm_install_exception_handler(vm, BP_VECTOR, guest_bp_handler);
vm_install_exception_handler(vm, INT_NR, guest_int_handler);
@@ -166,7 +163,7 @@ static void run_test(bool is_nmi)
idt_alt_vm = vm_vaddr_alloc_page(vm);
idt_alt = addr_gva2hva(vm, idt_alt_vm);
- idt = addr_gva2hva(vm, vm->idt);
+ idt = addr_gva2hva(vm, vm->arch.idt);
memcpy(idt_alt, idt, getpagesize());
} else {
idt_alt_vm = 0;
diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
index adb5593daf48..8fa3948b0170 100644
--- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
@@ -8,8 +8,6 @@
* including requesting an invalid register set, updates to/from values
* in kvm_run.s.regs when kvm_valid_regs and kvm_dirty_regs are toggled.
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c b/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c
index dcbb3c29fb8e..57f157c06b39 100644
--- a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c
+++ b/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c
@@ -17,14 +17,11 @@
* delivered into the guest or not.
*
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <pthread.h>
#include <inttypes.h>
#include <string.h>
#include <time.h>
-#include "kvm_util_base.h"
#include "kvm_util.h"
#include "mce.h"
#include "processor.h"
@@ -285,10 +282,6 @@ int main(int argc, char *argv[])
cmcidis_vcpu = create_vcpu_with_mce_cap(vm, 1, false, cmci_disabled_guest_code);
cmci_vcpu = create_vcpu_with_mce_cap(vm, 2, true, cmci_enabled_guest_code);
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(ucna_vcpu);
- vcpu_init_descriptor_tables(cmcidis_vcpu);
- vcpu_init_descriptor_tables(cmci_vcpu);
vm_install_exception_handler(vm, CMCI_VECTOR, guest_cmci_handler);
vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
index f4f61a2d2464..32b2794b78fe 100644
--- a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
+++ b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
@@ -4,8 +4,6 @@
*
* Tests for exiting into userspace on registered MSRs
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <sys/ioctl.h>
#include "kvm_test_harness.h"
@@ -13,8 +11,6 @@
#include "kvm_util.h"
#include "vmx.h"
-static bool fep_available;
-
#define MSR_NON_EXISTENT 0x474f4f00
static u64 deny_bits = 0;
@@ -258,7 +254,7 @@ static void guest_code_filter_allow(void)
GUEST_ASSERT(data == 2);
GUEST_ASSERT(guest_exception_count == 0);
- if (fep_available) {
+ if (is_forced_emulation_enabled) {
/* Let userspace know we aren't done. */
GUEST_SYNC(0);
@@ -520,8 +516,6 @@ KVM_ONE_VCPU_TEST(user_msr, msr_filter_allow, guest_code_filter_allow)
uint64_t cmd;
int rc;
- sync_global_to_guest(vm, fep_available);
-
rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_FILTER);
@@ -531,9 +525,6 @@ KVM_ONE_VCPU_TEST(user_msr, msr_filter_allow, guest_code_filter_allow)
vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_allow);
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
-
vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
/* Process guest code userspace exits. */
@@ -551,7 +542,7 @@ KVM_ONE_VCPU_TEST(user_msr, msr_filter_allow, guest_code_filter_allow)
vcpu_run(vcpu);
cmd = process_ucall(vcpu);
- if (fep_available) {
+ if (is_forced_emulation_enabled) {
TEST_ASSERT_EQ(cmd, UCALL_SYNC);
vm_install_exception_handler(vm, GP_VECTOR, guest_fep_gp_handler);
@@ -774,7 +765,5 @@ KVM_ONE_VCPU_TEST(user_msr, user_exit_msr_flags, NULL)
int main(int argc, char *argv[])
{
- fep_available = kvm_is_forced_emulation_enabled();
-
return test_harness_run(argc, argv);
}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
index 977948fd52e6..fa512d033205 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
@@ -4,9 +4,6 @@
*
* Copyright (C) 2018, Red Hat, Inc.
*/
-
-#define _GNU_SOURCE /* for program_invocation_name */
-
#include <stdio.h>
#include <stdlib.h>
#include <linux/bitmap.h>
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c
index fad3634fd9eb..3fd6eceab46f 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c
@@ -115,9 +115,6 @@ int main(int argc, char *argv[])
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
get_set_sigalrm_vcpu(vcpu);
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
-
vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
/*
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
index ea0cb3cae0f7..7c92536551cc 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
@@ -10,7 +10,6 @@
* and check it can be retrieved with KVM_GET_MSR, also test
* the invalid LBR formats are rejected.
*/
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <sys/ioctl.h>
#include <linux/bitmap.h>
@@ -86,9 +85,6 @@ KVM_ONE_VCPU_TEST(vmx_pmu_caps, guest_wrmsr_perf_capabilities, guest_code)
struct ucall uc;
int r, i;
- vm_init_descriptor_tables(vcpu->vm);
- vcpu_init_descriptor_tables(vcpu);
-
vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
vcpu_args_set(vcpu, 1, host_cap.capabilities);
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
index affc32800158..00dd2ac07a61 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
@@ -9,7 +9,6 @@
* value instead of partially decayed timer value
*
*/
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
index 725c206ba0b9..a76078a08ff8 100644
--- a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
@@ -19,8 +19,6 @@
* Migration is a command line option. When used on non-numa machines will
* exit with error. Test is still usefull on non-numa for testing IPIs.
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <getopt.h>
#include <pthread.h>
#include <inttypes.h>
@@ -410,8 +408,6 @@ int main(int argc, char *argv[])
vm = vm_create_with_one_vcpu(&params[0].vcpu, halter_guest_code);
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(params[0].vcpu);
vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler);
virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
diff --git a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c
index ab75b873a4ad..69849acd95b0 100644
--- a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c
@@ -1,5 +1,4 @@
// SPDX-License-Identifier: GPL-2.0-only
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c
index 25a0b0db5c3c..95ce192d0753 100644
--- a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c
@@ -109,9 +109,6 @@ int main(int argc, char *argv[])
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
run = vcpu->run;
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
-
while (1) {
vcpu_run(vcpu);
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
index d2ea0435f4f7..a59b3c799bb2 100644
--- a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -125,7 +125,7 @@ struct compat_vcpu_runstate_info {
uint32_t state;
uint64_t state_entry_time;
uint64_t time[5];
-} __attribute__((__packed__));;
+} __attribute__((__packed__));
struct arch_vcpu_info {
unsigned long cr2;
@@ -171,8 +171,9 @@ static volatile bool guest_saw_irq;
static void evtchn_handler(struct ex_regs *regs)
{
struct vcpu_info *vi = (void *)VCPU_INFO_VADDR;
- vi->evtchn_upcall_pending = 0;
- vi->evtchn_pending_sel = 0;
+
+ vcpu_arch_put_guest(vi->evtchn_upcall_pending, 0);
+ vcpu_arch_put_guest(vi->evtchn_pending_sel, 0);
guest_saw_irq = true;
GUEST_SYNC(TEST_GUEST_SAW_IRQ);
@@ -380,20 +381,6 @@ wait_for_timer:
GUEST_SYNC(TEST_DONE);
}
-static int cmp_timespec(struct timespec *a, struct timespec *b)
-{
- if (a->tv_sec > b->tv_sec)
- return 1;
- else if (a->tv_sec < b->tv_sec)
- return -1;
- else if (a->tv_nsec > b->tv_nsec)
- return 1;
- else if (a->tv_nsec < b->tv_nsec)
- return -1;
- else
- return 0;
-}
-
static struct shared_info *shinfo;
static struct vcpu_info *vinfo;
static struct kvm_vcpu *vcpu;
@@ -449,7 +436,6 @@ static void *juggle_shinfo_state(void *arg)
int main(int argc, char *argv[])
{
- struct timespec min_ts, max_ts, vm_ts;
struct kvm_xen_hvm_attr evt_reset;
struct kvm_vm *vm;
pthread_t thread;
@@ -468,8 +454,6 @@ int main(int argc, char *argv[])
bool do_evtchn_tests = do_eventfd_tests && !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND);
bool has_shinfo_hva = !!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA);
- clock_gettime(CLOCK_REALTIME, &min_ts);
-
vm = vm_create_with_one_vcpu(&vcpu, guest_code);
/* Map a region for the shared_info page */
@@ -553,8 +537,6 @@ int main(int argc, char *argv[])
};
vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec);
- vm_init_descriptor_tables(vm);
- vcpu_init_descriptor_tables(vcpu);
vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler);
if (do_runstate_tests) {
@@ -1010,7 +992,6 @@ int main(int argc, char *argv[])
vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &evt_reset);
alarm(0);
- clock_gettime(CLOCK_REALTIME, &max_ts);
/*
* Just a *really* basic check that things are being put in the
@@ -1019,6 +1000,8 @@ int main(int argc, char *argv[])
*/
struct pvclock_wall_clock *wc;
struct pvclock_vcpu_time_info *ti, *ti2;
+ struct kvm_clock_data kcdata;
+ long long delta;
wc = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0xc00);
ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20);
@@ -1034,12 +1017,34 @@ int main(int argc, char *argv[])
ti2->tsc_shift, ti2->flags);
}
- vm_ts.tv_sec = wc->sec;
- vm_ts.tv_nsec = wc->nsec;
TEST_ASSERT(wc->version && !(wc->version & 1),
"Bad wallclock version %x", wc->version);
- TEST_ASSERT(cmp_timespec(&min_ts, &vm_ts) <= 0, "VM time too old");
- TEST_ASSERT(cmp_timespec(&max_ts, &vm_ts) >= 0, "VM time too new");
+
+ vm_ioctl(vm, KVM_GET_CLOCK, &kcdata);
+
+ if (kcdata.flags & KVM_CLOCK_REALTIME) {
+ if (verbose) {
+ printf("KVM_GET_CLOCK clock: %lld.%09lld\n",
+ kcdata.clock / NSEC_PER_SEC, kcdata.clock % NSEC_PER_SEC);
+ printf("KVM_GET_CLOCK realtime: %lld.%09lld\n",
+ kcdata.realtime / NSEC_PER_SEC, kcdata.realtime % NSEC_PER_SEC);
+ }
+
+ delta = (wc->sec * NSEC_PER_SEC + wc->nsec) - (kcdata.realtime - kcdata.clock);
+
+ /*
+ * KVM_GET_CLOCK gives CLOCK_REALTIME which jumps on leap seconds updates but
+ * unfortunately KVM doesn't currently offer a CLOCK_TAI alternative. Accept 1s
+ * delta as testing clock accuracy is not the goal here. The test just needs to
+ * check that the value in shinfo is somewhat sane.
+ */
+ TEST_ASSERT(llabs(delta) < NSEC_PER_SEC,
+ "Guest's epoch from shinfo %d.%09d differs from KVM_GET_CLOCK %lld.%lld",
+ wc->sec, wc->nsec, (kcdata.realtime - kcdata.clock) / NSEC_PER_SEC,
+ (kcdata.realtime - kcdata.clock) % NSEC_PER_SEC);
+ } else {
+ pr_info("Missing KVM_CLOCK_REALTIME, skipping shinfo epoch sanity check\n");
+ }
TEST_ASSERT(ti->version && !(ti->version & 1),
"Bad time_info version %x", ti->version);
diff --git a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c
index 167c97abff1b..f331a4e9bae3 100644
--- a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c
@@ -4,8 +4,6 @@
*
* Tests for the IA32_XSS MSR.
*/
-
-#define _GNU_SOURCE /* for program_invocation_short_name */
#include <sys/ioctl.h>
#include "test_util.h"
diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c
index a6f89aaea77d..3b26bf3cf5b9 100644
--- a/tools/testing/selftests/landlock/base_test.c
+++ b/tools/testing/selftests/landlock/base_test.c
@@ -9,6 +9,7 @@
#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
+#include <linux/keyctl.h>
#include <linux/landlock.h>
#include <string.h>
#include <sys/prctl.h>
@@ -75,7 +76,7 @@ TEST(abi_version)
const struct landlock_ruleset_attr ruleset_attr = {
.handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE,
};
- ASSERT_EQ(4, landlock_create_ruleset(NULL, 0,
+ ASSERT_EQ(5, landlock_create_ruleset(NULL, 0,
LANDLOCK_CREATE_RULESET_VERSION));
ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0,
@@ -326,4 +327,77 @@ TEST(ruleset_fd_transfer)
ASSERT_EQ(EXIT_SUCCESS, WEXITSTATUS(status));
}
+TEST(cred_transfer)
+{
+ struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR,
+ };
+ int ruleset_fd, dir_fd;
+ pid_t child;
+ int status;
+
+ drop_caps(_metadata);
+
+ dir_fd = open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC);
+ EXPECT_LE(0, dir_fd);
+ EXPECT_EQ(0, close(dir_fd));
+
+ /* Denies opening directories. */
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+ ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0));
+ EXPECT_EQ(0, close(ruleset_fd));
+
+ /* Checks ruleset enforcement. */
+ EXPECT_EQ(-1, open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC));
+ EXPECT_EQ(EACCES, errno);
+
+ /* Needed for KEYCTL_SESSION_TO_PARENT permission checks */
+ EXPECT_NE(-1, syscall(__NR_keyctl, KEYCTL_JOIN_SESSION_KEYRING, NULL, 0,
+ 0, 0))
+ {
+ TH_LOG("Failed to join session keyring: %s", strerror(errno));
+ }
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ /* Checks ruleset enforcement. */
+ EXPECT_EQ(-1, open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC));
+ EXPECT_EQ(EACCES, errno);
+
+ /*
+ * KEYCTL_SESSION_TO_PARENT is a no-op unless we have a
+ * different session keyring in the child, so make that happen.
+ */
+ EXPECT_NE(-1, syscall(__NR_keyctl, KEYCTL_JOIN_SESSION_KEYRING,
+ NULL, 0, 0, 0));
+
+ /*
+ * KEYCTL_SESSION_TO_PARENT installs credentials on the parent
+ * that never go through the cred_prepare hook, this path uses
+ * cred_transfer instead.
+ */
+ EXPECT_EQ(0, syscall(__NR_keyctl, KEYCTL_SESSION_TO_PARENT, 0,
+ 0, 0, 0));
+
+ /* Re-checks ruleset enforcement. */
+ EXPECT_EQ(-1, open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC));
+ EXPECT_EQ(EACCES, errno);
+
+ _exit(_metadata->exit_code);
+ return;
+ }
+
+ EXPECT_EQ(child, waitpid(child, &status, 0));
+ EXPECT_EQ(1, WIFEXITED(status));
+ EXPECT_EQ(EXIT_SUCCESS, WEXITSTATUS(status));
+
+ /* Re-checks ruleset enforcement. */
+ EXPECT_EQ(-1, open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC));
+ EXPECT_EQ(EACCES, errno);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/landlock/config b/tools/testing/selftests/landlock/config
index 0086efaa7b68..29af19c4e9f9 100644
--- a/tools/testing/selftests/landlock/config
+++ b/tools/testing/selftests/landlock/config
@@ -2,6 +2,7 @@ CONFIG_CGROUPS=y
CONFIG_CGROUP_SCHED=y
CONFIG_INET=y
CONFIG_IPV6=y
+CONFIG_KEYS=y
CONFIG_NET=y
CONFIG_NET_NS=y
CONFIG_OVERLAY_FS=y
diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
index 9a6036fbf289..7d063c652be1 100644
--- a/tools/testing/selftests/landlock/fs_test.c
+++ b/tools/testing/selftests/landlock/fs_test.c
@@ -8,21 +8,35 @@
*/
#define _GNU_SOURCE
+#include <asm/termbits.h>
#include <fcntl.h>
+#include <libgen.h>
+#include <linux/fiemap.h>
#include <linux/landlock.h>
#include <linux/magic.h>
#include <sched.h>
+#include <stddef.h>
#include <stdio.h>
#include <string.h>
#include <sys/capability.h>
+#include <sys/ioctl.h>
#include <sys/mount.h>
#include <sys/prctl.h>
#include <sys/sendfile.h>
+#include <sys/socket.h>
#include <sys/stat.h>
#include <sys/sysmacros.h>
+#include <sys/un.h>
#include <sys/vfs.h>
#include <unistd.h>
+/*
+ * Intentionally included last to work around header conflict.
+ * See https://sourceware.org/glibc/wiki/Synchronizing_Headers.
+ */
+#include <linux/fs.h>
+#include <linux/mount.h>
+
#include "common.h"
#ifndef renameat2
@@ -34,6 +48,13 @@ int renameat2(int olddirfd, const char *oldpath, int newdirfd,
}
#endif
+#ifndef open_tree
+int open_tree(int dfd, const char *filename, unsigned int flags)
+{
+ return syscall(__NR_open_tree, dfd, filename, flags);
+}
+#endif
+
#ifndef RENAME_EXCHANGE
#define RENAME_EXCHANGE (1 << 1)
#endif
@@ -285,15 +306,21 @@ static void prepare_layout_opt(struct __test_metadata *const _metadata,
static void prepare_layout(struct __test_metadata *const _metadata)
{
- _metadata->teardown_parent = true;
-
prepare_layout_opt(_metadata, &mnt_tmp);
}
static void cleanup_layout(struct __test_metadata *const _metadata)
{
set_cap(_metadata, CAP_SYS_ADMIN);
- EXPECT_EQ(0, umount(TMP_DIR));
+ if (umount(TMP_DIR)) {
+ /*
+ * According to the test environment, the mount point of the
+ * current directory may be shared or not, which changes the
+ * visibility of the nested TMP_DIR mount point for the test's
+ * parent process doing this cleanup.
+ */
+ ASSERT_EQ(EINVAL, errno);
+ }
clear_cap(_metadata, CAP_SYS_ADMIN);
EXPECT_EQ(0, remove_path(TMP_DIR));
}
@@ -307,7 +334,7 @@ FIXTURE_SETUP(layout0)
prepare_layout(_metadata);
}
-FIXTURE_TEARDOWN(layout0)
+FIXTURE_TEARDOWN_PARENT(layout0)
{
cleanup_layout(_metadata);
}
@@ -370,7 +397,7 @@ FIXTURE_SETUP(layout1)
create_layout1(_metadata);
}
-FIXTURE_TEARDOWN(layout1)
+FIXTURE_TEARDOWN_PARENT(layout1)
{
remove_layout1(_metadata);
@@ -529,9 +556,10 @@ TEST_F_FORK(layout1, inval)
LANDLOCK_ACCESS_FS_EXECUTE | \
LANDLOCK_ACCESS_FS_WRITE_FILE | \
LANDLOCK_ACCESS_FS_READ_FILE | \
- LANDLOCK_ACCESS_FS_TRUNCATE)
+ LANDLOCK_ACCESS_FS_TRUNCATE | \
+ LANDLOCK_ACCESS_FS_IOCTL_DEV)
-#define ACCESS_LAST LANDLOCK_ACCESS_FS_TRUNCATE
+#define ACCESS_LAST LANDLOCK_ACCESS_FS_IOCTL_DEV
#define ACCESS_ALL ( \
ACCESS_FILE | \
@@ -736,6 +764,9 @@ static int create_ruleset(struct __test_metadata *const _metadata,
}
for (i = 0; rules[i].path; i++) {
+ if (!rules[i].access)
+ continue;
+
add_path_beneath(_metadata, ruleset_fd, rules[i].access,
rules[i].path);
}
@@ -2377,6 +2408,43 @@ TEST_F_FORK(layout1, refer_denied_by_default4)
layer_dir_s1d1_refer);
}
+/*
+ * Tests walking through a denied root mount.
+ */
+TEST_F_FORK(layout1, refer_mount_root_deny)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_MAKE_DIR,
+ };
+ int root_fd, ruleset_fd;
+
+ /* Creates a mount object from a non-mount point. */
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ root_fd =
+ open_tree(AT_FDCWD, dir_s1d1,
+ AT_EMPTY_PATH | OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_LE(0, root_fd);
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+ ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0));
+ EXPECT_EQ(0, close(ruleset_fd));
+
+ /* Link denied by Landlock: EACCES. */
+ EXPECT_EQ(-1, linkat(root_fd, ".", root_fd, "does_not_exist", 0));
+ EXPECT_EQ(EACCES, errno);
+
+ /* renameat2() always returns EBUSY. */
+ EXPECT_EQ(-1, renameat2(root_fd, ".", root_fd, "does_not_exist", 0));
+ EXPECT_EQ(EBUSY, errno);
+
+ EXPECT_EQ(0, close(root_fd));
+}
+
TEST_F_FORK(layout1, reparent_link)
{
const struct rule layer1[] = {
@@ -3444,7 +3512,7 @@ TEST_F_FORK(layout1, truncate_unhandled)
LANDLOCK_ACCESS_FS_WRITE_FILE;
int ruleset_fd;
- /* Enable Landlock. */
+ /* Enables Landlock. */
ruleset_fd = create_ruleset(_metadata, handled, rules);
ASSERT_LE(0, ruleset_fd);
@@ -3527,7 +3595,7 @@ TEST_F_FORK(layout1, truncate)
LANDLOCK_ACCESS_FS_TRUNCATE;
int ruleset_fd;
- /* Enable Landlock. */
+ /* Enables Landlock. */
ruleset_fd = create_ruleset(_metadata, handled, rules);
ASSERT_LE(0, ruleset_fd);
@@ -3683,7 +3751,7 @@ FIXTURE_SETUP(ftruncate)
create_file(_metadata, file1_s1d1);
}
-FIXTURE_TEARDOWN(ftruncate)
+FIXTURE_TEARDOWN_PARENT(ftruncate)
{
EXPECT_EQ(0, remove_path(file1_s1d1));
cleanup_layout(_metadata);
@@ -3753,7 +3821,7 @@ TEST_F_FORK(ftruncate, open_and_ftruncate)
};
int fd, ruleset_fd;
- /* Enable Landlock. */
+ /* Enables Landlock. */
ruleset_fd = create_ruleset(_metadata, variant->handled, rules);
ASSERT_LE(0, ruleset_fd);
enforce_ruleset(_metadata, ruleset_fd);
@@ -3830,22 +3898,471 @@ TEST_F_FORK(ftruncate, open_and_ftruncate_in_different_processes)
ASSERT_EQ(0, close(socket_fds[1]));
}
-TEST(memfd_ftruncate)
+/* Invokes the FS_IOC_GETFLAGS IOCTL and returns its errno or 0. */
+static int test_fs_ioc_getflags_ioctl(int fd)
{
- int fd;
+ uint32_t flags;
+
+ if (ioctl(fd, FS_IOC_GETFLAGS, &flags) < 0)
+ return errno;
+ return 0;
+}
+
+TEST(memfd_ftruncate_and_ioctl)
+{
+ const struct landlock_ruleset_attr attr = {
+ .handled_access_fs = ACCESS_ALL,
+ };
+ int ruleset_fd, fd, i;
+
+ /*
+ * We exercise the same test both with and without Landlock enabled, to
+ * ensure that it behaves the same in both cases.
+ */
+ for (i = 0; i < 2; i++) {
+ /* Creates a new memfd. */
+ fd = memfd_create("name", MFD_CLOEXEC);
+ ASSERT_LE(0, fd);
+
+ /*
+ * Checks that operations associated with the opened file
+ * (ftruncate, ioctl) are permitted on file descriptors that are
+ * created in ways other than open(2).
+ */
+ EXPECT_EQ(0, test_ftruncate(fd));
+ EXPECT_EQ(0, test_fs_ioc_getflags_ioctl(fd));
+
+ ASSERT_EQ(0, close(fd));
+
+ /* Enables Landlock. */
+ ruleset_fd = landlock_create_ruleset(&attr, sizeof(attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+ }
+}
+
+static int test_fionread_ioctl(int fd)
+{
+ size_t sz = 0;
+
+ if (ioctl(fd, FIONREAD, &sz) < 0 && errno == EACCES)
+ return errno;
+ return 0;
+}
+
+TEST_F_FORK(layout1, o_path_ftruncate_and_ioctl)
+{
+ const struct landlock_ruleset_attr attr = {
+ .handled_access_fs = ACCESS_ALL,
+ };
+ int ruleset_fd, fd;
- fd = memfd_create("name", MFD_CLOEXEC);
+ /*
+ * Checks that for files opened with O_PATH, both ioctl(2) and
+ * ftruncate(2) yield EBADF, as it is documented in open(2) for the
+ * O_PATH flag.
+ */
+ fd = open(dir_s1d1, O_PATH | O_CLOEXEC);
ASSERT_LE(0, fd);
+ EXPECT_EQ(EBADF, test_ftruncate(fd));
+ EXPECT_EQ(EBADF, test_fs_ioc_getflags_ioctl(fd));
+
+ ASSERT_EQ(0, close(fd));
+
+ /* Enables Landlock. */
+ ruleset_fd = landlock_create_ruleset(&attr, sizeof(attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
/*
- * Checks that ftruncate is permitted on file descriptors that are
- * created in ways other than open(2).
+ * Checks that after enabling Landlock,
+ * - the file can still be opened with O_PATH
+ * - both ioctl and truncate still yield EBADF (not EACCES).
*/
- EXPECT_EQ(0, test_ftruncate(fd));
+ fd = open(dir_s1d1, O_PATH | O_CLOEXEC);
+ ASSERT_LE(0, fd);
+
+ EXPECT_EQ(EBADF, test_ftruncate(fd));
+ EXPECT_EQ(EBADF, test_fs_ioc_getflags_ioctl(fd));
ASSERT_EQ(0, close(fd));
}
+/*
+ * ioctl_error - generically call the given ioctl with a pointer to a
+ * sufficiently large zeroed-out memory region.
+ *
+ * Returns the IOCTLs error, or 0.
+ */
+static int ioctl_error(struct __test_metadata *const _metadata, int fd,
+ unsigned int cmd)
+{
+ char buf[128]; /* sufficiently large */
+ int res, stdinbak_fd;
+
+ /*
+ * Depending on the IOCTL command, parts of the zeroed-out buffer might
+ * be interpreted as file descriptor numbers. We do not want to
+ * accidentally operate on file descriptor 0 (stdin), so we temporarily
+ * move stdin to a different FD and close FD 0 for the IOCTL call.
+ */
+ stdinbak_fd = dup(0);
+ ASSERT_LT(0, stdinbak_fd);
+ ASSERT_EQ(0, close(0));
+
+ /* Invokes the IOCTL with a zeroed-out buffer. */
+ bzero(&buf, sizeof(buf));
+ res = ioctl(fd, cmd, &buf);
+
+ /* Restores the old FD 0 and closes the backup FD. */
+ ASSERT_EQ(0, dup2(stdinbak_fd, 0));
+ ASSERT_EQ(0, close(stdinbak_fd));
+
+ if (res < 0)
+ return errno;
+
+ return 0;
+}
+
+/* Define some linux/falloc.h IOCTL commands which are not available in uapi headers. */
+struct space_resv {
+ __s16 l_type;
+ __s16 l_whence;
+ __s64 l_start;
+ __s64 l_len; /* len == 0 means until end of file */
+ __s32 l_sysid;
+ __u32 l_pid;
+ __s32 l_pad[4]; /* reserved area */
+};
+
+#define FS_IOC_RESVSP _IOW('X', 40, struct space_resv)
+#define FS_IOC_UNRESVSP _IOW('X', 41, struct space_resv)
+#define FS_IOC_RESVSP64 _IOW('X', 42, struct space_resv)
+#define FS_IOC_UNRESVSP64 _IOW('X', 43, struct space_resv)
+#define FS_IOC_ZERO_RANGE _IOW('X', 57, struct space_resv)
+
+/*
+ * Tests a series of blanket-permitted and denied IOCTLs.
+ */
+TEST_F_FORK(layout1, blanket_permitted_ioctls)
+{
+ const struct landlock_ruleset_attr attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_IOCTL_DEV,
+ };
+ int ruleset_fd, fd;
+
+ /* Enables Landlock. */
+ ruleset_fd = landlock_create_ruleset(&attr, sizeof(attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ fd = open("/dev/null", O_RDWR | O_CLOEXEC);
+ ASSERT_LE(0, fd);
+
+ /*
+ * Checks permitted commands.
+ * These ones may return errors, but should not be blocked by Landlock.
+ */
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FIOCLEX));
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FIONCLEX));
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FIONBIO));
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FIOASYNC));
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FIOQSIZE));
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FIFREEZE));
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FITHAW));
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FS_IOC_FIEMAP));
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FIGETBSZ));
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FICLONE));
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FICLONERANGE));
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FIDEDUPERANGE));
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FS_IOC_GETFSUUID));
+ EXPECT_NE(EACCES, ioctl_error(_metadata, fd, FS_IOC_GETFSSYSFSPATH));
+
+ /*
+ * Checks blocked commands.
+ * A call to a blocked IOCTL command always returns EACCES.
+ */
+ EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FIONREAD));
+ EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FS_IOC_GETFLAGS));
+ EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FS_IOC_SETFLAGS));
+ EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FS_IOC_FSGETXATTR));
+ EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FS_IOC_FSSETXATTR));
+ EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FIBMAP));
+ EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FS_IOC_RESVSP));
+ EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FS_IOC_RESVSP64));
+ EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FS_IOC_UNRESVSP));
+ EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FS_IOC_UNRESVSP64));
+ EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, FS_IOC_ZERO_RANGE));
+
+ /* Default case is also blocked. */
+ EXPECT_EQ(EACCES, ioctl_error(_metadata, fd, 0xc00ffeee));
+
+ ASSERT_EQ(0, close(fd));
+}
+
+/*
+ * Named pipes are not governed by the LANDLOCK_ACCESS_FS_IOCTL_DEV right,
+ * because they are not character or block devices.
+ */
+TEST_F_FORK(layout1, named_pipe_ioctl)
+{
+ pid_t child_pid;
+ int fd, ruleset_fd;
+ const char *const path = file1_s1d1;
+ const struct landlock_ruleset_attr attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_IOCTL_DEV,
+ };
+
+ ASSERT_EQ(0, unlink(path));
+ ASSERT_EQ(0, mkfifo(path, 0600));
+
+ /* Enables Landlock. */
+ ruleset_fd = landlock_create_ruleset(&attr, sizeof(attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* The child process opens the pipe for writing. */
+ child_pid = fork();
+ ASSERT_NE(-1, child_pid);
+ if (child_pid == 0) {
+ fd = open(path, O_WRONLY);
+ close(fd);
+ exit(0);
+ }
+
+ fd = open(path, O_RDONLY);
+ ASSERT_LE(0, fd);
+
+ /* FIONREAD is implemented by pipefifo_fops. */
+ EXPECT_EQ(0, test_fionread_ioctl(fd));
+
+ ASSERT_EQ(0, close(fd));
+ ASSERT_EQ(0, unlink(path));
+
+ ASSERT_EQ(child_pid, waitpid(child_pid, NULL, 0));
+}
+
+/* For named UNIX domain sockets, no IOCTL restrictions apply. */
+TEST_F_FORK(layout1, named_unix_domain_socket_ioctl)
+{
+ const char *const path = file1_s1d1;
+ int srv_fd, cli_fd, ruleset_fd;
+ socklen_t size;
+ struct sockaddr_un srv_un, cli_un;
+ const struct landlock_ruleset_attr attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_IOCTL_DEV,
+ };
+
+ /* Sets up a server */
+ srv_un.sun_family = AF_UNIX;
+ strncpy(srv_un.sun_path, path, sizeof(srv_un.sun_path));
+
+ ASSERT_EQ(0, unlink(path));
+ srv_fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_LE(0, srv_fd);
+
+ size = offsetof(struct sockaddr_un, sun_path) + strlen(srv_un.sun_path);
+ ASSERT_EQ(0, bind(srv_fd, (struct sockaddr *)&srv_un, size));
+ ASSERT_EQ(0, listen(srv_fd, 10 /* qlen */));
+
+ /* Enables Landlock. */
+ ruleset_fd = landlock_create_ruleset(&attr, sizeof(attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Sets up a client connection to it */
+ cli_un.sun_family = AF_UNIX;
+ cli_fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_LE(0, cli_fd);
+
+ size = offsetof(struct sockaddr_un, sun_path) + strlen(cli_un.sun_path);
+ ASSERT_EQ(0, bind(cli_fd, (struct sockaddr *)&cli_un, size));
+
+ bzero(&cli_un, sizeof(cli_un));
+ cli_un.sun_family = AF_UNIX;
+ strncpy(cli_un.sun_path, path, sizeof(cli_un.sun_path));
+ size = offsetof(struct sockaddr_un, sun_path) + strlen(cli_un.sun_path);
+
+ ASSERT_EQ(0, connect(cli_fd, (struct sockaddr *)&cli_un, size));
+
+ /* FIONREAD and other IOCTLs should not be forbidden. */
+ EXPECT_EQ(0, test_fionread_ioctl(cli_fd));
+
+ ASSERT_EQ(0, close(cli_fd));
+}
+
+/* clang-format off */
+FIXTURE(ioctl) {};
+
+FIXTURE_SETUP(ioctl) {};
+
+FIXTURE_TEARDOWN(ioctl) {};
+/* clang-format on */
+
+FIXTURE_VARIANT(ioctl)
+{
+ const __u64 handled;
+ const __u64 allowed;
+ const mode_t open_mode;
+ /*
+ * FIONREAD is used as a characteristic device-specific IOCTL command.
+ * It is implemented in fs/ioctl.c for regular files,
+ * but we do not blanket-permit it for devices.
+ */
+ const int expected_fionread_result;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(ioctl, handled_i_allowed_none) {
+ /* clang-format on */
+ .handled = LANDLOCK_ACCESS_FS_IOCTL_DEV,
+ .allowed = 0,
+ .open_mode = O_RDWR,
+ .expected_fionread_result = EACCES,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(ioctl, handled_i_allowed_i) {
+ /* clang-format on */
+ .handled = LANDLOCK_ACCESS_FS_IOCTL_DEV,
+ .allowed = LANDLOCK_ACCESS_FS_IOCTL_DEV,
+ .open_mode = O_RDWR,
+ .expected_fionread_result = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(ioctl, unhandled) {
+ /* clang-format on */
+ .handled = LANDLOCK_ACCESS_FS_EXECUTE,
+ .allowed = LANDLOCK_ACCESS_FS_EXECUTE,
+ .open_mode = O_RDWR,
+ .expected_fionread_result = 0,
+};
+
+TEST_F_FORK(ioctl, handle_dir_access_file)
+{
+ const int flag = 0;
+ const struct rule rules[] = {
+ {
+ .path = "/dev",
+ .access = variant->allowed,
+ },
+ {},
+ };
+ int file_fd, ruleset_fd;
+
+ /* Enables Landlock. */
+ ruleset_fd = create_ruleset(_metadata, variant->handled, rules);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ file_fd = open("/dev/zero", variant->open_mode);
+ ASSERT_LE(0, file_fd);
+
+ /* Checks that IOCTL commands return the expected errors. */
+ EXPECT_EQ(variant->expected_fionread_result,
+ test_fionread_ioctl(file_fd));
+
+ /* Checks that unrestrictable commands are unrestricted. */
+ EXPECT_EQ(0, ioctl(file_fd, FIOCLEX));
+ EXPECT_EQ(0, ioctl(file_fd, FIONCLEX));
+ EXPECT_EQ(0, ioctl(file_fd, FIONBIO, &flag));
+ EXPECT_EQ(0, ioctl(file_fd, FIOASYNC, &flag));
+ EXPECT_EQ(0, ioctl(file_fd, FIGETBSZ, &flag));
+
+ ASSERT_EQ(0, close(file_fd));
+}
+
+TEST_F_FORK(ioctl, handle_dir_access_dir)
+{
+ const int flag = 0;
+ const struct rule rules[] = {
+ {
+ .path = "/dev",
+ .access = variant->allowed,
+ },
+ {},
+ };
+ int dir_fd, ruleset_fd;
+
+ /* Enables Landlock. */
+ ruleset_fd = create_ruleset(_metadata, variant->handled, rules);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /*
+ * Ignore variant->open_mode for this test, as we intend to open a
+ * directory. If the directory can not be opened, the variant is
+ * infeasible to test with an opened directory.
+ */
+ dir_fd = open("/dev", O_RDONLY);
+ if (dir_fd < 0)
+ return;
+
+ /*
+ * Checks that IOCTL commands return the expected errors.
+ * We do not use the expected values from the fixture here.
+ *
+ * When using IOCTL on a directory, no Landlock restrictions apply.
+ */
+ EXPECT_EQ(0, test_fionread_ioctl(dir_fd));
+
+ /* Checks that unrestrictable commands are unrestricted. */
+ EXPECT_EQ(0, ioctl(dir_fd, FIOCLEX));
+ EXPECT_EQ(0, ioctl(dir_fd, FIONCLEX));
+ EXPECT_EQ(0, ioctl(dir_fd, FIONBIO, &flag));
+ EXPECT_EQ(0, ioctl(dir_fd, FIOASYNC, &flag));
+ EXPECT_EQ(0, ioctl(dir_fd, FIGETBSZ, &flag));
+
+ ASSERT_EQ(0, close(dir_fd));
+}
+
+TEST_F_FORK(ioctl, handle_file_access_file)
+{
+ const int flag = 0;
+ const struct rule rules[] = {
+ {
+ .path = "/dev/zero",
+ .access = variant->allowed,
+ },
+ {},
+ };
+ int file_fd, ruleset_fd;
+
+ /* Enables Landlock. */
+ ruleset_fd = create_ruleset(_metadata, variant->handled, rules);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ file_fd = open("/dev/zero", variant->open_mode);
+ ASSERT_LE(0, file_fd)
+ {
+ TH_LOG("Failed to open /dev/zero: %s", strerror(errno));
+ }
+
+ /* Checks that IOCTL commands return the expected errors. */
+ EXPECT_EQ(variant->expected_fionread_result,
+ test_fionread_ioctl(file_fd));
+
+ /* Checks that unrestrictable commands are unrestricted. */
+ EXPECT_EQ(0, ioctl(file_fd, FIOCLEX));
+ EXPECT_EQ(0, ioctl(file_fd, FIONCLEX));
+ EXPECT_EQ(0, ioctl(file_fd, FIONBIO, &flag));
+ EXPECT_EQ(0, ioctl(file_fd, FIOASYNC, &flag));
+ EXPECT_EQ(0, ioctl(file_fd, FIGETBSZ, &flag));
+
+ ASSERT_EQ(0, close(file_fd));
+}
+
/* clang-format off */
FIXTURE(layout1_bind) {};
/* clang-format on */
@@ -3861,7 +4378,7 @@ FIXTURE_SETUP(layout1_bind)
clear_cap(_metadata, CAP_SYS_ADMIN);
}
-FIXTURE_TEARDOWN(layout1_bind)
+FIXTURE_TEARDOWN_PARENT(layout1_bind)
{
/* umount(dir_s2d2)) is handled by namespace lifetime. */
@@ -4266,7 +4783,7 @@ FIXTURE_SETUP(layout2_overlay)
clear_cap(_metadata, CAP_SYS_ADMIN);
}
-FIXTURE_TEARDOWN(layout2_overlay)
+FIXTURE_TEARDOWN_PARENT(layout2_overlay)
{
if (self->skip_test)
SKIP(return, "overlayfs is not supported (teardown)");
@@ -4616,7 +5133,6 @@ FIXTURE(layout3_fs)
{
bool has_created_dir;
bool has_created_file;
- char *dir_path;
bool skip_test;
};
@@ -4675,11 +5191,24 @@ FIXTURE_VARIANT_ADD(layout3_fs, hostfs) {
.cwd_fs_magic = HOSTFS_SUPER_MAGIC,
};
+static char *dirname_alloc(const char *path)
+{
+ char *dup;
+
+ if (!path)
+ return NULL;
+
+ dup = strdup(path);
+ if (!dup)
+ return NULL;
+
+ return dirname(dup);
+}
+
FIXTURE_SETUP(layout3_fs)
{
struct stat statbuf;
- const char *slash;
- size_t dir_len;
+ char *dir_path = dirname_alloc(variant->file_path);
if (!supports_filesystem(variant->mnt.type) ||
!cwd_matches_fs(variant->cwd_fs_magic)) {
@@ -4687,27 +5216,15 @@ FIXTURE_SETUP(layout3_fs)
SKIP(return, "this filesystem is not supported (setup)");
}
- _metadata->teardown_parent = true;
-
- slash = strrchr(variant->file_path, '/');
- ASSERT_NE(slash, NULL);
- dir_len = (size_t)slash - (size_t)variant->file_path;
- ASSERT_LT(0, dir_len);
- self->dir_path = malloc(dir_len + 1);
- self->dir_path[dir_len] = '\0';
- strncpy(self->dir_path, variant->file_path, dir_len);
-
prepare_layout_opt(_metadata, &variant->mnt);
/* Creates directory when required. */
- if (stat(self->dir_path, &statbuf)) {
+ if (stat(dir_path, &statbuf)) {
set_cap(_metadata, CAP_DAC_OVERRIDE);
- EXPECT_EQ(0, mkdir(self->dir_path, 0700))
+ EXPECT_EQ(0, mkdir(dir_path, 0700))
{
TH_LOG("Failed to create directory \"%s\": %s",
- self->dir_path, strerror(errno));
- free(self->dir_path);
- self->dir_path = NULL;
+ dir_path, strerror(errno));
}
self->has_created_dir = true;
clear_cap(_metadata, CAP_DAC_OVERRIDE);
@@ -4728,9 +5245,11 @@ FIXTURE_SETUP(layout3_fs)
self->has_created_file = true;
clear_cap(_metadata, CAP_DAC_OVERRIDE);
}
+
+ free(dir_path);
}
-FIXTURE_TEARDOWN(layout3_fs)
+FIXTURE_TEARDOWN_PARENT(layout3_fs)
{
if (self->skip_test)
SKIP(return, "this filesystem is not supported (teardown)");
@@ -4746,16 +5265,17 @@ FIXTURE_TEARDOWN(layout3_fs)
}
if (self->has_created_dir) {
+ char *dir_path = dirname_alloc(variant->file_path);
+
set_cap(_metadata, CAP_DAC_OVERRIDE);
/*
* Don't check for error because the directory might already
* have been removed (cf. release_inode test).
*/
- rmdir(self->dir_path);
+ rmdir(dir_path);
clear_cap(_metadata, CAP_DAC_OVERRIDE);
+ free(dir_path);
}
- free(self->dir_path);
- self->dir_path = NULL;
cleanup_layout(_metadata);
}
@@ -4822,7 +5342,10 @@ TEST_F_FORK(layout3_fs, tag_inode_dir_mnt)
TEST_F_FORK(layout3_fs, tag_inode_dir_child)
{
- layer3_fs_tag_inode(_metadata, self, variant, self->dir_path);
+ char *dir_path = dirname_alloc(variant->file_path);
+
+ layer3_fs_tag_inode(_metadata, self, variant, dir_path);
+ free(dir_path);
}
TEST_F_FORK(layout3_fs, tag_inode_file)
@@ -4849,9 +5372,13 @@ TEST_F_FORK(layout3_fs, release_inodes)
if (self->has_created_file)
EXPECT_EQ(0, remove_path(variant->file_path));
- if (self->has_created_dir)
+ if (self->has_created_dir) {
+ char *dir_path = dirname_alloc(variant->file_path);
+
/* Don't check for error because of cgroup specificities. */
- remove_path(self->dir_path);
+ remove_path(dir_path);
+ free(dir_path);
+ }
ruleset_fd =
create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_DIR, layer1);
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index da2cade3bab0..d6edcfcb5be8 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -7,6 +7,8 @@ else ifneq ($(filter -%,$(LLVM)),)
LLVM_SUFFIX := $(LLVM)
endif
+CLANG := $(LLVM_PREFIX)clang$(LLVM_SUFFIX)
+
CLANG_TARGET_FLAGS_arm := arm-linux-gnueabi
CLANG_TARGET_FLAGS_arm64 := aarch64-linux-gnu
CLANG_TARGET_FLAGS_hexagon := hexagon-linux-musl
@@ -18,7 +20,13 @@ CLANG_TARGET_FLAGS_riscv := riscv64-linux-gnu
CLANG_TARGET_FLAGS_s390 := s390x-linux-gnu
CLANG_TARGET_FLAGS_x86 := x86_64-linux-gnu
CLANG_TARGET_FLAGS_x86_64 := x86_64-linux-gnu
-CLANG_TARGET_FLAGS := $(CLANG_TARGET_FLAGS_$(ARCH))
+
+# Default to host architecture if ARCH is not explicitly given.
+ifeq ($(ARCH),)
+CLANG_TARGET_FLAGS := $(shell $(CLANG) -print-target-triple)
+else
+CLANG_TARGET_FLAGS := $(CLANG_TARGET_FLAGS_$(ARCH))
+endif
ifeq ($(CROSS_COMPILE),)
ifeq ($(CLANG_TARGET_FLAGS),)
@@ -30,7 +38,15 @@ else
CLANG_FLAGS += --target=$(notdir $(CROSS_COMPILE:%-=%))
endif # CROSS_COMPILE
-CC := $(LLVM_PREFIX)clang$(LLVM_SUFFIX) $(CLANG_FLAGS) -fintegrated-as
+# gcc defaults to silence (off) for the following warnings, but clang defaults
+# to the opposite. The warnings are not useful for the kernel itself, which is
+# why they have remained disabled in gcc for the main kernel build. And it is
+# only due to including kernel data structures in the selftests, that we get the
+# warnings from clang. Therefore, disable the warnings for clang builds.
+CFLAGS += -Wno-address-of-packed-member
+CFLAGS += -Wno-gnu-variable-sized-type-not-at-end
+
+CC := $(CLANG) $(CLANG_FLAGS) -fintegrated-as
else
CC := $(CROSS_COMPILE)gcc
endif # LLVM
@@ -44,10 +60,33 @@ endif
selfdir = $(realpath $(dir $(filter %/lib.mk,$(MAKEFILE_LIST))))
top_srcdir = $(selfdir)/../../..
+# msg: emit succinct information message describing current building step
+# $1 - generic step name (e.g., CC, LINK, etc);
+# $2 - optional "flavor" specifier; if provided, will be emitted as [flavor];
+# $3 - target (assumed to be file); only file name will be emitted;
+# $4 - optional extra arg, emitted as-is, if provided.
+ifeq ($(V),1)
+Q =
+msg =
+else
+Q = @
+msg = @printf ' %-8s%s %s%s\n' "$(1)" "$(if $(2), [$(2)])" "$(notdir $(3))" "$(if $(4), $(4))";
+MAKEFLAGS += --no-print-directory
+endif
+
ifeq ($(KHDR_INCLUDES),)
KHDR_INCLUDES := -isystem $(top_srcdir)/usr/include
endif
+# In order to use newer items that haven't yet been added to the user's system
+# header files, add $(TOOLS_INCLUDES) to the compiler invocation in each
+# each selftest.
+# You may need to add files to that location, or to refresh an existing file. In
+# order to do that, run "make headers" from $(top_srcdir), then copy the
+# header file that you want from $(top_srcdir)/usr/include/... , to the matching
+# subdir in $(TOOLS_INCLUDE).
+TOOLS_INCLUDES := -isystem $(top_srcdir)/tools/include/uapi
+
# The following are built by lib.mk common compile rules.
# TEST_CUSTOM_PROGS should be used by tests that require
# custom build rule and prevent common build rule use.
@@ -157,6 +196,9 @@ endef
clean: $(if $(TEST_GEN_MODS_DIR),clean_mods_dir)
$(CLEAN)
+# Build with _GNU_SOURCE by default
+CFLAGS += -D_GNU_SOURCE=
+
# Enables to extend CFLAGS and LDFLAGS from command line, e.g.
# make USERCFLAGS=-Werror USERLDFLAGS=-static
CFLAGS += $(USERCFLAGS)
@@ -176,7 +218,8 @@ endif
ifeq ($(OVERRIDE_TARGETS),)
LOCAL_HDRS += $(selfdir)/kselftest_harness.h $(selfdir)/kselftest.h
$(OUTPUT)/%:%.c $(LOCAL_HDRS)
- $(LINK.c) $(filter-out $(LOCAL_HDRS),$^) $(LDLIBS) -o $@
+ $(call msg,CC,,$@)
+ $(Q)$(LINK.c) $(filter-out $(LOCAL_HDRS),$^) $(LDLIBS) -o $@
$(OUTPUT)/%.o:%.S
$(COMPILE.S) $^ -o $@
diff --git a/tools/testing/selftests/livepatch/test-livepatch.sh b/tools/testing/selftests/livepatch/test-livepatch.sh
index e3455a6b1158..65c9c058458d 100755
--- a/tools/testing/selftests/livepatch/test-livepatch.sh
+++ b/tools/testing/selftests/livepatch/test-livepatch.sh
@@ -4,7 +4,9 @@
. $(dirname $0)/functions.sh
-MOD_LIVEPATCH=test_klp_livepatch
+MOD_LIVEPATCH1=test_klp_livepatch
+MOD_LIVEPATCH2=test_klp_syscall
+MOD_LIVEPATCH3=test_klp_callbacks_demo
MOD_REPLACE=test_klp_atomic_replace
setup_config
@@ -16,33 +18,33 @@ setup_config
start_test "basic function patching"
-load_lp $MOD_LIVEPATCH
+load_lp $MOD_LIVEPATCH1
-if [[ "$(cat /proc/cmdline)" != "$MOD_LIVEPATCH: this has been live patched" ]] ; then
+if [[ "$(cat /proc/cmdline)" != "$MOD_LIVEPATCH1: this has been live patched" ]] ; then
echo -e "FAIL\n\n"
die "livepatch kselftest(s) failed"
fi
-disable_lp $MOD_LIVEPATCH
-unload_lp $MOD_LIVEPATCH
+disable_lp $MOD_LIVEPATCH1
+unload_lp $MOD_LIVEPATCH1
-if [[ "$(cat /proc/cmdline)" == "$MOD_LIVEPATCH: this has been live patched" ]] ; then
+if [[ "$(cat /proc/cmdline)" == "$MOD_LIVEPATCH1: this has been live patched" ]] ; then
echo -e "FAIL\n\n"
die "livepatch kselftest(s) failed"
fi
-check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
-livepatch: enabling patch '$MOD_LIVEPATCH'
-livepatch: '$MOD_LIVEPATCH': initializing patching transition
-livepatch: '$MOD_LIVEPATCH': starting patching transition
-livepatch: '$MOD_LIVEPATCH': completing patching transition
-livepatch: '$MOD_LIVEPATCH': patching complete
-% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled
-livepatch: '$MOD_LIVEPATCH': initializing unpatching transition
-livepatch: '$MOD_LIVEPATCH': starting unpatching transition
-livepatch: '$MOD_LIVEPATCH': completing unpatching transition
-livepatch: '$MOD_LIVEPATCH': unpatching complete
-% rmmod $MOD_LIVEPATCH"
+check_result "% insmod test_modules/$MOD_LIVEPATCH1.ko
+livepatch: enabling patch '$MOD_LIVEPATCH1'
+livepatch: '$MOD_LIVEPATCH1': initializing patching transition
+livepatch: '$MOD_LIVEPATCH1': starting patching transition
+livepatch: '$MOD_LIVEPATCH1': completing patching transition
+livepatch: '$MOD_LIVEPATCH1': patching complete
+% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH1/enabled
+livepatch: '$MOD_LIVEPATCH1': initializing unpatching transition
+livepatch: '$MOD_LIVEPATCH1': starting unpatching transition
+livepatch: '$MOD_LIVEPATCH1': completing unpatching transition
+livepatch: '$MOD_LIVEPATCH1': unpatching complete
+% rmmod $MOD_LIVEPATCH1"
# - load a livepatch that modifies the output from /proc/cmdline and
@@ -53,7 +55,7 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
start_test "multiple livepatches"
-load_lp $MOD_LIVEPATCH
+load_lp $MOD_LIVEPATCH1
grep 'live patched' /proc/cmdline > /dev/kmsg
grep 'live patched' /proc/meminfo > /dev/kmsg
@@ -69,26 +71,26 @@ unload_lp $MOD_REPLACE
grep 'live patched' /proc/cmdline > /dev/kmsg
grep 'live patched' /proc/meminfo > /dev/kmsg
-disable_lp $MOD_LIVEPATCH
-unload_lp $MOD_LIVEPATCH
+disable_lp $MOD_LIVEPATCH1
+unload_lp $MOD_LIVEPATCH1
grep 'live patched' /proc/cmdline > /dev/kmsg
grep 'live patched' /proc/meminfo > /dev/kmsg
-check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
-livepatch: enabling patch '$MOD_LIVEPATCH'
-livepatch: '$MOD_LIVEPATCH': initializing patching transition
-livepatch: '$MOD_LIVEPATCH': starting patching transition
-livepatch: '$MOD_LIVEPATCH': completing patching transition
-livepatch: '$MOD_LIVEPATCH': patching complete
-$MOD_LIVEPATCH: this has been live patched
+check_result "% insmod test_modules/$MOD_LIVEPATCH1.ko
+livepatch: enabling patch '$MOD_LIVEPATCH1'
+livepatch: '$MOD_LIVEPATCH1': initializing patching transition
+livepatch: '$MOD_LIVEPATCH1': starting patching transition
+livepatch: '$MOD_LIVEPATCH1': completing patching transition
+livepatch: '$MOD_LIVEPATCH1': patching complete
+$MOD_LIVEPATCH1: this has been live patched
% insmod test_modules/$MOD_REPLACE.ko replace=0
livepatch: enabling patch '$MOD_REPLACE'
livepatch: '$MOD_REPLACE': initializing patching transition
livepatch: '$MOD_REPLACE': starting patching transition
livepatch: '$MOD_REPLACE': completing patching transition
livepatch: '$MOD_REPLACE': patching complete
-$MOD_LIVEPATCH: this has been live patched
+$MOD_LIVEPATCH1: this has been live patched
$MOD_REPLACE: this has been live patched
% echo 0 > /sys/kernel/livepatch/$MOD_REPLACE/enabled
livepatch: '$MOD_REPLACE': initializing unpatching transition
@@ -96,35 +98,57 @@ livepatch: '$MOD_REPLACE': starting unpatching transition
livepatch: '$MOD_REPLACE': completing unpatching transition
livepatch: '$MOD_REPLACE': unpatching complete
% rmmod $MOD_REPLACE
-$MOD_LIVEPATCH: this has been live patched
-% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled
-livepatch: '$MOD_LIVEPATCH': initializing unpatching transition
-livepatch: '$MOD_LIVEPATCH': starting unpatching transition
-livepatch: '$MOD_LIVEPATCH': completing unpatching transition
-livepatch: '$MOD_LIVEPATCH': unpatching complete
-% rmmod $MOD_LIVEPATCH"
+$MOD_LIVEPATCH1: this has been live patched
+% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH1/enabled
+livepatch: '$MOD_LIVEPATCH1': initializing unpatching transition
+livepatch: '$MOD_LIVEPATCH1': starting unpatching transition
+livepatch: '$MOD_LIVEPATCH1': completing unpatching transition
+livepatch: '$MOD_LIVEPATCH1': unpatching complete
+% rmmod $MOD_LIVEPATCH1"
# - load a livepatch that modifies the output from /proc/cmdline and
# verify correct behavior
-# - load an atomic replace livepatch and verify that only the second is active
-# - remove the first livepatch and verify that the atomic replace livepatch
-# is still active
+# - load two additional livepatches and check the number of livepatch modules
+# applied
+# - load an atomic replace livepatch and check that the other three modules were
+# disabled
+# - remove all livepatches besides the atomic replace one and verify that the
+# atomic replace livepatch is still active
# - remove the atomic replace livepatch and verify that none are active
start_test "atomic replace livepatch"
-load_lp $MOD_LIVEPATCH
+load_lp $MOD_LIVEPATCH1
grep 'live patched' /proc/cmdline > /dev/kmsg
grep 'live patched' /proc/meminfo > /dev/kmsg
+for mod in $MOD_LIVEPATCH2 $MOD_LIVEPATCH3; do
+ load_lp "$mod"
+done
+
+mods=(/sys/kernel/livepatch/*)
+nmods=${#mods[@]}
+if [ "$nmods" -ne 3 ]; then
+ die "Expecting three modules listed, found $nmods"
+fi
+
load_lp $MOD_REPLACE replace=1
grep 'live patched' /proc/cmdline > /dev/kmsg
grep 'live patched' /proc/meminfo > /dev/kmsg
-unload_lp $MOD_LIVEPATCH
+mods=(/sys/kernel/livepatch/*)
+nmods=${#mods[@]}
+if [ "$nmods" -ne 1 ]; then
+ die "Expecting only one moduled listed, found $nmods"
+fi
+
+# These modules were disabled by the atomic replace
+for mod in $MOD_LIVEPATCH3 $MOD_LIVEPATCH2 $MOD_LIVEPATCH1; do
+ unload_lp "$mod"
+done
grep 'live patched' /proc/cmdline > /dev/kmsg
grep 'live patched' /proc/meminfo > /dev/kmsg
@@ -135,13 +159,27 @@ unload_lp $MOD_REPLACE
grep 'live patched' /proc/cmdline > /dev/kmsg
grep 'live patched' /proc/meminfo > /dev/kmsg
-check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
-livepatch: enabling patch '$MOD_LIVEPATCH'
-livepatch: '$MOD_LIVEPATCH': initializing patching transition
-livepatch: '$MOD_LIVEPATCH': starting patching transition
-livepatch: '$MOD_LIVEPATCH': completing patching transition
-livepatch: '$MOD_LIVEPATCH': patching complete
-$MOD_LIVEPATCH: this has been live patched
+check_result "% insmod test_modules/$MOD_LIVEPATCH1.ko
+livepatch: enabling patch '$MOD_LIVEPATCH1'
+livepatch: '$MOD_LIVEPATCH1': initializing patching transition
+livepatch: '$MOD_LIVEPATCH1': starting patching transition
+livepatch: '$MOD_LIVEPATCH1': completing patching transition
+livepatch: '$MOD_LIVEPATCH1': patching complete
+$MOD_LIVEPATCH1: this has been live patched
+% insmod test_modules/$MOD_LIVEPATCH2.ko
+livepatch: enabling patch '$MOD_LIVEPATCH2'
+livepatch: '$MOD_LIVEPATCH2': initializing patching transition
+livepatch: '$MOD_LIVEPATCH2': starting patching transition
+livepatch: '$MOD_LIVEPATCH2': completing patching transition
+livepatch: '$MOD_LIVEPATCH2': patching complete
+% insmod test_modules/$MOD_LIVEPATCH3.ko
+livepatch: enabling patch '$MOD_LIVEPATCH3'
+livepatch: '$MOD_LIVEPATCH3': initializing patching transition
+$MOD_LIVEPATCH3: pre_patch_callback: vmlinux
+livepatch: '$MOD_LIVEPATCH3': starting patching transition
+livepatch: '$MOD_LIVEPATCH3': completing patching transition
+$MOD_LIVEPATCH3: post_patch_callback: vmlinux
+livepatch: '$MOD_LIVEPATCH3': patching complete
% insmod test_modules/$MOD_REPLACE.ko replace=1
livepatch: enabling patch '$MOD_REPLACE'
livepatch: '$MOD_REPLACE': initializing patching transition
@@ -149,7 +187,9 @@ livepatch: '$MOD_REPLACE': starting patching transition
livepatch: '$MOD_REPLACE': completing patching transition
livepatch: '$MOD_REPLACE': patching complete
$MOD_REPLACE: this has been live patched
-% rmmod $MOD_LIVEPATCH
+% rmmod $MOD_LIVEPATCH3
+% rmmod $MOD_LIVEPATCH2
+% rmmod $MOD_LIVEPATCH1
$MOD_REPLACE: this has been live patched
% echo 0 > /sys/kernel/livepatch/$MOD_REPLACE/enabled
livepatch: '$MOD_REPLACE': initializing unpatching transition
diff --git a/tools/testing/selftests/livepatch/test-syscall.sh b/tools/testing/selftests/livepatch/test-syscall.sh
index b76a881d4013..289eb7d4c4b3 100755
--- a/tools/testing/selftests/livepatch/test-syscall.sh
+++ b/tools/testing/selftests/livepatch/test-syscall.sh
@@ -15,7 +15,10 @@ setup_config
start_test "patch getpid syscall while being heavily hammered"
-for i in $(seq 1 $(getconf _NPROCESSORS_ONLN)); do
+NPROC=$(getconf _NPROCESSORS_ONLN)
+MAXPROC=128
+
+for i in $(seq 1 $(($NPROC < $MAXPROC ? $NPROC : $MAXPROC))); do
./test_klp-call_getpid &
pids[$i]="$!"
done
diff --git a/tools/testing/selftests/livepatch/test-sysfs.sh b/tools/testing/selftests/livepatch/test-sysfs.sh
index 6c646afa7395..05a14f5a7bfb 100755
--- a/tools/testing/selftests/livepatch/test-sysfs.sh
+++ b/tools/testing/selftests/livepatch/test-sysfs.sh
@@ -18,6 +18,7 @@ check_sysfs_rights "$MOD_LIVEPATCH" "" "drwxr-xr-x"
check_sysfs_rights "$MOD_LIVEPATCH" "enabled" "-rw-r--r--"
check_sysfs_value "$MOD_LIVEPATCH" "enabled" "1"
check_sysfs_rights "$MOD_LIVEPATCH" "force" "--w-------"
+check_sysfs_rights "$MOD_LIVEPATCH" "replace" "-r--r--r--"
check_sysfs_rights "$MOD_LIVEPATCH" "transition" "-r--r--r--"
check_sysfs_value "$MOD_LIVEPATCH" "transition" "0"
check_sysfs_rights "$MOD_LIVEPATCH" "vmlinux/patched" "-r--r--r--"
@@ -83,4 +84,51 @@ test_klp_callbacks_demo: post_unpatch_callback: vmlinux
livepatch: 'test_klp_callbacks_demo': unpatching complete
% rmmod test_klp_callbacks_demo"
+start_test "sysfs test replace enabled"
+
+MOD_LIVEPATCH=test_klp_atomic_replace
+load_lp $MOD_LIVEPATCH replace=1
+
+check_sysfs_rights "$MOD_LIVEPATCH" "replace" "-r--r--r--"
+check_sysfs_value "$MOD_LIVEPATCH" "replace" "1"
+
+disable_lp $MOD_LIVEPATCH
+unload_lp $MOD_LIVEPATCH
+
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko replace=1
+livepatch: enabling patch '$MOD_LIVEPATCH'
+livepatch: '$MOD_LIVEPATCH': initializing patching transition
+livepatch: '$MOD_LIVEPATCH': starting patching transition
+livepatch: '$MOD_LIVEPATCH': completing patching transition
+livepatch: '$MOD_LIVEPATCH': patching complete
+% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled
+livepatch: '$MOD_LIVEPATCH': initializing unpatching transition
+livepatch: '$MOD_LIVEPATCH': starting unpatching transition
+livepatch: '$MOD_LIVEPATCH': completing unpatching transition
+livepatch: '$MOD_LIVEPATCH': unpatching complete
+% rmmod $MOD_LIVEPATCH"
+
+start_test "sysfs test replace disabled"
+
+load_lp $MOD_LIVEPATCH replace=0
+
+check_sysfs_rights "$MOD_LIVEPATCH" "replace" "-r--r--r--"
+check_sysfs_value "$MOD_LIVEPATCH" "replace" "0"
+
+disable_lp $MOD_LIVEPATCH
+unload_lp $MOD_LIVEPATCH
+
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko replace=0
+livepatch: enabling patch '$MOD_LIVEPATCH'
+livepatch: '$MOD_LIVEPATCH': initializing patching transition
+livepatch: '$MOD_LIVEPATCH': starting patching transition
+livepatch: '$MOD_LIVEPATCH': completing patching transition
+livepatch: '$MOD_LIVEPATCH': patching complete
+% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled
+livepatch: '$MOD_LIVEPATCH': initializing unpatching transition
+livepatch: '$MOD_LIVEPATCH': starting unpatching transition
+livepatch: '$MOD_LIVEPATCH': completing unpatching transition
+livepatch: '$MOD_LIVEPATCH': unpatching complete
+% rmmod $MOD_LIVEPATCH"
+
exit 0
diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt
index 368973f05250..cff124c1eddd 100644
--- a/tools/testing/selftests/lkdtm/tests.txt
+++ b/tools/testing/selftests/lkdtm/tests.txt
@@ -31,6 +31,7 @@ SLAB_FREE_CROSS
SLAB_FREE_PAGE
#SOFTLOCKUP Hangs the system
#HARDLOCKUP Hangs the system
+#SMP_CALL_LOCKUP Hangs the system
#SPINLOCKUP Hangs the system
#HUNG_TASK Hangs the system
EXEC_DATA
diff --git a/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c b/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c
index a9cc17facfb3..4e14dba81234 100644
--- a/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c
+++ b/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c
@@ -69,5 +69,5 @@ int main(int argc, char **argv)
/* Multi-threaded */
test_mt_membarrier();
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/membarrier/membarrier_test_single_thread.c b/tools/testing/selftests/membarrier/membarrier_test_single_thread.c
index 4cdc8b1d124c..fa3f1d6c37a0 100644
--- a/tools/testing/selftests/membarrier/membarrier_test_single_thread.c
+++ b/tools/testing/selftests/membarrier/membarrier_test_single_thread.c
@@ -24,5 +24,5 @@ int main(int argc, char **argv)
test_membarrier_get_registrations(/*cmd=*/0);
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/memfd/fuse_test.c b/tools/testing/selftests/memfd/fuse_test.c
index 93798c8c5d54..dbc171a3806d 100644
--- a/tools/testing/selftests/memfd/fuse_test.c
+++ b/tools/testing/selftests/memfd/fuse_test.c
@@ -306,7 +306,7 @@ int main(int argc, char **argv)
* then the kernel did a page-replacement or canceled the read() (or
* whatever magic it did..). In that case, the memfd object is still
* all zero.
- * In case the memfd-object was *not* sealed, the read() was successfull
+ * In case the memfd-object was *not* sealed, the read() was successful
* and the memfd object must *not* be all zero.
* Note that in real scenarios, there might be a mixture of both, but
* in this test-cases, we have explicit 200ms delays which should be
diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
index 18f585684e20..95af2d78fd31 100644
--- a/tools/testing/selftests/memfd/memfd_test.c
+++ b/tools/testing/selftests/memfd/memfd_test.c
@@ -1528,7 +1528,7 @@ static void test_share_open(char *banner, char *b_suffix)
/*
* Test sharing via fork()
- * Test whether seal-modifications work as expected with forked childs.
+ * Test whether seal-modifications work as expected with forked children.
*/
static void test_share_fork(char *banner, char *b_suffix)
{
diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore
index d26e962f2ac4..da030b43e43b 100644
--- a/tools/testing/selftests/mm/.gitignore
+++ b/tools/testing/selftests/mm/.gitignore
@@ -6,6 +6,7 @@ hugepage-shm
hugepage-vmemmap
hugetlb-madvise
hugetlb-read-hwpoison
+hugetlb-soft-offline
khugepaged
map_hugetlb
map_populate
@@ -47,3 +48,6 @@ mkdirty
va_high_addr_switch
hugetlb_fault_after_madv
hugetlb_madv_vs_map
+mseal_test
+seal_elf
+droppable
diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile
index eb5f39a2668b..901e0d07765b 100644
--- a/tools/testing/selftests/mm/Makefile
+++ b/tools/testing/selftests/mm/Makefile
@@ -2,6 +2,7 @@
# Makefile for mm selftests
LOCAL_HDRS += $(selfdir)/mm/local_config.h $(top_srcdir)/mm/gup_test.h
+LOCAL_HDRS += $(selfdir)/mm/mseal_helpers.h
include local_config.mk
@@ -12,7 +13,7 @@ uname_M := $(shell uname -m 2>/dev/null || echo not)
else
uname_M := $(shell echo $(CROSS_COMPILE) | grep -o '^[a-z0-9]\+')
endif
-ARCH ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/' -e 's/ppc64.*/ppc64/')
+ARCH ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/' -e 's/ppc64.*/powerpc/')
endif
# Without this, failed build products remain, with up-to-date timestamps,
@@ -32,7 +33,7 @@ endif
# LDLIBS.
MAKEFLAGS += --no-builtin-rules
-CFLAGS = -Wall -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES)
+CFLAGS = -Wall -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES) $(TOOLS_INCLUDES)
LDLIBS = -lrt -lpthread -lm
TEST_GEN_FILES = cow
@@ -42,6 +43,7 @@ TEST_GEN_FILES += gup_test
TEST_GEN_FILES += hmm-tests
TEST_GEN_FILES += hugetlb-madvise
TEST_GEN_FILES += hugetlb-read-hwpoison
+TEST_GEN_FILES += hugetlb-soft-offline
TEST_GEN_FILES += hugepage-mmap
TEST_GEN_FILES += hugepage-mremap
TEST_GEN_FILES += hugepage-shm
@@ -59,6 +61,8 @@ TEST_GEN_FILES += mlock2-tests
TEST_GEN_FILES += mrelease_test
TEST_GEN_FILES += mremap_dontunmap
TEST_GEN_FILES += mremap_test
+TEST_GEN_FILES += mseal_test
+TEST_GEN_FILES += seal_elf
TEST_GEN_FILES += on-fault-limit
TEST_GEN_FILES += pagemap_ioctl
TEST_GEN_FILES += thuge-gen
@@ -71,6 +75,8 @@ TEST_GEN_FILES += ksm_functional_tests
TEST_GEN_FILES += mdwe_test
TEST_GEN_FILES += hugetlb_fault_after_madv
TEST_GEN_FILES += hugetlb_madv_vs_map
+TEST_GEN_FILES += hugetlb_dio
+TEST_GEN_FILES += droppable
ifneq ($(ARCH),arm64)
TEST_GEN_FILES += soft-dirty
@@ -98,13 +104,13 @@ TEST_GEN_FILES += $(BINARIES_64)
endif
else
-ifneq (,$(findstring $(ARCH),ppc64))
+ifneq (,$(findstring $(ARCH),powerpc))
TEST_GEN_FILES += protection_keys
endif
endif
-ifneq (,$(filter $(ARCH),arm64 ia64 mips64 parisc64 ppc64 riscv64 s390x sparc64 x86_64))
+ifneq (,$(filter $(ARCH),arm64 ia64 mips64 parisc64 powerpc riscv64 s390x sparc64 x86_64))
TEST_GEN_FILES += va_high_addr_switch
TEST_GEN_FILES += virtual_address_range
TEST_GEN_FILES += write_to_hugetlbfs
diff --git a/tools/testing/selftests/mm/compaction_test.c b/tools/testing/selftests/mm/compaction_test.c
index 533999b6c284..e140558e6f53 100644
--- a/tools/testing/selftests/mm/compaction_test.c
+++ b/tools/testing/selftests/mm/compaction_test.c
@@ -82,12 +82,16 @@ int prereq(void)
return -1;
}
-int check_compaction(unsigned long mem_free, unsigned int hugepage_size)
+int check_compaction(unsigned long mem_free, unsigned long hugepage_size,
+ unsigned long initial_nr_hugepages)
{
+ unsigned long nr_hugepages_ul;
int fd, ret = -1;
int compaction_index = 0;
- char initial_nr_hugepages[10] = {0};
- char nr_hugepages[10] = {0};
+ char nr_hugepages[20] = {0};
+ char init_nr_hugepages[20] = {0};
+
+ sprintf(init_nr_hugepages, "%lu", initial_nr_hugepages);
/* We want to test with 80% of available memory. Else, OOM killer comes
in to play */
@@ -101,21 +105,6 @@ int check_compaction(unsigned long mem_free, unsigned int hugepage_size)
goto out;
}
- if (read(fd, initial_nr_hugepages, sizeof(initial_nr_hugepages)) <= 0) {
- ksft_print_msg("Failed to read from /proc/sys/vm/nr_hugepages: %s\n",
- strerror(errno));
- goto close_fd;
- }
-
- /* Start with the initial condition of 0 huge pages*/
- if (write(fd, "0", sizeof(char)) != sizeof(char)) {
- ksft_print_msg("Failed to write 0 to /proc/sys/vm/nr_hugepages: %s\n",
- strerror(errno));
- goto close_fd;
- }
-
- lseek(fd, 0, SEEK_SET);
-
/* Request a large number of huge pages. The Kernel will allocate
as much as it can */
if (write(fd, "100000", (6*sizeof(char))) != (6*sizeof(char))) {
@@ -134,22 +123,27 @@ int check_compaction(unsigned long mem_free, unsigned int hugepage_size)
/* We should have been able to request at least 1/3 rd of the memory in
huge pages */
- compaction_index = mem_free/(atoi(nr_hugepages) * hugepage_size);
+ nr_hugepages_ul = strtoul(nr_hugepages, NULL, 10);
+ if (!nr_hugepages_ul) {
+ ksft_print_msg("ERROR: No memory is available as huge pages\n");
+ goto close_fd;
+ }
+ compaction_index = mem_free/(nr_hugepages_ul * hugepage_size);
lseek(fd, 0, SEEK_SET);
- if (write(fd, initial_nr_hugepages, strlen(initial_nr_hugepages))
- != strlen(initial_nr_hugepages)) {
+ if (write(fd, init_nr_hugepages, strlen(init_nr_hugepages))
+ != strlen(init_nr_hugepages)) {
ksft_print_msg("Failed to write value to /proc/sys/vm/nr_hugepages: %s\n",
strerror(errno));
goto close_fd;
}
- ksft_print_msg("Number of huge pages allocated = %d\n",
- atoi(nr_hugepages));
+ ksft_print_msg("Number of huge pages allocated = %lu\n",
+ nr_hugepages_ul);
if (compaction_index > 3) {
- ksft_print_msg("ERROR: Less that 1/%d of memory is available\n"
+ ksft_print_msg("ERROR: Less than 1/%d of memory is available\n"
"as huge pages\n", compaction_index);
goto close_fd;
}
@@ -163,6 +157,41 @@ int check_compaction(unsigned long mem_free, unsigned int hugepage_size)
return ret;
}
+int set_zero_hugepages(unsigned long *initial_nr_hugepages)
+{
+ int fd, ret = -1;
+ char nr_hugepages[20] = {0};
+
+ fd = open("/proc/sys/vm/nr_hugepages", O_RDWR | O_NONBLOCK);
+ if (fd < 0) {
+ ksft_print_msg("Failed to open /proc/sys/vm/nr_hugepages: %s\n",
+ strerror(errno));
+ goto out;
+ }
+ if (read(fd, nr_hugepages, sizeof(nr_hugepages)) <= 0) {
+ ksft_print_msg("Failed to read from /proc/sys/vm/nr_hugepages: %s\n",
+ strerror(errno));
+ goto close_fd;
+ }
+
+ lseek(fd, 0, SEEK_SET);
+
+ /* Start with the initial condition of 0 huge pages */
+ if (write(fd, "0", sizeof(char)) != sizeof(char)) {
+ ksft_print_msg("Failed to write 0 to /proc/sys/vm/nr_hugepages: %s\n",
+ strerror(errno));
+ goto close_fd;
+ }
+
+ *initial_nr_hugepages = strtoul(nr_hugepages, NULL, 10);
+ ret = 0;
+
+ close_fd:
+ close(fd);
+
+ out:
+ return ret;
+}
int main(int argc, char **argv)
{
@@ -173,14 +202,19 @@ int main(int argc, char **argv)
unsigned long mem_free = 0;
unsigned long hugepage_size = 0;
long mem_fragmentable_MB = 0;
+ unsigned long initial_nr_hugepages;
ksft_print_header();
if (prereq() || geteuid())
- return ksft_exit_skip("Prerequisites unsatisfied\n");
+ ksft_exit_skip("Prerequisites unsatisfied\n");
ksft_set_plan(1);
+ /* Start the test without hugepages reducing mem_free */
+ if (set_zero_hugepages(&initial_nr_hugepages))
+ ksft_exit_fail();
+
lim.rlim_cur = RLIM_INFINITY;
lim.rlim_max = RLIM_INFINITY;
if (setrlimit(RLIMIT_MEMLOCK, &lim))
@@ -224,8 +258,9 @@ int main(int argc, char **argv)
entry = entry->next;
}
- if (check_compaction(mem_free, hugepage_size) == 0)
- return ksft_exit_pass();
+ if (check_compaction(mem_free, hugepage_size,
+ initial_nr_hugepages) == 0)
+ ksft_exit_pass();
- return ksft_exit_fail();
+ ksft_exit_fail();
}
diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c
index 363bf5f801be..32c6ccc2a6be 100644
--- a/tools/testing/selftests/mm/cow.c
+++ b/tools/testing/selftests/mm/cow.c
@@ -199,7 +199,7 @@ static int child_vmsplice_memcmp_fn(char *mem, size_t size,
typedef int (*child_fn)(char *mem, size_t size, struct comm_pipes *comm_pipes);
static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect,
- child_fn fn)
+ child_fn fn, bool xfail)
{
struct comm_pipes comm_pipes;
char buf;
@@ -247,33 +247,47 @@ static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect,
else
ret = -EINVAL;
- ksft_test_result(!ret, "No leak from parent into child\n");
+ if (!ret) {
+ ksft_test_result_pass("No leak from parent into child\n");
+ } else if (xfail) {
+ /*
+ * With hugetlb, some vmsplice() tests are currently expected to
+ * fail because (a) harder to fix and (b) nobody really cares.
+ * Flag them as expected failure for now.
+ */
+ ksft_test_result_xfail("Leak from parent into child\n");
+ } else {
+ ksft_test_result_fail("Leak from parent into child\n");
+ }
close_comm_pipes:
close_comm_pipes(&comm_pipes);
}
-static void test_cow_in_parent(char *mem, size_t size)
+static void test_cow_in_parent(char *mem, size_t size, bool is_hugetlb)
{
- do_test_cow_in_parent(mem, size, false, child_memcmp_fn);
+ do_test_cow_in_parent(mem, size, false, child_memcmp_fn, false);
}
-static void test_cow_in_parent_mprotect(char *mem, size_t size)
+static void test_cow_in_parent_mprotect(char *mem, size_t size, bool is_hugetlb)
{
- do_test_cow_in_parent(mem, size, true, child_memcmp_fn);
+ do_test_cow_in_parent(mem, size, true, child_memcmp_fn, false);
}
-static void test_vmsplice_in_child(char *mem, size_t size)
+static void test_vmsplice_in_child(char *mem, size_t size, bool is_hugetlb)
{
- do_test_cow_in_parent(mem, size, false, child_vmsplice_memcmp_fn);
+ do_test_cow_in_parent(mem, size, false, child_vmsplice_memcmp_fn,
+ is_hugetlb);
}
-static void test_vmsplice_in_child_mprotect(char *mem, size_t size)
+static void test_vmsplice_in_child_mprotect(char *mem, size_t size,
+ bool is_hugetlb)
{
- do_test_cow_in_parent(mem, size, true, child_vmsplice_memcmp_fn);
+ do_test_cow_in_parent(mem, size, true, child_vmsplice_memcmp_fn,
+ is_hugetlb);
}
static void do_test_vmsplice_in_parent(char *mem, size_t size,
- bool before_fork)
+ bool before_fork, bool xfail)
{
struct iovec iov = {
.iov_base = mem,
@@ -355,8 +369,18 @@ static void do_test_vmsplice_in_parent(char *mem, size_t size,
}
}
- ksft_test_result(!memcmp(old, new, transferred),
- "No leak from child into parent\n");
+ if (!memcmp(old, new, transferred)) {
+ ksft_test_result_pass("No leak from child into parent\n");
+ } else if (xfail) {
+ /*
+ * With hugetlb, some vmsplice() tests are currently expected to
+ * fail because (a) harder to fix and (b) nobody really cares.
+ * Flag them as expected failure for now.
+ */
+ ksft_test_result_xfail("Leak from child into parent\n");
+ } else {
+ ksft_test_result_fail("Leak from child into parent\n");
+ }
close_pipe:
close(fds[0]);
close(fds[1]);
@@ -367,14 +391,14 @@ free:
free(new);
}
-static void test_vmsplice_before_fork(char *mem, size_t size)
+static void test_vmsplice_before_fork(char *mem, size_t size, bool is_hugetlb)
{
- do_test_vmsplice_in_parent(mem, size, true);
+ do_test_vmsplice_in_parent(mem, size, true, is_hugetlb);
}
-static void test_vmsplice_after_fork(char *mem, size_t size)
+static void test_vmsplice_after_fork(char *mem, size_t size, bool is_hugetlb)
{
- do_test_vmsplice_in_parent(mem, size, false);
+ do_test_vmsplice_in_parent(mem, size, false, is_hugetlb);
}
#ifdef LOCAL_CONFIG_HAVE_LIBURING
@@ -529,12 +553,12 @@ close_comm_pipes:
close_comm_pipes(&comm_pipes);
}
-static void test_iouring_ro(char *mem, size_t size)
+static void test_iouring_ro(char *mem, size_t size, bool is_hugetlb)
{
do_test_iouring(mem, size, false);
}
-static void test_iouring_fork(char *mem, size_t size)
+static void test_iouring_fork(char *mem, size_t size, bool is_hugetlb)
{
do_test_iouring(mem, size, true);
}
@@ -678,37 +702,41 @@ free_tmp:
free(tmp);
}
-static void test_ro_pin_on_shared(char *mem, size_t size)
+static void test_ro_pin_on_shared(char *mem, size_t size, bool is_hugetlb)
{
do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, false);
}
-static void test_ro_fast_pin_on_shared(char *mem, size_t size)
+static void test_ro_fast_pin_on_shared(char *mem, size_t size, bool is_hugetlb)
{
do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, true);
}
-static void test_ro_pin_on_ro_previously_shared(char *mem, size_t size)
+static void test_ro_pin_on_ro_previously_shared(char *mem, size_t size,
+ bool is_hugetlb)
{
do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, false);
}
-static void test_ro_fast_pin_on_ro_previously_shared(char *mem, size_t size)
+static void test_ro_fast_pin_on_ro_previously_shared(char *mem, size_t size,
+ bool is_hugetlb)
{
do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, true);
}
-static void test_ro_pin_on_ro_exclusive(char *mem, size_t size)
+static void test_ro_pin_on_ro_exclusive(char *mem, size_t size,
+ bool is_hugetlb)
{
do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, false);
}
-static void test_ro_fast_pin_on_ro_exclusive(char *mem, size_t size)
+static void test_ro_fast_pin_on_ro_exclusive(char *mem, size_t size,
+ bool is_hugetlb)
{
do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, true);
}
-typedef void (*test_fn)(char *mem, size_t size);
+typedef void (*test_fn)(char *mem, size_t size, bool hugetlb);
static void do_run_with_base_page(test_fn fn, bool swapout)
{
@@ -740,7 +768,7 @@ static void do_run_with_base_page(test_fn fn, bool swapout)
}
}
- fn(mem, pagesize);
+ fn(mem, pagesize, false);
munmap:
munmap(mem, pagesize);
}
@@ -904,7 +932,7 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
break;
}
- fn(mem, size);
+ fn(mem, size, false);
munmap:
munmap(mmap_mem, mmap_size);
if (mremap_mem != MAP_FAILED)
@@ -997,7 +1025,7 @@ static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize)
}
munmap(dummy, hugetlbsize);
- fn(mem, hugetlbsize);
+ fn(mem, hugetlbsize, true);
munmap:
munmap(mem, hugetlbsize);
}
@@ -1036,7 +1064,7 @@ static const struct test_case anon_test_cases[] = {
*/
{
"vmsplice() + unmap in child",
- test_vmsplice_in_child
+ test_vmsplice_in_child,
},
/*
* vmsplice() test, but do an additional mprotect(PROT_READ)+
@@ -1044,7 +1072,7 @@ static const struct test_case anon_test_cases[] = {
*/
{
"vmsplice() + unmap in child with mprotect() optimization",
- test_vmsplice_in_child_mprotect
+ test_vmsplice_in_child_mprotect,
},
/*
* vmsplice() [R/O GUP] in parent before fork(), unmap in parent after
@@ -1322,23 +1350,31 @@ close_comm_pipes:
close_comm_pipes(&comm_pipes);
}
-static void test_anon_thp_collapse_unshared(char *mem, size_t size)
+static void test_anon_thp_collapse_unshared(char *mem, size_t size,
+ bool is_hugetlb)
{
+ assert(!is_hugetlb);
do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UNSHARED);
}
-static void test_anon_thp_collapse_fully_shared(char *mem, size_t size)
+static void test_anon_thp_collapse_fully_shared(char *mem, size_t size,
+ bool is_hugetlb)
{
+ assert(!is_hugetlb);
do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_FULLY_SHARED);
}
-static void test_anon_thp_collapse_lower_shared(char *mem, size_t size)
+static void test_anon_thp_collapse_lower_shared(char *mem, size_t size,
+ bool is_hugetlb)
{
+ assert(!is_hugetlb);
do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_LOWER_SHARED);
}
-static void test_anon_thp_collapse_upper_shared(char *mem, size_t size)
+static void test_anon_thp_collapse_upper_shared(char *mem, size_t size,
+ bool is_hugetlb)
{
+ assert(!is_hugetlb);
do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UPPER_SHARED);
}
@@ -1779,5 +1815,5 @@ int main(int argc, char **argv)
if (err)
ksft_exit_fail_msg("%d out of %d tests failed\n",
err, ksft_test_num());
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/mm/droppable.c b/tools/testing/selftests/mm/droppable.c
new file mode 100644
index 000000000000..f3d9ecf96890
--- /dev/null
+++ b/tools/testing/selftests/mm/droppable.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <signal.h>
+#include <sys/mman.h>
+#include <linux/mman.h>
+
+#include "../kselftest.h"
+
+int main(int argc, char *argv[])
+{
+ size_t alloc_size = 134217728;
+ size_t page_size = getpagesize();
+ void *alloc;
+ pid_t child;
+
+ ksft_print_header();
+ ksft_set_plan(1);
+
+ alloc = mmap(0, alloc_size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_DROPPABLE, -1, 0);
+ assert(alloc != MAP_FAILED);
+ memset(alloc, 'A', alloc_size);
+ for (size_t i = 0; i < alloc_size; i += page_size)
+ assert(*(uint8_t *)(alloc + i));
+
+ child = fork();
+ assert(child >= 0);
+ if (!child) {
+ for (;;)
+ *(char *)malloc(page_size) = 'B';
+ }
+
+ for (bool done = false; !done;) {
+ for (size_t i = 0; i < alloc_size; i += page_size) {
+ if (!*(uint8_t *)(alloc + i)) {
+ done = true;
+ break;
+ }
+ }
+ }
+ kill(child, SIGTERM);
+
+ ksft_test_result_pass("MAP_DROPPABLE: PASS\n");
+ exit(KSFT_PASS);
+}
diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c
index ad168d35b23b..9423ad439a61 100644
--- a/tools/testing/selftests/mm/gup_longterm.c
+++ b/tools/testing/selftests/mm/gup_longterm.c
@@ -118,15 +118,22 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
return;
}
- /*
- * Fault in the page writable such that GUP-fast can eventually pin
- * it immediately.
- */
+ /* Fault in the page such that GUP-fast can pin it directly. */
memset(mem, 0, size);
switch (type) {
case TEST_TYPE_RO:
case TEST_TYPE_RO_FAST:
+ /*
+ * Cover more cases regarding unsharing decisions when
+ * long-term R/O pinning by mapping the page R/O.
+ */
+ ret = mprotect(mem, size, PROT_READ);
+ if (ret) {
+ ksft_test_result_fail("mprotect() failed\n");
+ goto munmap;
+ }
+ /* FALLTHROUGH */
case TEST_TYPE_RW:
case TEST_TYPE_RW_FAST: {
struct pin_longterm_test args;
@@ -228,6 +235,7 @@ static void do_test(int fd, size_t size, enum test_type type, bool shared)
assert(false);
}
+munmap:
munmap(mem, size);
}
@@ -456,5 +464,5 @@ int main(int argc, char **argv)
if (err)
ksft_exit_fail_msg("%d out of %d tests failed\n",
err, ksft_test_num());
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/mm/gup_test.c b/tools/testing/selftests/mm/gup_test.c
index 18a49c70d4c6..bdeaac67ff9a 100644
--- a/tools/testing/selftests/mm/gup_test.c
+++ b/tools/testing/selftests/mm/gup_test.c
@@ -1,3 +1,4 @@
+#define __SANE_USERSPACE_TYPES__ // Use ll64
#include <fcntl.h>
#include <errno.h>
#include <stdio.h>
@@ -228,7 +229,7 @@ int main(int argc, char **argv)
break;
}
ksft_test_result_skip("Please run this test as root\n");
- return ksft_exit_pass();
+ ksft_exit_pass();
}
p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, filed, 0);
@@ -267,5 +268,5 @@ int main(int argc, char **argv)
free(tid);
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/mm/hugepage-mremap.c b/tools/testing/selftests/mm/hugepage-mremap.c
index c463d1c09c9b..ada9156cc497 100644
--- a/tools/testing/selftests/mm/hugepage-mremap.c
+++ b/tools/testing/selftests/mm/hugepage-mremap.c
@@ -15,7 +15,7 @@
#define _GNU_SOURCE
#include <stdlib.h>
#include <stdio.h>
-#include <unistd.h>
+#include <asm-generic/unistd.h>
#include <sys/mman.h>
#include <errno.h>
#include <fcntl.h> /* Definition of O_* constants */
diff --git a/tools/testing/selftests/mm/hugetlb-soft-offline.c b/tools/testing/selftests/mm/hugetlb-soft-offline.c
new file mode 100644
index 000000000000..f086f0e04756
--- /dev/null
+++ b/tools/testing/selftests/mm/hugetlb-soft-offline.c
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test soft offline behavior for HugeTLB pages:
+ * - if enable_soft_offline = 0, hugepages should stay intact and soft
+ * offlining failed with EOPNOTSUPP.
+ * - if enable_soft_offline = 1, a hugepage should be dissolved and
+ * nr_hugepages/free_hugepages should be reduced by 1.
+ *
+ * Before running, make sure more than 2 hugepages of default_hugepagesz
+ * are allocated. For example, if /proc/meminfo/Hugepagesize is 2048kB:
+ * echo 8 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <linux/magic.h>
+#include <linux/memfd.h>
+#include <sys/mman.h>
+#include <sys/statfs.h>
+#include <sys/types.h>
+
+#include "../kselftest.h"
+
+#ifndef MADV_SOFT_OFFLINE
+#define MADV_SOFT_OFFLINE 101
+#endif
+
+#define EPREFIX " !!! "
+
+static int do_soft_offline(int fd, size_t len, int expect_errno)
+{
+ char *filemap = NULL;
+ char *hwp_addr = NULL;
+ const unsigned long pagesize = getpagesize();
+ int ret = 0;
+
+ if (ftruncate(fd, len) < 0) {
+ ksft_perror(EPREFIX "ftruncate to len failed");
+ return -1;
+ }
+
+ filemap = mmap(NULL, len, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE, fd, 0);
+ if (filemap == MAP_FAILED) {
+ ksft_perror(EPREFIX "mmap failed");
+ ret = -1;
+ goto untruncate;
+ }
+
+ memset(filemap, 0xab, len);
+ ksft_print_msg("Allocated %#lx bytes of hugetlb pages\n", len);
+
+ hwp_addr = filemap + len / 2;
+ ret = madvise(hwp_addr, pagesize, MADV_SOFT_OFFLINE);
+ ksft_print_msg("MADV_SOFT_OFFLINE %p ret=%d, errno=%d\n",
+ hwp_addr, ret, errno);
+ if (ret != 0)
+ ksft_perror(EPREFIX "madvise failed");
+
+ if (errno == expect_errno)
+ ret = 0;
+ else {
+ ksft_print_msg("MADV_SOFT_OFFLINE should ret %d\n",
+ expect_errno);
+ ret = -1;
+ }
+
+ munmap(filemap, len);
+untruncate:
+ if (ftruncate(fd, 0) < 0)
+ ksft_perror(EPREFIX "ftruncate back to 0 failed");
+
+ return ret;
+}
+
+static int set_enable_soft_offline(int value)
+{
+ char cmd[256] = {0};
+ FILE *cmdfile = NULL;
+
+ if (value != 0 && value != 1)
+ return -EINVAL;
+
+ sprintf(cmd, "echo %d > /proc/sys/vm/enable_soft_offline", value);
+ cmdfile = popen(cmd, "r");
+
+ if (cmdfile)
+ ksft_print_msg("enable_soft_offline => %d\n", value);
+ else {
+ ksft_perror(EPREFIX "failed to set enable_soft_offline");
+ return errno;
+ }
+
+ pclose(cmdfile);
+ return 0;
+}
+
+static int read_nr_hugepages(unsigned long hugepage_size,
+ unsigned long *nr_hugepages)
+{
+ char buffer[256] = {0};
+ char cmd[256] = {0};
+
+ sprintf(cmd, "cat /sys/kernel/mm/hugepages/hugepages-%ldkB/nr_hugepages",
+ hugepage_size);
+ FILE *cmdfile = popen(cmd, "r");
+
+ if (cmdfile == NULL) {
+ ksft_perror(EPREFIX "failed to popen nr_hugepages");
+ return -1;
+ }
+
+ if (!fgets(buffer, sizeof(buffer), cmdfile)) {
+ ksft_perror(EPREFIX "failed to read nr_hugepages");
+ pclose(cmdfile);
+ return -1;
+ }
+
+ *nr_hugepages = atoll(buffer);
+ pclose(cmdfile);
+ return 0;
+}
+
+static int create_hugetlbfs_file(struct statfs *file_stat)
+{
+ int fd;
+
+ fd = memfd_create("hugetlb_tmp", MFD_HUGETLB);
+ if (fd < 0) {
+ ksft_perror(EPREFIX "could not open hugetlbfs file");
+ return -1;
+ }
+
+ memset(file_stat, 0, sizeof(*file_stat));
+ if (fstatfs(fd, file_stat)) {
+ ksft_perror(EPREFIX "fstatfs failed");
+ goto close;
+ }
+ if (file_stat->f_type != HUGETLBFS_MAGIC) {
+ ksft_print_msg(EPREFIX "not hugetlbfs file\n");
+ goto close;
+ }
+
+ return fd;
+close:
+ close(fd);
+ return -1;
+}
+
+static void test_soft_offline_common(int enable_soft_offline)
+{
+ int fd;
+ int expect_errno = enable_soft_offline ? 0 : EOPNOTSUPP;
+ struct statfs file_stat;
+ unsigned long hugepagesize_kb = 0;
+ unsigned long nr_hugepages_before = 0;
+ unsigned long nr_hugepages_after = 0;
+ int ret;
+
+ ksft_print_msg("Test soft-offline when enabled_soft_offline=%d\n",
+ enable_soft_offline);
+
+ fd = create_hugetlbfs_file(&file_stat);
+ if (fd < 0)
+ ksft_exit_fail_msg("Failed to create hugetlbfs file\n");
+
+ hugepagesize_kb = file_stat.f_bsize / 1024;
+ ksft_print_msg("Hugepagesize is %ldkB\n", hugepagesize_kb);
+
+ if (set_enable_soft_offline(enable_soft_offline) != 0) {
+ close(fd);
+ ksft_exit_fail_msg("Failed to set enable_soft_offline\n");
+ }
+
+ if (read_nr_hugepages(hugepagesize_kb, &nr_hugepages_before) != 0) {
+ close(fd);
+ ksft_exit_fail_msg("Failed to read nr_hugepages\n");
+ }
+
+ ksft_print_msg("Before MADV_SOFT_OFFLINE nr_hugepages=%ld\n",
+ nr_hugepages_before);
+
+ ret = do_soft_offline(fd, 2 * file_stat.f_bsize, expect_errno);
+
+ if (read_nr_hugepages(hugepagesize_kb, &nr_hugepages_after) != 0) {
+ close(fd);
+ ksft_exit_fail_msg("Failed to read nr_hugepages\n");
+ }
+
+ ksft_print_msg("After MADV_SOFT_OFFLINE nr_hugepages=%ld\n",
+ nr_hugepages_after);
+
+ // No need for the hugetlbfs file from now on.
+ close(fd);
+
+ if (enable_soft_offline) {
+ if (nr_hugepages_before != nr_hugepages_after + 1) {
+ ksft_test_result_fail("MADV_SOFT_OFFLINE should reduced 1 hugepage\n");
+ return;
+ }
+ } else {
+ if (nr_hugepages_before != nr_hugepages_after) {
+ ksft_test_result_fail("MADV_SOFT_OFFLINE reduced %lu hugepages\n",
+ nr_hugepages_before - nr_hugepages_after);
+ return;
+ }
+ }
+
+ ksft_test_result(ret == 0,
+ "Test soft-offline when enabled_soft_offline=%d\n",
+ enable_soft_offline);
+}
+
+int main(int argc, char **argv)
+{
+ ksft_print_header();
+ ksft_set_plan(2);
+
+ test_soft_offline_common(1);
+ test_soft_offline_common(0);
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/mm/hugetlb_dio.c b/tools/testing/selftests/mm/hugetlb_dio.c
new file mode 100644
index 000000000000..f9ac20c657ec
--- /dev/null
+++ b/tools/testing/selftests/mm/hugetlb_dio.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This program tests for hugepage leaks after DIO writes to a file using a
+ * hugepage as the user buffer. During DIO, the user buffer is pinned and
+ * should be properly unpinned upon completion. This patch verifies that the
+ * kernel correctly unpins the buffer at DIO completion for both aligned and
+ * unaligned user buffer offsets (w.r.t page boundary), ensuring the hugepage
+ * is freed upon unmapping.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <sys/stat.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/mman.h>
+#include "vm_util.h"
+#include "../kselftest.h"
+
+void run_dio_using_hugetlb(unsigned int start_off, unsigned int end_off)
+{
+ int fd;
+ char *buffer = NULL;
+ char *orig_buffer = NULL;
+ size_t h_pagesize = 0;
+ size_t writesize;
+ int free_hpage_b = 0;
+ int free_hpage_a = 0;
+ const int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB;
+ const int mmap_prot = PROT_READ | PROT_WRITE;
+
+ writesize = end_off - start_off;
+
+ /* Get the default huge page size */
+ h_pagesize = default_huge_page_size();
+ if (!h_pagesize)
+ ksft_exit_fail_msg("Unable to determine huge page size\n");
+
+ /* Open the file to DIO */
+ fd = open("/tmp", O_TMPFILE | O_RDWR | O_DIRECT, 0664);
+ if (fd < 0)
+ ksft_exit_fail_perror("Error opening file\n");
+
+ /* Get the free huge pages before allocation */
+ free_hpage_b = get_free_hugepages();
+ if (free_hpage_b == 0) {
+ close(fd);
+ ksft_exit_skip("No free hugepage, exiting!\n");
+ }
+
+ /* Allocate a hugetlb page */
+ orig_buffer = mmap(NULL, h_pagesize, mmap_prot, mmap_flags, -1, 0);
+ if (orig_buffer == MAP_FAILED) {
+ close(fd);
+ ksft_exit_fail_perror("Error mapping memory\n");
+ }
+ buffer = orig_buffer;
+ buffer += start_off;
+
+ memset(buffer, 'A', writesize);
+
+ /* Write the buffer to the file */
+ if (write(fd, buffer, writesize) != (writesize)) {
+ munmap(orig_buffer, h_pagesize);
+ close(fd);
+ ksft_exit_fail_perror("Error writing to file\n");
+ }
+
+ /* unmap the huge page */
+ munmap(orig_buffer, h_pagesize);
+ close(fd);
+
+ /* Get the free huge pages after unmap*/
+ free_hpage_a = get_free_hugepages();
+
+ /*
+ * If the no. of free hugepages before allocation and after unmap does
+ * not match - that means there could still be a page which is pinned.
+ */
+ if (free_hpage_a != free_hpage_b) {
+ ksft_print_msg("No. Free pages before allocation : %d\n", free_hpage_b);
+ ksft_print_msg("No. Free pages after munmap : %d\n", free_hpage_a);
+ ksft_test_result_fail(": Huge pages not freed!\n");
+ } else {
+ ksft_print_msg("No. Free pages before allocation : %d\n", free_hpage_b);
+ ksft_print_msg("No. Free pages after munmap : %d\n", free_hpage_a);
+ ksft_test_result_pass(": Huge pages freed successfully !\n");
+ }
+}
+
+int main(void)
+{
+ size_t pagesize = 0;
+
+ ksft_print_header();
+ ksft_set_plan(4);
+
+ /* Get base page size */
+ pagesize = psize();
+
+ /* start and end is aligned to pagesize */
+ run_dio_using_hugetlb(0, (pagesize * 3));
+
+ /* start is aligned but end is not aligned */
+ run_dio_using_hugetlb(0, (pagesize * 3) - (pagesize / 2));
+
+ /* start is unaligned and end is aligned */
+ run_dio_using_hugetlb(pagesize / 2, (pagesize * 3));
+
+ /* both start and end are unaligned */
+ run_dio_using_hugetlb(pagesize / 2, (pagesize * 3) + (pagesize / 2));
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/mm/hugetlb_madv_vs_map.c b/tools/testing/selftests/mm/hugetlb_madv_vs_map.c
index d01e8d4901d0..8f122a0f0828 100644
--- a/tools/testing/selftests/mm/hugetlb_madv_vs_map.c
+++ b/tools/testing/selftests/mm/hugetlb_madv_vs_map.c
@@ -27,9 +27,9 @@
#include "vm_util.h"
#include "../kselftest.h"
-#define MMAP_SIZE (1 << 21)
#define INLOOP_ITER 100
+size_t mmap_size;
char *huge_ptr;
/* Touch the memory while it is being madvised() */
@@ -44,7 +44,7 @@ void *touch(void *unused)
void *madv(void *unused)
{
for (int i = 0; i < INLOOP_ITER; i++)
- madvise(huge_ptr, MMAP_SIZE, MADV_DONTNEED);
+ madvise(huge_ptr, mmap_size, MADV_DONTNEED);
return NULL;
}
@@ -59,7 +59,7 @@ void *map_extra(void *unused)
void *ptr;
for (int i = 0; i < INLOOP_ITER; i++) {
- ptr = mmap(NULL, MMAP_SIZE, PROT_READ | PROT_WRITE,
+ ptr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
-1, 0);
@@ -93,14 +93,16 @@ int main(void)
free_hugepages);
}
+ mmap_size = default_huge_page_size();
+
while (max--) {
- huge_ptr = mmap(NULL, MMAP_SIZE, PROT_READ | PROT_WRITE,
+ huge_ptr = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
-1, 0);
if ((unsigned long)huge_ptr == -1) {
- ksft_exit_skip("Failed to allocated huge page\n");
- return KSFT_SKIP;
+ ksft_test_result_fail("Failed to allocate huge page\n");
+ return KSFT_FAIL;
}
pthread_create(&thread1, NULL, madv, NULL);
@@ -117,7 +119,7 @@ int main(void)
}
/* Unmap and restart */
- munmap(huge_ptr, MMAP_SIZE);
+ munmap(huge_ptr, mmap_size);
}
return KSFT_PASS;
diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c
index d615767e396b..66b4e111b5a2 100644
--- a/tools/testing/selftests/mm/ksm_functional_tests.c
+++ b/tools/testing/selftests/mm/ksm_functional_tests.c
@@ -11,7 +11,7 @@
#include <string.h>
#include <stdbool.h>
#include <stdint.h>
-#include <unistd.h>
+#include <asm-generic/unistd.h>
#include <errno.h>
#include <fcntl.h>
#include <sys/mman.h>
@@ -28,6 +28,15 @@
#define MiB (1024 * KiB)
#define FORK_EXEC_CHILD_PRG_NAME "ksm_fork_exec_child"
+#define MAP_MERGE_FAIL ((void *)-1)
+#define MAP_MERGE_SKIP ((void *)-2)
+
+enum ksm_merge_mode {
+ KSM_MERGE_PRCTL,
+ KSM_MERGE_MADVISE,
+ KSM_MERGE_NONE, /* PRCTL already set */
+};
+
static int mem_fd;
static int ksm_fd;
static int ksm_full_scans_fd;
@@ -146,33 +155,34 @@ static int ksm_unmerge(void)
return 0;
}
-static char *mmap_and_merge_range(char val, unsigned long size, int prot,
- bool use_prctl)
+static char *__mmap_and_merge_range(char val, unsigned long size, int prot,
+ enum ksm_merge_mode mode)
{
char *map;
+ char *err_map = MAP_MERGE_FAIL;
int ret;
/* Stabilize accounting by disabling KSM completely. */
if (ksm_unmerge()) {
- ksft_test_result_fail("Disabling (unmerging) KSM failed\n");
- return MAP_FAILED;
+ ksft_print_msg("Disabling (unmerging) KSM failed\n");
+ return err_map;
}
if (get_my_merging_pages() > 0) {
- ksft_test_result_fail("Still pages merged\n");
- return MAP_FAILED;
+ ksft_print_msg("Still pages merged\n");
+ return err_map;
}
map = mmap(NULL, size, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANON, -1, 0);
if (map == MAP_FAILED) {
- ksft_test_result_fail("mmap() failed\n");
- return MAP_FAILED;
+ ksft_print_msg("mmap() failed\n");
+ return err_map;
}
/* Don't use THP. Ignore if THP are not around on a kernel. */
if (madvise(map, size, MADV_NOHUGEPAGE) && errno != EINVAL) {
- ksft_test_result_fail("MADV_NOHUGEPAGE failed\n");
+ ksft_print_msg("MADV_NOHUGEPAGE failed\n");
goto unmap;
}
@@ -180,27 +190,36 @@ static char *mmap_and_merge_range(char val, unsigned long size, int prot,
memset(map, val, size);
if (mprotect(map, size, prot)) {
- ksft_test_result_skip("mprotect() failed\n");
+ ksft_print_msg("mprotect() failed\n");
+ err_map = MAP_MERGE_SKIP;
goto unmap;
}
- if (use_prctl) {
+ switch (mode) {
+ case KSM_MERGE_PRCTL:
ret = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0);
if (ret < 0 && errno == EINVAL) {
- ksft_test_result_skip("PR_SET_MEMORY_MERGE not supported\n");
+ ksft_print_msg("PR_SET_MEMORY_MERGE not supported\n");
+ err_map = MAP_MERGE_SKIP;
goto unmap;
} else if (ret) {
- ksft_test_result_fail("PR_SET_MEMORY_MERGE=1 failed\n");
+ ksft_print_msg("PR_SET_MEMORY_MERGE=1 failed\n");
goto unmap;
}
- } else if (madvise(map, size, MADV_MERGEABLE)) {
- ksft_test_result_fail("MADV_MERGEABLE failed\n");
- goto unmap;
+ break;
+ case KSM_MERGE_MADVISE:
+ if (madvise(map, size, MADV_MERGEABLE)) {
+ ksft_print_msg("MADV_MERGEABLE failed\n");
+ goto unmap;
+ }
+ break;
+ case KSM_MERGE_NONE:
+ break;
}
/* Run KSM to trigger merging and wait. */
if (ksm_merge()) {
- ksft_test_result_fail("Running KSM failed\n");
+ ksft_print_msg("Running KSM failed\n");
goto unmap;
}
@@ -209,14 +228,31 @@ static char *mmap_and_merge_range(char val, unsigned long size, int prot,
* accounted differently (depending on kernel support).
*/
if (val && !get_my_merging_pages()) {
- ksft_test_result_fail("No pages got merged\n");
+ ksft_print_msg("No pages got merged\n");
goto unmap;
}
return map;
unmap:
munmap(map, size);
- return MAP_FAILED;
+ return err_map;
+}
+
+static char *mmap_and_merge_range(char val, unsigned long size, int prot,
+ enum ksm_merge_mode mode)
+{
+ char *map;
+ char *ret = MAP_FAILED;
+
+ map = __mmap_and_merge_range(val, size, prot, mode);
+ if (map == MAP_MERGE_FAIL)
+ ksft_test_result_fail("Merging memory failed");
+ else if (map == MAP_MERGE_SKIP)
+ ksft_test_result_skip("Merging memory skipped");
+ else
+ ret = map;
+
+ return ret;
}
static void test_unmerge(void)
@@ -226,7 +262,7 @@ static void test_unmerge(void)
ksft_print_msg("[RUN] %s\n", __func__);
- map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, false);
+ map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, KSM_MERGE_MADVISE);
if (map == MAP_FAILED)
return;
@@ -264,7 +300,7 @@ static void test_unmerge_zero_pages(void)
}
/* Let KSM deduplicate zero pages. */
- map = mmap_and_merge_range(0x00, size, PROT_READ | PROT_WRITE, false);
+ map = mmap_and_merge_range(0x00, size, PROT_READ | PROT_WRITE, KSM_MERGE_MADVISE);
if (map == MAP_FAILED)
return;
@@ -312,7 +348,7 @@ static void test_unmerge_discarded(void)
ksft_print_msg("[RUN] %s\n", __func__);
- map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, false);
+ map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, KSM_MERGE_MADVISE);
if (map == MAP_FAILED)
return;
@@ -333,7 +369,6 @@ unmap:
munmap(map, size);
}
-#ifdef __NR_userfaultfd
static void test_unmerge_uffd_wp(void)
{
struct uffdio_writeprotect uffd_writeprotect;
@@ -344,7 +379,7 @@ static void test_unmerge_uffd_wp(void)
ksft_print_msg("[RUN] %s\n", __func__);
- map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, false);
+ map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, KSM_MERGE_MADVISE);
if (map == MAP_FAILED)
return;
@@ -394,7 +429,6 @@ close_uffd:
unmap:
munmap(map, size);
}
-#endif
/* Verify that KSM can be enabled / queried with prctl. */
static void test_prctl(void)
@@ -439,6 +473,36 @@ static void test_prctl(void)
ksft_test_result_pass("Setting/clearing PR_SET_MEMORY_MERGE works\n");
}
+static int test_child_ksm(void)
+{
+ const unsigned int size = 2 * MiB;
+ char *map;
+
+ /* Test if KSM is enabled for the process. */
+ if (prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0) != 1)
+ return -1;
+
+ /* Test if merge could really happen. */
+ map = __mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, KSM_MERGE_NONE);
+ if (map == MAP_MERGE_FAIL)
+ return -2;
+ else if (map == MAP_MERGE_SKIP)
+ return -3;
+
+ munmap(map, size);
+ return 0;
+}
+
+static void test_child_ksm_err(int status)
+{
+ if (status == -1)
+ ksft_test_result_fail("unexpected PR_GET_MEMORY_MERGE result in child\n");
+ else if (status == -2)
+ ksft_test_result_fail("Merge in child failed\n");
+ else if (status == -3)
+ ksft_test_result_skip("Merge in child skipped\n");
+}
+
/* Verify that prctl ksm flag is inherited. */
static void test_prctl_fork(void)
{
@@ -458,7 +522,7 @@ static void test_prctl_fork(void)
child_pid = fork();
if (!child_pid) {
- exit(prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0));
+ exit(test_child_ksm());
} else if (child_pid < 0) {
ksft_test_result_fail("fork() failed\n");
return;
@@ -467,8 +531,11 @@ static void test_prctl_fork(void)
if (waitpid(child_pid, &status, 0) < 0) {
ksft_test_result_fail("waitpid() failed\n");
return;
- } else if (WEXITSTATUS(status) != 1) {
- ksft_test_result_fail("unexpected PR_GET_MEMORY_MERGE result in child\n");
+ }
+
+ status = WEXITSTATUS(status);
+ if (status) {
+ test_child_ksm_err(status);
return;
}
@@ -480,12 +547,6 @@ static void test_prctl_fork(void)
ksft_test_result_pass("PR_SET_MEMORY_MERGE value is inherited\n");
}
-static int ksm_fork_exec_child(void)
-{
- /* Test if KSM is enabled for the process. */
- return prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0) == 1;
-}
-
static void test_prctl_fork_exec(void)
{
int ret, status;
@@ -518,7 +579,7 @@ static void test_prctl_fork_exec(void)
if (WIFEXITED(status)) {
status = WEXITSTATUS(status);
if (status) {
- ksft_test_result_fail("KSM not enabled\n");
+ test_child_ksm_err(status);
return;
}
} else {
@@ -545,7 +606,7 @@ static void test_prctl_unmerge(void)
ksft_print_msg("[RUN] %s\n", __func__);
- map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, true);
+ map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, KSM_MERGE_PRCTL);
if (map == MAP_FAILED)
return;
@@ -568,7 +629,7 @@ static void test_prot_none(void)
ksft_print_msg("[RUN] %s\n", __func__);
- map = mmap_and_merge_range(0x11, size, PROT_NONE, false);
+ map = mmap_and_merge_range(0x11, size, PROT_NONE, KSM_MERGE_MADVISE);
if (map == MAP_FAILED)
goto unmap;
@@ -593,24 +654,8 @@ unmap:
munmap(map, size);
}
-int main(int argc, char **argv)
+static void init_global_file_handles(void)
{
- unsigned int tests = 8;
- int err;
-
- if (argc > 1 && !strcmp(argv[1], FORK_EXEC_CHILD_PRG_NAME)) {
- exit(ksm_fork_exec_child() == 1 ? 0 : 1);
- }
-
-#ifdef __NR_userfaultfd
- tests++;
-#endif
-
- ksft_print_header();
- ksft_set_plan(tests);
-
- pagesize = getpagesize();
-
mem_fd = open("/proc/self/mem", O_RDWR);
if (mem_fd < 0)
ksft_exit_fail_msg("opening /proc/self/mem failed\n");
@@ -625,15 +670,33 @@ int main(int argc, char **argv)
ksft_exit_skip("open(\"/proc/self/pagemap\") failed\n");
proc_self_ksm_stat_fd = open("/proc/self/ksm_stat", O_RDONLY);
proc_self_ksm_merging_pages_fd = open("/proc/self/ksm_merging_pages",
- O_RDONLY);
+ O_RDONLY);
ksm_use_zero_pages_fd = open("/sys/kernel/mm/ksm/use_zero_pages", O_RDWR);
+}
+
+int main(int argc, char **argv)
+{
+ unsigned int tests = 8;
+ int err;
+
+ if (argc > 1 && !strcmp(argv[1], FORK_EXEC_CHILD_PRG_NAME)) {
+ init_global_file_handles();
+ exit(test_child_ksm());
+ }
+
+ tests++;
+
+ ksft_print_header();
+ ksft_set_plan(tests);
+
+ pagesize = getpagesize();
+
+ init_global_file_handles();
test_unmerge();
test_unmerge_zero_pages();
test_unmerge_discarded();
-#ifdef __NR_userfaultfd
test_unmerge_uffd_wp();
-#endif
test_prot_none();
@@ -646,5 +709,5 @@ int main(int argc, char **argv)
if (err)
ksft_exit_fail_msg("%d out of %d tests failed\n",
err, ksft_test_num());
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/mm/madv_populate.c b/tools/testing/selftests/mm/madv_populate.c
index 17bcb07f19f3..ef7d911da13e 100644
--- a/tools/testing/selftests/mm/madv_populate.c
+++ b/tools/testing/selftests/mm/madv_populate.c
@@ -307,5 +307,5 @@ int main(int argc, char **argv)
if (err)
ksft_exit_fail_msg("%d out of %d tests failed\n",
err, ksft_test_num());
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/mm/map_fixed_noreplace.c b/tools/testing/selftests/mm/map_fixed_noreplace.c
index b74813fdc951..d53de2486080 100644
--- a/tools/testing/selftests/mm/map_fixed_noreplace.c
+++ b/tools/testing/selftests/mm/map_fixed_noreplace.c
@@ -67,7 +67,8 @@ int main(void)
dump_maps();
ksft_exit_fail_msg("Error: munmap failed!?\n");
}
- ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_test_result_pass("mmap() 5*PAGE_SIZE at base\n");
addr = base_addr + page_size;
size = 3 * page_size;
@@ -76,7 +77,8 @@ int main(void)
dump_maps();
ksft_exit_fail_msg("Error: first mmap() failed unexpectedly\n");
}
- ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_test_result_pass("mmap() 3*PAGE_SIZE at base+PAGE_SIZE\n");
/*
* Exact same mapping again:
@@ -93,7 +95,8 @@ int main(void)
dump_maps();
ksft_exit_fail_msg("Error:1: mmap() succeeded when it shouldn't have\n");
}
- ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_test_result_pass("mmap() 5*PAGE_SIZE at base\n");
/*
* Second mapping contained within first:
@@ -111,7 +114,8 @@ int main(void)
dump_maps();
ksft_exit_fail_msg("Error:2: mmap() succeeded when it shouldn't have\n");
}
- ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_test_result_pass("mmap() 2*PAGE_SIZE at base+PAGE_SIZE\n");
/*
* Overlap end of existing mapping:
@@ -128,7 +132,8 @@ int main(void)
dump_maps();
ksft_exit_fail_msg("Error:3: mmap() succeeded when it shouldn't have\n");
}
- ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_test_result_pass("mmap() 2*PAGE_SIZE at base+(3*PAGE_SIZE)\n");
/*
* Overlap start of existing mapping:
@@ -145,7 +150,8 @@ int main(void)
dump_maps();
ksft_exit_fail_msg("Error:4: mmap() succeeded when it shouldn't have\n");
}
- ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_test_result_pass("mmap() 2*PAGE_SIZE bytes at base\n");
/*
* Adjacent to start of existing mapping:
@@ -162,7 +168,8 @@ int main(void)
dump_maps();
ksft_exit_fail_msg("Error:5: mmap() failed when it shouldn't have\n");
}
- ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_test_result_pass("mmap() PAGE_SIZE at base\n");
/*
* Adjacent to end of existing mapping:
@@ -179,7 +186,8 @@ int main(void)
dump_maps();
ksft_exit_fail_msg("Error:6: mmap() failed when it shouldn't have\n");
}
- ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_print_msg("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
+ ksft_test_result_pass("mmap() PAGE_SIZE at base+(4*PAGE_SIZE)\n");
addr = base_addr;
size = 5 * page_size;
diff --git a/tools/testing/selftests/mm/mdwe_test.c b/tools/testing/selftests/mm/mdwe_test.c
index 1e01d3ddc11c..200bedcdc32e 100644
--- a/tools/testing/selftests/mm/mdwe_test.c
+++ b/tools/testing/selftests/mm/mdwe_test.c
@@ -7,7 +7,6 @@
#include <linux/mman.h>
#include <linux/prctl.h>
-#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <sys/auxv.h>
diff --git a/tools/testing/selftests/mm/memfd_secret.c b/tools/testing/selftests/mm/memfd_secret.c
index 9b298f6a04b3..74c911aa3aea 100644
--- a/tools/testing/selftests/mm/memfd_secret.c
+++ b/tools/testing/selftests/mm/memfd_secret.c
@@ -17,9 +17,10 @@
#include <stdlib.h>
#include <string.h>
-#include <unistd.h>
+#include <asm-generic/unistd.h>
#include <errno.h>
#include <stdio.h>
+#include <fcntl.h>
#include "../kselftest.h"
@@ -27,8 +28,6 @@
#define pass(fmt, ...) ksft_test_result_pass(fmt, ##__VA_ARGS__)
#define skip(fmt, ...) ksft_test_result_skip(fmt, ##__VA_ARGS__)
-#ifdef __NR_memfd_secret
-
#define PATTERN 0x55
static const int prot = PROT_READ | PROT_WRITE;
@@ -83,6 +82,45 @@ static void test_mlock_limit(int fd)
pass("mlock limit is respected\n");
}
+static void test_vmsplice(int fd, const char *desc)
+{
+ ssize_t transferred;
+ struct iovec iov;
+ int pipefd[2];
+ char *mem;
+
+ if (pipe(pipefd)) {
+ fail("pipe failed: %s\n", strerror(errno));
+ return;
+ }
+
+ mem = mmap(NULL, page_size, prot, mode, fd, 0);
+ if (mem == MAP_FAILED) {
+ fail("Unable to mmap secret memory\n");
+ goto close_pipe;
+ }
+
+ /*
+ * vmsplice() may use GUP-fast, which must also fail. Prefault the
+ * page table, so GUP-fast could find it.
+ */
+ memset(mem, PATTERN, page_size);
+
+ iov.iov_base = mem;
+ iov.iov_len = page_size;
+ transferred = vmsplice(pipefd[1], &iov, 1, 0);
+
+ if (transferred < 0 && errno == EFAULT)
+ pass("vmsplice is blocked as expected with %s\n", desc);
+ else
+ fail("vmsplice: unexpected memory access with %s\n", desc);
+
+ munmap(mem, page_size);
+close_pipe:
+ close(pipefd[0]);
+ close(pipefd[1]);
+}
+
static void try_process_vm_read(int fd, int pipefd[2])
{
struct iovec liov, riov;
@@ -187,7 +225,6 @@ static void test_remote_access(int fd, const char *name,
return;
}
- ftruncate(fd, page_size);
memset(mem, PATTERN, page_size);
if (write(pipefd[1], &mem, sizeof(mem)) < 0) {
@@ -258,7 +295,7 @@ static void prepare(void)
strerror(errno));
}
-#define NUM_TESTS 4
+#define NUM_TESTS 6
int main(int argc, char *argv[])
{
@@ -277,9 +314,17 @@ int main(int argc, char *argv[])
ksft_exit_fail_msg("memfd_secret failed: %s\n",
strerror(errno));
}
+ if (ftruncate(fd, page_size))
+ ksft_exit_fail_msg("ftruncate failed: %s\n", strerror(errno));
test_mlock_limit(fd);
test_file_apis(fd);
+ /*
+ * We have to run the first vmsplice test before any secretmem page was
+ * allocated for this fd.
+ */
+ test_vmsplice(fd, "fresh page");
+ test_vmsplice(fd, "existing page");
test_process_vm_read(fd);
test_ptrace(fd);
@@ -287,13 +332,3 @@ int main(int argc, char *argv[])
ksft_finished();
}
-
-#else /* __NR_memfd_secret */
-
-int main(int argc, char *argv[])
-{
- printf("skip: skipping memfd_secret test (missing __NR_memfd_secret)\n");
- return KSFT_SKIP;
-}
-
-#endif /* __NR_memfd_secret */
diff --git a/tools/testing/selftests/mm/mkdirty.c b/tools/testing/selftests/mm/mkdirty.c
index 301abb99e027..1db134063c38 100644
--- a/tools/testing/selftests/mm/mkdirty.c
+++ b/tools/testing/selftests/mm/mkdirty.c
@@ -9,7 +9,7 @@
*/
#include <fcntl.h>
#include <signal.h>
-#include <unistd.h>
+#include <asm-generic/unistd.h>
#include <string.h>
#include <errno.h>
#include <stdlib.h>
@@ -265,7 +265,6 @@ munmap:
munmap(mmap_mem, mmap_size);
}
-#ifdef __NR_userfaultfd
static void test_uffdio_copy(void)
{
struct uffdio_register uffdio_register;
@@ -322,7 +321,6 @@ munmap:
munmap(dst, pagesize);
free(src);
}
-#endif /* __NR_userfaultfd */
int main(void)
{
@@ -335,9 +333,7 @@ int main(void)
thpsize / 1024);
tests += 3;
}
-#ifdef __NR_userfaultfd
tests += 1;
-#endif /* __NR_userfaultfd */
ksft_print_header();
ksft_set_plan(tests);
@@ -367,13 +363,11 @@ int main(void)
if (thpsize)
test_pte_mapped_thp();
/* Placing a fresh page via userfaultfd may set the PTE dirty. */
-#ifdef __NR_userfaultfd
test_uffdio_copy();
-#endif /* __NR_userfaultfd */
err = ksft_get_fail_cnt();
if (err)
ksft_exit_fail_msg("%d out of %d tests failed\n",
err, ksft_test_num());
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/mm/mlock2-tests.c b/tools/testing/selftests/mm/mlock2-tests.c
index 26f744188ad0..7f0d50fa361d 100644
--- a/tools/testing/selftests/mm/mlock2-tests.c
+++ b/tools/testing/selftests/mm/mlock2-tests.c
@@ -20,8 +20,6 @@ static int get_vm_area(unsigned long addr, struct vm_boundaries *area)
FILE *file;
int ret = 1;
char line[1024] = {0};
- char *end_addr;
- char *stop;
unsigned long start;
unsigned long end;
@@ -37,21 +35,10 @@ static int get_vm_area(unsigned long addr, struct vm_boundaries *area)
memset(area, 0, sizeof(struct vm_boundaries));
while(fgets(line, 1024, file)) {
- end_addr = strchr(line, '-');
- if (!end_addr) {
+ if (sscanf(line, "%lx-%lx", &start, &end) != 2) {
ksft_print_msg("cannot parse /proc/self/maps\n");
goto out;
}
- *end_addr = '\0';
- end_addr++;
- stop = strchr(end_addr, ' ');
- if (!stop) {
- ksft_print_msg("cannot parse /proc/self/maps\n");
- goto out;
- }
-
- sscanf(line, "%lx", &start);
- sscanf(end_addr, "%lx", &end);
if (start <= addr && end > addr) {
area->start = start;
diff --git a/tools/testing/selftests/mm/mlock2.h b/tools/testing/selftests/mm/mlock2.h
index 4417eaa5cfb7..1e5731bab499 100644
--- a/tools/testing/selftests/mm/mlock2.h
+++ b/tools/testing/selftests/mm/mlock2.h
@@ -3,6 +3,7 @@
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
+#include <asm-generic/unistd.h>
static int mlock2_(void *start, size_t len, int flags)
{
diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c
index 2f8b991f78cb..5a3a9bcba640 100644
--- a/tools/testing/selftests/mm/mremap_test.c
+++ b/tools/testing/selftests/mm/mremap_test.c
@@ -22,7 +22,10 @@
#define VALIDATION_DEFAULT_THRESHOLD 4 /* 4MB */
#define VALIDATION_NO_THRESHOLD 0 /* Verify the entire region */
+#ifndef MIN
#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
+#define MAX(X, Y) ((X) > (Y) ? (X) : (Y))
+#endif
#define SIZE_MB(m) ((size_t)m * (1024 * 1024))
#define SIZE_KB(k) ((size_t)k * 1024)
@@ -69,6 +72,27 @@ enum {
.expect_failure = should_fail \
}
+/* compute square root using binary search */
+static unsigned long get_sqrt(unsigned long val)
+{
+ unsigned long low = 1;
+
+ /* assuming rand_size is less than 1TB */
+ unsigned long high = (1UL << 20);
+
+ while (low <= high) {
+ unsigned long mid = low + (high - low) / 2;
+ unsigned long temp = mid * mid;
+
+ if (temp == val)
+ return mid;
+ if (temp < val)
+ low = mid + 1;
+ high = mid - 1;
+ }
+ return low;
+}
+
/*
* Returns false if the requested remap region overlaps with an
* existing mapping (e.g text, stack) else returns true.
@@ -126,19 +150,21 @@ static unsigned long long get_mmap_min_addr(void)
* Using /proc/self/maps, assert that the specified address range is contained
* within a single mapping.
*/
-static bool is_range_mapped(FILE *maps_fp, void *start, void *end)
+static bool is_range_mapped(FILE *maps_fp, unsigned long start,
+ unsigned long end)
{
char *line = NULL;
size_t len = 0;
bool success = false;
+ unsigned long first_val, second_val;
rewind(maps_fp);
while (getline(&line, &len, maps_fp) != -1) {
- char *first = strtok(line, "- ");
- void *first_val = (void *)strtol(first, NULL, 16);
- char *second = strtok(NULL, "- ");
- void *second_val = (void *) strtol(second, NULL, 16);
+ if (sscanf(line, "%lx-%lx", &first_val, &second_val) != 2) {
+ ksft_exit_fail_msg("cannot parse /proc/self/maps\n");
+ break;
+ }
if (first_val <= start && second_val >= end) {
success = true;
@@ -233,7 +259,8 @@ static void mremap_expand_merge(FILE *maps_fp, unsigned long page_size)
goto out;
}
- success = is_range_mapped(maps_fp, start, start + 3 * page_size);
+ success = is_range_mapped(maps_fp, (unsigned long)start,
+ (unsigned long)(start + 3 * page_size));
munmap(start, 3 * page_size);
out:
@@ -272,7 +299,8 @@ static void mremap_expand_merge_offset(FILE *maps_fp, unsigned long page_size)
goto out;
}
- success = is_range_mapped(maps_fp, start, start + 3 * page_size);
+ success = is_range_mapped(maps_fp, (unsigned long)start,
+ (unsigned long)(start + 3 * page_size));
munmap(start, 3 * page_size);
out:
@@ -296,7 +324,7 @@ out:
*
* |DDDDddddSSSSssss|
*/
-static void mremap_move_within_range(char pattern_seed)
+static void mremap_move_within_range(unsigned int pattern_seed, char *rand_addr)
{
char *test_name = "mremap mremap move within range";
void *src, *dest;
@@ -316,10 +344,7 @@ static void mremap_move_within_range(char pattern_seed)
src = (void *)((unsigned long)src & ~(SIZE_MB(2) - 1));
/* Set byte pattern for source block. */
- srand(pattern_seed);
- for (i = 0; i < SIZE_MB(2); i++) {
- ((char *)src)[i] = (char) rand();
- }
+ memcpy(src, rand_addr, SIZE_MB(2));
dest = src - SIZE_MB(2);
@@ -357,14 +382,14 @@ out:
/* Returns the time taken for the remap on success else returns -1. */
static long long remap_region(struct config c, unsigned int threshold_mb,
- char pattern_seed)
+ char *rand_addr)
{
void *addr, *src_addr, *dest_addr, *dest_preamble_addr;
- int d;
- unsigned long long t;
+ unsigned long long t, d;
struct timespec t_start = {0, 0}, t_end = {0, 0};
long long start_ns, end_ns, align_mask, ret, offset;
unsigned long long threshold;
+ unsigned long num_chunks;
if (threshold_mb == VALIDATION_NO_THRESHOLD)
threshold = c.region_size;
@@ -378,9 +403,7 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
}
/* Set byte pattern for source block. */
- srand(pattern_seed);
- for (t = 0; t < threshold; t++)
- memset((char *) src_addr + t, (char) rand(), 1);
+ memcpy(src_addr, rand_addr, threshold);
/* Mask to zero out lower bits of address for alignment */
align_mask = ~(c.dest_alignment - 1);
@@ -420,9 +443,7 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
}
/* Set byte pattern for the dest preamble block. */
- srand(pattern_seed);
- for (d = 0; d < c.dest_preamble_size; d++)
- memset((char *) dest_preamble_addr + d, (char) rand(), 1);
+ memcpy(dest_preamble_addr, rand_addr, c.dest_preamble_size);
}
clock_gettime(CLOCK_MONOTONIC, &t_start);
@@ -436,15 +457,42 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
goto clean_up_dest_preamble;
}
- /* Verify byte pattern after remapping */
- srand(pattern_seed);
- for (t = 0; t < threshold; t++) {
- char c = (char) rand();
+ /*
+ * Verify byte pattern after remapping. Employ an algorithm with a
+ * square root time complexity in threshold: divide the range into
+ * chunks, if memcmp() returns non-zero, only then perform an
+ * iteration in that chunk to find the mismatch index.
+ */
+ num_chunks = get_sqrt(threshold);
+ for (unsigned long i = 0; i < num_chunks; ++i) {
+ size_t chunk_size = threshold / num_chunks;
+ unsigned long shift = i * chunk_size;
+
+ if (!memcmp(dest_addr + shift, rand_addr + shift, chunk_size))
+ continue;
+
+ /* brute force iteration only over mismatch segment */
+ for (t = shift; t < shift + chunk_size; ++t) {
+ if (((char *) dest_addr)[t] != rand_addr[t]) {
+ ksft_print_msg("Data after remap doesn't match at offset %llu\n",
+ t);
+ ksft_print_msg("Expected: %#x\t Got: %#x\n", rand_addr[t] & 0xff,
+ ((char *) dest_addr)[t] & 0xff);
+ ret = -1;
+ goto clean_up_dest;
+ }
+ }
+ }
- if (((char *) dest_addr)[t] != c) {
+ /*
+ * if threshold is not divisible by num_chunks, then check the
+ * last chunk
+ */
+ for (t = num_chunks * (threshold / num_chunks); t < threshold; ++t) {
+ if (((char *) dest_addr)[t] != rand_addr[t]) {
ksft_print_msg("Data after remap doesn't match at offset %llu\n",
- t);
- ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff,
+ t);
+ ksft_print_msg("Expected: %#x\t Got: %#x\n", rand_addr[t] & 0xff,
((char *) dest_addr)[t] & 0xff);
ret = -1;
goto clean_up_dest;
@@ -452,22 +500,44 @@ static long long remap_region(struct config c, unsigned int threshold_mb,
}
/* Verify the dest preamble byte pattern after remapping */
- if (c.dest_preamble_size) {
- srand(pattern_seed);
- for (d = 0; d < c.dest_preamble_size; d++) {
- char c = (char) rand();
-
- if (((char *) dest_preamble_addr)[d] != c) {
- ksft_print_msg("Preamble data after remap doesn't match at offset %d\n",
- d);
- ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff,
- ((char *) dest_preamble_addr)[d] & 0xff);
+ if (!c.dest_preamble_size)
+ goto no_preamble;
+
+ num_chunks = get_sqrt(c.dest_preamble_size);
+
+ for (unsigned long i = 0; i < num_chunks; ++i) {
+ size_t chunk_size = c.dest_preamble_size / num_chunks;
+ unsigned long shift = i * chunk_size;
+
+ if (!memcmp(dest_preamble_addr + shift, rand_addr + shift,
+ chunk_size))
+ continue;
+
+ /* brute force iteration only over mismatched segment */
+ for (d = shift; d < shift + chunk_size; ++d) {
+ if (((char *) dest_preamble_addr)[d] != rand_addr[d]) {
+ ksft_print_msg("Preamble data after remap doesn't match at offset %llu\n",
+ d);
+ ksft_print_msg("Expected: %#x\t Got: %#x\n", rand_addr[d] & 0xff,
+ ((char *) dest_preamble_addr)[d] & 0xff);
ret = -1;
goto clean_up_dest;
}
}
}
+ for (d = num_chunks * (c.dest_preamble_size / num_chunks); d < c.dest_preamble_size; ++d) {
+ if (((char *) dest_preamble_addr)[d] != rand_addr[d]) {
+ ksft_print_msg("Preamble data after remap doesn't match at offset %llu\n",
+ d);
+ ksft_print_msg("Expected: %#x\t Got: %#x\n", rand_addr[d] & 0xff,
+ ((char *) dest_preamble_addr)[d] & 0xff);
+ ret = -1;
+ goto clean_up_dest;
+ }
+ }
+
+no_preamble:
start_ns = t_start.tv_sec * NS_PER_SEC + t_start.tv_nsec;
end_ns = t_end.tv_sec * NS_PER_SEC + t_end.tv_nsec;
ret = end_ns - start_ns;
@@ -494,7 +564,8 @@ out:
* the beginning of the mapping just because the aligned
* down address landed on a mapping that maybe does not exist.
*/
-static void mremap_move_1mb_from_start(char pattern_seed)
+static void mremap_move_1mb_from_start(unsigned int pattern_seed,
+ char *rand_addr)
{
char *test_name = "mremap move 1mb from start at 1MB+256KB aligned src";
void *src = NULL, *dest = NULL;
@@ -520,10 +591,7 @@ static void mremap_move_1mb_from_start(char pattern_seed)
}
/* Set byte pattern for source block. */
- srand(pattern_seed);
- for (i = 0; i < SIZE_MB(2); i++) {
- ((char *)src)[i] = (char) rand();
- }
+ memcpy(src, rand_addr, SIZE_MB(2));
/*
* Unmap the beginning of dest so that the aligned address
@@ -568,10 +636,10 @@ out:
static void run_mremap_test_case(struct test test_case, int *failures,
unsigned int threshold_mb,
- unsigned int pattern_seed)
+ unsigned int pattern_seed, char *rand_addr)
{
long long remap_time = remap_region(test_case.config, threshold_mb,
- pattern_seed);
+ rand_addr);
if (remap_time < 0) {
if (test_case.expect_failure)
@@ -642,7 +710,15 @@ int main(int argc, char **argv)
int failures = 0;
int i, run_perf_tests;
unsigned int threshold_mb = VALIDATION_DEFAULT_THRESHOLD;
+
+ /* hard-coded test configs */
+ size_t max_test_variable_region_size = _2GB;
+ size_t max_test_constant_region_size = _2MB;
+ size_t dest_preamble_size = 10 * _4MB;
+
unsigned int pattern_seed;
+ char *rand_addr;
+ size_t rand_size;
int num_expand_tests = 2;
int num_misc_tests = 2;
struct test test_cases[MAX_TEST] = {};
@@ -659,6 +735,31 @@ int main(int argc, char **argv)
ksft_print_msg("Test configs:\n\tthreshold_mb=%u\n\tpattern_seed=%u\n\n",
threshold_mb, pattern_seed);
+ /*
+ * set preallocated random array according to test configs; see the
+ * functions for the logic of setting the size
+ */
+ if (!threshold_mb)
+ rand_size = MAX(max_test_variable_region_size,
+ max_test_constant_region_size);
+ else
+ rand_size = MAX(MIN(threshold_mb * _1MB,
+ max_test_variable_region_size),
+ max_test_constant_region_size);
+ rand_size = MAX(dest_preamble_size, rand_size);
+
+ rand_addr = (char *)mmap(NULL, rand_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (rand_addr == MAP_FAILED) {
+ perror("mmap");
+ ksft_exit_fail_msg("cannot mmap rand_addr\n");
+ }
+
+ /* fill stream of random bytes */
+ srand(pattern_seed);
+ for (unsigned long i = 0; i < rand_size; ++i)
+ rand_addr[i] = (char) rand();
+
page_size = sysconf(_SC_PAGESIZE);
/* Expected mremap failures */
@@ -730,13 +831,13 @@ int main(int argc, char **argv)
for (i = 0; i < ARRAY_SIZE(test_cases); i++)
run_mremap_test_case(test_cases[i], &failures, threshold_mb,
- pattern_seed);
+ pattern_seed, rand_addr);
maps_fp = fopen("/proc/self/maps", "r");
if (maps_fp == NULL) {
- ksft_print_msg("Failed to read /proc/self/maps: %s\n", strerror(errno));
- exit(KSFT_FAIL);
+ munmap(rand_addr, rand_size);
+ ksft_exit_fail_msg("Failed to read /proc/self/maps: %s\n", strerror(errno));
}
mremap_expand_merge(maps_fp, page_size);
@@ -744,17 +845,20 @@ int main(int argc, char **argv)
fclose(maps_fp);
- mremap_move_within_range(pattern_seed);
- mremap_move_1mb_from_start(pattern_seed);
+ mremap_move_within_range(pattern_seed, rand_addr);
+ mremap_move_1mb_from_start(pattern_seed, rand_addr);
if (run_perf_tests) {
ksft_print_msg("\n%s\n",
"mremap HAVE_MOVE_PMD/PUD optimization time comparison for 1GB region:");
for (i = 0; i < ARRAY_SIZE(perf_test_cases); i++)
run_mremap_test_case(perf_test_cases[i], &failures,
- threshold_mb, pattern_seed);
+ threshold_mb, pattern_seed,
+ rand_addr);
}
+ munmap(rand_addr, rand_size);
+
if (failures > 0)
ksft_exit_fail();
else
diff --git a/tools/testing/selftests/mm/mseal_helpers.h b/tools/testing/selftests/mm/mseal_helpers.h
new file mode 100644
index 000000000000..0cfce31c76d2
--- /dev/null
+++ b/tools/testing/selftests/mm/mseal_helpers.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#define FAIL_TEST_IF_FALSE(test_passed) \
+ do { \
+ if (!(test_passed)) { \
+ ksft_test_result_fail("%s: line:%d\n", \
+ __func__, __LINE__); \
+ return; \
+ } \
+ } while (0)
+
+#define SKIP_TEST_IF_FALSE(test_passed) \
+ do { \
+ if (!(test_passed)) { \
+ ksft_test_result_skip("%s: line:%d\n", \
+ __func__, __LINE__); \
+ return; \
+ } \
+ } while (0)
+
+#define REPORT_TEST_PASS() ksft_test_result_pass("%s\n", __func__)
+
+#ifndef PKEY_DISABLE_ACCESS
+#define PKEY_DISABLE_ACCESS 0x1
+#endif
+
+#ifndef PKEY_DISABLE_WRITE
+#define PKEY_DISABLE_WRITE 0x2
+#endif
+
+#ifndef PKEY_BITS_PER_PKEY
+#define PKEY_BITS_PER_PKEY 2
+#endif
+
+#ifndef PKEY_MASK
+#define PKEY_MASK (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)
+#endif
+
+#ifndef u64
+#define u64 unsigned long long
+#endif
diff --git a/tools/testing/selftests/mm/mseal_test.c b/tools/testing/selftests/mm/mseal_test.c
new file mode 100644
index 000000000000..a818f010de47
--- /dev/null
+++ b/tools/testing/selftests/mm/mseal_test.c
@@ -0,0 +1,1847 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <linux/mman.h>
+#include <sys/mman.h>
+#include <stdint.h>
+#include <asm-generic/unistd.h>
+#include <string.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <stdbool.h>
+#include "../kselftest.h"
+#include <syscall.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/vfs.h>
+#include <sys/stat.h>
+#include "mseal_helpers.h"
+
+static unsigned long get_vma_size(void *addr, int *prot)
+{
+ FILE *maps;
+ char line[256];
+ int size = 0;
+ uintptr_t addr_start, addr_end;
+ char protstr[5];
+ *prot = 0;
+
+ maps = fopen("/proc/self/maps", "r");
+ if (!maps)
+ return 0;
+
+ while (fgets(line, sizeof(line), maps)) {
+ if (sscanf(line, "%lx-%lx %4s", &addr_start, &addr_end, protstr) == 3) {
+ if (addr_start == (uintptr_t) addr) {
+ size = addr_end - addr_start;
+ if (protstr[0] == 'r')
+ *prot |= 0x4;
+ if (protstr[1] == 'w')
+ *prot |= 0x2;
+ if (protstr[2] == 'x')
+ *prot |= 0x1;
+ break;
+ }
+ }
+ }
+ fclose(maps);
+ return size;
+}
+
+/*
+ * define sys_xyx to call syscall directly.
+ */
+static int sys_mseal(void *start, size_t len)
+{
+ int sret;
+
+ errno = 0;
+ sret = syscall(__NR_mseal, start, len, 0);
+ return sret;
+}
+
+static int sys_mprotect(void *ptr, size_t size, unsigned long prot)
+{
+ int sret;
+
+ errno = 0;
+ sret = syscall(__NR_mprotect, ptr, size, prot);
+ return sret;
+}
+
+static int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
+ unsigned long pkey)
+{
+ int sret;
+
+ errno = 0;
+ sret = syscall(__NR_pkey_mprotect, ptr, size, orig_prot, pkey);
+ return sret;
+}
+
+static void *sys_mmap(void *addr, unsigned long len, unsigned long prot,
+ unsigned long flags, unsigned long fd, unsigned long offset)
+{
+ void *sret;
+
+ errno = 0;
+ sret = (void *) syscall(__NR_mmap, addr, len, prot,
+ flags, fd, offset);
+ return sret;
+}
+
+static int sys_munmap(void *ptr, size_t size)
+{
+ int sret;
+
+ errno = 0;
+ sret = syscall(__NR_munmap, ptr, size);
+ return sret;
+}
+
+static int sys_madvise(void *start, size_t len, int types)
+{
+ int sret;
+
+ errno = 0;
+ sret = syscall(__NR_madvise, start, len, types);
+ return sret;
+}
+
+static int sys_pkey_alloc(unsigned long flags, unsigned long init_val)
+{
+ int ret = syscall(__NR_pkey_alloc, flags, init_val);
+
+ return ret;
+}
+
+static unsigned int __read_pkey_reg(void)
+{
+ unsigned int pkey_reg = 0;
+#if defined(__i386__) || defined(__x86_64__) /* arch */
+ unsigned int eax, edx;
+ unsigned int ecx = 0;
+
+ asm volatile(".byte 0x0f,0x01,0xee\n\t"
+ : "=a" (eax), "=d" (edx)
+ : "c" (ecx));
+ pkey_reg = eax;
+#endif
+ return pkey_reg;
+}
+
+static void __write_pkey_reg(u64 pkey_reg)
+{
+#if defined(__i386__) || defined(__x86_64__) /* arch */
+ unsigned int eax = pkey_reg;
+ unsigned int ecx = 0;
+ unsigned int edx = 0;
+
+ asm volatile(".byte 0x0f,0x01,0xef\n\t"
+ : : "a" (eax), "c" (ecx), "d" (edx));
+#endif
+}
+
+static unsigned long pkey_bit_position(int pkey)
+{
+ return pkey * PKEY_BITS_PER_PKEY;
+}
+
+static u64 set_pkey_bits(u64 reg, int pkey, u64 flags)
+{
+ unsigned long shift = pkey_bit_position(pkey);
+
+ /* mask out bits from pkey in old value */
+ reg &= ~((u64)PKEY_MASK << shift);
+ /* OR in new bits for pkey */
+ reg |= (flags & PKEY_MASK) << shift;
+ return reg;
+}
+
+static void set_pkey(int pkey, unsigned long pkey_value)
+{
+ u64 new_pkey_reg;
+
+ new_pkey_reg = set_pkey_bits(__read_pkey_reg(), pkey, pkey_value);
+ __write_pkey_reg(new_pkey_reg);
+}
+
+static void setup_single_address(int size, void **ptrOut)
+{
+ void *ptr;
+
+ ptr = sys_mmap(NULL, size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ *ptrOut = ptr;
+}
+
+static void setup_single_address_rw(int size, void **ptrOut)
+{
+ void *ptr;
+ unsigned long mapflags = MAP_ANONYMOUS | MAP_PRIVATE;
+
+ ptr = sys_mmap(NULL, size, PROT_READ | PROT_WRITE, mapflags, -1, 0);
+ *ptrOut = ptr;
+}
+
+static int clean_single_address(void *ptr, int size)
+{
+ int ret;
+ ret = munmap(ptr, size);
+ return ret;
+}
+
+static int seal_single_address(void *ptr, int size)
+{
+ int ret;
+ ret = sys_mseal(ptr, size);
+ return ret;
+}
+
+bool seal_support(void)
+{
+ int ret;
+ void *ptr;
+ unsigned long page_size = getpagesize();
+
+ ptr = sys_mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (ptr == (void *) -1)
+ return false;
+
+ ret = sys_mseal(ptr, page_size);
+ if (ret < 0)
+ return false;
+
+ return true;
+}
+
+bool pkey_supported(void)
+{
+#if defined(__i386__) || defined(__x86_64__) /* arch */
+ int pkey = sys_pkey_alloc(0, 0);
+
+ if (pkey > 0)
+ return true;
+#endif
+ return false;
+}
+
+static void test_seal_addseal(void)
+{
+ int ret;
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_unmapped_start(void)
+{
+ int ret;
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* munmap 2 pages from ptr. */
+ ret = sys_munmap(ptr, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* mprotect will fail because 2 pages from ptr are unmapped. */
+ ret = sys_mprotect(ptr, size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* mseal will fail because 2 pages from ptr are unmapped. */
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ ret = sys_mseal(ptr + 2 * page_size, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_unmapped_middle(void)
+{
+ int ret;
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* munmap 2 pages from ptr + page. */
+ ret = sys_munmap(ptr + page_size, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* mprotect will fail, since middle 2 pages are unmapped. */
+ ret = sys_mprotect(ptr, size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* mseal will fail as well. */
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* we still can add seal to the first page and last page*/
+ ret = sys_mseal(ptr, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_mseal(ptr + 3 * page_size, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_unmapped_end(void)
+{
+ int ret;
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* unmap last 2 pages. */
+ ret = sys_munmap(ptr + 2 * page_size, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* mprotect will fail since last 2 pages are unmapped. */
+ ret = sys_mprotect(ptr, size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* mseal will fail as well. */
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* The first 2 pages is not sealed, and can add seals */
+ ret = sys_mseal(ptr, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_multiple_vmas(void)
+{
+ int ret;
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* use mprotect to split the vma into 3. */
+ ret = sys_mprotect(ptr + page_size, 2 * page_size,
+ PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* mprotect will get applied to all 4 pages - 3 VMAs. */
+ ret = sys_mprotect(ptr, size, PROT_READ);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* use mprotect to split the vma into 3. */
+ ret = sys_mprotect(ptr + page_size, 2 * page_size,
+ PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* mseal get applied to all 4 pages - 3 VMAs. */
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_split_start(void)
+{
+ int ret;
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* use mprotect to split at middle */
+ ret = sys_mprotect(ptr, 2 * page_size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* seal the first page, this will split the VMA */
+ ret = sys_mseal(ptr, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* add seal to the remain 3 pages */
+ ret = sys_mseal(ptr + page_size, 3 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_split_end(void)
+{
+ int ret;
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* use mprotect to split at middle */
+ ret = sys_mprotect(ptr, 2 * page_size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* seal the last page */
+ ret = sys_mseal(ptr + 3 * page_size, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* Adding seals to the first 3 pages */
+ ret = sys_mseal(ptr, 3 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_invalid_input(void)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(8 * page_size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+ ret = clean_single_address(ptr + 4 * page_size, 4 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* invalid flag */
+ ret = syscall(__NR_mseal, ptr, size, 0x20);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* unaligned address */
+ ret = sys_mseal(ptr + 1, 2 * page_size);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* length too big */
+ ret = sys_mseal(ptr, 5 * page_size);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* length overflow */
+ ret = sys_mseal(ptr, UINT64_MAX/page_size);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* start is not in a valid VMA */
+ ret = sys_mseal(ptr - page_size, 5 * page_size);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_zero_length(void)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ ret = sys_mprotect(ptr, 0, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* seal 0 length will be OK, same as mprotect */
+ ret = sys_mseal(ptr, 0);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* verify the 4 pages are not sealed by previous call. */
+ ret = sys_mprotect(ptr, size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_zero_address(void)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ int prot;
+
+ /* use mmap to change protection. */
+ ptr = sys_mmap(0, size, PROT_NONE,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ FAIL_TEST_IF_FALSE(ptr == 0);
+
+ size = get_vma_size(ptr, &prot);
+ FAIL_TEST_IF_FALSE(size == 4 * page_size);
+
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* verify the 4 pages are sealed by previous call. */
+ ret = sys_mprotect(ptr, size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_twice(void)
+{
+ int ret;
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* apply the same seal will be OK. idempotent. */
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mprotect(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = seal_single_address(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ ret = sys_mprotect(ptr, size, PROT_READ | PROT_WRITE);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_start_mprotect(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = seal_single_address(ptr, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* the first page is sealed. */
+ ret = sys_mprotect(ptr, page_size, PROT_READ | PROT_WRITE);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* pages after the first page is not sealed. */
+ ret = sys_mprotect(ptr + page_size, page_size * 3,
+ PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_end_mprotect(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = seal_single_address(ptr + page_size, 3 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* first page is not sealed */
+ ret = sys_mprotect(ptr, page_size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* last 3 page are sealed */
+ ret = sys_mprotect(ptr + page_size, page_size * 3,
+ PROT_READ | PROT_WRITE);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mprotect_unalign_len(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = seal_single_address(ptr, page_size * 2 - 1);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* 2 pages are sealed. */
+ ret = sys_mprotect(ptr, page_size * 2, PROT_READ | PROT_WRITE);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_mprotect(ptr + page_size * 2, page_size,
+ PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mprotect_unalign_len_variant_2(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+ if (seal) {
+ ret = seal_single_address(ptr, page_size * 2 + 1);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* 3 pages are sealed. */
+ ret = sys_mprotect(ptr, page_size * 3, PROT_READ | PROT_WRITE);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_mprotect(ptr + page_size * 3, page_size,
+ PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mprotect_two_vma(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* use mprotect to split */
+ ret = sys_mprotect(ptr, page_size * 2, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ if (seal) {
+ ret = seal_single_address(ptr, page_size * 4);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ ret = sys_mprotect(ptr, page_size * 2, PROT_READ | PROT_WRITE);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_mprotect(ptr + page_size * 2, page_size * 2,
+ PROT_READ | PROT_WRITE);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mprotect_two_vma_with_split(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* use mprotect to split as two vma. */
+ ret = sys_mprotect(ptr, page_size * 2, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* mseal can apply across 2 vma, also split them. */
+ if (seal) {
+ ret = seal_single_address(ptr + page_size, page_size * 2);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* the first page is not sealed. */
+ ret = sys_mprotect(ptr, page_size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* the second page is sealed. */
+ ret = sys_mprotect(ptr + page_size, page_size, PROT_READ | PROT_WRITE);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* the third page is sealed. */
+ ret = sys_mprotect(ptr + 2 * page_size, page_size,
+ PROT_READ | PROT_WRITE);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* the fouth page is not sealed. */
+ ret = sys_mprotect(ptr + 3 * page_size, page_size,
+ PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mprotect_partial_mprotect(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* seal one page. */
+ if (seal) {
+ ret = seal_single_address(ptr, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* mprotect first 2 page will fail, since the first page are sealed. */
+ ret = sys_mprotect(ptr, 2 * page_size, PROT_READ | PROT_WRITE);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mprotect_two_vma_with_gap(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* use mprotect to split. */
+ ret = sys_mprotect(ptr, page_size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* use mprotect to split. */
+ ret = sys_mprotect(ptr + 3 * page_size, page_size,
+ PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* use munmap to free two pages in the middle */
+ ret = sys_munmap(ptr + page_size, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* mprotect will fail, because there is a gap in the address. */
+ /* notes, internally mprotect still updated the first page. */
+ ret = sys_mprotect(ptr, 4 * page_size, PROT_READ);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* mseal will fail as well. */
+ ret = sys_mseal(ptr, 4 * page_size);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* the first page is not sealed. */
+ ret = sys_mprotect(ptr, page_size, PROT_READ);
+ FAIL_TEST_IF_FALSE(ret == 0);
+
+ /* the last page is not sealed. */
+ ret = sys_mprotect(ptr + 3 * page_size, page_size, PROT_READ);
+ FAIL_TEST_IF_FALSE(ret == 0);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mprotect_split(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* use mprotect to split. */
+ ret = sys_mprotect(ptr, page_size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* seal all 4 pages. */
+ if (seal) {
+ ret = sys_mseal(ptr, 4 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* mprotect is sealed. */
+ ret = sys_mprotect(ptr, 2 * page_size, PROT_READ);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+
+ ret = sys_mprotect(ptr + 2 * page_size, 2 * page_size, PROT_READ);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mprotect_merge(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* use mprotect to split one page. */
+ ret = sys_mprotect(ptr, page_size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* seal first two pages. */
+ if (seal) {
+ ret = sys_mseal(ptr, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* 2 pages are sealed. */
+ ret = sys_mprotect(ptr, 2 * page_size, PROT_READ);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* last 2 pages are not sealed. */
+ ret = sys_mprotect(ptr + 2 * page_size, 2 * page_size, PROT_READ);
+ FAIL_TEST_IF_FALSE(ret == 0);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_munmap(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* 4 pages are sealed. */
+ ret = sys_munmap(ptr, size);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+/*
+ * allocate 4 pages,
+ * use mprotect to split it as two VMAs
+ * seal the whole range
+ * munmap will fail on both
+ */
+static void test_seal_munmap_two_vma(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* use mprotect to split */
+ ret = sys_mprotect(ptr, page_size * 2, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ ret = sys_munmap(ptr, page_size * 2);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_munmap(ptr + page_size, page_size * 2);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+/*
+ * allocate a VMA with 4 pages.
+ * munmap the middle 2 pages.
+ * seal the whole 4 pages, will fail.
+ * munmap the first page will be OK.
+ * munmap the last page will be OK.
+ */
+static void test_seal_munmap_vma_with_gap(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ ret = sys_munmap(ptr + page_size, page_size * 2);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ if (seal) {
+ /* can't have gap in the middle. */
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(ret < 0);
+ }
+
+ ret = sys_munmap(ptr, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_munmap(ptr + page_size * 2, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_munmap(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_munmap_start_freed(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ int prot;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* unmap the first page. */
+ ret = sys_munmap(ptr, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* seal the last 3 pages. */
+ if (seal) {
+ ret = sys_mseal(ptr + page_size, 3 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* unmap from the first page. */
+ ret = sys_munmap(ptr, size);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ size = get_vma_size(ptr + page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == page_size * 3);
+ } else {
+ /* note: this will be OK, even the first page is */
+ /* already unmapped. */
+ FAIL_TEST_IF_FALSE(!ret);
+
+ size = get_vma_size(ptr + page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == 0);
+ }
+
+ REPORT_TEST_PASS();
+}
+
+static void test_munmap_end_freed(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* unmap last page. */
+ ret = sys_munmap(ptr + page_size * 3, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* seal the first 3 pages. */
+ if (seal) {
+ ret = sys_mseal(ptr, 3 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* unmap all pages. */
+ ret = sys_munmap(ptr, size);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_munmap_middle_freed(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ int prot;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* unmap 2 pages in the middle. */
+ ret = sys_munmap(ptr + page_size, page_size * 2);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* seal the first page. */
+ if (seal) {
+ ret = sys_mseal(ptr, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* munmap all 4 pages. */
+ ret = sys_munmap(ptr, size);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ size = get_vma_size(ptr, &prot);
+ FAIL_TEST_IF_FALSE(size == page_size);
+
+ size = get_vma_size(ptr + page_size * 3, &prot);
+ FAIL_TEST_IF_FALSE(size == page_size);
+ } else {
+ FAIL_TEST_IF_FALSE(!ret);
+
+ size = get_vma_size(ptr, &prot);
+ FAIL_TEST_IF_FALSE(size == 0);
+
+ size = get_vma_size(ptr + page_size * 3, &prot);
+ FAIL_TEST_IF_FALSE(size == 0);
+ }
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mremap_shrink(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* shrink from 4 pages to 2 pages. */
+ ret2 = mremap(ptr, size, 2 * page_size, 0, 0);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else {
+ FAIL_TEST_IF_FALSE(ret2 != MAP_FAILED);
+
+ }
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mremap_expand(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+ /* ummap last 2 pages. */
+ ret = sys_munmap(ptr + 2 * page_size, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ if (seal) {
+ ret = sys_mseal(ptr, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* expand from 2 page to 4 pages. */
+ ret2 = mremap(ptr, 2 * page_size, 4 * page_size, 0, 0);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else {
+ FAIL_TEST_IF_FALSE(ret2 == ptr);
+
+ }
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mremap_move(bool seal)
+{
+ void *ptr, *newPtr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+ setup_single_address(size, &newPtr);
+ FAIL_TEST_IF_FALSE(newPtr != (void *)-1);
+ ret = clean_single_address(newPtr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* move from ptr to fixed address. */
+ ret2 = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, newPtr);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else {
+ FAIL_TEST_IF_FALSE(ret2 != MAP_FAILED);
+
+ }
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mmap_overwrite_prot(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* use mmap to change protection. */
+ ret2 = sys_mmap(ptr, size, PROT_NONE,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else
+ FAIL_TEST_IF_FALSE(ret2 == ptr);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mmap_expand(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 12 * page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+ /* ummap last 4 pages. */
+ ret = sys_munmap(ptr + 8 * page_size, 4 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ if (seal) {
+ ret = sys_mseal(ptr, 8 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* use mmap to expand. */
+ ret2 = sys_mmap(ptr, size, PROT_READ,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else
+ FAIL_TEST_IF_FALSE(ret2 == ptr);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mmap_shrink(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 12 * page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* use mmap to shrink. */
+ ret2 = sys_mmap(ptr, 8 * page_size, PROT_READ,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_FIXED, -1, 0);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else
+ FAIL_TEST_IF_FALSE(ret2 == ptr);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mremap_shrink_fixed(bool seal)
+{
+ void *ptr;
+ void *newAddr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+ setup_single_address(size, &newAddr);
+ FAIL_TEST_IF_FALSE(newAddr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* mremap to move and shrink to fixed address */
+ ret2 = mremap(ptr, size, 2 * page_size, MREMAP_MAYMOVE | MREMAP_FIXED,
+ newAddr);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else
+ FAIL_TEST_IF_FALSE(ret2 == newAddr);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mremap_expand_fixed(bool seal)
+{
+ void *ptr;
+ void *newAddr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(page_size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+ setup_single_address(size, &newAddr);
+ FAIL_TEST_IF_FALSE(newAddr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(newAddr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* mremap to move and expand to fixed address */
+ ret2 = mremap(ptr, page_size, size, MREMAP_MAYMOVE | MREMAP_FIXED,
+ newAddr);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else
+ FAIL_TEST_IF_FALSE(ret2 == newAddr);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mremap_move_fixed(bool seal)
+{
+ void *ptr;
+ void *newAddr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+ setup_single_address(size, &newAddr);
+ FAIL_TEST_IF_FALSE(newAddr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(newAddr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* mremap to move to fixed address */
+ ret2 = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, newAddr);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else
+ FAIL_TEST_IF_FALSE(ret2 == newAddr);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mremap_move_fixed_zero(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /*
+ * MREMAP_FIXED can move the mapping to zero address
+ */
+ ret2 = mremap(ptr, size, 2 * page_size, MREMAP_MAYMOVE | MREMAP_FIXED,
+ 0);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else {
+ FAIL_TEST_IF_FALSE(ret2 == 0);
+
+ }
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mremap_move_dontunmap(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* mremap to move, and don't unmap src addr. */
+ ret2 = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_DONTUNMAP, 0);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else {
+ FAIL_TEST_IF_FALSE(ret2 != MAP_FAILED);
+
+ }
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_mremap_move_dontunmap_anyaddr(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ void *ret2;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /*
+ * The 0xdeaddead should not have effect on dest addr
+ * when MREMAP_DONTUNMAP is set.
+ */
+ ret2 = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_DONTUNMAP,
+ 0xdeaddead);
+ if (seal) {
+ FAIL_TEST_IF_FALSE(ret2 == MAP_FAILED);
+ FAIL_TEST_IF_FALSE(errno == EPERM);
+ } else {
+ FAIL_TEST_IF_FALSE(ret2 != MAP_FAILED);
+ FAIL_TEST_IF_FALSE((long)ret2 != 0xdeaddead);
+
+ }
+
+ REPORT_TEST_PASS();
+}
+
+
+static void test_seal_merge_and_split(void)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size;
+ int ret;
+ int prot;
+
+ /* (24 RO) */
+ setup_single_address(24 * page_size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ /* use mprotect(NONE) to set out boundary */
+ /* (1 NONE) (22 RO) (1 NONE) */
+ ret = sys_mprotect(ptr, page_size, PROT_NONE);
+ FAIL_TEST_IF_FALSE(!ret);
+ ret = sys_mprotect(ptr + 23 * page_size, page_size, PROT_NONE);
+ FAIL_TEST_IF_FALSE(!ret);
+ size = get_vma_size(ptr + page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == 22 * page_size);
+ FAIL_TEST_IF_FALSE(prot == 4);
+
+ /* use mseal to split from beginning */
+ /* (1 NONE) (1 RO_SEAL) (21 RO) (1 NONE) */
+ ret = sys_mseal(ptr + page_size, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ size = get_vma_size(ptr + page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == page_size);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+ size = get_vma_size(ptr + 2 * page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == 21 * page_size);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+
+ /* use mseal to split from the end. */
+ /* (1 NONE) (1 RO_SEAL) (20 RO) (1 RO_SEAL) (1 NONE) */
+ ret = sys_mseal(ptr + 22 * page_size, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ size = get_vma_size(ptr + 22 * page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == page_size);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+ size = get_vma_size(ptr + 2 * page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == 20 * page_size);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+
+ /* merge with prev. */
+ /* (1 NONE) (2 RO_SEAL) (19 RO) (1 RO_SEAL) (1 NONE) */
+ ret = sys_mseal(ptr + 2 * page_size, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ size = get_vma_size(ptr + page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == 2 * page_size);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+
+ /* merge with after. */
+ /* (1 NONE) (2 RO_SEAL) (18 RO) (2 RO_SEALS) (1 NONE) */
+ ret = sys_mseal(ptr + 21 * page_size, page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ size = get_vma_size(ptr + 21 * page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == 2 * page_size);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+
+ /* split and merge from prev */
+ /* (1 NONE) (3 RO_SEAL) (17 RO) (2 RO_SEALS) (1 NONE) */
+ ret = sys_mseal(ptr + 2 * page_size, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ size = get_vma_size(ptr + 1 * page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == 3 * page_size);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+ ret = sys_munmap(ptr + page_size, page_size);
+ FAIL_TEST_IF_FALSE(ret < 0);
+ ret = sys_mprotect(ptr + 2 * page_size, page_size, PROT_NONE);
+ FAIL_TEST_IF_FALSE(ret < 0);
+
+ /* split and merge from next */
+ /* (1 NONE) (3 RO_SEAL) (16 RO) (3 RO_SEALS) (1 NONE) */
+ ret = sys_mseal(ptr + 20 * page_size, 2 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+ size = get_vma_size(ptr + 20 * page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == 3 * page_size);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+
+ /* merge from middle of prev and middle of next. */
+ /* (1 NONE) (22 RO_SEAL) (1 NONE) */
+ ret = sys_mseal(ptr + 2 * page_size, 20 * page_size);
+ FAIL_TEST_IF_FALSE(!ret);
+ size = get_vma_size(ptr + page_size, &prot);
+ FAIL_TEST_IF_FALSE(size == 22 * page_size);
+ FAIL_TEST_IF_FALSE(prot == 0x4);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_discard_ro_anon_on_rw(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address_rw(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* sealing doesn't take effect on RW memory. */
+ ret = sys_madvise(ptr, size, MADV_DONTNEED);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* base seal still apply. */
+ ret = sys_munmap(ptr, size);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_discard_ro_anon_on_pkey(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ int pkey;
+
+ SKIP_TEST_IF_FALSE(pkey_supported());
+
+ setup_single_address_rw(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ pkey = sys_pkey_alloc(0, 0);
+ FAIL_TEST_IF_FALSE(pkey > 0);
+
+ ret = sys_mprotect_pkey((void *)ptr, size, PROT_READ | PROT_WRITE, pkey);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* sealing doesn't take effect if PKRU allow write. */
+ set_pkey(pkey, 0);
+ ret = sys_madvise(ptr, size, MADV_DONTNEED);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* sealing will take effect if PKRU deny write. */
+ set_pkey(pkey, PKEY_DISABLE_WRITE);
+ ret = sys_madvise(ptr, size, MADV_DONTNEED);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ /* base seal still apply. */
+ ret = sys_munmap(ptr, size);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_discard_ro_anon_on_filebacked(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ int fd;
+ unsigned long mapflags = MAP_PRIVATE;
+
+ fd = memfd_create("test", 0);
+ FAIL_TEST_IF_FALSE(fd > 0);
+
+ ret = fallocate(fd, 0, 0, size);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ptr = sys_mmap(NULL, size, PROT_READ, mapflags, fd, 0);
+ FAIL_TEST_IF_FALSE(ptr != MAP_FAILED);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* sealing doesn't apply for file backed mapping. */
+ ret = sys_madvise(ptr, size, MADV_DONTNEED);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_munmap(ptr, size);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+ close(fd);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_discard_ro_anon_on_shared(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+ unsigned long mapflags = MAP_ANONYMOUS | MAP_SHARED;
+
+ ptr = sys_mmap(NULL, size, PROT_READ, mapflags, -1, 0);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = sys_mseal(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ /* sealing doesn't apply for shared mapping. */
+ ret = sys_madvise(ptr, size, MADV_DONTNEED);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_munmap(ptr, size);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+static void test_seal_discard_ro_anon(bool seal)
+{
+ void *ptr;
+ unsigned long page_size = getpagesize();
+ unsigned long size = 4 * page_size;
+ int ret;
+
+ setup_single_address(size, &ptr);
+ FAIL_TEST_IF_FALSE(ptr != (void *)-1);
+
+ if (seal) {
+ ret = seal_single_address(ptr, size);
+ FAIL_TEST_IF_FALSE(!ret);
+ }
+
+ ret = sys_madvise(ptr, size, MADV_DONTNEED);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ ret = sys_munmap(ptr, size);
+ if (seal)
+ FAIL_TEST_IF_FALSE(ret < 0);
+ else
+ FAIL_TEST_IF_FALSE(!ret);
+
+ REPORT_TEST_PASS();
+}
+
+int main(int argc, char **argv)
+{
+ bool test_seal = seal_support();
+
+ ksft_print_header();
+
+ if (!test_seal)
+ ksft_exit_skip("sealing not supported, check CONFIG_64BIT\n");
+
+ if (!pkey_supported())
+ ksft_print_msg("PKEY not supported\n");
+
+ ksft_set_plan(80);
+
+ test_seal_addseal();
+ test_seal_unmapped_start();
+ test_seal_unmapped_middle();
+ test_seal_unmapped_end();
+ test_seal_multiple_vmas();
+ test_seal_split_start();
+ test_seal_split_end();
+ test_seal_invalid_input();
+ test_seal_zero_length();
+ test_seal_twice();
+
+ test_seal_mprotect(false);
+ test_seal_mprotect(true);
+
+ test_seal_start_mprotect(false);
+ test_seal_start_mprotect(true);
+
+ test_seal_end_mprotect(false);
+ test_seal_end_mprotect(true);
+
+ test_seal_mprotect_unalign_len(false);
+ test_seal_mprotect_unalign_len(true);
+
+ test_seal_mprotect_unalign_len_variant_2(false);
+ test_seal_mprotect_unalign_len_variant_2(true);
+
+ test_seal_mprotect_two_vma(false);
+ test_seal_mprotect_two_vma(true);
+
+ test_seal_mprotect_two_vma_with_split(false);
+ test_seal_mprotect_two_vma_with_split(true);
+
+ test_seal_mprotect_partial_mprotect(false);
+ test_seal_mprotect_partial_mprotect(true);
+
+ test_seal_mprotect_two_vma_with_gap(false);
+ test_seal_mprotect_two_vma_with_gap(true);
+
+ test_seal_mprotect_merge(false);
+ test_seal_mprotect_merge(true);
+
+ test_seal_mprotect_split(false);
+ test_seal_mprotect_split(true);
+
+ test_seal_munmap(false);
+ test_seal_munmap(true);
+ test_seal_munmap_two_vma(false);
+ test_seal_munmap_two_vma(true);
+ test_seal_munmap_vma_with_gap(false);
+ test_seal_munmap_vma_with_gap(true);
+
+ test_munmap_start_freed(false);
+ test_munmap_start_freed(true);
+ test_munmap_middle_freed(false);
+ test_munmap_middle_freed(true);
+ test_munmap_end_freed(false);
+ test_munmap_end_freed(true);
+
+ test_seal_mremap_shrink(false);
+ test_seal_mremap_shrink(true);
+ test_seal_mremap_expand(false);
+ test_seal_mremap_expand(true);
+ test_seal_mremap_move(false);
+ test_seal_mremap_move(true);
+
+ test_seal_mremap_shrink_fixed(false);
+ test_seal_mremap_shrink_fixed(true);
+ test_seal_mremap_expand_fixed(false);
+ test_seal_mremap_expand_fixed(true);
+ test_seal_mremap_move_fixed(false);
+ test_seal_mremap_move_fixed(true);
+ test_seal_mremap_move_dontunmap(false);
+ test_seal_mremap_move_dontunmap(true);
+ test_seal_mremap_move_fixed_zero(false);
+ test_seal_mremap_move_fixed_zero(true);
+ test_seal_mremap_move_dontunmap_anyaddr(false);
+ test_seal_mremap_move_dontunmap_anyaddr(true);
+ test_seal_discard_ro_anon(false);
+ test_seal_discard_ro_anon(true);
+ test_seal_discard_ro_anon_on_rw(false);
+ test_seal_discard_ro_anon_on_rw(true);
+ test_seal_discard_ro_anon_on_shared(false);
+ test_seal_discard_ro_anon_on_shared(true);
+ test_seal_discard_ro_anon_on_filebacked(false);
+ test_seal_discard_ro_anon_on_filebacked(true);
+ test_seal_mmap_overwrite_prot(false);
+ test_seal_mmap_overwrite_prot(true);
+ test_seal_mmap_expand(false);
+ test_seal_mmap_expand(true);
+ test_seal_mmap_shrink(false);
+ test_seal_mmap_shrink(true);
+
+ test_seal_merge_and_split();
+ test_seal_zero_address();
+
+ test_seal_discard_ro_anon_on_pkey(false);
+ test_seal_discard_ro_anon_on_pkey(true);
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c
index d59517ed3d48..fc90af2a97b8 100644
--- a/tools/testing/selftests/mm/pagemap_ioctl.c
+++ b/tools/testing/selftests/mm/pagemap_ioctl.c
@@ -15,7 +15,7 @@
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <math.h>
-#include <asm/unistd.h>
+#include <asm-generic/unistd.h>
#include <pthread.h>
#include <sys/resource.h>
#include <assert.h>
@@ -1484,7 +1484,7 @@ int main(int argc, char *argv[])
ksft_print_header();
if (init_uffd())
- return ksft_exit_pass();
+ ksft_exit_pass();
ksft_set_plan(115);
@@ -1567,8 +1567,10 @@ int main(int argc, char *argv[])
/* 7. File Hugetlb testing */
mem_size = 2*1024*1024;
fd = memfd_create("uffd-test", MFD_HUGETLB | MFD_NOEXEC_SEAL);
+ if (fd < 0)
+ ksft_exit_fail_msg("uffd-test creation failed %d %s\n", errno, strerror(errno));
mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
- if (mem) {
+ if (mem != MAP_FAILED) {
wp_init(mem, mem_size);
wp_addr_range(mem, mem_size);
@@ -1660,5 +1662,5 @@ int main(int argc, char *argv[])
userfaultfd_tests();
close(pagemap_fd);
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/mm/protection_keys.c b/tools/testing/selftests/mm/protection_keys.c
index 48dc151f8fca..eaa6d1fc5328 100644
--- a/tools/testing/selftests/mm/protection_keys.c
+++ b/tools/testing/selftests/mm/protection_keys.c
@@ -42,7 +42,7 @@
#include <sys/wait.h>
#include <sys/stat.h>
#include <fcntl.h>
-#include <unistd.h>
+#include <asm-generic/unistd.h>
#include <sys/ptrace.h>
#include <setjmp.h>
diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh
index 4bdb3a0c7a60..03ac4f2e1cce 100755
--- a/tools/testing/selftests/mm/run_vmtests.sh
+++ b/tools/testing/selftests/mm/run_vmtests.sh
@@ -152,9 +152,13 @@ done < /proc/meminfo
# both of these requirements into account and attempt to increase
# number of huge pages available.
nr_cpus=$(nproc)
-hpgsize_MB=$((hpgsize_KB / 1024))
-half_ufd_size_MB=$((((nr_cpus * hpgsize_MB + 127) / 128) * 128))
-needmem_KB=$((half_ufd_size_MB * 2 * 1024))
+uffd_min_KB=$((hpgsize_KB * nr_cpus * 2))
+hugetlb_min_KB=$((256 * 1024))
+if [[ $uffd_min_KB -gt $hugetlb_min_KB ]]; then
+ needmem_KB=$uffd_min_KB
+else
+ needmem_KB=$hugetlb_min_KB
+fi
# set proper nr_hugepages
if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then
@@ -261,6 +265,7 @@ CATEGORY="hugetlb" run_test ./map_hugetlb
CATEGORY="hugetlb" run_test ./hugepage-mremap
CATEGORY="hugetlb" run_test ./hugepage-vmemmap
CATEGORY="hugetlb" run_test ./hugetlb-madvise
+CATEGORY="hugetlb" run_test ./hugetlb_dio
nr_hugepages_tmp=$(cat /proc/sys/vm/nr_hugepages)
# For this test, we need one and just one huge page
@@ -294,7 +299,8 @@ CATEGORY="userfaultfd" run_test ./uffd-unit-tests
uffd_stress_bin=./uffd-stress
CATEGORY="userfaultfd" run_test ${uffd_stress_bin} anon 20 16
# Hugetlb tests require source and destination huge pages. Pass in half
-# the size ($half_ufd_size_MB), which is used for *each*.
+# the size of the free pages we have, which is used for *each*.
+half_ufd_size_MB=$((freepgs / 2))
CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb "$half_ufd_size_MB" 32
CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb-private "$half_ufd_size_MB" 32
CATEGORY="userfaultfd" run_test ${uffd_stress_bin} shmem 20 16
@@ -326,6 +332,12 @@ CATEGORY="hugetlb" run_test ./thuge-gen
CATEGORY="hugetlb" run_test ./charge_reserved_hugetlb.sh -cgroup-v2
CATEGORY="hugetlb" run_test ./hugetlb_reparenting_test.sh -cgroup-v2
if $RUN_DESTRUCTIVE; then
+nr_hugepages_tmp=$(cat /proc/sys/vm/nr_hugepages)
+enable_soft_offline=$(cat /proc/sys/vm/enable_soft_offline)
+echo 8 > /proc/sys/vm/nr_hugepages
+CATEGORY="hugetlb" run_test ./hugetlb-soft-offline
+echo "$nr_hugepages_tmp" > /proc/sys/vm/nr_hugepages
+echo "$enable_soft_offline" > /proc/sys/vm/enable_soft_offline
CATEGORY="hugetlb" run_test ./hugetlb-read-hwpoison
fi
diff --git a/tools/testing/selftests/mm/seal_elf.c b/tools/testing/selftests/mm/seal_elf.c
new file mode 100644
index 000000000000..7aa1366063e4
--- /dev/null
+++ b/tools/testing/selftests/mm/seal_elf.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sys/mman.h>
+#include <stdint.h>
+#include <asm-generic/unistd.h>
+#include <string.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <stdbool.h>
+#include "../kselftest.h"
+#include <syscall.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/vfs.h>
+#include <sys/stat.h>
+#include "mseal_helpers.h"
+
+/*
+ * define sys_xyx to call syscall directly.
+ */
+static int sys_mseal(void *start, size_t len)
+{
+ int sret;
+
+ errno = 0;
+ sret = syscall(__NR_mseal, start, len, 0);
+ return sret;
+}
+
+static void *sys_mmap(void *addr, unsigned long len, unsigned long prot,
+ unsigned long flags, unsigned long fd, unsigned long offset)
+{
+ void *sret;
+
+ errno = 0;
+ sret = (void *) syscall(__NR_mmap, addr, len, prot,
+ flags, fd, offset);
+ return sret;
+}
+
+static inline int sys_mprotect(void *ptr, size_t size, unsigned long prot)
+{
+ int sret;
+
+ errno = 0;
+ sret = syscall(__NR_mprotect, ptr, size, prot);
+ return sret;
+}
+
+static bool seal_support(void)
+{
+ int ret;
+ void *ptr;
+ unsigned long page_size = getpagesize();
+
+ ptr = sys_mmap(NULL, page_size, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (ptr == (void *) -1)
+ return false;
+
+ ret = sys_mseal(ptr, page_size);
+ if (ret < 0)
+ return false;
+
+ return true;
+}
+
+const char somestr[4096] = {"READONLY"};
+
+static void test_seal_elf(void)
+{
+ int ret;
+ FILE *maps;
+ char line[512];
+ uintptr_t addr_start, addr_end;
+ char prot[5];
+ char filename[256];
+ unsigned long page_size = getpagesize();
+ unsigned long long ptr = (unsigned long long) somestr;
+ char *somestr2 = (char *)somestr;
+
+ /*
+ * Modify the protection of readonly somestr
+ */
+ if (((unsigned long long)ptr % page_size) != 0)
+ ptr = (unsigned long long)ptr & ~(page_size - 1);
+
+ ksft_print_msg("somestr = %s\n", somestr);
+ ksft_print_msg("change protection to rw\n");
+ ret = sys_mprotect((void *)ptr, page_size, PROT_READ|PROT_WRITE);
+ FAIL_TEST_IF_FALSE(!ret);
+ *somestr2 = 'A';
+ ksft_print_msg("somestr is modified to: %s\n", somestr);
+ ret = sys_mprotect((void *)ptr, page_size, PROT_READ);
+ FAIL_TEST_IF_FALSE(!ret);
+
+ maps = fopen("/proc/self/maps", "r");
+ FAIL_TEST_IF_FALSE(maps);
+
+ /*
+ * apply sealing to elf binary
+ */
+ while (fgets(line, sizeof(line), maps)) {
+ if (sscanf(line, "%lx-%lx %4s %*x %*x:%*x %*u %255[^\n]",
+ &addr_start, &addr_end, prot, filename) == 4) {
+ if (strlen(filename)) {
+ /*
+ * seal the mapping if read only.
+ */
+ if (strstr(prot, "r-")) {
+ ret = sys_mseal((void *)addr_start, addr_end - addr_start);
+ FAIL_TEST_IF_FALSE(!ret);
+ ksft_print_msg("sealed: %lx-%lx %s %s\n",
+ addr_start, addr_end, prot, filename);
+ if ((uintptr_t) somestr >= addr_start &&
+ (uintptr_t) somestr <= addr_end)
+ ksft_print_msg("mapping for somestr found\n");
+ }
+ }
+ }
+ }
+ fclose(maps);
+
+ ret = sys_mprotect((void *)ptr, page_size, PROT_READ | PROT_WRITE);
+ FAIL_TEST_IF_FALSE(ret < 0);
+ ksft_print_msg("somestr is sealed, mprotect is rejected\n");
+
+ REPORT_TEST_PASS();
+}
+
+int main(int argc, char **argv)
+{
+ bool test_seal = seal_support();
+
+ ksft_print_header();
+ ksft_print_msg("pid=%d\n", getpid());
+
+ if (!test_seal)
+ ksft_exit_skip("sealing not supported, check CONFIG_64BIT\n");
+
+ ksft_set_plan(1);
+
+ test_seal_elf();
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/mm/soft-dirty.c b/tools/testing/selftests/mm/soft-dirty.c
index 7dbfa53d93a0..bdfa5d085f00 100644
--- a/tools/testing/selftests/mm/soft-dirty.c
+++ b/tools/testing/selftests/mm/soft-dirty.c
@@ -209,5 +209,5 @@ int main(int argc, char **argv)
close(pagemap_fd);
- return ksft_exit_pass();
+ ksft_finished();
}
diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c
index d3c7f5fb3e7b..e5e8dafc9d94 100644
--- a/tools/testing/selftests/mm/split_huge_page_test.c
+++ b/tools/testing/selftests/mm/split_huge_page_test.c
@@ -300,7 +300,7 @@ int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd,
char **addr)
{
size_t i;
- int __attribute__((unused)) dummy = 0;
+ int dummy = 0;
srand(time(NULL));
@@ -341,6 +341,7 @@ int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd,
for (size_t i = 0; i < fd_size; i++)
dummy += *(*addr + i);
+ asm volatile("" : "+r" (dummy));
if (!check_huge_file(*addr, fd_size / pmd_pagesize, pmd_pagesize)) {
ksft_print_msg("No large pagecache folio generated, please provide a filesystem supporting large folio\n");
diff --git a/tools/testing/selftests/mm/thuge-gen.c b/tools/testing/selftests/mm/thuge-gen.c
index ea7fd8fe2876..e4370b79b62f 100644
--- a/tools/testing/selftests/mm/thuge-gen.c
+++ b/tools/testing/selftests/mm/thuge-gen.c
@@ -13,8 +13,9 @@
sudo ipcs | awk '$1 == "0x00000000" {print $2}' | xargs -n1 sudo ipcrm -m
(warning this will remove all if someone else uses them) */
-#define _GNU_SOURCE 1
+#define _GNU_SOURCE
#include <sys/mman.h>
+#include <linux/mman.h>
#include <stdlib.h>
#include <stdio.h>
#include <sys/ipc.h>
@@ -28,19 +29,23 @@
#include "vm_util.h"
#include "../kselftest.h"
-#define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT)
-#define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT)
-#define MAP_HUGE_SHIFT 26
-#define MAP_HUGE_MASK 0x3f
#if !defined(MAP_HUGETLB)
#define MAP_HUGETLB 0x40000
#endif
#define SHM_HUGETLB 04000 /* segment will use huge TLB pages */
+#ifndef SHM_HUGE_SHIFT
#define SHM_HUGE_SHIFT 26
+#endif
+#ifndef SHM_HUGE_MASK
#define SHM_HUGE_MASK 0x3f
+#endif
+#ifndef SHM_HUGE_2MB
#define SHM_HUGE_2MB (21 << SHM_HUGE_SHIFT)
+#endif
+#ifndef SHM_HUGE_1GB
#define SHM_HUGE_1GB (30 << SHM_HUGE_SHIFT)
+#endif
#define NUM_PAGESIZES 5
#define NUM_PAGES 4
diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c
index 7ad6ba660c7d..717539eddf98 100644
--- a/tools/testing/selftests/mm/uffd-common.c
+++ b/tools/testing/selftests/mm/uffd-common.c
@@ -673,11 +673,7 @@ int uffd_open_dev(unsigned int flags)
int uffd_open_sys(unsigned int flags)
{
-#ifdef __NR_userfaultfd
return syscall(__NR_userfaultfd, flags);
-#else
- return -1;
-#endif
}
int uffd_open(unsigned int flags)
diff --git a/tools/testing/selftests/mm/uffd-common.h b/tools/testing/selftests/mm/uffd-common.h
index cc5629c3d2aa..a70ae10b5f62 100644
--- a/tools/testing/selftests/mm/uffd-common.h
+++ b/tools/testing/selftests/mm/uffd-common.h
@@ -8,6 +8,7 @@
#define __UFFD_COMMON_H__
#define _GNU_SOURCE
+#define __SANE_USERSPACE_TYPES__ // Use ll64
#include <stdio.h>
#include <errno.h>
#include <unistd.h>
diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c
index f78bab0f3d45..a4b83280998a 100644
--- a/tools/testing/selftests/mm/uffd-stress.c
+++ b/tools/testing/selftests/mm/uffd-stress.c
@@ -33,10 +33,10 @@
* pthread_mutex_lock will also verify the atomicity of the memory
* transfer (UFFDIO_COPY).
*/
-
+#include <asm-generic/unistd.h>
#include "uffd-common.h"
-#ifdef __NR_userfaultfd
+uint64_t features;
#define BOUNCE_RANDOM (1<<0)
#define BOUNCE_RACINGFAULTS (1<<1)
@@ -247,10 +247,14 @@ static int userfaultfd_stress(void)
unsigned long nr;
struct uffd_args args[nr_cpus];
uint64_t mem_size = nr_pages * page_size;
+ int flags = 0;
memset(args, 0, sizeof(struct uffd_args) * nr_cpus);
- if (uffd_test_ctx_init(UFFD_FEATURE_WP_UNPOPULATED, NULL))
+ if (features & UFFD_FEATURE_WP_UNPOPULATED && test_type == TEST_ANON)
+ flags = UFFD_FEATURE_WP_UNPOPULATED;
+
+ if (uffd_test_ctx_init(flags, NULL))
err("context init failed");
if (posix_memalign(&area, page_size, page_size))
@@ -385,8 +389,6 @@ static void set_test_type(const char *type)
static void parse_test_type_arg(const char *raw_type)
{
- uint64_t features = UFFD_API_FEATURES;
-
set_test_type(raw_type);
if (!test_type)
@@ -409,12 +411,15 @@ static void parse_test_type_arg(const char *raw_type)
* feature.
*/
- if (userfaultfd_open(&features))
- err("Userfaultfd open failed");
+ if (uffd_get_features(&features))
+ err("failed to get available features");
test_uffdio_wp = test_uffdio_wp &&
(features & UFFD_FEATURE_PAGEFAULT_FLAG_WP);
+ if (test_type != TEST_ANON && !(features & UFFD_FEATURE_WP_HUGETLBFS_SHMEM))
+ test_uffdio_wp = false;
+
close(uffd);
uffd = -1;
}
@@ -466,15 +471,3 @@ int main(int argc, char **argv)
nr_pages, nr_pages_per_cpu);
return userfaultfd_stress();
}
-
-#else /* __NR_userfaultfd */
-
-#warning "missing __NR_userfaultfd definition"
-
-int main(void)
-{
- printf("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n");
- return KSFT_SKIP;
-}
-
-#endif /* __NR_userfaultfd */
diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c
index 21ec23206ab4..b3d21eed203d 100644
--- a/tools/testing/selftests/mm/uffd-unit-tests.c
+++ b/tools/testing/selftests/mm/uffd-unit-tests.c
@@ -5,12 +5,11 @@
* Copyright (C) 2015-2023 Red Hat, Inc.
*/
+#include <asm-generic/unistd.h>
#include "uffd-common.h"
#include "../../../../mm/gup_test.h"
-#ifdef __NR_userfaultfd
-
/* The unit test doesn't need a large or random size, make it 32MB for now */
#define UFFD_TEST_MEM_SIZE (32UL << 20)
@@ -1554,14 +1553,3 @@ int main(int argc, char *argv[])
return ksft_get_fail_cnt() ? KSFT_FAIL : KSFT_PASS;
}
-#else /* __NR_userfaultfd */
-
-#warning "missing __NR_userfaultfd definition"
-
-int main(void)
-{
- printf("Skipping %s (missing __NR_userfaultfd)\n", __file__);
- return KSFT_SKIP;
-}
-
-#endif /* __NR_userfaultfd */
diff --git a/tools/testing/selftests/mm/va_high_addr_switch.c b/tools/testing/selftests/mm/va_high_addr_switch.c
index cfbc501290d3..896b3f73fc53 100644
--- a/tools/testing/selftests/mm/va_high_addr_switch.c
+++ b/tools/testing/selftests/mm/va_high_addr_switch.c
@@ -9,26 +9,9 @@
#include <sys/mman.h>
#include <string.h>
+#include "vm_util.h"
#include "../kselftest.h"
-#ifdef __powerpc64__
-#define PAGE_SIZE (64 << 10)
-/*
- * This will work with 16M and 2M hugepage size
- */
-#define HUGETLB_SIZE (16 << 20)
-#elif __aarch64__
-/*
- * The default hugepage size for 64k base pagesize
- * is 512MB.
- */
-#define PAGE_SIZE (64 << 10)
-#define HUGETLB_SIZE (512 << 20)
-#else
-#define PAGE_SIZE (4 << 10)
-#define HUGETLB_SIZE (2 << 20)
-#endif
-
/*
* The hint addr value is used to allocate addresses
* beyond the high address switch boundary.
@@ -37,18 +20,8 @@
#define ADDR_MARK_128TB (1UL << 47)
#define ADDR_MARK_256TB (1UL << 48)
-#define HIGH_ADDR_128TB ((void *) (1UL << 48))
-#define HIGH_ADDR_256TB ((void *) (1UL << 49))
-
-#define LOW_ADDR ((void *) (1UL << 30))
-
-#ifdef __aarch64__
-#define ADDR_SWITCH_HINT ADDR_MARK_256TB
-#define HIGH_ADDR HIGH_ADDR_256TB
-#else
-#define ADDR_SWITCH_HINT ADDR_MARK_128TB
-#define HIGH_ADDR HIGH_ADDR_128TB
-#endif
+#define HIGH_ADDR_128TB (1UL << 48)
+#define HIGH_ADDR_256TB (1UL << 49)
struct testcase {
void *addr;
@@ -59,195 +32,230 @@ struct testcase {
unsigned int keep_mapped:1;
};
-static struct testcase testcases[] = {
- {
- /*
- * If stack is moved, we could possibly allocate
- * this at the requested address.
- */
- .addr = ((void *)(ADDR_SWITCH_HINT - PAGE_SIZE)),
- .size = PAGE_SIZE,
- .flags = MAP_PRIVATE | MAP_ANONYMOUS,
- .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, PAGE_SIZE)",
- .low_addr_required = 1,
- },
- {
- /*
- * Unless MAP_FIXED is specified, allocation based on hint
- * addr is never at requested address or above it, which is
- * beyond high address switch boundary in this case. Instead,
- * a suitable allocation is found in lower address space.
- */
- .addr = ((void *)(ADDR_SWITCH_HINT - PAGE_SIZE)),
- .size = 2 * PAGE_SIZE,
- .flags = MAP_PRIVATE | MAP_ANONYMOUS,
- .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, (2 * PAGE_SIZE))",
- .low_addr_required = 1,
- },
- {
- /*
- * Exact mapping at high address switch boundary, should
- * be obtained even without MAP_FIXED as area is free.
- */
- .addr = ((void *)(ADDR_SWITCH_HINT)),
- .size = PAGE_SIZE,
- .flags = MAP_PRIVATE | MAP_ANONYMOUS,
- .msg = "mmap(ADDR_SWITCH_HINT, PAGE_SIZE)",
- .keep_mapped = 1,
- },
- {
- .addr = (void *)(ADDR_SWITCH_HINT),
- .size = 2 * PAGE_SIZE,
- .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
- .msg = "mmap(ADDR_SWITCH_HINT, 2 * PAGE_SIZE, MAP_FIXED)",
- },
- {
- .addr = NULL,
- .size = 2 * PAGE_SIZE,
- .flags = MAP_PRIVATE | MAP_ANONYMOUS,
- .msg = "mmap(NULL)",
- .low_addr_required = 1,
- },
- {
- .addr = LOW_ADDR,
- .size = 2 * PAGE_SIZE,
- .flags = MAP_PRIVATE | MAP_ANONYMOUS,
- .msg = "mmap(LOW_ADDR)",
- .low_addr_required = 1,
- },
- {
- .addr = HIGH_ADDR,
- .size = 2 * PAGE_SIZE,
- .flags = MAP_PRIVATE | MAP_ANONYMOUS,
- .msg = "mmap(HIGH_ADDR)",
- .keep_mapped = 1,
- },
- {
- .addr = HIGH_ADDR,
- .size = 2 * PAGE_SIZE,
- .flags = MAP_PRIVATE | MAP_ANONYMOUS,
- .msg = "mmap(HIGH_ADDR) again",
- .keep_mapped = 1,
- },
- {
- .addr = HIGH_ADDR,
- .size = 2 * PAGE_SIZE,
- .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
- .msg = "mmap(HIGH_ADDR, MAP_FIXED)",
- },
- {
- .addr = (void *) -1,
- .size = 2 * PAGE_SIZE,
- .flags = MAP_PRIVATE | MAP_ANONYMOUS,
- .msg = "mmap(-1)",
- .keep_mapped = 1,
- },
- {
- .addr = (void *) -1,
- .size = 2 * PAGE_SIZE,
- .flags = MAP_PRIVATE | MAP_ANONYMOUS,
- .msg = "mmap(-1) again",
- },
- {
- .addr = ((void *)(ADDR_SWITCH_HINT - PAGE_SIZE)),
- .size = PAGE_SIZE,
- .flags = MAP_PRIVATE | MAP_ANONYMOUS,
- .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, PAGE_SIZE)",
- .low_addr_required = 1,
- },
- {
- .addr = (void *)(ADDR_SWITCH_HINT - PAGE_SIZE),
- .size = 2 * PAGE_SIZE,
- .flags = MAP_PRIVATE | MAP_ANONYMOUS,
- .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, 2 * PAGE_SIZE)",
- .low_addr_required = 1,
- .keep_mapped = 1,
- },
- {
- .addr = (void *)(ADDR_SWITCH_HINT - PAGE_SIZE / 2),
- .size = 2 * PAGE_SIZE,
- .flags = MAP_PRIVATE | MAP_ANONYMOUS,
- .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE/2 , 2 * PAGE_SIZE)",
- .low_addr_required = 1,
- .keep_mapped = 1,
- },
- {
- .addr = ((void *)(ADDR_SWITCH_HINT)),
- .size = PAGE_SIZE,
- .flags = MAP_PRIVATE | MAP_ANONYMOUS,
- .msg = "mmap(ADDR_SWITCH_HINT, PAGE_SIZE)",
- },
- {
- .addr = (void *)(ADDR_SWITCH_HINT),
- .size = 2 * PAGE_SIZE,
- .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
- .msg = "mmap(ADDR_SWITCH_HINT, 2 * PAGE_SIZE, MAP_FIXED)",
- },
-};
+static struct testcase *testcases;
+static struct testcase *hugetlb_testcases;
+static int sz_testcases, sz_hugetlb_testcases;
+static unsigned long switch_hint;
-static struct testcase hugetlb_testcases[] = {
- {
- .addr = NULL,
- .size = HUGETLB_SIZE,
- .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
- .msg = "mmap(NULL, MAP_HUGETLB)",
- .low_addr_required = 1,
- },
- {
- .addr = LOW_ADDR,
- .size = HUGETLB_SIZE,
- .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
- .msg = "mmap(LOW_ADDR, MAP_HUGETLB)",
- .low_addr_required = 1,
- },
- {
- .addr = HIGH_ADDR,
- .size = HUGETLB_SIZE,
- .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
- .msg = "mmap(HIGH_ADDR, MAP_HUGETLB)",
- .keep_mapped = 1,
- },
- {
- .addr = HIGH_ADDR,
- .size = HUGETLB_SIZE,
- .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
- .msg = "mmap(HIGH_ADDR, MAP_HUGETLB) again",
- .keep_mapped = 1,
- },
- {
- .addr = HIGH_ADDR,
- .size = HUGETLB_SIZE,
- .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
- .msg = "mmap(HIGH_ADDR, MAP_FIXED | MAP_HUGETLB)",
- },
- {
- .addr = (void *) -1,
- .size = HUGETLB_SIZE,
- .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
- .msg = "mmap(-1, MAP_HUGETLB)",
- .keep_mapped = 1,
- },
- {
- .addr = (void *) -1,
- .size = HUGETLB_SIZE,
- .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
- .msg = "mmap(-1, MAP_HUGETLB) again",
- },
- {
- .addr = (void *)(ADDR_SWITCH_HINT - PAGE_SIZE),
- .size = 2 * HUGETLB_SIZE,
- .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
- .msg = "mmap(ADDR_SWITCH_HINT - PAGE_SIZE, 2*HUGETLB_SIZE, MAP_HUGETLB)",
- .low_addr_required = 1,
- .keep_mapped = 1,
- },
- {
- .addr = (void *)(ADDR_SWITCH_HINT),
- .size = 2 * HUGETLB_SIZE,
- .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
- .msg = "mmap(ADDR_SWITCH_HINT , 2*HUGETLB_SIZE, MAP_FIXED | MAP_HUGETLB)",
- },
-};
+/* Initialize testcases inside a function to compute parameters at runtime */
+void testcases_init(void)
+{
+ unsigned long pagesize = getpagesize();
+ unsigned long hugepagesize = default_huge_page_size();
+ unsigned long low_addr = (1UL << 30);
+ unsigned long addr_switch_hint = ADDR_MARK_128TB;
+ unsigned long high_addr = HIGH_ADDR_128TB;
+
+#ifdef __aarch64__
+
+ /* Post LPA2, the lower userspace VA on a 16K pagesize is 47 bits. */
+ if (pagesize != (16UL << 10)) {
+ addr_switch_hint = ADDR_MARK_256TB;
+ high_addr = HIGH_ADDR_256TB;
+ }
+#endif
+
+ struct testcase t[] = {
+ {
+ /*
+ * If stack is moved, we could possibly allocate
+ * this at the requested address.
+ */
+ .addr = ((void *)(addr_switch_hint - pagesize)),
+ .size = pagesize,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(addr_switch_hint - pagesize, pagesize)",
+ .low_addr_required = 1,
+ },
+ {
+ /*
+ * Unless MAP_FIXED is specified, allocation based on hint
+ * addr is never at requested address or above it, which is
+ * beyond high address switch boundary in this case. Instead,
+ * a suitable allocation is found in lower address space.
+ */
+ .addr = ((void *)(addr_switch_hint - pagesize)),
+ .size = 2 * pagesize,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(addr_switch_hint - pagesize, (2 * pagesize))",
+ .low_addr_required = 1,
+ },
+ {
+ /*
+ * Exact mapping at high address switch boundary, should
+ * be obtained even without MAP_FIXED as area is free.
+ */
+ .addr = ((void *)(addr_switch_hint)),
+ .size = pagesize,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(addr_switch_hint, pagesize)",
+ .keep_mapped = 1,
+ },
+ {
+ .addr = (void *)(addr_switch_hint),
+ .size = 2 * pagesize,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+ .msg = "mmap(addr_switch_hint, 2 * pagesize, MAP_FIXED)",
+ },
+ {
+ .addr = NULL,
+ .size = 2 * pagesize,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(NULL)",
+ .low_addr_required = 1,
+ },
+ {
+ .addr = (void *)low_addr,
+ .size = 2 * pagesize,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(low_addr)",
+ .low_addr_required = 1,
+ },
+ {
+ .addr = (void *)high_addr,
+ .size = 2 * pagesize,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(high_addr)",
+ .keep_mapped = 1,
+ },
+ {
+ .addr = (void *)high_addr,
+ .size = 2 * pagesize,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(high_addr) again",
+ .keep_mapped = 1,
+ },
+ {
+ .addr = (void *)high_addr,
+ .size = 2 * pagesize,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+ .msg = "mmap(high_addr, MAP_FIXED)",
+ },
+ {
+ .addr = (void *) -1,
+ .size = 2 * pagesize,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(-1)",
+ .keep_mapped = 1,
+ },
+ {
+ .addr = (void *) -1,
+ .size = 2 * pagesize,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(-1) again",
+ },
+ {
+ .addr = ((void *)(addr_switch_hint - pagesize)),
+ .size = pagesize,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(addr_switch_hint - pagesize, pagesize)",
+ .low_addr_required = 1,
+ },
+ {
+ .addr = (void *)(addr_switch_hint - pagesize),
+ .size = 2 * pagesize,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(addr_switch_hint - pagesize, 2 * pagesize)",
+ .low_addr_required = 1,
+ .keep_mapped = 1,
+ },
+ {
+ .addr = (void *)(addr_switch_hint - pagesize / 2),
+ .size = 2 * pagesize,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(addr_switch_hint - pagesize/2 , 2 * pagesize)",
+ .low_addr_required = 1,
+ .keep_mapped = 1,
+ },
+ {
+ .addr = ((void *)(addr_switch_hint)),
+ .size = pagesize,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(addr_switch_hint, pagesize)",
+ },
+ {
+ .addr = (void *)(addr_switch_hint),
+ .size = 2 * pagesize,
+ .flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+ .msg = "mmap(addr_switch_hint, 2 * pagesize, MAP_FIXED)",
+ },
+ };
+
+ struct testcase ht[] = {
+ {
+ .addr = NULL,
+ .size = hugepagesize,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(NULL, MAP_HUGETLB)",
+ .low_addr_required = 1,
+ },
+ {
+ .addr = (void *)low_addr,
+ .size = hugepagesize,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(low_addr, MAP_HUGETLB)",
+ .low_addr_required = 1,
+ },
+ {
+ .addr = (void *)high_addr,
+ .size = hugepagesize,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(high_addr, MAP_HUGETLB)",
+ .keep_mapped = 1,
+ },
+ {
+ .addr = (void *)high_addr,
+ .size = hugepagesize,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(high_addr, MAP_HUGETLB) again",
+ .keep_mapped = 1,
+ },
+ {
+ .addr = (void *)high_addr,
+ .size = hugepagesize,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+ .msg = "mmap(high_addr, MAP_FIXED | MAP_HUGETLB)",
+ },
+ {
+ .addr = (void *) -1,
+ .size = hugepagesize,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(-1, MAP_HUGETLB)",
+ .keep_mapped = 1,
+ },
+ {
+ .addr = (void *) -1,
+ .size = hugepagesize,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(-1, MAP_HUGETLB) again",
+ },
+ {
+ .addr = (void *)(addr_switch_hint - pagesize),
+ .size = 2 * hugepagesize,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS,
+ .msg = "mmap(addr_switch_hint - pagesize, 2*hugepagesize, MAP_HUGETLB)",
+ .low_addr_required = 1,
+ .keep_mapped = 1,
+ },
+ {
+ .addr = (void *)(addr_switch_hint),
+ .size = 2 * hugepagesize,
+ .flags = MAP_HUGETLB | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED,
+ .msg = "mmap(addr_switch_hint , 2*hugepagesize, MAP_FIXED | MAP_HUGETLB)",
+ },
+ };
+
+ testcases = malloc(sizeof(t));
+ hugetlb_testcases = malloc(sizeof(ht));
+
+ /* Copy into global arrays */
+ memcpy(testcases, t, sizeof(t));
+ memcpy(hugetlb_testcases, ht, sizeof(ht));
+
+ sz_testcases = ARRAY_SIZE(t);
+ sz_hugetlb_testcases = ARRAY_SIZE(ht);
+ switch_hint = addr_switch_hint;
+}
static int run_test(struct testcase *test, int count)
{
@@ -267,7 +275,7 @@ static int run_test(struct testcase *test, int count)
continue;
}
- if (t->low_addr_required && p >= (void *)(ADDR_SWITCH_HINT)) {
+ if (t->low_addr_required && p >= (void *)(switch_hint)) {
printf("FAILED\n");
ret = KSFT_FAIL;
} else {
@@ -285,6 +293,20 @@ static int run_test(struct testcase *test, int count)
return ret;
}
+#ifdef __aarch64__
+/* Check if userspace VA > 48 bits */
+static int high_address_present(void)
+{
+ void *ptr = mmap((void *)(1UL << 50), 1, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
+ if (ptr == MAP_FAILED)
+ return 0;
+
+ munmap(ptr, 1);
+ return 1;
+}
+#endif
+
static int supported_arch(void)
{
#if defined(__powerpc64__)
@@ -292,7 +314,7 @@ static int supported_arch(void)
#elif defined(__x86_64__)
return 1;
#elif defined(__aarch64__)
- return getpagesize() == PAGE_SIZE;
+ return high_address_present();
#else
return 0;
#endif
@@ -305,8 +327,10 @@ int main(int argc, char **argv)
if (!supported_arch())
return KSFT_SKIP;
- ret = run_test(testcases, ARRAY_SIZE(testcases));
+ testcases_init();
+
+ ret = run_test(testcases, sz_testcases);
if (argc == 2 && !strcmp(argv[1], "--run-hugetlb"))
- ret = run_test(hugetlb_testcases, ARRAY_SIZE(hugetlb_testcases));
+ ret = run_test(hugetlb_testcases, sz_hugetlb_testcases);
return ret;
}
diff --git a/tools/testing/selftests/mm/va_high_addr_switch.sh b/tools/testing/selftests/mm/va_high_addr_switch.sh
index a0a75f302904..2c725773cd79 100755
--- a/tools/testing/selftests/mm/va_high_addr_switch.sh
+++ b/tools/testing/selftests/mm/va_high_addr_switch.sh
@@ -57,8 +57,4 @@ check_test_requirements()
}
check_test_requirements
-./va_high_addr_switch
-
-# In order to run hugetlb testcases, "--run-hugetlb" must be appended
-# to the binary.
./va_high_addr_switch --run-hugetlb
diff --git a/tools/testing/selftests/mm/virtual_address_range.c b/tools/testing/selftests/mm/virtual_address_range.c
index 7bcf8d48256a..4e4c1e311247 100644
--- a/tools/testing/selftests/mm/virtual_address_range.c
+++ b/tools/testing/selftests/mm/virtual_address_range.c
@@ -12,6 +12,8 @@
#include <errno.h>
#include <sys/mman.h>
#include <sys/time.h>
+#include <fcntl.h>
+
#include "../kselftest.h"
/*
@@ -85,7 +87,7 @@ static int validate_lower_address_hint(void)
char *ptr;
ptr = mmap((void *) (1UL << 45), MAP_CHUNK_SIZE, PROT_READ |
- PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (ptr == MAP_FAILED)
return 0;
@@ -93,6 +95,66 @@ static int validate_lower_address_hint(void)
return 1;
}
+static int validate_complete_va_space(void)
+{
+ unsigned long start_addr, end_addr, prev_end_addr;
+ char line[400];
+ char prot[6];
+ FILE *file;
+ int fd;
+
+ fd = open("va_dump", O_CREAT | O_WRONLY, 0600);
+ unlink("va_dump");
+ if (fd < 0) {
+ ksft_test_result_skip("cannot create or open dump file\n");
+ ksft_finished();
+ }
+
+ file = fopen("/proc/self/maps", "r");
+ if (file == NULL)
+ ksft_exit_fail_msg("cannot open /proc/self/maps\n");
+
+ prev_end_addr = 0;
+ while (fgets(line, sizeof(line), file)) {
+ unsigned long hop;
+
+ if (sscanf(line, "%lx-%lx %s[rwxp-]",
+ &start_addr, &end_addr, prot) != 3)
+ ksft_exit_fail_msg("cannot parse /proc/self/maps\n");
+
+ /* end of userspace mappings; ignore vsyscall mapping */
+ if (start_addr & (1UL << 63))
+ return 0;
+
+ /* /proc/self/maps must have gaps less than MAP_CHUNK_SIZE */
+ if (start_addr - prev_end_addr >= MAP_CHUNK_SIZE)
+ return 1;
+
+ prev_end_addr = end_addr;
+
+ if (prot[0] != 'r')
+ continue;
+
+ /*
+ * Confirm whether MAP_CHUNK_SIZE chunk can be found or not.
+ * If write succeeds, no need to check MAP_CHUNK_SIZE - 1
+ * addresses after that. If the address was not held by this
+ * process, write would fail with errno set to EFAULT.
+ * Anyways, if write returns anything apart from 1, exit the
+ * program since that would mean a bug in /proc/self/maps.
+ */
+ hop = 0;
+ while (start_addr + hop < end_addr) {
+ if (write(fd, (void *)(start_addr + hop), 1) != 1)
+ return 1;
+ lseek(fd, 0, SEEK_SET);
+
+ hop += MAP_CHUNK_SIZE;
+ }
+ }
+ return 0;
+}
+
int main(int argc, char *argv[])
{
char *ptr[NR_CHUNKS_LOW];
@@ -105,13 +167,11 @@ int main(int argc, char *argv[])
for (i = 0; i < NR_CHUNKS_LOW; i++) {
ptr[i] = mmap(NULL, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (ptr[i] == MAP_FAILED) {
- if (validate_lower_address_hint()) {
- ksft_test_result_skip("Memory constraint not fulfilled\n");
- ksft_finished();
- }
+ if (validate_lower_address_hint())
+ ksft_exit_fail_msg("mmap unexpectedly succeeded with hint\n");
break;
}
@@ -127,7 +187,7 @@ int main(int argc, char *argv[])
for (i = 0; i < NR_CHUNKS_HIGH; i++) {
hint = hind_addr();
hptr[i] = mmap(hint, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (hptr[i] == MAP_FAILED)
break;
@@ -135,6 +195,10 @@ int main(int argc, char *argv[])
validate_addr(hptr[i], 1);
}
hchunks = i;
+ if (validate_complete_va_space()) {
+ ksft_test_result_fail("BUG in mmap() or /proc/self/maps\n");
+ ksft_finished();
+ }
for (i = 0; i < lchunks; i++)
munmap(ptr[i], MAP_CHUNK_SIZE);
diff --git a/tools/testing/selftests/mqueue/mq_perf_tests.c b/tools/testing/selftests/mqueue/mq_perf_tests.c
index 5c16159d0bcd..fb898850867c 100644
--- a/tools/testing/selftests/mqueue/mq_perf_tests.c
+++ b/tools/testing/selftests/mqueue/mq_perf_tests.c
@@ -323,7 +323,8 @@ void *fake_cont_thread(void *arg)
void *cont_thread(void *arg)
{
char buff[MSG_SIZE];
- int i, priority;
+ int i;
+ unsigned int priority;
for (i = 0; i < num_cpus_to_pin; i++)
if (cpu_threads[i] == pthread_self())
@@ -425,7 +426,8 @@ struct test test2[] = {
void *perf_test_thread(void *arg)
{
char buff[MSG_SIZE];
- int prio_out, prio_in;
+ int prio_out;
+ unsigned int prio_in;
int i;
clockid_t clock;
pthread_t *t;
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 2f9d378edec3..666ab7d9390b 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -2,9 +2,9 @@
bind_bhash
bind_timewait
bind_wildcard
-csum
cmsg_sender
diag_uid
+epoll_busy_poll
fin_ack_lat
gro
hwtstamp_config
@@ -31,6 +31,7 @@ reuseport_dualstack
rxtimestamp
sctp_hello
scm_pidfd
+scm_rights
sk_bind_sendto_listen
sk_connect_zero_addr
socket
@@ -42,7 +43,6 @@ tap
tcp_fastopen_backup_key
tcp_inq
tcp_mmap
-test_unix_oob
timestamping
tls
toeplitz
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 7b6918d5f4af..8eaffd7a641c 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for net selftests
-CFLAGS = -Wall -Wl,--no-as-needed -O2 -g
+CFLAGS += -Wall -Wl,--no-as-needed -O2 -g
CFLAGS += -I../../../../usr/include/ $(KHDR_INCLUDES)
# Additional include paths needed by kselftest.h
CFLAGS += -I../
@@ -20,7 +20,6 @@ TEST_PROGS += reuseaddr_ports_exhausted.sh
TEST_PROGS += txtimestamp.sh
TEST_PROGS += vrf-xfrm-tests.sh
TEST_PROGS += rxtimestamp.sh
-TEST_PROGS += devlink_port_split.py
TEST_PROGS += drop_monitor_tests.sh
TEST_PROGS += vrf_route_leaking.sh
TEST_PROGS += bareudp.sh
@@ -35,6 +34,7 @@ TEST_PROGS += gre_gso.sh
TEST_PROGS += cmsg_so_mark.sh
TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh
TEST_PROGS += netns-name.sh
+TEST_PROGS += nl_netdev.py
TEST_PROGS += srv6_end_dt46_l3vpn_test.sh
TEST_PROGS += srv6_end_dt4_l3vpn_test.sh
TEST_PROGS += srv6_end_dt6_l3vpn_test.sh
@@ -43,6 +43,8 @@ TEST_PROGS += srv6_hl2encap_red_l2vpn_test.sh
TEST_PROGS += srv6_end_next_csid_l3vpn_test.sh
TEST_PROGS += srv6_end_x_next_csid_l3vpn_test.sh
TEST_PROGS += srv6_end_flavors_test.sh
+TEST_PROGS += srv6_end_dx4_netfilter_test.sh
+TEST_PROGS += srv6_end_dx6_netfilter_test.sh
TEST_PROGS += vrf_strict_mode_test.sh
TEST_PROGS += arp_ndisc_evict_nocarrier.sh
TEST_PROGS += ndisc_unsolicited_na_test.sh
@@ -53,6 +55,7 @@ TEST_PROGS += bind_bhash.sh
TEST_PROGS += ip_local_port_range.sh
TEST_PROGS += rps_default_mask.sh
TEST_PROGS += big_tcp.sh
+TEST_PROGS += netns-sysctl.sh
TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh
TEST_GEN_FILES = socket nettest
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
@@ -67,7 +70,7 @@ TEST_GEN_FILES += ipsec
TEST_GEN_FILES += ioam6_parser
TEST_GEN_FILES += gro
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
-TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls tun tap
+TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls tun tap epoll_busy_poll
TEST_GEN_FILES += toeplitz
TEST_GEN_FILES += cmsg_sender
TEST_GEN_FILES += stress_reuseport_listen
@@ -81,9 +84,6 @@ TEST_PROGS += test_ingress_egress_chaining.sh
TEST_GEN_PROGS += so_incoming_cpu
TEST_PROGS += sctp_vrf.sh
TEST_GEN_FILES += sctp_hello
-TEST_GEN_FILES += csum
-TEST_GEN_FILES += nat6to4.o
-TEST_GEN_FILES += xdp_dummy.o
TEST_GEN_FILES += ip_local_port_range
TEST_GEN_FILES += bind_wildcard
TEST_PROGS += test_vxlan_mdb.sh
@@ -93,63 +93,22 @@ TEST_PROGS += test_bridge_backup_port.sh
TEST_PROGS += fdb_flush.sh
TEST_PROGS += fq_band_pktlimit.sh
TEST_PROGS += vlan_hw_filter.sh
+TEST_PROGS += bpf_offload.py
TEST_FILES := settings
TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh
+TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c))
+
TEST_INCLUDES := forwarding/lib.sh
include ../lib.mk
+$(OUTPUT)/epoll_busy_poll: LDLIBS += -lcap
$(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma
$(OUTPUT)/tcp_mmap: LDLIBS += -lpthread -lcrypto
$(OUTPUT)/tcp_inq: LDLIBS += -lpthread
$(OUTPUT)/bind_bhash: LDLIBS += -lpthread
$(OUTPUT)/io_uring_zerocopy_tx: CFLAGS += -I../../../include/
-# Rules to generate bpf objs
-CLANG ?= clang
-SCRATCH_DIR := $(OUTPUT)/tools
-BUILD_DIR := $(SCRATCH_DIR)/build
-BPFDIR := $(abspath ../../../lib/bpf)
-APIDIR := $(abspath ../../../include/uapi)
-
-CCINCLUDE += -I../bpf
-CCINCLUDE += -I../../../../usr/include/
-CCINCLUDE += -I$(SCRATCH_DIR)/include
-
-BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a
-
-MAKE_DIRS := $(BUILD_DIR)/libbpf
-$(MAKE_DIRS):
- mkdir -p $@
-
-# Get Clang's default includes on this system, as opposed to those seen by
-# '--target=bpf'. This fixes "missing" files on some architectures/distros,
-# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
-#
-# Use '-idirafter': Don't interfere with include mechanics except where the
-# build would have failed anyways.
-define get_sys_includes
-$(shell $(1) $(2) -v -E - </dev/null 2>&1 \
- | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \
-$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}')
-endef
-
-ifneq ($(CROSS_COMPILE),)
-CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%))
-endif
-
-CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH))
-
-$(OUTPUT)/nat6to4.o $(OUTPUT)/xdp_dummy.o: $(OUTPUT)/%.o : %.c $(BPFOBJ) | $(MAKE_DIRS)
- $(CLANG) -O2 --target=bpf -c $< $(CCINCLUDE) $(CLANG_SYS_INCLUDES) -o $@
-
-$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
- $(APIDIR)/linux/bpf.h \
- | $(BUILD_DIR)/libbpf
- $(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \
- EXTRA_CFLAGS='-g -O0' \
- DESTDIR=$(SCRATCH_DIR) prefix= all install_headers
-
-EXTRA_CLEAN := $(SCRATCH_DIR)
+include bpf.mk
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
index 221c387a7d7f..50584479540b 100644
--- a/tools/testing/selftests/net/af_unix/Makefile
+++ b/tools/testing/selftests/net/af_unix/Makefile
@@ -1,4 +1,4 @@
CFLAGS += $(KHDR_INCLUDES)
-TEST_GEN_PROGS := diag_uid test_unix_oob unix_connect scm_pidfd
+TEST_GEN_PROGS := diag_uid msg_oob scm_pidfd scm_rights unix_connect
include ../../lib.mk
diff --git a/tools/testing/selftests/net/af_unix/config b/tools/testing/selftests/net/af_unix/config
new file mode 100644
index 000000000000..37368567768c
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/config
@@ -0,0 +1,3 @@
+CONFIG_UNIX=y
+CONFIG_AF_UNIX_OOB=y
+CONFIG_UNIX_DIAG=m
diff --git a/tools/testing/selftests/net/af_unix/msg_oob.c b/tools/testing/selftests/net/af_unix/msg_oob.c
new file mode 100644
index 000000000000..16d0c172eaeb
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/msg_oob.c
@@ -0,0 +1,734 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <netinet/in.h>
+#include <sys/epoll.h>
+#include <sys/ioctl.h>
+#include <sys/signalfd.h>
+#include <sys/socket.h>
+
+#include "../../kselftest_harness.h"
+
+#define BUF_SZ 32
+
+FIXTURE(msg_oob)
+{
+ int fd[4]; /* 0: AF_UNIX sender
+ * 1: AF_UNIX receiver
+ * 2: TCP sender
+ * 3: TCP receiver
+ */
+ int signal_fd;
+ int epoll_fd[2]; /* 0: AF_UNIX receiver
+ * 1: TCP receiver
+ */
+ bool tcp_compliant;
+};
+
+FIXTURE_VARIANT(msg_oob)
+{
+ bool peek;
+};
+
+FIXTURE_VARIANT_ADD(msg_oob, no_peek)
+{
+ .peek = false,
+};
+
+FIXTURE_VARIANT_ADD(msg_oob, peek)
+{
+ .peek = true
+};
+
+static void create_unix_socketpair(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self)
+{
+ int ret;
+
+ ret = socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0, self->fd);
+ ASSERT_EQ(ret, 0);
+}
+
+static void create_tcp_socketpair(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self)
+{
+ struct sockaddr_in addr;
+ socklen_t addrlen;
+ int listen_fd;
+ int ret;
+
+ listen_fd = socket(AF_INET, SOCK_STREAM, 0);
+ ASSERT_GE(listen_fd, 0);
+
+ ret = listen(listen_fd, -1);
+ ASSERT_EQ(ret, 0);
+
+ addrlen = sizeof(addr);
+ ret = getsockname(listen_fd, (struct sockaddr *)&addr, &addrlen);
+ ASSERT_EQ(ret, 0);
+
+ self->fd[2] = socket(AF_INET, SOCK_STREAM, 0);
+ ASSERT_GE(self->fd[2], 0);
+
+ ret = connect(self->fd[2], (struct sockaddr *)&addr, addrlen);
+ ASSERT_EQ(ret, 0);
+
+ self->fd[3] = accept(listen_fd, (struct sockaddr *)&addr, &addrlen);
+ ASSERT_GE(self->fd[3], 0);
+
+ ret = fcntl(self->fd[3], F_SETFL, O_NONBLOCK);
+ ASSERT_EQ(ret, 0);
+}
+
+static void setup_sigurg(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self)
+{
+ struct signalfd_siginfo siginfo;
+ int pid = getpid();
+ sigset_t mask;
+ int i, ret;
+
+ for (i = 0; i < 2; i++) {
+ ret = ioctl(self->fd[i * 2 + 1], FIOSETOWN, &pid);
+ ASSERT_EQ(ret, 0);
+ }
+
+ ret = sigemptyset(&mask);
+ ASSERT_EQ(ret, 0);
+
+ ret = sigaddset(&mask, SIGURG);
+ ASSERT_EQ(ret, 0);
+
+ ret = sigprocmask(SIG_BLOCK, &mask, NULL);
+ ASSERT_EQ(ret, 0);
+
+ self->signal_fd = signalfd(-1, &mask, SFD_NONBLOCK);
+ ASSERT_GE(self->signal_fd, 0);
+
+ ret = read(self->signal_fd, &siginfo, sizeof(siginfo));
+ ASSERT_EQ(ret, -1);
+}
+
+static void setup_epollpri(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self)
+{
+ struct epoll_event event = {
+ .events = EPOLLPRI,
+ };
+ int i;
+
+ for (i = 0; i < 2; i++) {
+ int ret;
+
+ self->epoll_fd[i] = epoll_create1(0);
+ ASSERT_GE(self->epoll_fd[i], 0);
+
+ ret = epoll_ctl(self->epoll_fd[i], EPOLL_CTL_ADD, self->fd[i * 2 + 1], &event);
+ ASSERT_EQ(ret, 0);
+ }
+}
+
+static void close_sockets(FIXTURE_DATA(msg_oob) *self)
+{
+ int i;
+
+ for (i = 0; i < 4; i++)
+ close(self->fd[i]);
+}
+
+FIXTURE_SETUP(msg_oob)
+{
+ create_unix_socketpair(_metadata, self);
+ create_tcp_socketpair(_metadata, self);
+
+ setup_sigurg(_metadata, self);
+ setup_epollpri(_metadata, self);
+
+ self->tcp_compliant = true;
+}
+
+FIXTURE_TEARDOWN(msg_oob)
+{
+ close_sockets(self);
+}
+
+static void __epollpair(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self,
+ bool oob_remaining)
+{
+ struct epoll_event event[2] = {};
+ int i, ret[2];
+
+ for (i = 0; i < 2; i++)
+ ret[i] = epoll_wait(self->epoll_fd[i], &event[i], 1, 0);
+
+ ASSERT_EQ(ret[0], oob_remaining);
+
+ if (self->tcp_compliant)
+ ASSERT_EQ(ret[0], ret[1]);
+
+ if (oob_remaining) {
+ ASSERT_EQ(event[0].events, EPOLLPRI);
+
+ if (self->tcp_compliant)
+ ASSERT_EQ(event[0].events, event[1].events);
+ }
+}
+
+static void __sendpair(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self,
+ const void *buf, size_t len, int flags)
+{
+ int i, ret[2];
+
+ for (i = 0; i < 2; i++) {
+ struct signalfd_siginfo siginfo = {};
+ int bytes;
+
+ ret[i] = send(self->fd[i * 2], buf, len, flags);
+
+ bytes = read(self->signal_fd, &siginfo, sizeof(siginfo));
+
+ if (flags & MSG_OOB) {
+ ASSERT_EQ(bytes, sizeof(siginfo));
+ ASSERT_EQ(siginfo.ssi_signo, SIGURG);
+
+ bytes = read(self->signal_fd, &siginfo, sizeof(siginfo));
+ }
+
+ ASSERT_EQ(bytes, -1);
+ }
+
+ ASSERT_EQ(ret[0], len);
+ ASSERT_EQ(ret[0], ret[1]);
+}
+
+static void __recvpair(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self,
+ const void *expected_buf, int expected_len,
+ int buf_len, int flags)
+{
+ int i, ret[2], recv_errno[2], expected_errno = 0;
+ char recv_buf[2][BUF_SZ] = {};
+ bool printed = false;
+
+ ASSERT_GE(BUF_SZ, buf_len);
+
+ errno = 0;
+
+ for (i = 0; i < 2; i++) {
+ ret[i] = recv(self->fd[i * 2 + 1], recv_buf[i], buf_len, flags);
+ recv_errno[i] = errno;
+ }
+
+ if (expected_len < 0) {
+ expected_errno = -expected_len;
+ expected_len = -1;
+ }
+
+ if (ret[0] != expected_len || recv_errno[0] != expected_errno) {
+ TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]);
+ TH_LOG("Expected:%s", expected_errno ? strerror(expected_errno) : expected_buf);
+
+ ASSERT_EQ(ret[0], expected_len);
+ ASSERT_EQ(recv_errno[0], expected_errno);
+ }
+
+ if (ret[0] != ret[1] || recv_errno[0] != recv_errno[1]) {
+ TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]);
+ TH_LOG("TCP :%s", ret[1] < 0 ? strerror(recv_errno[1]) : recv_buf[1]);
+
+ printed = true;
+
+ if (self->tcp_compliant) {
+ ASSERT_EQ(ret[0], ret[1]);
+ ASSERT_EQ(recv_errno[0], recv_errno[1]);
+ }
+ }
+
+ if (expected_len >= 0) {
+ int cmp;
+
+ cmp = strncmp(expected_buf, recv_buf[0], expected_len);
+ if (cmp) {
+ TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]);
+ TH_LOG("Expected:%s", expected_errno ? strerror(expected_errno) : expected_buf);
+
+ ASSERT_EQ(cmp, 0);
+ }
+
+ cmp = strncmp(recv_buf[0], recv_buf[1], expected_len);
+ if (cmp) {
+ if (!printed) {
+ TH_LOG("AF_UNIX :%s", ret[0] < 0 ? strerror(recv_errno[0]) : recv_buf[0]);
+ TH_LOG("TCP :%s", ret[1] < 0 ? strerror(recv_errno[1]) : recv_buf[1]);
+ }
+
+ if (self->tcp_compliant)
+ ASSERT_EQ(cmp, 0);
+ }
+ }
+}
+
+static void __setinlinepair(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self)
+{
+ int i, oob_inline = 1;
+
+ for (i = 0; i < 2; i++) {
+ int ret;
+
+ ret = setsockopt(self->fd[i * 2 + 1], SOL_SOCKET, SO_OOBINLINE,
+ &oob_inline, sizeof(oob_inline));
+ ASSERT_EQ(ret, 0);
+ }
+}
+
+static void __siocatmarkpair(struct __test_metadata *_metadata,
+ FIXTURE_DATA(msg_oob) *self,
+ bool oob_head)
+{
+ int answ[2] = {};
+ int i;
+
+ for (i = 0; i < 2; i++) {
+ int ret;
+
+ ret = ioctl(self->fd[i * 2 + 1], SIOCATMARK, &answ[i]);
+ ASSERT_EQ(ret, 0);
+ }
+
+ ASSERT_EQ(answ[0], oob_head);
+
+ if (self->tcp_compliant)
+ ASSERT_EQ(answ[0], answ[1]);
+}
+
+#define sendpair(buf, len, flags) \
+ __sendpair(_metadata, self, buf, len, flags)
+
+#define recvpair(expected_buf, expected_len, buf_len, flags) \
+ do { \
+ if (variant->peek) \
+ __recvpair(_metadata, self, \
+ expected_buf, expected_len, \
+ buf_len, (flags) | MSG_PEEK); \
+ __recvpair(_metadata, self, \
+ expected_buf, expected_len, buf_len, flags); \
+ } while (0)
+
+#define epollpair(oob_remaining) \
+ __epollpair(_metadata, self, oob_remaining)
+
+#define siocatmarkpair(oob_head) \
+ __siocatmarkpair(_metadata, self, oob_head)
+
+#define setinlinepair() \
+ __setinlinepair(_metadata, self)
+
+#define tcp_incompliant \
+ for (self->tcp_compliant = false; \
+ self->tcp_compliant == false; \
+ self->tcp_compliant = true)
+
+TEST_F(msg_oob, non_oob)
+{
+ sendpair("x", 1, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ recvpair("", -EINVAL, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, oob)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("x", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(true);
+}
+
+TEST_F(msg_oob, oob_drop)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("", -EAGAIN, 1, 0); /* Drop OOB. */
+ epollpair(false);
+ siocatmarkpair(false);
+
+ recvpair("", -EINVAL, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, oob_ahead)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("o", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ recvpair("hell", 4, 4, 0);
+ epollpair(false);
+ siocatmarkpair(true);
+}
+
+TEST_F(msg_oob, oob_break)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("hell", 4, 5, 0); /* Break at OOB even with enough buffer. */
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("o", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(true);
+
+ recvpair("", -EAGAIN, 1, 0);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, oob_ahead_break)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ sendpair("world", 5, 0);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("o", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ recvpair("hell", 4, 9, 0); /* Break at OOB even after it's recv()ed. */
+ epollpair(false);
+ siocatmarkpair(true);
+
+ recvpair("world", 5, 5, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, oob_break_drop)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ sendpair("world", 5, 0);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("hell", 4, 10, 0); /* Break at OOB even with enough buffer. */
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("world", 5, 10, 0); /* Drop OOB and recv() the next skb. */
+ epollpair(false);
+ siocatmarkpair(false);
+
+ recvpair("", -EINVAL, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, ex_oob_break)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ sendpair("wor", 3, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ sendpair("ld", 2, 0);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("hellowo", 7, 10, 0); /* Break at OOB but not at ex-OOB. */
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("r", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(true);
+
+ recvpair("ld", 2, 2, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, ex_oob_drop)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ sendpair("y", 1, MSG_OOB); /* TCP drops "x" at this moment. */
+ epollpair(true);
+
+ tcp_incompliant {
+ siocatmarkpair(false);
+
+ recvpair("x", 1, 1, 0); /* TCP drops "y" by passing through it. */
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("y", 1, 1, MSG_OOB); /* TCP returns -EINVAL. */
+ epollpair(false);
+ siocatmarkpair(true);
+ }
+}
+
+TEST_F(msg_oob, ex_oob_drop_2)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ sendpair("y", 1, MSG_OOB); /* TCP drops "x" at this moment. */
+ epollpair(true);
+
+ tcp_incompliant {
+ siocatmarkpair(false);
+ }
+
+ recvpair("y", 1, 1, MSG_OOB);
+ epollpair(false);
+
+ tcp_incompliant {
+ siocatmarkpair(false);
+
+ recvpair("x", 1, 1, 0); /* TCP returns -EAGAIN. */
+ epollpair(false);
+ siocatmarkpair(true);
+ }
+}
+
+TEST_F(msg_oob, ex_oob_ahead_break)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ sendpair("wor", 3, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("r", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ sendpair("ld", 2, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ tcp_incompliant {
+ recvpair("hellowol", 8, 10, 0); /* TCP recv()s "helloworl", why "r" ?? */
+ }
+
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("d", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(true);
+}
+
+TEST_F(msg_oob, ex_oob_siocatmark)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("o", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ sendpair("world", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */
+ epollpair(true);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, inline_oob)
+{
+ setinlinepair();
+
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("", -EINVAL, 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("x", 1, 1, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, inline_oob_break)
+{
+ setinlinepair();
+
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("", -EINVAL, 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("hell", 4, 5, 0); /* Break at OOB but not at ex-OOB. */
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("o", 1, 1, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, inline_oob_ahead_break)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ sendpair("world", 5, 0);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("o", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ setinlinepair();
+
+ recvpair("hell", 4, 9, 0); /* Break at OOB even with enough buffer. */
+ epollpair(false);
+ siocatmarkpair(true);
+
+ tcp_incompliant {
+ recvpair("world", 5, 6, 0); /* TCP recv()s "oworld", ... "o" ??? */
+ }
+
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, inline_ex_oob_break)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ sendpair("wor", 3, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ sendpair("ld", 2, 0);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ setinlinepair();
+
+ recvpair("hellowo", 7, 10, 0); /* Break at OOB but not at ex-OOB. */
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("rld", 3, 3, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, inline_ex_oob_no_drop)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ setinlinepair();
+
+ sendpair("y", 1, MSG_OOB); /* TCP does NOT drops "x" at this moment. */
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("x", 1, 1, 0);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("y", 1, 1, 0);
+ epollpair(false);
+ siocatmarkpair(false);
+}
+
+TEST_F(msg_oob, inline_ex_oob_drop)
+{
+ sendpair("x", 1, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(true);
+
+ sendpair("y", 1, MSG_OOB); /* TCP drops "x" at this moment. */
+ epollpair(true);
+
+ setinlinepair();
+
+ tcp_incompliant {
+ siocatmarkpair(false);
+
+ recvpair("x", 1, 1, 0); /* TCP recv()s "y". */
+ epollpair(true);
+ siocatmarkpair(true);
+
+ recvpair("y", 1, 1, 0); /* TCP returns -EAGAIN. */
+ epollpair(false);
+ siocatmarkpair(false);
+ }
+}
+
+TEST_F(msg_oob, inline_ex_oob_siocatmark)
+{
+ sendpair("hello", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("o", 1, 1, MSG_OOB);
+ epollpair(false);
+ siocatmarkpair(false);
+
+ setinlinepair();
+
+ sendpair("world", 5, MSG_OOB);
+ epollpair(true);
+ siocatmarkpair(false);
+
+ recvpair("hell", 4, 4, 0); /* Intentionally stop at ex-OOB. */
+ epollpair(true);
+ siocatmarkpair(false);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/af_unix/scm_rights.c b/tools/testing/selftests/net/af_unix/scm_rights.c
new file mode 100644
index 000000000000..d66336256580
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/scm_rights.c
@@ -0,0 +1,307 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+#define _GNU_SOURCE
+#include <sched.h>
+
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "../../kselftest_harness.h"
+
+FIXTURE(scm_rights)
+{
+ int fd[32];
+};
+
+FIXTURE_VARIANT(scm_rights)
+{
+ char name[32];
+ int type;
+ int flags;
+ bool test_listener;
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, dgram)
+{
+ .name = "UNIX ",
+ .type = SOCK_DGRAM,
+ .flags = 0,
+ .test_listener = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream)
+{
+ .name = "UNIX-STREAM ",
+ .type = SOCK_STREAM,
+ .flags = 0,
+ .test_listener = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream_oob)
+{
+ .name = "UNIX-STREAM ",
+ .type = SOCK_STREAM,
+ .flags = MSG_OOB,
+ .test_listener = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream_listener)
+{
+ .name = "UNIX-STREAM ",
+ .type = SOCK_STREAM,
+ .flags = 0,
+ .test_listener = true,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream_listener_oob)
+{
+ .name = "UNIX-STREAM ",
+ .type = SOCK_STREAM,
+ .flags = MSG_OOB,
+ .test_listener = true,
+};
+
+static int count_sockets(struct __test_metadata *_metadata,
+ const FIXTURE_VARIANT(scm_rights) *variant)
+{
+ int sockets = -1, len, ret;
+ char *line = NULL;
+ size_t unused;
+ FILE *f;
+
+ f = fopen("/proc/net/protocols", "r");
+ ASSERT_NE(NULL, f);
+
+ len = strlen(variant->name);
+
+ while (getline(&line, &unused, f) != -1) {
+ int unused2;
+
+ if (strncmp(line, variant->name, len))
+ continue;
+
+ ret = sscanf(line + len, "%d %d", &unused2, &sockets);
+ ASSERT_EQ(2, ret);
+
+ break;
+ }
+
+ free(line);
+
+ ret = fclose(f);
+ ASSERT_EQ(0, ret);
+
+ return sockets;
+}
+
+FIXTURE_SETUP(scm_rights)
+{
+ int ret;
+
+ ret = unshare(CLONE_NEWNET);
+ ASSERT_EQ(0, ret);
+
+ ret = count_sockets(_metadata, variant);
+ ASSERT_EQ(0, ret);
+}
+
+FIXTURE_TEARDOWN(scm_rights)
+{
+ int ret;
+
+ sleep(1);
+
+ ret = count_sockets(_metadata, variant);
+ ASSERT_EQ(0, ret);
+}
+
+static void create_listeners(struct __test_metadata *_metadata,
+ FIXTURE_DATA(scm_rights) *self,
+ int n)
+{
+ struct sockaddr_un addr = {
+ .sun_family = AF_UNIX,
+ };
+ socklen_t addrlen;
+ int i, ret;
+
+ for (i = 0; i < n * 2; i += 2) {
+ self->fd[i] = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_LE(0, self->fd[i]);
+
+ addrlen = sizeof(addr.sun_family);
+ ret = bind(self->fd[i], (struct sockaddr *)&addr, addrlen);
+ ASSERT_EQ(0, ret);
+
+ ret = listen(self->fd[i], -1);
+ ASSERT_EQ(0, ret);
+
+ addrlen = sizeof(addr);
+ ret = getsockname(self->fd[i], (struct sockaddr *)&addr, &addrlen);
+ ASSERT_EQ(0, ret);
+
+ self->fd[i + 1] = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_LE(0, self->fd[i + 1]);
+
+ ret = connect(self->fd[i + 1], (struct sockaddr *)&addr, addrlen);
+ ASSERT_EQ(0, ret);
+ }
+}
+
+static void create_socketpairs(struct __test_metadata *_metadata,
+ FIXTURE_DATA(scm_rights) *self,
+ const FIXTURE_VARIANT(scm_rights) *variant,
+ int n)
+{
+ int i, ret;
+
+ ASSERT_GE(sizeof(self->fd) / sizeof(int), n);
+
+ for (i = 0; i < n * 2; i += 2) {
+ ret = socketpair(AF_UNIX, variant->type, 0, self->fd + i);
+ ASSERT_EQ(0, ret);
+ }
+}
+
+static void __create_sockets(struct __test_metadata *_metadata,
+ FIXTURE_DATA(scm_rights) *self,
+ const FIXTURE_VARIANT(scm_rights) *variant,
+ int n)
+{
+ ASSERT_LE(n * 2, sizeof(self->fd) / sizeof(self->fd[0]));
+
+ if (variant->test_listener)
+ create_listeners(_metadata, self, n);
+ else
+ create_socketpairs(_metadata, self, variant, n);
+}
+
+static void __close_sockets(struct __test_metadata *_metadata,
+ FIXTURE_DATA(scm_rights) *self,
+ int n)
+{
+ int i, ret;
+
+ ASSERT_GE(sizeof(self->fd) / sizeof(int), n);
+
+ for (i = 0; i < n * 2; i++) {
+ ret = close(self->fd[i]);
+ ASSERT_EQ(0, ret);
+ }
+}
+
+void __send_fd(struct __test_metadata *_metadata,
+ const FIXTURE_DATA(scm_rights) *self,
+ const FIXTURE_VARIANT(scm_rights) *variant,
+ int inflight, int receiver)
+{
+#define MSG "x"
+#define MSGLEN 1
+ struct {
+ struct cmsghdr cmsghdr;
+ int fd[2];
+ } cmsg = {
+ .cmsghdr = {
+ .cmsg_len = CMSG_LEN(sizeof(cmsg.fd)),
+ .cmsg_level = SOL_SOCKET,
+ .cmsg_type = SCM_RIGHTS,
+ },
+ .fd = {
+ self->fd[inflight * 2],
+ self->fd[inflight * 2],
+ },
+ };
+ struct iovec iov = {
+ .iov_base = MSG,
+ .iov_len = MSGLEN,
+ };
+ struct msghdr msg = {
+ .msg_name = NULL,
+ .msg_namelen = 0,
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = &cmsg,
+ .msg_controllen = CMSG_SPACE(sizeof(cmsg.fd)),
+ };
+ int ret;
+
+ ret = sendmsg(self->fd[receiver * 2 + 1], &msg, variant->flags);
+ ASSERT_EQ(MSGLEN, ret);
+}
+
+#define create_sockets(n) \
+ __create_sockets(_metadata, self, variant, n)
+#define close_sockets(n) \
+ __close_sockets(_metadata, self, n)
+#define send_fd(inflight, receiver) \
+ __send_fd(_metadata, self, variant, inflight, receiver)
+
+TEST_F(scm_rights, self_ref)
+{
+ create_sockets(2);
+
+ send_fd(0, 0);
+
+ send_fd(1, 1);
+
+ close_sockets(2);
+}
+
+TEST_F(scm_rights, triangle)
+{
+ create_sockets(6);
+
+ send_fd(0, 1);
+ send_fd(1, 2);
+ send_fd(2, 0);
+
+ send_fd(3, 4);
+ send_fd(4, 5);
+ send_fd(5, 3);
+
+ close_sockets(6);
+}
+
+TEST_F(scm_rights, cross_edge)
+{
+ create_sockets(8);
+
+ send_fd(0, 1);
+ send_fd(1, 2);
+ send_fd(2, 0);
+ send_fd(1, 3);
+ send_fd(3, 2);
+
+ send_fd(4, 5);
+ send_fd(5, 6);
+ send_fd(6, 4);
+ send_fd(5, 7);
+ send_fd(7, 6);
+
+ close_sockets(8);
+}
+
+TEST_F(scm_rights, backtrack_from_scc)
+{
+ create_sockets(10);
+
+ send_fd(0, 1);
+ send_fd(0, 4);
+ send_fd(1, 2);
+ send_fd(2, 3);
+ send_fd(3, 1);
+
+ send_fd(5, 6);
+ send_fd(5, 9);
+ send_fd(6, 7);
+ send_fd(7, 8);
+ send_fd(8, 6);
+
+ close_sockets(10);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c
deleted file mode 100644
index a7c51889acd5..000000000000
--- a/tools/testing/selftests/net/af_unix/test_unix_oob.c
+++ /dev/null
@@ -1,436 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/socket.h>
-#include <arpa/inet.h>
-#include <unistd.h>
-#include <string.h>
-#include <fcntl.h>
-#include <sys/ioctl.h>
-#include <errno.h>
-#include <netinet/tcp.h>
-#include <sys/un.h>
-#include <sys/signal.h>
-#include <sys/poll.h>
-
-static int pipefd[2];
-static int signal_recvd;
-static pid_t producer_id;
-static char sock_name[32];
-
-static void sig_hand(int sn, siginfo_t *si, void *p)
-{
- signal_recvd = sn;
-}
-
-static int set_sig_handler(int signal)
-{
- struct sigaction sa;
-
- sa.sa_sigaction = sig_hand;
- sigemptyset(&sa.sa_mask);
- sa.sa_flags = SA_SIGINFO | SA_RESTART;
-
- return sigaction(signal, &sa, NULL);
-}
-
-static void set_filemode(int fd, int set)
-{
- int flags = fcntl(fd, F_GETFL, 0);
-
- if (set)
- flags &= ~O_NONBLOCK;
- else
- flags |= O_NONBLOCK;
- fcntl(fd, F_SETFL, flags);
-}
-
-static void signal_producer(int fd)
-{
- char cmd;
-
- cmd = 'S';
- write(fd, &cmd, sizeof(cmd));
-}
-
-static void wait_for_signal(int fd)
-{
- char buf[5];
-
- read(fd, buf, 5);
-}
-
-static void die(int status)
-{
- fflush(NULL);
- unlink(sock_name);
- kill(producer_id, SIGTERM);
- exit(status);
-}
-
-int is_sioctatmark(int fd)
-{
- int ans = -1;
-
- if (ioctl(fd, SIOCATMARK, &ans, sizeof(ans)) < 0) {
-#ifdef DEBUG
- perror("SIOCATMARK Failed");
-#endif
- }
- return ans;
-}
-
-void read_oob(int fd, char *c)
-{
-
- *c = ' ';
- if (recv(fd, c, sizeof(*c), MSG_OOB) < 0) {
-#ifdef DEBUG
- perror("Reading MSG_OOB Failed");
-#endif
- }
-}
-
-int read_data(int pfd, char *buf, int size)
-{
- int len = 0;
-
- memset(buf, size, '0');
- len = read(pfd, buf, size);
-#ifdef DEBUG
- if (len < 0)
- perror("read failed");
-#endif
- return len;
-}
-
-static void wait_for_data(int pfd, int event)
-{
- struct pollfd pfds[1];
-
- pfds[0].fd = pfd;
- pfds[0].events = event;
- poll(pfds, 1, -1);
-}
-
-void producer(struct sockaddr_un *consumer_addr)
-{
- int cfd;
- char buf[64];
- int i;
-
- memset(buf, 'x', sizeof(buf));
- cfd = socket(AF_UNIX, SOCK_STREAM, 0);
-
- wait_for_signal(pipefd[0]);
- if (connect(cfd, (struct sockaddr *)consumer_addr,
- sizeof(*consumer_addr)) != 0) {
- perror("Connect failed");
- kill(0, SIGTERM);
- exit(1);
- }
-
- for (i = 0; i < 2; i++) {
- /* Test 1: Test for SIGURG and OOB */
- wait_for_signal(pipefd[0]);
- memset(buf, 'x', sizeof(buf));
- buf[63] = '@';
- send(cfd, buf, sizeof(buf), MSG_OOB);
-
- wait_for_signal(pipefd[0]);
-
- /* Test 2: Test for OOB being overwitten */
- memset(buf, 'x', sizeof(buf));
- buf[63] = '%';
- send(cfd, buf, sizeof(buf), MSG_OOB);
-
- memset(buf, 'x', sizeof(buf));
- buf[63] = '#';
- send(cfd, buf, sizeof(buf), MSG_OOB);
-
- wait_for_signal(pipefd[0]);
-
- /* Test 3: Test for SIOCATMARK */
- memset(buf, 'x', sizeof(buf));
- buf[63] = '@';
- send(cfd, buf, sizeof(buf), MSG_OOB);
-
- memset(buf, 'x', sizeof(buf));
- buf[63] = '%';
- send(cfd, buf, sizeof(buf), MSG_OOB);
-
- memset(buf, 'x', sizeof(buf));
- send(cfd, buf, sizeof(buf), 0);
-
- wait_for_signal(pipefd[0]);
-
- /* Test 4: Test for 1byte OOB msg */
- memset(buf, 'x', sizeof(buf));
- buf[0] = '@';
- send(cfd, buf, 1, MSG_OOB);
- }
-}
-
-int
-main(int argc, char **argv)
-{
- int lfd, pfd;
- struct sockaddr_un consumer_addr, paddr;
- socklen_t len = sizeof(consumer_addr);
- char buf[1024];
- int on = 0;
- char oob;
- int atmark;
-
- lfd = socket(AF_UNIX, SOCK_STREAM, 0);
- memset(&consumer_addr, 0, sizeof(consumer_addr));
- consumer_addr.sun_family = AF_UNIX;
- sprintf(sock_name, "unix_oob_%d", getpid());
- unlink(sock_name);
- strcpy(consumer_addr.sun_path, sock_name);
-
- if ((bind(lfd, (struct sockaddr *)&consumer_addr,
- sizeof(consumer_addr))) != 0) {
- perror("socket bind failed");
- exit(1);
- }
-
- pipe(pipefd);
-
- listen(lfd, 1);
-
- producer_id = fork();
- if (producer_id == 0) {
- producer(&consumer_addr);
- exit(0);
- }
-
- set_sig_handler(SIGURG);
- signal_producer(pipefd[1]);
-
- pfd = accept(lfd, (struct sockaddr *) &paddr, &len);
- fcntl(pfd, F_SETOWN, getpid());
-
- signal_recvd = 0;
- signal_producer(pipefd[1]);
-
- /* Test 1:
- * veriyf that SIGURG is
- * delivered, 63 bytes are
- * read, oob is '@', and POLLPRI works.
- */
- wait_for_data(pfd, POLLPRI);
- read_oob(pfd, &oob);
- len = read_data(pfd, buf, 1024);
- if (!signal_recvd || len != 63 || oob != '@') {
- fprintf(stderr, "Test 1 failed sigurg %d len %d %c\n",
- signal_recvd, len, oob);
- die(1);
- }
-
- signal_recvd = 0;
- signal_producer(pipefd[1]);
-
- /* Test 2:
- * Verify that the first OOB is over written by
- * the 2nd one and the first OOB is returned as
- * part of the read, and sigurg is received.
- */
- wait_for_data(pfd, POLLIN | POLLPRI);
- len = 0;
- while (len < 70)
- len = recv(pfd, buf, 1024, MSG_PEEK);
- len = read_data(pfd, buf, 1024);
- read_oob(pfd, &oob);
- if (!signal_recvd || len != 127 || oob != '#') {
- fprintf(stderr, "Test 2 failed, sigurg %d len %d OOB %c\n",
- signal_recvd, len, oob);
- die(1);
- }
-
- signal_recvd = 0;
- signal_producer(pipefd[1]);
-
- /* Test 3:
- * verify that 2nd oob over writes
- * the first one and read breaks at
- * oob boundary returning 127 bytes
- * and sigurg is received and atmark
- * is set.
- * oob is '%' and second read returns
- * 64 bytes.
- */
- len = 0;
- wait_for_data(pfd, POLLIN | POLLPRI);
- while (len < 150)
- len = recv(pfd, buf, 1024, MSG_PEEK);
- len = read_data(pfd, buf, 1024);
- atmark = is_sioctatmark(pfd);
- read_oob(pfd, &oob);
-
- if (!signal_recvd || len != 127 || oob != '%' || atmark != 1) {
- fprintf(stderr,
- "Test 3 failed, sigurg %d len %d OOB %c atmark %d\n",
- signal_recvd, len, oob, atmark);
- die(1);
- }
-
- signal_recvd = 0;
-
- len = read_data(pfd, buf, 1024);
- if (len != 64) {
- fprintf(stderr, "Test 3.1 failed, sigurg %d len %d OOB %c\n",
- signal_recvd, len, oob);
- die(1);
- }
-
- signal_recvd = 0;
- signal_producer(pipefd[1]);
-
- /* Test 4:
- * verify that a single byte
- * oob message is delivered.
- * set non blocking mode and
- * check proper error is
- * returned and sigurg is
- * received and correct
- * oob is read.
- */
-
- set_filemode(pfd, 0);
-
- wait_for_data(pfd, POLLIN | POLLPRI);
- len = read_data(pfd, buf, 1024);
- if ((len == -1) && (errno == 11))
- len = 0;
-
- read_oob(pfd, &oob);
-
- if (!signal_recvd || len != 0 || oob != '@') {
- fprintf(stderr, "Test 4 failed, sigurg %d len %d OOB %c\n",
- signal_recvd, len, oob);
- die(1);
- }
-
- set_filemode(pfd, 1);
-
- /* Inline Testing */
-
- on = 1;
- if (setsockopt(pfd, SOL_SOCKET, SO_OOBINLINE, &on, sizeof(on))) {
- perror("SO_OOBINLINE");
- die(1);
- }
-
- signal_recvd = 0;
- signal_producer(pipefd[1]);
-
- /* Test 1 -- Inline:
- * Check that SIGURG is
- * delivered and 63 bytes are
- * read and oob is '@'
- */
-
- wait_for_data(pfd, POLLIN | POLLPRI);
- len = read_data(pfd, buf, 1024);
-
- if (!signal_recvd || len != 63) {
- fprintf(stderr, "Test 1 Inline failed, sigurg %d len %d\n",
- signal_recvd, len);
- die(1);
- }
-
- len = read_data(pfd, buf, 1024);
-
- if (len != 1) {
- fprintf(stderr,
- "Test 1.1 Inline failed, sigurg %d len %d oob %c\n",
- signal_recvd, len, oob);
- die(1);
- }
-
- signal_recvd = 0;
- signal_producer(pipefd[1]);
-
- /* Test 2 -- Inline:
- * Verify that the first OOB is over written by
- * the 2nd one and read breaks correctly on
- * 2nd OOB boundary with the first OOB returned as
- * part of the read, and sigurg is delivered and
- * siocatmark returns true.
- * next read returns one byte, the oob byte
- * and siocatmark returns false.
- */
- len = 0;
- wait_for_data(pfd, POLLIN | POLLPRI);
- while (len < 70)
- len = recv(pfd, buf, 1024, MSG_PEEK);
- len = read_data(pfd, buf, 1024);
- atmark = is_sioctatmark(pfd);
- if (len != 127 || atmark != 1 || !signal_recvd) {
- fprintf(stderr, "Test 2 Inline failed, len %d atmark %d\n",
- len, atmark);
- die(1);
- }
-
- len = read_data(pfd, buf, 1024);
- atmark = is_sioctatmark(pfd);
- if (len != 1 || buf[0] != '#' || atmark == 1) {
- fprintf(stderr, "Test 2.1 Inline failed, len %d data %c atmark %d\n",
- len, buf[0], atmark);
- die(1);
- }
-
- signal_recvd = 0;
- signal_producer(pipefd[1]);
-
- /* Test 3 -- Inline:
- * verify that 2nd oob over writes
- * the first one and read breaks at
- * oob boundary returning 127 bytes
- * and sigurg is received and siocatmark
- * is true after the read.
- * subsequent read returns 65 bytes
- * because of oob which should be '%'.
- */
- len = 0;
- wait_for_data(pfd, POLLIN | POLLPRI);
- while (len < 126)
- len = recv(pfd, buf, 1024, MSG_PEEK);
- len = read_data(pfd, buf, 1024);
- atmark = is_sioctatmark(pfd);
- if (!signal_recvd || len != 127 || !atmark) {
- fprintf(stderr,
- "Test 3 Inline failed, sigurg %d len %d data %c\n",
- signal_recvd, len, buf[0]);
- die(1);
- }
-
- len = read_data(pfd, buf, 1024);
- atmark = is_sioctatmark(pfd);
- if (len != 65 || buf[0] != '%' || atmark != 0) {
- fprintf(stderr,
- "Test 3.1 Inline failed, len %d oob %c atmark %d\n",
- len, buf[0], atmark);
- die(1);
- }
-
- signal_recvd = 0;
- signal_producer(pipefd[1]);
-
- /* Test 4 -- Inline:
- * verify that a single
- * byte oob message is delivered
- * and read returns one byte, the oob
- * byte and sigurg is received
- */
- wait_for_data(pfd, POLLIN | POLLPRI);
- len = read_data(pfd, buf, 1024);
- if (!signal_recvd || len != 1 || buf[0] != '@') {
- fprintf(stderr,
- "Test 4 Inline failed, signal %d len %d data %c\n",
- signal_recvd, len, buf[0]);
- die(1);
- }
- die(0);
-}
diff --git a/tools/testing/selftests/net/amt.sh b/tools/testing/selftests/net/amt.sh
index 75528788cb95..d458b45c775b 100755
--- a/tools/testing/selftests/net/amt.sh
+++ b/tools/testing/selftests/net/amt.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
# Author: Taehee Yoo <ap420073@gmail.com>
@@ -77,6 +77,7 @@ readonly LISTENER=$(mktemp -u listener-XXXXXXXX)
readonly GATEWAY=$(mktemp -u gateway-XXXXXXXX)
readonly RELAY=$(mktemp -u relay-XXXXXXXX)
readonly SOURCE=$(mktemp -u source-XXXXXXXX)
+readonly SMCROUTEDIR="$(mktemp -d)"
ERR=4
err=0
@@ -85,6 +86,11 @@ exit_cleanup()
for ns in "$@"; do
ip netns delete "${ns}" 2>/dev/null || true
done
+ if [ -f "$SMCROUTEDIR/amt.pid" ]; then
+ smcpid=$(< $SMCROUTEDIR/amt.pid)
+ kill $smcpid
+ fi
+ rm -rf $SMCROUTEDIR
exit $ERR
}
@@ -167,7 +173,7 @@ setup_iptables()
setup_mcast_routing()
{
- ip netns exec "${RELAY}" smcrouted
+ ip netns exec "${RELAY}" smcrouted -P $SMCROUTEDIR/amt.pid
ip netns exec "${RELAY}" smcroutectl a relay_src \
172.17.0.2 239.0.0.1 amtr
ip netns exec "${RELAY}" smcroutectl a relay_src \
@@ -210,8 +216,8 @@ check_features()
test_ipv4_forward()
{
- RESULT4=$(ip netns exec "${LISTENER}" nc -w 1 -l -u 239.0.0.1 4000)
- if [ "$RESULT4" == "172.17.0.2" ]; then
+ RESULT4=$(ip netns exec "${LISTENER}" timeout 15 socat - UDP4-LISTEN:4000,readbytes=128 || true)
+ if echo "$RESULT4" | grep -q "172.17.0.2"; then
printf "TEST: %-60s [ OK ]\n" "IPv4 amt multicast forwarding"
exit 0
else
@@ -222,8 +228,8 @@ test_ipv4_forward()
test_ipv6_forward()
{
- RESULT6=$(ip netns exec "${LISTENER}" nc -w 1 -l -u ff0e::5:6 6000)
- if [ "$RESULT6" == "2001:db8:3::2" ]; then
+ RESULT6=$(ip netns exec "${LISTENER}" timeout 15 socat - UDP6-LISTEN:6000,readbytes=128 || true)
+ if echo "$RESULT6" | grep -q "2001:db8:3::2"; then
printf "TEST: %-60s [ OK ]\n" "IPv6 amt multicast forwarding"
exit 0
else
@@ -236,14 +242,14 @@ send_mcast4()
{
sleep 2
ip netns exec "${SOURCE}" bash -c \
- 'echo 172.17.0.2 | nc -w 1 -u 239.0.0.1 4000' &
+ 'printf "%s %128s" 172.17.0.2 | nc -w 1 -u 239.0.0.1 4000' &
}
send_mcast6()
{
sleep 2
ip netns exec "${SOURCE}" bash -c \
- 'echo 2001:db8:3::2 | nc -w 1 -u ff0e::5:6 6000' &
+ 'printf "%s %128s" 2001:db8:3::2 | nc -w 1 -u ff0e::5:6 6000' &
}
check_features
diff --git a/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh b/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh
index a40c0e9bd023..eef5cbf6eecc 100755
--- a/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh
+++ b/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh
@@ -73,25 +73,19 @@ setup_v6() {
# namespaces. veth0 is veth-router, veth1 is veth-host.
# first, set up the inteface's link to the namespace
# then, set the interface "up"
- ip -6 -netns ${ROUTER_NS_V6} link add name ${ROUTER_INTF} \
- type veth peer name ${HOST_INTF}
-
- ip -6 -netns ${ROUTER_NS_V6} link set dev ${ROUTER_INTF} up
- ip -6 -netns ${ROUTER_NS_V6} link set dev ${HOST_INTF} netns \
- ${HOST_NS_V6}
+ ip -n ${ROUTER_NS_V6} link add name ${ROUTER_INTF} \
+ type veth peer name ${HOST_INTF} netns ${HOST_NS_V6}
- ip -6 -netns ${HOST_NS_V6} link set dev ${HOST_INTF} up
- ip -6 -netns ${ROUTER_NS_V6} addr add \
- ${ROUTER_ADDR_V6}/${PREFIX_WIDTH_V6} dev ${ROUTER_INTF} nodad
+ # Add tc rule to filter out host na message
+ tc -n ${ROUTER_NS_V6} qdisc add dev ${ROUTER_INTF} clsact
+ tc -n ${ROUTER_NS_V6} filter add dev ${ROUTER_INTF} \
+ ingress protocol ipv6 pref 1 handle 101 \
+ flower src_ip ${HOST_ADDR_V6} ip_proto icmpv6 type 136 skip_hw action pass
HOST_CONF=net.ipv6.conf.${HOST_INTF}
ip netns exec ${HOST_NS_V6} sysctl -qw ${HOST_CONF}.ndisc_notify=1
ip netns exec ${HOST_NS_V6} sysctl -qw ${HOST_CONF}.disable_ipv6=0
- ip -6 -netns ${HOST_NS_V6} addr add ${HOST_ADDR_V6}/${PREFIX_WIDTH_V6} \
- dev ${HOST_INTF}
-
ROUTER_CONF=net.ipv6.conf.${ROUTER_INTF}
-
ip netns exec ${ROUTER_NS_V6} sysctl -w \
${ROUTER_CONF}.forwarding=1 >/dev/null 2>&1
ip netns exec ${ROUTER_NS_V6} sysctl -w \
@@ -99,6 +93,13 @@ setup_v6() {
ip netns exec ${ROUTER_NS_V6} sysctl -w \
${ROUTER_CONF}.accept_untracked_na=${accept_untracked_na} \
>/dev/null 2>&1
+
+ ip -n ${ROUTER_NS_V6} link set dev ${ROUTER_INTF} up
+ ip -n ${HOST_NS_V6} link set dev ${HOST_INTF} up
+ ip -n ${ROUTER_NS_V6} addr add ${ROUTER_ADDR_V6}/${PREFIX_WIDTH_V6} \
+ dev ${ROUTER_INTF} nodad
+ ip -n ${HOST_NS_V6} addr add ${HOST_ADDR_V6}/${PREFIX_WIDTH_V6} \
+ dev ${HOST_INTF}
set +e
}
@@ -162,26 +163,6 @@ arp_test_gratuitous_combinations() {
arp_test_gratuitous 2 1
}
-cleanup_tcpdump() {
- set -e
- [[ ! -z ${tcpdump_stdout} ]] && rm -f ${tcpdump_stdout}
- [[ ! -z ${tcpdump_stderr} ]] && rm -f ${tcpdump_stderr}
- tcpdump_stdout=
- tcpdump_stderr=
- set +e
-}
-
-start_tcpdump() {
- set -e
- tcpdump_stdout=`mktemp`
- tcpdump_stderr=`mktemp`
- ip netns exec ${ROUTER_NS_V6} timeout 15s \
- tcpdump --immediate-mode -tpni ${ROUTER_INTF} -c 1 \
- "icmp6 && icmp6[0] == 136 && src ${HOST_ADDR_V6}" \
- > ${tcpdump_stdout} 2> /dev/null
- set +e
-}
-
verify_ndisc() {
local accept_untracked_na=$1
local same_subnet=$2
@@ -222,8 +203,9 @@ ndisc_test_untracked_advertisements() {
HOST_ADDR_V6=2001:db8:abcd:0012::3
fi
fi
- setup_v6 $1 $2
- start_tcpdump
+ setup_v6 $1
+ slowwait_for_counter 15 1 \
+ tc_rule_handle_stats_get "dev ${ROUTER_INTF} ingress" 101 ".packets" "-n ${ROUTER_NS_V6}"
if verify_ndisc $1 $2; then
printf " TEST: %-60s [ OK ]\n" "${test_msg[*]}"
@@ -231,7 +213,6 @@ ndisc_test_untracked_advertisements() {
printf " TEST: %-60s [FAIL]\n" "${test_msg[*]}"
fi
- cleanup_tcpdump
cleanup_v6
set +e
}
diff --git a/tools/testing/selftests/net/bpf.mk b/tools/testing/selftests/net/bpf.mk
new file mode 100644
index 000000000000..a4f6755dd894
--- /dev/null
+++ b/tools/testing/selftests/net/bpf.mk
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: GPL-2.0
+# Rules to generate bpf objs
+CLANG ?= clang
+SCRATCH_DIR := $(OUTPUT)/tools
+BUILD_DIR := $(SCRATCH_DIR)/build
+BPFDIR := $(top_srcdir)/tools/lib/bpf
+APIDIR := $(top_srcdir)/tools/include/uapi
+
+CCINCLUDE += -I$(selfdir)/bpf
+CCINCLUDE += -I$(top_srcdir)/usr/include/
+CCINCLUDE += -I$(SCRATCH_DIR)/include
+
+BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a
+
+MAKE_DIRS := $(BUILD_DIR)/libbpf
+$(MAKE_DIRS):
+ $(call msg,MKDIR,,$@)
+ $(Q)mkdir -p $@
+
+# Get Clang's default includes on this system, as opposed to those seen by
+# '--target=bpf'. This fixes "missing" files on some architectures/distros,
+# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
+#
+# Use '-idirafter': Don't interfere with include mechanics except where the
+# build would have failed anyways.
+define get_sys_includes
+$(shell $(1) $(2) -v -E - </dev/null 2>&1 \
+ | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \
+$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}')
+endef
+
+ifneq ($(CROSS_COMPILE),)
+CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%))
+endif
+
+CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH))
+
+BPF_PROG_OBJS := $(patsubst %.c,$(OUTPUT)/%.o,$(wildcard *.bpf.c))
+
+$(BPF_PROG_OBJS): $(OUTPUT)/%.o : %.c $(BPFOBJ) | $(MAKE_DIRS)
+ $(call msg,BPF_PROG,,$@)
+ $(Q)$(CLANG) -O2 -g --target=bpf $(CCINCLUDE) $(CLANG_SYS_INCLUDES) \
+ -c $< -o $@
+
+$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
+ $(APIDIR)/linux/bpf.h \
+ | $(BUILD_DIR)/libbpf
+ $(call msg,MAKE,,$@)
+ $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \
+ EXTRA_CFLAGS='-g -O0' \
+ DESTDIR=$(SCRATCH_DIR) prefix= all install_headers
+
+EXTRA_CLEAN += $(SCRATCH_DIR)
diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/net/bpf_offload.py
index 6157f884d091..3efe44f6e92a 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/net/bpf_offload.py
@@ -29,6 +29,9 @@ import subprocess
import time
import traceback
+from lib.py import NetdevSim, NetdevSimDev
+
+
logfile = None
log_level = 1
skip_extack = False
@@ -145,8 +148,10 @@ def tool(name, args, flags, JSON=True, ns="", fail=True, include_stderr=False):
if JSON:
params += "%s " % (flags["json"])
- if ns != "":
+ if ns:
ns = "ip netns exec %s " % (ns)
+ elif ns is None:
+ ns = ""
if include_stderr:
ret, stdout, stderr = cmd(ns + name + " " + params + args,
@@ -201,11 +206,11 @@ def bpftool_prog_list_wait(expected=0, n_retry=20):
time.sleep(0.05)
raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs))
-def bpftool_map_list_wait(expected=0, n_retry=20):
+def bpftool_map_list_wait(expected=0, n_retry=20, ns=""):
for i in range(n_retry):
- nmaps = len(bpftool_map_list())
- if nmaps == expected:
- return
+ maps = bpftool_map_list(ns=ns)
+ if len(maps) == expected:
+ return maps
time.sleep(0.05)
raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps))
@@ -237,7 +242,7 @@ def tc(args, JSON=True, ns="", fail=True, include_stderr=False):
def ethtool(dev, opt, args, fail=True):
return cmd("ethtool %s %s %s" % (opt, dev["ifname"], args), fail=fail)
-def bpf_obj(name, sec=".text", path=bpf_test_dir,):
+def bpf_obj(name, sec="xdp", path=bpf_test_dir,):
return "obj %s sec %s" % (os.path.join(path, name), sec)
def bpf_pinned(name):
@@ -334,72 +339,16 @@ class DebugfsDir:
return dfs
-class NetdevSimDev:
+class BpfNetdevSimDev(NetdevSimDev):
"""
Class for netdevsim bus device and its attributes.
"""
- @staticmethod
- def ctrl_write(path, val):
- fullpath = os.path.join("/sys/bus/netdevsim/", path)
- try:
- with open(fullpath, "w") as f:
- f.write(val)
- except OSError as e:
- log("WRITE %s: %r" % (fullpath, val), -e.errno)
- raise e
- log("WRITE %s: %r" % (fullpath, val), 0)
-
- def __init__(self, port_count=1):
- addr = 0
- while True:
- try:
- self.ctrl_write("new_device", "%u %u" % (addr, port_count))
- except OSError as e:
- if e.errno == errno.ENOSPC:
- addr += 1
- continue
- raise e
- break
- self.addr = addr
-
- # As probe of netdevsim device might happen from a workqueue,
- # so wait here until all netdevs appear.
- self.wait_for_netdevs(port_count)
-
- ret, out = cmd("udevadm settle", fail=False)
- if ret:
- raise Exception("udevadm settle failed")
- ifnames = self.get_ifnames()
-
+ def __init__(self, port_count=1, ns=None):
+ super().__init__(port_count, ns=ns)
devs.append(self)
- self.dfs_dir = "/sys/kernel/debug/netdevsim/netdevsim%u/" % addr
-
- self.nsims = []
- for port_index in range(port_count):
- self.nsims.append(NetdevSim(self, port_index, ifnames[port_index]))
-
- def get_ifnames(self):
- ifnames = []
- listdir = os.listdir("/sys/bus/netdevsim/devices/netdevsim%u/net/" % self.addr)
- for ifname in listdir:
- ifnames.append(ifname)
- ifnames.sort()
- return ifnames
-
- def wait_for_netdevs(self, port_count):
- timeout = 5
- timeout_start = time.time()
-
- while True:
- try:
- ifnames = self.get_ifnames()
- except FileNotFoundError as e:
- ifnames = []
- if len(ifnames) == port_count:
- break
- if time.time() < timeout_start + timeout:
- continue
- raise Exception("netdevices did not appear within timeout")
+
+ def _make_port(self, port_index, ifname):
+ return BpfNetdevSim(self, port_index, ifname, self.ns)
def dfs_num_bound_progs(self):
path = os.path.join(self.dfs_dir, "bpf_bound_progs")
@@ -415,33 +364,20 @@ class NetdevSimDev:
return progs
def remove(self):
- self.ctrl_write("del_device", "%u" % (self.addr, ))
+ super().remove()
devs.remove(self)
- def remove_nsim(self, nsim):
- self.nsims.remove(nsim)
- self.ctrl_write("devices/netdevsim%u/del_port" % (self.addr, ),
- "%u" % (nsim.port_index, ))
-class NetdevSim:
+class BpfNetdevSim(NetdevSim):
"""
Class for netdevsim netdevice and its attributes.
"""
- def __init__(self, nsimdev, port_index, ifname):
- # In case udev renamed the netdev to according to new schema,
- # check if the name matches the port_index.
- nsimnamere = re.compile("eni\d+np(\d+)")
- match = nsimnamere.match(ifname)
- if match and int(match.groups()[0]) != port_index + 1:
- raise Exception("netdevice name mismatches the expected one")
-
- self.nsimdev = nsimdev
- self.port_index = port_index
- self.ns = ""
+ def __init__(self, nsimdev, port_index, ifname, ns=None):
+ super().__init__(nsimdev, port_index, ifname, ns=ns)
+
self.dfs_dir = "%s/ports/%u/" % (nsimdev.dfs_dir, port_index)
self.dfs_refresh()
- _, [self.dev] = ip("link show dev %s" % ifname)
def __getitem__(self, key):
return self.dev[key]
@@ -468,7 +404,7 @@ class NetdevSim:
raise Exception("Time out waiting for program counts to stabilize want %d/%d, have %d bound, %d loaded" % (bound, total, nbound, nprogs))
def set_ns(self, ns):
- name = "1" if ns == "" else ns
+ name = ns if ns else "1"
ip("link set dev %s netns %s" % (self.dev["ifname"], name), ns=self.ns)
self.ns = ns
@@ -605,7 +541,7 @@ def pin_prog(file_name, idx=0):
return file_name, bpf_pinned(file_name)
def pin_map(file_name, idx=0, expected=1):
- maps = bpftool_map_list(expected=expected)
+ maps = bpftool_map_list_wait(expected=expected)
m = maps[idx]
bpftool("map pin id %d %s" % (m["id"], file_name))
files.append(file_name)
@@ -618,7 +554,7 @@ def check_dev_info_removed(prog_file=None, map_file=None):
ret, err = bpftool("prog show pin %s" % (prog_file), fail=False)
fail(ret != 0, "failed to show prog with removed device")
- bpftool_map_list(expected=0)
+ bpftool_map_list_wait(expected=0)
ret, err = bpftool("map show pin %s" % (map_file), fail=False)
fail(ret == 0, "Showing map with removed device did not fail")
fail(err["error"].find("No such device") == -1,
@@ -642,7 +578,7 @@ def check_dev_info(other_ns, ns, prog_file=None, map_file=None, removed=False):
else:
fail("ifname" in dev.keys(), "Ifname is reported for other ns")
- maps = bpftool_map_list(expected=2, ns=ns)
+ maps = bpftool_map_list_wait(expected=2, ns=ns)
for m in maps:
fail("dev" not in m.keys(), "Device parameters not reported")
fail(dev != m["dev"], "Map's device different than program's")
@@ -744,7 +680,7 @@ def test_multi_prog(simdev, sim, obj, modename, modeid):
start_test("Test multi-attachment XDP - device remove...")
simdev.remove()
- simdev = NetdevSimDev()
+ simdev = BpfNetdevSimDev()
sim, = simdev.nsims
sim.set_ethtool_tc_offloads(True)
return [simdev, sim]
@@ -809,13 +745,13 @@ try:
bytecode = bpf_bytecode("1,6 0 0 4294967295,")
start_test("Test destruction of generic XDP...")
- simdev = NetdevSimDev()
+ simdev = BpfNetdevSimDev()
sim, = simdev.nsims
sim.set_xdp(obj, "generic")
simdev.remove()
bpftool_prog_list_wait(expected=0)
- simdev = NetdevSimDev()
+ simdev = BpfNetdevSimDev()
sim, = simdev.nsims
sim.tc_add_ingress()
@@ -967,7 +903,7 @@ try:
simdev.remove()
bpftool_prog_list_wait(expected=0)
- simdev = NetdevSimDev()
+ simdev = BpfNetdevSimDev()
sim, = simdev.nsims
sim.set_ethtool_tc_offloads(True)
@@ -976,7 +912,7 @@ try:
simdev.remove()
bpftool_prog_list_wait(expected=0)
- simdev = NetdevSimDev()
+ simdev = BpfNetdevSimDev()
sim, = simdev.nsims
sim.set_ethtool_tc_offloads(True)
@@ -1080,7 +1016,7 @@ try:
bpftool_prog_list_wait(expected=0)
start_test("Test attempt to use a program for a wrong device...")
- simdev2 = NetdevSimDev()
+ simdev2 = BpfNetdevSimDev()
sim2, = simdev2.nsims
sim2.set_xdp(obj, "offload")
pin_file, pinned = pin_prog("/sys/fs/bpf/tmp")
@@ -1169,7 +1105,7 @@ try:
clean_up()
bpftool_prog_list_wait(expected=0)
- simdev = NetdevSimDev()
+ simdev = BpfNetdevSimDev()
sim, = simdev.nsims
map_obj = bpf_obj("sample_map_ret0.bpf.o")
start_test("Test loading program with maps...")
@@ -1201,12 +1137,12 @@ try:
clean_up()
bpftool_prog_list_wait(expected=0)
- simdev = NetdevSimDev()
+ simdev = BpfNetdevSimDev()
sim, = simdev.nsims
start_test("Test map update (no flags)...")
sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON
- maps = bpftool_map_list(expected=2)
+ maps = bpftool_map_list_wait(expected=2)
array = maps[0] if maps[0]["type"] == "array" else maps[1]
htab = maps[0] if maps[0]["type"] == "hash" else maps[1]
for m in maps:
@@ -1285,14 +1221,14 @@ try:
bpftool_map_list_wait(expected=0)
simdev.remove()
- simdev = NetdevSimDev()
+ simdev = BpfNetdevSimDev()
sim, = simdev.nsims
sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON
simdev.remove()
bpftool_map_list_wait(expected=0)
start_test("Test map creation fail path...")
- simdev = NetdevSimDev()
+ simdev = BpfNetdevSimDev()
sim, = simdev.nsims
sim.dfs["bpf_map_accept"] = "N"
ret, _ = sim.set_xdp(map_obj, "offload", JSON=False, fail=False)
@@ -1302,9 +1238,9 @@ try:
simdev.remove()
start_test("Test multi-dev ASIC program reuse...")
- simdevA = NetdevSimDev()
+ simdevA = BpfNetdevSimDev()
simA, = simdevA.nsims
- simdevB = NetdevSimDev(3)
+ simdevB = BpfNetdevSimDev(3)
simB1, simB2, simB3 = simdevB.nsims
sims = (simA, simB1, simB2, simB3)
simB = (simB1, simB2, simB3)
diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c
index c79e65581dc3..876c2db02a63 100644
--- a/tools/testing/selftests/net/cmsg_sender.c
+++ b/tools/testing/selftests/net/cmsg_sender.c
@@ -260,15 +260,8 @@ cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz)
SOL_IPV6, IPV6_HOPLIMIT, &opt.v6.hlimit);
if (opt.txtime.ena) {
- struct sock_txtime so_txtime = {
- .clockid = CLOCK_MONOTONIC,
- };
__u64 txtime;
- if (setsockopt(fd, SOL_SOCKET, SO_TXTIME,
- &so_txtime, sizeof(so_txtime)))
- error(ERN_SOCKOPT, errno, "setsockopt TXTIME");
-
txtime = time_start_mono.tv_sec * (1000ULL * 1000 * 1000) +
time_start_mono.tv_nsec +
opt.txtime.delay * 1000;
@@ -284,13 +277,6 @@ cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz)
memcpy(CMSG_DATA(cmsg), &txtime, sizeof(txtime));
}
if (opt.ts.ena) {
- __u32 val = SOF_TIMESTAMPING_SOFTWARE |
- SOF_TIMESTAMPING_OPT_TSONLY;
-
- if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
- &val, sizeof(val)))
- error(ERN_SOCKOPT, errno, "setsockopt TIMESTAMPING");
-
cmsg = (struct cmsghdr *)(cbuf + cmsg_len);
cmsg_len += CMSG_SPACE(sizeof(__u32));
if (cbuf_sz < cmsg_len)
@@ -333,16 +319,17 @@ static const char *cs_ts_info2str(unsigned int info)
return "unknown";
}
-static void
+static unsigned long
cs_read_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz)
{
struct sock_extended_err *see;
struct scm_timestamping *ts;
+ unsigned long ts_seen = 0;
struct cmsghdr *cmsg;
int i, err;
if (!opt.ts.ena)
- return;
+ return 0;
msg->msg_control = cbuf;
msg->msg_controllen = cbuf_sz;
@@ -396,8 +383,11 @@ cs_read_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz)
printf(" %5s ts%d %lluus\n",
cs_ts_info2str(see->ee_info),
i, rel_time);
+ ts_seen |= 1 << see->ee_info;
}
}
+
+ return ts_seen;
}
static void ca_set_sockopts(int fd)
@@ -422,6 +412,24 @@ static void ca_set_sockopts(int fd)
setsockopt(fd, SOL_SOCKET, SO_PRIORITY,
&opt.sockopt.priority, sizeof(opt.sockopt.priority)))
error(ERN_SOCKOPT, errno, "setsockopt SO_PRIORITY");
+
+ if (opt.txtime.ena) {
+ struct sock_txtime so_txtime = {
+ .clockid = CLOCK_MONOTONIC,
+ };
+
+ if (setsockopt(fd, SOL_SOCKET, SO_TXTIME,
+ &so_txtime, sizeof(so_txtime)))
+ error(ERN_SOCKOPT, errno, "setsockopt TXTIME");
+ }
+ if (opt.ts.ena) {
+ __u32 val = SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_OPT_TSONLY;
+
+ if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
+ &val, sizeof(val)))
+ error(ERN_SOCKOPT, errno, "setsockopt TIMESTAMPING");
+ }
}
int main(int argc, char *argv[])
@@ -509,10 +517,16 @@ int main(int argc, char *argv[])
err = ERN_SUCCESS;
if (opt.ts.ena) {
- /* Make sure all timestamps have time to loop back */
- usleep(opt.txtime.delay);
+ unsigned long seen;
+ int i;
- cs_read_cmsg(fd, &msg, cbuf, sizeof(cbuf));
+ /* Make sure all timestamps have time to loop back */
+ for (i = 0; i < 40; i++) {
+ seen = cs_read_cmsg(fd, &msg, cbuf, sizeof(cbuf));
+ if (seen & (1 << SCM_TSTAMP_SND))
+ break;
+ usleep(opt.txtime.delay / 20);
+ }
}
err_out:
diff --git a/tools/testing/selftests/net/cmsg_time.sh b/tools/testing/selftests/net/cmsg_time.sh
index af85267ad1e3..1d7e756644bc 100755
--- a/tools/testing/selftests/net/cmsg_time.sh
+++ b/tools/testing/selftests/net/cmsg_time.sh
@@ -66,10 +66,13 @@ for i in "-4 $TGT4" "-6 $TGT6"; do
awk '/SND/ { if ($3 > 1000) print "OK"; }')
check_result $? "$ts" "OK" "$prot - TXTIME abs"
- ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t -d 1000 |
+ [ "$KSFT_MACHINE_SLOW" = yes ] && delay=8000 || delay=1000
+
+ ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t -d $delay |
awk '/SND/ {snd=$3}
/SCHED/ {sch=$3}
- END { if (snd - sch > 500) print "OK"; }')
+ END { if (snd - sch > '$((delay/2))') print "OK";
+ else print snd, "-", sch, "<", '$((delay/2))'; }')
check_result $? "$ts" "OK" "$prot - TXTIME rel"
done
done
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 5e4390cac17e..5b9baf708950 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -26,10 +26,10 @@ CONFIG_INET_ESP=y
CONFIG_INET_ESP_OFFLOAD=y
CONFIG_NET_FOU=y
CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_IP_GRE=m
CONFIG_NETFILTER=y
CONFIG_NETFILTER_ADVANCED=y
CONFIG_NF_CONNTRACK=m
+CONFIG_IPV6_MROUTE=y
CONFIG_IPV6_SIT=y
CONFIG_IP_DCCP=m
CONFIG_NF_NAT=m
@@ -74,7 +74,12 @@ CONFIG_NET_SCH_ETF=m
CONFIG_NET_SCH_NETEM=y
CONFIG_NET_SCH_PRIO=m
CONFIG_NFT_COMPAT=m
+CONFIG_NF_CONNTRACK_OVS=y
CONFIG_NF_FLOW_TABLE=m
+CONFIG_OPENVSWITCH=m
+CONFIG_OPENVSWITCH_GENEVE=m
+CONFIG_OPENVSWITCH_GRE=m
+CONFIG_OPENVSWITCH_VXLAN=m
CONFIG_PSAMPLE=m
CONFIG_TCP_MD5SIG=y
CONFIG_TEST_BLACKHOLE_DEV=m
@@ -100,3 +105,5 @@ CONFIG_NETFILTER_XT_MATCH_POLICY=m
CONFIG_CRYPTO_ARIA=y
CONFIG_XFRM_INTERFACE=m
CONFIG_XFRM_USER=m
+CONFIG_IP_NF_MATCH_RPFILTER=m
+CONFIG_IP6_NF_MATCH_RPFILTER=m
diff --git a/tools/testing/selftests/net/epoll_busy_poll.c b/tools/testing/selftests/net/epoll_busy_poll.c
new file mode 100644
index 000000000000..16e457c2f877
--- /dev/null
+++ b/tools/testing/selftests/net/epoll_busy_poll.c
@@ -0,0 +1,320 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+/* Basic per-epoll context busy poll test.
+ *
+ * Only tests the ioctls, but should be expanded to test two connected hosts in
+ * the future
+ */
+
+#define _GNU_SOURCE
+
+#include <error.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/capability.h>
+
+#include <sys/epoll.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+
+#include "../kselftest_harness.h"
+
+/* if the headers haven't been updated, we need to define some things */
+#if !defined(EPOLL_IOC_TYPE)
+struct epoll_params {
+ uint32_t busy_poll_usecs;
+ uint16_t busy_poll_budget;
+ uint8_t prefer_busy_poll;
+
+ /* pad the struct to a multiple of 64bits */
+ uint8_t __pad;
+};
+
+#define EPOLL_IOC_TYPE 0x8A
+#define EPIOCSPARAMS _IOW(EPOLL_IOC_TYPE, 0x01, struct epoll_params)
+#define EPIOCGPARAMS _IOR(EPOLL_IOC_TYPE, 0x02, struct epoll_params)
+#endif
+
+FIXTURE(invalid_fd)
+{
+ int invalid_fd;
+ struct epoll_params params;
+};
+
+FIXTURE_SETUP(invalid_fd)
+{
+ int ret;
+
+ ret = socket(AF_UNIX, SOCK_DGRAM, 0);
+ EXPECT_NE(-1, ret)
+ TH_LOG("error creating unix socket");
+
+ self->invalid_fd = ret;
+}
+
+FIXTURE_TEARDOWN(invalid_fd)
+{
+ int ret;
+
+ ret = close(self->invalid_fd);
+ EXPECT_EQ(0, ret);
+}
+
+TEST_F(invalid_fd, test_invalid_fd)
+{
+ int ret;
+
+ ret = ioctl(self->invalid_fd, EPIOCGPARAMS, &self->params);
+
+ EXPECT_EQ(-1, ret)
+ TH_LOG("EPIOCGPARAMS on invalid epoll FD should error");
+
+ EXPECT_EQ(ENOTTY, errno)
+ TH_LOG("EPIOCGPARAMS on invalid epoll FD should set errno to ENOTTY");
+
+ memset(&self->params, 0, sizeof(struct epoll_params));
+
+ ret = ioctl(self->invalid_fd, EPIOCSPARAMS, &self->params);
+
+ EXPECT_EQ(-1, ret)
+ TH_LOG("EPIOCSPARAMS on invalid epoll FD should error");
+
+ EXPECT_EQ(ENOTTY, errno)
+ TH_LOG("EPIOCSPARAMS on invalid epoll FD should set errno to ENOTTY");
+}
+
+FIXTURE(epoll_busy_poll)
+{
+ int fd;
+ struct epoll_params params;
+ struct epoll_params *invalid_params;
+ cap_t caps;
+};
+
+FIXTURE_SETUP(epoll_busy_poll)
+{
+ int ret;
+
+ ret = epoll_create1(0);
+ EXPECT_NE(-1, ret)
+ TH_LOG("epoll_create1 failed?");
+
+ self->fd = ret;
+
+ self->caps = cap_get_proc();
+ EXPECT_NE(NULL, self->caps);
+}
+
+FIXTURE_TEARDOWN(epoll_busy_poll)
+{
+ int ret;
+
+ ret = close(self->fd);
+ EXPECT_EQ(0, ret);
+
+ ret = cap_free(self->caps);
+ EXPECT_NE(-1, ret)
+ TH_LOG("unable to free capabilities");
+}
+
+TEST_F(epoll_busy_poll, test_get_params)
+{
+ /* begin by getting the epoll params from the kernel
+ *
+ * the default should be default and all fields should be zero'd by the
+ * kernel, so set params fields to garbage to test this.
+ */
+ int ret = 0;
+
+ self->params.busy_poll_usecs = 0xff;
+ self->params.busy_poll_budget = 0xff;
+ self->params.prefer_busy_poll = 1;
+ self->params.__pad = 0xf;
+
+ ret = ioctl(self->fd, EPIOCGPARAMS, &self->params);
+ EXPECT_EQ(0, ret)
+ TH_LOG("ioctl EPIOCGPARAMS should succeed");
+
+ EXPECT_EQ(0, self->params.busy_poll_usecs)
+ TH_LOG("EPIOCGPARAMS busy_poll_usecs should have been 0");
+
+ EXPECT_EQ(0, self->params.busy_poll_budget)
+ TH_LOG("EPIOCGPARAMS busy_poll_budget should have been 0");
+
+ EXPECT_EQ(0, self->params.prefer_busy_poll)
+ TH_LOG("EPIOCGPARAMS prefer_busy_poll should have been 0");
+
+ EXPECT_EQ(0, self->params.__pad)
+ TH_LOG("EPIOCGPARAMS __pad should have been 0");
+
+ self->invalid_params = (struct epoll_params *)0xdeadbeef;
+ ret = ioctl(self->fd, EPIOCGPARAMS, self->invalid_params);
+
+ EXPECT_EQ(-1, ret)
+ TH_LOG("EPIOCGPARAMS should error with invalid params");
+
+ EXPECT_EQ(EFAULT, errno)
+ TH_LOG("EPIOCGPARAMS with invalid params should set errno to EFAULT");
+}
+
+TEST_F(epoll_busy_poll, test_set_invalid)
+{
+ int ret;
+
+ memset(&self->params, 0, sizeof(struct epoll_params));
+
+ self->params.__pad = 1;
+
+ ret = ioctl(self->fd, EPIOCSPARAMS, &self->params);
+
+ EXPECT_EQ(-1, ret)
+ TH_LOG("EPIOCSPARAMS non-zero __pad should error");
+
+ EXPECT_EQ(EINVAL, errno)
+ TH_LOG("EPIOCSPARAMS non-zero __pad errno should be EINVAL");
+
+ self->params.__pad = 0;
+ self->params.busy_poll_usecs = (uint32_t)INT_MAX + 1;
+
+ ret = ioctl(self->fd, EPIOCSPARAMS, &self->params);
+
+ EXPECT_EQ(-1, ret)
+ TH_LOG("EPIOCSPARAMS should error busy_poll_usecs > S32_MAX");
+
+ EXPECT_EQ(EINVAL, errno)
+ TH_LOG("EPIOCSPARAMS busy_poll_usecs > S32_MAX errno should be EINVAL");
+
+ self->params.__pad = 0;
+ self->params.busy_poll_usecs = 32;
+ self->params.prefer_busy_poll = 2;
+
+ ret = ioctl(self->fd, EPIOCSPARAMS, &self->params);
+
+ EXPECT_EQ(-1, ret)
+ TH_LOG("EPIOCSPARAMS should error prefer_busy_poll > 1");
+
+ EXPECT_EQ(EINVAL, errno)
+ TH_LOG("EPIOCSPARAMS prefer_busy_poll > 1 errno should be EINVAL");
+
+ self->params.__pad = 0;
+ self->params.busy_poll_usecs = 32;
+ self->params.prefer_busy_poll = 1;
+
+ /* set budget well above kernel's NAPI_POLL_WEIGHT of 64 */
+ self->params.busy_poll_budget = UINT16_MAX;
+
+ /* test harness should run with CAP_NET_ADMIN, but let's make sure */
+ cap_flag_value_t tmp;
+
+ ret = cap_get_flag(self->caps, CAP_NET_ADMIN, CAP_EFFECTIVE, &tmp);
+ EXPECT_EQ(0, ret)
+ TH_LOG("unable to get CAP_NET_ADMIN cap flag");
+
+ EXPECT_EQ(CAP_SET, tmp)
+ TH_LOG("expecting CAP_NET_ADMIN to be set for the test harness");
+
+ /* at this point we know CAP_NET_ADMIN is available, so setting the
+ * params with a busy_poll_budget > NAPI_POLL_WEIGHT should succeed
+ */
+ ret = ioctl(self->fd, EPIOCSPARAMS, &self->params);
+
+ EXPECT_EQ(0, ret)
+ TH_LOG("EPIOCSPARAMS should allow busy_poll_budget > NAPI_POLL_WEIGHT");
+
+ /* remove CAP_NET_ADMIN from our effective set */
+ cap_value_t net_admin[] = { CAP_NET_ADMIN };
+
+ ret = cap_set_flag(self->caps, CAP_EFFECTIVE, 1, net_admin, CAP_CLEAR);
+ EXPECT_EQ(0, ret)
+ TH_LOG("couldn't clear CAP_NET_ADMIN");
+
+ ret = cap_set_proc(self->caps);
+ EXPECT_EQ(0, ret)
+ TH_LOG("cap_set_proc should drop CAP_NET_ADMIN");
+
+ /* this is now expected to fail */
+ ret = ioctl(self->fd, EPIOCSPARAMS, &self->params);
+
+ EXPECT_EQ(-1, ret)
+ TH_LOG("EPIOCSPARAMS should error busy_poll_budget > NAPI_POLL_WEIGHT");
+
+ EXPECT_EQ(EPERM, errno)
+ TH_LOG("EPIOCSPARAMS errno should be EPERM busy_poll_budget > NAPI_POLL_WEIGHT");
+
+ /* restore CAP_NET_ADMIN to our effective set */
+ ret = cap_set_flag(self->caps, CAP_EFFECTIVE, 1, net_admin, CAP_SET);
+ EXPECT_EQ(0, ret)
+ TH_LOG("couldn't restore CAP_NET_ADMIN");
+
+ ret = cap_set_proc(self->caps);
+ EXPECT_EQ(0, ret)
+ TH_LOG("cap_set_proc should set CAP_NET_ADMIN");
+
+ self->invalid_params = (struct epoll_params *)0xdeadbeef;
+ ret = ioctl(self->fd, EPIOCSPARAMS, self->invalid_params);
+
+ EXPECT_EQ(-1, ret)
+ TH_LOG("EPIOCSPARAMS should error when epoll_params is invalid");
+
+ EXPECT_EQ(EFAULT, errno)
+ TH_LOG("EPIOCSPARAMS should set errno to EFAULT when epoll_params is invalid");
+}
+
+TEST_F(epoll_busy_poll, test_set_and_get_valid)
+{
+ int ret;
+
+ memset(&self->params, 0, sizeof(struct epoll_params));
+
+ self->params.busy_poll_usecs = 25;
+ self->params.busy_poll_budget = 16;
+ self->params.prefer_busy_poll = 1;
+
+ ret = ioctl(self->fd, EPIOCSPARAMS, &self->params);
+
+ EXPECT_EQ(0, ret)
+ TH_LOG("EPIOCSPARAMS with valid params should not error");
+
+ /* check that the kernel returns the same values back */
+
+ memset(&self->params, 0, sizeof(struct epoll_params));
+
+ ret = ioctl(self->fd, EPIOCGPARAMS, &self->params);
+
+ EXPECT_EQ(0, ret)
+ TH_LOG("EPIOCGPARAMS should not error");
+
+ EXPECT_EQ(25, self->params.busy_poll_usecs)
+ TH_LOG("params.busy_poll_usecs incorrect");
+
+ EXPECT_EQ(16, self->params.busy_poll_budget)
+ TH_LOG("params.busy_poll_budget incorrect");
+
+ EXPECT_EQ(1, self->params.prefer_busy_poll)
+ TH_LOG("params.prefer_busy_poll incorrect");
+
+ EXPECT_EQ(0, self->params.__pad)
+ TH_LOG("params.__pad was not 0");
+}
+
+TEST_F(epoll_busy_poll, test_invalid_ioctl)
+{
+ int invalid_ioctl = EPIOCGPARAMS + 10;
+ int ret;
+
+ ret = ioctl(self->fd, invalid_ioctl, &self->params);
+
+ EXPECT_EQ(-1, ret)
+ TH_LOG("invalid ioctl should return error");
+
+ EXPECT_EQ(EINVAL, errno)
+ TH_LOG("invalid ioctl should set errno to EINVAL");
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index 51157a5559b7..7c01f58a20de 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -9,6 +9,7 @@ PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
RTABLE=100
RTABLE_PEER=101
+RTABLE_VRF=102
GW_IP4=192.51.100.2
SRC_IP=192.51.100.3
GW_IP6=2001:db8:1::2
@@ -17,7 +18,14 @@ SRC_IP6=2001:db8:1::3
DEV_ADDR=192.51.100.1
DEV_ADDR6=2001:db8:1::1
DEV=dummy0
-TESTS="fib_rule6 fib_rule4 fib_rule6_connect fib_rule4_connect"
+TESTS="
+ fib_rule6
+ fib_rule4
+ fib_rule6_connect
+ fib_rule4_connect
+ fib_rule6_vrf
+ fib_rule4_vrf
+"
SELFTEST_PATH=""
@@ -27,13 +35,18 @@ log_test()
local expected=$2
local msg="$3"
+ $IP rule show | grep -q l3mdev
+ if [ $? -eq 0 ]; then
+ msg="$msg (VRF)"
+ fi
+
if [ ${rc} -eq ${expected} ]; then
nsuccess=$((nsuccess+1))
- printf "\n TEST: %-50s [ OK ]\n" "${msg}"
+ printf "\n TEST: %-60s [ OK ]\n" "${msg}"
else
ret=1
nfail=$((nfail+1))
- printf "\n TEST: %-50s [FAIL]\n" "${msg}"
+ printf "\n TEST: %-60s [FAIL]\n" "${msg}"
if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
echo
echo "hit enter to continue, 'q' to quit"
@@ -130,6 +143,17 @@ cleanup_peer()
ip netns del $peerns
}
+setup_vrf()
+{
+ $IP link add name vrf0 up type vrf table $RTABLE_VRF
+ $IP link set dev $DEV master vrf0
+}
+
+cleanup_vrf()
+{
+ $IP link del dev vrf0
+}
+
fib_check_iproute_support()
{
ip rule help 2>&1 | grep -q $1
@@ -248,6 +272,13 @@ fib_rule6_test()
fi
}
+fib_rule6_vrf_test()
+{
+ setup_vrf
+ fib_rule6_test
+ cleanup_vrf
+}
+
# Verify that the IPV6_TCLASS option of UDPv6 and TCPv6 sockets is properly
# taken into account when connecting the socket and when sending packets.
fib_rule6_connect_test()
@@ -385,6 +416,13 @@ fib_rule4_test()
fi
}
+fib_rule4_vrf_test()
+{
+ setup_vrf
+ fib_rule4_test
+ cleanup_vrf
+}
+
# Verify that the IP_TOS option of UDPv4 and TCPv4 sockets is properly taken
# into account when connecting the socket and when sending packets.
fib_rule4_connect_test()
@@ -467,6 +505,8 @@ do
fib_rule4_test|fib_rule4) fib_rule4_test;;
fib_rule6_connect_test|fib_rule6_connect) fib_rule6_connect_test;;
fib_rule4_connect_test|fib_rule4_connect) fib_rule4_connect_test;;
+ fib_rule6_vrf_test|fib_rule6_vrf) fib_rule6_vrf_test;;
+ fib_rule4_vrf_test|fib_rule4_vrf) fib_rule4_vrf_test;;
help) echo "Test names: $TESTS"; exit 0;;
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 73895711cdf4..5f3c28fc8624 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -1737,53 +1737,53 @@ ipv4_rt_dsfield()
# DSCP 0x10 should match the specific route, no matter the ECN bits
$IP route get fibmatch 172.16.102.1 dsfield 0x10 | \
- grep -q "via 172.16.103.2"
+ grep -q "172.16.102.0/24 tos 0x10 via 172.16.103.2"
log_test $? 0 "IPv4 route with DSCP and ECN:Not-ECT"
$IP route get fibmatch 172.16.102.1 dsfield 0x11 | \
- grep -q "via 172.16.103.2"
+ grep -q "172.16.102.0/24 tos 0x10 via 172.16.103.2"
log_test $? 0 "IPv4 route with DSCP and ECN:ECT(1)"
$IP route get fibmatch 172.16.102.1 dsfield 0x12 | \
- grep -q "via 172.16.103.2"
+ grep -q "172.16.102.0/24 tos 0x10 via 172.16.103.2"
log_test $? 0 "IPv4 route with DSCP and ECN:ECT(0)"
$IP route get fibmatch 172.16.102.1 dsfield 0x13 | \
- grep -q "via 172.16.103.2"
+ grep -q "172.16.102.0/24 tos 0x10 via 172.16.103.2"
log_test $? 0 "IPv4 route with DSCP and ECN:CE"
# Unknown DSCP should match the generic route, no matter the ECN bits
$IP route get fibmatch 172.16.102.1 dsfield 0x14 | \
- grep -q "via 172.16.101.2"
+ grep -q "172.16.102.0/24 via 172.16.101.2"
log_test $? 0 "IPv4 route with unknown DSCP and ECN:Not-ECT"
$IP route get fibmatch 172.16.102.1 dsfield 0x15 | \
- grep -q "via 172.16.101.2"
+ grep -q "172.16.102.0/24 via 172.16.101.2"
log_test $? 0 "IPv4 route with unknown DSCP and ECN:ECT(1)"
$IP route get fibmatch 172.16.102.1 dsfield 0x16 | \
- grep -q "via 172.16.101.2"
+ grep -q "172.16.102.0/24 via 172.16.101.2"
log_test $? 0 "IPv4 route with unknown DSCP and ECN:ECT(0)"
$IP route get fibmatch 172.16.102.1 dsfield 0x17 | \
- grep -q "via 172.16.101.2"
+ grep -q "172.16.102.0/24 via 172.16.101.2"
log_test $? 0 "IPv4 route with unknown DSCP and ECN:CE"
# Null DSCP should match the generic route, no matter the ECN bits
$IP route get fibmatch 172.16.102.1 dsfield 0x00 | \
- grep -q "via 172.16.101.2"
+ grep -q "172.16.102.0/24 via 172.16.101.2"
log_test $? 0 "IPv4 route with no DSCP and ECN:Not-ECT"
$IP route get fibmatch 172.16.102.1 dsfield 0x01 | \
- grep -q "via 172.16.101.2"
+ grep -q "172.16.102.0/24 via 172.16.101.2"
log_test $? 0 "IPv4 route with no DSCP and ECN:ECT(1)"
$IP route get fibmatch 172.16.102.1 dsfield 0x02 | \
- grep -q "via 172.16.101.2"
+ grep -q "172.16.102.0/24 via 172.16.101.2"
log_test $? 0 "IPv4 route with no DSCP and ECN:ECT(0)"
$IP route get fibmatch 172.16.102.1 dsfield 0x03 | \
- grep -q "via 172.16.101.2"
+ grep -q "172.16.102.0/24 via 172.16.101.2"
log_test $? 0 "IPv4 route with no DSCP and ECN:CE"
}
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index 535865b3d1d6..224346426ef2 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -15,18 +15,12 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \
bridge_vlan_unaware.sh \
custom_multipath_hash.sh \
dual_vxlan_bridge.sh \
- ethtool_extended_state.sh \
- ethtool_mm.sh \
- ethtool_rmon.sh \
- ethtool.sh \
gre_custom_multipath_hash.sh \
gre_inner_v4_multipath.sh \
gre_inner_v6_multipath.sh \
gre_multipath_nh_res.sh \
gre_multipath_nh.sh \
gre_multipath.sh \
- hw_stats_l3.sh \
- hw_stats_l3_gre.sh \
ip6_forward_instats_vrf.sh \
ip6gre_custom_multipath_hash.sh \
ip6gre_flat_key.sh \
@@ -43,8 +37,9 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \
ipip_hier_gre_key.sh \
ipip_hier_gre_keys.sh \
ipip_hier_gre.sh \
+ lib_sh_test.sh \
local_termination.sh \
- loopback.sh \
+ min_max_mtu.sh \
mirror_gre_bound.sh \
mirror_gre_bridge_1d.sh \
mirror_gre_bridge_1d_vlan.sh \
@@ -76,6 +71,7 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \
router_broadcast.sh \
router_mpath_nh_res.sh \
router_mpath_nh.sh \
+ router_mpath_seed.sh \
router_multicast.sh \
router_multipath.sh \
router_nh.sh \
@@ -113,7 +109,6 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \
vxlan_symmetric.sh
TEST_FILES := devlink_lib.sh \
- ethtool_lib.sh \
fib_offload_lib.sh \
forwarding.config.sample \
ip6gre_lib.sh \
diff --git a/tools/testing/selftests/net/forwarding/README b/tools/testing/selftests/net/forwarding/README
index b8a2af8fcfb7..7fdb6a9ca543 100644
--- a/tools/testing/selftests/net/forwarding/README
+++ b/tools/testing/selftests/net/forwarding/README
@@ -56,3 +56,36 @@ o Checks shall be added to lib.sh for any external dependencies.
o Code shall be checked using ShellCheck [1] prior to submission.
1. https://www.shellcheck.net/
+
+Customization
+=============
+
+The forwarding selftests framework uses a number of variables that
+influence its behavior and tools it invokes, and how it invokes them, in
+various ways. A number of these variables can be overridden. The way these
+overridable variables are specified is typically one of the following two
+syntaxes:
+
+ : "${VARIABLE:=default_value}"
+ VARIABLE=${VARIABLE:=default_value}
+
+Any of these variables can be overridden. Notably net/forwarding/lib.sh and
+net/lib.sh contain a number of overridable variables.
+
+One way of overriding these variables is through the environment:
+
+ PAUSE_ON_FAIL=yes ./some_test.sh
+
+The variable NETIFS is special. Since it is an array variable, there is no
+way to pass it through the environment. Its value can instead be given as
+consecutive arguments to the selftest:
+
+ ./some_test.sh swp{1..8}
+
+A way to customize variables in a persistent fashion is to create a file
+named forwarding.config in this directory. lib.sh sources the file if
+present, so it can contain any shell code. Typically it will contain
+assignments of variables whose value should be overridden.
+
+forwarding.config.sample is available in the directory as an example of
+how forwarding.config might look.
diff --git a/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh b/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh
index 0760a34b7114..a21b7085da2e 100755
--- a/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh
@@ -178,6 +178,22 @@ fdb_del()
check_err $? "Failed to remove a FDB entry of type ${type}"
}
+check_fdb_n_learned_support()
+{
+ if ! ip link help bridge 2>&1 | grep -q "fdb_max_learned"; then
+ echo "SKIP: iproute2 too old, missing bridge max learned support"
+ exit $ksft_skip
+ fi
+
+ ip link add dev br0 type bridge
+ local learned=$(fdb_get_n_learned)
+ ip link del dev br0
+ if [ "$learned" == "null" ]; then
+ echo "SKIP: kernel too old; bridge fdb_n_learned feature not supported."
+ exit $ksft_skip
+ fi
+}
+
check_accounting_one_type()
{
local type=$1 is_counted=$2 overrides_learned=$3
@@ -274,6 +290,8 @@ check_limit()
done
}
+check_fdb_n_learned_support
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index 2aa66d2a1702..e6a3e04fd83f 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -478,10 +478,10 @@ v3exc_timeout_test()
RET=0
local X=("192.0.2.20" "192.0.2.30")
- # GMI should be 3 seconds
+ # GMI should be 5 seconds
ip link set dev br0 type bridge mcast_query_interval 100 \
mcast_query_response_interval 100 \
- mcast_membership_interval 300
+ mcast_membership_interval 500
v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
ip link set dev br0 type bridge mcast_query_interval 500 \
@@ -489,7 +489,7 @@ v3exc_timeout_test()
mcast_membership_interval 1500
$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_ALLOW2" -q
- sleep 3
+ sleep 5
bridge -j -d -s mdb show dev br0 \
| jq -e ".[].mdb[] | \
select(.grp == \"$TEST_GROUP\" and \
diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh
index e2b9ff773c6b..f84ab2e65754 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mld.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh
@@ -478,10 +478,10 @@ mldv2exc_timeout_test()
RET=0
local X=("2001:db8:1::20" "2001:db8:1::30")
- # GMI should be 3 seconds
+ # GMI should be 5 seconds
ip link set dev br0 type bridge mcast_query_interval 100 \
mcast_query_response_interval 100 \
- mcast_membership_interval 300
+ mcast_membership_interval 500
mldv2exclude_prepare $h1
ip link set dev br0 type bridge mcast_query_interval 500 \
@@ -489,7 +489,7 @@ mldv2exc_timeout_test()
mcast_membership_interval 1500
$MZ $h1 -c 1 $MZPKT_ALLOW2 -q
- sleep 3
+ sleep 5
bridge -j -d -s mdb show dev br0 \
| jq -e ".[].mdb[] | \
select(.grp == \"$TEST_GROUP\" and \
diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
index f1de525cfa55..62a05bca1e82 100644
--- a/tools/testing/selftests/net/forwarding/devlink_lib.sh
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -122,6 +122,8 @@ devlink_reload()
still_pending=$(devlink resource show "$DEVLINK_DEV" | \
grep -c "size_new")
check_err $still_pending "Failed reload - There are still unset sizes"
+
+ udevadm settle
}
declare -A DEVLINK_ORIG
diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample
index 1fc4f0242fc5..f1ca95e79a65 100644
--- a/tools/testing/selftests/net/forwarding/forwarding.config.sample
+++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample
@@ -3,51 +3,28 @@
##############################################################################
# Topology description. p1 looped back to p2, p3 to p4 and so on.
-declare -A NETIFS
-NETIFS[p1]=veth0
-NETIFS[p2]=veth1
-NETIFS[p3]=veth2
-NETIFS[p4]=veth3
-NETIFS[p5]=veth4
-NETIFS[p6]=veth5
-NETIFS[p7]=veth6
-NETIFS[p8]=veth7
-NETIFS[p9]=veth8
-NETIFS[p10]=veth9
+NETIFS=(
+ [p1]=veth0
+ [p2]=veth1
+ [p3]=veth2
+ [p4]=veth3
+ [p5]=veth4
+ [p6]=veth5
+ [p7]=veth6
+ [p8]=veth7
+ [p9]=veth8
+ [p10]=veth9
+)
# Port that does not have a cable connected.
NETIF_NO_CABLE=eth8
##############################################################################
-# Defines
+# In addition to the topology-related variables, it is also possible to override
+# in this file other variables that net/lib.sh, net/forwarding/lib.sh or other
+# libraries or selftests use. E.g.:
-# IPv4 ping utility name
-PING=ping
-# IPv6 ping utility name. Some distributions use 'ping' for IPv6.
PING6=ping6
-# Packet generator. Some distributions use 'mz'.
MZ=mausezahn
-# mausezahn delay between transmissions in microseconds.
-MZ_DELAY=0
-# Time to wait after interfaces participating in the test are all UP
WAIT_TIME=5
-# Whether to pause on failure or not.
-PAUSE_ON_FAIL=no
-# Whether to pause on cleanup or not.
-PAUSE_ON_CLEANUP=no
-# Type of network interface to create
-NETIF_TYPE=veth
-# Whether to create virtual interfaces (veth) or not
-NETIF_CREATE=yes
-# Timeout (in seconds) before ping exits regardless of how many packets have
-# been sent or received
-PING_TIMEOUT=5
-# Minimum ageing_time (in centiseconds) supported by hardware
-LOW_AGEING_TIME=1000
-# Flag for tc match, supposed to be skip_sw/skip_hw which means do not process
-# filter by software/hardware
-TC_FLAG=skip_hw
-# IPv6 traceroute utility name.
-TROUTE6=traceroute6
-
diff --git a/tools/testing/selftests/net/forwarding/ipip_lib.sh b/tools/testing/selftests/net/forwarding/ipip_lib.sh
index 30f36a57bae6..01e62c4ac94d 100644
--- a/tools/testing/selftests/net/forwarding/ipip_lib.sh
+++ b/tools/testing/selftests/net/forwarding/ipip_lib.sh
@@ -141,7 +141,6 @@
# | $h2 + |
# | 192.0.2.18/28 |
# +---------------------------+
-source lib.sh
h1_create()
{
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index e579c2e0c462..ff96bb7535ff 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -2,68 +2,133 @@
# SPDX-License-Identifier: GPL-2.0
##############################################################################
+# Topology description. p1 looped back to p2, p3 to p4 and so on.
+
+declare -A NETIFS=(
+ [p1]=veth0
+ [p2]=veth1
+ [p3]=veth2
+ [p4]=veth3
+ [p5]=veth4
+ [p6]=veth5
+ [p7]=veth6
+ [p8]=veth7
+ [p9]=veth8
+ [p10]=veth9
+)
+
+# Port that does not have a cable connected.
+: "${NETIF_NO_CABLE:=eth8}"
+
+##############################################################################
# Defines
-# Can be overridden by the configuration file.
-PING=${PING:=ping}
-PING6=${PING6:=ping6}
-MZ=${MZ:=mausezahn}
-MZ_DELAY=${MZ_DELAY:=0}
-ARPING=${ARPING:=arping}
-TEAMD=${TEAMD:=teamd}
-WAIT_TIME=${WAIT_TIME:=5}
-PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
-PAUSE_ON_CLEANUP=${PAUSE_ON_CLEANUP:=no}
-NETIF_TYPE=${NETIF_TYPE:=veth}
-NETIF_CREATE=${NETIF_CREATE:=yes}
-MCD=${MCD:=smcrouted}
-MC_CLI=${MC_CLI:=smcroutectl}
-PING_COUNT=${PING_COUNT:=10}
-PING_TIMEOUT=${PING_TIMEOUT:=5}
-WAIT_TIMEOUT=${WAIT_TIMEOUT:=20}
-INTERFACE_TIMEOUT=${INTERFACE_TIMEOUT:=600}
-LOW_AGEING_TIME=${LOW_AGEING_TIME:=1000}
-REQUIRE_JQ=${REQUIRE_JQ:=yes}
-REQUIRE_MZ=${REQUIRE_MZ:=yes}
-REQUIRE_MTOOLS=${REQUIRE_MTOOLS:=no}
-STABLE_MAC_ADDRS=${STABLE_MAC_ADDRS:=no}
-TCPDUMP_EXTRA_FLAGS=${TCPDUMP_EXTRA_FLAGS:=}
-TROUTE6=${TROUTE6:=traceroute6}
+# Networking utilities.
+: "${PING:=ping}"
+: "${PING6:=ping6}" # Some distros just use ping.
+: "${ARPING:=arping}"
+: "${TROUTE6:=traceroute6}"
+
+# Packet generator.
+: "${MZ:=mausezahn}" # Some distributions use 'mz'.
+: "${MZ_DELAY:=0}"
+
+# Host configuration tools.
+: "${TEAMD:=teamd}"
+: "${MCD:=smcrouted}"
+: "${MC_CLI:=smcroutectl}"
+
+# Constants for netdevice bring-up:
+# Default time in seconds to wait for an interface to come up before giving up
+# and bailing out. Used during initial setup.
+: "${INTERFACE_TIMEOUT:=600}"
+# Like INTERFACE_TIMEOUT, but default for ad-hoc waiting in testing scripts.
+: "${WAIT_TIMEOUT:=20}"
+# Time to wait after interfaces participating in the test are all UP.
+: "${WAIT_TIME:=5}"
+
+# Whether to pause on, respectively, after a failure and before cleanup.
+: "${PAUSE_ON_FAIL:=no}"
+: "${PAUSE_ON_CLEANUP:=no}"
+
+# Whether to create virtual interfaces, and what netdevice type they should be.
+: "${NETIF_CREATE:=yes}"
+: "${NETIF_TYPE:=veth}"
+
+# Constants for ping tests:
+# How many packets should be sent.
+: "${PING_COUNT:=10}"
+# Timeout (in seconds) before ping exits regardless of how many packets have
+# been sent or received
+: "${PING_TIMEOUT:=5}"
+
+# Minimum ageing_time (in centiseconds) supported by hardware
+: "${LOW_AGEING_TIME:=1000}"
+
+# Whether to check for availability of certain tools.
+: "${REQUIRE_JQ:=yes}"
+: "${REQUIRE_MZ:=yes}"
+: "${REQUIRE_MTOOLS:=no}"
+
+# Whether to override MAC addresses on interfaces participating in the test.
+: "${STABLE_MAC_ADDRS:=no}"
+
+# Flags for tcpdump
+: "${TCPDUMP_EXTRA_FLAGS:=}"
+
+# Flags for TC filters.
+: "${TC_FLAG:=skip_hw}"
+
+# Whether the machine is "slow" -- i.e. might be incapable of running tests
+# involving heavy traffic. This might be the case on a debug kernel, a VM, or
+# e.g. a low-power board.
+: "${KSFT_MACHINE_SLOW:=no}"
-net_forwarding_dir=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+##############################################################################
+# Find netifs by test-specified driver name
-if [[ -f $net_forwarding_dir/forwarding.config ]]; then
- source "$net_forwarding_dir/forwarding.config"
-fi
+driver_name_get()
+{
+ local dev=$1; shift
+ local driver_path="/sys/class/net/$dev/device/driver"
-source "$net_forwarding_dir/../lib.sh"
+ if [[ -L $driver_path ]]; then
+ basename `realpath $driver_path`
+ fi
+}
-# timeout in seconds
-slowwait()
+netif_find_driver()
{
- local timeout=$1; shift
+ local ifnames=`ip -j link show | jq -r ".[].ifname"`
+ local count=0
- local start_time="$(date -u +%s)"
- while true
+ for ifname in $ifnames
do
- local out
- out=$("$@")
- local ret=$?
- if ((!ret)); then
- echo -n "$out"
- return 0
+ local driver_name=`driver_name_get $ifname`
+ if [[ ! -z $driver_name && $driver_name == $NETIF_FIND_DRIVER ]]; then
+ count=$((count + 1))
+ NETIFS[p$count]="$ifname"
fi
-
- local current_time="$(date -u +%s)"
- if ((current_time - start_time > timeout)); then
- echo -n "$out"
- return 1
- fi
-
- sleep 0.1
done
}
+# Whether to find netdevice according to the driver speficied by the importer
+: "${NETIF_FIND_DRIVER:=}"
+
+if [[ $NETIF_FIND_DRIVER ]]; then
+ unset NETIFS
+ declare -A NETIFS
+ netif_find_driver
+fi
+
+net_forwarding_dir=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+if [[ -f $net_forwarding_dir/forwarding.config ]]; then
+ source "$net_forwarding_dir/forwarding.config"
+fi
+
+source "$net_forwarding_dir/../lib.sh"
+
##############################################################################
# Sanity checks
@@ -205,22 +270,23 @@ check_port_mab_support()
fi
}
-skip_on_veth()
+if [[ "$(id -u)" -ne 0 ]]; then
+ echo "SKIP: need root privileges"
+ exit $ksft_skip
+fi
+
+check_driver()
{
- local kind=$(ip -j -d link show dev ${NETIFS[p1]} |
- jq -r '.[].linkinfo.info_kind')
+ local dev=$1; shift
+ local expected=$1; shift
+ local driver_name=`driver_name_get $dev`
- if [[ $kind == veth ]]; then
- echo "SKIP: Test cannot be run with veth pairs"
+ if [[ $driver_name != $expected ]]; then
+ echo "SKIP: expected driver $expected for $dev, got $driver_name instead"
exit $ksft_skip
fi
}
-if [[ "$(id -u)" -ne 0 ]]; then
- echo "SKIP: need root privileges"
- exit $ksft_skip
-fi
-
if [[ "$CHECK_TC" = "yes" ]]; then
check_tc_version
fi
@@ -235,6 +301,21 @@ require_command()
fi
}
+# IPv6 support was added in v3.0
+check_mtools_version()
+{
+ local version="$(msend -v)"
+ local major
+
+ version=${version##msend version }
+ major=$(echo $version | cut -d. -f1)
+
+ if [ $major -lt 3 ]; then
+ echo "SKIP: expected mtools version 3.0, got $version"
+ exit $ksft_skip
+ fi
+}
+
if [[ "$REQUIRE_JQ" = "yes" ]]; then
require_command jq
fi
@@ -242,15 +323,10 @@ if [[ "$REQUIRE_MZ" = "yes" ]]; then
require_command $MZ
fi
if [[ "$REQUIRE_MTOOLS" = "yes" ]]; then
- # https://github.com/vladimiroltean/mtools/
- # patched for IPv6 support
+ # https://github.com/troglobit/mtools
require_command msend
require_command mreceive
-fi
-
-if [[ ! -v NUM_NETIFS ]]; then
- echo "SKIP: importer does not define \"NUM_NETIFS\""
- exit $ksft_skip
+ check_mtools_version
fi
##############################################################################
@@ -271,6 +347,23 @@ done
##############################################################################
# Network interfaces configuration
+if [[ ! -v NUM_NETIFS ]]; then
+ echo "SKIP: importer does not define \"NUM_NETIFS\""
+ exit $ksft_skip
+fi
+
+if (( NUM_NETIFS > ${#NETIFS[@]} )); then
+ echo "SKIP: Importer requires $NUM_NETIFS NETIFS, but only ${#NETIFS[@]} are defined (${NETIFS[@]})"
+ exit $ksft_skip
+fi
+
+for i in $(seq ${#NETIFS[@]}); do
+ if [[ ! ${NETIFS[p$i]} ]]; then
+ echo "SKIP: NETIFS[p$i] not given"
+ exit $ksft_skip
+ fi
+done
+
create_netif_veth()
{
local i
@@ -358,14 +451,31 @@ EXIT_STATUS=0
# Per-test return value. Clear at the beginning of each test.
RET=0
+ret_set_ksft_status()
+{
+ local ksft_status=$1; shift
+ local msg=$1; shift
+
+ RET=$(ksft_status_merge $RET $ksft_status)
+ if (( $? )); then
+ retmsg=$msg
+ fi
+}
+
+# Whether FAILs should be interpreted as XFAILs. Internal.
+FAIL_TO_XFAIL=
+
check_err()
{
local err=$1
local msg=$2
- if [[ $RET -eq 0 && $err -ne 0 ]]; then
- RET=$err
- retmsg=$msg
+ if ((err)); then
+ if [[ $FAIL_TO_XFAIL = yes ]]; then
+ ret_set_ksft_status $ksft_xfail "$msg"
+ else
+ ret_set_ksft_status $ksft_fail "$msg"
+ fi
fi
}
@@ -374,10 +484,7 @@ check_fail()
local err=$1
local msg=$2
- if [[ $RET -eq 0 && $err -eq 0 ]]; then
- RET=1
- retmsg=$msg
- fi
+ check_err $((!err)) "$msg"
}
check_err_fail()
@@ -393,6 +500,85 @@ check_err_fail()
fi
}
+xfail_on_slow()
+{
+ if [[ $KSFT_MACHINE_SLOW = yes ]]; then
+ FAIL_TO_XFAIL=yes "$@"
+ else
+ "$@"
+ fi
+}
+
+xfail_on_veth()
+{
+ local dev=$1; shift
+ local kind
+
+ kind=$(ip -j -d link show dev $dev |
+ jq -r '.[].linkinfo.info_kind')
+ if [[ $kind = veth ]]; then
+ FAIL_TO_XFAIL=yes "$@"
+ else
+ "$@"
+ fi
+}
+
+log_test_result()
+{
+ local test_name=$1; shift
+ local opt_str=$1; shift
+ local result=$1; shift
+ local retmsg=$1; shift
+
+ printf "TEST: %-60s [%s]\n" "$test_name $opt_str" "$result"
+ if [[ $retmsg ]]; then
+ printf "\t%s\n" "$retmsg"
+ fi
+}
+
+pause_on_fail()
+{
+ if [[ $PAUSE_ON_FAIL == yes ]]; then
+ echo "Hit enter to continue, 'q' to quit"
+ read a
+ [[ $a == q ]] && exit 1
+ fi
+}
+
+handle_test_result_pass()
+{
+ local test_name=$1; shift
+ local opt_str=$1; shift
+
+ log_test_result "$test_name" "$opt_str" " OK "
+}
+
+handle_test_result_fail()
+{
+ local test_name=$1; shift
+ local opt_str=$1; shift
+
+ log_test_result "$test_name" "$opt_str" FAIL "$retmsg"
+ pause_on_fail
+}
+
+handle_test_result_xfail()
+{
+ local test_name=$1; shift
+ local opt_str=$1; shift
+
+ log_test_result "$test_name" "$opt_str" XFAIL "$retmsg"
+ pause_on_fail
+}
+
+handle_test_result_skip()
+{
+ local test_name=$1; shift
+ local opt_str=$1; shift
+
+ log_test_result "$test_name" "$opt_str" SKIP "$retmsg"
+}
+
log_test()
{
local test_name=$1
@@ -402,31 +588,28 @@ log_test()
opt_str="($opt_str)"
fi
- if [[ $RET -ne 0 ]]; then
- EXIT_STATUS=1
- printf "TEST: %-60s [FAIL]\n" "$test_name $opt_str"
- if [[ ! -z "$retmsg" ]]; then
- printf "\t%s\n" "$retmsg"
- fi
- if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
- echo "Hit enter to continue, 'q' to quit"
- read a
- [ "$a" = "q" ] && exit 1
- fi
- return 1
+ if ((RET == ksft_pass)); then
+ handle_test_result_pass "$test_name" "$opt_str"
+ elif ((RET == ksft_xfail)); then
+ handle_test_result_xfail "$test_name" "$opt_str"
+ elif ((RET == ksft_skip)); then
+ handle_test_result_skip "$test_name" "$opt_str"
+ else
+ handle_test_result_fail "$test_name" "$opt_str"
fi
- printf "TEST: %-60s [ OK ]\n" "$test_name $opt_str"
- return 0
+ EXIT_STATUS=$(ksft_exit_status_merge $EXIT_STATUS $RET)
+ return $RET
}
log_test_skip()
{
- local test_name=$1
- local opt_str=$2
+ RET=$ksft_skip retmsg= log_test "$@"
+}
- printf "TEST: %-60s [SKIP]\n" "$test_name $opt_str"
- return 0
+log_test_xfail()
+{
+ RET=$ksft_xfail retmsg= log_test "$@"
}
log_info()
@@ -487,33 +670,6 @@ wait_for_trap()
"$@" | grep -q trap
}
-until_counter_is()
-{
- local expr=$1; shift
- local current=$("$@")
-
- echo $((current))
- ((current $expr))
-}
-
-busywait_for_counter()
-{
- local timeout=$1; shift
- local delta=$1; shift
-
- local base=$("$@")
- busywait "$timeout" until_counter_is ">= $((base + delta))" "$@"
-}
-
-slowwait_for_counter()
-{
- local timeout=$1; shift
- local delta=$1; shift
-
- local base=$("$@")
- slowwait "$timeout" until_counter_is ">= $((base + delta))" "$@"
-}
-
setup_wait_dev()
{
local dev=$1; shift
@@ -562,6 +718,19 @@ setup_wait()
sleep $WAIT_TIME
}
+wait_for_dev()
+{
+ local dev=$1; shift
+ local timeout=${1:-$WAIT_TIMEOUT}; shift
+
+ slowwait $timeout ip link show dev $dev &> /dev/null
+ if (( $? )); then
+ check_err 1
+ log_test wait_for_dev "Interface $dev did not appear."
+ exit $EXIT_STATUS
+ fi
+}
+
cmd_jq()
{
local cmd=$1
@@ -819,29 +988,6 @@ link_stats_rx_errors_get()
link_stats_get $1 rx errors
}
-tc_rule_stats_get()
-{
- local dev=$1; shift
- local pref=$1; shift
- local dir=$1; shift
- local selector=${1:-.packets}; shift
-
- tc -j -s filter show dev $dev ${dir:-ingress} pref $pref \
- | jq ".[1].options.actions[].stats$selector"
-}
-
-tc_rule_handle_stats_get()
-{
- local id=$1; shift
- local handle=$1; shift
- local selector=${1:-.packets}; shift
- local netns=${1:-""}; shift
-
- tc $netns -j -s filter show $id \
- | jq ".[] | select(.options.handle == $handle) | \
- .options.actions[0].stats$selector"
-}
-
ethtool_stats_get()
{
local dev=$1; shift
@@ -988,12 +1134,19 @@ bridge_ageing_time_get()
}
declare -A SYSCTL_ORIG
+sysctl_save()
+{
+ local key=$1; shift
+
+ SYSCTL_ORIG[$key]=$(sysctl -n $key)
+}
+
sysctl_set()
{
local key=$1; shift
local value=$1; shift
- SYSCTL_ORIG[$key]=$(sysctl -n $key)
+ sysctl_save "$key"
sysctl -qw $key="$value"
}
@@ -1072,22 +1225,6 @@ trap_uninstall()
tc filter del dev $dev $direction pref 1 flower
}
-slow_path_trap_install()
-{
- # For slow-path testing, we need to install a trap to get to
- # slow path the packets that would otherwise be switched in HW.
- if [ "${tcflags/skip_hw}" != "$tcflags" ]; then
- trap_install "$@"
- fi
-}
-
-slow_path_trap_uninstall()
-{
- if [ "${tcflags/skip_hw}" != "$tcflags" ]; then
- trap_uninstall "$@"
- fi
-}
-
__icmp_capture_add_del()
{
local add_del=$1; shift
@@ -1104,22 +1241,34 @@ __icmp_capture_add_del()
icmp_capture_install()
{
- __icmp_capture_add_del add 100 "" "$@"
+ local tundev=$1; shift
+ local filter=$1; shift
+
+ __icmp_capture_add_del add 100 "" "$tundev" "$filter"
}
icmp_capture_uninstall()
{
- __icmp_capture_add_del del 100 "" "$@"
+ local tundev=$1; shift
+ local filter=$1; shift
+
+ __icmp_capture_add_del del 100 "" "$tundev" "$filter"
}
icmp6_capture_install()
{
- __icmp_capture_add_del add 100 v6 "$@"
+ local tundev=$1; shift
+ local filter=$1; shift
+
+ __icmp_capture_add_del add 100 v6 "$tundev" "$filter"
}
icmp6_capture_uninstall()
{
- __icmp_capture_add_del del 100 v6 "$@"
+ local tundev=$1; shift
+ local filter=$1; shift
+
+ __icmp_capture_add_del del 100 v6 "$tundev" "$filter"
}
__vlan_capture_add_del()
@@ -1137,12 +1286,18 @@ __vlan_capture_add_del()
vlan_capture_install()
{
- __vlan_capture_add_del add 100 "$@"
+ local dev=$1; shift
+ local filter=$1; shift
+
+ __vlan_capture_add_del add 100 "$dev" "$filter"
}
vlan_capture_uninstall()
{
- __vlan_capture_add_del del 100 "$@"
+ local dev=$1; shift
+ local filter=$1; shift
+
+ __vlan_capture_add_del del 100 "$dev" "$filter"
}
__dscp_capture_add_del()
@@ -1502,34 +1657,61 @@ __start_traffic()
local sip=$1; shift
local dip=$1; shift
local dmac=$1; shift
+ local -a mz_args=("$@")
$MZ $h_in -p $pktsize -A $sip -B $dip -c 0 \
- -a own -b $dmac -t "$proto" -q "$@" &
+ -a own -b $dmac -t "$proto" -q "${mz_args[@]}" &
sleep 1
}
start_traffic_pktsize()
{
local pktsize=$1; shift
+ local h_in=$1; shift
+ local sip=$1; shift
+ local dip=$1; shift
+ local dmac=$1; shift
+ local -a mz_args=("$@")
- __start_traffic $pktsize udp "$@"
+ __start_traffic $pktsize udp "$h_in" "$sip" "$dip" "$dmac" \
+ "${mz_args[@]}"
}
start_tcp_traffic_pktsize()
{
local pktsize=$1; shift
+ local h_in=$1; shift
+ local sip=$1; shift
+ local dip=$1; shift
+ local dmac=$1; shift
+ local -a mz_args=("$@")
- __start_traffic $pktsize tcp "$@"
+ __start_traffic $pktsize tcp "$h_in" "$sip" "$dip" "$dmac" \
+ "${mz_args[@]}"
}
start_traffic()
{
- start_traffic_pktsize 8000 "$@"
+ local h_in=$1; shift
+ local sip=$1; shift
+ local dip=$1; shift
+ local dmac=$1; shift
+ local -a mz_args=("$@")
+
+ start_traffic_pktsize 8000 "$h_in" "$sip" "$dip" "$dmac" \
+ "${mz_args[@]}"
}
start_tcp_traffic()
{
- start_tcp_traffic_pktsize 8000 "$@"
+ local h_in=$1; shift
+ local sip=$1; shift
+ local dip=$1; shift
+ local dmac=$1; shift
+ local -a mz_args=("$@")
+
+ start_tcp_traffic_pktsize 8000 "$h_in" "$sip" "$dip" "$dmac" \
+ "${mz_args[@]}"
}
stop_traffic()
@@ -2011,6 +2193,8 @@ bail_on_lldpad()
{
local reason1="$1"; shift
local reason2="$1"; shift
+ local caller=${FUNCNAME[1]}
+ local src=${BASH_SOURCE[1]}
if systemctl is-active --quiet lldpad; then
@@ -2031,7 +2215,8 @@ bail_on_lldpad()
an environment variable ALLOW_LLDPAD to a
non-empty string.
EOF
- exit 1
+ log_test_skip $src:$caller
+ exit $EXIT_STATUS
else
return
fi
diff --git a/tools/testing/selftests/net/forwarding/lib_sh_test.sh b/tools/testing/selftests/net/forwarding/lib_sh_test.sh
new file mode 100755
index 000000000000..ff2accccaf4d
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/lib_sh_test.sh
@@ -0,0 +1,208 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This tests the operation of lib.sh itself.
+
+ALL_TESTS="
+ test_ret
+ test_exit_status
+"
+NUM_NETIFS=0
+source lib.sh
+
+# Simulated checks.
+
+do_test()
+{
+ local msg=$1; shift
+
+ "$@"
+ check_err $? "$msg"
+}
+
+tpass()
+{
+ do_test "tpass" true
+}
+
+tfail()
+{
+ do_test "tfail" false
+}
+
+txfail()
+{
+ FAIL_TO_XFAIL=yes do_test "txfail" false
+}
+
+# Simulated tests.
+
+pass()
+{
+ RET=0
+ do_test "true" true
+ log_test "true"
+}
+
+fail()
+{
+ RET=0
+ do_test "false" false
+ log_test "false"
+}
+
+xfail()
+{
+ RET=0
+ FAIL_TO_XFAIL=yes do_test "xfalse" false
+ log_test "xfalse"
+}
+
+skip()
+{
+ RET=0
+ log_test_skip "skip"
+}
+
+slow_xfail()
+{
+ RET=0
+ xfail_on_slow do_test "slow_false" false
+ log_test "slow_false"
+}
+
+# lib.sh tests.
+
+ret_tests_run()
+{
+ local t
+
+ RET=0
+ retmsg=
+ for t in "$@"; do
+ $t
+ done
+ echo "$retmsg"
+ return $RET
+}
+
+ret_subtest()
+{
+ local expect_ret=$1; shift
+ local expect_retmsg=$1; shift
+ local -a tests=( "$@" )
+
+ local status_names=(pass fail xfail xpass skip)
+ local ret
+ local out
+
+ RET=0
+
+ # Run this in a subshell, so that our environment is intact.
+ out=$(ret_tests_run "${tests[@]}")
+ ret=$?
+
+ (( ret == expect_ret ))
+ check_err $? "RET=$ret expected $expect_ret"
+
+ [[ $out == $expect_retmsg ]]
+ check_err $? "retmsg=$out expected $expect_retmsg"
+
+ log_test "RET $(echo ${tests[@]}) -> ${status_names[$ret]}"
+}
+
+test_ret()
+{
+ ret_subtest $ksft_pass ""
+
+ ret_subtest $ksft_pass "" tpass
+ ret_subtest $ksft_fail "tfail" tfail
+ ret_subtest $ksft_xfail "txfail" txfail
+
+ ret_subtest $ksft_pass "" tpass tpass
+ ret_subtest $ksft_fail "tfail" tpass tfail
+ ret_subtest $ksft_xfail "txfail" tpass txfail
+
+ ret_subtest $ksft_fail "tfail" tfail tpass
+ ret_subtest $ksft_xfail "txfail" txfail tpass
+
+ ret_subtest $ksft_fail "tfail" tfail tfail
+ ret_subtest $ksft_fail "tfail" tfail txfail
+
+ ret_subtest $ksft_fail "tfail" txfail tfail
+
+ ret_subtest $ksft_xfail "txfail" txfail txfail
+}
+
+exit_status_tests_run()
+{
+ EXIT_STATUS=0
+ tests_run > /dev/null
+ return $EXIT_STATUS
+}
+
+exit_status_subtest()
+{
+ local expect_exit_status=$1; shift
+ local tests=$1; shift
+ local what=$1; shift
+
+ local status_names=(pass fail xfail xpass skip)
+ local exit_status
+ local out
+
+ RET=0
+
+ # Run this in a subshell, so that our environment is intact.
+ out=$(TESTS="$tests" exit_status_tests_run)
+ exit_status=$?
+
+ (( exit_status == expect_exit_status ))
+ check_err $? "EXIT_STATUS=$exit_status, expected $expect_exit_status"
+
+ log_test "EXIT_STATUS $tests$what -> ${status_names[$exit_status]}"
+}
+
+test_exit_status()
+{
+ exit_status_subtest $ksft_pass ":"
+
+ exit_status_subtest $ksft_pass "pass"
+ exit_status_subtest $ksft_fail "fail"
+ exit_status_subtest $ksft_pass "xfail"
+ exit_status_subtest $ksft_skip "skip"
+
+ exit_status_subtest $ksft_pass "pass pass"
+ exit_status_subtest $ksft_fail "pass fail"
+ exit_status_subtest $ksft_pass "pass xfail"
+ exit_status_subtest $ksft_skip "pass skip"
+
+ exit_status_subtest $ksft_fail "fail pass"
+ exit_status_subtest $ksft_pass "xfail pass"
+ exit_status_subtest $ksft_skip "skip pass"
+
+ exit_status_subtest $ksft_fail "fail fail"
+ exit_status_subtest $ksft_fail "fail xfail"
+ exit_status_subtest $ksft_fail "fail skip"
+
+ exit_status_subtest $ksft_fail "xfail fail"
+ exit_status_subtest $ksft_fail "skip fail"
+
+ exit_status_subtest $ksft_pass "xfail xfail"
+ exit_status_subtest $ksft_skip "xfail skip"
+ exit_status_subtest $ksft_skip "skip xfail"
+
+ exit_status_subtest $ksft_skip "skip skip"
+
+ KSFT_MACHINE_SLOW=yes \
+ exit_status_subtest $ksft_pass "slow_xfail" ": slow"
+
+ KSFT_MACHINE_SLOW=no \
+ exit_status_subtest $ksft_fail "slow_xfail" ": fast"
+}
+
+trap pre_cleanup EXIT
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh
index c5b0cbc85b3e..4b364cdf3ef0 100755
--- a/tools/testing/selftests/net/forwarding/local_termination.sh
+++ b/tools/testing/selftests/net/forwarding/local_termination.sh
@@ -155,25 +155,30 @@ run_test()
"$smac > $MACVLAN_ADDR, ethertype IPv4 (0x0800)" \
true
- check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \
- "$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \
- false
+ xfail_on_veth $h1 \
+ check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \
+ "$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \
+ false
check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address, promisc" \
"$smac > $UNKNOWN_UC_ADDR2, ethertype IPv4 (0x0800)" \
true
- check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address, allmulti" \
- "$smac > $UNKNOWN_UC_ADDR3, ethertype IPv4 (0x0800)" \
- false
+ xfail_on_veth $h1 \
+ check_rcv $rcv_if_name \
+ "Unicast IPv4 to unknown MAC address, allmulti" \
+ "$smac > $UNKNOWN_UC_ADDR3, ethertype IPv4 (0x0800)" \
+ false
check_rcv $rcv_if_name "Multicast IPv4 to joined group" \
"$smac > $JOINED_MACV4_MC_ADDR, ethertype IPv4 (0x0800)" \
true
- check_rcv $rcv_if_name "Multicast IPv4 to unknown group" \
- "$smac > $UNKNOWN_MACV4_MC_ADDR1, ethertype IPv4 (0x0800)" \
- false
+ xfail_on_veth $h1 \
+ check_rcv $rcv_if_name \
+ "Multicast IPv4 to unknown group" \
+ "$smac > $UNKNOWN_MACV4_MC_ADDR1, ethertype IPv4 (0x0800)" \
+ false
check_rcv $rcv_if_name "Multicast IPv4 to unknown group, promisc" \
"$smac > $UNKNOWN_MACV4_MC_ADDR2, ethertype IPv4 (0x0800)" \
@@ -187,9 +192,10 @@ run_test()
"$smac > $JOINED_MACV6_MC_ADDR, ethertype IPv6 (0x86dd)" \
true
- check_rcv $rcv_if_name "Multicast IPv6 to unknown group" \
- "$smac > $UNKNOWN_MACV6_MC_ADDR1, ethertype IPv6 (0x86dd)" \
- false
+ xfail_on_veth $h1 \
+ check_rcv $rcv_if_name "Multicast IPv6 to unknown group" \
+ "$smac > $UNKNOWN_MACV6_MC_ADDR1, ethertype IPv6 (0x86dd)" \
+ false
check_rcv $rcv_if_name "Multicast IPv6 to unknown group, promisc" \
"$smac > $UNKNOWN_MACV6_MC_ADDR2, ethertype IPv6 (0x86dd)" \
diff --git a/tools/testing/selftests/net/forwarding/min_max_mtu.sh b/tools/testing/selftests/net/forwarding/min_max_mtu.sh
new file mode 100755
index 000000000000..97bb8b221bed
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/min_max_mtu.sh
@@ -0,0 +1,283 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +--------------------+
+# | H1 |
+# | |
+# | $h1.10 + |
+# | 192.0.2.2/24 | |
+# | 2001:db8:1::2/64 | |
+# | | |
+# | $h1 + |
+# | | |
+# +------------------|-+
+# |
+# +------------------|-+
+# | SW | |
+# | $swp1 + |
+# | | |
+# | $swp1.10 + |
+# | 192.0.2.1/24 |
+# | 2001:db8:1::1/64 |
+# | |
+# +--------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ max_mtu_config_test
+ max_mtu_traffic_test
+ min_mtu_config_test
+ min_mtu_traffic_test
+"
+
+NUM_NETIFS=2
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1
+ vlan_create $h1 10 v$h1 192.0.2.2/24 2001:db8:1::2/64
+}
+
+h1_destroy()
+{
+ vlan_destroy $h1 10 192.0.2.2/24 2001:db8:1::2/64
+ simple_if_fini $h1
+}
+
+switch_create()
+{
+ ip li set dev $swp1 up
+ vlan_create $swp1 10 "" 192.0.2.1/24 2001:db8:1::1/64
+}
+
+switch_destroy()
+{
+ ip li set dev $swp1 down
+ vlan_destroy $swp1 10
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ vrf_prepare
+
+ h1_create
+
+ switch_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ switch_destroy
+
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1.10 192.0.2.1
+}
+
+ping_ipv6()
+{
+ ping6_test $h1.10 2001:db8:1::1
+}
+
+min_max_mtu_get_if()
+{
+ local dev=$1; shift
+ local min_max=$1; shift
+
+ ip -d -j link show $dev | jq ".[].$min_max"
+}
+
+ensure_compatible_min_max_mtu()
+{
+ local min_max=$1; shift
+
+ local mtu=$(min_max_mtu_get_if ${NETIFS[p1]} $min_max)
+ local i
+
+ for ((i = 2; i <= NUM_NETIFS; ++i)); do
+ local current_mtu=$(min_max_mtu_get_if ${NETIFS[p$i]} $min_max)
+
+ if [ $current_mtu -ne $mtu ]; then
+ return 1
+ fi
+ done
+}
+
+mtu_set_if()
+{
+ local dev=$1; shift
+ local mtu=$1; shift
+ local should_fail=${1:-0}; shift
+
+ mtu_set $dev $mtu 2>/dev/null
+ check_err_fail $should_fail $? "Set MTU $mtu for $dev"
+}
+
+mtu_set_all_if()
+{
+ local mtu=$1; shift
+ local i
+
+ for ((i = 1; i <= NUM_NETIFS; ++i)); do
+ mtu_set_if ${NETIFS[p$i]} $mtu
+ mtu_set_if ${NETIFS[p$i]}.10 $mtu
+ done
+}
+
+mtu_restore_all_if()
+{
+ local i
+
+ for ((i = 1; i <= NUM_NETIFS; ++i)); do
+ mtu_restore ${NETIFS[p$i]}.10
+ mtu_restore ${NETIFS[p$i]}
+ done
+}
+
+mtu_test_ping4()
+{
+ local mtu=$1; shift
+ local should_fail=$1; shift
+
+ # Ping adds 8 bytes for ICMP header and 20 bytes for IP header
+ local ping_headers_len=$((20 + 8))
+ local pkt_size=$((mtu - ping_headers_len))
+
+ ping_do $h1.10 192.0.2.1 "-s $pkt_size -M do"
+ check_err_fail $should_fail $? "Ping, packet size: $pkt_size"
+}
+
+mtu_test_ping6()
+{
+ local mtu=$1; shift
+ local should_fail=$1; shift
+
+ # Ping adds 8 bytes for ICMP header and 40 bytes for IPv6 header
+ local ping6_headers_len=$((40 + 8))
+ local pkt_size=$((mtu - ping6_headers_len))
+
+ ping6_do $h1.10 2001:db8:1::1 "-s $pkt_size -M do"
+ check_err_fail $should_fail $? "Ping6, packet size: $pkt_size"
+}
+
+max_mtu_config_test()
+{
+ local i
+
+ RET=0
+
+ for ((i = 1; i <= NUM_NETIFS; ++i)); do
+ local dev=${NETIFS[p$i]}
+ local max_mtu=$(min_max_mtu_get_if $dev "max_mtu")
+ local should_fail
+
+ should_fail=0
+ mtu_set_if $dev $max_mtu $should_fail
+ mtu_restore $dev
+
+ should_fail=1
+ mtu_set_if $dev $((max_mtu + 1)) $should_fail
+ mtu_restore $dev
+ done
+
+ log_test "Test maximum MTU configuration"
+}
+
+max_mtu_traffic_test()
+{
+ local should_fail
+ local max_mtu
+
+ RET=0
+
+ if ! ensure_compatible_min_max_mtu "max_mtu"; then
+ log_test_xfail "Topology has incompatible maximum MTU values"
+ return
+ fi
+
+ max_mtu=$(min_max_mtu_get_if ${NETIFS[p1]} "max_mtu")
+
+ should_fail=0
+ mtu_set_all_if $max_mtu
+ mtu_test_ping4 $max_mtu $should_fail
+ mtu_test_ping6 $max_mtu $should_fail
+ mtu_restore_all_if
+
+ should_fail=1
+ mtu_set_all_if $((max_mtu - 1))
+ mtu_test_ping4 $max_mtu $should_fail
+ mtu_test_ping6 $max_mtu $should_fail
+ mtu_restore_all_if
+
+ log_test "Test traffic, packet size is maximum MTU"
+}
+
+min_mtu_config_test()
+{
+ local i
+
+ RET=0
+
+ for ((i = 1; i <= NUM_NETIFS; ++i)); do
+ local dev=${NETIFS[p$i]}
+ local min_mtu=$(min_max_mtu_get_if $dev "min_mtu")
+ local should_fail
+
+ should_fail=0
+ mtu_set_if $dev $min_mtu $should_fail
+ mtu_restore $dev
+
+ should_fail=1
+ mtu_set_if $dev $((min_mtu - 1)) $should_fail
+ mtu_restore $dev
+ done
+
+ log_test "Test minimum MTU configuration"
+}
+
+min_mtu_traffic_test()
+{
+ local should_fail=0
+ local min_mtu
+
+ RET=0
+
+ if ! ensure_compatible_min_max_mtu "min_mtu"; then
+ log_test_xfail "Topology has incompatible minimum MTU values"
+ return
+ fi
+
+ min_mtu=$(min_max_mtu_get_if ${NETIFS[p1]} "min_mtu")
+ mtu_set_all_if $min_mtu
+ mtu_test_ping4 $min_mtu $should_fail
+ # Do not test minimum MTU with IPv6, as IPv6 requires higher MTU.
+
+ mtu_restore_all_if
+
+ log_test "Test traffic, packet size is minimum MTU"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre.sh b/tools/testing/selftests/net/forwarding/mirror_gre.sh
index 0266443601bc..921c733ee04f 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre.sh
@@ -74,7 +74,7 @@ test_span_gre_mac()
RET=0
- mirror_install $swp1 $direction $tundev "matchall $tcflags"
+ mirror_install $swp1 $direction $tundev "matchall"
icmp_capture_install h3-${tundev} "src_mac $src_mac dst_mac $dst_mac"
mirror_test v$h1 192.0.2.1 192.0.2.2 h3-${tundev} 100 10
@@ -82,29 +82,29 @@ test_span_gre_mac()
icmp_capture_uninstall h3-${tundev}
mirror_uninstall $swp1 $direction
- log_test "$direction $what: envelope MAC ($tcflags)"
+ log_test "$direction $what: envelope MAC"
}
test_two_spans()
{
RET=0
- mirror_install $swp1 ingress gt4 "matchall $tcflags"
- mirror_install $swp1 egress gt6 "matchall $tcflags"
- quick_test_span_gre_dir gt4 ingress
- quick_test_span_gre_dir gt6 egress
+ mirror_install $swp1 ingress gt4 "matchall"
+ mirror_install $swp1 egress gt6 "matchall"
+ quick_test_span_gre_dir gt4 8 0
+ quick_test_span_gre_dir gt6 0 8
mirror_uninstall $swp1 ingress
- fail_test_span_gre_dir gt4 ingress
- quick_test_span_gre_dir gt6 egress
+ fail_test_span_gre_dir gt4 8 0
+ quick_test_span_gre_dir gt6 0 8
- mirror_install $swp1 ingress gt4 "matchall $tcflags"
+ mirror_install $swp1 ingress gt4 "matchall"
mirror_uninstall $swp1 egress
- quick_test_span_gre_dir gt4 ingress
- fail_test_span_gre_dir gt6 egress
+ quick_test_span_gre_dir gt4 8 0
+ fail_test_span_gre_dir gt6 0 8
mirror_uninstall $swp1 ingress
- log_test "two simultaneously configured mirrors ($tcflags)"
+ log_test "two simultaneously configured mirrors"
}
test_gretap()
@@ -131,30 +131,11 @@ test_ip6gretap_mac()
test_span_gre_mac gt6 egress "mirror to ip6gretap"
}
-test_all()
-{
- slow_path_trap_install $swp1 ingress
- slow_path_trap_install $swp1 egress
-
- tests_run
-
- slow_path_trap_uninstall $swp1 egress
- slow_path_trap_uninstall $swp1 ingress
-}
-
trap cleanup EXIT
setup_prepare
setup_wait
-tcflags="skip_hw"
-test_all
-
-if ! tc_offload_check; then
- echo "WARN: Could not test offloaded functionality"
-else
- tcflags="skip_sw"
- test_all
-fi
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh
index 6c257ec03756..e3cd48e18eeb 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh
@@ -196,32 +196,11 @@ test_ip6gretap()
full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap w/ UL"
}
-test_all()
-{
- RET=0
-
- slow_path_trap_install $swp1 ingress
- slow_path_trap_install $swp1 egress
-
- tests_run
-
- slow_path_trap_uninstall $swp1 egress
- slow_path_trap_uninstall $swp1 ingress
-}
-
trap cleanup EXIT
setup_prepare
setup_wait
-tcflags="skip_hw"
-test_all
-
-if ! tc_offload_check; then
- echo "WARN: Could not test offloaded functionality"
-else
- tcflags="skip_sw"
- test_all
-fi
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh
index 04fd14b0a9b7..6c7bd33332c2 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh
@@ -108,30 +108,11 @@ test_ip6gretap()
full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap"
}
-test_all()
-{
- slow_path_trap_install $swp1 ingress
- slow_path_trap_install $swp1 egress
-
- tests_run
-
- slow_path_trap_uninstall $swp1 egress
- slow_path_trap_uninstall $swp1 ingress
-}
-
trap cleanup EXIT
setup_prepare
setup_wait
-tcflags="skip_hw"
-test_all
-
-if ! tc_offload_check; then
- echo "WARN: Could not test offloaded functionality"
-else
- tcflags="skip_sw"
- test_all
-fi
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
index f35313c76fac..909ec956a5e5 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
@@ -104,30 +104,11 @@ test_ip6gretap_stp()
full_test_span_gre_stp gt6 $swp3.555 "mirror to ip6gretap"
}
-test_all()
-{
- slow_path_trap_install $swp1 ingress
- slow_path_trap_install $swp1 egress
-
- tests_run
-
- slow_path_trap_uninstall $swp1 egress
- slow_path_trap_uninstall $swp1 ingress
-}
-
trap cleanup EXIT
setup_prepare
setup_wait
-tcflags="skip_hw"
-test_all
-
-if ! tc_offload_check; then
- echo "WARN: Could not test offloaded functionality"
-else
- tcflags="skip_sw"
- test_all
-fi
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh
index 0cf4c47a46f9..40ac9dd3aff1 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh
@@ -104,30 +104,11 @@ test_ip6gretap()
full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap"
}
-tests()
-{
- slow_path_trap_install $swp1 ingress
- slow_path_trap_install $swp1 egress
-
- tests_run
-
- slow_path_trap_uninstall $swp1 egress
- slow_path_trap_uninstall $swp1 ingress
-}
-
trap cleanup EXIT
setup_prepare
setup_wait
-tcflags="skip_hw"
-tests
-
-if ! tc_offload_check; then
- echo "WARN: Could not test offloaded functionality"
-else
- tcflags="skip_sw"
- tests
-fi
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
index c53148b1dc63..fe4d7c906a70 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
@@ -227,10 +227,10 @@ test_lag_slave()
RET=0
tc filter add dev $swp1 ingress pref 999 \
- proto 802.1q flower vlan_ethtype arp $tcflags \
+ proto 802.1q flower vlan_ethtype arp \
action pass
mirror_install $swp1 ingress gt4 \
- "proto 802.1q flower vlan_id 333 $tcflags"
+ "proto 802.1q flower vlan_id 333"
# Test connectivity through $up_dev when $down_dev is set down.
ip link set dev $down_dev down
@@ -239,7 +239,7 @@ test_lag_slave()
setup_wait_dev $host_dev
$ARPING -I br1 192.0.2.130 -qfc 1
sleep 2
- mirror_test vrf-h1 192.0.2.1 192.0.2.18 $host_dev 1 10
+ mirror_test vrf-h1 192.0.2.1 192.0.2.18 $host_dev 1 ">= 10"
# Test lack of connectivity when both slaves are down.
ip link set dev $up_dev down
@@ -252,7 +252,7 @@ test_lag_slave()
mirror_uninstall $swp1 ingress
tc filter del dev $swp1 ingress pref 999
- log_test "$what ($tcflags)"
+ log_test "$what"
}
test_mirror_gretap_first()
@@ -265,30 +265,11 @@ test_mirror_gretap_second()
test_lag_slave $h4 $swp4 $swp3 "mirror to gretap: LAG second slave"
}
-test_all()
-{
- slow_path_trap_install $swp1 ingress
- slow_path_trap_install $swp1 egress
-
- tests_run
-
- slow_path_trap_uninstall $swp1 egress
- slow_path_trap_uninstall $swp1 ingress
-}
-
trap cleanup EXIT
setup_prepare
setup_wait
-tcflags="skip_hw"
-test_all
-
-if ! tc_offload_check; then
- echo "WARN: Could not test offloaded functionality"
-else
- tcflags="skip_sw"
- test_all
-fi
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
index 5ea9d63915f7..65ae9d960c18 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
@@ -73,7 +73,7 @@ test_span_gre_ttl()
RET=0
mirror_install $swp1 ingress $tundev \
- "prot ip flower $tcflags ip_prot icmp"
+ "prot ip flower ip_prot icmp"
tc filter add dev $h3 ingress pref 77 prot $prot \
flower skip_hw ip_ttl 50 action pass
@@ -81,13 +81,13 @@ test_span_gre_ttl()
ip link set dev $tundev type $type ttl 50
sleep 2
- mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 10
+ mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 ">= 10"
ip link set dev $tundev type $type ttl 100
tc filter del dev $h3 ingress pref 77
mirror_uninstall $swp1 ingress
- log_test "$what: TTL change ($tcflags)"
+ log_test "$what: TTL change"
}
test_span_gre_tun_up()
@@ -98,15 +98,15 @@ test_span_gre_tun_up()
RET=0
ip link set dev $tundev down
- mirror_install $swp1 ingress $tundev "matchall $tcflags"
- fail_test_span_gre_dir $tundev ingress
+ mirror_install $swp1 ingress $tundev "matchall"
+ fail_test_span_gre_dir $tundev
ip link set dev $tundev up
- quick_test_span_gre_dir $tundev ingress
+ quick_test_span_gre_dir $tundev
mirror_uninstall $swp1 ingress
- log_test "$what: tunnel down/up ($tcflags)"
+ log_test "$what: tunnel down/up"
}
test_span_gre_egress_up()
@@ -118,8 +118,8 @@ test_span_gre_egress_up()
RET=0
ip link set dev $swp3 down
- mirror_install $swp1 ingress $tundev "matchall $tcflags"
- fail_test_span_gre_dir $tundev ingress
+ mirror_install $swp1 ingress $tundev "matchall"
+ fail_test_span_gre_dir $tundev
# After setting the device up, wait for neighbor to get resolved so that
# we can expect mirroring to work.
@@ -127,10 +127,10 @@ test_span_gre_egress_up()
setup_wait_dev $swp3
ping -c 1 -I $swp3 $remote_ip &>/dev/null
- quick_test_span_gre_dir $tundev ingress
+ quick_test_span_gre_dir $tundev
mirror_uninstall $swp1 ingress
- log_test "$what: egress down/up ($tcflags)"
+ log_test "$what: egress down/up"
}
test_span_gre_remote_ip()
@@ -144,14 +144,14 @@ test_span_gre_remote_ip()
RET=0
ip link set dev $tundev type $type remote $wrong_ip
- mirror_install $swp1 ingress $tundev "matchall $tcflags"
- fail_test_span_gre_dir $tundev ingress
+ mirror_install $swp1 ingress $tundev "matchall"
+ fail_test_span_gre_dir $tundev
ip link set dev $tundev type $type remote $correct_ip
- quick_test_span_gre_dir $tundev ingress
+ quick_test_span_gre_dir $tundev
mirror_uninstall $swp1 ingress
- log_test "$what: remote address change ($tcflags)"
+ log_test "$what: remote address change"
}
test_span_gre_tun_del()
@@ -165,10 +165,10 @@ test_span_gre_tun_del()
RET=0
- mirror_install $swp1 ingress $tundev "matchall $tcflags"
- quick_test_span_gre_dir $tundev ingress
+ mirror_install $swp1 ingress $tundev "matchall"
+ quick_test_span_gre_dir $tundev
ip link del dev $tundev
- fail_test_span_gre_dir $tundev ingress
+ fail_test_span_gre_dir $tundev
tunnel_create $tundev $type $local_ip $remote_ip \
ttl 100 tos inherit $flags
@@ -176,11 +176,11 @@ test_span_gre_tun_del()
# Recreating the tunnel doesn't reestablish mirroring, so reinstall it
# and verify it works for the follow-up tests.
mirror_uninstall $swp1 ingress
- mirror_install $swp1 ingress $tundev "matchall $tcflags"
- quick_test_span_gre_dir $tundev ingress
+ mirror_install $swp1 ingress $tundev "matchall"
+ quick_test_span_gre_dir $tundev
mirror_uninstall $swp1 ingress
- log_test "$what: tunnel deleted ($tcflags)"
+ log_test "$what: tunnel deleted"
}
test_span_gre_route_del()
@@ -192,18 +192,18 @@ test_span_gre_route_del()
RET=0
- mirror_install $swp1 ingress $tundev "matchall $tcflags"
- quick_test_span_gre_dir $tundev ingress
+ mirror_install $swp1 ingress $tundev "matchall"
+ quick_test_span_gre_dir $tundev
ip route del $route dev $edev
- fail_test_span_gre_dir $tundev ingress
+ fail_test_span_gre_dir $tundev
ip route add $route dev $edev
- quick_test_span_gre_dir $tundev ingress
+ quick_test_span_gre_dir $tundev
mirror_uninstall $swp1 ingress
- log_test "$what: underlay route removal ($tcflags)"
+ log_test "$what: underlay route removal"
}
test_ttl()
@@ -244,30 +244,11 @@ test_route_del()
test_span_gre_route_del gt6 $swp3 2001:db8:2::/64 "mirror to ip6gretap"
}
-test_all()
-{
- slow_path_trap_install $swp1 ingress
- slow_path_trap_install $swp1 egress
-
- tests_run
-
- slow_path_trap_uninstall $swp1 egress
- slow_path_trap_uninstall $swp1 ingress
-}
-
trap cleanup EXIT
setup_prepare
setup_wait
-tcflags="skip_hw"
-test_all
-
-if ! tc_offload_check; then
- echo "WARN: Could not test offloaded functionality"
-else
- tcflags="skip_sw"
- test_all
-fi
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh b/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh
index 09389f3b9369..3a84f3ab5856 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh
@@ -64,12 +64,19 @@ cleanup()
test_span_gre_dir_acl()
{
- test_span_gre_dir_ips "$@" 192.0.2.3 192.0.2.4
+ local tundev=$1; shift
+ local forward_type=$1; shift
+ local backward_type=$1; shift
+
+ test_span_gre_dir_ips "$tundev" "$forward_type" \
+ "$backward_type" 192.0.2.3 192.0.2.4
}
fail_test_span_gre_dir_acl()
{
- fail_test_span_gre_dir_ips "$@" 192.0.2.3 192.0.2.4
+ local tundev=$1; shift
+
+ fail_test_span_gre_dir_ips "$tundev" 192.0.2.3 192.0.2.4
}
full_test_span_gre_dir_acl()
@@ -84,16 +91,15 @@ full_test_span_gre_dir_acl()
RET=0
mirror_install $swp1 $direction $tundev \
- "protocol ip flower $tcflags dst_ip $match_dip"
- fail_test_span_gre_dir $tundev $direction
- test_span_gre_dir_acl "$tundev" "$direction" \
- "$forward_type" "$backward_type"
+ "protocol ip flower dst_ip $match_dip"
+ fail_test_span_gre_dir $tundev
+ test_span_gre_dir_acl "$tundev" "$forward_type" "$backward_type"
mirror_uninstall $swp1 $direction
# Test lack of mirroring after ACL mirror is uninstalled.
- fail_test_span_gre_dir_acl "$tundev" "$direction"
+ fail_test_span_gre_dir_acl "$tundev"
- log_test "$direction $what ($tcflags)"
+ log_test "$direction $what"
}
test_gretap()
@@ -108,30 +114,11 @@ test_ip6gretap()
full_test_span_gre_dir_acl gt6 egress 0 8 192.0.2.3 "ACL mirror to ip6gretap"
}
-test_all()
-{
- slow_path_trap_install $swp1 ingress
- slow_path_trap_install $swp1 egress
-
- tests_run
-
- slow_path_trap_uninstall $swp1 egress
- slow_path_trap_uninstall $swp1 ingress
-}
-
trap cleanup EXIT
setup_prepare
setup_wait
-tcflags="skip_hw"
-test_all
-
-if ! tc_offload_check; then
- echo "WARN: Could not test offloaded functionality"
-else
- tcflags="skip_sw"
- test_all
-fi
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh
index 9edf4cb104a8..1261e6f46e34 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh
@@ -37,8 +37,14 @@
# | \ / |
# | \____________________________________________/ |
# | | |
-# | + lag2 (team) |
-# | 192.0.2.130/28 |
+# | + lag2 (team) ------> + gt4-dst (gretap) |
+# | 192.0.2.130/28 loc=192.0.2.130 |
+# | rem=192.0.2.129 |
+# | ttl=100 |
+# | tos=inherit |
+# | |
+# | |
+# | |
# | |
# +---------------------------------------------------------------------------+
@@ -50,9 +56,6 @@ ALL_TESTS="
NUM_NETIFS=6
source lib.sh
source mirror_lib.sh
-source mirror_gre_lib.sh
-
-require_command $ARPING
vlan_host_create()
{
@@ -122,16 +125,21 @@ h3_create()
{
vrf_create vrf-h3
ip link set dev vrf-h3 up
- tc qdisc add dev $h3 clsact
- tc qdisc add dev $h4 clsact
h3_create_team
+
+ tunnel_create gt4-dst gretap 192.0.2.130 192.0.2.129 \
+ ttl 100 tos inherit
+ ip link set dev gt4-dst master vrf-h3
+ tc qdisc add dev gt4-dst clsact
}
h3_destroy()
{
+ tc qdisc del dev gt4-dst clsact
+ ip link set dev gt4-dst nomaster
+ tunnel_destroy gt4-dst
+
h3_destroy_team
- tc qdisc del dev $h4 clsact
- tc qdisc del dev $h3 clsact
ip link set dev vrf-h3 down
vrf_destroy vrf-h3
}
@@ -188,18 +196,12 @@ setup_prepare()
h2_create
h3_create
switch_create
-
- trap_install $h3 ingress
- trap_install $h4 ingress
}
cleanup()
{
pre_cleanup
- trap_uninstall $h4 ingress
- trap_uninstall $h3 ingress
-
switch_destroy
h3_destroy
h2_destroy
@@ -218,7 +220,8 @@ test_lag_slave()
RET=0
mirror_install $swp1 ingress gt4 \
- "proto 802.1q flower vlan_id 333 $tcflags"
+ "proto 802.1q flower vlan_id 333"
+ vlan_capture_install gt4-dst "vlan_ethtype ipv4 ip_proto icmp type 8"
# Move $down_dev away from the team. That will prompt change in
# txability of the connected device, without changing its upness. The
@@ -226,13 +229,14 @@ test_lag_slave()
# other slave.
ip link set dev $down_dev nomaster
sleep 2
- mirror_test vrf-h1 192.0.2.1 192.0.2.18 $up_dev 1 10
+ mirror_test vrf-h1 192.0.2.1 192.0.2.18 gt4-dst 100 10
# Test lack of connectivity when neither slave is txable.
ip link set dev $up_dev nomaster
sleep 2
- mirror_test vrf-h1 192.0.2.1 192.0.2.18 $h3 1 0
- mirror_test vrf-h1 192.0.2.1 192.0.2.18 $h4 1 0
+ mirror_test vrf-h1 192.0.2.1 192.0.2.18 gt4-dst 100 0
+
+ vlan_capture_uninstall gt4-dst
mirror_uninstall $swp1 ingress
# Recreate H3's team device, because mlxsw, which this test is
@@ -243,7 +247,7 @@ test_lag_slave()
# Wait for ${h,swp}{3,4}.
setup_wait
- log_test "$what ($tcflags)"
+ log_test "$what"
}
test_mirror_gretap_first()
@@ -256,30 +260,11 @@ test_mirror_gretap_second()
test_lag_slave $h4 $h3 "mirror to gretap: LAG second slave"
}
-test_all()
-{
- slow_path_trap_install $swp1 ingress
- slow_path_trap_install $swp1 egress
-
- tests_run
-
- slow_path_trap_uninstall $swp1 egress
- slow_path_trap_uninstall $swp1 ingress
-}
-
trap cleanup EXIT
setup_prepare
setup_wait
-tcflags="skip_hw"
-test_all
-
-if ! tc_offload_check; then
- echo "WARN: Could not test offloaded functionality"
-else
- tcflags="skip_sw"
- test_all
-fi
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
index 0c36546e131e..20078cc55f24 100644
--- a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
@@ -5,22 +5,34 @@ source "$net_forwarding_dir/mirror_lib.sh"
quick_test_span_gre_dir_ips()
{
local tundev=$1; shift
+ local ip1=$1; shift
+ local ip2=$1; shift
+ local forward_type=$1; shift
+ local backward_type=$1; shift
- do_test_span_dir_ips 10 h3-$tundev "$@"
+ do_test_span_dir_ips 10 h3-$tundev "$ip1" "$ip2" \
+ "$forward_type" "$backward_type"
}
fail_test_span_gre_dir_ips()
{
local tundev=$1; shift
+ local ip1=$1; shift
+ local ip2=$1; shift
- do_test_span_dir_ips 0 h3-$tundev "$@"
+ do_test_span_dir_ips 0 h3-$tundev "$ip1" "$ip2"
}
test_span_gre_dir_ips()
{
local tundev=$1; shift
+ local forward_type=$1; shift
+ local backward_type=$1; shift
+ local ip1=$1; shift
+ local ip2=$1; shift
- test_span_dir_ips h3-$tundev "$@"
+ test_span_dir_ips h3-$tundev "$forward_type" \
+ "$backward_type" "$ip1" "$ip2"
}
full_test_span_gre_dir_ips()
@@ -35,12 +47,12 @@ full_test_span_gre_dir_ips()
RET=0
- mirror_install $swp1 $direction $tundev "matchall $tcflags"
- test_span_dir_ips "h3-$tundev" "$direction" "$forward_type" \
+ mirror_install $swp1 $direction $tundev "matchall"
+ test_span_dir_ips "h3-$tundev" "$forward_type" \
"$backward_type" "$ip1" "$ip2"
mirror_uninstall $swp1 $direction
- log_test "$direction $what ($tcflags)"
+ log_test "$direction $what"
}
full_test_span_gre_dir_vlan_ips()
@@ -56,45 +68,63 @@ full_test_span_gre_dir_vlan_ips()
RET=0
- mirror_install $swp1 $direction $tundev "matchall $tcflags"
+ mirror_install $swp1 $direction $tundev "matchall"
- test_span_dir_ips "h3-$tundev" "$direction" "$forward_type" \
+ test_span_dir_ips "h3-$tundev" "$forward_type" \
"$backward_type" "$ip1" "$ip2"
tc filter add dev $h3 ingress pref 77 prot 802.1q \
flower $vlan_match \
action pass
- mirror_test v$h1 $ip1 $ip2 $h3 77 10
+ mirror_test v$h1 $ip1 $ip2 $h3 77 '>= 10'
tc filter del dev $h3 ingress pref 77
mirror_uninstall $swp1 $direction
- log_test "$direction $what ($tcflags)"
+ log_test "$direction $what"
}
quick_test_span_gre_dir()
{
- quick_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
+ local tundev=$1; shift
+ local forward_type=${1-8}; shift
+ local backward_type=${1-0}; shift
+
+ quick_test_span_gre_dir_ips "$tundev" 192.0.2.1 192.0.2.2 \
+ "$forward_type" "$backward_type"
}
fail_test_span_gre_dir()
{
- fail_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
-}
+ local tundev=$1; shift
-test_span_gre_dir()
-{
- test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
+ fail_test_span_gre_dir_ips "$tundev" 192.0.2.1 192.0.2.2
}
full_test_span_gre_dir()
{
- full_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
+ local tundev=$1; shift
+ local direction=$1; shift
+ local forward_type=$1; shift
+ local backward_type=$1; shift
+ local what=$1; shift
+
+ full_test_span_gre_dir_ips "$tundev" "$direction" "$forward_type" \
+ "$backward_type" "$what" 192.0.2.1 192.0.2.2
}
full_test_span_gre_dir_vlan()
{
- full_test_span_gre_dir_vlan_ips "$@" 192.0.2.1 192.0.2.2
+ local tundev=$1; shift
+ local direction=$1; shift
+ local vlan_match=$1; shift
+ local forward_type=$1; shift
+ local backward_type=$1; shift
+ local what=$1; shift
+
+ full_test_span_gre_dir_vlan_ips "$tundev" "$direction" "$vlan_match" \
+ "$forward_type" "$backward_type" \
+ "$what" 192.0.2.1 192.0.2.2
}
full_test_span_gre_stp_ips()
@@ -104,27 +134,39 @@ full_test_span_gre_stp_ips()
local what=$1; shift
local ip1=$1; shift
local ip2=$1; shift
+ local forward_type=$1; shift
+ local backward_type=$1; shift
local h3mac=$(mac_get $h3)
RET=0
- mirror_install $swp1 ingress $tundev "matchall $tcflags"
- quick_test_span_gre_dir_ips $tundev ingress $ip1 $ip2
+ mirror_install $swp1 ingress $tundev "matchall"
+ quick_test_span_gre_dir_ips $tundev $ip1 $ip2 \
+ "$forward_type" "$backward_type"
bridge link set dev $nbpdev state disabled
sleep 1
- fail_test_span_gre_dir_ips $tundev ingress $ip1 $ip2
+ fail_test_span_gre_dir_ips $tundev $ip1 $ip2
bridge link set dev $nbpdev state forwarding
sleep 1
- quick_test_span_gre_dir_ips $tundev ingress $ip1 $ip2
+ quick_test_span_gre_dir_ips $tundev $ip1 $ip2 \
+ "$forward_type" "$backward_type"
mirror_uninstall $swp1 ingress
- log_test "$what: STP state ($tcflags)"
+ log_test "$what: STP state"
}
full_test_span_gre_stp()
{
- full_test_span_gre_stp_ips "$@" 192.0.2.1 192.0.2.2
+ local tundev=$1; shift
+ local nbpdev=$1; shift
+ local what=$1; shift
+ local forward_type=${1-8}; shift
+ local backward_type=${1-0}; shift
+
+ full_test_span_gre_stp_ips "$tundev" "$nbpdev" "$what" \
+ 192.0.2.1 192.0.2.2 \
+ "$forward_type" "$backward_type"
}
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh b/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh
index fc0508e40fca..2cbfbecf25c8 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh
@@ -60,41 +60,32 @@ test_span_gre_neigh()
local addr=$1; shift
local tundev=$1; shift
local direction=$1; shift
+ local forward_type=$1; shift
+ local backward_type=$1; shift
local what=$1; shift
RET=0
ip neigh replace dev $swp3 $addr lladdr 00:11:22:33:44:55
- mirror_install $swp1 $direction $tundev "matchall $tcflags"
- fail_test_span_gre_dir $tundev ingress
+ mirror_install $swp1 $direction $tundev "matchall"
+ fail_test_span_gre_dir $tundev "$forward_type" "$backward_type"
ip neigh del dev $swp3 $addr
- quick_test_span_gre_dir $tundev ingress
+ quick_test_span_gre_dir $tundev "$forward_type" "$backward_type"
mirror_uninstall $swp1 $direction
- log_test "$direction $what: neighbor change ($tcflags)"
+ log_test "$direction $what: neighbor change"
}
test_gretap()
{
- test_span_gre_neigh 192.0.2.130 gt4 ingress "mirror to gretap"
- test_span_gre_neigh 192.0.2.130 gt4 egress "mirror to gretap"
+ test_span_gre_neigh 192.0.2.130 gt4 ingress 8 0 "mirror to gretap"
+ test_span_gre_neigh 192.0.2.130 gt4 egress 0 8 "mirror to gretap"
}
test_ip6gretap()
{
- test_span_gre_neigh 2001:db8:2::2 gt6 ingress "mirror to ip6gretap"
- test_span_gre_neigh 2001:db8:2::2 gt6 egress "mirror to ip6gretap"
-}
-
-test_all()
-{
- slow_path_trap_install $swp1 ingress
- slow_path_trap_install $swp1 egress
-
- tests_run
-
- slow_path_trap_uninstall $swp1 egress
- slow_path_trap_uninstall $swp1 ingress
+ test_span_gre_neigh 2001:db8:2::2 gt6 ingress 8 0 "mirror to ip6gretap"
+ test_span_gre_neigh 2001:db8:2::2 gt6 egress 0 8 "mirror to ip6gretap"
}
trap cleanup EXIT
@@ -102,14 +93,6 @@ trap cleanup EXIT
setup_prepare
setup_wait
-tcflags="skip_hw"
-test_all
-
-if ! tc_offload_check; then
- echo "WARN: Could not test offloaded functionality"
-else
- tcflags="skip_sw"
- test_all
-fi
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh b/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh
index 6f9ef1820e93..34bc646938e3 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh
@@ -75,42 +75,31 @@ cleanup()
test_gretap()
{
RET=0
- mirror_install $swp1 ingress gt4 "matchall $tcflags"
+ mirror_install $swp1 ingress gt4 "matchall"
# For IPv4, test that there's no mirroring without the route directing
# the traffic to tunnel remote address. Then add it and test that
# mirroring starts. For IPv6 we can't test this due to the limitation
# that routes for locally-specified IPv6 addresses can't be added.
- fail_test_span_gre_dir gt4 ingress
+ fail_test_span_gre_dir gt4
ip route add 192.0.2.130/32 via 192.0.2.162
- quick_test_span_gre_dir gt4 ingress
+ quick_test_span_gre_dir gt4
ip route del 192.0.2.130/32 via 192.0.2.162
mirror_uninstall $swp1 ingress
- log_test "mirror to gre with next-hop remote ($tcflags)"
+ log_test "mirror to gre with next-hop remote"
}
test_ip6gretap()
{
RET=0
- mirror_install $swp1 ingress gt6 "matchall $tcflags"
- quick_test_span_gre_dir gt6 ingress
+ mirror_install $swp1 ingress gt6 "matchall"
+ quick_test_span_gre_dir gt6
mirror_uninstall $swp1 ingress
- log_test "mirror to ip6gre with next-hop remote ($tcflags)"
-}
-
-test_all()
-{
- slow_path_trap_install $swp1 ingress
- slow_path_trap_install $swp1 egress
-
- tests_run
-
- slow_path_trap_uninstall $swp1 egress
- slow_path_trap_uninstall $swp1 ingress
+ log_test "mirror to ip6gre with next-hop remote"
}
trap cleanup EXIT
@@ -118,14 +107,6 @@ trap cleanup EXIT
setup_prepare
setup_wait
-tcflags="skip_hw"
-test_all
-
-if ! tc_offload_check; then
- echo "WARN: Could not test offloaded functionality"
-else
- tcflags="skip_sw"
- test_all
-fi
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh
index 88cecdb9a861..63689928cb51 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh
@@ -63,30 +63,11 @@ test_gretap()
full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap"
}
-test_all()
-{
- slow_path_trap_install $swp1 ingress
- slow_path_trap_install $swp1 egress
-
- tests_run
-
- slow_path_trap_uninstall $swp1 egress
- slow_path_trap_uninstall $swp1 ingress
-}
-
trap cleanup EXIT
setup_prepare
setup_wait
-tcflags="skip_hw"
-test_all
-
-if ! tc_offload_check; then
- echo "WARN: Could not test offloaded functionality"
-else
- tcflags="skip_sw"
- test_all
-fi
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
index c8a9b5bd841f..1b902cc579f6 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
@@ -153,21 +153,21 @@ test_span_gre_forbidden_cpu()
RET=0
# Run the pass-test first, to prime neighbor table.
- mirror_install $swp1 ingress $tundev "matchall $tcflags"
- quick_test_span_gre_dir $tundev ingress
+ mirror_install $swp1 ingress $tundev "matchall"
+ quick_test_span_gre_dir $tundev
# Now forbid the VLAN at the bridge and see it fail.
bridge vlan del dev br1 vid 555 self
sleep 1
- fail_test_span_gre_dir $tundev ingress
+ fail_test_span_gre_dir $tundev
bridge vlan add dev br1 vid 555 self
sleep 1
- quick_test_span_gre_dir $tundev ingress
+ quick_test_span_gre_dir $tundev
mirror_uninstall $swp1 ingress
- log_test "$what: vlan forbidden at a bridge ($tcflags)"
+ log_test "$what: vlan forbidden at a bridge"
}
test_gretap_forbidden_cpu()
@@ -187,22 +187,22 @@ test_span_gre_forbidden_egress()
RET=0
- mirror_install $swp1 ingress $tundev "matchall $tcflags"
- quick_test_span_gre_dir $tundev ingress
+ mirror_install $swp1 ingress $tundev "matchall"
+ quick_test_span_gre_dir $tundev
bridge vlan del dev $swp3 vid 555
sleep 1
- fail_test_span_gre_dir $tundev ingress
+ fail_test_span_gre_dir $tundev
bridge vlan add dev $swp3 vid 555
# Re-prime FDB
$ARPING -I br1.555 192.0.2.130 -fqc 1
sleep 1
- quick_test_span_gre_dir $tundev ingress
+ quick_test_span_gre_dir $tundev
mirror_uninstall $swp1 ingress
- log_test "$what: vlan forbidden at a bridge egress ($tcflags)"
+ log_test "$what: vlan forbidden at a bridge egress"
}
test_gretap_forbidden_egress()
@@ -223,30 +223,30 @@ test_span_gre_untagged_egress()
RET=0
- mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ mirror_install $swp1 ingress $tundev "matchall"
- quick_test_span_gre_dir $tundev ingress
- quick_test_span_vlan_dir $h3 555 ingress "$ul_proto"
+ quick_test_span_gre_dir $tundev
+ quick_test_span_vlan_dir $h3 555 "$ul_proto"
h3_addr_add_del del $h3.555
bridge vlan add dev $swp3 vid 555 pvid untagged
h3_addr_add_del add $h3
sleep 5
- quick_test_span_gre_dir $tundev ingress
- fail_test_span_vlan_dir $h3 555 ingress "$ul_proto"
+ quick_test_span_gre_dir $tundev
+ fail_test_span_vlan_dir $h3 555 "$ul_proto"
h3_addr_add_del del $h3
bridge vlan add dev $swp3 vid 555
h3_addr_add_del add $h3.555
sleep 5
- quick_test_span_gre_dir $tundev ingress
- quick_test_span_vlan_dir $h3 555 ingress "$ul_proto"
+ quick_test_span_gre_dir $tundev
+ quick_test_span_vlan_dir $h3 555 "$ul_proto"
mirror_uninstall $swp1 ingress
- log_test "$what: vlan untagged at a bridge egress ($tcflags)"
+ log_test "$what: vlan untagged at a bridge egress"
}
test_gretap_untagged_egress()
@@ -267,19 +267,19 @@ test_span_gre_fdb_roaming()
RET=0
- mirror_install $swp1 ingress $tundev "matchall $tcflags"
- quick_test_span_gre_dir $tundev ingress
+ mirror_install $swp1 ingress $tundev "matchall"
+ quick_test_span_gre_dir $tundev
while ((RET == 0)); do
bridge fdb del dev $swp3 $h3mac vlan 555 master 2>/dev/null
bridge fdb add dev $swp2 $h3mac vlan 555 master static
sleep 1
- fail_test_span_gre_dir $tundev ingress
+ fail_test_span_gre_dir $tundev
if ! bridge fdb sh dev $swp2 vlan 555 master \
| grep -q $h3mac; then
printf "TEST: %-60s [RETRY]\n" \
- "$what: MAC roaming ($tcflags)"
+ "$what: MAC roaming"
# ARP or ND probably reprimed the FDB while the test
# was running. We would get a spurious failure.
RET=0
@@ -292,11 +292,11 @@ test_span_gre_fdb_roaming()
# Re-prime FDB
$ARPING -I br1.555 192.0.2.130 -fqc 1
sleep 1
- quick_test_span_gre_dir $tundev ingress
+ quick_test_span_gre_dir $tundev
mirror_uninstall $swp1 ingress
- log_test "$what: MAC roaming ($tcflags)"
+ log_test "$what: MAC roaming"
}
test_gretap_fdb_roaming()
@@ -319,30 +319,11 @@ test_ip6gretap_stp()
full_test_span_gre_stp gt6 $swp3 "mirror to ip6gretap"
}
-test_all()
-{
- slow_path_trap_install $swp1 ingress
- slow_path_trap_install $swp1 egress
-
- tests_run
-
- slow_path_trap_uninstall $swp1 egress
- slow_path_trap_uninstall $swp1 ingress
-}
-
trap cleanup EXIT
setup_prepare
setup_wait
-tcflags="skip_hw"
-test_all
-
-if ! tc_offload_check; then
- echo "WARN: Could not test offloaded functionality"
-else
- tcflags="skip_sw"
- test_all
-fi
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh
index 3e8ebeff3019..6bf9d5ae933c 100644
--- a/tools/testing/selftests/net/forwarding/mirror_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh
@@ -44,14 +44,17 @@ mirror_test()
local type="icmp echoreq"
fi
+ if [[ -z ${expect//[[:digit:]]/} ]]; then
+ expect="== $expect"
+ fi
+
local t0=$(tc_rule_stats_get $dev $pref)
$MZ $proto $vrf_name ${sip:+-A $sip} -B $dip -a own -b bc -q \
-c 10 -d 100msec -t $type
sleep 0.5
local t1=$(tc_rule_stats_get $dev $pref)
local delta=$((t1 - t0))
- # Tolerate a couple stray extra packets.
- ((expect <= delta && delta <= expect + 2))
+ ((delta $expect))
check_err $? "Expected to capture $expect packets, got $delta."
}
@@ -59,36 +62,42 @@ do_test_span_dir_ips()
{
local expect=$1; shift
local dev=$1; shift
- local direction=$1; shift
local ip1=$1; shift
local ip2=$1; shift
+ local forward_type=${1-8}; shift
+ local backward_type=${1-0}; shift
- icmp_capture_install $dev
+ icmp_capture_install $dev "type $forward_type"
mirror_test v$h1 $ip1 $ip2 $dev 100 $expect
+ icmp_capture_uninstall $dev
+
+ icmp_capture_install $dev "type $backward_type"
mirror_test v$h2 $ip2 $ip1 $dev 100 $expect
icmp_capture_uninstall $dev
}
quick_test_span_dir_ips()
{
- do_test_span_dir_ips 10 "$@"
-}
+ local dev=$1; shift
+ local ip1=$1; shift
+ local ip2=$1; shift
+ local forward_type=${1-8}; shift
+ local backward_type=${1-0}; shift
-fail_test_span_dir_ips()
-{
- do_test_span_dir_ips 0 "$@"
+ do_test_span_dir_ips 10 "$dev" "$ip1" "$ip2" \
+ "$forward_type" "$backward_type"
}
test_span_dir_ips()
{
local dev=$1; shift
- local direction=$1; shift
local forward_type=$1; shift
local backward_type=$1; shift
local ip1=$1; shift
local ip2=$1; shift
- quick_test_span_dir_ips "$dev" "$direction" "$ip1" "$ip2"
+ quick_test_span_dir_ips "$dev" "$ip1" "$ip2" \
+ "$forward_type" "$backward_type"
icmp_capture_install $dev "type $forward_type"
mirror_test v$h1 $ip1 $ip2 $dev 100 10
@@ -99,14 +108,14 @@ test_span_dir_ips()
icmp_capture_uninstall $dev
}
-fail_test_span_dir()
-{
- fail_test_span_dir_ips "$@" 192.0.2.1 192.0.2.2
-}
-
test_span_dir()
{
- test_span_dir_ips "$@" 192.0.2.1 192.0.2.2
+ local dev=$1; shift
+ local forward_type=$1; shift
+ local backward_type=$1; shift
+
+ test_span_dir_ips "$dev" "$forward_type" "$backward_type" \
+ 192.0.2.1 192.0.2.2
}
do_test_span_vlan_dir_ips()
@@ -114,7 +123,6 @@ do_test_span_vlan_dir_ips()
local expect=$1; shift
local dev=$1; shift
local vid=$1; shift
- local direction=$1; shift
local ul_proto=$1; shift
local ip1=$1; shift
local ip2=$1; shift
@@ -123,27 +131,50 @@ do_test_span_vlan_dir_ips()
# The traffic is meant for local box anyway, so will be trapped to
# kernel.
vlan_capture_install $dev "skip_hw vlan_id $vid vlan_ethtype $ul_proto"
- mirror_test v$h1 $ip1 $ip2 $dev 100 $expect
- mirror_test v$h2 $ip2 $ip1 $dev 100 $expect
+ mirror_test v$h1 $ip1 $ip2 $dev 100 "$expect"
+ mirror_test v$h2 $ip2 $ip1 $dev 100 "$expect"
vlan_capture_uninstall $dev
}
quick_test_span_vlan_dir_ips()
{
- do_test_span_vlan_dir_ips 10 "$@"
+ local dev=$1; shift
+ local vid=$1; shift
+ local ul_proto=$1; shift
+ local ip1=$1; shift
+ local ip2=$1; shift
+
+ do_test_span_vlan_dir_ips '>= 10' "$dev" "$vid" "$ul_proto" \
+ "$ip1" "$ip2"
}
fail_test_span_vlan_dir_ips()
{
- do_test_span_vlan_dir_ips 0 "$@"
+ local dev=$1; shift
+ local vid=$1; shift
+ local ul_proto=$1; shift
+ local ip1=$1; shift
+ local ip2=$1; shift
+
+ do_test_span_vlan_dir_ips 0 "$dev" "$vid" "$ul_proto" "$ip1" "$ip2"
}
quick_test_span_vlan_dir()
{
- quick_test_span_vlan_dir_ips "$@" 192.0.2.1 192.0.2.2
+ local dev=$1; shift
+ local vid=$1; shift
+ local ul_proto=$1; shift
+
+ quick_test_span_vlan_dir_ips "$dev" "$vid" "$ul_proto" \
+ 192.0.2.1 192.0.2.2
}
fail_test_span_vlan_dir()
{
- fail_test_span_vlan_dir_ips "$@" 192.0.2.1 192.0.2.2
+ local dev=$1; shift
+ local vid=$1; shift
+ local ul_proto=$1; shift
+
+ fail_test_span_vlan_dir_ips "$dev" "$vid" "$ul_proto" \
+ 192.0.2.1 192.0.2.2
}
diff --git a/tools/testing/selftests/net/forwarding/mirror_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_vlan.sh
index 0b44e148235e..2f150a414d38 100755
--- a/tools/testing/selftests/net/forwarding/mirror_vlan.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_vlan.sh
@@ -40,12 +40,16 @@ setup_prepare()
vlan_create $h2 111 v$h2 192.0.2.18/28
bridge vlan add dev $swp2 vid 111
+
+ trap_install $h3 ingress
}
cleanup()
{
pre_cleanup
+ trap_uninstall $h3 ingress
+
vlan_destroy $h2 111
vlan_destroy $h1 111
vlan_destroy $h3 555
@@ -63,11 +67,11 @@ test_vlan_dir()
RET=0
- mirror_install $swp1 $direction $swp3.555 "matchall $tcflags"
- test_span_dir "$h3.555" "$direction" "$forward_type" "$backward_type"
+ mirror_install $swp1 $direction $swp3.555 "matchall"
+ test_span_dir "$h3.555" "$forward_type" "$backward_type"
mirror_uninstall $swp1 $direction
- log_test "$direction mirror to vlan ($tcflags)"
+ log_test "$direction mirror to vlan"
}
test_vlan()
@@ -84,14 +88,12 @@ test_tagged_vlan_dir()
RET=0
- mirror_install $swp1 $direction $swp3.555 "matchall $tcflags"
- do_test_span_vlan_dir_ips 10 "$h3.555" 111 "$direction" ip \
- 192.0.2.17 192.0.2.18
- do_test_span_vlan_dir_ips 0 "$h3.555" 555 "$direction" ip \
- 192.0.2.17 192.0.2.18
+ mirror_install $swp1 $direction $swp3.555 "matchall"
+ do_test_span_vlan_dir_ips '>= 10' "$h3.555" 111 ip 192.0.2.17 192.0.2.18
+ do_test_span_vlan_dir_ips 0 "$h3.555" 555 ip 192.0.2.17 192.0.2.18
mirror_uninstall $swp1 $direction
- log_test "$direction mirror tagged to vlan ($tcflags)"
+ log_test "$direction mirror tagged to vlan"
}
test_tagged_vlan()
@@ -100,32 +102,11 @@ test_tagged_vlan()
test_tagged_vlan_dir egress 0 8
}
-test_all()
-{
- slow_path_trap_install $swp1 ingress
- slow_path_trap_install $swp1 egress
- trap_install $h3 ingress
-
- tests_run
-
- trap_uninstall $h3 ingress
- slow_path_trap_uninstall $swp1 egress
- slow_path_trap_uninstall $swp1 ingress
-}
-
trap cleanup EXIT
setup_prepare
setup_wait
-tcflags="skip_hw"
-test_all
-
-if ! tc_offload_check; then
- echo "WARN: Could not test offloaded functionality"
-else
- tcflags="skip_sw"
- test_all
-fi
+tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
index 3f0f5dc95542..2ba44247c60a 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
@@ -1,6 +1,41 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# +-------------------------+
+# | H1 |
+# | $h1 + |
+# | 192.0.2.2/24 | |
+# | 2001:db8:1::2/64 | |
+# +-------------------|-----+
+# |
+# +-------------------|----------------------+
+# | | R1 |
+# | $rp11 + |
+# | 192.0.2.1/24 |
+# | 2001:db8:1::1/64 |
+# | |
+# | + $rp12 + $rp13 |
+# | | 169.254.2.12/24 | 169.254.3.13/24 |
+# | | fe80:2::12/64 | fe80:3::13/64 |
+# +--|--------------------|------------------+
+# | |
+# +--|--------------------|------------------+
+# | + $rp22 + $rp23 |
+# | 169.254.2.22/24 169.254.3.23/24 |
+# | fe80:2::22/64 fe80:3::23/64 |
+# | |
+# | $rp21 + |
+# | 198.51.100.1/24 | |
+# | 2001:db8:2::1/64 | R2 |
+# +-------------------|----------------------+
+# |
+# +-------------------|-----+
+# | | |
+# | $h2 + |
+# | 198.51.100.2/24 |
+# | 2001:db8:2::2/64 H2 |
+# +-------------------------+
+
ALL_TESTS="
ping_ipv4
ping_ipv6
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh
index 7e7d62161c34..2903294d8bca 100644
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh
@@ -56,21 +56,12 @@ nh_stats_test_dispatch_swhw()
local group_id=$1; shift
local mz="$@"
- local used
-
nh_stats_do_test "$what" "$nh1_id" "$nh2_id" "$group_id" \
nh_stats_get "${mz[@]}"
- used=$(ip -s -j -d nexthop show id $group_id |
- jq '.[].hw_stats.used')
- kind=$(ip -j -d link show dev $rp11 |
- jq -r '.[].linkinfo.info_kind')
- if [[ $used == true ]]; then
+ xfail_on_veth $rp11 \
nh_stats_do_test "HW $what" "$nh1_id" "$nh2_id" "$group_id" \
nh_stats_get_hw "${mz[@]}"
- elif [[ $kind == veth ]]; then
- log_test_skip "HW stats not offloaded on veth topology"
- fi
}
nh_stats_test_dispatch()
@@ -83,7 +74,6 @@ nh_stats_test_dispatch()
local mz="$@"
local enabled
- local kind
if ! ip nexthop help 2>&1 | grep -q hw_stats; then
log_test_skip "NH stats test: ip doesn't support HW stats"
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
index 4b483d24ad00..cd9e346436fc 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
@@ -1,6 +1,41 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# +-------------------------+
+# | H1 |
+# | $h1 + |
+# | 192.0.2.2/24 | |
+# | 2001:db8:1::2/64 | |
+# +-------------------|-----+
+# |
+# +-------------------|----------------------+
+# | | R1 |
+# | $rp11 + |
+# | 192.0.2.1/24 |
+# | 2001:db8:1::1/64 |
+# | |
+# | + $rp12 + $rp13 |
+# | | 169.254.2.12/24 | 169.254.3.13/24 |
+# | | fe80:2::12/64 | fe80:3::13/64 |
+# +--|--------------------|------------------+
+# | |
+# +--|--------------------|------------------+
+# | + $rp22 + $rp23 |
+# | 169.254.2.22/24 169.254.3.23/24 |
+# | fe80:2::22/64 fe80:3::23/64 |
+# | |
+# | $rp21 + |
+# | 198.51.100.1/24 | |
+# | 2001:db8:2::1/64 | R2 |
+# +-------------------|----------------------+
+# |
+# +-------------------|-----+
+# | | |
+# | $h2 + |
+# | 198.51.100.2/24 |
+# | 2001:db8:2::2/64 H2 |
+# +-------------------------+
+
ALL_TESTS="
ping_ipv4
ping_ipv6
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_seed.sh b/tools/testing/selftests/net/forwarding/router_mpath_seed.sh
new file mode 100755
index 000000000000..314cb906c1eb
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_mpath_seed.sh
@@ -0,0 +1,333 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-------------------------+ +-------------------------+
+# | H1 | | H2 |
+# | $h1 + | | + $h2 |
+# | 192.0.2.1/28 | | | | 192.0.2.34/28 |
+# | 2001:db8:1::1/64 | | | | 2001:db8:3::2/64 |
+# +-------------------|-----+ +-|-----------------------+
+# | |
+# +-------------------|-----+ +-|-----------------------+
+# | R1 | | | | R2 |
+# | $rp11 + | | + $rp21 |
+# | 192.0.2.2/28 | | 192.0.2.33/28 |
+# | 2001:db8:1::2/64 | | 2001:db8:3::1/64 |
+# | | | |
+# | $rp12 + | | + $rp22 |
+# | 192.0.2.17/28 | | | | 192.0.2.18..27/28 |
+# | 2001:db8:2::17/64 | | | | 2001:db8:2::18..27/64 |
+# +-------------------|-----+ +-|-----------------------+
+# | |
+# `----------'
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ test_mpath_seed_stability_ipv4
+ test_mpath_seed_stability_ipv6
+ test_mpath_seed_get
+ test_mpath_seed_ipv4
+ test_mpath_seed_ipv6
+"
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+ ip -4 route add 192.0.2.32/28 vrf v$h1 nexthop via 192.0.2.2
+ ip -6 route add 2001:db8:3::/64 vrf v$h1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+ ip -6 route del 2001:db8:3::/64 vrf v$h1 nexthop via 2001:db8:1::2
+ ip -4 route del 192.0.2.32/28 vrf v$h1 nexthop via 192.0.2.2
+ simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.34/28 2001:db8:3::2/64
+ ip -4 route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.33
+ ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:3::1
+}
+
+h2_destroy()
+{
+ ip -6 route del 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:3::1
+ ip -4 route del 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.33
+ simple_if_fini $h2 192.0.2.34/28 2001:db8:3::2/64
+}
+
+router1_create()
+{
+ simple_if_init $rp11 192.0.2.2/28 2001:db8:1::2/64
+ __simple_if_init $rp12 v$rp11 192.0.2.17/28 2001:db8:2::17/64
+}
+
+router1_destroy()
+{
+ __simple_if_fini $rp12 192.0.2.17/28 2001:db8:2::17/64
+ simple_if_fini $rp11 192.0.2.2/28 2001:db8:1::2/64
+}
+
+router2_create()
+{
+ simple_if_init $rp21 192.0.2.33/28 2001:db8:3::1/64
+ __simple_if_init $rp22 v$rp21 192.0.2.18/28 2001:db8:2::18/64
+ ip -4 route add 192.0.2.0/28 vrf v$rp21 nexthop via 192.0.2.17
+ ip -6 route add 2001:db8:1::/64 vrf v$rp21 nexthop via 2001:db8:2::17
+}
+
+router2_destroy()
+{
+ ip -6 route del 2001:db8:1::/64 vrf v$rp21 nexthop via 2001:db8:2::17
+ ip -4 route del 192.0.2.0/28 vrf v$rp21 nexthop via 192.0.2.17
+ __simple_if_fini $rp22 192.0.2.18/28 2001:db8:2::18/64
+ simple_if_fini $rp21 192.0.2.33/28 2001:db8:3::1/64
+}
+
+nexthops_create()
+{
+ local i
+ for i in $(seq 10); do
+ ip nexthop add id $((1000 + i)) via 192.0.2.18 dev $rp12
+ ip nexthop add id $((2000 + i)) via 2001:db8:2::18 dev $rp12
+ done
+
+ ip nexthop add id 1000 group $(seq -s / 1001 1010) hw_stats on
+ ip nexthop add id 2000 group $(seq -s / 2001 2010) hw_stats on
+ ip -4 route add 192.0.2.32/28 vrf v$rp11 nhid 1000
+ ip -6 route add 2001:db8:3::/64 vrf v$rp11 nhid 2000
+}
+
+nexthops_destroy()
+{
+ local i
+
+ ip -6 route del 2001:db8:3::/64 vrf v$rp11 nhid 2000
+ ip -4 route del 192.0.2.32/28 vrf v$rp11 nhid 1000
+ ip nexthop del id 2000
+ ip nexthop del id 1000
+
+ for i in $(seq 10 -1 1); do
+ ip nexthop del id $((2000 + i))
+ ip nexthop del id $((1000 + i))
+ done
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp11=${NETIFS[p2]}
+
+ rp12=${NETIFS[p3]}
+ rp22=${NETIFS[p4]}
+
+ rp21=${NETIFS[p5]}
+ h2=${NETIFS[p6]}
+
+ sysctl_save net.ipv4.fib_multipath_hash_seed
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ router1_create
+ router2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ nexthops_destroy
+ router2_destroy
+ router1_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+
+ sysctl_restore net.ipv4.fib_multipath_hash_seed
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.34
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:3::2
+}
+
+test_mpath_seed_get()
+{
+ RET=0
+
+ local i
+ for ((i = 0; i < 100; i++)); do
+ local seed_w=$((999331 * i))
+ sysctl -qw net.ipv4.fib_multipath_hash_seed=$seed_w
+ local seed_r=$(sysctl -n net.ipv4.fib_multipath_hash_seed)
+ ((seed_r == seed_w))
+ check_err $? "mpath seed written as $seed_w, but read as $seed_r"
+ done
+
+ log_test "mpath seed set/get"
+}
+
+nh_stats_snapshot()
+{
+ local group_id=$1; shift
+
+ ip -j -s -s nexthop show id $group_id |
+ jq -c '[.[].group_stats | sort_by(.id) | .[].packets]'
+}
+
+get_active_nh()
+{
+ local s0=$1; shift
+ local s1=$1; shift
+
+ jq -n --argjson s0 "$s0" --argjson s1 "$s1" -f /dev/stdin <<-"EOF"
+ [range($s0 | length)] |
+ map($s1[.] - $s0[.]) |
+ map(if . > 8 then 1 else 0 end) |
+ index(1)
+ EOF
+}
+
+probe_nh()
+{
+ local group_id=$1; shift
+ local -a mz=("$@")
+
+ local s0=$(nh_stats_snapshot $group_id)
+ "${mz[@]}"
+ local s1=$(nh_stats_snapshot $group_id)
+
+ get_active_nh "$s0" "$s1"
+}
+
+probe_seed()
+{
+ local group_id=$1; shift
+ local seed=$1; shift
+ local -a mz=("$@")
+
+ sysctl -qw net.ipv4.fib_multipath_hash_seed=$seed
+ probe_nh "$group_id" "${mz[@]}"
+}
+
+test_mpath_seed()
+{
+ local group_id=$1; shift
+ local what=$1; shift
+ local -a mz=("$@")
+ local ii
+
+ RET=0
+
+ local -a tally=(0 0 0 0 0 0 0 0 0 0)
+ for ((ii = 0; ii < 100; ii++)); do
+ local act=$(probe_seed $group_id $((999331 * ii)) "${mz[@]}")
+ ((tally[act]++))
+ done
+
+ local tally_str="${tally[@]}"
+ for ((ii = 0; ii < ${#tally[@]}; ii++)); do
+ ((tally[ii] > 0))
+ check_err $? "NH #$ii not hit, tally='$tally_str'"
+ done
+
+ log_test "mpath seed $what"
+ sysctl -qw net.ipv4.fib_multipath_hash_seed=0
+}
+
+test_mpath_seed_ipv4()
+{
+ test_mpath_seed 1000 IPv4 \
+ $MZ $h1 -A 192.0.2.1 -B 192.0.2.34 -q \
+ -p 64 -d 0 -c 10 -t udp
+}
+
+test_mpath_seed_ipv6()
+{
+ test_mpath_seed 2000 IPv6 \
+ $MZ -6 $h1 -A 2001:db8:1::1 -B 2001:db8:3::2 -q \
+ -p 64 -d 0 -c 10 -t udp
+}
+
+check_mpath_seed_stability()
+{
+ local seed=$1; shift
+ local act_0=$1; shift
+ local act_1=$1; shift
+
+ ((act_0 == act_1))
+ check_err $? "seed $seed: active NH moved from $act_0 to $act_1 after seed change"
+}
+
+test_mpath_seed_stability()
+{
+ local group_id=$1; shift
+ local what=$1; shift
+ local -a mz=("$@")
+
+ RET=0
+
+ local seed_0=0
+ local seed_1=3221338814
+ local seed_2=3735928559
+
+ # Initial active NH before touching the seed at all.
+ local act_ini=$(probe_nh $group_id "${mz[@]}")
+
+ local act_0_0=$(probe_seed $group_id $seed_0 "${mz[@]}")
+ local act_1_0=$(probe_seed $group_id $seed_1 "${mz[@]}")
+ local act_2_0=$(probe_seed $group_id $seed_2 "${mz[@]}")
+
+ local act_0_1=$(probe_seed $group_id $seed_0 "${mz[@]}")
+ local act_1_1=$(probe_seed $group_id $seed_1 "${mz[@]}")
+ local act_2_1=$(probe_seed $group_id $seed_2 "${mz[@]}")
+
+ check_mpath_seed_stability initial $act_ini $act_0_0
+ check_mpath_seed_stability $seed_0 $act_0_0 $act_0_1
+ check_mpath_seed_stability $seed_1 $act_1_0 $act_1_1
+ check_mpath_seed_stability $seed_2 $act_2_0 $act_2_1
+
+ log_test "mpath seed stability $what"
+ sysctl -qw net.ipv4.fib_multipath_hash_seed=0
+}
+
+test_mpath_seed_stability_ipv4()
+{
+ test_mpath_seed_stability 1000 IPv4 \
+ $MZ $h1 -A 192.0.2.1 -B 192.0.2.34 -q \
+ -p 64 -d 0 -c 10 -t udp
+}
+
+test_mpath_seed_stability_ipv6()
+{
+ test_mpath_seed_stability 2000 IPv6 \
+ $MZ -6 $h1 -A 2001:db8:1::1 -B 2001:db8:3::2 -q \
+ -p 64 -d 0 -c 10 -t udp
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+nexthops_create
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_nh.sh b/tools/testing/selftests/net/forwarding/router_nh.sh
index f3a53738bdcc..92904b01eae9 100755
--- a/tools/testing/selftests/net/forwarding/router_nh.sh
+++ b/tools/testing/selftests/net/forwarding/router_nh.sh
@@ -1,6 +1,20 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# +-------------------------+ +-------------------------+
+# | H1 | | H2 |
+# | $h1 + | | $h2 + |
+# | 192.0.2.2/24 | | | 198.51.100.2/24 | |
+# | 2001:db8:1::2/64 | | | 2001:db8:2::2/64 | |
+# +-------------------|-----+ +-------------------|-----+
+# | |
+# +-------------------|----------------------------|-----+
+# | R1 | | |
+# | $rp1 + $rp2 + |
+# | 192.0.2.1/24 198.51.100.1/24 |
+# | 2001:db8:1::1/64 2001:db8:2::1/64 |
+# +------------------------------------------------------+
+
ALL_TESTS="
ping_ipv4
ping_ipv6
diff --git a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh
index cdf689e99458..f9d26a7911bb 100644
--- a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh
+++ b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh
@@ -199,25 +199,28 @@ ets_set_dwrr_two_bands()
ets_test_strict()
{
ets_set_strict
- ets_dwrr_test_01
- ets_dwrr_test_12
+ xfail_on_slow ets_dwrr_test_01
+ xfail_on_slow ets_dwrr_test_12
}
ets_test_mixed()
{
ets_set_mixed
- ets_dwrr_test_01
- ets_dwrr_test_12
+ xfail_on_slow ets_dwrr_test_01
+ xfail_on_slow ets_dwrr_test_12
}
ets_test_dwrr()
{
ets_set_dwrr_uniform
- ets_dwrr_test_012
+ xfail_on_slow ets_dwrr_test_012
+
ets_set_dwrr_varying
- ets_dwrr_test_012
+ xfail_on_slow ets_dwrr_test_012
+
ets_change_quantum
- ets_dwrr_test_012
+ xfail_on_slow ets_dwrr_test_012
+
ets_set_dwrr_two_bands
- ets_dwrr_test_01
+ xfail_on_slow ets_dwrr_test_01
}
diff --git a/tools/testing/selftests/net/forwarding/sch_red.sh b/tools/testing/selftests/net/forwarding/sch_red.sh
index 81f31179ac88..17f28644568e 100755
--- a/tools/testing/selftests/net/forwarding/sch_red.sh
+++ b/tools/testing/selftests/net/forwarding/sch_red.sh
@@ -451,35 +451,35 @@ uninstall_qdisc()
ecn_test()
{
install_qdisc ecn
- do_ecn_test $BACKLOG
+ xfail_on_slow do_ecn_test $BACKLOG
uninstall_qdisc
}
ecn_nodrop_test()
{
install_qdisc ecn nodrop
- do_ecn_nodrop_test $BACKLOG
+ xfail_on_slow do_ecn_nodrop_test $BACKLOG
uninstall_qdisc
}
red_test()
{
install_qdisc
- do_red_test $BACKLOG
+ xfail_on_slow do_red_test $BACKLOG
uninstall_qdisc
}
red_qevent_test()
{
install_qdisc qevent early_drop block 10
- do_red_qevent_test $BACKLOG
+ xfail_on_slow do_red_qevent_test $BACKLOG
uninstall_qdisc
}
ecn_qevent_test()
{
install_qdisc ecn qevent mark block 10
- do_ecn_qevent_test $BACKLOG
+ xfail_on_slow do_ecn_qevent_test $BACKLOG
uninstall_qdisc
}
diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_core.sh b/tools/testing/selftests/net/forwarding/sch_tbf_core.sh
index d1f26cb7cd73..9cd884d4a5de 100644
--- a/tools/testing/selftests/net/forwarding/sch_tbf_core.sh
+++ b/tools/testing/selftests/net/forwarding/sch_tbf_core.sh
@@ -227,7 +227,7 @@ do_tbf_test()
local nr=$(rate $t2 $t3 10)
local nr_pct=$((100 * (nr - er) / er))
((-5 <= nr_pct && nr_pct <= 5))
- check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-5%."
+ xfail_on_slow check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-5%."
log_test "TC $((vlan - 10)): TBF rate ${mbit}Mbit"
}
diff --git a/tools/testing/selftests/net/forwarding/tc_common.sh b/tools/testing/selftests/net/forwarding/tc_common.sh
index bce8bb8d2b6f..2e3326edfa9a 100644
--- a/tools/testing/selftests/net/forwarding/tc_common.sh
+++ b/tools/testing/selftests/net/forwarding/tc_common.sh
@@ -4,7 +4,7 @@
CHECK_TC="yes"
# Can be overridden by the configuration file. See lib.sh
-TC_HIT_TIMEOUT=${TC_HIT_TIMEOUT:=1000} # ms
+: "${TC_HIT_TIMEOUT:=1000}" # ms
tc_check_packets()
{
diff --git a/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh b/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh
index 5a5dd9034819..79775b10b99f 100755
--- a/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh
+++ b/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh
@@ -1,7 +1,5 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
ALL_TESTS="tunnel_key_nofrag_test"
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
index 6f0a2e452ba1..3f9d50f1ef9e 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
@@ -680,9 +680,9 @@ test_learning()
local mac=de:ad:be:ef:13:37
local dst=192.0.2.100
- # Enable learning on the VxLAN device and set ageing time to 10 seconds
- ip link set dev br1 type bridge ageing_time 1000
- ip link set dev vx1 type vxlan ageing 10
+ # Enable learning on the VxLAN device and set ageing time to 30 seconds
+ ip link set dev br1 type bridge ageing_time 3000
+ ip link set dev vx1 type vxlan ageing 30
ip link set dev vx1 type vxlan learning
reapply_config
@@ -740,7 +740,7 @@ test_learning()
vxlan_flood_test $mac $dst 0 10 0
- sleep 20
+ sleep 60
bridge fdb show brport vx1 | grep $mac | grep -q self
check_fail $?
diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c
index 353e1e867fbb..b2184847e388 100644
--- a/tools/testing/selftests/net/gro.c
+++ b/tools/testing/selftests/net/gro.c
@@ -93,6 +93,7 @@ static bool tx_socket = true;
static int tcp_offset = -1;
static int total_hdr_len = -1;
static int ethhdr_proto = -1;
+static const int num_flush_id_cases = 6;
static void vlog(const char *fmt, ...)
{
@@ -119,6 +120,9 @@ static void setup_sock_filter(int fd)
next_off = offsetof(struct ipv6hdr, nexthdr);
ipproto_off = ETH_HLEN + next_off;
+ /* Overridden later if exthdrs are used: */
+ opt_ipproto_off = ipproto_off;
+
if (strcmp(testname, "ip") == 0) {
if (proto == PF_INET)
optlen = sizeof(struct ip_timestamp);
@@ -617,6 +621,113 @@ static void add_ipv6_exthdr(void *buf, void *optpkt, __u8 exthdr_type, char *ext
iph->payload_len = htons(ntohs(iph->payload_len) + MIN_EXTHDR_SIZE);
}
+static void fix_ip4_checksum(struct iphdr *iph)
+{
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+}
+
+static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase)
+{
+ static char buf1[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char buf2[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char buf3[MAX_HDR_LEN + PAYLOAD_LEN];
+ bool send_three = false;
+ struct iphdr *iph1;
+ struct iphdr *iph2;
+ struct iphdr *iph3;
+
+ iph1 = (struct iphdr *)(buf1 + ETH_HLEN);
+ iph2 = (struct iphdr *)(buf2 + ETH_HLEN);
+ iph3 = (struct iphdr *)(buf3 + ETH_HLEN);
+
+ create_packet(buf1, 0, 0, PAYLOAD_LEN, 0);
+ create_packet(buf2, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ create_packet(buf3, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+
+ switch (tcase) {
+ case 0: /* DF=1, Incrementing - should coalesce */
+ iph1->frag_off |= htons(IP_DF);
+ iph1->id = htons(8);
+
+ iph2->frag_off |= htons(IP_DF);
+ iph2->id = htons(9);
+ break;
+
+ case 1: /* DF=1, Fixed - should coalesce */
+ iph1->frag_off |= htons(IP_DF);
+ iph1->id = htons(8);
+
+ iph2->frag_off |= htons(IP_DF);
+ iph2->id = htons(8);
+ break;
+
+ case 2: /* DF=0, Incrementing - should coalesce */
+ iph1->frag_off &= ~htons(IP_DF);
+ iph1->id = htons(8);
+
+ iph2->frag_off &= ~htons(IP_DF);
+ iph2->id = htons(9);
+ break;
+
+ case 3: /* DF=0, Fixed - should not coalesce */
+ iph1->frag_off &= ~htons(IP_DF);
+ iph1->id = htons(8);
+
+ iph2->frag_off &= ~htons(IP_DF);
+ iph2->id = htons(8);
+ break;
+
+ case 4: /* DF=1, two packets incrementing, and one fixed - should
+ * coalesce only the first two packets
+ */
+ iph1->frag_off |= htons(IP_DF);
+ iph1->id = htons(8);
+
+ iph2->frag_off |= htons(IP_DF);
+ iph2->id = htons(9);
+
+ iph3->frag_off |= htons(IP_DF);
+ iph3->id = htons(9);
+ send_three = true;
+ break;
+
+ case 5: /* DF=1, two packets fixed, and one incrementing - should
+ * coalesce only the first two packets
+ */
+ iph1->frag_off |= htons(IP_DF);
+ iph1->id = htons(8);
+
+ iph2->frag_off |= htons(IP_DF);
+ iph2->id = htons(8);
+
+ iph3->frag_off |= htons(IP_DF);
+ iph3->id = htons(9);
+ send_three = true;
+ break;
+ }
+
+ fix_ip4_checksum(iph1);
+ fix_ip4_checksum(iph2);
+ write_packet(fd, buf1, total_hdr_len + PAYLOAD_LEN, daddr);
+ write_packet(fd, buf2, total_hdr_len + PAYLOAD_LEN, daddr);
+
+ if (send_three) {
+ fix_ip4_checksum(iph3);
+ write_packet(fd, buf3, total_hdr_len + PAYLOAD_LEN, daddr);
+ }
+}
+
+static void test_flush_id(int fd, struct sockaddr_ll *daddr, char *fin_pkt)
+{
+ for (int i = 0; i < num_flush_id_cases; i++) {
+ sleep(1);
+ send_flush_id_case(fd, daddr, i);
+ sleep(1);
+ write_packet(fd, fin_pkt, total_hdr_len, daddr);
+ }
+}
+
static void send_ipv6_exthdr(int fd, struct sockaddr_ll *daddr, char *ext_data1, char *ext_data2)
{
static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
@@ -935,6 +1046,8 @@ static void gro_sender(void)
send_fragment4(txfd, &daddr);
sleep(1);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ test_flush_id(txfd, &daddr, fin_pkt);
} else if (proto == PF_INET6) {
sleep(1);
send_fragment6(txfd, &daddr);
@@ -1061,6 +1174,34 @@ static void gro_receiver(void)
printf("fragmented ip4 doesn't coalesce: ");
check_recv_pkts(rxfd, correct_payload, 2);
+
+ /* is_atomic checks */
+ printf("DF=1, Incrementing - should coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 1);
+
+ printf("DF=1, Fixed - should coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 1);
+
+ printf("DF=0, Incrementing - should coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 1);
+
+ printf("DF=0, Fixed - should not coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN;
+ correct_payload[1] = PAYLOAD_LEN;
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ printf("DF=1, 2 Incrementing and one fixed - should coalesce only first 2 packets: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ correct_payload[1] = PAYLOAD_LEN;
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ printf("DF=1, 2 Fixed and one incrementing - should coalesce only first 2 packets: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ correct_payload[1] = PAYLOAD_LEN;
+ check_recv_pkts(rxfd, correct_payload, 2);
} else if (proto == PF_INET6) {
/* GRO doesn't check for ipv6 hop limit when flushing.
* Hence no corresponding test to the ipv4 case.
diff --git a/tools/testing/selftests/net/hsr/Makefile b/tools/testing/selftests/net/hsr/Makefile
index 92c1d9d080cd..884cd2cc0681 100644
--- a/tools/testing/selftests/net/hsr/Makefile
+++ b/tools/testing/selftests/net/hsr/Makefile
@@ -2,6 +2,7 @@
top_srcdir = ../../../../..
-TEST_PROGS := hsr_ping.sh
+TEST_PROGS := hsr_ping.sh hsr_redbox.sh
+TEST_FILES += hsr_common.sh
include ../../lib.mk
diff --git a/tools/testing/selftests/net/hsr/config b/tools/testing/selftests/net/hsr/config
index 22061204fb69..241542441c51 100644
--- a/tools/testing/selftests/net/hsr/config
+++ b/tools/testing/selftests/net/hsr/config
@@ -2,3 +2,4 @@ CONFIG_IPV6=y
CONFIG_NET_SCH_NETEM=m
CONFIG_HSR=y
CONFIG_VETH=y
+CONFIG_BRIDGE=y
diff --git a/tools/testing/selftests/net/hsr/hsr_common.sh b/tools/testing/selftests/net/hsr/hsr_common.sh
new file mode 100644
index 000000000000..8e97b1f2e7e5
--- /dev/null
+++ b/tools/testing/selftests/net/hsr/hsr_common.sh
@@ -0,0 +1,84 @@
+# SPDX-License-Identifier: GPL-2.0
+# Common code for HSR testing scripts
+
+source ../lib.sh
+ret=0
+ksft_skip=4
+
+# $1: IP address
+is_v6()
+{
+ [ -z "${1##*:*}" ]
+}
+
+do_ping()
+{
+ local netns="$1"
+ local connect_addr="$2"
+ local ping_args="-q -c 2"
+
+ if is_v6 "${connect_addr}"; then
+ $ipv6 || return 0
+ ping_args="${ping_args} -6"
+ fi
+
+ ip netns exec ${netns} ping ${ping_args} $connect_addr >/dev/null
+ if [ $? -ne 0 ] ; then
+ echo "$netns -> $connect_addr connectivity [ FAIL ]" 1>&2
+ ret=1
+ return 1
+ fi
+
+ return 0
+}
+
+do_ping_long()
+{
+ local netns="$1"
+ local connect_addr="$2"
+ local ping_args="-q -c 10"
+
+ if is_v6 "${connect_addr}"; then
+ $ipv6 || return 0
+ ping_args="${ping_args} -6"
+ fi
+
+ OUT="$(LANG=C ip netns exec ${netns} ping ${ping_args} $connect_addr | grep received)"
+ if [ $? -ne 0 ] ; then
+ echo "$netns -> $connect_addr ping [ FAIL ]" 1>&2
+ ret=1
+ return 1
+ fi
+
+ VAL="$(echo $OUT | cut -d' ' -f1-8)"
+ SED_VAL="$(echo ${VAL} | sed -r -e 's/([0-9]{2}).*([0-9]{2}).*[[:space:]]([0-9]+%).*/\1 transmitted \2 received \3 loss/')"
+ if [ "${SED_VAL}" != "10 transmitted 10 received 0% loss" ]
+ then
+ echo "$netns -> $connect_addr ping TEST [ FAIL ]"
+ echo "Expect to send and receive 10 packets and no duplicates."
+ echo "Full message: ${OUT}."
+ ret=1
+ return 1
+ fi
+
+ return 0
+}
+
+stop_if_error()
+{
+ local msg="$1"
+
+ if [ ${ret} -ne 0 ]; then
+ echo "FAIL: ${msg}" 1>&2
+ exit ${ret}
+ fi
+}
+
+check_prerequisites()
+{
+ ip -Version > /dev/null 2>&1
+ if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+ fi
+}
diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh
index 1c6457e54625..f5d207fc770a 100755
--- a/tools/testing/selftests/net/hsr/hsr_ping.sh
+++ b/tools/testing/selftests/net/hsr/hsr_ping.sh
@@ -1,10 +1,10 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-ret=0
-ksft_skip=4
ipv6=true
+source ./hsr_common.sh
+
optstring="h4"
usage() {
echo "Usage: $0 [OPTION]"
@@ -27,88 +27,6 @@ while getopts "$optstring" option;do
esac
done
-sec=$(date +%s)
-rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
-ns1="ns1-$rndh"
-ns2="ns2-$rndh"
-ns3="ns3-$rndh"
-
-cleanup()
-{
- local netns
- for netns in "$ns1" "$ns2" "$ns3" ;do
- ip netns del $netns
- done
-}
-
-# $1: IP address
-is_v6()
-{
- [ -z "${1##*:*}" ]
-}
-
-do_ping()
-{
- local netns="$1"
- local connect_addr="$2"
- local ping_args="-q -c 2"
-
- if is_v6 "${connect_addr}"; then
- $ipv6 || return 0
- ping_args="${ping_args} -6"
- fi
-
- ip netns exec ${netns} ping ${ping_args} $connect_addr >/dev/null
- if [ $? -ne 0 ] ; then
- echo "$netns -> $connect_addr connectivity [ FAIL ]" 1>&2
- ret=1
- return 1
- fi
-
- return 0
-}
-
-do_ping_long()
-{
- local netns="$1"
- local connect_addr="$2"
- local ping_args="-q -c 10"
-
- if is_v6 "${connect_addr}"; then
- $ipv6 || return 0
- ping_args="${ping_args} -6"
- fi
-
- OUT="$(LANG=C ip netns exec ${netns} ping ${ping_args} $connect_addr | grep received)"
- if [ $? -ne 0 ] ; then
- echo "$netns -> $connect_addr ping [ FAIL ]" 1>&2
- ret=1
- return 1
- fi
-
- VAL="$(echo $OUT | cut -d' ' -f1-8)"
- if [ "$VAL" != "10 packets transmitted, 10 received, 0% packet loss," ]
- then
- echo "$netns -> $connect_addr ping TEST [ FAIL ]"
- echo "Expect to send and receive 10 packets and no duplicates."
- echo "Full message: ${OUT}."
- ret=1
- return 1
- fi
-
- return 0
-}
-
-stop_if_error()
-{
- local msg="$1"
-
- if [ ${ret} -ne 0 ]; then
- echo "FAIL: ${msg}" 1>&2
- exit ${ret}
- fi
-}
-
do_complete_ping_test()
{
echo "INFO: Initial validation ping."
@@ -234,6 +152,15 @@ setup_hsr_interfaces()
ip -net "$ns3" addr add 100.64.0.3/24 dev hsr3
ip -net "$ns3" addr add dead:beef:1::3/64 dev hsr3 nodad
+ ip -net "$ns1" link set address 00:11:22:00:01:01 dev ns1eth1
+ ip -net "$ns1" link set address 00:11:22:00:01:02 dev ns1eth2
+
+ ip -net "$ns2" link set address 00:11:22:00:02:01 dev ns2eth1
+ ip -net "$ns2" link set address 00:11:22:00:02:02 dev ns2eth2
+
+ ip -net "$ns3" link set address 00:11:22:00:03:01 dev ns3eth1
+ ip -net "$ns3" link set address 00:11:22:00:03:02 dev ns3eth2
+
# All Links up
ip -net "$ns1" link set ns1eth1 up
ip -net "$ns1" link set ns1eth2 up
@@ -248,27 +175,15 @@ setup_hsr_interfaces()
ip -net "$ns3" link set hsr3 up
}
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
+check_prerequisites
+setup_ns ns1 ns2 ns3
-trap cleanup EXIT
-
-for i in "$ns1" "$ns2" "$ns3" ;do
- ip netns add $i || exit $ksft_skip
- ip -net $i link set lo up
-done
+trap cleanup_all_ns EXIT
setup_hsr_interfaces 0
do_complete_ping_test
-cleanup
-for i in "$ns1" "$ns2" "$ns3" ;do
- ip netns add $i || exit $ksft_skip
- ip -net $i link set lo up
-done
+setup_ns ns1 ns2 ns3
setup_hsr_interfaces 1
do_complete_ping_test
diff --git a/tools/testing/selftests/net/hsr/hsr_redbox.sh b/tools/testing/selftests/net/hsr/hsr_redbox.sh
new file mode 100755
index 000000000000..998103502d5d
--- /dev/null
+++ b/tools/testing/selftests/net/hsr/hsr_redbox.sh
@@ -0,0 +1,136 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ipv6=false
+
+source ./hsr_common.sh
+
+do_complete_ping_test()
+{
+ echo "INFO: Initial validation ping (HSR-SAN/RedBox)."
+ # Each node has to be able to reach each one.
+ do_ping "${ns1}" 100.64.0.2
+ do_ping "${ns2}" 100.64.0.1
+ # Ping between SANs (test bridge)
+ do_ping "${ns4}" 100.64.0.51
+ do_ping "${ns5}" 100.64.0.41
+ # Ping from SANs to hsr1 (via hsr2) (and opposite)
+ do_ping "${ns3}" 100.64.0.1
+ do_ping "${ns1}" 100.64.0.3
+ do_ping "${ns1}" 100.64.0.41
+ do_ping "${ns4}" 100.64.0.1
+ do_ping "${ns1}" 100.64.0.51
+ do_ping "${ns5}" 100.64.0.1
+ stop_if_error "Initial validation failed."
+
+ # Wait for MGNT HSR frames being received and nodes being
+ # merged.
+ sleep 5
+
+ echo "INFO: Longer ping test (HSR-SAN/RedBox)."
+ # Ping from SAN to hsr1 (via hsr2)
+ do_ping_long "${ns3}" 100.64.0.1
+ # Ping from hsr1 (via hsr2) to SANs (and opposite)
+ do_ping_long "${ns1}" 100.64.0.3
+ do_ping_long "${ns1}" 100.64.0.41
+ do_ping_long "${ns4}" 100.64.0.1
+ do_ping_long "${ns1}" 100.64.0.51
+ do_ping_long "${ns5}" 100.64.0.1
+ stop_if_error "Longer ping test failed."
+
+ echo "INFO: All good."
+}
+
+setup_hsr_interfaces()
+{
+ local HSRv="$1"
+
+ echo "INFO: preparing interfaces for HSRv${HSRv} (HSR-SAN/RedBox)."
+#
+# IPv4 addresses (100.64.X.Y/24), and [X.Y] is presented on below diagram:
+#
+#
+# |NS1 | |NS4 |
+# | [0.1] | | |
+# | /-- hsr1 --\ | | [0.41] |
+# | ns1eth1 ns1eth2 | | ns4eth1 (SAN) |
+# |------------------------| |-------------------|
+# | | |
+# | | |
+# | | |
+# |------------------------| |-------------------------------|
+# | ns2eth1 ns2eth2 | | ns3eth2 |
+# | \-- hsr2 --/ | | / |
+# | [0.2] \ | | / | |------------|
+# | ns2eth3 |---| ns3eth1 -- ns3br1 -- ns3eth3--|--| ns5eth1 |
+# | (interlink)| | [0.3] [0.11] | | [0.51] |
+# |NS2 (RedBOX) | |NS3 (BR) | | NS5 (SAN) |
+#
+#
+ # Check if iproute2 supports adding interlink port to hsrX device
+ ip link help hsr | grep -q INTERLINK
+ [ $? -ne 0 ] && { echo "iproute2: HSR interlink interface not supported!"; exit 0; }
+
+ # Create interfaces for name spaces
+ ip link add ns1eth1 netns "${ns1}" type veth peer name ns2eth1 netns "${ns2}"
+ ip link add ns1eth2 netns "${ns1}" type veth peer name ns2eth2 netns "${ns2}"
+ ip link add ns2eth3 netns "${ns2}" type veth peer name ns3eth1 netns "${ns3}"
+ ip link add ns3eth2 netns "${ns3}" type veth peer name ns4eth1 netns "${ns4}"
+ ip link add ns3eth3 netns "${ns3}" type veth peer name ns5eth1 netns "${ns5}"
+
+ sleep 1
+
+ ip -n "${ns1}" link set ns1eth1 up
+ ip -n "${ns1}" link set ns1eth2 up
+
+ ip -n "${ns2}" link set ns2eth1 up
+ ip -n "${ns2}" link set ns2eth2 up
+ ip -n "${ns2}" link set ns2eth3 up
+
+ ip -n "${ns3}" link add name ns3br1 type bridge
+ ip -n "${ns3}" link set ns3br1 up
+ ip -n "${ns3}" link set ns3eth1 master ns3br1 up
+ ip -n "${ns3}" link set ns3eth2 master ns3br1 up
+ ip -n "${ns3}" link set ns3eth3 master ns3br1 up
+
+ ip -n "${ns4}" link set ns4eth1 up
+ ip -n "${ns5}" link set ns5eth1 up
+
+ ip -net "$ns1" link set address 00:11:22:00:01:01 dev ns1eth1
+ ip -net "$ns1" link set address 00:11:22:00:01:02 dev ns1eth2
+
+ ip -net "$ns2" link set address 00:11:22:00:02:01 dev ns2eth1
+ ip -net "$ns2" link set address 00:11:22:00:02:02 dev ns2eth2
+ ip -net "$ns2" link set address 00:11:22:00:02:03 dev ns2eth3
+
+ ip -net "$ns3" link set address 00:11:22:00:03:11 dev ns3eth1
+ ip -net "$ns3" link set address 00:11:22:00:03:11 dev ns3eth2
+ ip -net "$ns3" link set address 00:11:22:00:03:11 dev ns3eth3
+ ip -net "$ns3" link set address 00:11:22:00:03:11 dev ns3br1
+
+ ip -net "$ns4" link set address 00:11:22:00:04:01 dev ns4eth1
+ ip -net "$ns5" link set address 00:11:22:00:05:01 dev ns5eth1
+
+ ip -net "${ns1}" link add name hsr1 type hsr slave1 ns1eth1 slave2 ns1eth2 supervision 45 version ${HSRv} proto 0
+ ip -net "${ns2}" link add name hsr2 type hsr slave1 ns2eth1 slave2 ns2eth2 interlink ns2eth3 supervision 45 version ${HSRv} proto 0
+
+ ip -n "${ns1}" addr add 100.64.0.1/24 dev hsr1
+ ip -n "${ns2}" addr add 100.64.0.2/24 dev hsr2
+ ip -n "${ns3}" addr add 100.64.0.11/24 dev ns3br1
+ ip -n "${ns3}" addr add 100.64.0.3/24 dev ns3eth1
+ ip -n "${ns4}" addr add 100.64.0.41/24 dev ns4eth1
+ ip -n "${ns5}" addr add 100.64.0.51/24 dev ns5eth1
+
+ ip -n "${ns1}" link set hsr1 up
+ ip -n "${ns2}" link set hsr2 up
+}
+
+check_prerequisites
+setup_ns ns1 ns2 ns3 ns4 ns5
+
+trap cleanup_all_ns EXIT
+
+setup_hsr_interfaces 1
+do_complete_ping_test
+
+exit $ret
diff --git a/tools/testing/selftests/net/ip_local_port_range.c b/tools/testing/selftests/net/ip_local_port_range.c
index 193b82745fd8..29451d2244b7 100644
--- a/tools/testing/selftests/net/ip_local_port_range.c
+++ b/tools/testing/selftests/net/ip_local_port_range.c
@@ -359,7 +359,7 @@ TEST_F(ip_local_port_range, late_bind)
struct sockaddr_in v4;
struct sockaddr_in6 v6;
} addr;
- socklen_t addr_len;
+ socklen_t addr_len = 0;
const int one = 1;
int fd, err;
__u32 range;
diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
index f9fe182dfbd4..d0219032f773 100644
--- a/tools/testing/selftests/net/lib.sh
+++ b/tools/testing/selftests/net/lib.sh
@@ -4,93 +4,216 @@
##############################################################################
# Defines
-WAIT_TIMEOUT=${WAIT_TIMEOUT:=20}
+: "${WAIT_TIMEOUT:=20}"
+
BUSYWAIT_TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms
-# Kselftest framework requirement - SKIP code is 4.
+# Kselftest framework constants.
+ksft_pass=0
+ksft_fail=1
+ksft_xfail=2
ksft_skip=4
+
# namespace list created by setup_ns
-NS_LIST=""
+NS_LIST=()
##############################################################################
# Helpers
-busywait()
+
+__ksft_status_merge()
{
- local timeout=$1; shift
+ local a=$1; shift
+ local b=$1; shift
+ local -A weights
+ local weight=0
+
+ local i
+ for i in "$@"; do
+ weights[$i]=$((weight++))
+ done
+
+ if [[ ${weights[$a]} > ${weights[$b]} ]]; then
+ echo "$a"
+ return 0
+ else
+ echo "$b"
+ return 1
+ fi
+}
+
+ksft_status_merge()
+{
+ local a=$1; shift
+ local b=$1; shift
+
+ __ksft_status_merge "$a" "$b" \
+ $ksft_pass $ksft_xfail $ksft_skip $ksft_fail
+}
+
+ksft_exit_status_merge()
+{
+ local a=$1; shift
+ local b=$1; shift
+
+ __ksft_status_merge "$a" "$b" \
+ $ksft_xfail $ksft_pass $ksft_skip $ksft_fail
+}
+
+loopy_wait()
+{
+ local sleep_cmd=$1; shift
+ local timeout_ms=$1; shift
local start_time="$(date -u +%s%3N)"
while true
do
local out
- out=$("$@")
- local ret=$?
- if ((!ret)); then
+ if out=$("$@"); then
echo -n "$out"
return 0
fi
local current_time="$(date -u +%s%3N)"
- if ((current_time - start_time > timeout)); then
+ if ((current_time - start_time > timeout_ms)); then
echo -n "$out"
return 1
fi
+
+ $sleep_cmd
+ done
+}
+
+busywait()
+{
+ local timeout_ms=$1; shift
+
+ loopy_wait : "$timeout_ms" "$@"
+}
+
+# timeout in seconds
+slowwait()
+{
+ local timeout_sec=$1; shift
+
+ loopy_wait "sleep 0.1" "$((timeout_sec * 1000))" "$@"
+}
+
+until_counter_is()
+{
+ local expr=$1; shift
+ local current=$("$@")
+
+ echo $((current))
+ ((current $expr))
+}
+
+busywait_for_counter()
+{
+ local timeout=$1; shift
+ local delta=$1; shift
+
+ local base=$("$@")
+ busywait "$timeout" until_counter_is ">= $((base + delta))" "$@"
+}
+
+slowwait_for_counter()
+{
+ local timeout=$1; shift
+ local delta=$1; shift
+
+ local base=$("$@")
+ slowwait "$timeout" until_counter_is ">= $((base + delta))" "$@"
+}
+
+remove_ns_list()
+{
+ local item=$1
+ local ns
+ local ns_list=("${NS_LIST[@]}")
+ NS_LIST=()
+
+ for ns in "${ns_list[@]}"; do
+ if [ "${ns}" != "${item}" ]; then
+ NS_LIST+=("${ns}")
+ fi
done
}
cleanup_ns()
{
local ns=""
- local errexit=0
local ret=0
- # disable errexit temporary
- if [[ $- =~ "e" ]]; then
- errexit=1
- set +e
- fi
-
for ns in "$@"; do
- ip netns delete "${ns}" &> /dev/null
+ [ -z "${ns}" ] && continue
+ ip netns delete "${ns}" &> /dev/null || true
if ! busywait $BUSYWAIT_TIMEOUT ip netns list \| grep -vq "^$ns$" &> /dev/null; then
echo "Warn: Failed to remove namespace $ns"
ret=1
+ else
+ remove_ns_list "${ns}"
fi
done
- [ $errexit -eq 1 ] && set -e
return $ret
}
cleanup_all_ns()
{
- cleanup_ns $NS_LIST
+ cleanup_ns "${NS_LIST[@]}"
}
# setup netns with given names as prefix. e.g
# setup_ns local remote
setup_ns()
{
- local ns=""
local ns_name=""
- local ns_list=""
+ local ns_list=()
for ns_name in "$@"; do
+ # avoid conflicts with local var: internal error
+ if [ "${ns_name}" = "ns_name" ]; then
+ echo "Failed to setup namespace '${ns_name}': invalid name"
+ cleanup_ns "${ns_list[@]}"
+ exit $ksft_fail
+ fi
+
# Some test may setup/remove same netns multi times
- if unset ${ns_name} 2> /dev/null; then
- ns="${ns_name,,}-$(mktemp -u XXXXXX)"
- eval readonly ${ns_name}="$ns"
+ if [ -z "${!ns_name}" ]; then
+ eval "${ns_name}=${ns_name,,}-$(mktemp -u XXXXXX)"
else
- eval ns='$'${ns_name}
- cleanup_ns "$ns"
-
+ cleanup_ns "${!ns_name}"
fi
- if ! ip netns add "$ns"; then
+ if ! ip netns add "${!ns_name}"; then
echo "Failed to create namespace $ns_name"
- cleanup_ns "$ns_list"
+ cleanup_ns "${ns_list[@]}"
return $ksft_skip
fi
- ip -n "$ns" link set lo up
- ns_list="$ns_list $ns"
+ ip -n "${!ns_name}" link set lo up
+ ns_list+=("${!ns_name}")
done
- NS_LIST="$NS_LIST $ns_list"
+ NS_LIST+=("${ns_list[@]}")
+}
+
+tc_rule_stats_get()
+{
+ local dev=$1; shift
+ local pref=$1; shift
+ local dir=${1:-ingress}; shift
+ local selector=${1:-.packets}; shift
+
+ tc -j -s filter show dev $dev $dir pref $pref \
+ | jq ".[1].options.actions[].stats$selector"
+}
+
+tc_rule_handle_stats_get()
+{
+ local id=$1; shift
+ local handle=$1; shift
+ local selector=${1:-.packets}; shift
+ local netns=${1:-""}; shift
+
+ tc $netns -j -s filter show $id \
+ | jq ".[] | select(.options.handle == $handle) | \
+ .options.actions[0].stats$selector"
}
diff --git a/tools/testing/selftests/net/lib/.gitignore b/tools/testing/selftests/net/lib/.gitignore
new file mode 100644
index 000000000000..1ebc6187f421
--- /dev/null
+++ b/tools/testing/selftests/net/lib/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+csum
diff --git a/tools/testing/selftests/net/lib/Makefile b/tools/testing/selftests/net/lib/Makefile
new file mode 100644
index 000000000000..82c3264b115e
--- /dev/null
+++ b/tools/testing/selftests/net/lib/Makefile
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CFLAGS = -Wall -Wl,--no-as-needed -O2 -g
+CFLAGS += -I../../../../../usr/include/ $(KHDR_INCLUDES)
+# Additional include paths needed by kselftest.h
+CFLAGS += -I../../
+
+TEST_FILES := ../../../../../Documentation/netlink/specs
+TEST_FILES += ../../../../net/ynl
+
+TEST_GEN_FILES += csum
+
+TEST_INCLUDES := $(wildcard py/*.py)
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/csum.c b/tools/testing/selftests/net/lib/csum.c
index 90eb06fefa59..b9f3fc3c3426 100644
--- a/tools/testing/selftests/net/csum.c
+++ b/tools/testing/selftests/net/lib/csum.c
@@ -682,7 +682,7 @@ static int recv_verify_packet_ipv6(void *nh, int len)
}
/* return whether auxdata includes TP_STATUS_CSUM_VALID */
-static bool recv_verify_packet_csum(struct msghdr *msg)
+static uint32_t recv_get_packet_csum_status(struct msghdr *msg)
{
struct tpacket_auxdata *aux = NULL;
struct cmsghdr *cm;
@@ -706,7 +706,7 @@ static bool recv_verify_packet_csum(struct msghdr *msg)
if (!aux)
error(1, 0, "cmsg: no auxdata");
- return aux->tp_status & TP_STATUS_CSUM_VALID;
+ return aux->tp_status;
}
static int recv_packet(int fd)
@@ -716,6 +716,7 @@ static int recv_packet(int fd)
char ctrl[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
struct pkt *buf = (void *)_buf;
struct msghdr msg = {0};
+ uint32_t tp_status;
struct iovec iov;
int len, ret;
@@ -737,6 +738,17 @@ static int recv_packet(int fd)
if (len == -1)
error(1, errno, "recv p");
+ tp_status = recv_get_packet_csum_status(&msg);
+
+ /* GRO might coalesce randomized packets. Such GSO packets are
+ * then reinitialized for csum offload (CHECKSUM_PARTIAL), with
+ * a pseudo csum. Do not try to validate these checksums.
+ */
+ if (tp_status & TP_STATUS_CSUMNOTREADY) {
+ fprintf(stderr, "cmsg: GSO packet has partial csum: skip\n");
+ continue;
+ }
+
if (cfg_family == PF_INET6)
ret = recv_verify_packet_ipv6(buf, len);
else
@@ -753,7 +765,7 @@ static int recv_packet(int fd)
* Do not fail if kernel does not validate a good csum:
* Absence of validation does not imply invalid.
*/
- if (recv_verify_packet_csum(&msg) && cfg_bad_csum) {
+ if (tp_status & TP_STATUS_CSUM_VALID && cfg_bad_csum) {
fprintf(stderr, "cmsg: expected bad csum, pf_packet returns valid\n");
bad_validations++;
}
diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py
new file mode 100644
index 000000000000..b6d498d125fe
--- /dev/null
+++ b/tools/testing/selftests/net/lib/py/__init__.py
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+
+from .consts import KSRC
+from .ksft import *
+from .netns import NetNS
+from .nsim import *
+from .utils import *
+from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily
diff --git a/tools/testing/selftests/net/lib/py/consts.py b/tools/testing/selftests/net/lib/py/consts.py
new file mode 100644
index 000000000000..f518ce79d82c
--- /dev/null
+++ b/tools/testing/selftests/net/lib/py/consts.py
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import sys
+from pathlib import Path
+
+KSFT_DIR = (Path(__file__).parent / "../../..").resolve()
+KSRC = (Path(__file__).parent / "../../../../../..").resolve()
+
+KSFT_MAIN_NAME = Path(sys.argv[0]).with_suffix("").name
diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py
new file mode 100644
index 000000000000..f26c20df9db4
--- /dev/null
+++ b/tools/testing/selftests/net/lib/py/ksft.py
@@ -0,0 +1,192 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import builtins
+import inspect
+import sys
+import time
+import traceback
+from .consts import KSFT_MAIN_NAME
+from .utils import global_defer_queue
+
+KSFT_RESULT = None
+KSFT_RESULT_ALL = True
+
+
+class KsftFailEx(Exception):
+ pass
+
+
+class KsftSkipEx(Exception):
+ pass
+
+
+class KsftXfailEx(Exception):
+ pass
+
+
+def ksft_pr(*objs, **kwargs):
+ print("#", *objs, **kwargs)
+
+
+def _fail(*args):
+ global KSFT_RESULT
+ KSFT_RESULT = False
+
+ frame = inspect.stack()[2]
+ ksft_pr("At " + frame.filename + " line " + str(frame.lineno) + ":")
+ ksft_pr(*args)
+
+
+def ksft_eq(a, b, comment=""):
+ global KSFT_RESULT
+ if a != b:
+ _fail("Check failed", a, "!=", b, comment)
+
+
+def ksft_true(a, comment=""):
+ if not a:
+ _fail("Check failed", a, "does not eval to True", comment)
+
+
+def ksft_in(a, b, comment=""):
+ if a not in b:
+ _fail("Check failed", a, "not in", b, comment)
+
+
+def ksft_ge(a, b, comment=""):
+ if a < b:
+ _fail("Check failed", a, "<", b, comment)
+
+
+def ksft_lt(a, b, comment=""):
+ if a >= b:
+ _fail("Check failed", a, ">=", b, comment)
+
+
+class ksft_raises:
+ def __init__(self, expected_type):
+ self.exception = None
+ self.expected_type = expected_type
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ if exc_type is None:
+ _fail(f"Expected exception {str(self.expected_type.__name__)}, none raised")
+ elif self.expected_type != exc_type:
+ _fail(f"Expected exception {str(self.expected_type.__name__)}, raised {str(exc_type.__name__)}")
+ self.exception = exc_val
+ # Suppress the exception if its the expected one
+ return self.expected_type == exc_type
+
+
+def ksft_busy_wait(cond, sleep=0.005, deadline=1, comment=""):
+ end = time.monotonic() + deadline
+ while True:
+ if cond():
+ return
+ if time.monotonic() > end:
+ _fail("Waiting for condition timed out", comment)
+ return
+ time.sleep(sleep)
+
+
+def ktap_result(ok, cnt=1, case="", comment=""):
+ global KSFT_RESULT_ALL
+ KSFT_RESULT_ALL = KSFT_RESULT_ALL and ok
+
+ res = ""
+ if not ok:
+ res += "not "
+ res += "ok "
+ res += str(cnt) + " "
+ res += KSFT_MAIN_NAME
+ if case:
+ res += "." + str(case.__name__)
+ if comment:
+ res += " # " + comment
+ print(res)
+
+
+def ksft_flush_defer():
+ global KSFT_RESULT
+
+ i = 0
+ qlen_start = len(global_defer_queue)
+ while global_defer_queue:
+ i += 1
+ entry = global_defer_queue.pop()
+ try:
+ entry.exec_only()
+ except:
+ ksft_pr(f"Exception while handling defer / cleanup (callback {i} of {qlen_start})!")
+ tb = traceback.format_exc()
+ for line in tb.strip().split('\n'):
+ ksft_pr("Defer Exception|", line)
+ KSFT_RESULT = False
+
+
+def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
+ cases = cases or []
+
+ if globs and case_pfx:
+ for key, value in globs.items():
+ if not callable(value):
+ continue
+ for prefix in case_pfx:
+ if key.startswith(prefix):
+ cases.append(value)
+ break
+
+ totals = {"pass": 0, "fail": 0, "skip": 0, "xfail": 0}
+
+ print("KTAP version 1")
+ print("1.." + str(len(cases)))
+
+ global KSFT_RESULT
+ cnt = 0
+ stop = False
+ for case in cases:
+ KSFT_RESULT = True
+ cnt += 1
+ comment = ""
+ cnt_key = ""
+
+ try:
+ case(*args)
+ except KsftSkipEx as e:
+ comment = "SKIP " + str(e)
+ cnt_key = 'skip'
+ except KsftXfailEx as e:
+ comment = "XFAIL " + str(e)
+ cnt_key = 'xfail'
+ except BaseException as e:
+ stop |= isinstance(e, KeyboardInterrupt)
+ tb = traceback.format_exc()
+ for line in tb.strip().split('\n'):
+ ksft_pr("Exception|", line)
+ if stop:
+ ksft_pr("Stopping tests due to KeyboardInterrupt.")
+ KSFT_RESULT = False
+ cnt_key = 'fail'
+
+ ksft_flush_defer()
+
+ if not cnt_key:
+ cnt_key = 'pass' if KSFT_RESULT else 'fail'
+
+ ktap_result(KSFT_RESULT, cnt, case, comment=comment)
+ totals[cnt_key] += 1
+
+ if stop:
+ break
+
+ print(
+ f"# Totals: pass:{totals['pass']} fail:{totals['fail']} xfail:{totals['xfail']} xpass:0 skip:{totals['skip']} error:0"
+ )
+
+
+def ksft_exit():
+ global KSFT_RESULT_ALL
+ sys.exit(0 if KSFT_RESULT_ALL else 1)
diff --git a/tools/testing/selftests/net/lib/py/netns.py b/tools/testing/selftests/net/lib/py/netns.py
new file mode 100644
index 000000000000..ecff85f9074f
--- /dev/null
+++ b/tools/testing/selftests/net/lib/py/netns.py
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: GPL-2.0
+
+from .utils import ip
+import random
+import string
+
+
+class NetNS:
+ def __init__(self, name=None):
+ if name:
+ self.name = name
+ else:
+ self.name = ''.join(random.choice(string.ascii_lowercase) for _ in range(8))
+ ip('netns add ' + self.name)
+
+ def __del__(self):
+ if self.name:
+ ip('netns del ' + self.name)
+ self.name = None
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, ex_type, ex_value, ex_tb):
+ self.__del__()
+
+ def __str__(self):
+ return self.name
+
+ def __repr__(self):
+ return f"NetNS({self.name})"
diff --git a/tools/testing/selftests/net/lib/py/nsim.py b/tools/testing/selftests/net/lib/py/nsim.py
new file mode 100644
index 000000000000..f571a8b3139b
--- /dev/null
+++ b/tools/testing/selftests/net/lib/py/nsim.py
@@ -0,0 +1,134 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import json
+import os
+import random
+import re
+import time
+from .utils import cmd, ip
+
+
+class NetdevSim:
+ """
+ Class for netdevsim netdevice and its attributes.
+ """
+
+ def __init__(self, nsimdev, port_index, ifname, ns=None):
+ # In case udev renamed the netdev to according to new schema,
+ # check if the name matches the port_index.
+ nsimnamere = re.compile(r"eni\d+np(\d+)")
+ match = nsimnamere.match(ifname)
+ if match and int(match.groups()[0]) != port_index + 1:
+ raise Exception("netdevice name mismatches the expected one")
+
+ self.ifname = ifname
+ self.nsimdev = nsimdev
+ self.port_index = port_index
+ self.ns = ns
+ self.dfs_dir = "%s/ports/%u/" % (nsimdev.dfs_dir, port_index)
+ ret = ip("-j link show dev %s" % ifname, ns=ns)
+ self.dev = json.loads(ret.stdout)[0]
+ self.ifindex = self.dev["ifindex"]
+
+ def dfs_write(self, path, val):
+ self.nsimdev.dfs_write(f'ports/{self.port_index}/' + path, val)
+
+
+class NetdevSimDev:
+ """
+ Class for netdevsim bus device and its attributes.
+ """
+ @staticmethod
+ def ctrl_write(path, val):
+ fullpath = os.path.join("/sys/bus/netdevsim/", path)
+ with open(fullpath, "w") as f:
+ f.write(val)
+
+ def dfs_write(self, path, val):
+ fullpath = os.path.join(f"/sys/kernel/debug/netdevsim/netdevsim{self.addr}/", path)
+ with open(fullpath, "w") as f:
+ f.write(val)
+
+ def __init__(self, port_count=1, queue_count=1, ns=None):
+ # nsim will spawn in init_net, we'll set to actual ns once we switch it there
+ self.ns = None
+
+ if not os.path.exists("/sys/bus/netdevsim"):
+ cmd("modprobe netdevsim")
+
+ addr = random.randrange(1 << 15)
+ while True:
+ try:
+ self.ctrl_write("new_device", "%u %u %u" % (addr, port_count, queue_count))
+ except OSError as e:
+ if e.errno == errno.ENOSPC:
+ addr = random.randrange(1 << 15)
+ continue
+ raise e
+ break
+ self.addr = addr
+
+ # As probe of netdevsim device might happen from a workqueue,
+ # so wait here until all netdevs appear.
+ self.wait_for_netdevs(port_count)
+
+ if ns:
+ cmd(f"devlink dev reload netdevsim/netdevsim{addr} netns {ns.name}")
+ self.ns = ns
+
+ cmd("udevadm settle", ns=self.ns)
+ ifnames = self.get_ifnames()
+
+ self.dfs_dir = "/sys/kernel/debug/netdevsim/netdevsim%u/" % addr
+
+ self.nsims = []
+ for port_index in range(port_count):
+ self.nsims.append(self._make_port(port_index, ifnames[port_index]))
+
+ self.removed = False
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, ex_type, ex_value, ex_tb):
+ """
+ __exit__ gets called at the end of a "with" block.
+ """
+ self.remove()
+
+ def _make_port(self, port_index, ifname):
+ return NetdevSim(self, port_index, ifname, self.ns)
+
+ def get_ifnames(self):
+ ifnames = []
+ listdir = cmd(f"ls /sys/bus/netdevsim/devices/netdevsim{self.addr}/net/",
+ ns=self.ns).stdout.split()
+ for ifname in listdir:
+ ifnames.append(ifname)
+ ifnames.sort()
+ return ifnames
+
+ def wait_for_netdevs(self, port_count):
+ timeout = 5
+ timeout_start = time.time()
+
+ while True:
+ try:
+ ifnames = self.get_ifnames()
+ except FileNotFoundError as e:
+ ifnames = []
+ if len(ifnames) == port_count:
+ break
+ if time.time() < timeout_start + timeout:
+ continue
+ raise Exception("netdevices did not appear within timeout")
+
+ def remove(self):
+ if not self.removed:
+ self.ctrl_write("del_device", "%u" % (self.addr, ))
+ self.removed = True
+
+ def remove_nsim(self, nsim):
+ self.nsims.remove(nsim)
+ self.ctrl_write("devices/netdevsim%u/del_port" % (self.addr, ),
+ "%u" % (nsim.port_index, ))
diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py
new file mode 100644
index 000000000000..72590c3f90f1
--- /dev/null
+++ b/tools/testing/selftests/net/lib/py/utils.py
@@ -0,0 +1,155 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import errno
+import json as _json
+import random
+import re
+import socket
+import subprocess
+import time
+
+
+class CmdExitFailure(Exception):
+ pass
+
+
+class cmd:
+ def __init__(self, comm, shell=True, fail=True, ns=None, background=False, host=None, timeout=5):
+ if ns:
+ comm = f'ip netns exec {ns} ' + comm
+
+ self.stdout = None
+ self.stderr = None
+ self.ret = None
+
+ self.comm = comm
+ if host:
+ self.proc = host.cmd(comm)
+ else:
+ self.proc = subprocess.Popen(comm, shell=shell, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ if not background:
+ self.process(terminate=False, fail=fail, timeout=timeout)
+
+ def process(self, terminate=True, fail=None, timeout=5):
+ if fail is None:
+ fail = not terminate
+
+ if terminate:
+ self.proc.terminate()
+ stdout, stderr = self.proc.communicate(timeout)
+ self.stdout = stdout.decode("utf-8")
+ self.stderr = stderr.decode("utf-8")
+ self.proc.stdout.close()
+ self.proc.stderr.close()
+ self.ret = self.proc.returncode
+
+ if self.proc.returncode != 0 and fail:
+ if len(stderr) > 0 and stderr[-1] == "\n":
+ stderr = stderr[:-1]
+ raise CmdExitFailure("Command failed: %s\nSTDOUT: %s\nSTDERR: %s" %
+ (self.proc.args, stdout, stderr))
+
+
+class bkg(cmd):
+ def __init__(self, comm, shell=True, fail=None, ns=None, host=None,
+ exit_wait=False):
+ super().__init__(comm, background=True,
+ shell=shell, fail=fail, ns=ns, host=host)
+ self.terminate = not exit_wait
+ self.check_fail = fail
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, ex_type, ex_value, ex_tb):
+ return self.process(terminate=self.terminate, fail=self.check_fail)
+
+
+global_defer_queue = []
+
+
+class defer:
+ def __init__(self, func, *args, **kwargs):
+ global global_defer_queue
+
+ if not callable(func):
+ raise Exception("defer created with un-callable object, did you call the function instead of passing its name?")
+
+ self.func = func
+ self.args = args
+ self.kwargs = kwargs
+
+ self._queue = global_defer_queue
+ self._queue.append(self)
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, ex_type, ex_value, ex_tb):
+ return self.exec()
+
+ def exec_only(self):
+ self.func(*self.args, **self.kwargs)
+
+ def cancel(self):
+ self._queue.remove(self)
+
+ def exec(self):
+ self.cancel()
+ self.exec_only()
+
+
+def tool(name, args, json=None, ns=None, host=None):
+ cmd_str = name + ' '
+ if json:
+ cmd_str += '--json '
+ cmd_str += args
+ cmd_obj = cmd(cmd_str, ns=ns, host=host)
+ if json:
+ return _json.loads(cmd_obj.stdout)
+ return cmd_obj
+
+
+def ip(args, json=None, ns=None, host=None):
+ if ns:
+ args = f'-netns {ns} ' + args
+ return tool('ip', args, json=json, host=host)
+
+
+def ethtool(args, json=None, ns=None, host=None):
+ return tool('ethtool', args, json=json, ns=ns, host=host)
+
+
+def rand_port():
+ """
+ Get a random unprivileged port, try to make sure it's not already used.
+ """
+ for _ in range(1000):
+ port = random.randint(10000, 65535)
+ try:
+ with socket.socket(socket.AF_INET6, socket.SOCK_STREAM) as s:
+ s.bind(("", port))
+ return port
+ except OSError as e:
+ if e.errno != errno.EADDRINUSE:
+ raise
+ raise Exception("Can't find any free unprivileged port")
+
+
+def wait_port_listen(port, proto="tcp", ns=None, host=None, sleep=0.005, deadline=5):
+ end = time.monotonic() + deadline
+
+ pattern = f":{port:04X} .* "
+ if proto == "tcp": # for tcp protocol additionally check the socket state
+ pattern += "0A"
+ pattern = re.compile(pattern)
+
+ while True:
+ data = cmd(f'cat /proc/net/{proto}*', ns=ns, host=host, shell=True).stdout
+ for row in data.split("\n"):
+ if pattern.search(row):
+ return
+ if time.monotonic() > end:
+ raise Exception("Waiting for port listen timed out")
+ time.sleep(sleep)
diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py
new file mode 100644
index 000000000000..1ace58370c06
--- /dev/null
+++ b/tools/testing/selftests/net/lib/py/ynl.py
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import sys
+from pathlib import Path
+from .consts import KSRC, KSFT_DIR
+from .ksft import ksft_pr, ktap_result
+
+# Resolve paths
+try:
+ if (KSFT_DIR / "kselftest-list.txt").exists():
+ # Running in "installed" selftests
+ tools_full_path = KSFT_DIR
+ SPEC_PATH = KSFT_DIR / "net/lib/specs"
+
+ sys.path.append(tools_full_path.as_posix())
+ from net.lib.ynl.lib import YnlFamily, NlError
+ else:
+ # Running in tree
+ tools_full_path = KSRC / "tools"
+ SPEC_PATH = KSRC / "Documentation/netlink/specs"
+
+ sys.path.append(tools_full_path.as_posix())
+ from net.ynl.lib import YnlFamily, NlError
+except ModuleNotFoundError as e:
+ ksft_pr("Failed importing `ynl` library from kernel sources")
+ ksft_pr(str(e))
+ ktap_result(True, comment="SKIP")
+ sys.exit(4)
+
+#
+# Wrapper classes, loading the right specs
+# Set schema='' to avoid jsonschema validation, it's slow
+#
+class EthtoolFamily(YnlFamily):
+ def __init__(self):
+ super().__init__((SPEC_PATH / Path('ethtool.yaml')).as_posix(),
+ schema='')
+
+
+class RtnlFamily(YnlFamily):
+ def __init__(self):
+ super().__init__((SPEC_PATH / Path('rt_link.yaml')).as_posix(),
+ schema='')
+
+
+class NetdevFamily(YnlFamily):
+ def __init__(self):
+ super().__init__((SPEC_PATH / Path('netdev.yaml')).as_posix(),
+ schema='')
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
index bc97ab33a00e..776d43a6922d 100755
--- a/tools/testing/selftests/net/mptcp/diag.sh
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -200,6 +200,58 @@ chk_msk_cestab()
"${expected}" "${msg}" ""
}
+msk_info_get_value()
+{
+ local port="${1}"
+ local info="${2}"
+
+ ss -N "${ns}" -inHM dport "${port}" | \
+ mptcp_lib_get_info_value "${info}" "${info}"
+}
+
+chk_msk_info()
+{
+ local port="${1}"
+ local info="${2}"
+ local cnt="${3}"
+ local msg="....chk ${info}"
+ local delta_ms=250 # half what we waited before, just to be sure
+ local now
+
+ now=$(msk_info_get_value "${port}" "${info}")
+
+ mptcp_lib_print_title "${msg}"
+ if { [ -z "${cnt}" ] || [ -z "${now}" ]; } &&
+ ! mptcp_lib_expect_all_features; then
+ mptcp_lib_pr_skip "Feature probably not supported"
+ mptcp_lib_result_skip "${msg}"
+ elif [ "$((cnt + delta_ms))" -lt "${now}" ]; then
+ mptcp_lib_pr_ok
+ mptcp_lib_result_pass "${msg}"
+ else
+ mptcp_lib_pr_fail "value of ${info} changed by $((now - cnt))ms," \
+ "expected at least ${delta_ms}ms"
+ mptcp_lib_result_fail "${msg}"
+ ret=${KSFT_FAIL}
+ fi
+}
+
+chk_last_time_info()
+{
+ local port="${1}"
+ local data_sent data_recv ack_recv
+
+ data_sent=$(msk_info_get_value "${port}" "last_data_sent")
+ data_recv=$(msk_info_get_value "${port}" "last_data_recv")
+ ack_recv=$(msk_info_get_value "${port}" "last_ack_recv")
+
+ sleep 0.5 # wait to check after if the timestamps difference
+
+ chk_msk_info "${port}" "last_data_sent" "${data_sent}"
+ chk_msk_info "${port}" "last_data_recv" "${data_recv}"
+ chk_msk_info "${port}" "last_ack_recv" "${ack_recv}"
+}
+
wait_connected()
{
local listener_ns="${1}"
@@ -233,6 +285,7 @@ echo "b" | \
127.0.0.1 >/dev/null &
wait_connected $ns 10000
chk_msk_nr 2 "after MPC handshake "
+chk_last_time_info 10000
chk_msk_remote_key_nr 2 "....chk remote_key"
chk_msk_fallback_nr 0 "....chk no fallback"
chk_msk_inuse 2
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index d2043ec3bf6d..4209b9569039 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -1115,11 +1115,11 @@ again:
return 1;
}
- if (--cfg_repeat > 0) {
- if (cfg_input)
- close(fd);
+ if (cfg_input)
+ close(fd);
+
+ if (--cfg_repeat > 0)
goto again;
- }
return 0;
}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 4131f3263a48..b77fb7065bfb 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -147,7 +147,7 @@ cleanup()
mptcp_lib_check_mptcp
mptcp_lib_check_kallsyms
-mptcp_lib_check_tools ip
+mptcp_lib_check_tools ip tc
sin=$(mktemp)
sout=$(mktemp)
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index e4403236f655..4df48f1f14ab 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -31,7 +31,6 @@ timeout_poll=30
timeout_test=$((timeout_poll * 2 + 1))
capture=false
checksum=false
-ip_mptcp=0
check_invert=0
validate_checksum=false
init=0
@@ -125,8 +124,8 @@ init_shapers()
{
local i
for i in $(seq 1 4); do
- tc -n $ns1 qdisc add dev ns1eth$i root netem rate 20mbit delay 1
- tc -n $ns2 qdisc add dev ns2eth$i root netem rate 20mbit delay 1
+ tc -n $ns1 qdisc add dev ns1eth$i root netem rate 20mbit delay 1ms
+ tc -n $ns2 qdisc add dev ns2eth$i root netem rate 20mbit delay 1ms
done
}
@@ -142,7 +141,7 @@ init() {
mptcp_lib_check_mptcp
mptcp_lib_check_kallsyms
- mptcp_lib_check_tools ip ss "${iptables}" "${ip6tables}"
+ mptcp_lib_check_tools ip tc ss "${iptables}" "${ip6tables}"
sin=$(mktemp)
sout=$(mktemp)
@@ -262,6 +261,8 @@ reset()
TEST_NAME="${1}"
+ MPTCP_LIB_SUBTEST_FLAKY=0 # reset if modified
+
if skip_test; then
MPTCP_LIB_TEST_COUNTER=$((MPTCP_LIB_TEST_COUNTER+1))
last_test_ignored=1
@@ -449,7 +450,9 @@ reset_with_tcp_filter()
# $1: err msg
fail_test()
{
- ret=${KSFT_FAIL}
+ if ! mptcp_lib_subtest_is_flaky; then
+ ret=${KSFT_FAIL}
+ fi
if [ ${#} -gt 0 ]; then
print_fail "${@}"
@@ -606,173 +609,65 @@ kill_events_pids()
pm_nl_set_limits()
{
- local ns=$1
- local addrs=$2
- local subflows=$3
-
- if [ $ip_mptcp -eq 1 ]; then
- ip -n $ns mptcp limits set add_addr_accepted $addrs subflows $subflows
- else
- ip netns exec $ns ./pm_nl_ctl limits $addrs $subflows
- fi
+ mptcp_lib_pm_nl_set_limits "${@}"
}
pm_nl_add_endpoint()
{
- local ns=$1
- local addr=$2
- local flags _flags
- local port _port
- local dev _dev
- local id _id
- local nr=2
-
- local p
- for p in "${@}"
- do
- if [ $p = "flags" ]; then
- eval _flags=\$"$nr"
- [ -n "$_flags" ]; flags="flags $_flags"
- fi
- if [ $p = "dev" ]; then
- eval _dev=\$"$nr"
- [ -n "$_dev" ]; dev="dev $_dev"
- fi
- if [ $p = "id" ]; then
- eval _id=\$"$nr"
- [ -n "$_id" ]; id="id $_id"
- fi
- if [ $p = "port" ]; then
- eval _port=\$"$nr"
- [ -n "$_port" ]; port="port $_port"
- fi
-
- nr=$((nr + 1))
- done
-
- if [ $ip_mptcp -eq 1 ]; then
- ip -n $ns mptcp endpoint add $addr ${_flags//","/" "} $dev $id $port
- else
- ip netns exec $ns ./pm_nl_ctl add $addr $flags $dev $id $port
- fi
+ mptcp_lib_pm_nl_add_endpoint "${@}"
}
pm_nl_del_endpoint()
{
- local ns=$1
- local id=$2
- local addr=$3
-
- if [ $ip_mptcp -eq 1 ]; then
- [ $id -ne 0 ] && addr=''
- ip -n $ns mptcp endpoint delete id $id $addr
- else
- ip netns exec $ns ./pm_nl_ctl del $id $addr
- fi
+ mptcp_lib_pm_nl_del_endpoint "${@}"
}
pm_nl_flush_endpoint()
{
- local ns=$1
-
- if [ $ip_mptcp -eq 1 ]; then
- ip -n $ns mptcp endpoint flush
- else
- ip netns exec $ns ./pm_nl_ctl flush
- fi
+ mptcp_lib_pm_nl_flush_endpoint "${@}"
}
pm_nl_show_endpoints()
{
- local ns=$1
-
- if [ $ip_mptcp -eq 1 ]; then
- ip -n $ns mptcp endpoint show
- else
- ip netns exec $ns ./pm_nl_ctl dump
- fi
+ mptcp_lib_pm_nl_show_endpoints "${@}"
}
pm_nl_change_endpoint()
{
- local ns=$1
- local id=$2
- local flags=$3
-
- if [ $ip_mptcp -eq 1 ]; then
- ip -n $ns mptcp endpoint change id $id ${flags//","/" "}
- else
- ip netns exec $ns ./pm_nl_ctl set id $id flags $flags
- fi
+ mptcp_lib_pm_nl_change_endpoint "${@}"
}
pm_nl_check_endpoint()
{
- local line expected_line
local msg="$1"
local ns=$2
local addr=$3
- local _flags=""
- local flags
- local _port
- local port
- local dev
- local _id
- local id
+ local flags dev id port
print_check "${msg}"
shift 3
while [ -n "$1" ]; do
- if [ $1 = "flags" ]; then
- _flags=$2
- [ -n "$_flags" ]; flags="flags $_flags"
- shift
- elif [ $1 = "dev" ]; then
- [ -n "$2" ]; dev="dev $2"
- shift
- elif [ $1 = "id" ]; then
- _id=$2
- [ -n "$_id" ]; id="id $_id"
+ case "${1}" in
+ "flags" | "dev" | "id" | "port")
+ eval "${1}"="${2}"
shift
- elif [ $1 = "port" ]; then
- _port=$2
- [ -n "$_port" ]; port=" port $_port"
- shift
- fi
+ ;;
+ *)
+ ;;
+ esac
shift
done
- if [ -z "$id" ]; then
- test_fail "bad test - missing endpoint id"
+ if [ -z "${id}" ]; then
+ fail_test "bad test - missing endpoint id"
return
fi
- if [ $ip_mptcp -eq 1 ]; then
- # get line and trim trailing whitespace
- line=$(ip -n $ns mptcp endpoint show $id)
- line="${line% }"
- # the dump order is: address id flags port dev
- [ -n "$addr" ] && expected_line="$addr"
- expected_line+=" $id"
- [ -n "$_flags" ] && expected_line+=" ${_flags//","/" "}"
- [ -n "$dev" ] && expected_line+=" $dev"
- [ -n "$port" ] && expected_line+=" $port"
- else
- line=$(ip netns exec $ns ./pm_nl_ctl get $_id)
- # the dump order is: id flags dev address port
- expected_line="$id"
- [ -n "$flags" ] && expected_line+=" $flags"
- [ -n "$dev" ] && expected_line+=" $dev"
- [ -n "$addr" ] && expected_line+=" $addr"
- [ -n "$_port" ] && expected_line+=" $_port"
- fi
- if [ "$line" = "$expected_line" ]; then
- print_ok
- else
- fail_test "expected '$expected_line' found '$line'"
- fi
+ check_output "mptcp_lib_pm_nl_get_endpoint ${ns} ${id}" \
+ "$(mptcp_lib_pm_nl_format_endpoints \
+ "${id},${addr},${flags//","/" "},${dev},${port}")"
}
pm_nl_set_endpoint()
@@ -1739,6 +1634,8 @@ chk_prio_nr()
{
local mp_prio_nr_tx=$1
local mp_prio_nr_rx=$2
+ local mpj_syn=$3
+ local mpj_syn_ack=$4
local count
print_check "ptx"
@@ -1760,6 +1657,26 @@ chk_prio_nr()
else
print_ok
fi
+
+ print_check "syn backup"
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinSynBackupRx")
+ if [ -z "$count" ]; then
+ print_skip
+ elif [ "$count" != "$mpj_syn" ]; then
+ fail_test "got $count JOIN[s] syn with Backup expected $mpj_syn"
+ else
+ print_ok
+ fi
+
+ print_check "synack backup"
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckBackupRx")
+ if [ -z "$count" ]; then
+ print_skip
+ elif [ "$count" != "$mpj_syn_ack" ]; then
+ fail_test "got $count JOIN[s] synack with Backup expected $mpj_syn_ack"
+ else
+ print_ok
+ fi
}
chk_subflow_nr()
@@ -2354,9 +2271,10 @@ remove_tests()
if reset "remove invalid addresses"; then
pm_nl_set_limits $ns1 3 3
pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
+ # broadcast IP: no packet for this address will be received on ns1
+ pm_nl_add_endpoint $ns1 224.0.0.1 flags signal
pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
- pm_nl_add_endpoint $ns1 10.0.14.1 flags signal
- pm_nl_set_limits $ns2 3 3
+ pm_nl_set_limits $ns2 2 2
addr_nr_ns1=-3 speed=10 \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
@@ -2716,33 +2634,46 @@ backup_tests()
sflags=nobackup speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
- chk_prio_nr 0 1
+ chk_prio_nr 0 1 1 0
fi
# single address, backup
if reset "single address, backup" &&
continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
pm_nl_set_limits $ns1 0 1
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup
+ pm_nl_set_limits $ns2 1 1
+ sflags=nobackup speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ chk_add_nr 1 1
+ chk_prio_nr 1 0 0 1
+ fi
+
+ # single address, switch to backup
+ if reset "single address, switch to backup" &&
+ continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
+ pm_nl_set_limits $ns1 0 1
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
pm_nl_set_limits $ns2 1 1
sflags=backup speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
chk_add_nr 1 1
- chk_prio_nr 1 1
+ chk_prio_nr 1 1 0 0
fi
# single address with port, backup
if reset "single address with port, backup" &&
continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
pm_nl_set_limits $ns1 0 1
- pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup port 10100
pm_nl_set_limits $ns2 1 1
- sflags=backup speed=slow \
+ sflags=nobackup speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
chk_add_nr 1 1
- chk_prio_nr 1 1
+ chk_prio_nr 1 0 0 1
fi
if reset "mpc backup" &&
@@ -2751,17 +2682,26 @@ backup_tests()
speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0
- chk_prio_nr 0 1
+ chk_prio_nr 0 1 0 0
fi
if reset "mpc backup both sides" &&
continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then
- pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow,backup
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 1 2
+ pm_nl_add_endpoint $ns1 10.0.1.1 flags signal,backup
pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup
+
+ # 10.0.2.2 (non-backup) -> 10.0.1.1 (backup)
+ pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
+ # 10.0.1.2 (backup) -> 10.0.2.1 (non-backup)
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ ip -net "$ns2" route add 10.0.2.1 via 10.0.1.1 dev ns2eth1 # force this path
+
speed=slow \
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 0 0 0
- chk_prio_nr 1 1
+ chk_join_nr 2 2 2
+ chk_prio_nr 1 1 1 1
fi
if reset "mpc switch to backup" &&
@@ -2770,7 +2710,7 @@ backup_tests()
sflags=backup speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0
- chk_prio_nr 0 1
+ chk_prio_nr 0 1 0 0
fi
if reset "mpc switch to backup both sides" &&
@@ -2780,7 +2720,7 @@ backup_tests()
sflags=backup speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0
- chk_prio_nr 1 1
+ chk_prio_nr 1 1 0 0
fi
}
@@ -3157,7 +3097,7 @@ fullmesh_tests()
addr_nr_ns2=1 sflags=backup,fullmesh speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 2 2 2
- chk_prio_nr 0 1
+ chk_prio_nr 0 1 1 0
chk_rm_nr 0 1
fi
@@ -3170,7 +3110,7 @@ fullmesh_tests()
sflags=nobackup,nofullmesh speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 2 2 2
- chk_prio_nr 0 1
+ chk_prio_nr 0 1 1 0
chk_rm_nr 0 1
fi
}
@@ -3178,6 +3118,7 @@ fullmesh_tests()
fastclose_tests()
{
if reset_check_counter "fastclose test" "MPTcpExtMPFastcloseTx"; then
+ MPTCP_LIB_SUBTEST_FLAKY=1
test_linkfail=1024 fastclose=client \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0
@@ -3186,6 +3127,7 @@ fastclose_tests()
fi
if reset_check_counter "fastclose server test" "MPTcpExtMPFastcloseRx"; then
+ MPTCP_LIB_SUBTEST_FLAKY=1
test_linkfail=1024 fastclose=server \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0 0 0 0 1
@@ -3204,6 +3146,7 @@ fail_tests()
{
# single subflow
if reset_with_fail "Infinite map" 1; then
+ MPTCP_LIB_SUBTEST_FLAKY=1
test_linkfail=128 \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 0 0 0 +1 +0 1 0 1 "$(pedit_action_pkts)"
@@ -3212,7 +3155,8 @@ fail_tests()
# multiple subflows
if reset_with_fail "MP_FAIL MP_RST" 2; then
- tc -n $ns2 qdisc add dev ns2eth1 root netem rate 1mbit delay 5
+ MPTCP_LIB_SUBTEST_FLAKY=1
+ tc -n $ns2 qdisc add dev ns2eth1 root netem rate 1mbit delay 5ms
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 0 1
pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow
@@ -3418,7 +3362,7 @@ userspace_tests()
sflags=backup speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 0
- chk_prio_nr 0 0
+ chk_prio_nr 0 0 0 0
fi
# userspace pm type prevents rm_addr
@@ -3626,6 +3570,35 @@ endpoint_tests()
chk_mptcp_info subflows 1 subflows 1
mptcp_lib_kill_wait $tests_pid
fi
+
+ # remove and re-add
+ if reset "delete re-add signal" &&
+ mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
+ pm_nl_set_limits $ns1 1 1
+ pm_nl_set_limits $ns2 1 1
+ pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal
+ test_linkfail=4 speed=20 \
+ run_tests $ns1 $ns2 10.0.1.1 &
+ local tests_pid=$!
+
+ wait_mpj $ns2
+ pm_nl_check_endpoint "creation" \
+ $ns1 10.0.2.1 id 1 flags signal
+ chk_subflow_nr "before delete" 2
+ chk_mptcp_info subflows 1 subflows 1
+
+ pm_nl_del_endpoint $ns1 1 10.0.2.1
+ sleep 0.5
+ chk_subflow_nr "after delete" 1
+ chk_mptcp_info subflows 0 subflows 0
+
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ wait_mpj $ns2
+ chk_subflow_nr "after re-add" 2
+ chk_mptcp_info subflows 1 subflows 1
+ mptcp_lib_kill_wait $tests_pid
+ fi
+
}
# [$1: error message]
@@ -3702,7 +3675,7 @@ while getopts "${all_tests_args}cCih" opt; do
checksum=true
;;
i)
- ip_mptcp=1
+ mptcp_lib_set_ip_mptcp
;;
h)
usage
diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
index d529b4b37af8..438280e68434 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
@@ -1,6 +1,9 @@
#! /bin/bash
# SPDX-License-Identifier: GPL-2.0
+. "$(dirname "${0}")/../lib.sh"
+. "$(dirname "${0}")/../net_helper.sh"
+
readonly KSFT_PASS=0
readonly KSFT_FAIL=1
readonly KSFT_SKIP=4
@@ -21,8 +24,10 @@ declare -rx MPTCP_LIB_AF_INET6=10
MPTCP_LIB_SUBTESTS=()
MPTCP_LIB_SUBTESTS_DUPLICATED=0
+MPTCP_LIB_SUBTEST_FLAKY=0
MPTCP_LIB_TEST_COUNTER=0
MPTCP_LIB_TEST_FORMAT="%02u %-50s"
+MPTCP_LIB_IP_MPTCP=0
# only if supported (or forced) and not disabled, see no-color.org
if { [ -t 1 ] || [ "${SELFTESTS_MPTCP_LIB_COLOR_FORCE:-}" = "1" ]; } &&
@@ -40,6 +45,16 @@ else
readonly MPTCP_LIB_COLOR_RESET=
fi
+# SELFTESTS_MPTCP_LIB_OVERRIDE_FLAKY env var can be set not to ignore errors
+# from subtests marked as flaky
+mptcp_lib_override_flaky() {
+ [ "${SELFTESTS_MPTCP_LIB_OVERRIDE_FLAKY:-}" = 1 ]
+}
+
+mptcp_lib_subtest_is_flaky() {
+ [ "${MPTCP_LIB_SUBTEST_FLAKY}" = 1 ] && ! mptcp_lib_override_flaky
+}
+
# $1: color, $2: text
mptcp_lib_print_color() {
echo -e "${MPTCP_LIB_START_PRINT:-}${*}${MPTCP_LIB_COLOR_RESET}"
@@ -71,7 +86,16 @@ mptcp_lib_pr_skip() {
}
mptcp_lib_pr_fail() {
- mptcp_lib_print_err "[FAIL]${1:+ ${*}}"
+ local title cmt
+
+ if mptcp_lib_subtest_is_flaky; then
+ title="IGNO"
+ cmt=" (flaky)"
+ else
+ title="FAIL"
+ fi
+
+ mptcp_lib_print_err "[${title}]${cmt}${1:+ ${*}}"
}
mptcp_lib_pr_info() {
@@ -207,7 +231,13 @@ mptcp_lib_result_pass() {
# $1: test name
mptcp_lib_result_fail() {
- __mptcp_lib_result_add "not ok" "${1}"
+ if mptcp_lib_subtest_is_flaky; then
+ # It might sound better to use 'not ok # TODO' or 'ok # SKIP',
+ # but some CIs don't understand 'TODO' and treat SKIP as errors.
+ __mptcp_lib_result_add "ok" "${1} # IGNORE Flaky"
+ else
+ __mptcp_lib_result_add "not ok" "${1}"
+ fi
}
# $1: test name
@@ -334,20 +364,7 @@ mptcp_lib_check_transfer() {
# $1: ns, $2: port
mptcp_lib_wait_local_port_listen() {
- local listener_ns="${1}"
- local port="${2}"
-
- local port_hex
- port_hex="$(printf "%04X" "${port}")"
-
- local _
- for _ in $(seq 10); do
- ip netns exec "${listener_ns}" cat /proc/net/tcp* | \
- awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) \
- {rc=0; exit}} END {exit rc}" &&
- break
- sleep 0.1
- done
+ wait_local_port_listen "${@}" "tcp"
}
mptcp_lib_check_output() {
@@ -384,6 +401,12 @@ mptcp_lib_check_tools() {
exit ${KSFT_SKIP}
fi
;;
+ "tc")
+ if ! tc -help &> /dev/null; then
+ mptcp_lib_pr_skip "Could not run test without tc tool"
+ exit ${KSFT_SKIP}
+ fi
+ ;;
"ss")
if ! ss -h | grep -q MPTCP; then
mptcp_lib_pr_skip "ss tool does not support MPTCP"
@@ -405,17 +428,13 @@ mptcp_lib_check_tools() {
}
mptcp_lib_ns_init() {
- local sec rndh
-
- sec=$(date +%s)
- rndh=$(printf %x "${sec}")-$(mktemp -u XXXXXX)
+ if ! setup_ns "${@}"; then
+ mptcp_lib_pr_fail "Failed to setup namespaces ${*}"
+ exit ${KSFT_FAIL}
+ fi
local netns
for netns in "${@}"; do
- eval "${netns}=${netns}-${rndh}"
-
- ip netns add "${!netns}" || exit ${KSFT_SKIP}
- ip -net "${!netns}" link set lo up
ip netns exec "${!netns}" sysctl -q net.mptcp.enabled=1
ip netns exec "${!netns}" sysctl -q net.ipv4.conf.all.rp_filter=0
ip netns exec "${!netns}" sysctl -q net.ipv4.conf.default.rp_filter=0
@@ -423,9 +442,10 @@ mptcp_lib_ns_init() {
}
mptcp_lib_ns_exit() {
+ cleanup_ns "${@}"
+
local netns
for netns in "${@}"; do
- ip netns del "${netns}"
rm -f /tmp/"${netns}".{nstat,out}
done
}
@@ -505,3 +525,131 @@ mptcp_lib_verify_listener_events() {
mptcp_lib_check_expected "type" "family" "saddr" "sport" || rc="${?}"
return "${rc}"
}
+
+mptcp_lib_set_ip_mptcp() {
+ MPTCP_LIB_IP_MPTCP=1
+}
+
+mptcp_lib_is_ip_mptcp() {
+ [ "${MPTCP_LIB_IP_MPTCP}" = "1" ]
+}
+
+# format: <id>,<ip>,<flags>,<dev>
+mptcp_lib_pm_nl_format_endpoints() {
+ local entry id ip flags dev port
+
+ for entry in "${@}"; do
+ IFS=, read -r id ip flags dev port <<< "${entry}"
+ if mptcp_lib_is_ip_mptcp; then
+ echo -n "${ip}"
+ [ -n "${port}" ] && echo -n " port ${port}"
+ echo -n " id ${id}"
+ [ -n "${flags}" ] && echo -n " ${flags}"
+ [ -n "${dev}" ] && echo -n " dev ${dev}"
+ echo " " # always a space at the end
+ else
+ echo -n "id ${id}"
+ echo -n " flags ${flags//" "/","}"
+ [ -n "${dev}" ] && echo -n " dev ${dev}"
+ echo -n " ${ip}"
+ [ -n "${port}" ] && echo -n " ${port}"
+ echo ""
+ fi
+ done
+}
+
+mptcp_lib_pm_nl_get_endpoint() {
+ local ns=${1}
+ local id=${2}
+
+ if mptcp_lib_is_ip_mptcp; then
+ ip -n "${ns}" mptcp endpoint show id "${id}"
+ else
+ ip netns exec "${ns}" ./pm_nl_ctl get "${id}"
+ fi
+}
+
+mptcp_lib_pm_nl_set_limits() {
+ local ns=${1}
+ local addrs=${2}
+ local subflows=${3}
+
+ if mptcp_lib_is_ip_mptcp; then
+ ip -n "${ns}" mptcp limits set add_addr_accepted "${addrs}" subflows "${subflows}"
+ else
+ ip netns exec "${ns}" ./pm_nl_ctl limits "${addrs}" "${subflows}"
+ fi
+}
+
+mptcp_lib_pm_nl_add_endpoint() {
+ local ns=${1}
+ local addr=${2}
+ local flags dev id port
+ local nr=2
+
+ local p
+ for p in "${@}"; do
+ case "${p}" in
+ "flags" | "dev" | "id" | "port")
+ eval "${p}"=\$"${nr}"
+ ;;
+ esac
+
+ nr=$((nr + 1))
+ done
+
+ if mptcp_lib_is_ip_mptcp; then
+ # shellcheck disable=SC2086 # blanks in flags, no double quote
+ ip -n "${ns}" mptcp endpoint add "${addr}" ${flags//","/" "} \
+ ${dev:+dev "${dev}"} ${id:+id "${id}"} ${port:+port "${port}"}
+ else
+ ip netns exec "${ns}" ./pm_nl_ctl add "${addr}" ${flags:+flags "${flags}"} \
+ ${dev:+dev "${dev}"} ${id:+id "${id}"} ${port:+port "${port}"}
+ fi
+}
+
+mptcp_lib_pm_nl_del_endpoint() {
+ local ns=${1}
+ local id=${2}
+ local addr=${3}
+
+ if mptcp_lib_is_ip_mptcp; then
+ [ "${id}" -ne 0 ] && addr=''
+ ip -n "${ns}" mptcp endpoint delete id "${id}" ${addr:+"${addr}"}
+ else
+ ip netns exec "${ns}" ./pm_nl_ctl del "${id}" "${addr}"
+ fi
+}
+
+mptcp_lib_pm_nl_flush_endpoint() {
+ local ns=${1}
+
+ if mptcp_lib_is_ip_mptcp; then
+ ip -n "${ns}" mptcp endpoint flush
+ else
+ ip netns exec "${ns}" ./pm_nl_ctl flush
+ fi
+}
+
+mptcp_lib_pm_nl_show_endpoints() {
+ local ns=${1}
+
+ if mptcp_lib_is_ip_mptcp; then
+ ip -n "${ns}" mptcp endpoint show
+ else
+ ip netns exec "${ns}" ./pm_nl_ctl dump
+ fi
+}
+
+mptcp_lib_pm_nl_change_endpoint() {
+ local ns=${1}
+ local id=${2}
+ local flags=${3}
+
+ if mptcp_lib_is_ip_mptcp; then
+ # shellcheck disable=SC2086 # blanks in flags, no double quote
+ ip -n "${ns}" mptcp endpoint change id "${id}" ${flags//","/" "}
+ else
+ ip netns exec "${ns}" ./pm_nl_ctl set id "${id}" flags "${flags}"
+ fi
+}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
index e2d70c18786e..68899a303a1a 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
@@ -22,6 +22,28 @@ ns1=""
ns2=""
ns_sbox=""
+usage() {
+ echo "Usage: $0 [ -i ] [ -h ]"
+ echo -e "\t-i: use 'ip mptcp' instead of 'pm_nl_ctl'"
+ echo -e "\t-h: help"
+}
+
+while getopts "hi" option;do
+ case "$option" in
+ "h")
+ usage "$0"
+ exit ${KSFT_PASS}
+ ;;
+ "i")
+ mptcp_lib_set_ip_mptcp
+ ;;
+ "?")
+ usage "$0"
+ exit ${KSFT_FAIL}
+ ;;
+ esac
+done
+
add_mark_rules()
{
local ns=$1
@@ -58,15 +80,15 @@ init()
# let $ns2 reach any $ns1 address from any interface
ip -net "$ns2" route add default via 10.0.$i.1 dev ns2eth$i metric 10$i
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.$i.1 flags signal
- ip netns exec $ns1 ./pm_nl_ctl add dead:beef:$i::1 flags signal
+ mptcp_lib_pm_nl_add_endpoint "${ns1}" "10.0.${i}.1" flags signal
+ mptcp_lib_pm_nl_add_endpoint "${ns1}" "dead:beef:${i}::1" flags signal
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.$i.2 flags signal
- ip netns exec $ns2 ./pm_nl_ctl add dead:beef:$i::2 flags signal
+ mptcp_lib_pm_nl_add_endpoint "${ns2}" "10.0.${i}.2" flags signal
+ mptcp_lib_pm_nl_add_endpoint "${ns2}" "dead:beef:${i}::2" flags signal
done
- ip netns exec $ns1 ./pm_nl_ctl limits 8 8
- ip netns exec $ns2 ./pm_nl_ctl limits 8 8
+ mptcp_lib_pm_nl_set_limits "${ns1}" 8 8
+ mptcp_lib_pm_nl_set_limits "${ns2}" 8 8
add_mark_rules $ns1 1
add_mark_rules $ns2 2
diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh
index 6ab8c5d36340..2757378b1b13 100755
--- a/tools/testing/selftests/net/mptcp/pm_netlink.sh
+++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh
@@ -1,28 +1,28 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-# Double quotes to prevent globbing and word splitting is recommended in new
-# code but we accept it, especially because there were too many before having
-# address all other issues detected by shellcheck.
-#shellcheck disable=SC2086
-
. "$(dirname "${0}")/mptcp_lib.sh"
ret=0
usage() {
- echo "Usage: $0 [ -h ]"
+ echo "Usage: $0 [ -i ] [ -h ]"
+ echo -e "\t-i: use 'ip mptcp' instead of 'pm_nl_ctl'"
+ echo -e "\t-h: help"
}
-optstring=h
+optstring=hi
while getopts "$optstring" option;do
case "$option" in
"h")
- usage $0
+ usage "$0"
exit ${KSFT_PASS}
;;
+ "i")
+ mptcp_lib_set_ip_mptcp
+ ;;
"?")
- usage $0
+ usage "$0"
exit ${KSFT_FAIL}
;;
esac
@@ -35,7 +35,7 @@ err=$(mktemp)
#shellcheck disable=SC2317
cleanup()
{
- rm -f $err
+ rm -f "${err}"
mptcp_lib_ns_exit "${ns1}"
}
@@ -46,6 +46,76 @@ trap cleanup EXIT
mptcp_lib_ns_init ns1
+format_limits() {
+ local accept="${1}"
+ local subflows="${2}"
+
+ if mptcp_lib_is_ip_mptcp; then
+ # with a space at the end
+ printf "add_addr_accepted %d subflows %d \n" "${accept}" "${subflows}"
+ else
+ printf "accept %d\nsubflows %d\n" "${accept}" "${subflows}"
+ fi
+}
+
+get_limits() {
+ if mptcp_lib_is_ip_mptcp; then
+ ip -n "${ns1}" mptcp limits
+ else
+ ip netns exec "${ns1}" ./pm_nl_ctl limits
+ fi
+}
+
+format_endpoints() {
+ mptcp_lib_pm_nl_format_endpoints "${@}"
+}
+
+get_endpoint() {
+ # shellcheck disable=SC2317 # invoked indirectly
+ mptcp_lib_pm_nl_get_endpoint "${ns1}" "${@}"
+}
+
+change_address() {
+ local addr=${1}
+ local flags=${2}
+
+ if mptcp_lib_is_ip_mptcp; then
+ ip -n "${ns1}" mptcp endpoint change "${addr}" "${flags}"
+ else
+ ip netns exec "${ns1}" ./pm_nl_ctl set "${addr}" flags "${flags}"
+ fi
+}
+
+set_limits()
+{
+ mptcp_lib_pm_nl_set_limits "${ns1}" "${@}"
+}
+
+add_endpoint()
+{
+ mptcp_lib_pm_nl_add_endpoint "${ns1}" "${@}"
+}
+
+del_endpoint()
+{
+ mptcp_lib_pm_nl_del_endpoint "${ns1}" "${@}"
+}
+
+flush_endpoint()
+{
+ mptcp_lib_pm_nl_flush_endpoint "${ns1}"
+}
+
+show_endpoints()
+{
+ mptcp_lib_pm_nl_show_endpoints "${ns1}"
+}
+
+change_endpoint()
+{
+ mptcp_lib_pm_nl_change_endpoint "${ns1}" "${@}"
+}
+
check()
{
local cmd="$1"
@@ -67,125 +137,126 @@ check()
fi
}
-check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "defaults addr list"
+check "show_endpoints" "" "defaults addr list"
-default_limits="$(ip netns exec $ns1 ./pm_nl_ctl limits)"
+default_limits="$(get_limits)"
if mptcp_lib_expect_all_features; then
- check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0
-subflows 2" "defaults limits"
+ check "get_limits" "$(format_limits 0 2)" "defaults limits"
fi
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.2 flags subflow dev lo
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 flags signal,backup
-check "ip netns exec $ns1 ./pm_nl_ctl get 1" "id 1 flags 10.0.1.1" "simple add/get addr"
+add_endpoint 10.0.1.1
+add_endpoint 10.0.1.2 flags subflow dev lo
+add_endpoint 10.0.1.3 flags signal,backup
+check "get_endpoint 1" "$(format_endpoints "1,10.0.1.1")" "simple add/get addr"
-check "ip netns exec $ns1 ./pm_nl_ctl dump" \
-"id 1 flags 10.0.1.1
-id 2 flags subflow dev lo 10.0.1.2
-id 3 flags signal,backup 10.0.1.3" "dump addrs"
+check "show_endpoints" \
+ "$(format_endpoints "1,10.0.1.1" \
+ "2,10.0.1.2,subflow,lo" \
+ "3,10.0.1.3,signal backup")" "dump addrs"
-ip netns exec $ns1 ./pm_nl_ctl del 2
-check "ip netns exec $ns1 ./pm_nl_ctl get 2" "" "simple del addr"
-check "ip netns exec $ns1 ./pm_nl_ctl dump" \
-"id 1 flags 10.0.1.1
-id 3 flags signal,backup 10.0.1.3" "dump addrs after del"
+del_endpoint 2
+check "get_endpoint 2" "" "simple del addr"
+check "show_endpoints" \
+ "$(format_endpoints "1,10.0.1.1" \
+ "3,10.0.1.3,signal backup")" "dump addrs after del"
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 2>/dev/null
-check "ip netns exec $ns1 ./pm_nl_ctl get 4" "" "duplicate addr"
+add_endpoint 10.0.1.3 2>/dev/null
+check "get_endpoint 4" "" "duplicate addr"
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.4 flags signal
-check "ip netns exec $ns1 ./pm_nl_ctl get 4" "id 4 flags signal 10.0.1.4" "id addr increment"
+add_endpoint 10.0.1.4 flags signal
+check "get_endpoint 4" "$(format_endpoints "4,10.0.1.4,signal")" "id addr increment"
for i in $(seq 5 9); do
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.$i flags signal >/dev/null 2>&1
+ add_endpoint "10.0.1.${i}" flags signal >/dev/null 2>&1
done
-check "ip netns exec $ns1 ./pm_nl_ctl get 9" "id 9 flags signal 10.0.1.9" "hard addr limit"
-check "ip netns exec $ns1 ./pm_nl_ctl get 10" "" "above hard addr limit"
+check "get_endpoint 9" "$(format_endpoints "9,10.0.1.9,signal")" "hard addr limit"
+check "get_endpoint 10" "" "above hard addr limit"
-ip netns exec $ns1 ./pm_nl_ctl del 9
+del_endpoint 9
for i in $(seq 10 255); do
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.9 id $i
- ip netns exec $ns1 ./pm_nl_ctl del $i
+ add_endpoint 10.0.0.9 id "${i}"
+ del_endpoint "${i}"
done
-check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags 10.0.1.1
-id 3 flags signal,backup 10.0.1.3
-id 4 flags signal 10.0.1.4
-id 5 flags signal 10.0.1.5
-id 6 flags signal 10.0.1.6
-id 7 flags signal 10.0.1.7
-id 8 flags signal 10.0.1.8" "id limit"
-
-ip netns exec $ns1 ./pm_nl_ctl flush
-check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "flush addrs"
-
-ip netns exec $ns1 ./pm_nl_ctl limits 9 1 2>/dev/null
-check "ip netns exec $ns1 ./pm_nl_ctl limits" "$default_limits" "rcv addrs above hard limit"
-
-ip netns exec $ns1 ./pm_nl_ctl limits 1 9 2>/dev/null
-check "ip netns exec $ns1 ./pm_nl_ctl limits" "$default_limits" "subflows above hard limit"
-
-ip netns exec $ns1 ./pm_nl_ctl limits 8 8
-check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 8
-subflows 8" "set limits"
-
-ip netns exec $ns1 ./pm_nl_ctl flush
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.2
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 id 100
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.4
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.5 id 254
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.6
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.7
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.8
-check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags 10.0.1.1
-id 2 flags 10.0.1.2
-id 3 flags 10.0.1.7
-id 4 flags 10.0.1.8
-id 100 flags 10.0.1.3
-id 101 flags 10.0.1.4
-id 254 flags 10.0.1.5
-id 255 flags 10.0.1.6" "set ids"
-
-ip netns exec $ns1 ./pm_nl_ctl flush
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.1
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.2 id 254
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.3
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.4
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.5 id 253
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.6
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.7
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.8
-check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags 10.0.0.1
-id 2 flags 10.0.0.4
-id 3 flags 10.0.0.6
-id 4 flags 10.0.0.7
-id 5 flags 10.0.0.8
-id 253 flags 10.0.0.5
-id 254 flags 10.0.0.2
-id 255 flags 10.0.0.3" "wrap-around ids"
-
-ip netns exec $ns1 ./pm_nl_ctl flush
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1 flags subflow
-ip netns exec $ns1 ./pm_nl_ctl set 10.0.1.1 flags backup
-check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
-subflow,backup 10.0.1.1" "set flags (backup)"
-ip netns exec $ns1 ./pm_nl_ctl set 10.0.1.1 flags nobackup
-check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
-subflow 10.0.1.1" " (nobackup)"
+check "show_endpoints" \
+ "$(format_endpoints "1,10.0.1.1" \
+ "3,10.0.1.3,signal backup" \
+ "4,10.0.1.4,signal" \
+ "5,10.0.1.5,signal" \
+ "6,10.0.1.6,signal" \
+ "7,10.0.1.7,signal" \
+ "8,10.0.1.8,signal")" "id limit"
+
+flush_endpoint
+check "show_endpoints" "" "flush addrs"
+
+set_limits 9 1 2>/dev/null
+check "get_limits" "${default_limits}" "rcv addrs above hard limit"
+
+set_limits 1 9 2>/dev/null
+check "get_limits" "${default_limits}" "subflows above hard limit"
+
+set_limits 8 8
+check "get_limits" "$(format_limits 8 8)" "set limits"
+
+flush_endpoint
+add_endpoint 10.0.1.1
+add_endpoint 10.0.1.2
+add_endpoint 10.0.1.3 id 100
+add_endpoint 10.0.1.4
+add_endpoint 10.0.1.5 id 254
+add_endpoint 10.0.1.6
+add_endpoint 10.0.1.7
+add_endpoint 10.0.1.8
+check "show_endpoints" \
+ "$(format_endpoints "1,10.0.1.1" \
+ "2,10.0.1.2" \
+ "3,10.0.1.7" \
+ "4,10.0.1.8" \
+ "100,10.0.1.3" \
+ "101,10.0.1.4" \
+ "254,10.0.1.5" \
+ "255,10.0.1.6")" "set ids"
+
+flush_endpoint
+add_endpoint 10.0.0.1
+add_endpoint 10.0.0.2 id 254
+add_endpoint 10.0.0.3
+add_endpoint 10.0.0.4
+add_endpoint 10.0.0.5 id 253
+add_endpoint 10.0.0.6
+add_endpoint 10.0.0.7
+add_endpoint 10.0.0.8
+check "show_endpoints" \
+ "$(format_endpoints "1,10.0.0.1" \
+ "2,10.0.0.4" \
+ "3,10.0.0.6" \
+ "4,10.0.0.7" \
+ "5,10.0.0.8" \
+ "253,10.0.0.5" \
+ "254,10.0.0.2" \
+ "255,10.0.0.3")" "wrap-around ids"
+
+flush_endpoint
+add_endpoint 10.0.1.1 flags subflow
+change_address 10.0.1.1 backup
+check "show_endpoints" "$(format_endpoints "1,10.0.1.1,subflow backup")" \
+ "set flags (backup)"
+change_address 10.0.1.1 nobackup
+check "show_endpoints" "$(format_endpoints "1,10.0.1.1,subflow")" \
+ " (nobackup)"
# fullmesh support has been added later
-ip netns exec $ns1 ./pm_nl_ctl set id 1 flags fullmesh 2>/dev/null
-if ip netns exec $ns1 ./pm_nl_ctl dump | grep -q "fullmesh" ||
+change_endpoint 1 fullmesh 2>/dev/null
+if show_endpoints | grep -q "fullmesh" ||
mptcp_lib_expect_all_features; then
- check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
-subflow,fullmesh 10.0.1.1" " (fullmesh)"
- ip netns exec $ns1 ./pm_nl_ctl set id 1 flags nofullmesh
- check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
-subflow 10.0.1.1" " (nofullmesh)"
- ip netns exec $ns1 ./pm_nl_ctl set id 1 flags backup,fullmesh
- check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
-subflow,backup,fullmesh 10.0.1.1" " (backup,fullmesh)"
+ check "show_endpoints" "$(format_endpoints "1,10.0.1.1,subflow fullmesh")" \
+ " (fullmesh)"
+ change_endpoint 1 nofullmesh
+ check "show_endpoints" "$(format_endpoints "1,10.0.1.1,subflow")" \
+ " (nofullmesh)"
+ change_endpoint 1 backup,fullmesh
+ check "show_endpoints" "$(format_endpoints "1,10.0.1.1,subflow backup fullmesh")" \
+ " (backup,fullmesh)"
else
for st in fullmesh nofullmesh backup,fullmesh; do
st=" (${st})"
diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index 7426a2cbd4a0..7ad5a59adff2 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -1276,7 +1276,7 @@ int add_listener(int argc, char *argv[])
struct sockaddr_storage addr;
struct sockaddr_in6 *a6;
struct sockaddr_in *a4;
- u_int16_t family;
+ u_int16_t family = AF_UNSPEC;
int enable = 1;
int sock;
int err;
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index 1b2366220388..f74e1c3c126d 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -27,10 +27,11 @@ capout=""
size=0
usage() {
- echo "Usage: $0 [ -b ] [ -c ] [ -d ]"
+ echo "Usage: $0 [ -b ] [ -c ] [ -d ] [ -i]"
echo -e "\t-b: bail out after first error, otherwise runs al testcases"
echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)"
echo -e "\t-d: debug this script"
+ echo -e "\t-i: use 'ip mptcp' instead of 'pm_nl_ctl'"
}
# This function is used in the cleanup trap
@@ -45,7 +46,7 @@ cleanup()
}
mptcp_lib_check_mptcp
-mptcp_lib_check_tools ip
+mptcp_lib_check_tools ip tc
# "$ns1" ns2 ns3
# ns1eth1 ns2eth1 ns2eth3 ns3eth1
@@ -85,8 +86,8 @@ setup()
ip -net "$ns1" route add default via 10.0.2.2 metric 101
ip -net "$ns1" route add default via dead:beef:2::2 metric 101
- ip netns exec "$ns1" ./pm_nl_ctl limits 1 1
- ip netns exec "$ns1" ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags subflow
+ mptcp_lib_pm_nl_set_limits "${ns1}" 1 1
+ mptcp_lib_pm_nl_add_endpoint "${ns1}" 10.0.2.1 dev ns1eth2 flags subflow
ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1
ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad
@@ -108,7 +109,7 @@ setup()
ip -net "$ns3" route add default via 10.0.3.2
ip -net "$ns3" route add default via dead:beef:3::2
- ip netns exec "$ns3" ./pm_nl_ctl limits 1 1
+ mptcp_lib_pm_nl_set_limits "${ns3}" 1 1
# debug build can slow down measurably the test program
# we use quite tight time limit on the run-time, to ensure
@@ -216,8 +217,8 @@ run_test()
shift 4
local msg=$*
- [ $delay1 -gt 0 ] && delay1="delay $delay1" || delay1=""
- [ $delay2 -gt 0 ] && delay2="delay $delay2" || delay2=""
+ [ $delay1 -gt 0 ] && delay1="delay ${delay1}ms" || delay1=""
+ [ $delay2 -gt 0 ] && delay2="delay ${delay2}ms" || delay2=""
for dev in ns1eth1 ns1eth2; do
tc -n $ns1 qdisc del dev $dev root >/dev/null 2>&1
@@ -243,7 +244,7 @@ run_test()
do_transfer $small $large $time
lret=$?
mptcp_lib_result_code "${lret}" "${msg}"
- if [ $lret -ne 0 ]; then
+ if [ $lret -ne 0 ] && ! mptcp_lib_subtest_is_flaky; then
ret=$lret
[ $bail -eq 0 ] || exit $ret
fi
@@ -253,13 +254,13 @@ run_test()
do_transfer $large $small $time
lret=$?
mptcp_lib_result_code "${lret}" "${msg}"
- if [ $lret -ne 0 ]; then
+ if [ $lret -ne 0 ] && ! mptcp_lib_subtest_is_flaky; then
ret=$lret
[ $bail -eq 0 ] || exit $ret
fi
}
-while getopts "bcdh" option;do
+while getopts "bcdhi" option;do
case "$option" in
"h")
usage $0
@@ -274,6 +275,9 @@ while getopts "bcdh" option;do
"d")
set -x
;;
+ "i")
+ mptcp_lib_set_ip_mptcp
+ ;;
"?")
usage $0
exit ${KSFT_FAIL}
@@ -286,7 +290,7 @@ run_test 10 10 0 0 "balanced bwidth"
run_test 10 10 1 25 "balanced bwidth with unbalanced delay"
# we still need some additional infrastructure to pass the following test-cases
-run_test 10 3 0 0 "unbalanced bwidth"
+MPTCP_LIB_SUBTEST_FLAKY=1 run_test 10 3 0 0 "unbalanced bwidth"
run_test 10 3 1 25 "unbalanced bwidth with unbalanced delay"
run_test 10 3 25 1 "unbalanced bwidth with opposed, unbalanced delay"
diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh
index 9e2981f2d7f5..9cb05978269d 100755
--- a/tools/testing/selftests/net/mptcp/userspace_pm.sh
+++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
@@ -160,10 +160,12 @@ make_connection()
local is_v6=$1
local app_port=$app4_port
local connect_addr="10.0.1.1"
+ local client_addr="10.0.1.2"
local listen_addr="0.0.0.0"
if [ "$is_v6" = "v6" ]
then
connect_addr="dead:beef:1::1"
+ client_addr="dead:beef:1::2"
listen_addr="::"
app_port=$app6_port
else
@@ -206,6 +208,7 @@ make_connection()
[ "$server_serverside" = 1 ]
then
test_pass
+ print_title "Connection info: ${client_addr}:${client_port} -> ${connect_addr}:${app_port}"
else
test_fail "Expected tokens (c:${client_token} - s:${server_token}) and server (c:${client_serverside} - s:${server_serverside})"
mptcp_lib_result_print_all_tap
@@ -297,7 +300,7 @@ test_announce()
ip netns exec "$ns2"\
./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id $client_addr_id dev\
ns2eth1
- print_test "ADD_ADDR id:${client_addr_id} 10.0.2.2 (ns2) => ns1, reuse port"
+ print_test "ADD_ADDR id:client 10.0.2.2 (ns2) => ns1, reuse port"
sleep 0.5
verify_announce_event $server_evts $ANNOUNCED $server4_token "10.0.2.2" $client_addr_id \
"$client4_port"
@@ -306,7 +309,7 @@ test_announce()
:>"$server_evts"
ip netns exec "$ns2" ./pm_nl_ctl ann\
dead:beef:2::2 token "$client6_token" id $client_addr_id dev ns2eth1
- print_test "ADD_ADDR6 id:${client_addr_id} dead:beef:2::2 (ns2) => ns1, reuse port"
+ print_test "ADD_ADDR6 id:client dead:beef:2::2 (ns2) => ns1, reuse port"
sleep 0.5
verify_announce_event "$server_evts" "$ANNOUNCED" "$server6_token" "dead:beef:2::2"\
"$client_addr_id" "$client6_port" "v6"
@@ -316,7 +319,7 @@ test_announce()
client_addr_id=$((client_addr_id+1))
ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id\
$client_addr_id dev ns2eth1 port $new4_port
- print_test "ADD_ADDR id:${client_addr_id} 10.0.2.2 (ns2) => ns1, new port"
+ print_test "ADD_ADDR id:client+1 10.0.2.2 (ns2) => ns1, new port"
sleep 0.5
verify_announce_event "$server_evts" "$ANNOUNCED" "$server4_token" "10.0.2.2"\
"$client_addr_id" "$new4_port"
@@ -327,7 +330,7 @@ test_announce()
# ADD_ADDR from the server to client machine reusing the subflow port
ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\
$server_addr_id dev ns1eth2
- print_test "ADD_ADDR id:${server_addr_id} 10.0.2.1 (ns1) => ns2, reuse port"
+ print_test "ADD_ADDR id:server 10.0.2.1 (ns1) => ns2, reuse port"
sleep 0.5
verify_announce_event "$client_evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\
"$server_addr_id" "$app4_port"
@@ -336,7 +339,7 @@ test_announce()
:>"$client_evts"
ip netns exec "$ns1" ./pm_nl_ctl ann dead:beef:2::1 token "$server6_token" id\
$server_addr_id dev ns1eth2
- print_test "ADD_ADDR6 id:${server_addr_id} dead:beef:2::1 (ns1) => ns2, reuse port"
+ print_test "ADD_ADDR6 id:server dead:beef:2::1 (ns1) => ns2, reuse port"
sleep 0.5
verify_announce_event "$client_evts" "$ANNOUNCED" "$client6_token" "dead:beef:2::1"\
"$server_addr_id" "$app6_port" "v6"
@@ -346,7 +349,7 @@ test_announce()
server_addr_id=$((server_addr_id+1))
ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\
$server_addr_id dev ns1eth2 port $new4_port
- print_test "ADD_ADDR id:${server_addr_id} 10.0.2.1 (ns1) => ns2, new port"
+ print_test "ADD_ADDR id:server+1 10.0.2.1 (ns1) => ns2, new port"
sleep 0.5
verify_announce_event "$client_evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\
"$server_addr_id" "$new4_port"
@@ -380,7 +383,7 @@ test_remove()
local invalid_token=$(( client4_token - 1 ))
ip netns exec "$ns2" ./pm_nl_ctl rem token $invalid_token id\
$client_addr_id > /dev/null 2>&1
- print_test "RM_ADDR id:${client_addr_id} ns2 => ns1, invalid token"
+ print_test "RM_ADDR id:client ns2 => ns1, invalid token"
local type
type=$(mptcp_lib_evts_get_info type "$server_evts")
if [ "$type" = "" ]
@@ -394,7 +397,7 @@ test_remove()
local invalid_id=$(( client_addr_id + 1 ))
ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\
$invalid_id > /dev/null 2>&1
- print_test "RM_ADDR id:${invalid_id} ns2 => ns1, invalid id"
+ print_test "RM_ADDR id:client+1 ns2 => ns1, invalid id"
type=$(mptcp_lib_evts_get_info type "$server_evts")
if [ "$type" = "" ]
then
@@ -407,7 +410,7 @@ test_remove()
:>"$server_evts"
ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\
$client_addr_id
- print_test "RM_ADDR id:${client_addr_id} ns2 => ns1"
+ print_test "RM_ADDR id:client ns2 => ns1"
sleep 0.5
verify_remove_event "$server_evts" "$REMOVED" "$server4_token" "$client_addr_id"
@@ -416,7 +419,7 @@ test_remove()
client_addr_id=$(( client_addr_id - 1 ))
ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\
$client_addr_id
- print_test "RM_ADDR id:${client_addr_id} ns2 => ns1"
+ print_test "RM_ADDR id:client-1 ns2 => ns1"
sleep 0.5
verify_remove_event "$server_evts" "$REMOVED" "$server4_token" "$client_addr_id"
@@ -424,7 +427,7 @@ test_remove()
:>"$server_evts"
ip netns exec "$ns2" ./pm_nl_ctl rem token "$client6_token" id\
$client_addr_id
- print_test "RM_ADDR6 id:${client_addr_id} ns2 => ns1"
+ print_test "RM_ADDR6 id:client-1 ns2 => ns1"
sleep 0.5
verify_remove_event "$server_evts" "$REMOVED" "$server6_token" "$client_addr_id"
@@ -434,7 +437,7 @@ test_remove()
# RM_ADDR from the server to client machine
ip netns exec "$ns1" ./pm_nl_ctl rem token "$server4_token" id\
$server_addr_id
- print_test "RM_ADDR id:${server_addr_id} ns1 => ns2"
+ print_test "RM_ADDR id:server ns1 => ns2"
sleep 0.5
verify_remove_event "$client_evts" "$REMOVED" "$client4_token" "$server_addr_id"
@@ -443,7 +446,7 @@ test_remove()
server_addr_id=$(( server_addr_id - 1 ))
ip netns exec "$ns1" ./pm_nl_ctl rem token "$server4_token" id\
$server_addr_id
- print_test "RM_ADDR id:${server_addr_id} ns1 => ns2"
+ print_test "RM_ADDR id:server-1 ns1 => ns2"
sleep 0.5
verify_remove_event "$client_evts" "$REMOVED" "$client4_token" "$server_addr_id"
@@ -451,7 +454,7 @@ test_remove()
:>"$client_evts"
ip netns exec "$ns1" ./pm_nl_ctl rem token "$server6_token" id\
$server_addr_id
- print_test "RM_ADDR6 id:${server_addr_id} ns1 => ns2"
+ print_test "RM_ADDR6 id:server-1 ns1 => ns2"
sleep 0.5
verify_remove_event "$client_evts" "$REMOVED" "$client6_token" "$server_addr_id"
}
@@ -479,8 +482,14 @@ verify_subflow_events()
local locid
local remid
local info
+ local e_dport_txt
- info="${e_saddr} (${e_from}) => ${e_daddr}:${e_dport} (${e_to})"
+ # only display the fixed ports
+ if [ "${e_dport}" -ge "${app4_port}" ] && [ "${e_dport}" -le "${app6_port}" ]; then
+ e_dport_txt=":${e_dport}"
+ fi
+
+ info="${e_saddr} (${e_from}) => ${e_daddr}${e_dport_txt} (${e_to})"
if [ "$e_type" = "$SUB_ESTABLISHED" ]
then
@@ -766,7 +775,7 @@ test_subflows_v4_v6_mix()
:>"$client_evts"
ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server6_token" id\
$server_addr_id dev ns1eth2
- print_test "ADD_ADDR4 id:${server_addr_id} 10.0.2.1 (ns1) => ns2, reuse port"
+ print_test "ADD_ADDR4 id:server 10.0.2.1 (ns1) => ns2, reuse port"
sleep 0.5
verify_announce_event "$client_evts" "$ANNOUNCED" "$client6_token" "10.0.2.1"\
"$server_addr_id" "$app6_port"
@@ -861,7 +870,7 @@ test_listener()
local listener_pid=$!
sleep 0.5
- print_test "CREATE_LISTENER 10.0.2.2:$client4_port"
+ print_test "CREATE_LISTENER 10.0.2.2 (client port)"
verify_listener_events $client_evts $LISTENER_CREATED $AF_INET 10.0.2.2 $client4_port
# ADD_ADDR from client to server machine reusing the subflow port
@@ -878,13 +887,14 @@ test_listener()
mptcp_lib_kill_wait $listener_pid
sleep 0.5
- print_test "CLOSE_LISTENER 10.0.2.2:$client4_port"
+ print_test "CLOSE_LISTENER 10.0.2.2 (client port)"
verify_listener_events $client_evts $LISTENER_CLOSED $AF_INET 10.0.2.2 $client4_port
}
print_title "Make connections"
make_connection
make_connection "v6"
+print_title "Will be using address IDs ${client_addr_id} (client) and ${server_addr_id} (server)"
test_announce
test_remove
diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
index bdc03a2097e8..7ea5fb28c93d 100644
--- a/tools/testing/selftests/net/msg_zerocopy.c
+++ b/tools/testing/selftests/net/msg_zerocopy.c
@@ -85,6 +85,7 @@ static bool cfg_rx;
static int cfg_runtime_ms = 4200;
static int cfg_verbose;
static int cfg_waittime_ms = 500;
+static int cfg_notification_limit = 32;
static bool cfg_zerocopy;
static socklen_t cfg_alen;
@@ -95,6 +96,7 @@ static char payload[IP_MAXPACKET];
static long packets, bytes, completions, expected_completions;
static int zerocopied = -1;
static uint32_t next_completion;
+static uint32_t sends_since_notify;
static unsigned long gettimeofday_ms(void)
{
@@ -208,6 +210,7 @@ static bool do_sendmsg(int fd, struct msghdr *msg, bool do_zerocopy, int domain)
error(1, errno, "send");
if (cfg_verbose && ret != len)
fprintf(stderr, "send: ret=%u != %u\n", ret, len);
+ sends_since_notify++;
if (len) {
packets++;
@@ -435,7 +438,7 @@ static bool do_recv_completion(int fd, int domain)
/* Detect notification gaps. These should not happen often, if at all.
* Gaps can occur due to drops, reordering and retransmissions.
*/
- if (lo != next_completion)
+ if (cfg_verbose && lo != next_completion)
fprintf(stderr, "gap: %u..%u does not append to %u\n",
lo, hi, next_completion);
next_completion = hi + 1;
@@ -460,6 +463,7 @@ static bool do_recv_completion(int fd, int domain)
static void do_recv_completions(int fd, int domain)
{
while (do_recv_completion(fd, domain)) {}
+ sends_since_notify = 0;
}
/* Wait for all remaining completions on the errqueue */
@@ -549,6 +553,9 @@ static void do_tx(int domain, int type, int protocol)
else
do_sendmsg(fd, &msg, cfg_zerocopy, domain);
+ if (cfg_zerocopy && sends_since_notify >= cfg_notification_limit)
+ do_recv_completions(fd, domain);
+
while (!do_poll(fd, POLLOUT)) {
if (cfg_zerocopy)
do_recv_completions(fd, domain);
@@ -708,7 +715,7 @@ static void parse_opts(int argc, char **argv)
cfg_payload_len = max_payload_len;
- while ((c = getopt(argc, argv, "46c:C:D:i:mp:rs:S:t:vz")) != -1) {
+ while ((c = getopt(argc, argv, "46c:C:D:i:l:mp:rs:S:t:vz")) != -1) {
switch (c) {
case '4':
if (cfg_family != PF_UNSPEC)
@@ -736,6 +743,9 @@ static void parse_opts(int argc, char **argv)
if (cfg_ifindex == 0)
error(1, errno, "invalid iface: %s", optarg);
break;
+ case 'l':
+ cfg_notification_limit = strtoul(optarg, NULL, 0);
+ break;
case 'm':
cfg_cork_mixed = true;
break;
diff --git a/tools/testing/selftests/net/nat6to4.c b/tools/testing/selftests/net/nat6to4.bpf.c
index ac54c36b25fc..ac54c36b25fc 100644
--- a/tools/testing/selftests/net/nat6to4.c
+++ b/tools/testing/selftests/net/nat6to4.bpf.c
diff --git a/tools/testing/selftests/netfilter/.gitignore b/tools/testing/selftests/net/netfilter/.gitignore
index c2229b3e40d4..0a64d6d0e29a 100644
--- a/tools/testing/selftests/netfilter/.gitignore
+++ b/tools/testing/selftests/net/netfilter/.gitignore
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
-nf-queue
-connect_close
audit_logread
+connect_close
conntrack_dump_flush
sctp_collision
+nf_queue
diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile
new file mode 100644
index 000000000000..47945b2b3f92
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/Makefile
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: GPL-2.0
+
+top_srcdir = ../../../../..
+
+HOSTPKG_CONFIG := pkg-config
+MNL_CFLAGS := $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null)
+MNL_LDLIBS := $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl)
+
+TEST_PROGS := br_netfilter.sh bridge_brouter.sh
+TEST_PROGS += conntrack_icmp_related.sh
+TEST_PROGS += conntrack_ipip_mtu.sh
+TEST_PROGS += conntrack_tcp_unreplied.sh
+TEST_PROGS += conntrack_sctp_collision.sh
+TEST_PROGS += conntrack_vrf.sh
+TEST_PROGS += ipvs.sh
+TEST_PROGS += nf_conntrack_packetdrill.sh
+TEST_PROGS += nf_nat_edemux.sh
+TEST_PROGS += nft_audit.sh
+TEST_PROGS += nft_concat_range.sh
+TEST_PROGS += nft_conntrack_helper.sh
+TEST_PROGS += nft_fib.sh
+TEST_PROGS += nft_flowtable.sh
+TEST_PROGS += nft_meta.sh
+TEST_PROGS += nft_nat.sh
+TEST_PROGS += nft_nat_zones.sh
+TEST_PROGS += nft_queue.sh
+TEST_PROGS += nft_synproxy.sh
+TEST_PROGS += nft_zones_many.sh
+TEST_PROGS += rpath.sh
+TEST_PROGS += xt_string.sh
+
+TEST_PROGS_EXTENDED = nft_concat_range_perf.sh
+
+TEST_GEN_PROGS = conntrack_dump_flush
+
+TEST_GEN_FILES = audit_logread
+TEST_GEN_FILES += connect_close nf_queue
+TEST_GEN_FILES += sctp_collision
+
+include ../../lib.mk
+
+$(OUTPUT)/nf_queue: CFLAGS += $(MNL_CFLAGS)
+$(OUTPUT)/nf_queue: LDLIBS += $(MNL_LDLIBS)
+
+$(OUTPUT)/conntrack_dump_flush: CFLAGS += $(MNL_CFLAGS)
+$(OUTPUT)/conntrack_dump_flush: LDLIBS += $(MNL_LDLIBS)
+
+TEST_FILES := lib.sh
+TEST_FILES += packetdrill
+
+TEST_INCLUDES := \
+ ../lib.sh
diff --git a/tools/testing/selftests/netfilter/audit_logread.c b/tools/testing/selftests/net/netfilter/audit_logread.c
index a0a880fc2d9d..a0a880fc2d9d 100644
--- a/tools/testing/selftests/netfilter/audit_logread.c
+++ b/tools/testing/selftests/net/netfilter/audit_logread.c
diff --git a/tools/testing/selftests/net/netfilter/br_netfilter.sh b/tools/testing/selftests/net/netfilter/br_netfilter.sh
new file mode 100755
index 000000000000..c28379a965d8
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/br_netfilter.sh
@@ -0,0 +1,171 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test for legacy br_netfilter module combined with connection tracking,
+# a combination that doesn't really work.
+# Multicast/broadcast packets race for hash table insertion.
+
+# eth0 br0 eth0
+# setup is: ns1 <->,ns0 <-> ns3
+# ns2 <-' `'-> ns4
+
+source lib.sh
+
+checktool "nft --version" "run test without nft tool"
+
+cleanup() {
+ cleanup_all_ns
+}
+
+trap cleanup EXIT
+
+setup_ns ns0 ns1 ns2 ns3 ns4
+
+ret=0
+
+do_ping()
+{
+ fromns="$1"
+ dstip="$2"
+
+ if ! ip netns exec "$fromns" ping -c 1 -q "$dstip" > /dev/null; then
+ echo "ERROR: ping from $fromns to $dstip"
+ ip netns exec "$ns0" nft list ruleset
+ ret=1
+ fi
+}
+
+bcast_ping()
+{
+ fromns="$1"
+ dstip="$2"
+
+ local packets=500
+
+ [ "$KSFT_MACHINE_SLOW" = yes ] && packets=100
+
+ for i in $(seq 1 $packets); do
+ if ! ip netns exec "$fromns" ping -q -f -b -c 1 -q "$dstip" > /dev/null 2>&1; then
+ echo "ERROR: ping -b from $fromns to $dstip"
+ ip netns exec "$ns0" nft list ruleset
+ ret=1
+ break
+ fi
+ done
+}
+
+ip netns exec "$ns0" sysctl -q net.ipv4.conf.all.rp_filter=0
+ip netns exec "$ns0" sysctl -q net.ipv4.conf.default.rp_filter=0
+
+if ! ip link add veth1 netns "$ns0" type veth peer name eth0 netns "$ns1"; then
+ echo "SKIP: Can't create veth device"
+ exit $ksft_skip
+fi
+
+ip link add veth2 netns "$ns0" type veth peer name eth0 netns "$ns2"
+ip link add veth3 netns "$ns0" type veth peer name eth0 netns "$ns3"
+ip link add veth4 netns "$ns0" type veth peer name eth0 netns "$ns4"
+
+for i in $(seq 1 4); do
+ ip -net "$ns0" link set "veth$i" up
+done
+
+if ! ip -net "$ns0" link add br0 type bridge stp_state 0 forward_delay 0 nf_call_iptables 1 nf_call_ip6tables 1 nf_call_arptables 1; then
+ echo "SKIP: Can't create bridge br0"
+ exit $ksft_skip
+fi
+
+# make veth0,1,2 part of bridge.
+for i in $(seq 1 3); do
+ ip -net "$ns0" link set "veth$i" master br0
+done
+
+# add a macvlan on top of the bridge.
+MACVLAN_ADDR=ba:f3:13:37:42:23
+ip -net "$ns0" link add link br0 name macvlan0 type macvlan mode private
+ip -net "$ns0" link set macvlan0 address ${MACVLAN_ADDR}
+ip -net "$ns0" link set macvlan0 up
+ip -net "$ns0" addr add 10.23.0.1/24 dev macvlan0
+
+# add a macvlan on top of veth4.
+MACVLAN_ADDR=ba:f3:13:37:42:24
+ip -net "$ns0" link add link veth4 name macvlan4 type macvlan mode passthru
+ip -net "$ns0" link set macvlan4 address ${MACVLAN_ADDR}
+ip -net "$ns0" link set macvlan4 up
+
+# make the macvlan part of the bridge.
+# veth4 is not a bridge port, only the macvlan on top of it.
+ip -net "$ns0" link set macvlan4 master br0
+
+ip -net "$ns0" link set br0 up
+ip -net "$ns0" addr add 10.0.0.1/24 dev br0
+
+modprobe -q br_netfilter
+if ! ip netns exec "$ns0" sysctl -q net.bridge.bridge-nf-call-iptables=1; then
+ echo "SKIP: bridge netfilter not available"
+ ret=$ksft_skip
+fi
+
+# for testing, so namespaces will reply to ping -b probes.
+ip netns exec "$ns0" sysctl -q net.ipv4.icmp_echo_ignore_broadcasts=0
+
+# enable conntrack in ns0 and drop broadcast packets in forward to
+# avoid them from getting confirmed in the postrouting hook before
+# the cloned skb is passed up the stack.
+ip netns exec "$ns0" nft -f - <<EOF
+table ip filter {
+ chain input {
+ type filter hook input priority 1; policy accept
+ iifname br0 counter
+ ct state new accept
+ }
+}
+
+table bridge filter {
+ chain forward {
+ type filter hook forward priority 0; policy accept
+ meta pkttype broadcast ip protocol icmp counter drop
+ }
+}
+EOF
+if [ "$?" -ne 0 ];then
+ echo "SKIP: could not add nftables ruleset"
+ exit $ksft_skip
+fi
+
+# place 1, 2 & 3 in same subnet, connected via ns0:br0.
+# ns4 is placed in same subnet as well, but its not
+# part of the bridge: the corresponding veth4 is not
+# part of the bridge, only its macvlan interface.
+for i in $(seq 1 4); do
+ eval ip -net \$ns"$i" link set eth0 up
+done
+for i in $(seq 1 2); do
+ eval ip -net \$ns"$i" addr add "10.0.0.1$i/24" dev eth0
+done
+
+ip -net "$ns3" addr add 10.23.0.13/24 dev eth0
+ip -net "$ns4" addr add 10.23.0.14/24 dev eth0
+
+# test basic connectivity
+do_ping "$ns1" 10.0.0.12
+do_ping "$ns3" 10.23.0.1
+do_ping "$ns4" 10.23.0.1
+
+bcast_ping "$ns1" 10.0.0.255
+
+# This should deliver broadcast to macvlan0, which is on top of ns0:br0.
+bcast_ping "$ns3" 10.23.0.255
+
+# same, this time via veth4:macvlan4.
+bcast_ping "$ns4" 10.23.0.255
+
+read t < /proc/sys/kernel/tainted
+if [ "$t" -eq 0 ];then
+ echo PASS: kernel not tainted
+else
+ echo ERROR: kernel is tainted
+ ret=1
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/bridge_brouter.sh b/tools/testing/selftests/net/netfilter/bridge_brouter.sh
new file mode 100755
index 000000000000..2549b6590693
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/bridge_brouter.sh
@@ -0,0 +1,122 @@
+#!/bin/bash
+#
+# This test is for bridge 'brouting', i.e. make some packets being routed
+# rather than getting bridged even though they arrive on interface that is
+# part of a bridge.
+
+# eth0 br0 eth0
+# setup is: ns1 <-> nsbr <-> ns2
+
+source lib.sh
+
+if ! ebtables -V > /dev/null 2>&1;then
+ echo "SKIP: Could not run test without ebtables"
+ exit $ksft_skip
+fi
+
+cleanup() {
+ cleanup_all_ns
+}
+
+trap cleanup EXIT
+
+setup_ns nsbr ns1 ns2
+
+ip netns exec "$nsbr" sysctl -q net.ipv4.conf.default.rp_filter=0
+ip netns exec "$nsbr" sysctl -q net.ipv4.conf.all.rp_filter=0
+if ! ip link add veth0 netns "$nsbr" type veth peer name eth0 netns "$ns1"; then
+ echo "SKIP: Can't create veth device"
+ exit $ksft_skip
+fi
+ip link add veth1 netns "$nsbr" type veth peer name eth0 netns "$ns2"
+
+if ! ip -net "$nsbr" link add br0 type bridge; then
+ echo "SKIP: Can't create bridge br0"
+ exit $ksft_skip
+fi
+
+ip -net "$nsbr" link set veth0 up
+ip -net "$nsbr" link set veth1 up
+
+ip -net "$nsbr" link set veth0 master br0
+ip -net "$nsbr" link set veth1 master br0
+ip -net "$nsbr" link set br0 up
+ip -net "$nsbr" addr add 10.0.0.1/24 dev br0
+
+# place both in same subnet, ${ns1} and ${ns2} connected via ${nsbr}:br0
+ip -net "$ns1" link set eth0 up
+ip -net "$ns2" link set eth0 up
+ip -net "$ns1" addr add 10.0.0.11/24 dev eth0
+ip -net "$ns2" addr add 10.0.0.12/24 dev eth0
+
+test_ebtables_broute()
+{
+ # redirect is needed so the dstmac is rewritten to the bridge itself,
+ # ip stack won't process OTHERHOST (foreign unicast mac) packets.
+ if ! ip netns exec "$nsbr" ebtables -t broute -A BROUTING -p ipv4 --ip-protocol icmp -j redirect --redirect-target=DROP; then
+ echo "SKIP: Could not add ebtables broute redirect rule"
+ return $ksft_skip
+ fi
+
+ ip netns exec "$nsbr" sysctl -q net.ipv4.conf.veth0.forwarding=0
+
+ # ping net${ns1}, expected to not work (ip forwarding is off)
+ if ip netns exec "$ns1" ping -q -c 1 10.0.0.12 -W 0.5 > /dev/null 2>&1; then
+ echo "ERROR: ping works, should have failed" 1>&2
+ return 1
+ fi
+
+ # enable forwarding on both interfaces.
+ # neither needs an ip address, but at least the bridge needs
+ # an ip address in same network segment as ${ns1} and ${ns2} (${nsbr}
+ # needs to be able to determine route for to-be-forwarded packet).
+ ip netns exec "$nsbr" sysctl -q net.ipv4.conf.veth0.forwarding=1
+ ip netns exec "$nsbr" sysctl -q net.ipv4.conf.veth1.forwarding=1
+
+ if ! ip netns exec "$ns1" ping -q -c 1 10.0.0.12 > /dev/null; then
+ echo "ERROR: ping did not work, but it should (broute+forward)" 1>&2
+ return 1
+ fi
+
+ echo "PASS: ${ns1}/${ns2} connectivity with active broute rule"
+ ip netns exec "$nsbr" ebtables -t broute -F
+
+ # ping net${ns1}, expected to work (frames are bridged)
+ if ! ip netns exec "$ns1" ping -q -c 1 10.0.0.12 > /dev/null; then
+ echo "ERROR: ping did not work, but it should (bridged)" 1>&2
+ return 1
+ fi
+
+ ip netns exec "$nsbr" ebtables -t filter -A FORWARD -p ipv4 --ip-protocol icmp -j DROP
+
+ # ping net${ns1}, expected to not work (DROP in bridge forward)
+ if ip netns exec "$ns1" ping -q -c 1 10.0.0.12 -W 0.5 > /dev/null 2>&1; then
+ echo "ERROR: ping works, should have failed (icmp forward drop)" 1>&2
+ return 1
+ fi
+
+ # re-activate brouter
+ ip netns exec "$nsbr" ebtables -t broute -A BROUTING -p ipv4 --ip-protocol icmp -j redirect --redirect-target=DROP
+
+ if ! ip netns exec "$ns2" ping -q -c 1 10.0.0.11 > /dev/null; then
+ echo "ERROR: ping did not work, but it should (broute+forward 2)" 1>&2
+ return 1
+ fi
+
+ echo "PASS: ${ns1}/${ns2} connectivity with active broute rule and bridge forward drop"
+ return 0
+}
+
+# test basic connectivity
+if ! ip netns exec "$ns1" ping -c 1 -q 10.0.0.12 > /dev/null; then
+ echo "ERROR: Could not reach ${ns2} from ${ns1}" 1>&2
+ exit 1
+fi
+
+if ! ip netns exec "$ns2" ping -c 1 -q 10.0.0.11 > /dev/null; then
+ echo "ERROR: Could not reach ${ns1} from ${ns2}" 1>&2
+ exit 1
+fi
+
+test_ebtables_broute
+exit $?
diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config
new file mode 100644
index 000000000000..63ef80ef47a4
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/config
@@ -0,0 +1,89 @@
+CONFIG_AUDIT=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_BRIDGE=m
+CONFIG_BRIDGE_EBT_BROUTE=m
+CONFIG_BRIDGE_EBT_IP=m
+CONFIG_BRIDGE_EBT_REDIRECT=m
+CONFIG_BRIDGE_EBT_T_FILTER=m
+CONFIG_BRIDGE_NETFILTER=m
+CONFIG_BRIDGE_NF_EBTABLES=m
+CONFIG_CGROUP_BPF=y
+CONFIG_DUMMY=m
+CONFIG_INET_ESP=m
+CONFIG_IP_NF_MATCH_RPFILTER=m
+CONFIG_IP6_NF_MATCH_RPFILTER=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP6_NF_RAW=m
+CONFIG_IP_SCTP=m
+CONFIG_IP_VS=m
+CONFIG_IP_VS_PROTO_TCP=y
+CONFIG_IP_VS_RR=m
+CONFIG_IPV6=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_MACVLAN=m
+CONFIG_NAMESPACES=y
+CONFIG_NET_CLS_U32=m
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_NET_NS=y
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_IPIP=m
+CONFIG_NET_VRF=y
+CONFIG_NETFILTER=y
+CONFIG_NETFILTER_ADVANCED=y
+CONFIG_NETFILTER_NETLINK=m
+CONFIG_NETFILTER_NETLINK_QUEUE=m
+CONFIG_NETFILTER_SYNPROXY=m
+CONFIG_NETFILTER_XTABLES=m
+CONFIG_NETFILTER_XT_NAT=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_TARGET_REDIRECT=m
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CONNTRACK_FTP=m
+CONFIG_NF_CONNTRACK_MARK=y
+CONFIG_NF_CONNTRACK_ZONES=y
+CONFIG_NF_CT_NETLINK=m
+CONFIG_NF_CT_PROTO_SCTP=y
+CONFIG_NF_FLOW_TABLE=m
+CONFIG_NF_LOG_IPV4=m
+CONFIG_NF_LOG_IPV6=m
+CONFIG_NF_NAT=m
+CONFIG_NF_NAT_REDIRECT=y
+CONFIG_NF_NAT_MASQUERADE=y
+CONFIG_NF_TABLES=m
+CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NF_TABLES_INET=y
+CONFIG_NF_TABLES_IPV4=y
+CONFIG_NF_TABLES_IPV6=y
+CONFIG_NF_TABLES_NETDEV=y
+CONFIG_NF_FLOW_TABLE_INET=m
+CONFIG_NFT_BRIDGE_META=m
+CONFIG_NFT_COMPAT=m
+CONFIG_NFT_CT=m
+CONFIG_NFT_FIB=m
+CONFIG_NFT_FIB_INET=m
+CONFIG_NFT_FIB_IPV4=m
+CONFIG_NFT_FIB_IPV6=m
+CONFIG_NFT_FLOW_OFFLOAD=m
+CONFIG_NFT_LIMIT=m
+CONFIG_NFT_LOG=m
+CONFIG_NFT_MASQ=m
+CONFIG_NFT_NAT=m
+CONFIG_NFT_NUMGEN=m
+CONFIG_NFT_QUEUE=m
+CONFIG_NFT_QUOTA=m
+CONFIG_NFT_REDIR=m
+CONFIG_NFT_SYNPROXY=m
+CONFIG_VETH=m
+CONFIG_VLAN_8021Q=m
+CONFIG_XFRM_USER=m
+CONFIG_XFRM_STATISTICS=y
+CONFIG_NET_PKTGEN=m
+CONFIG_TUN=m
diff --git a/tools/testing/selftests/netfilter/connect_close.c b/tools/testing/selftests/net/netfilter/connect_close.c
index 1c3b0add54c4..1c3b0add54c4 100644
--- a/tools/testing/selftests/netfilter/connect_close.c
+++ b/tools/testing/selftests/net/netfilter/connect_close.c
diff --git a/tools/testing/selftests/netfilter/conntrack_dump_flush.c b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c
index b11ea8ee6719..bd9317bf5ada 100644
--- a/tools/testing/selftests/netfilter/conntrack_dump_flush.c
+++ b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c
@@ -10,7 +10,7 @@
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
#include <linux/netfilter/nf_conntrack_tcp.h>
-#include "../kselftest_harness.h"
+#include "../../kselftest_harness.h"
#define TEST_ZONE_ID 123
#define NF_CT_DEFAULT_ZONE_ID 0
@@ -313,13 +313,11 @@ FIXTURE_SETUP(conntrack_dump_flush)
self->sock = mnl_socket_open(NETLINK_NETFILTER);
if (!self->sock) {
perror("mnl_socket_open");
- exit(EXIT_FAILURE);
+ SKIP(return, "cannot open netlink_netfilter socket");
}
- if (mnl_socket_bind(self->sock, 0, MNL_SOCKET_AUTOPID) < 0) {
- perror("mnl_socket_bind");
- exit(EXIT_FAILURE);
- }
+ ret = mnl_socket_bind(self->sock, 0, MNL_SOCKET_AUTOPID);
+ EXPECT_EQ(ret, 0);
ret = conntracK_count_zone(self->sock, TEST_ZONE_ID);
if (ret < 0 && errno == EPERM)
diff --git a/tools/testing/selftests/netfilter/conntrack_icmp_related.sh b/tools/testing/selftests/net/netfilter/conntrack_icmp_related.sh
index 76645aaf2b58..c63d840ead61 100755
--- a/tools/testing/selftests/netfilter/conntrack_icmp_related.sh
+++ b/tools/testing/selftests/net/netfilter/conntrack_icmp_related.sh
@@ -14,35 +14,32 @@
# check the icmp errors are propagated to the correct host as per
# nat of "established" icmp-echo "connection".
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-ret=0
+source lib.sh
-nft --version > /dev/null 2>&1
-if [ $? -ne 0 ];then
+if ! nft --version > /dev/null 2>&1;then
echo "SKIP: Could not run test without nft tool"
exit $ksft_skip
fi
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
-
cleanup() {
- for i in 1 2;do ip netns del nsclient$i;done
- for i in 1 2;do ip netns del nsrouter$i;done
+ cleanup_all_ns
}
trap cleanup EXIT
-ipv4() {
- echo -n 192.168.$1.2
-}
+setup_ns nsclient1 nsclient2 nsrouter1 nsrouter2
+
+ret=0
+
+add_addr()
+{
+ ns=$1
+ dev=$2
+ i=$3
-ipv6 () {
- echo -n dead:$1::2
+ ip -net "$ns" link set "$dev" up
+ ip -net "$ns" addr add "192.168.$i.2/24" dev "$dev"
+ ip -net "$ns" addr add "dead:$i::2/64" dev "$dev" nodad
}
check_counter()
@@ -52,10 +49,9 @@ check_counter()
expect=$3
local lret=0
- cnt=$(ip netns exec $ns nft list counter inet filter "$name" | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns" nft list counter inet filter "$name" | grep -q "$expect"; then
echo "ERROR: counter $name in $ns has unexpected value (expected $expect)" 1>&2
- ip netns exec $ns nft list counter inet filter "$name" 1>&2
+ ip netns exec "$ns" nft list counter inet filter "$name" 1>&2
lret=1
fi
@@ -65,9 +61,8 @@ check_counter()
check_unknown()
{
expect="packets 0 bytes 0"
- for n in nsclient1 nsclient2 nsrouter1 nsrouter2; do
- check_counter $n "unknown" "$expect"
- if [ $? -ne 0 ] ;then
+ for n in ${nsclient1} ${nsclient2} ${nsrouter1} ${nsrouter2}; do
+ if ! check_counter "$n" "unknown" "$expect"; then
return 1
fi
done
@@ -75,61 +70,48 @@ check_unknown()
return 0
}
-for n in nsclient1 nsclient2 nsrouter1 nsrouter2; do
- ip netns add $n
- ip -net $n link set lo up
-done
-
-DEV=veth0
-ip link add $DEV netns nsclient1 type veth peer name eth1 netns nsrouter1
DEV=veth0
-ip link add $DEV netns nsclient2 type veth peer name eth1 netns nsrouter2
+ip link add "$DEV" netns "$nsclient1" type veth peer name eth1 netns "$nsrouter1"
+ip link add "$DEV" netns "$nsclient2" type veth peer name eth1 netns "$nsrouter2"
+ip link add "$DEV" netns "$nsrouter1" type veth peer name eth2 netns "$nsrouter2"
-DEV=veth0
-ip link add $DEV netns nsrouter1 type veth peer name eth2 netns nsrouter2
+add_addr "$nsclient1" $DEV 1
+add_addr "$nsclient2" $DEV 2
-DEV=veth0
-for i in 1 2; do
- ip -net nsclient$i link set $DEV up
- ip -net nsclient$i addr add $(ipv4 $i)/24 dev $DEV
- ip -net nsclient$i addr add $(ipv6 $i)/64 dev $DEV
-done
-
-ip -net nsrouter1 link set eth1 up
-ip -net nsrouter1 link set veth0 up
+ip -net "$nsrouter1" link set eth1 up
+ip -net "$nsrouter1" link set $DEV up
-ip -net nsrouter2 link set eth1 up
-ip -net nsrouter2 link set eth2 up
+ip -net "$nsrouter2" link set eth1 mtu 1280 up
+ip -net "$nsrouter2" link set eth2 up
-ip -net nsclient1 route add default via 192.168.1.1
-ip -net nsclient1 -6 route add default via dead:1::1
+ip -net "$nsclient1" route add default via 192.168.1.1
+ip -net "$nsclient1" -6 route add default via dead:1::1
-ip -net nsclient2 route add default via 192.168.2.1
-ip -net nsclient2 route add default via dead:2::1
+ip -net "$nsclient2" route add default via 192.168.2.1
+ip -net "$nsclient2" route add default via dead:2::1
+ip -net "$nsclient2" link set veth0 mtu 1280
-i=3
-ip -net nsrouter1 addr add 192.168.1.1/24 dev eth1
-ip -net nsrouter1 addr add 192.168.3.1/24 dev veth0
-ip -net nsrouter1 addr add dead:1::1/64 dev eth1
-ip -net nsrouter1 addr add dead:3::1/64 dev veth0
-ip -net nsrouter1 route add default via 192.168.3.10
-ip -net nsrouter1 -6 route add default via dead:3::10
+ip -net "$nsrouter1" addr add 192.168.1.1/24 dev eth1
+ip -net "$nsrouter1" addr add 192.168.3.1/24 dev veth0
+ip -net "$nsrouter1" addr add dead:1::1/64 dev eth1 nodad
+ip -net "$nsrouter1" addr add dead:3::1/64 dev veth0 nodad
+ip -net "$nsrouter1" route add default via 192.168.3.10
+ip -net "$nsrouter1" -6 route add default via dead:3::10
-ip -net nsrouter2 addr add 192.168.2.1/24 dev eth1
-ip -net nsrouter2 addr add 192.168.3.10/24 dev eth2
-ip -net nsrouter2 addr add dead:2::1/64 dev eth1
-ip -net nsrouter2 addr add dead:3::10/64 dev eth2
-ip -net nsrouter2 route add default via 192.168.3.1
-ip -net nsrouter2 route add default via dead:3::1
+ip -net "$nsrouter2" addr add 192.168.2.1/24 dev eth1
+ip -net "$nsrouter2" addr add 192.168.3.10/24 dev eth2
+ip -net "$nsrouter2" addr add dead:2::1/64 dev eth1 nodad
+ip -net "$nsrouter2" addr add dead:3::10/64 dev eth2 nodad
+ip -net "$nsrouter2" route add default via 192.168.3.1
+ip -net "$nsrouter2" route add default via dead:3::1
-sleep 2
for i in 4 6; do
- ip netns exec nsrouter1 sysctl -q net.ipv$i.conf.all.forwarding=1
- ip netns exec nsrouter2 sysctl -q net.ipv$i.conf.all.forwarding=1
+ ip netns exec "$nsrouter1" sysctl -q net.ipv$i.conf.all.forwarding=1
+ ip netns exec "$nsrouter2" sysctl -q net.ipv$i.conf.all.forwarding=1
done
-for netns in nsrouter1 nsrouter2; do
-ip netns exec $netns nft -f - <<EOF
+for netns in "$nsrouter1" "$nsrouter2"; do
+ip netns exec "$netns" nft -f - <<EOF
table inet filter {
counter unknown { }
counter related { }
@@ -144,7 +126,7 @@ table inet filter {
EOF
done
-ip netns exec nsclient1 nft -f - <<EOF
+ip netns exec "$nsclient1" nft -f - <<EOF
table inet filter {
counter unknown { }
counter related { }
@@ -164,7 +146,7 @@ table inet filter {
}
EOF
-ip netns exec nsclient2 nft -f - <<EOF
+ip netns exec "$nsclient2" nft -f - <<EOF
table inet filter {
counter unknown { }
counter new { }
@@ -189,11 +171,10 @@ table inet filter {
}
EOF
-
# make sure NAT core rewrites adress of icmp error if nat is used according to
# conntrack nat information (icmp error will be directed at nsrouter1 address,
# but it needs to be routed to nsclient1 address).
-ip netns exec nsrouter1 nft -f - <<EOF
+ip netns exec "$nsrouter1" nft -f - <<EOF
table ip nat {
chain postrouting {
type nat hook postrouting priority 0; policy accept;
@@ -208,44 +189,32 @@ table ip6 nat {
}
EOF
-ip netns exec nsrouter2 ip link set eth1 mtu 1280
-ip netns exec nsclient2 ip link set veth0 mtu 1280
-sleep 1
-
-ip netns exec nsclient1 ping -c 1 -s 1000 -q -M do 192.168.2.2 >/dev/null
-if [ $? -ne 0 ]; then
+if ! ip netns exec "$nsclient1" ping -c 1 -s 1000 -q -M "do" 192.168.2.2 >/dev/null; then
echo "ERROR: netns ip routing/connectivity broken" 1>&2
- cleanup
exit 1
fi
-ip netns exec nsclient1 ping6 -q -c 1 -s 1000 dead:2::2 >/dev/null
-if [ $? -ne 0 ]; then
+if ! ip netns exec "$nsclient1" ping -c 1 -s 1000 -q dead:2::2 >/dev/null; then
echo "ERROR: netns ipv6 routing/connectivity broken" 1>&2
- cleanup
exit 1
fi
-check_unknown
-if [ $? -ne 0 ]; then
+if ! check_unknown; then
ret=1
fi
expect="packets 0 bytes 0"
-for netns in nsrouter1 nsrouter2 nsclient1;do
- check_counter "$netns" "related" "$expect"
- if [ $? -ne 0 ]; then
+for netns in "$nsrouter1" "$nsrouter2" "$nsclient1";do
+ if ! check_counter "$netns" "related" "$expect"; then
ret=1
fi
done
expect="packets 2 bytes 2076"
-check_counter nsclient2 "new" "$expect"
-if [ $? -ne 0 ]; then
+if ! check_counter "$nsclient2" "new" "$expect"; then
ret=1
fi
-ip netns exec nsclient1 ping -q -c 1 -s 1300 -M do 192.168.2.2 > /dev/null
-if [ $? -eq 0 ]; then
+if ip netns exec "$nsclient1" ping -W 0.5 -q -c 1 -s 1300 -M "do" 192.168.2.2 > /dev/null; then
echo "ERROR: ping should have failed with PMTU too big error" 1>&2
ret=1
fi
@@ -253,30 +222,26 @@ fi
# nsrouter2 should have generated the icmp error, so
# related counter should be 0 (its in forward).
expect="packets 0 bytes 0"
-check_counter "nsrouter2" "related" "$expect"
-if [ $? -ne 0 ]; then
+if ! check_counter "$nsrouter2" "related" "$expect"; then
ret=1
fi
# but nsrouter1 should have seen it, same for nsclient1.
expect="packets 1 bytes 576"
-for netns in nsrouter1 nsclient1;do
- check_counter "$netns" "related" "$expect"
- if [ $? -ne 0 ]; then
+for netns in ${nsrouter1} ${nsclient1};do
+ if ! check_counter "$netns" "related" "$expect"; then
ret=1
fi
done
-ip netns exec nsclient1 ping6 -c 1 -s 1300 dead:2::2 > /dev/null
-if [ $? -eq 0 ]; then
+if ip netns exec "${nsclient1}" ping6 -W 0.5 -c 1 -s 1300 dead:2::2 > /dev/null; then
echo "ERROR: ping6 should have failed with PMTU too big error" 1>&2
ret=1
fi
expect="packets 2 bytes 1856"
-for netns in nsrouter1 nsclient1;do
- check_counter "$netns" "related" "$expect"
- if [ $? -ne 0 ]; then
+for netns in "${nsrouter1}" "${nsclient1}";do
+ if ! check_counter "$netns" "related" "$expect"; then
ret=1
fi
done
@@ -288,21 +253,19 @@ else
fi
# add 'bad' route, expect icmp REDIRECT to be generated
-ip netns exec nsclient1 ip route add 192.168.1.42 via 192.168.1.1
-ip netns exec nsclient1 ip route add dead:1::42 via dead:1::1
+ip netns exec "${nsclient1}" ip route add 192.168.1.42 via 192.168.1.1
+ip netns exec "${nsclient1}" ip route add dead:1::42 via dead:1::1
-ip netns exec "nsclient1" ping -q -c 2 192.168.1.42 > /dev/null
+ip netns exec "$nsclient1" ping -W 1 -q -i 0.5 -c 2 192.168.1.42 > /dev/null
expect="packets 1 bytes 112"
-check_counter nsclient1 "redir4" "$expect"
-if [ $? -ne 0 ];then
+if ! check_counter "$nsclient1" "redir4" "$expect"; then
ret=1
fi
-ip netns exec "nsclient1" ping -c 1 dead:1::42 > /dev/null
+ip netns exec "$nsclient1" ping -W 1 -c 1 dead:1::42 > /dev/null
expect="packets 1 bytes 192"
-check_counter nsclient1 "redir6" "$expect"
-if [ $? -ne 0 ];then
+if ! check_counter "$nsclient1" "redir6" "$expect"; then
ret=1
fi
diff --git a/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh b/tools/testing/selftests/net/netfilter/conntrack_ipip_mtu.sh
index eb9553e4986b..9832a5d0198a 100755
--- a/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh
+++ b/tools/testing/selftests/net/netfilter/conntrack_ipip_mtu.sh
@@ -1,8 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
+source lib.sh
# Conntrack needs to reassemble fragments in order to have complete
# packets for rule matching. Reassembly can lead to packet loss.
@@ -23,56 +22,44 @@ ksft_skip=4
# between Client A and Client B over WAN. Wanrouter has MTU 1400 set
# on its interfaces.
-rnd=$(mktemp -u XXXXXXXX)
rx=$(mktemp)
-r_a="ns-ra-$rnd"
-r_b="ns-rb-$rnd"
-r_w="ns-rw-$rnd"
-c_a="ns-ca-$rnd"
-c_b="ns-cb-$rnd"
-
-checktool (){
- if ! $1 > /dev/null 2>&1; then
- echo "SKIP: Could not $2"
- exit $ksft_skip
- fi
-}
-
checktool "iptables --version" "run test without iptables"
-checktool "ip -Version" "run test without ip tool"
-checktool "which socat" "run test without socat"
-checktool "ip netns add ${r_a}" "create net namespace"
+checktool "socat -h" "run test without socat"
-for n in ${r_b} ${r_w} ${c_a} ${c_b};do
- ip netns add ${n}
-done
+setup_ns r_a r_b r_w c_a c_b
cleanup() {
- for n in ${r_a} ${r_b} ${r_w} ${c_a} ${c_b};do
- ip netns del ${n}
- done
- rm -f ${rx}
+ cleanup_all_ns
+ rm -f "$rx"
}
trap cleanup EXIT
+listener_ready()
+{
+ ns="$1"
+ port="$2"
+ ss -N "$ns" -lnu -o "sport = :$port" | grep -q "$port"
+}
+
test_path() {
msg="$1"
- ip netns exec ${c_b} socat -t 3 - udp4-listen:5000,reuseaddr > ${rx} < /dev/null &
+ ip netns exec "$c_b" socat -t 3 - udp4-listen:5000,reuseaddr > "$rx" < /dev/null &
+
+ busywait $BUSYWAIT_TIMEOUT listener_ready "$c_b" 5000
- sleep 1
for i in 1 2 3; do
head -c1400 /dev/zero | tr "\000" "a" | \
- ip netns exec ${c_a} socat -t 1 -u STDIN UDP:192.168.20.2:5000
+ ip netns exec "$c_a" socat -t 1 -u STDIN UDP:192.168.20.2:5000
done
wait
- bytes=$(wc -c < ${rx})
+ bytes=$(wc -c < "$rx")
- if [ $bytes -eq 1400 ];then
+ if [ "$bytes" -eq 1400 ];then
echo "OK: PMTU $msg connection tracking"
else
echo "FAIL: PMTU $msg connection tracking: got $bytes, expected 1400"
@@ -91,24 +78,24 @@ test_path() {
# 10.4.4.1 via 10.2.2.254 (Router B via Wanrouter)
# No iptables rules at all.
-ip link add veth0 netns ${r_a} type veth peer name veth0 netns ${r_w}
-ip link add veth1 netns ${r_a} type veth peer name veth0 netns ${c_a}
+ip link add veth0 netns "$r_a" type veth peer name veth0 netns "$r_w"
+ip link add veth1 netns "$r_a" type veth peer name veth0 netns "$c_a"
l_addr="10.2.2.1"
r_addr="10.4.4.1"
-ip netns exec ${r_a} ip link add ipip0 type ipip local ${l_addr} remote ${r_addr} mode ipip || exit $ksft_skip
+ip netns exec "$r_a" ip link add ipip0 type ipip local "$l_addr" remote "$r_addr" mode ipip || exit $ksft_skip
for dev in lo veth0 veth1 ipip0; do
- ip -net ${r_a} link set $dev up
+ ip -net "$r_a" link set "$dev" up
done
-ip -net ${r_a} addr add 10.2.2.1/24 dev veth0
-ip -net ${r_a} addr add 192.168.10.1/24 dev veth1
+ip -net "$r_a" addr add 10.2.2.1/24 dev veth0
+ip -net "$r_a" addr add 192.168.10.1/24 dev veth1
-ip -net ${r_a} route add 192.168.20.0/24 dev ipip0
-ip -net ${r_a} route add 10.4.4.0/24 via 10.2.2.254
+ip -net "$r_a" route add 192.168.20.0/24 dev ipip0
+ip -net "$r_a" route add 10.4.4.0/24 via 10.2.2.254
-ip netns exec ${r_a} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+ip netns exec "$r_a" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
# Detailed setup for Router B
# ---------------------------
@@ -121,49 +108,46 @@ ip netns exec ${r_a} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
# 10.2.2.1 via 10.4.4.254 (Router A via Wanrouter)
# No iptables rules at all.
-ip link add veth0 netns ${r_b} type veth peer name veth1 netns ${r_w}
-ip link add veth1 netns ${r_b} type veth peer name veth0 netns ${c_b}
+ip link add veth0 netns "$r_b" type veth peer name veth1 netns "$r_w"
+ip link add veth1 netns "$r_b" type veth peer name veth0 netns "$c_b"
l_addr="10.4.4.1"
r_addr="10.2.2.1"
-ip netns exec ${r_b} ip link add ipip0 type ipip local ${l_addr} remote ${r_addr} mode ipip || exit $ksft_skip
+ip netns exec "$r_b" ip link add ipip0 type ipip local "${l_addr}" remote "${r_addr}" mode ipip || exit $ksft_skip
-for dev in lo veth0 veth1 ipip0; do
- ip -net ${r_b} link set $dev up
+for dev in veth0 veth1 ipip0; do
+ ip -net "$r_b" link set $dev up
done
-ip -net ${r_b} addr add 10.4.4.1/24 dev veth0
-ip -net ${r_b} addr add 192.168.20.1/24 dev veth1
+ip -net "$r_b" addr add 10.4.4.1/24 dev veth0
+ip -net "$r_b" addr add 192.168.20.1/24 dev veth1
-ip -net ${r_b} route add 192.168.10.0/24 dev ipip0
-ip -net ${r_b} route add 10.2.2.0/24 via 10.4.4.254
-ip netns exec ${r_b} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+ip -net "$r_b" route add 192.168.10.0/24 dev ipip0
+ip -net "$r_b" route add 10.2.2.0/24 via 10.4.4.254
+ip netns exec "$r_b" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
# Client A
-ip -net ${c_a} addr add 192.168.10.2/24 dev veth0
-ip -net ${c_a} link set dev lo up
-ip -net ${c_a} link set dev veth0 up
-ip -net ${c_a} route add default via 192.168.10.1
+ip -net "$c_a" addr add 192.168.10.2/24 dev veth0
+ip -net "$c_a" link set dev veth0 up
+ip -net "$c_a" route add default via 192.168.10.1
# Client A
-ip -net ${c_b} addr add 192.168.20.2/24 dev veth0
-ip -net ${c_b} link set dev veth0 up
-ip -net ${c_b} link set dev lo up
-ip -net ${c_b} route add default via 192.168.20.1
+ip -net "$c_b" addr add 192.168.20.2/24 dev veth0
+ip -net "$c_b" link set dev veth0 up
+ip -net "$c_b" route add default via 192.168.20.1
# Wan
-ip -net ${r_w} addr add 10.2.2.254/24 dev veth0
-ip -net ${r_w} addr add 10.4.4.254/24 dev veth1
+ip -net "$r_w" addr add 10.2.2.254/24 dev veth0
+ip -net "$r_w" addr add 10.4.4.254/24 dev veth1
-ip -net ${r_w} link set dev lo up
-ip -net ${r_w} link set dev veth0 up mtu 1400
-ip -net ${r_w} link set dev veth1 up mtu 1400
+ip -net "$r_w" link set dev veth0 up mtu 1400
+ip -net "$r_w" link set dev veth1 up mtu 1400
-ip -net ${r_a} link set dev veth0 mtu 1400
-ip -net ${r_b} link set dev veth0 mtu 1400
+ip -net "$r_a" link set dev veth0 mtu 1400
+ip -net "$r_b" link set dev veth0 mtu 1400
-ip netns exec ${r_w} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+ip netns exec "$r_w" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
# Path MTU discovery
# ------------------
@@ -203,5 +187,5 @@ test_path "without"
#packet is too big (1400) for the tunnel PMTU (1380) to Router B, it is
#dropped on Router A before sending.
-ip netns exec ${r_a} iptables -A FORWARD -m conntrack --ctstate NEW
+ip netns exec "$r_a" iptables -A FORWARD -m conntrack --ctstate NEW
test_path "with"
diff --git a/tools/testing/selftests/net/netfilter/conntrack_sctp_collision.sh b/tools/testing/selftests/net/netfilter/conntrack_sctp_collision.sh
new file mode 100755
index 000000000000..d860f7d9744b
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/conntrack_sctp_collision.sh
@@ -0,0 +1,87 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Testing For SCTP COLLISION SCENARIO as Below:
+#
+# 14:35:47.655279 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT] [init tag: 2017837359]
+# 14:35:48.353250 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT] [init tag: 1187206187]
+# 14:35:48.353275 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT ACK] [init tag: 2017837359]
+# 14:35:48.353283 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [COOKIE ECHO]
+# 14:35:48.353977 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [COOKIE ACK]
+# 14:35:48.855335 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT ACK] [init tag: 164579970]
+#
+# TOPO: SERVER_NS (link0)<--->(link1) ROUTER_NS (link2)<--->(link3) CLIENT_NS
+
+source lib.sh
+
+CLIENT_IP="198.51.200.1"
+CLIENT_PORT=1234
+
+SERVER_IP="198.51.100.1"
+SERVER_PORT=1234
+
+CLIENT_GW="198.51.200.2"
+SERVER_GW="198.51.100.2"
+
+# setup the topo
+setup() {
+ setup_ns CLIENT_NS SERVER_NS ROUTER_NS
+ ip -n "$SERVER_NS" link add link0 type veth peer name link1 netns "$ROUTER_NS"
+ ip -n "$CLIENT_NS" link add link3 type veth peer name link2 netns "$ROUTER_NS"
+
+ ip -n "$SERVER_NS" link set link0 up
+ ip -n "$SERVER_NS" addr add $SERVER_IP/24 dev link0
+ ip -n "$SERVER_NS" route add $CLIENT_IP dev link0 via $SERVER_GW
+
+ ip -n "$ROUTER_NS" link set link1 up
+ ip -n "$ROUTER_NS" link set link2 up
+ ip -n "$ROUTER_NS" addr add $SERVER_GW/24 dev link1
+ ip -n "$ROUTER_NS" addr add $CLIENT_GW/24 dev link2
+ ip net exec "$ROUTER_NS" sysctl -wq net.ipv4.ip_forward=1
+
+ ip -n "$CLIENT_NS" link set link3 up
+ ip -n "$CLIENT_NS" addr add $CLIENT_IP/24 dev link3
+ ip -n "$CLIENT_NS" route add $SERVER_IP dev link3 via $CLIENT_GW
+
+ # simulate the delay on OVS upcall by setting up a delay for INIT_ACK with
+ # tc on $SERVER_NS side
+ tc -n "$SERVER_NS" qdisc add dev link0 root handle 1: htb r2q 64
+ tc -n "$SERVER_NS" class add dev link0 parent 1: classid 1:1 htb rate 100mbit
+ tc -n "$SERVER_NS" filter add dev link0 parent 1: protocol ip u32 match ip protocol 132 \
+ 0xff match u8 2 0xff at 32 flowid 1:1
+ if ! tc -n "$SERVER_NS" qdisc add dev link0 parent 1:1 handle 10: netem delay 1200ms; then
+ echo "SKIP: Cannot add netem qdisc"
+ exit $ksft_skip
+ fi
+
+ # simulate the ctstate check on OVS nf_conntrack
+ ip net exec "$ROUTER_NS" iptables -A FORWARD -m state --state INVALID,UNTRACKED -j DROP
+ ip net exec "$ROUTER_NS" iptables -A INPUT -p sctp -j DROP
+
+ # use a smaller number for assoc's max_retrans to reproduce the issue
+ modprobe -q sctp
+ ip net exec "$CLIENT_NS" sysctl -wq net.sctp.association_max_retrans=3
+}
+
+cleanup() {
+ ip net exec "$CLIENT_NS" pkill sctp_collision >/dev/null 2>&1
+ ip net exec "$SERVER_NS" pkill sctp_collision >/dev/null 2>&1
+ cleanup_all_ns
+}
+
+do_test() {
+ ip net exec "$SERVER_NS" ./sctp_collision server \
+ $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT &
+ ip net exec "$CLIENT_NS" ./sctp_collision client \
+ $CLIENT_IP $CLIENT_PORT $SERVER_IP $SERVER_PORT
+}
+
+# NOTE: one way to work around the issue is set a smaller hb_interval
+# ip net exec $CLIENT_NS sysctl -wq net.sctp.hb_interval=3500
+
+# run the test case
+trap cleanup EXIT
+setup && \
+echo "Test for SCTP Collision in nf_conntrack:" && \
+do_test && echo "PASS!"
+exit $?
diff --git a/tools/testing/selftests/net/netfilter/conntrack_tcp_unreplied.sh b/tools/testing/selftests/net/netfilter/conntrack_tcp_unreplied.sh
new file mode 100755
index 000000000000..121ea93c0178
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/conntrack_tcp_unreplied.sh
@@ -0,0 +1,164 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Check that UNREPLIED tcp conntrack will eventually timeout.
+#
+
+source lib.sh
+
+if ! nft --version > /dev/null 2>&1;then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+if ! conntrack --version > /dev/null 2>&1;then
+ echo "SKIP: Could not run test without conntrack tool"
+ exit $ksft_skip
+fi
+
+ret=0
+
+cleanup() {
+ ip netns pids "$ns1" | xargs kill 2>/dev/null
+ ip netns pids "$ns2" | xargs kill 2>/dev/null
+
+ cleanup_all_ns
+}
+
+ipv4() {
+ echo -n 192.168."$1".2
+}
+
+check_counter()
+{
+ ns=$1
+ name=$2
+ expect=$3
+ local lret=0
+
+ if ! ip netns exec "$ns2" nft list counter inet filter "$name" | grep -q "$expect"; then
+ echo "ERROR: counter $name in $ns2 has unexpected value (expected $expect)" 1>&2
+ ip netns exec "$ns2" nft list counter inet filter "$name" 1>&2
+ lret=1
+ fi
+
+ return $lret
+}
+
+trap cleanup EXIT
+
+# Create test namespaces
+setup_ns ns1 ns2
+
+# Connect the namespace to the host using a veth pair
+ip -net "$ns1" link add name veth1 type veth peer name veth2
+ip -net "$ns1" link set netns "$ns2" dev veth2
+
+ip -net "$ns1" link set up dev lo
+ip -net "$ns2" link set up dev lo
+ip -net "$ns1" link set up dev veth1
+ip -net "$ns2" link set up dev veth2
+
+ip -net "$ns2" addr add 10.11.11.2/24 dev veth2
+ip -net "$ns2" route add default via 10.11.11.1
+
+ip netns exec "$ns2" sysctl -q net.ipv4.conf.veth2.forwarding=1
+
+# add a rule inside NS so we enable conntrack
+ip netns exec "$ns1" nft -f - <<EOF
+table inet filter {
+ chain input {
+ type filter hook input priority 0; policy accept;
+ ct state established accept
+ }
+}
+EOF
+
+ip -net "$ns1" addr add 10.11.11.1/24 dev veth1
+ip -net "$ns1" route add 10.99.99.99 via 10.11.11.2
+
+# Check connectivity works
+ip netns exec "$ns1" ping -q -c 2 10.11.11.2 >/dev/null || exit 1
+
+ip netns exec "$ns2" socat -u -4 TCP-LISTEN:8080,reuseaddr STDOUT &
+
+ip netns exec "$ns2" nft -f - <<EOF
+table inet filter {
+ counter connreq { }
+ counter redir { }
+ chain input {
+ type filter hook input priority 0; policy accept;
+ ct state new tcp flags syn ip daddr 10.99.99.99 tcp dport 80 counter name "connreq" accept
+ ct state new ct status dnat tcp dport 8080 counter name "redir" accept
+ }
+}
+EOF
+if [ $? -ne 0 ]; then
+ echo "ERROR: Could not load nft rules"
+ exit 1
+fi
+
+ip netns exec "$ns2" sysctl -q net.netfilter.nf_conntrack_tcp_timeout_syn_sent=10
+
+echo "INFO: connect $ns1 -> $ns2 to the virtual ip"
+ip netns exec "$ns1" bash -c 'for i in $(seq 1 $BUSYWAIT_TIMEOUT) ; do
+ socat -u STDIN TCP:10.99.99.99:80 < /dev/null
+ sleep 0.1
+ done' &
+
+wait_for_attempt()
+{
+ count=$(ip netns exec "$ns2" conntrack -L -p tcp --dport 80 2>/dev/null | wc -l)
+ if [ "$count" -gt 0 ]; then
+ return 0
+ fi
+
+ return 1
+}
+
+# wait for conntrack to pick the new connection request up before loading
+# the nat redirect rule.
+if ! busywait "$BUSYWAIT_TIMEOUT" wait_for_attempt; then
+ echo "ERROR: $ns2 did not pick up tcp connection from peer"
+ exit 1
+fi
+
+ip netns exec "$ns2" nft -f - <<EOF
+table inet nat {
+ chain prerouting {
+ type nat hook prerouting priority 0; policy accept;
+ ip daddr 10.99.99.99 tcp dport 80 redirect to :8080
+ }
+}
+EOF
+if [ $? -ne 0 ]; then
+ echo "ERROR: Could not load nat redirect"
+ exit 1
+fi
+
+wait_for_redirect()
+{
+ count=$(ip netns exec "$ns2" conntrack -L -p tcp --reply-port-src 8080 2>/dev/null | wc -l)
+ if [ "$count" -gt 0 ]; then
+ return 0
+ fi
+
+ return 1
+}
+echo "INFO: NAT redirect added in ns $ns2, waiting for $BUSYWAIT_TIMEOUT ms for nat to take effect"
+
+busywait "$BUSYWAIT_TIMEOUT" wait_for_redirect
+ret=$?
+
+expect="packets 1 bytes 60"
+if ! check_counter "$ns2" "redir" "$expect"; then
+ ret=1
+fi
+
+if [ $ret -eq 0 ];then
+ echo "PASS: redirection counter has expected values"
+else
+ echo "ERROR: no tcp connection was redirected"
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/conntrack_vrf.sh b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh
index 8b5ea9234588..073e8e62d350 100755
--- a/tools/testing/selftests/netfilter/conntrack_vrf.sh
+++ b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# This script demonstrates interaction of conntrack and vrf.
# The vrf driver calls the netfilter hooks again, with oif/iif
@@ -28,84 +28,67 @@
# that was supposed to be fixed by the commit mentioned above to make sure
# that any fix to test case 1 won't break masquerade again.
-ksft_skip=4
+source lib.sh
IP0=172.30.30.1
IP1=172.30.30.2
PFXL=30
ret=0
-sfx=$(mktemp -u "XXXXXXXX")
-ns0="ns0-$sfx"
-ns1="ns1-$sfx"
-
cleanup()
{
ip netns pids $ns0 | xargs kill 2>/dev/null
ip netns pids $ns1 | xargs kill 2>/dev/null
- ip netns del $ns0 $ns1
+ cleanup_all_ns
}
-nft --version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without nft tool"
- exit $ksft_skip
-fi
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
-
-ip netns add "$ns0"
-if [ $? -ne 0 ];then
- echo "SKIP: Could not create net namespace $ns0"
- exit $ksft_skip
-fi
-ip netns add "$ns1"
+checktool "nft --version" "run test without nft"
+checktool "conntrack --version" "run test without conntrack"
+checktool "socat -h" "run test without socat"
trap cleanup EXIT
-ip netns exec $ns0 sysctl -q -w net.ipv4.conf.default.rp_filter=0
-ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0
-ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0
+setup_ns ns0 ns1
+
+ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.default.rp_filter=0
+ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0
+ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0
-ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1
-if [ $? -ne 0 ];then
+if ! ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1; then
echo "SKIP: Could not add veth device"
exit $ksft_skip
fi
-ip -net $ns0 li add tvrf type vrf table 9876
-if [ $? -ne 0 ];then
+if ! ip -net "$ns0" li add tvrf type vrf table 9876; then
echo "SKIP: Could not add vrf device"
exit $ksft_skip
fi
-ip -net $ns0 li set lo up
+ip -net "$ns0" li set veth0 master tvrf
+ip -net "$ns0" li set tvrf up
+ip -net "$ns0" li set veth0 up
+ip -net "$ns1" li set veth0 up
-ip -net $ns0 li set veth0 master tvrf
-ip -net $ns0 li set tvrf up
-ip -net $ns0 li set veth0 up
-ip -net $ns1 li set veth0 up
+ip -net "$ns0" addr add $IP0/$PFXL dev veth0
+ip -net "$ns1" addr add $IP1/$PFXL dev veth0
-ip -net $ns0 addr add $IP0/$PFXL dev veth0
-ip -net $ns1 addr add $IP1/$PFXL dev veth0
+listener_ready()
+{
+ local ns="$1"
-ip netns exec $ns1 iperf3 -s > /dev/null 2>&1&
-if [ $? -ne 0 ];then
- echo "SKIP: Could not start iperf3"
- exit $ksft_skip
-fi
+ ss -N "$ns" -l -n -t -o "sport = :55555" | grep -q "55555"
+}
+
+ip netns exec "$ns1" socat -u -4 TCP-LISTEN:55555,reuseaddr,fork STDOUT > /dev/null &
+busywait $BUSYWAIT_TIMEOUT listener_ready "$ns1"
# test vrf ingress handling.
# The incoming connection should be placed in conntrack zone 1,
# as decided by the first iteration of the ruleset.
test_ct_zone_in()
{
-ip netns exec $ns0 nft -f - <<EOF
+ip netns exec "$ns0" nft -f - <<EOF
table testct {
chain rawpre {
type filter hook prerouting priority raw;
@@ -126,21 +109,21 @@ table testct {
}
}
EOF
- ip netns exec $ns1 ping -W 1 -c 1 -I veth0 $IP0 > /dev/null
+ ip netns exec "$ns1" ping -W 1 -c 1 -I veth0 "$IP0" > /dev/null
# should be in zone 1, not zone 2
- count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 1 2>/dev/null | wc -l)
- if [ $count -eq 1 ]; then
+ count=$(ip netns exec "$ns0" conntrack -L -s $IP1 -d $IP0 -p icmp --zone 1 2>/dev/null | wc -l)
+ if [ "$count" -eq 1 ]; then
echo "PASS: entry found in conntrack zone 1"
else
echo "FAIL: entry not found in conntrack zone 1"
- count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 2 2> /dev/null | wc -l)
- if [ $count -eq 1 ]; then
+ count=$(ip netns exec "$ns0" conntrack -L -s $IP1 -d $IP0 -p icmp --zone 2 2> /dev/null | wc -l)
+ if [ "$count" -eq 1 ]; then
echo "FAIL: entry found in zone 2 instead"
else
echo "FAIL: entry not in zone 1 or 2, dumping table"
- ip netns exec $ns0 conntrack -L
- ip netns exec $ns0 nft list ruleset
+ ip netns exec "$ns0" conntrack -L
+ ip netns exec "$ns0" nft list ruleset
fi
fi
}
@@ -153,12 +136,12 @@ test_masquerade_vrf()
local qdisc=$1
if [ "$qdisc" != "default" ]; then
- tc -net $ns0 qdisc add dev tvrf root $qdisc
+ tc -net "$ns0" qdisc add dev tvrf root "$qdisc"
fi
- ip netns exec $ns0 conntrack -F 2>/dev/null
+ ip netns exec "$ns0" conntrack -F 2>/dev/null
-ip netns exec $ns0 nft -f - <<EOF
+ip netns exec "$ns0" nft -f - <<EOF
flush ruleset
table ip nat {
chain rawout {
@@ -179,25 +162,23 @@ table ip nat {
}
}
EOF
- ip netns exec $ns0 ip vrf exec tvrf iperf3 -t 1 -c $IP1 >/dev/null
- if [ $? -ne 0 ]; then
- echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on vrf device"
+ if ! ip netns exec "$ns0" ip vrf exec tvrf socat -u -4 STDIN TCP:"$IP1":55555 < /dev/null > /dev/null;then
+ echo "FAIL: connect failure with masquerade + sport rewrite on vrf device"
ret=1
return
fi
# must also check that nat table was evaluated on second (lower device) iteration.
- ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2' &&
- ip netns exec $ns0 nft list table ip nat |grep -q 'untracked counter packets [1-9]'
- if [ $? -eq 0 ]; then
- echo "PASS: iperf3 connect with masquerade + sport rewrite on vrf device ($qdisc qdisc)"
+ if ip netns exec "$ns0" nft list table ip nat |grep -q 'counter packets 1' &&
+ ip netns exec "$ns0" nft list table ip nat |grep -q 'untracked counter packets [1-9]'; then
+ echo "PASS: connect with masquerade + sport rewrite on vrf device ($qdisc qdisc)"
else
echo "FAIL: vrf rules have unexpected counter value"
ret=1
fi
if [ "$qdisc" != "default" ]; then
- tc -net $ns0 qdisc del dev tvrf root
+ tc -net "$ns0" qdisc del dev tvrf root
fi
}
@@ -206,8 +187,8 @@ EOF
# oifname is the lower device (veth0 in this case).
test_masquerade_veth()
{
- ip netns exec $ns0 conntrack -F 2>/dev/null
-ip netns exec $ns0 nft -f - <<EOF
+ ip netns exec "$ns0" conntrack -F 2>/dev/null
+ip netns exec "$ns0" nft -f - <<EOF
flush ruleset
table ip nat {
chain postrouting {
@@ -216,17 +197,15 @@ table ip nat {
}
}
EOF
- ip netns exec $ns0 ip vrf exec tvrf iperf3 -t 1 -c $IP1 > /dev/null
- if [ $? -ne 0 ]; then
- echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on veth device"
+ if ! ip netns exec "$ns0" ip vrf exec tvrf socat -u -4 STDIN TCP:"$IP1":55555 < /dev/null > /dev/null;then
+ echo "FAIL: connect failure with masquerade + sport rewrite on veth device"
ret=1
return
fi
# must also check that nat table was evaluated on second (lower device) iteration.
- ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2'
- if [ $? -eq 0 ]; then
- echo "PASS: iperf3 connect with masquerade + sport rewrite on veth device"
+ if ip netns exec "$ns0" nft list table ip nat |grep -q 'counter packets 1'; then
+ echo "PASS: connect with masquerade + sport rewrite on veth device"
else
echo "FAIL: vrf masq rule has unexpected counter value"
ret=1
diff --git a/tools/testing/selftests/net/netfilter/ipvs.sh b/tools/testing/selftests/net/netfilter/ipvs.sh
new file mode 100755
index 000000000000..4ceee9fb3949
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/ipvs.sh
@@ -0,0 +1,211 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# End-to-end ipvs test suite
+# Topology:
+#--------------------------------------------------------------+
+# | |
+# ns0 | ns1 |
+# ----------- | ----------- ----------- |
+# | veth01 | --------- | veth10 | | veth12 | |
+# ----------- peer ----------- ----------- |
+# | | | |
+# ----------- | | |
+# | br0 | |----------------- peer |--------------|
+# ----------- | | |
+# | | | |
+# ---------- peer ---------- ----------- |
+# | veth02 | --------- | veth20 | | veth21 | |
+# ---------- | ---------- ----------- |
+# | ns2 |
+# | |
+#--------------------------------------------------------------+
+#
+# We assume that all network driver are loaded
+#
+
+source lib.sh
+
+ret=0
+GREEN='\033[0;92m'
+RED='\033[0;31m'
+NC='\033[0m' # No Color
+
+readonly port=8080
+
+readonly vip_v4=207.175.44.110
+readonly cip_v4=10.0.0.2
+readonly gip_v4=10.0.0.1
+readonly dip_v4=172.16.0.1
+readonly rip_v4=172.16.0.2
+readonly sip_v4=10.0.0.3
+
+readonly infile="$(mktemp)"
+readonly outfile="$(mktemp)"
+readonly datalen=32
+
+sysipvsnet="/proc/sys/net/ipv4/vs/"
+if [ ! -d $sysipvsnet ]; then
+ if ! modprobe -q ip_vs; then
+ echo "skip: could not run test without ipvs module"
+ exit $ksft_skip
+ fi
+fi
+
+checktool "ipvsadm -v" "run test without ipvsadm"
+checktool "socat -h" "run test without socat"
+
+setup() {
+ setup_ns ns0 ns1 ns2
+
+ ip link add veth01 netns "${ns0}" type veth peer name veth10 netns "${ns1}"
+ ip link add veth02 netns "${ns0}" type veth peer name veth20 netns "${ns2}"
+ ip link add veth12 netns "${ns1}" type veth peer name veth21 netns "${ns2}"
+
+ ip netns exec "${ns0}" ip link set veth01 up
+ ip netns exec "${ns0}" ip link set veth02 up
+ ip netns exec "${ns0}" ip link add br0 type bridge
+ ip netns exec "${ns0}" ip link set veth01 master br0
+ ip netns exec "${ns0}" ip link set veth02 master br0
+ ip netns exec "${ns0}" ip link set br0 up
+ ip netns exec "${ns0}" ip addr add "${cip_v4}/24" dev br0
+
+ ip netns exec "${ns1}" ip link set veth10 up
+ ip netns exec "${ns1}" ip addr add "${gip_v4}/24" dev veth10
+ ip netns exec "${ns1}" ip link set veth12 up
+ ip netns exec "${ns1}" ip addr add "${dip_v4}/24" dev veth12
+
+ ip netns exec "${ns2}" ip link set veth21 up
+ ip netns exec "${ns2}" ip addr add "${rip_v4}/24" dev veth21
+ ip netns exec "${ns2}" ip link set veth20 up
+ ip netns exec "${ns2}" ip addr add "${sip_v4}/24" dev veth20
+
+ sleep 1
+
+ dd if=/dev/urandom of="${infile}" bs="${datalen}" count=1 status=none
+}
+
+cleanup() {
+ cleanup_all_ns
+
+ if [ -f "${outfile}" ]; then
+ rm "${outfile}"
+ fi
+ if [ -f "${infile}" ]; then
+ rm "${infile}"
+ fi
+}
+
+server_listen() {
+ ip netns exec "$ns2" socat -u -4 TCP-LISTEN:8080,reuseaddr STDOUT > "${outfile}" &
+ server_pid=$!
+ sleep 0.2
+}
+
+client_connect() {
+ ip netns exec "${ns0}" timeout 2 socat -u -4 STDIN TCP:"${vip_v4}":"${port}" < "${infile}"
+}
+
+verify_data() {
+ wait "${server_pid}"
+ cmp "$infile" "$outfile" 2>/dev/null
+}
+
+test_service() {
+ server_listen
+ client_connect
+ verify_data
+}
+
+
+test_dr() {
+ ip netns exec "${ns0}" ip route add "${vip_v4}" via "${gip_v4}" dev br0
+
+ ip netns exec "${ns1}" sysctl -qw net.ipv4.ip_forward=1
+ ip netns exec "${ns1}" ipvsadm -A -t "${vip_v4}:${port}" -s rr
+ ip netns exec "${ns1}" ipvsadm -a -t "${vip_v4}:${port}" -r "${rip_v4}:${port}"
+ ip netns exec "${ns1}" ip addr add "${vip_v4}/32" dev lo:1
+
+ # avoid incorrect arp response
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2
+ # avoid reverse route lookup
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.veth21.rp_filter=0
+ ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1
+
+ test_service
+}
+
+test_nat() {
+ ip netns exec "${ns0}" ip route add "${vip_v4}" via "${gip_v4}" dev br0
+
+ ip netns exec "${ns1}" sysctl -qw net.ipv4.ip_forward=1
+ ip netns exec "${ns1}" ipvsadm -A -t "${vip_v4}:${port}" -s rr
+ ip netns exec "${ns1}" ipvsadm -a -m -t "${vip_v4}:${port}" -r "${rip_v4}:${port}"
+ ip netns exec "${ns1}" ip addr add "${vip_v4}/32" dev lo:1
+
+ ip netns exec "${ns2}" ip link del veth20
+ ip netns exec "${ns2}" ip route add default via "${dip_v4}" dev veth21
+
+ test_service
+}
+
+test_tun() {
+ ip netns exec "${ns0}" ip route add "${vip_v4}" via "${gip_v4}" dev br0
+
+ ip netns exec "${ns1}" modprobe -q ipip
+ ip netns exec "${ns1}" ip link set tunl0 up
+ ip netns exec "${ns1}" sysctl -qw net.ipv4.ip_forward=0
+ ip netns exec "${ns1}" sysctl -qw net.ipv4.conf.all.send_redirects=0
+ ip netns exec "${ns1}" sysctl -qw net.ipv4.conf.default.send_redirects=0
+ ip netns exec "${ns1}" ipvsadm -A -t "${vip_v4}:${port}" -s rr
+ ip netns exec "${ns1}" ipvsadm -a -i -t "${vip_v4}:${port}" -r ${rip_v4}:${port}
+ ip netns exec "${ns1}" ip addr add ${vip_v4}/32 dev lo:1
+
+ ip netns exec "${ns2}" modprobe -q ipip
+ ip netns exec "${ns2}" ip link set tunl0 up
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.tunl0.rp_filter=0
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.veth21.rp_filter=0
+ ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1
+
+ test_service
+}
+
+run_tests() {
+ local errors=
+
+ echo "Testing DR mode..."
+ cleanup
+ setup
+ test_dr
+ errors=$(( $errors + $? ))
+
+ echo "Testing NAT mode..."
+ cleanup
+ setup
+ test_nat
+ errors=$(( $errors + $? ))
+
+ echo "Testing Tunnel mode..."
+ cleanup
+ setup
+ test_tun
+ errors=$(( $errors + $? ))
+
+ return $errors
+}
+
+trap cleanup EXIT
+
+run_tests
+
+if [ $? -ne 0 ]; then
+ echo -e "$(basename $0): ${RED}FAIL${NC}"
+ exit 1
+fi
+echo -e "$(basename $0): ${GREEN}PASS${NC}"
+exit 0
diff --git a/tools/testing/selftests/net/netfilter/lib.sh b/tools/testing/selftests/net/netfilter/lib.sh
new file mode 100644
index 000000000000..bedd35298e15
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/lib.sh
@@ -0,0 +1,10 @@
+net_netfilter_dir=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "$net_netfilter_dir/../lib.sh"
+
+checktool (){
+ if ! $1 > /dev/null 2>&1; then
+ echo "SKIP: Could not $2"
+ exit $ksft_skip
+ fi
+}
diff --git a/tools/testing/selftests/net/netfilter/nf_conntrack_packetdrill.sh b/tools/testing/selftests/net/netfilter/nf_conntrack_packetdrill.sh
new file mode 100755
index 000000000000..c6fdd2079f4d
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nf_conntrack_packetdrill.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+checktool "conntrack --version" "run test without conntrack"
+checktool "iptables --version" "run test without iptables"
+checktool "ip6tables --version" "run test without ip6tables"
+
+modprobe -q tun
+modprobe -q nf_conntrack
+# echo 1 > /proc/sys/net/netfilter/nf_log_all_netns
+
+PDRILL_TIMEOUT=10
+
+files="
+conntrack_ack_loss_stall.pkt
+conntrack_inexact_rst.pkt
+conntrack_syn_challenge_ack.pkt
+conntrack_synack_old.pkt
+conntrack_synack_reuse.pkt
+conntrack_rst_invalid.pkt
+"
+
+if ! packetdrill --dry_run --verbose "packetdrill/conntrack_ack_loss_stall.pkt";then
+ echo "SKIP: packetdrill not installed"
+ exit ${ksft_skip}
+fi
+
+ret=0
+
+run_packetdrill()
+{
+ filename="$1"
+ ipver="$2"
+ local mtu=1500
+
+ export NFCT_IP_VERSION="$ipver"
+
+ if [ "$ipver" = "ipv4" ];then
+ export xtables="iptables"
+ elif [ "$ipver" = "ipv6" ];then
+ export xtables="ip6tables"
+ mtu=1520
+ fi
+
+ timeout "$PDRILL_TIMEOUT" unshare -n packetdrill --ip_version="$ipver" --mtu=$mtu \
+ --tolerance_usecs=1000000 --non_fatal packet "$filename"
+}
+
+run_one_test_file()
+{
+ filename="$1"
+
+ for v in ipv4 ipv6;do
+ printf "%-50s(%s)%-20s" "$filename" "$v" ""
+ if run_packetdrill packetdrill/"$f" "$v";then
+ echo OK
+ else
+ echo FAIL
+ ret=1
+ fi
+ done
+}
+
+echo "Replaying packetdrill test cases:"
+for f in $files;do
+ run_one_test_file packetdrill/"$f"
+done
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh b/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh
new file mode 100755
index 000000000000..1014551dd769
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh
@@ -0,0 +1,97 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test NAT source port clash resolution
+#
+
+source lib.sh
+ret=0
+socatpid=0
+
+cleanup()
+{
+ [ "$socatpid" -gt 0 ] && kill "$socatpid"
+
+ cleanup_all_ns
+}
+
+checktool "socat -h" "run test without socat"
+checktool "iptables --version" "run test without iptables"
+
+trap cleanup EXIT
+
+setup_ns ns1 ns2
+
+# Connect the namespaces using a veth pair
+ip link add name veth2 type veth peer name veth1
+ip link set netns "$ns1" dev veth1
+ip link set netns "$ns2" dev veth2
+
+ip netns exec "$ns1" ip link set up dev lo
+ip netns exec "$ns1" ip link set up dev veth1
+ip netns exec "$ns1" ip addr add 192.168.1.1/24 dev veth1
+
+ip netns exec "$ns2" ip link set up dev lo
+ip netns exec "$ns2" ip link set up dev veth2
+ip netns exec "$ns2" ip addr add 192.168.1.2/24 dev veth2
+
+# Create a server in one namespace
+ip netns exec "$ns1" socat -u TCP-LISTEN:5201,fork OPEN:/dev/null,wronly=1 &
+socatpid=$!
+
+# Restrict source port to just one so we don't have to exhaust
+# all others.
+ip netns exec "$ns2" sysctl -q net.ipv4.ip_local_port_range="10000 10000"
+
+# add a virtual IP using DNAT
+ip netns exec "$ns2" iptables -t nat -A OUTPUT -d 10.96.0.1/32 -p tcp --dport 443 -j DNAT --to-destination 192.168.1.1:5201
+
+# ... and route it to the other namespace
+ip netns exec "$ns2" ip route add 10.96.0.1 via 192.168.1.1
+
+# add a persistent connection from the other namespace
+ip netns exec "$ns2" socat -t 10 - TCP:192.168.1.1:5201 > /dev/null &
+
+sleep 1
+
+# ip daddr:dport will be rewritten to 192.168.1.1 5201
+# NAT must reallocate source port 10000 because
+# 192.168.1.2:10000 -> 192.168.1.1:5201 is already in use
+echo test | ip netns exec "$ns2" socat -t 3 -u STDIN TCP:10.96.0.1:443,connect-timeout=3 >/dev/null
+ret=$?
+
+# Check socat can connect to 10.96.0.1:443 (aka 192.168.1.1:5201).
+if [ $ret -eq 0 ]; then
+ echo "PASS: socat can connect via NAT'd address"
+else
+ echo "FAIL: socat cannot connect via NAT'd address"
+fi
+
+# check sport clashres.
+ip netns exec "$ns1" iptables -t nat -A PREROUTING -p tcp --dport 5202 -j REDIRECT --to-ports 5201
+ip netns exec "$ns1" iptables -t nat -A PREROUTING -p tcp --dport 5203 -j REDIRECT --to-ports 5201
+
+sleep 5 | ip netns exec "$ns2" socat -t 5 -u STDIN TCP:192.168.1.1:5202,connect-timeout=5 >/dev/null &
+
+# if connect succeeds, client closes instantly due to EOF on stdin.
+# if connect hangs, it will time out after 5s.
+echo | ip netns exec "$ns2" socat -t 3 -u STDIN TCP:192.168.1.1:5203,connect-timeout=5 >/dev/null &
+cpid2=$!
+
+time_then=$(date +%s)
+wait $cpid2
+rv=$?
+time_now=$(date +%s)
+
+# Check how much time has elapsed, expectation is for
+# 'cpid2' to connect and then exit (and no connect delay).
+delta=$((time_now - time_then))
+
+if [ $delta -lt 2 ] && [ $rv -eq 0 ]; then
+ echo "PASS: could connect to service via redirected ports"
+else
+ echo "FAIL: socat cannot connect to service via redirect ($delta seconds elapsed, returned $rv)"
+ ret=1
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/nf-queue.c b/tools/testing/selftests/net/netfilter/nf_queue.c
index 9e56b9d47037..9e56b9d47037 100644
--- a/tools/testing/selftests/netfilter/nf-queue.c
+++ b/tools/testing/selftests/net/netfilter/nf_queue.c
diff --git a/tools/testing/selftests/netfilter/nft_audit.sh b/tools/testing/selftests/net/netfilter/nft_audit.sh
index 99ed5bd6e840..902f8114bc80 100755
--- a/tools/testing/selftests/netfilter/nft_audit.sh
+++ b/tools/testing/selftests/net/netfilter/nft_audit.sh
@@ -6,11 +6,34 @@
SKIP_RC=4
RC=0
+if [ -r /var/run/auditd.pid ];then
+ read pid < /var/run/auditd.pid
+ p=$(pgrep ^auditd$)
+
+ if [ "$pid" -eq "$p" ]; then
+ echo "SKIP: auditd is running"
+ exit $SKIP_RC
+ fi
+fi
+
nft --version >/dev/null 2>&1 || {
echo "SKIP: missing nft tool"
exit $SKIP_RC
}
+# nft must be recent enough to support "reset" keyword.
+nft --check -f /dev/stdin >/dev/null 2>&1 <<EOF
+add table t
+add chain t c
+reset rules t c
+EOF
+
+if [ "$?" -ne 0 ];then
+ echo -n "SKIP: nft reset feature test failed: "
+ nft --version
+ exit $SKIP_RC
+fi
+
# Run everything in a separate network namespace
[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; }
@@ -73,7 +96,7 @@ done
for ((i = 0; i < 500; i++)); do
echo "add rule t2 c3 counter accept comment \"rule $i\""
-done >$rulefile
+done > "$rulefile"
do_test "nft -f $rulefile" \
'table=t2 family=2 entries=500 op=nft_register_rule'
@@ -101,7 +124,7 @@ do_test 'nft add counter t2 c1; add counter t2 c2' \
for ((i = 3; i <= 500; i++)); do
echo "add counter t2 c$i"
-done >$rulefile
+done > "$rulefile"
do_test "nft -f $rulefile" \
'table=t2 family=2 entries=498 op=nft_register_obj'
@@ -115,7 +138,7 @@ do_test 'nft add quota t2 q1 { 10 bytes }; add quota t2 q2 { 10 bytes }' \
for ((i = 3; i <= 500; i++)); do
echo "add quota t2 q$i { 10 bytes }"
-done >$rulefile
+done > "$rulefile"
do_test "nft -f $rulefile" \
'table=t2 family=2 entries=498 op=nft_register_obj'
@@ -157,7 +180,7 @@ table=t2 family=2 entries=135 op=nft_reset_rule'
# resetting sets and elements
-elem=(22 ,80 ,443)
+elem=(22 ",80" ",443")
relem=""
for i in {1..3}; do
relem+="${elem[((i - 1))]}"
diff --git a/tools/testing/selftests/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
index e908009576c7..47088b005390 100755
--- a/tools/testing/selftests/netfilter/nft_concat_range.sh
+++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# nft_concat_range.sh - Tests for sets with concatenation of ranged fields
@@ -7,10 +7,10 @@
#
# Author: Stefano Brivio <sbrivio@redhat.com>
#
-# shellcheck disable=SC2154,SC2034,SC2016,SC2030,SC2031
+# shellcheck disable=SC2154,SC2034,SC2016,SC2030,SC2031,SC2317
# ^ Configuration and templates sourced with eval, counters reused in subshells
-KSELFTEST_SKIP=4
+source lib.sh
# Available test groups:
# - reported_issues: check for issues that were reported in the past
@@ -19,7 +19,7 @@ KSELFTEST_SKIP=4
# - timeout: check that packets match entries until they expire
# - performance: estimate matching rate, compare with rbtree and hash baselines
TESTS="reported_issues correctness concurrency timeout"
-[ "${quicktest}" != "1" ] && TESTS="${TESTS} performance"
+[ -n "$NFT_CONCAT_RANGE_TESTS" ] && TESTS="${NFT_CONCAT_RANGE_TESTS}"
# Set types, defined by TYPE_ variables below
TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto
@@ -27,11 +27,11 @@ TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto
net6_port_net6_port net_port_mac_proto_net"
# Reported bugs, also described by TYPE_ variables below
-BUGS="flush_remove_add reload"
+BUGS="flush_remove_add reload net_port_proto_match"
# List of possible paths to pktgen script from kernel tree for performance tests
PKTGEN_SCRIPT_PATHS="
- ../../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
+ ../../../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
pktgen/pktgen_bench_xmit_mode_netif_receive.sh"
# Definition of set types:
@@ -66,7 +66,7 @@ src
start 1
count 5
src_delta 2000
-tools sendip nc bash
+tools sendip bash
proto udp
race_repeat 3
@@ -91,7 +91,7 @@ src
start 1
count 5
src_delta 2000
-tools sendip socat nc bash
+tools sendip socat bash
proto udp
race_repeat 3
@@ -116,7 +116,7 @@ src
start 10
count 5
src_delta 2000
-tools sendip socat nc bash
+tools sendip socat bash
proto udp6
race_repeat 3
@@ -141,7 +141,7 @@ src
start 1
count 5
src_delta 2000
-tools sendip socat nc bash
+tools sendip socat bash
proto udp
race_repeat 0
@@ -163,7 +163,7 @@ src mac
start 10
count 5
src_delta 2000
-tools sendip socat nc bash
+tools sendip socat bash
proto udp6
race_repeat 0
@@ -185,7 +185,7 @@ src mac proto
start 10
count 5
src_delta 2000
-tools sendip socat nc bash
+tools sendip socat bash
proto udp6
race_repeat 0
@@ -207,7 +207,7 @@ src addr4
start 1
count 5
src_delta 2000
-tools sendip socat nc bash
+tools sendip socat bash
proto udp
race_repeat 3
@@ -227,7 +227,7 @@ src addr6 port
start 10
count 5
src_delta 2000
-tools sendip socat nc
+tools sendip socat
proto udp6
race_repeat 3
@@ -247,7 +247,7 @@ src mac proto addr4
start 1
count 5
src_delta 2000
-tools sendip socat nc bash
+tools sendip socat bash
proto udp
race_repeat 0
@@ -264,7 +264,7 @@ src mac
start 1
count 5
src_delta 2000
-tools sendip socat nc bash
+tools sendip socat bash
proto udp
race_repeat 0
@@ -286,7 +286,7 @@ src mac addr4
start 1
count 5
src_delta 2000
-tools sendip socat nc bash
+tools sendip socat bash
proto udp
race_repeat 0
@@ -337,7 +337,7 @@ src addr4
start 1
count 5
src_delta 2000
-tools sendip socat nc
+tools sendip socat
proto udp
race_repeat 3
@@ -363,7 +363,7 @@ src mac
start 1
count 1
src_delta 2000
-tools sendip socat nc bash
+tools sendip socat bash
proto udp
race_repeat 0
@@ -371,6 +371,22 @@ race_repeat 0
perf_duration 0
"
+TYPE_net_port_proto_match="
+display net,port,proto
+type_spec ipv4_addr . inet_service . inet_proto
+chain_spec ip daddr . udp dport . meta l4proto
+dst addr4 port proto
+src
+start 1
+count 9
+src_delta 9
+tools sendip bash
+proto udp
+
+race_repeat 0
+
+perf_duration 0
+"
# Set template for all tests, types and rules are filled in depending on test
set_template='
flush ruleset
@@ -473,8 +489,6 @@ setup_veth() {
B() {
ip netns exec B "$@" >/dev/null 2>&1
}
-
- sleep 2
}
# Fill in set template and initialise set
@@ -488,12 +502,6 @@ check_tools() {
__tools=
for tool in ${tools}; do
- if [ "${tool}" = "nc" ] && [ "${proto}" = "udp6" ] && \
- ! nc -u -w0 1.1.1.1 1 2>/dev/null; then
- # Some GNU netcat builds might not support IPv6
- __tools="${__tools} netcat-openbsd"
- continue
- fi
__tools="${__tools} ${tool}"
command -v "${tool}" >/dev/null && return 0
@@ -554,30 +562,7 @@ setup_send_udp() {
ip addr add "${dst_addr4}" dev veth_a 2>/dev/null
[ -z "${dst_port}" ] && dst_port=12345
- echo "test4" | B socat -t 0.01 STDIN UDP4-DATAGRAM:${dst_addr4}:${dst_port}"${__socatbind}"
-
- src_addr4=
- src_port=
- }
- elif command -v nc >/dev/null; then
- if nc -u -w0 1.1.1.1 1 2>/dev/null; then
- # OpenBSD netcat
- nc_opt="-w0"
- else
- # GNU netcat
- nc_opt="-q0"
- fi
-
- send_udp() {
- if [ -n "${src_addr4}" ]; then
- B ip addr add "${src_addr4}" dev veth_b
- __src_addr4="-s ${src_addr4}"
- fi
- ip addr add "${dst_addr4}" dev veth_a 2>/dev/null
- [ -n "${src_port}" ] && src_port="-p ${src_port}"
-
- echo "" | B nc -u "${nc_opt}" "${__src_addr4}" \
- "${src_port}" "${dst_addr4}" "${dst_port}"
+ echo "test4" | B socat -t 0.01 STDIN UDP4-DATAGRAM:"$dst_addr4":"$dst_port""${__socatbind}"
src_addr4=
src_port=
@@ -632,11 +617,7 @@ setup_send_udp6() {
__socatbind6=
if [ -n "${src_addr6}" ]; then
- if [ -n "${src_addr6} != "${src_addr6_added} ]; then
- B ip addr add "${src_addr6}" dev veth_b nodad
-
- src_addr6_added=${src_addr6}
- fi
+ B ip addr add "${src_addr6}" dev veth_b nodad
__socatbind6=",bind=[${src_addr6}]"
@@ -645,26 +626,7 @@ setup_send_udp6() {
fi
fi
- echo "test6" | B socat -t 0.01 STDIN UDP6-DATAGRAM:[${dst_addr6}]:${dst_port}"${__socatbind6}"
- }
- elif command -v nc >/dev/null && nc -u -w0 1.1.1.1 1 2>/dev/null; then
- # GNU netcat might not work with IPv6, try next tool
- send_udp6() {
- ip -6 addr add "${dst_addr6}" dev veth_a nodad \
- 2>/dev/null
- if [ -n "${src_addr6}" ]; then
- B ip addr add "${src_addr6}" dev veth_b nodad
- else
- src_addr6="2001:db8::2"
- fi
- [ -n "${src_port}" ] && src_port="-p ${src_port}"
-
- # shellcheck disable=SC2086 # this needs split options
- echo "" | B nc -u w0 "-s${src_addr6}" ${src_port} \
- ${dst_addr6} ${dst_port}
-
- src_addr6=
- src_port=
+ echo "test6" | B socat -t 0.01 STDIN UDP6-DATAGRAM:["$dst_addr6"]:"$dst_port""${__socatbind6}"
}
elif [ -z "$(bash -c 'type -p')" ]; then
send_udp6() {
@@ -679,10 +641,17 @@ setup_send_udp6() {
fi
}
+listener_ready()
+{
+ port="$1"
+ ss -lnt -o "sport = :$port" | grep -q "$port"
+}
+
# Set up function to send TCP traffic on IPv4
setup_flood_tcp() {
if command -v iperf3 >/dev/null; then
flood_tcp() {
+ local n_port="${dst_port}"
[ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
if [ -n "${src_addr4}" ]; then
B ip addr add "${src_addr4}/16" dev veth_b
@@ -699,7 +668,7 @@ setup_flood_tcp() {
# shellcheck disable=SC2086 # this needs split options
iperf3 -s -DB "${dst_addr4}" ${dst_port} >/dev/null 2>&1
- sleep 2
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port"
# shellcheck disable=SC2086 # this needs split options
B iperf3 -c "${dst_addr4}" ${dst_port} ${src_port} \
@@ -711,6 +680,7 @@ setup_flood_tcp() {
}
elif command -v iperf >/dev/null; then
flood_tcp() {
+ local n_port="${dst_port}"
[ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
if [ -n "${src_addr4}" ]; then
B ip addr add "${src_addr4}/16" dev veth_b
@@ -727,7 +697,7 @@ setup_flood_tcp() {
# shellcheck disable=SC2086 # this needs split options
iperf -s -DB "${dst_addr4}" ${dst_port} >/dev/null 2>&1
- sleep 2
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port"
# shellcheck disable=SC2086 # this needs split options
B iperf -c "${dst_addr4}" ${dst_port} ${src_addr4} \
@@ -739,6 +709,7 @@ setup_flood_tcp() {
}
elif command -v netperf >/dev/null; then
flood_tcp() {
+ local n_port="${dst_port}"
[ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
if [ -n "${src_addr4}" ]; then
B ip addr add "${src_addr4}/16" dev veth_b
@@ -755,7 +726,7 @@ setup_flood_tcp() {
# shellcheck disable=SC2086 # this needs split options
netserver -4 ${dst_port} -L "${dst_addr4}" \
>/dev/null 2>&1
- sleep 2
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "${n_port}"
# shellcheck disable=SC2086 # this needs split options
B netperf -4 -H "${dst_addr4}" ${dst_port} \
@@ -774,6 +745,7 @@ setup_flood_tcp() {
setup_flood_tcp6() {
if command -v iperf3 >/dev/null; then
flood_tcp6() {
+ local n_port="${dst_port}"
[ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
if [ -n "${src_addr6}" ]; then
B ip addr add "${src_addr6}" dev veth_b nodad
@@ -790,7 +762,7 @@ setup_flood_tcp6() {
# shellcheck disable=SC2086 # this needs split options
iperf3 -s -DB "${dst_addr6}" ${dst_port} >/dev/null 2>&1
- sleep 2
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "${n_port}"
# shellcheck disable=SC2086 # this needs split options
B iperf3 -c "${dst_addr6}" ${dst_port} \
@@ -802,6 +774,7 @@ setup_flood_tcp6() {
}
elif command -v iperf >/dev/null; then
flood_tcp6() {
+ local n_port="${dst_port}"
[ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
if [ -n "${src_addr6}" ]; then
B ip addr add "${src_addr6}" dev veth_b nodad
@@ -818,7 +791,7 @@ setup_flood_tcp6() {
# shellcheck disable=SC2086 # this needs split options
iperf -s -VDB "${dst_addr6}" ${dst_port} >/dev/null 2>&1
- sleep 2
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port"
# shellcheck disable=SC2086 # this needs split options
B iperf -c "${dst_addr6}" -V ${dst_port} \
@@ -830,6 +803,7 @@ setup_flood_tcp6() {
}
elif command -v netperf >/dev/null; then
flood_tcp6() {
+ local n_port="${dst_port}"
[ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
if [ -n "${src_addr6}" ]; then
B ip addr add "${src_addr6}" dev veth_b nodad
@@ -846,7 +820,7 @@ setup_flood_tcp6() {
# shellcheck disable=SC2086 # this needs split options
netserver -6 ${dst_port} -L "${dst_addr6}" \
>/dev/null 2>&1
- sleep 2
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port"
# shellcheck disable=SC2086 # this needs split options
B netperf -6 -H "${dst_addr6}" ${dst_port} \
@@ -865,6 +839,7 @@ setup_flood_tcp6() {
setup_flood_udp() {
if command -v iperf3 >/dev/null; then
flood_udp() {
+ local n_port="${dst_port}"
[ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
if [ -n "${src_addr4}" ]; then
B ip addr add "${src_addr4}/16" dev veth_b
@@ -881,7 +856,7 @@ setup_flood_udp() {
# shellcheck disable=SC2086 # this needs split options
iperf3 -s -DB "${dst_addr4}" ${dst_port}
- sleep 2
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port"
# shellcheck disable=SC2086 # this needs split options
B iperf3 -u -c "${dst_addr4}" -Z -b 100M -l16 -t1000 \
@@ -893,6 +868,7 @@ setup_flood_udp() {
}
elif command -v iperf >/dev/null; then
flood_udp() {
+ local n_port="${dst_port}"
[ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
if [ -n "${src_addr4}" ]; then
B ip addr add "${src_addr4}/16" dev veth_b
@@ -909,7 +885,7 @@ setup_flood_udp() {
# shellcheck disable=SC2086 # this needs split options
iperf -u -sDB "${dst_addr4}" ${dst_port} >/dev/null 2>&1
- sleep 2
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port"
# shellcheck disable=SC2086 # this needs split options
B iperf -u -c "${dst_addr4}" -b 100M -l1 -t1000 \
@@ -921,6 +897,7 @@ setup_flood_udp() {
}
elif command -v netperf >/dev/null; then
flood_udp() {
+ local n_port="${dst_port}"
[ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
if [ -n "${src_addr4}" ]; then
B ip addr add "${src_addr4}/16" dev veth_b
@@ -937,7 +914,7 @@ setup_flood_udp() {
# shellcheck disable=SC2086 # this needs split options
netserver -4 ${dst_port} -L "${dst_addr4}" \
>/dev/null 2>&1
- sleep 2
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port"
# shellcheck disable=SC2086 # this needs split options
B netperf -4 -H "${dst_addr4}" ${dst_port} \
@@ -982,6 +959,7 @@ cleanup() {
ip link del dummy0 2>/dev/null
ip route del default 2>/dev/null
ip -6 route del default 2>/dev/null
+ ip netns pids B 2>/dev/null | xargs kill 2>/dev/null
ip netns del B 2>/dev/null
ip link del veth_a 2>/dev/null
timeout=
@@ -989,15 +967,18 @@ cleanup() {
killall iperf 2>/dev/null
killall netperf 2>/dev/null
killall netserver 2>/dev/null
- rm -f ${tmp}
- sleep 2
+}
+
+cleanup_exit() {
+ cleanup
+ rm -f "$tmp"
}
# Entry point for setup functions
setup() {
if [ "$(id -u)" -ne 0 ]; then
echo " need to run as root"
- exit ${KSELFTEST_SKIP}
+ exit ${ksft_skip}
fi
cleanup
@@ -1258,7 +1239,7 @@ send_nomatch() {
# - check that packets outside range don't match it
# - remove some elements, check that packets don't match anymore
test_correctness() {
- setup veth send_"${proto}" set || return ${KSELFTEST_SKIP}
+ setup veth send_"${proto}" set || return ${ksft_skip}
range_size=1
for i in $(seq "${start}" $((start + count))); do
@@ -1273,7 +1254,7 @@ test_correctness() {
srcend=$((end + src_delta))
add "$(format)" || return 1
- for j in $(seq ${start} $((range_size / 2 + 1)) ${end}); do
+ for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
send_match "${j}" $((j + src_delta)) || return 1
done
send_nomatch $((end + 1)) $((end + 1 + src_delta)) || return 1
@@ -1281,7 +1262,7 @@ test_correctness() {
# Delete elements now and then
if [ $((i % 3)) -eq 0 ]; then
del "$(format)" || return 1
- for j in $(seq ${start} \
+ for j in $(seq "$start" \
$((range_size / 2 + 1)) ${end}); do
send_nomatch "${j}" $((j + src_delta)) \
|| return 1
@@ -1307,12 +1288,12 @@ test_concurrency() {
proto=${flood_proto}
tools=${flood_tools}
chain_spec=${flood_spec}
- setup veth flood_"${proto}" set || return ${KSELFTEST_SKIP}
+ setup veth flood_"${proto}" set || return ${ksft_skip}
range_size=1
cstart=${start}
flood_pids=
- for i in $(seq ${start} $((start + count))); do
+ for i in $(seq "$start" $((start + count))); do
end=$((start + range_size))
srcstart=$((start + src_delta))
srcend=$((end + src_delta))
@@ -1325,7 +1306,7 @@ test_concurrency() {
start=$((end + range_size))
done
- sleep 10
+ sleep $((RANDOM%10))
pids=
for c in $(seq 1 "$(nproc)"); do (
@@ -1335,7 +1316,7 @@ test_concurrency() {
# $start needs to be local to this subshell
# shellcheck disable=SC2030
start=${cstart}
- for i in $(seq ${start} $((start + count))); do
+ for i in $(seq "$start" $((start + count))); do
end=$((start + range_size))
srcstart=$((start + src_delta))
srcend=$((end + src_delta))
@@ -1350,7 +1331,7 @@ test_concurrency() {
range_size=1
start=${cstart}
- for i in $(seq ${start} $((start + count))); do
+ for i in $(seq "$start" $((start + count))); do
end=$((start + range_size))
srcstart=$((start + src_delta))
srcend=$((end + src_delta))
@@ -1366,7 +1347,7 @@ test_concurrency() {
range_size=1
start=${cstart}
- for i in $(seq ${start} $((start + count))); do
+ for i in $(seq "$start" $((start + count))); do
end=$((start + range_size))
srcstart=$((start + src_delta))
srcend=$((end + src_delta))
@@ -1379,7 +1360,7 @@ test_concurrency() {
range_size=1
start=${cstart}
- for i in $(seq ${start} $((start + count))); do
+ for i in $(seq "$start" $((start + count))); do
end=$((start + range_size))
srcstart=$((start + src_delta))
srcend=$((end + src_delta))
@@ -1407,31 +1388,34 @@ test_concurrency() {
# - add all the elements with 3s timeout while checking that packets match
# - wait 3s after the last insertion, check that packets don't match any entry
test_timeout() {
- setup veth send_"${proto}" set || return ${KSELFTEST_SKIP}
+ setup veth send_"${proto}" set || return ${ksft_skip}
timeout=3
+
+ [ "$KSFT_MACHINE_SLOW" = "yes" ] && timeout=8
+
range_size=1
- for i in $(seq "${start}" $((start + count))); do
+ for i in $(seq "$start" $((start + count))); do
end=$((start + range_size))
srcstart=$((start + src_delta))
srcend=$((end + src_delta))
add "$(format)" || return 1
- for j in $(seq ${start} $((range_size / 2 + 1)) ${end}); do
+ for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
send_match "${j}" $((j + src_delta)) || return 1
done
range_size=$((range_size + 1))
start=$((end + range_size))
done
- sleep 3
- for i in $(seq ${start} $((start + count))); do
+ sleep $timeout
+ for i in $(seq "$start" $((start + count))); do
end=$((start + range_size))
srcstart=$((start + src_delta))
srcend=$((end + src_delta))
- for j in $(seq ${start} $((range_size / 2 + 1)) ${end}); do
+ for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
send_nomatch "${j}" $((j + src_delta)) || return 1
done
@@ -1450,13 +1434,13 @@ test_performance() {
chain_spec=${perf_spec}
dst="${perf_dst}"
src="${perf_src}"
- setup veth perf set || return ${KSELFTEST_SKIP}
+ setup veth perf set || return ${ksft_skip}
first=${start}
range_size=1
for set in test norange noconcat; do
start=${first}
- for i in $(seq ${start} $((start + perf_entries))); do
+ for i in $(seq "$start" $((start + perf_entries))); do
end=$((start + range_size))
srcstart=$((start + src_delta))
srcend=$((end + src_delta))
@@ -1464,7 +1448,7 @@ test_performance() {
if [ $((end / 65534)) -gt $((start / 65534)) ]; then
start=${end}
end=$((end + 1))
- elif [ ${start} -eq ${end} ]; then
+ elif [ "$start" -eq "$end" ]; then
end=$((start + 1))
fi
@@ -1475,7 +1459,7 @@ test_performance() {
nft -f "${tmp}"
done
- perf $((end - 1)) ${srcstart}
+ perf $((end - 1)) "$srcstart"
sleep 2
@@ -1519,14 +1503,17 @@ test_performance() {
}
test_bug_flush_remove_add() {
+ rounds=100
+ [ "$KSFT_MACHINE_SLOW" = "yes" ] && rounds=10
+
set_cmd='{ set s { type ipv4_addr . inet_service; flags interval; }; }'
elem1='{ 10.0.0.1 . 22-25, 10.0.0.1 . 10-20 }'
elem2='{ 10.0.0.1 . 10-20, 10.0.0.1 . 22-25 }'
- for i in `seq 1 100`; do
- nft add table t ${set_cmd} || return ${KSELFTEST_SKIP}
- nft add element t s ${elem1} 2>/dev/null || return 1
+ for i in $(seq 1 $rounds); do
+ nft add table t "$set_cmd" || return ${ksft_skip}
+ nft add element t s "$elem1" 2>/dev/null || return 1
nft flush set t s 2>/dev/null || return 1
- nft add element t s ${elem2} 2>/dev/null || return 1
+ nft add element t s "$elem2" 2>/dev/null || return 1
done
nft flush ruleset
}
@@ -1534,7 +1521,7 @@ test_bug_flush_remove_add() {
# - add ranged element, check that packets match it
# - reload the set, check packets still match
test_bug_reload() {
- setup veth send_"${proto}" set || return ${KSELFTEST_SKIP}
+ setup veth send_"${proto}" set || return ${ksft_skip}
rstart=${start}
range_size=1
@@ -1573,7 +1560,7 @@ test_bug_reload() {
srcstart=$((start + src_delta))
srcend=$((end + src_delta))
- for j in $(seq ${start} $((range_size / 2 + 1)) ${end}); do
+ for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
send_match "${j}" $((j + src_delta)) || return 1
done
@@ -1584,6 +1571,64 @@ test_bug_reload() {
nft flush ruleset
}
+# - add ranged element, check that packets match it
+# - delete element again, check it is gone
+test_bug_net_port_proto_match() {
+ setup veth send_"${proto}" set || return ${ksft_skip}
+ rstart=${start}
+
+ range_size=1
+ for i in $(seq 1 10); do
+ for j in $(seq 1 20) ; do
+ elem=$(printf "10.%d.%d.0/24 . %d1-%d0 . 6-17 " ${i} ${j} ${i} "$((i+1))")
+
+ nft "add element inet filter test { $elem }" || return 1
+ nft "get element inet filter test { $elem }" | grep -q "$elem"
+ if [ $? -ne 0 ];then
+ local got=$(nft "get element inet filter test { $elem }")
+ err "post-add: should have returned $elem but got $got"
+ return 1
+ fi
+ done
+ done
+
+ # recheck after set was filled
+ for i in $(seq 1 10); do
+ for j in $(seq 1 20) ; do
+ elem=$(printf "10.%d.%d.0/24 . %d1-%d0 . 6-17 " ${i} ${j} ${i} "$((i+1))")
+
+ nft "get element inet filter test { $elem }" | grep -q "$elem"
+ if [ $? -ne 0 ];then
+ local got=$(nft "get element inet filter test { $elem }")
+ err "post-fill: should have returned $elem but got $got"
+ return 1
+ fi
+ done
+ done
+
+ # random del and re-fetch
+ for i in $(seq 1 10); do
+ for j in $(seq 1 20) ; do
+ local rnd=$((RANDOM%10))
+ local got=""
+
+ elem=$(printf "10.%d.%d.0/24 . %d1-%d0 . 6-17 " ${i} ${j} ${i} "$((i+1))")
+ if [ $rnd -gt 0 ];then
+ continue
+ fi
+
+ nft "delete element inet filter test { $elem }"
+ got=$(nft "get element inet filter test { $elem }" 2>/dev/null)
+ if [ $? -eq 0 ];then
+ err "post-delete: query for $elem returned $got instead of error."
+ return 1
+ fi
+ done
+ done
+
+ nft flush ruleset
+}
+
test_reported_issues() {
eval test_bug_"${subtest}"
}
@@ -1591,12 +1636,12 @@ test_reported_issues() {
# Run everything in a separate network namespace
[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; }
tmp="$(mktemp)"
-trap cleanup EXIT
+trap cleanup_exit EXIT
# Entry point for test runs
passed=0
for name in ${TESTS}; do
- printf "TEST: %s\n" "$(echo ${name} | tr '_' ' ')"
+ printf "TEST: %s\n" "$(echo "$name" | tr '_' ' ')"
if [ "${name}" = "reported_issues" ]; then
SUBTESTS="${BUGS}"
else
@@ -1623,10 +1668,16 @@ for name in ${TESTS}; do
continue
fi
- printf " %-60s " "${display}"
+ [ "$KSFT_MACHINE_SLOW" = "yes" ] && count=1
+
+ printf " %-32s " "${display}"
+ tthen=$(date +%s)
eval test_"${name}"
ret=$?
+ tnow=$(date +%s)
+ printf "%5ds%-30s" $((tnow-tthen))
+
if [ $ret -eq 0 ]; then
printf "[ OK ]\n"
info_flush
@@ -1635,11 +1686,11 @@ for name in ${TESTS}; do
printf "[FAIL]\n"
err_flush
exit 1
- elif [ $ret -eq ${KSELFTEST_SKIP} ]; then
+ elif [ $ret -eq ${ksft_skip} ]; then
printf "[SKIP]\n"
err_flush
fi
done
done
-[ ${passed} -eq 0 ] && exit ${KSELFTEST_SKIP} || exit 0
+[ ${passed} -eq 0 ] && exit ${ksft_skip} || exit 0
diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range_perf.sh b/tools/testing/selftests/net/netfilter/nft_concat_range_perf.sh
new file mode 100755
index 000000000000..5d276995a5c5
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_concat_range_perf.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+
+source lib.sh
+
+[ "$KSFT_MACHINE_SLOW" = yes ] && exit ${ksft_skip}
+
+NFT_CONCAT_RANGE_TESTS="performance" exec ./nft_concat_range.sh
diff --git a/tools/testing/selftests/net/netfilter/nft_conntrack_helper.sh b/tools/testing/selftests/net/netfilter/nft_conntrack_helper.sh
new file mode 100755
index 000000000000..abcaa7337197
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_conntrack_helper.sh
@@ -0,0 +1,171 @@
+#!/bin/bash
+#
+# This tests connection tracking helper assignment:
+# 1. can attach ftp helper to a connection from nft ruleset.
+# 2. auto-assign still works.
+#
+# Kselftest framework requirement - SKIP code is 4.
+
+source lib.sh
+
+ret=0
+
+testipv6=1
+
+checktool "socat -h" "run test without socat"
+checktool "conntrack --version" "run test without conntrack"
+checktool "nft --version" "run test without nft"
+
+cleanup()
+{
+ ip netns pids "$ns1" | xargs kill 2>/dev/null
+
+ ip netns del "$ns1"
+ ip netns del "$ns2"
+}
+
+trap cleanup EXIT
+
+setup_ns ns1 ns2
+
+if ! ip link add veth0 netns "$ns1" type veth peer name veth0 netns "$ns2" > /dev/null 2>&1;then
+ echo "SKIP: No virtual ethernet pair device support in kernel"
+ exit $ksft_skip
+fi
+
+ip -net "$ns1" link set veth0 up
+ip -net "$ns2" link set veth0 up
+
+ip -net "$ns1" addr add 10.0.1.1/24 dev veth0
+ip -net "$ns1" addr add dead:1::1/64 dev veth0 nodad
+
+ip -net "$ns2" addr add 10.0.1.2/24 dev veth0
+ip -net "$ns2" addr add dead:1::2/64 dev veth0 nodad
+
+load_ruleset_family() {
+ local family=$1
+ local ns=$2
+
+ip netns exec "$ns" nft -f - <<EOF
+table $family raw {
+ ct helper ftp {
+ type "ftp" protocol tcp
+ }
+ chain pre {
+ type filter hook prerouting priority 0; policy accept;
+ tcp dport 2121 ct helper set "ftp"
+ }
+ chain output {
+ type filter hook output priority 0; policy accept;
+ tcp dport 2121 ct helper set "ftp"
+ }
+}
+EOF
+ return $?
+}
+
+check_for_helper()
+{
+ local netns=$1
+ local message=$2
+ local port=$3
+
+ if echo "$message" |grep -q 'ipv6';then
+ local family="ipv6"
+ else
+ local family="ipv4"
+ fi
+
+ if ! ip netns exec "$netns" conntrack -L -f $family -p tcp --dport "$port" 2> /dev/null |grep -q 'helper=ftp';then
+ if [ "$autoassign" -eq 0 ] ;then
+ echo "FAIL: ${netns} did not show attached helper $message" 1>&2
+ ret=1
+ else
+ echo "PASS: ${netns} did not show attached helper $message" 1>&2
+ fi
+ else
+ if [ "$autoassign" -eq 0 ] ;then
+ echo "PASS: ${netns} connection on port $port has ftp helper attached" 1>&2
+ else
+ echo "FAIL: ${netns} connection on port $port has ftp helper attached" 1>&2
+ ret=1
+ fi
+ fi
+
+ return 0
+}
+
+listener_ready()
+{
+ ns="$1"
+ port="$2"
+ proto="$3"
+ ss -N "$ns" -lnt -o "sport = :$port" | grep -q "$port"
+}
+
+test_helper()
+{
+ local port=$1
+ local autoassign=$2
+
+ if [ "$autoassign" -eq 0 ] ;then
+ msg="set via ruleset"
+ else
+ msg="auto-assign"
+ fi
+
+ ip netns exec "$ns2" socat -t 3 -u -4 TCP-LISTEN:"$port",reuseaddr STDOUT > /dev/null &
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2" "$port" "-4"
+
+ ip netns exec "$ns1" socat -u -4 STDIN TCP:10.0.1.2:"$port" < /dev/null > /dev/null
+
+ check_for_helper "$ns1" "ip $msg" "$port" "$autoassign"
+ check_for_helper "$ns2" "ip $msg" "$port" "$autoassign"
+
+ if [ $testipv6 -eq 0 ] ;then
+ return 0
+ fi
+
+ ip netns exec "$ns1" conntrack -F 2> /dev/null
+ ip netns exec "$ns2" conntrack -F 2> /dev/null
+
+ ip netns exec "$ns2" socat -t 3 -u -6 TCP-LISTEN:"$port",reuseaddr STDOUT > /dev/null &
+ busywait $BUSYWAIT_TIMEOUT listener_ready "$ns2" "$port" "-6"
+
+ ip netns exec "$ns1" socat -t 3 -u -6 STDIN TCP:"[dead:1::2]":"$port" < /dev/null > /dev/null
+
+ check_for_helper "$ns1" "ipv6 $msg" "$port"
+ check_for_helper "$ns2" "ipv6 $msg" "$port"
+}
+
+if ! load_ruleset_family ip "$ns1"; then
+ echo "FAIL: ${ns1} cannot load ip ruleset" 1>&2
+ exit 1
+fi
+
+if ! load_ruleset_family ip6 "$ns1"; then
+ echo "SKIP: ${ns1} cannot load ip6 ruleset" 1>&2
+ testipv6=0
+fi
+
+if ! load_ruleset_family inet "${ns2}"; then
+ echo "SKIP: ${ns1} cannot load inet ruleset" 1>&2
+ if ! load_ruleset_family ip "${ns2}"; then
+ echo "FAIL: ${ns2} cannot load ip ruleset" 1>&2
+ exit 1
+ fi
+
+ if [ "$testipv6" -eq 1 ] ;then
+ if ! load_ruleset_family ip6 "$ns2"; then
+ echo "FAIL: ${ns2} cannot load ip6 ruleset" 1>&2
+ exit 1
+ fi
+ fi
+fi
+
+test_helper 2121 0
+ip netns exec "$ns1" sysctl -qe 'net.netfilter.nf_conntrack_helper=1'
+ip netns exec "$ns2" sysctl -qe 'net.netfilter.nf_conntrack_helper=1'
+test_helper 21 1
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/nft_fib.sh b/tools/testing/selftests/net/netfilter/nft_fib.sh
new file mode 100755
index 000000000000..ce1451c275fd
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_fib.sh
@@ -0,0 +1,234 @@
+#!/bin/bash
+#
+# This tests the fib expression.
+#
+# Kselftest framework requirement - SKIP code is 4.
+
+source lib.sh
+
+ret=0
+
+timeout=4
+
+log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)
+
+cleanup()
+{
+ cleanup_all_ns
+
+ [ "$log_netns" -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns
+}
+
+checktool "nft --version" "run test without nft"
+
+setup_ns nsrouter ns1 ns2
+
+trap cleanup EXIT
+
+if dmesg | grep -q ' nft_rpfilter: ';then
+ dmesg -c | grep ' nft_rpfilter: '
+ echo "WARN: a previous test run has failed" 1>&2
+fi
+
+sysctl -q net.netfilter.nf_log_all_netns=1
+
+load_ruleset() {
+ local netns=$1
+
+ip netns exec "$netns" nft -f /dev/stdin <<EOF
+table inet filter {
+ chain prerouting {
+ type filter hook prerouting priority 0; policy accept;
+ fib saddr . iif oif missing counter log prefix "$netns nft_rpfilter: " drop
+ }
+}
+EOF
+}
+
+load_pbr_ruleset() {
+ local netns=$1
+
+ip netns exec "$netns" nft -f /dev/stdin <<EOF
+table inet filter {
+ chain forward {
+ type filter hook forward priority raw;
+ fib saddr . iif oif gt 0 accept
+ log drop
+ }
+}
+EOF
+}
+
+load_ruleset_count() {
+ local netns=$1
+
+ip netns exec "$netns" nft -f /dev/stdin <<EOF
+table inet filter {
+ chain prerouting {
+ type filter hook prerouting priority 0; policy accept;
+ ip daddr 1.1.1.1 fib saddr . iif oif missing counter drop
+ ip6 daddr 1c3::c01d fib saddr . iif oif missing counter drop
+ }
+}
+EOF
+}
+
+check_drops() {
+ if dmesg | grep -q ' nft_rpfilter: ';then
+ dmesg | grep ' nft_rpfilter: '
+ echo "FAIL: rpfilter did drop packets"
+ return 1
+ fi
+
+ return 0
+}
+
+check_fib_counter() {
+ local want=$1
+ local ns=$2
+ local address=$3
+
+ if ! ip netns exec "$ns" nft list table inet filter | grep 'fib saddr . iif' | grep "$address" | grep -q "packets $want";then
+ echo "Netns $ns fib counter doesn't match expected packet count of $want for $address" 1>&2
+ ip netns exec "$ns" nft list table inet filter
+ return 1
+ fi
+
+ if [ "$want" -gt 0 ]; then
+ echo "PASS: fib expression did drop packets for $address"
+ fi
+
+ return 0
+}
+
+load_ruleset "$nsrouter"
+load_ruleset "$ns1"
+load_ruleset "$ns2"
+
+if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then
+ echo "SKIP: No virtual ethernet pair device support in kernel"
+ exit $ksft_skip
+fi
+ip link add veth1 netns "$nsrouter" type veth peer name eth0 netns "$ns2"
+
+ip -net "$nsrouter" link set veth0 up
+ip -net "$nsrouter" addr add 10.0.1.1/24 dev veth0
+ip -net "$nsrouter" addr add dead:1::1/64 dev veth0 nodad
+
+ip -net "$nsrouter" link set veth1 up
+ip -net "$nsrouter" addr add 10.0.2.1/24 dev veth1
+ip -net "$nsrouter" addr add dead:2::1/64 dev veth1 nodad
+
+ip -net "$ns1" link set eth0 up
+ip -net "$ns2" link set eth0 up
+
+ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
+ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad
+ip -net "$ns1" route add default via 10.0.1.1
+ip -net "$ns1" route add default via dead:1::1
+
+ip -net "$ns2" addr add 10.0.2.99/24 dev eth0
+ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad
+ip -net "$ns2" route add default via 10.0.2.1
+ip -net "$ns2" route add default via dead:2::1
+
+test_ping() {
+ local daddr4=$1
+ local daddr6=$2
+
+ if ! ip netns exec "$ns1" ping -c 1 -q "$daddr4" > /dev/null; then
+ check_drops
+ echo "FAIL: ${ns1} cannot reach $daddr4, ret $ret" 1>&2
+ return 1
+ fi
+
+ if ! ip netns exec "$ns1" ping -c 1 -q "$daddr6" > /dev/null; then
+ check_drops
+ echo "FAIL: ${ns1} cannot reach $daddr6, ret $ret" 1>&2
+ return 1
+ fi
+
+ return 0
+}
+
+ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+ip netns exec "$nsrouter" sysctl net.ipv4.conf.all.rp_filter=0 > /dev/null
+ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.rp_filter=0 > /dev/null
+
+test_ping 10.0.2.1 dead:2::1 || exit 1
+check_drops || exit 1
+
+test_ping 10.0.2.99 dead:2::99 || exit 1
+check_drops || exit 1
+
+echo "PASS: fib expression did not cause unwanted packet drops"
+
+ip netns exec "$nsrouter" nft flush table inet filter
+
+ip -net "$ns1" route del default
+ip -net "$ns1" -6 route del default
+
+ip -net "$ns1" addr del 10.0.1.99/24 dev eth0
+ip -net "$ns1" addr del dead:1::99/64 dev eth0
+
+ip -net "$ns1" addr add 10.0.2.99/24 dev eth0
+ip -net "$ns1" addr add dead:2::99/64 dev eth0 nodad
+
+ip -net "$ns1" route add default via 10.0.2.1
+ip -net "$ns1" -6 route add default via dead:2::1
+
+ip -net "$nsrouter" addr add dead:2::1/64 dev veth0 nodad
+
+# switch to ruleset that doesn't log, this time
+# its expected that this does drop the packets.
+load_ruleset_count "$nsrouter"
+
+# ns1 has a default route, but nsrouter does not.
+# must not check return value, ping to 1.1.1.1 will
+# fail.
+check_fib_counter 0 "$nsrouter" 1.1.1.1 || exit 1
+check_fib_counter 0 "$nsrouter" 1c3::c01d || exit 1
+
+ip netns exec "$ns1" ping -W 0.5 -c 1 -q 1.1.1.1 > /dev/null
+check_fib_counter 1 "$nsrouter" 1.1.1.1 || exit 1
+
+ip netns exec "$ns1" ping -W 0.5 -i 0.1 -c 3 -q 1c3::c01d > /dev/null
+check_fib_counter 3 "$nsrouter" 1c3::c01d || exit 1
+
+# delete all rules
+ip netns exec "$ns1" nft flush ruleset
+ip netns exec "$ns2" nft flush ruleset
+ip netns exec "$nsrouter" nft flush ruleset
+
+ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
+ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad
+
+ip -net "$ns1" addr del 10.0.2.99/24 dev eth0
+ip -net "$ns1" addr del dead:2::99/64 dev eth0
+
+ip -net "$nsrouter" addr del dead:2::1/64 dev veth0
+
+# ... pbr ruleset for the router, check iif+oif.
+if ! load_pbr_ruleset "$nsrouter";then
+ echo "SKIP: Could not load fib forward ruleset"
+ exit $ksft_skip
+fi
+
+ip -net "$nsrouter" rule add from all table 128
+ip -net "$nsrouter" rule add from all iif veth0 table 129
+ip -net "$nsrouter" route add table 128 to 10.0.1.0/24 dev veth0
+ip -net "$nsrouter" route add table 129 to 10.0.2.0/24 dev veth1
+
+# drop main ipv4 table
+ip -net "$nsrouter" -4 rule delete table main
+
+if ! test_ping 10.0.2.99 dead:2::99;then
+ ip -net "$nsrouter" nft list ruleset
+ echo "FAIL: fib mismatch in pbr setup"
+ exit 1
+fi
+
+echo "PASS: fib expression forward check with policy based routing"
+exit 0
diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
index a32f490f7539..b3995550856a 100755
--- a/tools/testing/selftests/netfilter/nft_flowtable.sh
+++ b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
@@ -14,15 +14,10 @@
# nft_flowtable.sh -o8000 -l1500 -r2000
#
-sfx=$(mktemp -u "XXXXXXXX")
-ns1="ns1-$sfx"
-ns2="ns2-$sfx"
-nsr1="nsr1-$sfx"
-nsr2="nsr2-$sfx"
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
+source lib.sh
+
ret=0
+SOCAT_TIMEOUT=60
nsin=""
ns1out=""
@@ -30,52 +25,41 @@ ns2out=""
log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)
-checktool (){
- if ! $1 > /dev/null 2>&1; then
- echo "SKIP: Could not $2"
- exit $ksft_skip
- fi
-}
-
checktool "nft --version" "run test without nft tool"
-checktool "ip -Version" "run test without ip tool"
-checktool "which nc" "run test without nc (netcat)"
-checktool "ip netns add $nsr1" "create net namespace $nsr1"
+checktool "socat -h" "run test without socat"
-ip netns add $ns1
-ip netns add $ns2
-ip netns add $nsr2
+setup_ns ns1 ns2 nsr1 nsr2
cleanup() {
- ip netns del $ns1
- ip netns del $ns2
- ip netns del $nsr1
- ip netns del $nsr2
+ ip netns pids "$ns1" | xargs kill 2>/dev/null
+ ip netns pids "$ns2" | xargs kill 2>/dev/null
+
+ cleanup_all_ns
rm -f "$nsin" "$ns1out" "$ns2out"
- [ $log_netns -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns
+ [ "$log_netns" -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns="$log_netns"
}
trap cleanup EXIT
sysctl -q net.netfilter.nf_log_all_netns=1
-ip link add veth0 netns $nsr1 type veth peer name eth0 netns $ns1
-ip link add veth1 netns $nsr1 type veth peer name veth0 netns $nsr2
+ip link add veth0 netns "$nsr1" type veth peer name eth0 netns "$ns1"
+ip link add veth1 netns "$nsr1" type veth peer name veth0 netns "$nsr2"
-ip link add veth1 netns $nsr2 type veth peer name eth0 netns $ns2
+ip link add veth1 netns "$nsr2" type veth peer name eth0 netns "$ns2"
-for dev in lo veth0 veth1; do
- ip -net $nsr1 link set $dev up
- ip -net $nsr2 link set $dev up
+for dev in veth0 veth1; do
+ ip -net "$nsr1" link set "$dev" up
+ ip -net "$nsr2" link set "$dev" up
done
-ip -net $nsr1 addr add 10.0.1.1/24 dev veth0
-ip -net $nsr1 addr add dead:1::1/64 dev veth0
+ip -net "$nsr1" addr add 10.0.1.1/24 dev veth0
+ip -net "$nsr1" addr add dead:1::1/64 dev veth0 nodad
-ip -net $nsr2 addr add 10.0.2.1/24 dev veth1
-ip -net $nsr2 addr add dead:2::1/64 dev veth1
+ip -net "$nsr2" addr add 10.0.2.1/24 dev veth1
+ip -net "$nsr2" addr add dead:2::1/64 dev veth1 nodad
# set different MTUs so we need to push packets coming from ns1 (large MTU)
# to ns2 (smaller MTU) to stack either to perform fragmentation (ip_no_pmtu_disc=1),
@@ -107,56 +91,63 @@ do
esac
done
-if ! ip -net $nsr1 link set veth0 mtu $omtu; then
+if ! ip -net "$nsr1" link set veth0 mtu "$omtu"; then
+ exit 1
+fi
+
+ip -net "$ns1" link set eth0 mtu "$omtu"
+
+if ! ip -net "$nsr2" link set veth1 mtu "$rmtu"; then
exit 1
fi
-ip -net $ns1 link set eth0 mtu $omtu
+if ! ip -net "$nsr1" link set veth1 mtu "$lmtu"; then
+ exit 1
+fi
-if ! ip -net $nsr2 link set veth1 mtu $rmtu; then
+if ! ip -net "$nsr2" link set veth0 mtu "$lmtu"; then
exit 1
fi
-ip -net $ns2 link set eth0 mtu $rmtu
+ip -net "$ns2" link set eth0 mtu "$rmtu"
# transfer-net between nsr1 and nsr2.
# these addresses are not used for connections.
-ip -net $nsr1 addr add 192.168.10.1/24 dev veth1
-ip -net $nsr1 addr add fee1:2::1/64 dev veth1
+ip -net "$nsr1" addr add 192.168.10.1/24 dev veth1
+ip -net "$nsr1" addr add fee1:2::1/64 dev veth1 nodad
-ip -net $nsr2 addr add 192.168.10.2/24 dev veth0
-ip -net $nsr2 addr add fee1:2::2/64 dev veth0
+ip -net "$nsr2" addr add 192.168.10.2/24 dev veth0
+ip -net "$nsr2" addr add fee1:2::2/64 dev veth0 nodad
for i in 0 1; do
- ip netns exec $nsr1 sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
- ip netns exec $nsr2 sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
+ ip netns exec "$nsr1" sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
+ ip netns exec "$nsr2" sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
done
-for ns in $ns1 $ns2;do
- ip -net $ns link set lo up
- ip -net $ns link set eth0 up
+for ns in "$ns1" "$ns2";do
+ ip -net "$ns" link set eth0 up
- if ! ip netns exec $ns sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then
+ if ! ip netns exec "$ns" sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then
echo "ERROR: Check Originator/Responder values (problem during address addition)"
exit 1
fi
# don't set ip DF bit for first two tests
- ip netns exec $ns sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
+ ip netns exec "$ns" sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
done
-ip -net $ns1 addr add 10.0.1.99/24 dev eth0
-ip -net $ns2 addr add 10.0.2.99/24 dev eth0
-ip -net $ns1 route add default via 10.0.1.1
-ip -net $ns2 route add default via 10.0.2.1
-ip -net $ns1 addr add dead:1::99/64 dev eth0
-ip -net $ns2 addr add dead:2::99/64 dev eth0
-ip -net $ns1 route add default via dead:1::1
-ip -net $ns2 route add default via dead:2::1
+ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
+ip -net "$ns2" addr add 10.0.2.99/24 dev eth0
+ip -net "$ns1" route add default via 10.0.1.1
+ip -net "$ns2" route add default via 10.0.2.1
+ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad
+ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad
+ip -net "$ns1" route add default via dead:1::1
+ip -net "$ns2" route add default via dead:2::1
-ip -net $nsr1 route add default via 192.168.10.2
-ip -net $nsr2 route add default via 192.168.10.1
+ip -net "$nsr1" route add default via 192.168.10.2
+ip -net "$nsr2" route add default via 192.168.10.1
-ip netns exec $nsr1 nft -f - <<EOF
+ip netns exec "$nsr1" nft -f - <<EOF
table inet filter {
flowtable f1 {
hook ingress priority 0
@@ -188,7 +179,7 @@ if [ $? -ne 0 ]; then
exit $ksft_skip
fi
-ip netns exec $ns2 nft -f - <<EOF
+ip netns exec "$ns2" nft -f - <<EOF
table inet filter {
counter ip4dscp0 { }
counter ip4dscp3 { }
@@ -204,25 +195,22 @@ table inet filter {
EOF
if [ $? -ne 0 ]; then
- echo "SKIP: Could not load nft ruleset"
+ echo -n "SKIP: Could not load ruleset: "
+ nft --version
exit $ksft_skip
fi
# test basic connectivity
-if ! ip netns exec $ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then
+if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.99 > /dev/null; then
echo "ERROR: $ns1 cannot reach ns2" 1>&2
exit 1
fi
-if ! ip netns exec $ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then
+if ! ip netns exec "$ns2" ping -c 1 -q 10.0.1.99 > /dev/null; then
echo "ERROR: $ns2 cannot reach $ns1" 1>&2
exit 1
fi
-if [ $ret -eq 0 ];then
- echo "PASS: netns routing/connectivity: $ns1 can reach $ns2"
-fi
-
nsin=$(mktemp)
ns1out=$(mktemp)
ns2out=$(mktemp)
@@ -248,23 +236,27 @@ check_counters()
local what=$1
local ok=1
- local orig=$(ip netns exec $nsr1 nft reset counter inet filter routed_orig | grep packets)
- local repl=$(ip netns exec $nsr1 nft reset counter inet filter routed_repl | grep packets)
+ local orig repl
+ orig=$(ip netns exec "$nsr1" nft reset counter inet filter routed_orig | grep packets)
+ repl=$(ip netns exec "$nsr1" nft reset counter inet filter routed_repl | grep packets)
local orig_cnt=${orig#*bytes}
local repl_cnt=${repl#*bytes}
- local fs=$(du -sb $nsin)
+ local fs
+ fs=$(du -sb "$nsin")
local max_orig=${fs%%/*}
local max_repl=$((max_orig/4))
- if [ $orig_cnt -gt $max_orig ];then
+ # flowtable fastpath should bypass normal routing one, i.e. the counters in forward hook
+ # should always be lower than the size of the transmitted file (max_orig).
+ if [ "$orig_cnt" -gt "$max_orig" ];then
echo "FAIL: $what: original counter $orig_cnt exceeds expected value $max_orig" 1>&2
ret=1
ok=0
fi
- if [ $repl_cnt -gt $max_repl ];then
+ if [ "$repl_cnt" -gt $max_repl ];then
echo "FAIL: $what: reply counter $repl_cnt exceeds expected value $max_repl" 1>&2
ret=1
ok=0
@@ -280,39 +272,40 @@ check_dscp()
local what=$1
local ok=1
- local counter=$(ip netns exec $ns2 nft reset counter inet filter ip4dscp3 | grep packets)
+ local counter
+ counter=$(ip netns exec "$ns2" nft reset counter inet filter ip4dscp3 | grep packets)
local pc4=${counter%*bytes*}
local pc4=${pc4#*packets}
- local counter=$(ip netns exec $ns2 nft reset counter inet filter ip4dscp0 | grep packets)
+ counter=$(ip netns exec "$ns2" nft reset counter inet filter ip4dscp0 | grep packets)
local pc4z=${counter%*bytes*}
local pc4z=${pc4z#*packets}
case "$what" in
"dscp_none")
- if [ $pc4 -gt 0 ] || [ $pc4z -eq 0 ]; then
+ if [ "$pc4" -gt 0 ] || [ "$pc4z" -eq 0 ]; then
echo "FAIL: dscp counters do not match, expected dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2
ret=1
ok=0
fi
;;
"dscp_fwd")
- if [ $pc4 -eq 0 ] || [ $pc4z -eq 0 ]; then
+ if [ "$pc4" -eq 0 ] || [ "$pc4z" -eq 0 ]; then
echo "FAIL: dscp counters do not match, expected dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2
ret=1
ok=0
fi
;;
"dscp_ingress")
- if [ $pc4 -eq 0 ] || [ $pc4z -gt 0 ]; then
+ if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then
echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
ret=1
ok=0
fi
;;
"dscp_egress")
- if [ $pc4 -eq 0 ] || [ $pc4z -gt 0 ]; then
+ if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then
echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
ret=1
ok=0
@@ -324,7 +317,7 @@ check_dscp()
ok=0
esac
- if [ $ok -eq 1 ] ;then
+ if [ "$ok" -eq 1 ] ;then
echo "PASS: $what: dscp packet counters match"
fi
}
@@ -345,6 +338,11 @@ check_transfer()
return 0
}
+listener_ready()
+{
+ ss -N "$nsb" -lnt -o "sport = :12345" | grep -q 12345
+}
+
test_tcp_forwarding_ip()
{
local nsa=$1
@@ -353,40 +351,23 @@ test_tcp_forwarding_ip()
local dstport=$4
local lret=0
- ip netns exec $nsb nc -w 5 -l -p 12345 < "$nsin" > "$ns2out" &
+ timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -4 TCP-LISTEN:12345,reuseaddr STDIO < "$nsin" > "$ns2out" &
lpid=$!
- sleep 1
- ip netns exec $nsa nc -w 4 "$dstip" "$dstport" < "$nsin" > "$ns1out" &
- cpid=$!
-
- sleep 1
-
- prev="$(ls -l $ns1out $ns2out)"
- sleep 1
-
- while [[ "$prev" != "$(ls -l $ns1out $ns2out)" ]]; do
- sleep 1;
- prev="$(ls -l $ns1out $ns2out)"
- done
+ busywait 1000 listener_ready
- if test -d /proc/"$lpid"/; then
- kill $lpid
- fi
-
- if test -d /proc/"$cpid"/; then
- kill $cpid
- fi
+ timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -4 TCP:"$dstip":"$dstport" STDIO < "$nsin" > "$ns1out"
wait $lpid
- wait $cpid
if ! check_transfer "$nsin" "$ns2out" "ns1 -> ns2"; then
lret=1
+ ret=1
fi
if ! check_transfer "$nsin" "$ns1out" "ns1 <- ns2"; then
lret=1
+ ret=1
fi
return $lret
@@ -403,7 +384,7 @@ test_tcp_forwarding_set_dscp()
{
check_dscp "dscp_none"
-ip netns exec $nsr1 nft -f - <<EOF
+ip netns exec "$nsr1" nft -f - <<EOF
table netdev dscpmangle {
chain setdscp0 {
type filter hook ingress device "veth0" priority 0; policy accept
@@ -415,12 +396,12 @@ if [ $? -eq 0 ]; then
test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
check_dscp "dscp_ingress"
- ip netns exec $nsr1 nft delete table netdev dscpmangle
+ ip netns exec "$nsr1" nft delete table netdev dscpmangle
else
echo "SKIP: Could not load netdev:ingress for veth0"
fi
-ip netns exec $nsr1 nft -f - <<EOF
+ip netns exec "$nsr1" nft -f - <<EOF
table netdev dscpmangle {
chain setdscp0 {
type filter hook egress device "veth1" priority 0; policy accept
@@ -432,14 +413,14 @@ if [ $? -eq 0 ]; then
test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
check_dscp "dscp_egress"
- ip netns exec $nsr1 nft flush table netdev dscpmangle
+ ip netns exec "$nsr1" nft flush table netdev dscpmangle
else
echo "SKIP: Could not load netdev:egress for veth1"
fi
# partial. If flowtable really works, then both dscp-is-0 and dscp-is-cs3
# counters should have seen packets (before and after ft offload kicks in).
- ip netns exec $nsr1 nft -a insert rule inet filter forward ip dscp set cs3
+ ip netns exec "$nsr1" nft -a insert rule inet filter forward ip dscp set cs3
test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
check_dscp "dscp_fwd"
}
@@ -455,8 +436,8 @@ test_tcp_forwarding_nat()
pmtu=$3
what=$4
- if [ $lret -eq 0 ] ; then
- if [ $pmtu -eq 1 ] ;then
+ if [ "$lret" -eq 0 ] ; then
+ if [ "$pmtu" -eq 1 ] ;then
check_counters "flow offload for ns1/ns2 with masquerade and pmtu discovery $what"
else
echo "PASS: flow offload for ns1/ns2 with masquerade $what"
@@ -464,9 +445,9 @@ test_tcp_forwarding_nat()
test_tcp_forwarding_ip "$1" "$2" 10.6.6.6 1666
lret=$?
- if [ $pmtu -eq 1 ] ;then
+ if [ "$pmtu" -eq 1 ] ;then
check_counters "flow offload for ns1/ns2 with dnat and pmtu discovery $what"
- elif [ $lret -eq 0 ] ; then
+ elif [ "$lret" -eq 0 ] ; then
echo "PASS: flow offload for ns1/ns2 with dnat $what"
fi
fi
@@ -481,25 +462,25 @@ make_file "$nsin"
# Due to MTU mismatch in both directions, all packets (except small packets like pure
# acks) have to be handled by normal forwarding path. Therefore, packet counters
# are not checked.
-if test_tcp_forwarding $ns1 $ns2; then
+if test_tcp_forwarding "$ns1" "$ns2"; then
echo "PASS: flow offloaded for ns1/ns2"
else
echo "FAIL: flow offload for ns1/ns2:" 1>&2
- ip netns exec $nsr1 nft list ruleset
+ ip netns exec "$nsr1" nft list ruleset
ret=1
fi
# delete default route, i.e. ns2 won't be able to reach ns1 and
# will depend on ns1 being masqueraded in nsr1.
# expect ns1 has nsr1 address.
-ip -net $ns2 route del default via 10.0.2.1
-ip -net $ns2 route del default via dead:2::1
-ip -net $ns2 route add 192.168.10.1 via 10.0.2.1
+ip -net "$ns2" route del default via 10.0.2.1
+ip -net "$ns2" route del default via dead:2::1
+ip -net "$ns2" route add 192.168.10.1 via 10.0.2.1
# Second test:
# Same, but with NAT enabled. Same as in first test: we expect normal forward path
# to handle most packets.
-ip netns exec $nsr1 nft -f - <<EOF
+ip netns exec "$nsr1" nft -f - <<EOF
table ip nat {
chain prerouting {
type nat hook prerouting priority 0; policy accept;
@@ -513,14 +494,14 @@ table ip nat {
}
EOF
-if ! test_tcp_forwarding_set_dscp $ns1 $ns2 0 ""; then
+if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 0 ""; then
echo "FAIL: flow offload for ns1/ns2 with dscp update" 1>&2
exit 0
fi
-if ! test_tcp_forwarding_nat $ns1 $ns2 0 ""; then
+if ! test_tcp_forwarding_nat "$ns1" "$ns2" 0 ""; then
echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2
- ip netns exec $nsr1 nft list ruleset
+ ip netns exec "$nsr1" nft list ruleset
ret=1
fi
@@ -528,35 +509,40 @@ fi
# Same as second test, but with PMTU discovery enabled. This
# means that we expect the fastpath to handle packets as soon
# as the endpoints adjust the packet size.
-ip netns exec $ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
-ip netns exec $ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
+ip netns exec "$ns1" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
+ip netns exec "$ns2" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
# reset counters.
# With pmtu in-place we'll also check that nft counters
# are lower than file size and packets were forwarded via flowtable layer.
# For earlier tests (large mtus), packets cannot be handled via flowtable
# (except pure acks and other small packets).
-ip netns exec $nsr1 nft reset counters table inet filter >/dev/null
+ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null
-if ! test_tcp_forwarding_nat $ns1 $ns2 1 ""; then
+if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 ""; then
echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
- ip netns exec $nsr1 nft list ruleset
+ ip netns exec "$nsr1" nft list ruleset
fi
# Another test:
# Add bridge interface br0 to Router1, with NAT enabled.
-ip -net $nsr1 link add name br0 type bridge
-ip -net $nsr1 addr flush dev veth0
-ip -net $nsr1 link set up dev veth0
-ip -net $nsr1 link set veth0 master br0
-ip -net $nsr1 addr add 10.0.1.1/24 dev br0
-ip -net $nsr1 addr add dead:1::1/64 dev br0
-ip -net $nsr1 link set up dev br0
+test_bridge() {
+if ! ip -net "$nsr1" link add name br0 type bridge 2>/dev/null;then
+ echo "SKIP: could not add bridge br0"
+ [ "$ret" -eq 0 ] && ret=$ksft_skip
+ return
+fi
+ip -net "$nsr1" addr flush dev veth0
+ip -net "$nsr1" link set up dev veth0
+ip -net "$nsr1" link set veth0 master br0
+ip -net "$nsr1" addr add 10.0.1.1/24 dev br0
+ip -net "$nsr1" addr add dead:1::1/64 dev br0 nodad
+ip -net "$nsr1" link set up dev br0
-ip netns exec $nsr1 sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null
+ip netns exec "$nsr1" sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null
# br0 with NAT enabled.
-ip netns exec $nsr1 nft -f - <<EOF
+ip netns exec "$nsr1" nft -f - <<EOF
flush table ip nat
table ip nat {
chain prerouting {
@@ -571,56 +557,59 @@ table ip nat {
}
EOF
-if ! test_tcp_forwarding_nat $ns1 $ns2 1 "on bridge"; then
+if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "on bridge"; then
echo "FAIL: flow offload for ns1/ns2 with bridge NAT" 1>&2
- ip netns exec $nsr1 nft list ruleset
+ ip netns exec "$nsr1" nft list ruleset
ret=1
fi
# Another test:
# Add bridge interface br0 to Router1, with NAT and VLAN.
-ip -net $nsr1 link set veth0 nomaster
-ip -net $nsr1 link set down dev veth0
-ip -net $nsr1 link add link veth0 name veth0.10 type vlan id 10
-ip -net $nsr1 link set up dev veth0
-ip -net $nsr1 link set up dev veth0.10
-ip -net $nsr1 link set veth0.10 master br0
-
-ip -net $ns1 addr flush dev eth0
-ip -net $ns1 link add link eth0 name eth0.10 type vlan id 10
-ip -net $ns1 link set eth0 up
-ip -net $ns1 link set eth0.10 up
-ip -net $ns1 addr add 10.0.1.99/24 dev eth0.10
-ip -net $ns1 route add default via 10.0.1.1
-ip -net $ns1 addr add dead:1::99/64 dev eth0.10
-
-if ! test_tcp_forwarding_nat $ns1 $ns2 1 "bridge and VLAN"; then
+ip -net "$nsr1" link set veth0 nomaster
+ip -net "$nsr1" link set down dev veth0
+ip -net "$nsr1" link add link veth0 name veth0.10 type vlan id 10
+ip -net "$nsr1" link set up dev veth0
+ip -net "$nsr1" link set up dev veth0.10
+ip -net "$nsr1" link set veth0.10 master br0
+
+ip -net "$ns1" addr flush dev eth0
+ip -net "$ns1" link add link eth0 name eth0.10 type vlan id 10
+ip -net "$ns1" link set eth0 up
+ip -net "$ns1" link set eth0.10 up
+ip -net "$ns1" addr add 10.0.1.99/24 dev eth0.10
+ip -net "$ns1" route add default via 10.0.1.1
+ip -net "$ns1" addr add dead:1::99/64 dev eth0.10 nodad
+
+if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "bridge and VLAN"; then
echo "FAIL: flow offload for ns1/ns2 with bridge NAT and VLAN" 1>&2
- ip netns exec $nsr1 nft list ruleset
+ ip netns exec "$nsr1" nft list ruleset
ret=1
fi
# restore test topology (remove bridge and VLAN)
-ip -net $nsr1 link set veth0 nomaster
-ip -net $nsr1 link set veth0 down
-ip -net $nsr1 link set veth0.10 down
-ip -net $nsr1 link delete veth0.10 type vlan
-ip -net $nsr1 link delete br0 type bridge
-ip -net $ns1 addr flush dev eth0.10
-ip -net $ns1 link set eth0.10 down
-ip -net $ns1 link set eth0 down
-ip -net $ns1 link delete eth0.10 type vlan
+ip -net "$nsr1" link set veth0 nomaster
+ip -net "$nsr1" link set veth0 down
+ip -net "$nsr1" link set veth0.10 down
+ip -net "$nsr1" link delete veth0.10 type vlan
+ip -net "$nsr1" link delete br0 type bridge
+ip -net "$ns1" addr flush dev eth0.10
+ip -net "$ns1" link set eth0.10 down
+ip -net "$ns1" link set eth0 down
+ip -net "$ns1" link delete eth0.10 type vlan
# restore address in ns1 and nsr1
-ip -net $ns1 link set eth0 up
-ip -net $ns1 addr add 10.0.1.99/24 dev eth0
-ip -net $ns1 route add default via 10.0.1.1
-ip -net $ns1 addr add dead:1::99/64 dev eth0
-ip -net $ns1 route add default via dead:1::1
-ip -net $nsr1 addr add 10.0.1.1/24 dev veth0
-ip -net $nsr1 addr add dead:1::1/64 dev veth0
-ip -net $nsr1 link set up dev veth0
+ip -net "$ns1" link set eth0 up
+ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
+ip -net "$ns1" route add default via 10.0.1.1
+ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad
+ip -net "$ns1" route add default via dead:1::1
+ip -net "$nsr1" addr add 10.0.1.1/24 dev veth0
+ip -net "$nsr1" addr add dead:1::1/64 dev veth0 nodad
+ip -net "$nsr1" link set up dev veth0
+}
+
+test_bridge
KEY_SHA="0x"$(ps -af | sha1sum | cut -d " " -f 1)
KEY_AES="0x"$(ps -af | md5sum | cut -d " " -f 1)
@@ -640,33 +629,43 @@ do_esp() {
local spi_out=$6
local spi_in=$7
- ip -net $ns xfrm state add src $remote dst $me proto esp spi $spi_in enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $rnet dst $lnet
- ip -net $ns xfrm state add src $me dst $remote proto esp spi $spi_out enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $lnet dst $rnet
+ ip -net "$ns" xfrm state add src "$remote" dst "$me" proto esp spi "$spi_in" enc aes "$KEY_AES" auth sha1 "$KEY_SHA" mode tunnel sel src "$rnet" dst "$lnet"
+ ip -net "$ns" xfrm state add src "$me" dst "$remote" proto esp spi "$spi_out" enc aes "$KEY_AES" auth sha1 "$KEY_SHA" mode tunnel sel src "$lnet" dst "$rnet"
# to encrypt packets as they go out (includes forwarded packets that need encapsulation)
- ip -net $ns xfrm policy add src $lnet dst $rnet dir out tmpl src $me dst $remote proto esp mode tunnel priority 1 action allow
+ ip -net "$ns" xfrm policy add src "$lnet" dst "$rnet" dir out tmpl src "$me" dst "$remote" proto esp mode tunnel priority 1 action allow
# to fwd decrypted packets after esp processing:
- ip -net $ns xfrm policy add src $rnet dst $lnet dir fwd tmpl src $remote dst $me proto esp mode tunnel priority 1 action allow
-
+ ip -net "$ns" xfrm policy add src "$rnet" dst "$lnet" dir fwd tmpl src "$remote" dst "$me" proto esp mode tunnel priority 1 action allow
}
-do_esp $nsr1 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2
+do_esp "$nsr1" 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 "$SPI1" "$SPI2"
-do_esp $nsr2 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1
+do_esp "$nsr2" 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 "$SPI2" "$SPI1"
-ip netns exec $nsr1 nft delete table ip nat
+ip netns exec "$nsr1" nft delete table ip nat
# restore default routes
-ip -net $ns2 route del 192.168.10.1 via 10.0.2.1
-ip -net $ns2 route add default via 10.0.2.1
-ip -net $ns2 route add default via dead:2::1
+ip -net "$ns2" route del 192.168.10.1 via 10.0.2.1
+ip -net "$ns2" route add default via 10.0.2.1
+ip -net "$ns2" route add default via dead:2::1
-if test_tcp_forwarding $ns1 $ns2; then
+if test_tcp_forwarding "$ns1" "$ns2"; then
check_counters "ipsec tunnel mode for ns1/ns2"
else
echo "FAIL: ipsec tunnel mode for ns1/ns2"
- ip netns exec $nsr1 nft list ruleset 1>&2
- ip netns exec $nsr1 cat /proc/net/xfrm_stat 1>&2
+ ip netns exec "$nsr1" nft list ruleset 1>&2
+ ip netns exec "$nsr1" cat /proc/net/xfrm_stat 1>&2
+fi
+
+if [ "$1" = "" ]; then
+ low=1280
+ mtu=$((65536 - low))
+ o=$(((RANDOM%mtu) + low))
+ l=$(((RANDOM%mtu) + low))
+ r=$(((RANDOM%mtu) + low))
+
+ echo "re-run with random mtus: -o $o -l $l -r $r"
+ $0 -o "$o" -l "$l" -r "$r"
fi
exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_meta.sh b/tools/testing/selftests/net/netfilter/nft_meta.sh
index f33154c04d34..71505b6cb252 100755
--- a/tools/testing/selftests/netfilter/nft_meta.sh
+++ b/tools/testing/selftests/net/netfilter/nft_meta.sh
@@ -91,10 +91,10 @@ check_one_counter()
local want="packets $2"
local verbose="$3"
- if ! ip netns exec "$ns0" nft list counter inet filter $cname | grep -q "$want"; then
+ if ! ip netns exec "$ns0" nft list counter inet filter "$cname" | grep -q "$want"; then
echo "FAIL: $cname, want \"$want\", got"
ret=1
- ip netns exec "$ns0" nft list counter inet filter $cname
+ ip netns exec "$ns0" nft list counter inet filter "$cname"
fi
}
diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/net/netfilter/nft_nat.sh
index dd40d9f6f259..9e39de26455f 100755
--- a/tools/testing/selftests/netfilter/nft_nat.sh
+++ b/tools/testing/selftests/net/netfilter/nft_nat.sh
@@ -3,77 +3,60 @@
# This test is for basic NAT functionality: snat, dnat, redirect, masquerade.
#
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
+source lib.sh
+
ret=0
test_inet_nat=true
-sfx=$(mktemp -u "XXXXXXXX")
-ns0="ns0-$sfx"
-ns1="ns1-$sfx"
-ns2="ns2-$sfx"
+checktool "nft --version" "run test without nft tool"
+checktool "socat -h" "run test without socat"
cleanup()
{
- for i in 0 1 2; do ip netns del ns$i-"$sfx";done
-}
+ ip netns pids "$ns0" | xargs kill 2>/dev/null
+ ip netns pids "$ns1" | xargs kill 2>/dev/null
+ ip netns pids "$ns2" | xargs kill 2>/dev/null
-nft --version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without nft tool"
- exit $ksft_skip
-fi
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
+ rm -f "$INFILE" "$OUTFILE"
-ip netns add "$ns0"
-if [ $? -ne 0 ];then
- echo "SKIP: Could not create net namespace $ns0"
- exit $ksft_skip
-fi
+ cleanup_all_ns
+}
trap cleanup EXIT
-ip netns add "$ns1"
-if [ $? -ne 0 ];then
- echo "SKIP: Could not create net namespace $ns1"
- exit $ksft_skip
-fi
+INFILE=$(mktemp)
+OUTFILE=$(mktemp)
-ip netns add "$ns2"
-if [ $? -ne 0 ];then
- echo "SKIP: Could not create net namespace $ns2"
- exit $ksft_skip
-fi
+setup_ns ns0 ns1 ns2
-ip link add veth0 netns "$ns0" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1
-if [ $? -ne 0 ];then
+if ! ip link add veth0 netns "$ns0" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1;then
echo "SKIP: No virtual ethernet pair device support in kernel"
exit $ksft_skip
fi
ip link add veth1 netns "$ns0" type veth peer name eth0 netns "$ns2"
-ip -net "$ns0" link set lo up
ip -net "$ns0" link set veth0 up
ip -net "$ns0" addr add 10.0.1.1/24 dev veth0
-ip -net "$ns0" addr add dead:1::1/64 dev veth0
+ip -net "$ns0" addr add dead:1::1/64 dev veth0 nodad
ip -net "$ns0" link set veth1 up
ip -net "$ns0" addr add 10.0.2.1/24 dev veth1
-ip -net "$ns0" addr add dead:2::1/64 dev veth1
-
-for i in 1 2; do
- ip -net ns$i-$sfx link set lo up
- ip -net ns$i-$sfx link set eth0 up
- ip -net ns$i-$sfx addr add 10.0.$i.99/24 dev eth0
- ip -net ns$i-$sfx route add default via 10.0.$i.1
- ip -net ns$i-$sfx addr add dead:$i::99/64 dev eth0
- ip -net ns$i-$sfx route add default via dead:$i::1
-done
+ip -net "$ns0" addr add dead:2::1/64 dev veth1 nodad
+
+do_config()
+{
+ ns="$1"
+ subnet="$2"
+
+ ip -net "$ns" link set eth0 up
+ ip -net "$ns" addr add "10.0.$subnet.99/24" dev eth0
+ ip -net "$ns" route add default via "10.0.$subnet.1"
+ ip -net "$ns" addr add "dead:$subnet::99/64" dev eth0 nodad
+ ip -net "$ns" route add default via "dead:$subnet::1"
+}
+
+do_config "$ns1" 1
+do_config "$ns2" 2
bad_counter()
{
@@ -83,7 +66,7 @@ bad_counter()
local tag=$4
echo "ERROR: $counter counter in $ns has unexpected value (expected $expect) at $tag" 1>&2
- ip netns exec $ns nft list counter inet filter $counter 1>&2
+ ip netns exec "$ns" nft list counter inet filter "$counter" 1>&2
}
check_counters()
@@ -91,26 +74,23 @@ check_counters()
ns=$1
local lret=0
- cnt=$(ip netns exec $ns nft list counter inet filter ns0in | grep -q "packets 1 bytes 84")
- if [ $? -ne 0 ]; then
- bad_counter $ns ns0in "packets 1 bytes 84" "check_counters 1"
+ if ! ip netns exec "$ns" nft list counter inet filter ns0in | grep -q "packets 1 bytes 84";then
+ bad_counter "$ns" ns0in "packets 1 bytes 84" "check_counters 1"
lret=1
fi
- cnt=$(ip netns exec $ns nft list counter inet filter ns0out | grep -q "packets 1 bytes 84")
- if [ $? -ne 0 ]; then
- bad_counter $ns ns0out "packets 1 bytes 84" "check_counters 2"
+
+ if ! ip netns exec "$ns" nft list counter inet filter ns0out | grep -q "packets 1 bytes 84";then
+ bad_counter "$ns" ns0out "packets 1 bytes 84" "check_counters 2"
lret=1
fi
expect="packets 1 bytes 104"
- cnt=$(ip netns exec $ns nft list counter inet filter ns0in6 | grep -q "$expect")
- if [ $? -ne 0 ]; then
- bad_counter $ns ns0in6 "$expect" "check_counters 3"
+ if ! ip netns exec "$ns" nft list counter inet filter ns0in6 | grep -q "$expect";then
+ bad_counter "$ns" ns0in6 "$expect" "check_counters 3"
lret=1
fi
- cnt=$(ip netns exec $ns nft list counter inet filter ns0out6 | grep -q "$expect")
- if [ $? -ne 0 ]; then
- bad_counter $ns ns0out6 "$expect" "check_counters 4"
+ if ! ip netns exec "$ns" nft list counter inet filter ns0out6 | grep -q "$expect";then
+ bad_counter "$ns" ns0out6 "$expect" "check_counters 4"
lret=1
fi
@@ -122,41 +102,35 @@ check_ns0_counters()
local ns=$1
local lret=0
- cnt=$(ip netns exec "$ns0" nft list counter inet filter ns0in | grep -q "packets 0 bytes 0")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns0" nft list counter inet filter ns0in | grep -q "packets 0 bytes 0";then
bad_counter "$ns0" ns0in "packets 0 bytes 0" "check_ns0_counters 1"
lret=1
fi
- cnt=$(ip netns exec "$ns0" nft list counter inet filter ns0in6 | grep -q "packets 0 bytes 0")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns0" nft list counter inet filter ns0in6 | grep -q "packets 0 bytes 0";then
bad_counter "$ns0" ns0in6 "packets 0 bytes 0"
lret=1
fi
- cnt=$(ip netns exec "$ns0" nft list counter inet filter ns0out | grep -q "packets 0 bytes 0")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns0" nft list counter inet filter ns0out | grep -q "packets 0 bytes 0";then
bad_counter "$ns0" ns0out "packets 0 bytes 0" "check_ns0_counters 2"
lret=1
fi
- cnt=$(ip netns exec "$ns0" nft list counter inet filter ns0out6 | grep -q "packets 0 bytes 0")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns0" nft list counter inet filter ns0out6 | grep -q "packets 0 bytes 0";then
bad_counter "$ns0" ns0out6 "packets 0 bytes 0" "check_ns0_counters3 "
lret=1
fi
for dir in "in" "out" ; do
expect="packets 1 bytes 84"
- cnt=$(ip netns exec "$ns0" nft list counter inet filter ${ns}${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
- bad_counter "$ns0" $ns$dir "$expect" "check_ns0_counters 4"
+ if ! ip netns exec "$ns0" nft list counter inet filter "${ns}${dir}" | grep -q "$expect";then
+ bad_counter "$ns0" "$ns${dir}" "$expect" "check_ns0_counters 4"
lret=1
fi
expect="packets 1 bytes 104"
- cnt=$(ip netns exec "$ns0" nft list counter inet filter ${ns}${dir}6 | grep -q "$expect")
- if [ $? -ne 0 ]; then
- bad_counter "$ns0" $ns$dir6 "$expect" "check_ns0_counters 5"
+ if ! ip netns exec "$ns0" nft list counter inet filter "${ns}${dir}6" | grep -q "$expect";then
+ bad_counter "$ns0" "$ns${dir}6" "$expect" "check_ns0_counters 5"
lret=1
fi
done
@@ -166,8 +140,8 @@ check_ns0_counters()
reset_counters()
{
- for i in 0 1 2;do
- ip netns exec ns$i-$sfx nft reset counters inet > /dev/null
+ for i in "$ns0" "$ns1" "$ns2" ;do
+ ip netns exec "$i" nft reset counters inet > /dev/null
done
}
@@ -177,7 +151,7 @@ test_local_dnat6()
local lret=0
local IPF=""
- if [ $family = "inet" ];then
+ if [ "$family" = "inet" ];then
IPF="ip6"
fi
@@ -195,8 +169,7 @@ EOF
fi
# ping netns1, expect rewrite to netns2
- ip netns exec "$ns0" ping -q -c 1 dead:1::99 > /dev/null
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns0" ping -q -c 1 dead:1::99 > /dev/null;then
lret=1
echo "ERROR: ping6 failed"
return $lret
@@ -204,8 +177,7 @@ EOF
expect="packets 0 bytes 0"
for dir in "in6" "out6" ; do
- cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns0" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
bad_counter "$ns0" ns1$dir "$expect" "test_local_dnat6 1"
lret=1
fi
@@ -213,8 +185,7 @@ EOF
expect="packets 1 bytes 104"
for dir in "in6" "out6" ; do
- cnt=$(ip netns exec "$ns0" nft list counter inet filter ns2${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns0" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
bad_counter "$ns0" ns2$dir "$expect" "test_local_dnat6 2"
lret=1
fi
@@ -223,8 +194,7 @@ EOF
# expect 0 count in ns1
expect="packets 0 bytes 0"
for dir in "in6" "out6" ; do
- cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then
bad_counter "$ns1" ns0$dir "$expect" "test_local_dnat6 3"
lret=1
fi
@@ -233,8 +203,7 @@ EOF
# expect 1 packet in ns2
expect="packets 1 bytes 104"
for dir in "in6" "out6" ; do
- cnt=$(ip netns exec "$ns2" nft list counter inet filter ns0${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns2" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then
bad_counter "$ns2" ns0$dir "$expect" "test_local_dnat6 4"
lret=1
fi
@@ -252,7 +221,7 @@ test_local_dnat()
local lret=0
local IPF=""
- if [ $family = "inet" ];then
+ if [ "$family" = "inet" ];then
IPF="ip"
fi
@@ -265,7 +234,7 @@ table $family nat {
}
EOF
if [ $? -ne 0 ]; then
- if [ $family = "inet" ];then
+ if [ "$family" = "inet" ];then
echo "SKIP: inet nat tests"
test_inet_nat=false
return $ksft_skip
@@ -275,8 +244,7 @@ EOF
fi
# ping netns1, expect rewrite to netns2
- ip netns exec "$ns0" ping -q -c 1 10.0.1.99 > /dev/null
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns0" ping -q -c 1 10.0.1.99 > /dev/null;then
lret=1
echo "ERROR: ping failed"
return $lret
@@ -284,18 +252,16 @@ EOF
expect="packets 0 bytes 0"
for dir in "in" "out" ; do
- cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
- bad_counter "$ns0" ns1$dir "$expect" "test_local_dnat 1"
+ if ! ip netns exec "$ns0" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+ bad_counter "$ns0" "ns1$dir" "$expect" "test_local_dnat 1"
lret=1
fi
done
expect="packets 1 bytes 84"
for dir in "in" "out" ; do
- cnt=$(ip netns exec "$ns0" nft list counter inet filter ns2${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
- bad_counter "$ns0" ns2$dir "$expect" "test_local_dnat 2"
+ if ! ip netns exec "$ns0" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+ bad_counter "$ns0" "ns2$dir" "$expect" "test_local_dnat 2"
lret=1
fi
done
@@ -303,9 +269,8 @@ EOF
# expect 0 count in ns1
expect="packets 0 bytes 0"
for dir in "in" "out" ; do
- cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
- bad_counter "$ns1" ns0$dir "$expect" "test_local_dnat 3"
+ if ! ip netns exec "$ns1" nft list counter inet filter ns0${dir} | grep -q "$expect";then
+ bad_counter "$ns1" "ns0$dir" "$expect" "test_local_dnat 3"
lret=1
fi
done
@@ -313,20 +278,18 @@ EOF
# expect 1 packet in ns2
expect="packets 1 bytes 84"
for dir in "in" "out" ; do
- cnt=$(ip netns exec "$ns2" nft list counter inet filter ns0${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
- bad_counter "$ns2" ns0$dir "$expect" "test_local_dnat 4"
+ if ! ip netns exec "$ns2" nft list counter inet filter ns0${dir} | grep -q "$expect";then
+ bad_counter "$ns2" "ns0$dir" "$expect" "test_local_dnat 4"
lret=1
fi
done
test $lret -eq 0 && echo "PASS: ping to $ns1 was $family NATted to $ns2"
- ip netns exec "$ns0" nft flush chain $family nat output
+ ip netns exec "$ns0" nft flush chain "$family" nat output
reset_counters
- ip netns exec "$ns0" ping -q -c 1 10.0.1.99 > /dev/null
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns0" ping -q -c 1 10.0.1.99 > /dev/null;then
lret=1
echo "ERROR: ping failed"
return $lret
@@ -334,16 +297,14 @@ EOF
expect="packets 1 bytes 84"
for dir in "in" "out" ; do
- cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns0" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
bad_counter "$ns1" ns1$dir "$expect" "test_local_dnat 5"
lret=1
fi
done
expect="packets 0 bytes 0"
for dir in "in" "out" ; do
- cnt=$(ip netns exec "$ns0" nft list counter inet filter ns2${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns0" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
bad_counter "$ns0" ns2$dir "$expect" "test_local_dnat 6"
lret=1
fi
@@ -352,8 +313,7 @@ EOF
# expect 1 count in ns1
expect="packets 1 bytes 84"
for dir in "in" "out" ; do
- cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then
bad_counter "$ns0" ns0$dir "$expect" "test_local_dnat 7"
lret=1
fi
@@ -362,8 +322,7 @@ EOF
# expect 0 packet in ns2
expect="packets 0 bytes 0"
for dir in "in" "out" ; do
- cnt=$(ip netns exec "$ns2" nft list counter inet filter ns0${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns2" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then
bad_counter "$ns2" ns0$dir "$expect" "test_local_dnat 8"
lret=1
fi
@@ -374,13 +333,19 @@ EOF
return $lret
}
+listener_ready()
+{
+ local ns="$1"
+ local port="$2"
+ local proto="$3"
+ ss -N "$ns" -ln "$proto" -o "sport = :$port" | grep -q "$port"
+}
+
test_local_dnat_portonly()
{
local family=$1
local daddr=$2
local lret=0
- local sr_s
- local sr_r
ip netns exec "$ns0" nft -f /dev/stdin <<EOF
table $family nat {
@@ -392,7 +357,7 @@ table $family nat {
}
EOF
if [ $? -ne 0 ]; then
- if [ $family = "inet" ];then
+ if [ "$family" = "inet" ];then
echo "SKIP: inet port test"
test_inet_nat=false
return
@@ -401,17 +366,16 @@ EOF
return
fi
- echo SERVER-$family | ip netns exec "$ns1" timeout 5 socat -u STDIN TCP-LISTEN:2000 &
- sc_s=$!
+ echo "SERVER-$family" | ip netns exec "$ns1" timeout 3 socat -u STDIN TCP-LISTEN:2000 &
- sleep 1
+ busywait $BUSYWAIT_TIMEOUT listener_ready "$ns1" 2000 "-t"
- result=$(ip netns exec "$ns0" timeout 1 socat TCP:$daddr:2000 STDOUT)
+ result=$(ip netns exec "$ns0" timeout 1 socat -u TCP:"$daddr":2000 STDOUT)
if [ "$result" = "SERVER-inet" ];then
echo "PASS: inet port rewrite without l3 address"
else
- echo "ERROR: inet port rewrite"
+ echo "ERROR: inet port rewrite without l3 address, got $result"
ret=1
fi
}
@@ -424,24 +388,20 @@ test_masquerade6()
ip netns exec "$ns0" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
- ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
- if [ $? -ne 0 ] ; then
+ if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then
echo "ERROR: cannot ping $ns1 from $ns2 via ipv6"
return 1
- lret=1
fi
expect="packets 1 bytes 104"
for dir in "in6" "out6" ; do
- cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
- bad_counter "$ns1" ns2$dir "$expect" "test_masquerade6 1"
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+ bad_counter "$ns1" "ns2$dir" "$expect" "test_masquerade6 1"
lret=1
fi
- cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
- bad_counter "$ns2" ns1$dir "$expect" "test_masquerade6 2"
+ if ! ip netns exec "$ns2" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+ bad_counter "$ns2" "ns1$dir" "$expect" "test_masquerade6 2"
lret=1
fi
done
@@ -462,8 +422,7 @@ EOF
return $ksft_skip
fi
- ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
- if [ $? -ne 0 ] ; then
+ if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then
echo "ERROR: cannot ping $ns1 from $ns2 with active $family masquerade $natflags"
lret=1
fi
@@ -471,14 +430,12 @@ EOF
# ns1 should have seen packets from ns0, due to masquerade
expect="packets 1 bytes 104"
for dir in "in6" "out6" ; do
- cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then
bad_counter "$ns1" ns0$dir "$expect" "test_masquerade6 3"
lret=1
fi
- cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns2" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
bad_counter "$ns2" ns1$dir "$expect" "test_masquerade6 4"
lret=1
fi
@@ -487,27 +444,23 @@ EOF
# ns1 should not have seen packets from ns2, due to masquerade
expect="packets 0 bytes 0"
for dir in "in6" "out6" ; do
- cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
bad_counter "$ns1" ns0$dir "$expect" "test_masquerade6 5"
lret=1
fi
- cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
- bad_counter "$ns0" ns1$dir "$expect" "test_masquerade6 6"
+ if ! ip netns exec "$ns0" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+ bad_counter "$ns0" "ns1$dir" "$expect" "test_masquerade6 6"
lret=1
fi
done
- ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
- if [ $? -ne 0 ] ; then
+ if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then
echo "ERROR: cannot ping $ns1 from $ns2 with active ipv6 masquerade $natflags (attempt 2)"
lret=1
fi
- ip netns exec "$ns0" nft flush chain $family nat postrouting
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns0" nft flush chain "$family" nat postrouting;then
echo "ERROR: Could not flush $family nat postrouting" 1>&2
lret=1
fi
@@ -526,23 +479,20 @@ test_masquerade()
ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
- ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
- if [ $? -ne 0 ] ; then
- echo "ERROR: cannot ping $ns1 from "$ns2" $natflags"
+ if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then
+ echo "ERROR: cannot ping $ns1 from $ns2 $natflags"
lret=1
fi
expect="packets 1 bytes 84"
for dir in "in" "out" ; do
- cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
- bad_counter "$ns1" ns2$dir "$expect" "test_masquerade 1"
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+ bad_counter "$ns1" "ns2$dir" "$expect" "test_masquerade 1"
lret=1
fi
- cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
- bad_counter "$ns2" ns1$dir "$expect" "test_masquerade 2"
+ if ! ip netns exec "$ns2" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+ bad_counter "$ns2" "ns1$dir" "$expect" "test_masquerade 2"
lret=1
fi
done
@@ -563,8 +513,7 @@ EOF
return $ksft_skip
fi
- ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
- if [ $? -ne 0 ] ; then
+ if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then
echo "ERROR: cannot ping $ns1 from $ns2 with active $family masquerade $natflags"
lret=1
fi
@@ -572,15 +521,13 @@ EOF
# ns1 should have seen packets from ns0, due to masquerade
expect="packets 1 bytes 84"
for dir in "in" "out" ; do
- cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
- bad_counter "$ns1" ns0$dir "$expect" "test_masquerade 3"
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then
+ bad_counter "$ns1" "ns0$dir" "$expect" "test_masquerade 3"
lret=1
fi
- cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
- bad_counter "$ns2" ns1$dir "$expect" "test_masquerade 4"
+ if ! ip netns exec "$ns2" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+ bad_counter "$ns2" "ns1$dir" "$expect" "test_masquerade 4"
lret=1
fi
done
@@ -588,27 +535,23 @@ EOF
# ns1 should not have seen packets from ns2, due to masquerade
expect="packets 0 bytes 0"
for dir in "in" "out" ; do
- cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
- bad_counter "$ns1" ns0$dir "$expect" "test_masquerade 5"
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+ bad_counter "$ns1" "ns0$dir" "$expect" "test_masquerade 5"
lret=1
fi
- cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
- bad_counter "$ns0" ns1$dir "$expect" "test_masquerade 6"
+ if ! ip netns exec "$ns0" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+ bad_counter "$ns0" "ns1$dir" "$expect" "test_masquerade 6"
lret=1
fi
done
- ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
- if [ $? -ne 0 ] ; then
+ if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then
echo "ERROR: cannot ping $ns1 from $ns2 with active ip masquerade $natflags (attempt 2)"
lret=1
fi
- ip netns exec "$ns0" nft flush chain $family nat postrouting
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns0" nft flush chain "$family" nat postrouting; then
echo "ERROR: Could not flush $family nat postrouting" 1>&2
lret=1
fi
@@ -625,22 +568,19 @@ test_redirect6()
ip netns exec "$ns0" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
- ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
- if [ $? -ne 0 ] ; then
+ if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then
echo "ERROR: cannnot ping $ns1 from $ns2 via ipv6"
lret=1
fi
expect="packets 1 bytes 104"
for dir in "in6" "out6" ; do
- cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
bad_counter "$ns1" ns2$dir "$expect" "test_redirect6 1"
lret=1
fi
- cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns2" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
bad_counter "$ns2" ns1$dir "$expect" "test_redirect6 2"
lret=1
fi
@@ -662,8 +602,7 @@ EOF
return $ksft_skip
fi
- ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null # ping ns2->ns1
- if [ $? -ne 0 ] ; then
+ if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then
echo "ERROR: cannot ping $ns1 from $ns2 via ipv6 with active $family redirect"
lret=1
fi
@@ -671,8 +610,7 @@ EOF
# ns1 should have seen no packets from ns2, due to redirection
expect="packets 0 bytes 0"
for dir in "in6" "out6" ; do
- cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
bad_counter "$ns1" ns0$dir "$expect" "test_redirect6 3"
lret=1
fi
@@ -681,15 +619,13 @@ EOF
# ns0 should have seen packets from ns2, due to masquerade
expect="packets 1 bytes 104"
for dir in "in6" "out6" ; do
- cnt=$(ip netns exec "$ns0" nft list counter inet filter ns2${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns0" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
bad_counter "$ns1" ns0$dir "$expect" "test_redirect6 4"
lret=1
fi
done
- ip netns exec "$ns0" nft delete table $family nat
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns0" nft delete table "$family" nat;then
echo "ERROR: Could not delete $family nat table" 1>&2
lret=1
fi
@@ -707,22 +643,19 @@ test_redirect()
ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
- ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
- if [ $? -ne 0 ] ; then
+ if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then
echo "ERROR: cannot ping $ns1 from $ns2"
lret=1
fi
expect="packets 1 bytes 84"
for dir in "in" "out" ; do
- cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
- bad_counter "$ns1" $ns2$dir "$expect" "test_redirect 1"
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+ bad_counter "$ns1" "$ns2$dir" "$expect" "test_redirect 1"
lret=1
fi
- cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect";then
bad_counter "$ns2" ns1$dir "$expect" "test_redirect 2"
lret=1
fi
@@ -744,8 +677,7 @@ EOF
return $ksft_skip
fi
- ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
- if [ $? -ne 0 ] ; then
+ if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then
echo "ERROR: cannot ping $ns1 from $ns2 with active $family ip redirect"
lret=1
fi
@@ -754,8 +686,7 @@ EOF
expect="packets 0 bytes 0"
for dir in "in" "out" ; do
- cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
bad_counter "$ns1" ns0$dir "$expect" "test_redirect 3"
lret=1
fi
@@ -764,15 +695,13 @@ EOF
# ns0 should have seen packets from ns2, due to masquerade
expect="packets 1 bytes 84"
for dir in "in" "out" ; do
- cnt=$(ip netns exec "$ns0" nft list counter inet filter ns2${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns0" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
bad_counter "$ns0" ns0$dir "$expect" "test_redirect 4"
lret=1
fi
done
- ip netns exec "$ns0" nft delete table $family nat
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns0" nft delete table "$family" nat;then
echo "ERROR: Could not delete $family nat table" 1>&2
lret=1
fi
@@ -803,13 +732,13 @@ test_port_shadow()
# make shadow entry, from client (ns2), going to (ns1), port 41404, sport 1405.
echo "fake-entry" | ip netns exec "$ns2" timeout 1 socat -u STDIN UDP:"$daddrc":41404,sourceport=1405
- echo ROUTER | ip netns exec "$ns0" timeout 5 socat -u STDIN UDP4-LISTEN:1405 &
- sc_r=$!
+ echo ROUTER | ip netns exec "$ns0" timeout 3 socat -T 3 -u STDIN UDP4-LISTEN:1405 2>/dev/null &
+ local sc_r=$!
+ echo CLIENT | ip netns exec "$ns2" timeout 3 socat -T 3 -u STDIN UDP4-LISTEN:1405,reuseport 2>/dev/null &
+ local sc_c=$!
- echo CLIENT | ip netns exec "$ns2" timeout 5 socat -u STDIN UDP4-LISTEN:1405,reuseport &
- sc_c=$!
-
- sleep 0.3
+ busywait $BUSYWAIT_TIMEOUT listener_ready "$ns0" 1405 "-u"
+ busywait $BUSYWAIT_TIMEOUT listener_ready "$ns2" 1405 "-u"
# ns1 tries to connect to ns0:1405. With default settings this should connect
# to client, it matches the conntrack entry created above.
@@ -846,7 +775,7 @@ table $family filter {
EOF
test_port_shadow "port-filter" "ROUTER"
- ip netns exec "$ns0" nft delete table $family filter
+ ip netns exec "$ns0" nft delete table "$family" filter
}
# This prevents port shadow of router service via notrack.
@@ -868,7 +797,7 @@ table $family raw {
EOF
test_port_shadow "port-notrack" "ROUTER"
- ip netns exec "$ns0" nft delete table $family raw
+ ip netns exec "$ns0" nft delete table "$family" raw
}
# This prevents port shadow of router service via sport remap.
@@ -886,21 +815,19 @@ table $family pat {
EOF
test_port_shadow "pat" "ROUTER"
- ip netns exec "$ns0" nft delete table $family pat
+ ip netns exec "$ns0" nft delete table "$family" pat
}
test_port_shadowing()
{
local family="ip"
- conntrack -h >/dev/null 2>&1
- if [ $? -ne 0 ];then
+ if ! conntrack -h >/dev/null 2>&1;then
echo "SKIP: Could not run nat port shadowing test without conntrack tool"
return
fi
- socat -h > /dev/null 2>&1
- if [ $? -ne 0 ];then
+ if ! socat -h > /dev/null 2>&1;then
echo "SKIP: Could not run nat port shadowing test without socat tool"
return
fi
@@ -946,8 +873,7 @@ test_stateless_nat_ip()
ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
- ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
- if [ $? -ne 0 ] ; then
+ if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then
echo "ERROR: cannot ping $ns1 from $ns2 before loading stateless rules"
return 1
fi
@@ -981,23 +907,20 @@ EOF
reset_counters
- ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
- if [ $? -ne 0 ] ; then
+ if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null; then
echo "ERROR: cannot ping $ns1 from $ns2 with stateless rules"
lret=1
fi
# ns1 should have seen packets from .2.2, due to stateless rewrite.
expect="packets 1 bytes 84"
- cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect";then
bad_counter "$ns1" ns0insl "$expect" "test_stateless 1"
lret=1
fi
for dir in "in" "out" ; do
- cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect";then
bad_counter "$ns2" ns1$dir "$expect" "test_stateless 2"
lret=1
fi
@@ -1006,14 +929,12 @@ EOF
# ns1 should not have seen packets from ns2, due to masquerade
expect="packets 0 bytes 0"
for dir in "in" "out" ; do
- cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect";then
bad_counter "$ns1" ns0$dir "$expect" "test_stateless 3"
lret=1
fi
- cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect";then
bad_counter "$ns0" ns1$dir "$expect" "test_stateless 4"
lret=1
fi
@@ -1021,8 +942,7 @@ EOF
reset_counters
- socat -h > /dev/null 2>&1
- if [ $? -ne 0 ];then
+ if ! socat -h > /dev/null 2>&1;then
echo "SKIP: Could not run stateless nat frag test without socat tool"
if [ $lret -eq 0 ]; then
return $ksft_skip
@@ -1032,42 +952,36 @@ EOF
return $lret
fi
- local tmpfile=$(mktemp)
- dd if=/dev/urandom of=$tmpfile bs=4096 count=1 2>/dev/null
+ dd if=/dev/urandom of="$INFILE" bs=4096 count=1 2>/dev/null
- local outfile=$(mktemp)
- ip netns exec "$ns1" timeout 3 socat -u UDP4-RECV:4233 OPEN:$outfile < /dev/null &
- sc_r=$!
+ ip netns exec "$ns1" timeout 3 socat -u UDP4-RECV:4233 OPEN:"$OUTFILE" < /dev/null 2>/dev/null &
+
+ busywait $BUSYWAIT_TIMEOUT listener_ready "$ns1" 4233 "-u"
- sleep 1
# re-do with large ping -> ip fragmentation
- ip netns exec "$ns2" timeout 3 socat - UDP4-SENDTO:"10.0.1.99:4233" < "$tmpfile" > /dev/null
- if [ $? -ne 0 ] ; then
+ if ! ip netns exec "$ns2" timeout 3 socat -u STDIN UDP4-SENDTO:"10.0.1.99:4233" < "$INFILE" > /dev/null;then
echo "ERROR: failed to test udp $ns1 to $ns2 with stateless ip nat" 1>&2
lret=1
fi
wait
- cmp "$tmpfile" "$outfile"
- if [ $? -ne 0 ]; then
- ls -l "$tmpfile" "$outfile"
+ if ! cmp "$INFILE" "$OUTFILE";then
+ ls -l "$INFILE" "$OUTFILE"
echo "ERROR: in and output file mismatch when checking udp with stateless nat" 1>&2
lret=1
fi
- rm -f "$tmpfile" "$outfile"
+ :> "$OUTFILE"
# ns1 should have seen packets from 2.2, due to stateless rewrite.
expect="packets 3 bytes 4164"
- cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect")
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect";then
bad_counter "$ns1" ns0insl "$expect" "test_stateless 5"
lret=1
fi
- ip netns exec "$ns0" nft delete table ip stateless
- if [ $? -ne 0 ]; then
+ if ! ip netns exec "$ns0" nft delete table ip stateless; then
echo "ERROR: Could not delete table ip stateless" 1>&2
lret=1
fi
@@ -1078,8 +992,8 @@ EOF
}
# ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99
-for i in 0 1 2; do
-ip netns exec ns$i-$sfx nft -f /dev/stdin <<EOF
+for i in "$ns0" "$ns1" "$ns2" ;do
+ip netns exec "$i" nft -f /dev/stdin <<EOF
table inet filter {
counter ns0in {}
counter ns1in {}
@@ -1145,7 +1059,7 @@ done
# special case for stateless nat check, counter needs to
# be done before (input) ip defragmentation
-ip netns exec ns1-$sfx nft -f /dev/stdin <<EOF
+ip netns exec "$ns1" nft -f /dev/stdin <<EOF
table inet filter {
counter ns0insl {}
@@ -1156,31 +1070,49 @@ table inet filter {
}
EOF
-sleep 3
-# test basic connectivity
-for i in 1 2; do
- ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99 > /dev/null
- if [ $? -ne 0 ];then
- echo "ERROR: Could not reach other namespace(s)" 1>&2
- ret=1
- fi
-
- ip netns exec "$ns0" ping -c 1 -q dead:$i::99 > /dev/null
- if [ $? -ne 0 ];then
- echo "ERROR: Could not reach other namespace(s) via ipv6" 1>&2
- ret=1
- fi
- check_counters ns$i-$sfx
- if [ $? -ne 0 ]; then
- ret=1
- fi
-
- check_ns0_counters ns$i
- if [ $? -ne 0 ]; then
- ret=1
- fi
- reset_counters
-done
+ping_basic()
+{
+ i="$1"
+ if ! ip netns exec "$ns0" ping -c 1 -q 10.0."$i".99 > /dev/null;then
+ echo "ERROR: Could not reach other namespace(s)" 1>&2
+ ret=1
+ fi
+
+ if ! ip netns exec "$ns0" ping -c 1 -q dead:"$i"::99 > /dev/null;then
+ echo "ERROR: Could not reach other namespace(s) via ipv6" 1>&2
+ ret=1
+ fi
+}
+
+test_basic_conn()
+{
+ local nsexec
+ name="$1"
+
+ nsexec=$(eval echo \$"$1")
+
+ ping_basic 1
+ ping_basic 2
+
+ if ! check_counters "$nsexec";then
+ return 1
+ fi
+
+ if ! check_ns0_counters "$name";then
+ return 1
+ fi
+
+ reset_counters
+ return 0
+}
+
+if ! test_basic_conn "ns1" ; then
+ echo "ERROR: basic test for ns1 failed" 1>&2
+ exit 1
+fi
+if ! test_basic_conn "ns2"; then
+ echo "ERROR: basic test for ns1 failed" 1>&2
+fi
if [ $ret -eq 0 ];then
echo "PASS: netns routing/connectivity: $ns0 can reach $ns1 and $ns2"
diff --git a/tools/testing/selftests/netfilter/nft_nat_zones.sh b/tools/testing/selftests/net/netfilter/nft_nat_zones.sh
index b9ab37380f33..3b81d88bdde3 100755
--- a/tools/testing/selftests/netfilter/nft_nat_zones.sh
+++ b/tools/testing/selftests/net/netfilter/nft_nat_zones.sh
@@ -3,17 +3,17 @@
# Test connection tracking zone and NAT source port reallocation support.
#
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
+source lib.sh
# Don't increase too much, 2000 clients should work
# just fine but script can then take several minutes with
# KASAN/debug builds.
maxclients=100
-have_iperf=1
+have_socat=0
ret=0
+[ "$KSFT_MACHINE_SLOW" = yes ] && maxclients=40
# client1---.
# veth1-.
# |
@@ -31,12 +31,6 @@ ret=0
# NAT Gateway is supposed to do port reallocation for each of the
# connections.
-sfx=$(mktemp -u "XXXXXXXX")
-gw="ns-gw-$sfx"
-cl1="ns-cl1-$sfx"
-cl2="ns-cl2-$sfx"
-srv="ns-srv-$sfx"
-
v4gc1=$(sysctl -n net.ipv4.neigh.default.gc_thresh1 2>/dev/null)
v4gc2=$(sysctl -n net.ipv4.neigh.default.gc_thresh2 2>/dev/null)
v4gc3=$(sysctl -n net.ipv4.neigh.default.gc_thresh3 2>/dev/null)
@@ -46,61 +40,29 @@ v6gc3=$(sysctl -n net.ipv6.neigh.default.gc_thresh3 2>/dev/null)
cleanup()
{
- ip netns del $gw
- ip netns del $srv
- for i in $(seq 1 $maxclients); do
- ip netns del ns-cl$i-$sfx 2>/dev/null
- done
-
- sysctl -q net.ipv4.neigh.default.gc_thresh1=$v4gc1 2>/dev/null
- sysctl -q net.ipv4.neigh.default.gc_thresh2=$v4gc2 2>/dev/null
- sysctl -q net.ipv4.neigh.default.gc_thresh3=$v4gc3 2>/dev/null
- sysctl -q net.ipv6.neigh.default.gc_thresh1=$v6gc1 2>/dev/null
- sysctl -q net.ipv6.neigh.default.gc_thresh2=$v6gc2 2>/dev/null
- sysctl -q net.ipv6.neigh.default.gc_thresh3=$v6gc3 2>/dev/null
+ cleanup_all_ns
+
+ sysctl -q net.ipv4.neigh.default.gc_thresh1="$v4gc1" 2>/dev/null
+ sysctl -q net.ipv4.neigh.default.gc_thresh2="$v4gc2" 2>/dev/null
+ sysctl -q net.ipv4.neigh.default.gc_thresh3="$v4gc3" 2>/dev/null
+ sysctl -q net.ipv6.neigh.default.gc_thresh1="$v6gc1" 2>/dev/null
+ sysctl -q net.ipv6.neigh.default.gc_thresh2="$v6gc2" 2>/dev/null
+ sysctl -q net.ipv6.neigh.default.gc_thresh3="$v6gc3" 2>/dev/null
}
-nft --version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without nft tool"
- exit $ksft_skip
-fi
+checktool "nft --version" echo "run test without nft tool"
+checktool "conntrack -V" "run test without conntrack tool"
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
+if socat -h >/dev/null 2>&1; then
+ have_socat=1
fi
-conntrack -V > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without conntrack tool"
- exit $ksft_skip
-fi
-
-iperf3 -v >/dev/null 2>&1
-if [ $? -ne 0 ];then
- have_iperf=0
-fi
-
-ip netns add "$gw"
-if [ $? -ne 0 ];then
- echo "SKIP: Could not create net namespace $gw"
- exit $ksft_skip
-fi
-ip -net "$gw" link set lo up
+setup_ns gw srv
trap cleanup EXIT
-ip netns add "$srv"
-if [ $? -ne 0 ];then
- echo "SKIP: Could not create server netns $srv"
- exit $ksft_skip
-fi
-
ip link add veth0 netns "$gw" type veth peer name eth0 netns "$srv"
ip -net "$gw" link set veth0 up
-ip -net "$srv" link set lo up
ip -net "$srv" link set eth0 up
sysctl -q net.ipv6.neigh.default.gc_thresh1=512 2>/dev/null
@@ -110,55 +72,49 @@ sysctl -q net.ipv4.neigh.default.gc_thresh1=512 2>/dev/null
sysctl -q net.ipv4.neigh.default.gc_thresh2=1024 2>/dev/null
sysctl -q net.ipv4.neigh.default.gc_thresh3=4096 2>/dev/null
-for i in $(seq 1 $maxclients);do
- cl="ns-cl$i-$sfx"
+for i in $(seq 1 "$maxclients");do
+ setup_ns "cl$i"
- ip netns add "$cl"
- if [ $? -ne 0 ];then
- echo "SKIP: Could not create client netns $cl"
- exit $ksft_skip
- fi
- ip link add veth$i netns "$gw" type veth peer name eth0 netns "$cl" > /dev/null 2>&1
- if [ $? -ne 0 ];then
+ cl=$(eval echo \$cl"$i")
+ if ! ip link add veth"$i" netns "$gw" type veth peer name eth0 netns "$cl" > /dev/null 2>&1;then
echo "SKIP: No virtual ethernet pair device support in kernel"
exit $ksft_skip
fi
done
-for i in $(seq 1 $maxclients);do
- cl="ns-cl$i-$sfx"
- echo netns exec "$cl" ip link set lo up
+for i in $(seq 1 "$maxclients");do
+ cl=$(eval echo \$cl"$i")
echo netns exec "$cl" ip link set eth0 up
echo netns exec "$cl" sysctl -q net.ipv4.tcp_syn_retries=2
- echo netns exec "$gw" ip link set veth$i up
- echo netns exec "$gw" sysctl -q net.ipv4.conf.veth$i.arp_ignore=2
- echo netns exec "$gw" sysctl -q net.ipv4.conf.veth$i.rp_filter=0
+ echo netns exec "$gw" ip link set "veth$i" up
+ echo netns exec "$gw" sysctl -q net.ipv4.conf.veth"$i".arp_ignore=2
+ echo netns exec "$gw" sysctl -q net.ipv4.conf.veth"$i".rp_filter=0
# clients have same IP addresses.
echo netns exec "$cl" ip addr add 10.1.0.3/24 dev eth0
- echo netns exec "$cl" ip addr add dead:1::3/64 dev eth0
+ echo netns exec "$cl" ip addr add dead:1::3/64 dev eth0 nodad
echo netns exec "$cl" ip route add default via 10.1.0.2 dev eth0
echo netns exec "$cl" ip route add default via dead:1::2 dev eth0
# NB: same addresses on client-facing interfaces.
- echo netns exec "$gw" ip addr add 10.1.0.2/24 dev veth$i
- echo netns exec "$gw" ip addr add dead:1::2/64 dev veth$i
+ echo netns exec "$gw" ip addr add 10.1.0.2/24 dev "veth$i"
+ echo netns exec "$gw" ip addr add dead:1::2/64 dev "veth$i" nodad
# gw: policy routing
- echo netns exec "$gw" ip route add 10.1.0.0/24 dev veth$i table $((1000+i))
- echo netns exec "$gw" ip route add dead:1::0/64 dev veth$i table $((1000+i))
+ echo netns exec "$gw" ip route add 10.1.0.0/24 dev "veth$i" table $((1000+i))
+ echo netns exec "$gw" ip route add dead:1::0/64 dev "veth$i" table $((1000+i))
echo netns exec "$gw" ip route add 10.3.0.0/24 dev veth0 table $((1000+i))
echo netns exec "$gw" ip route add dead:3::0/64 dev veth0 table $((1000+i))
- echo netns exec "$gw" ip rule add fwmark $i lookup $((1000+i))
+ echo netns exec "$gw" ip rule add fwmark "$i" lookup $((1000+i))
done | ip -batch /dev/stdin
ip -net "$gw" addr add 10.3.0.1/24 dev veth0
-ip -net "$gw" addr add dead:3::1/64 dev veth0
+ip -net "$gw" addr add dead:3::1/64 dev veth0 nodad
ip -net "$srv" addr add 10.3.0.99/24 dev eth0
-ip -net "$srv" addr add dead:3::99/64 dev eth0
+ip -net "$srv" addr add dead:3::99/64 dev eth0 nodad
-ip netns exec $gw nft -f /dev/stdin<<EOF
+ip netns exec "$gw" nft -f /dev/stdin<<EOF
table inet raw {
map iiftomark {
type ifname : mark
@@ -203,18 +159,22 @@ table inet raw {
}
}
EOF
+if [ "$?" -ne 0 ];then
+ echo "SKIP: Could not add nftables rules"
+ exit $ksft_skip
+fi
( echo add element inet raw iiftomark \{
for i in $(seq 1 $((maxclients-1))); do
- echo \"veth$i\" : $i,
+ echo \"veth"$i"\" : "$i",
done
- echo \"veth$maxclients\" : $maxclients \}
+ echo \"veth"$maxclients"\" : "$maxclients" \}
echo add element inet raw iiftozone \{
for i in $(seq 1 $((maxclients-1))); do
- echo \"veth$i\" : $i,
+ echo \"veth"$i"\" : "$i",
done
echo \"veth$maxclients\" : $maxclients \}
-) | ip netns exec $gw nft -f /dev/stdin
+) | ip netns exec "$gw" nft -f /dev/stdin
ip netns exec "$gw" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
ip netns exec "$gw" sysctl -q net.ipv6.conf.all.forwarding=1 > /dev/null
@@ -224,73 +184,72 @@ ip netns exec "$gw" sysctl -q net.ipv4.conf.all.rp_filter=0 >/dev/null
ip netns exec "$gw" sysctl -q net.ipv4.fwmark_reflect=1 > /dev/null
ip netns exec "$gw" sysctl -q net.ipv6.fwmark_reflect=1 > /dev/null
-for i in $(seq 1 $maxclients); do
- cl="ns-cl$i-$sfx"
- ip netns exec $cl ping -i 0.5 -q -c 3 10.3.0.99 > /dev/null 2>&1 &
- if [ $? -ne 0 ]; then
- echo FAIL: Ping failure from $cl 1>&2
- ret=1
- break
- fi
+for i in $(seq 1 "$maxclients"); do
+ cl=$(eval echo \$cl"$i")
+ ip netns exec "$cl" ping -i 0.5 -q -c 3 10.3.0.99 > /dev/null 2>&1 &
done
-wait
+wait || ret=1
-for i in $(seq 1 $maxclients); do
- ip netns exec $gw nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" | grep -q "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 counter packets 3 bytes 252 }"
- if [ $? -ne 0 ];then
+[ "$ret" -ne 0 ] && "FAIL: Ping failure from $cl" 1>&2
+
+for i in $(seq 1 "$maxclients"); do
+ if ! ip netns exec "$gw" nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" | grep -q "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 counter packets 3 bytes 252 }"; then
ret=1
echo "FAIL: counter icmp mismatch for veth$i" 1>&2
- ip netns exec $gw nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" 1>&2
+ ip netns exec "$gw" nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" 1>&2
break
fi
done
-ip netns exec $gw nft get element inet raw inicmp "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 }" | grep -q "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * $maxclients)) bytes $((252 * $maxclients)) }"
-if [ $? -ne 0 ];then
+if ! ip netns exec "$gw" nft get element inet raw inicmp "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 }" | grep -q "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * maxclients)) bytes $((252 * maxclients)) }"; then
ret=1
- echo "FAIL: counter icmp mismatch for veth0: { 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * $maxclients)) bytes $((252 * $maxclients)) }"
- ip netns exec $gw nft get element inet raw inicmp "{ 10.3.99 . \"veth0\" . 10.3.0.1 }" 1>&2
+ echo "FAIL: counter icmp mismatch for veth0: { 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * maxclients)) bytes $((252 * maxclients)) }"
+ ip netns exec "$gw" nft get element inet raw inicmp "{ 10.3.99 . \"veth0\" . 10.3.0.1 }" 1>&2
fi
-if [ $ret -eq 0 ]; then
+if [ $ret -eq 0 ]; then
echo "PASS: ping test from all $maxclients namespaces"
fi
-if [ $have_iperf -eq 0 ];then
- echo "SKIP: iperf3 not installed"
+if [ $have_socat -eq 0 ];then
+ echo "SKIP: socat not installed"
if [ $ret -ne 0 ];then
exit $ret
fi
exit $ksft_skip
fi
-ip netns exec $srv iperf3 -s > /dev/null 2>&1 &
-iperfpid=$!
-sleep 1
+listener_ready()
+{
+ ss -N "$1" -lnt -o "sport = :5201" | grep -q 5201
+}
+
+ip netns exec "$srv" socat -u TCP-LISTEN:5201,fork STDOUT > /dev/null 2>/dev/null &
+socatpid=$!
+
+busywait 1000 listener_ready "$srv"
-for i in $(seq 1 $maxclients); do
+for i in $(seq 1 "$maxclients"); do
if [ $ret -ne 0 ]; then
break
fi
- cl="ns-cl$i-$sfx"
- ip netns exec $cl iperf3 -c 10.3.0.99 --cport 10000 -n 1 > /dev/null
- if [ $? -ne 0 ]; then
- echo FAIL: Failure to connect for $cl 1>&2
- ip netns exec $gw conntrack -S 1>&2
+ cl=$(eval echo \$cl"$i")
+ if ! ip netns exec "$cl" socat -4 -u STDIN TCP:10.3.0.99:5201,sourceport=10000 < /dev/null > /dev/null; then
+ echo "FAIL: Failure to connect for $cl" 1>&2
+ ip netns exec "$gw" conntrack -S 1>&2
ret=1
fi
done
if [ $ret -eq 0 ];then
- echo "PASS: iperf3 connections for all $maxclients net namespaces"
+ echo "PASS: socat connections for all $maxclients net namespaces"
fi
-kill $iperfpid
+kill $socatpid
wait
-for i in $(seq 1 $maxclients); do
- ip netns exec $gw nft get element inet raw inflows "{ 10.1.0.3 . 10000 . \"veth$i\" . 10.3.0.99 . 5201 }" > /dev/null
- if [ $? -ne 0 ];then
+for i in $(seq 1 "$maxclients"); do
+ if ! ip netns exec "$gw" nft get element inet raw inflows "{ 10.1.0.3 . 10000 . \"veth$i\" . 10.3.0.99 . 5201 }" > /dev/null;then
ret=1
echo "FAIL: can't find expected tcp entry for veth$i" 1>&2
break
@@ -300,8 +259,7 @@ if [ $ret -eq 0 ];then
echo "PASS: Found client connection for all $maxclients net namespaces"
fi
-ip netns exec $gw nft get element inet raw inflows "{ 10.3.0.99 . 5201 . \"veth0\" . 10.3.0.1 . 10000 }" > /dev/null
-if [ $? -ne 0 ];then
+if ! ip netns exec "$gw" nft get element inet raw inflows "{ 10.3.0.99 . 5201 . \"veth0\" . 10.3.0.1 . 10000 }" > /dev/null;then
ret=1
echo "FAIL: cannot find return entry on veth0" 1>&2
fi
diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh
new file mode 100755
index 000000000000..c61d23a8c88d
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_queue.sh
@@ -0,0 +1,454 @@
+#!/bin/bash
+#
+# This tests nf_queue:
+# 1. can process packets from all hooks
+# 2. support running nfqueue from more than one base chain
+#
+# shellcheck disable=SC2162,SC2317
+
+source lib.sh
+ret=0
+timeout=2
+
+cleanup()
+{
+ ip netns pids "$ns1" | xargs kill 2>/dev/null
+ ip netns pids "$ns2" | xargs kill 2>/dev/null
+ ip netns pids "$nsrouter" | xargs kill 2>/dev/null
+
+ cleanup_all_ns
+
+ rm -f "$TMPINPUT"
+ rm -f "$TMPFILE0"
+ rm -f "$TMPFILE1"
+ rm -f "$TMPFILE2" "$TMPFILE3"
+}
+
+checktool "nft --version" "test without nft tool"
+
+trap cleanup EXIT
+
+setup_ns ns1 ns2 nsrouter
+
+TMPFILE0=$(mktemp)
+TMPFILE1=$(mktemp)
+TMPFILE2=$(mktemp)
+TMPFILE3=$(mktemp)
+
+TMPINPUT=$(mktemp)
+dd conv=sparse status=none if=/dev/zero bs=1M count=200 of="$TMPINPUT"
+
+if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then
+ echo "SKIP: No virtual ethernet pair device support in kernel"
+ exit $ksft_skip
+fi
+ip link add veth1 netns "$nsrouter" type veth peer name eth0 netns "$ns2"
+
+ip -net "$nsrouter" link set veth0 up
+ip -net "$nsrouter" addr add 10.0.1.1/24 dev veth0
+ip -net "$nsrouter" addr add dead:1::1/64 dev veth0 nodad
+
+ip -net "$nsrouter" link set veth1 up
+ip -net "$nsrouter" addr add 10.0.2.1/24 dev veth1
+ip -net "$nsrouter" addr add dead:2::1/64 dev veth1 nodad
+
+ip -net "$ns1" link set eth0 up
+ip -net "$ns2" link set eth0 up
+
+ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
+ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad
+ip -net "$ns1" route add default via 10.0.1.1
+ip -net "$ns1" route add default via dead:1::1
+
+ip -net "$ns2" addr add 10.0.2.99/24 dev eth0
+ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad
+ip -net "$ns2" route add default via 10.0.2.1
+ip -net "$ns2" route add default via dead:2::1
+
+load_ruleset() {
+ local name=$1
+ local prio=$2
+
+ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
+table inet $name {
+ chain nfq {
+ ip protocol icmp queue bypass
+ icmpv6 type { "echo-request", "echo-reply" } queue num 1 bypass
+ }
+ chain pre {
+ type filter hook prerouting priority $prio; policy accept;
+ jump nfq
+ }
+ chain input {
+ type filter hook input priority $prio; policy accept;
+ jump nfq
+ }
+ chain forward {
+ type filter hook forward priority $prio; policy accept;
+ tcp dport 12345 queue num 2
+ jump nfq
+ }
+ chain output {
+ type filter hook output priority $prio; policy accept;
+ tcp dport 12345 queue num 3
+ tcp sport 23456 queue num 3
+ jump nfq
+ }
+ chain post {
+ type filter hook postrouting priority $prio; policy accept;
+ jump nfq
+ }
+}
+EOF
+}
+
+load_counter_ruleset() {
+ local prio=$1
+
+ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
+table inet countrules {
+ chain pre {
+ type filter hook prerouting priority $prio; policy accept;
+ counter
+ }
+ chain input {
+ type filter hook input priority $prio; policy accept;
+ counter
+ }
+ chain forward {
+ type filter hook forward priority $prio; policy accept;
+ counter
+ }
+ chain output {
+ type filter hook output priority $prio; policy accept;
+ counter
+ }
+ chain post {
+ type filter hook postrouting priority $prio; policy accept;
+ counter
+ }
+}
+EOF
+}
+
+test_ping() {
+ if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.99 > /dev/null; then
+ return 1
+ fi
+
+ if ! ip netns exec "$ns1" ping -c 1 -q dead:2::99 > /dev/null; then
+ return 2
+ fi
+
+ return 0
+}
+
+test_ping_router() {
+ if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.1 > /dev/null; then
+ return 3
+ fi
+
+ if ! ip netns exec "$ns1" ping -c 1 -q dead:2::1 > /dev/null; then
+ return 4
+ fi
+
+ return 0
+}
+
+test_queue_blackhole() {
+ local proto=$1
+
+ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
+table $proto blackh {
+ chain forward {
+ type filter hook forward priority 0; policy accept;
+ queue num 600
+ }
+}
+EOF
+ if [ "$proto" = "ip" ] ;then
+ ip netns exec "$ns1" ping -W 2 -c 1 -q 10.0.2.99 > /dev/null
+ lret=$?
+ elif [ "$proto" = "ip6" ]; then
+ ip netns exec "$ns1" ping -W 2 -c 1 -q dead:2::99 > /dev/null
+ lret=$?
+ else
+ lret=111
+ fi
+
+ # queue without bypass keyword should drop traffic if no listener exists.
+ if [ "$lret" -eq 0 ];then
+ echo "FAIL: $proto expected failure, got $lret" 1>&2
+ exit 1
+ fi
+
+ if ! ip netns exec "$nsrouter" nft delete table "$proto" blackh; then
+ echo "FAIL: $proto: Could not delete blackh table"
+ exit 1
+ fi
+
+ echo "PASS: $proto: statement with no listener results in packet drop"
+}
+
+nf_queue_wait()
+{
+ local procfile="/proc/self/net/netfilter/nfnetlink_queue"
+ local netns id
+
+ netns="$1"
+ id="$2"
+
+ # if this file doesn't exist, nfnetlink_module isn't loaded.
+ # rather than loading it ourselves, wait for kernel module autoload
+ # completion, nfnetlink should do so automatically because nf_queue
+ # helper program, spawned in the background, asked for this functionality.
+ test -f "$procfile" &&
+ ip netns exec "$netns" cat "$procfile" | grep -q "^ *$id "
+}
+
+test_queue()
+{
+ local expected="$1"
+ local last=""
+
+ # spawn nf_queue listeners
+ ip netns exec "$nsrouter" ./nf_queue -c -q 0 -t $timeout > "$TMPFILE0" &
+ ip netns exec "$nsrouter" ./nf_queue -c -q 1 -t $timeout > "$TMPFILE1" &
+
+ busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 0
+ busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 1
+
+ if ! test_ping;then
+ echo "FAIL: netns routing/connectivity with active listener on queues 0 and 1: $ret" 1>&2
+ exit $ret
+ fi
+
+ if ! test_ping_router;then
+ echo "FAIL: netns router unreachable listener on queue 0 and 1: $ret" 1>&2
+ exit $ret
+ fi
+
+ wait
+ ret=$?
+
+ for file in $TMPFILE0 $TMPFILE1; do
+ last=$(tail -n1 "$file")
+ if [ x"$last" != x"$expected packets total" ]; then
+ echo "FAIL: Expected $expected packets total, but got $last" 1>&2
+ ip netns exec "$nsrouter" nft list ruleset
+ exit 1
+ fi
+ done
+
+ echo "PASS: Expected and received $last"
+}
+
+listener_ready()
+{
+ ss -N "$1" -lnt -o "sport = :12345" | grep -q 12345
+}
+
+test_tcp_forward()
+{
+ ip netns exec "$nsrouter" ./nf_queue -q 2 -t "$timeout" &
+ local nfqpid=$!
+
+ timeout 5 ip netns exec "$ns2" socat -u TCP-LISTEN:12345 STDOUT >/dev/null &
+ local rpid=$!
+
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2"
+
+ ip netns exec "$ns1" socat -u STDIN TCP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null
+
+ wait "$rpid" && echo "PASS: tcp and nfqueue in forward chain"
+}
+
+test_tcp_localhost()
+{
+ dd conv=sparse status=none if=/dev/zero bs=1M count=200 of="$TMPINPUT"
+ timeout 5 ip netns exec "$nsrouter" socat -u TCP-LISTEN:12345 STDOUT >/dev/null &
+ local rpid=$!
+
+ ip netns exec "$nsrouter" ./nf_queue -q 3 -t "$timeout" &
+ local nfqpid=$!
+
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter"
+
+ ip netns exec "$nsrouter" socat -u STDIN TCP:127.0.0.1:12345 <"$TMPINPUT" >/dev/null
+
+ wait "$rpid" && echo "PASS: tcp via loopback"
+ wait 2>/dev/null
+}
+
+test_tcp_localhost_connectclose()
+{
+ ip netns exec "$nsrouter" ./connect_close -p 23456 -t "$timeout" &
+ ip netns exec "$nsrouter" ./nf_queue -q 3 -t "$timeout" &
+
+ busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 3
+
+ wait && echo "PASS: tcp via loopback with connect/close"
+ wait 2>/dev/null
+}
+
+test_tcp_localhost_requeue()
+{
+ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
+flush ruleset
+table inet filter {
+ chain output {
+ type filter hook output priority 0; policy accept;
+ tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0
+ }
+ chain post {
+ type filter hook postrouting priority 0; policy accept;
+ tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0
+ }
+}
+EOF
+ timeout 5 ip netns exec "$nsrouter" socat -u TCP-LISTEN:12345 STDOUT >/dev/null &
+ local rpid=$!
+
+ ip netns exec "$nsrouter" ./nf_queue -c -q 1 -t "$timeout" > "$TMPFILE2" &
+
+ # nfqueue 1 will be called via output hook. But this time,
+ # re-queue the packet to nfqueue program on queue 2.
+ ip netns exec "$nsrouter" ./nf_queue -G -d 150 -c -q 0 -Q 1 -t "$timeout" > "$TMPFILE3" &
+
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter"
+ ip netns exec "$nsrouter" socat -u STDIN TCP:127.0.0.1:12345 <"$TMPINPUT" > /dev/null
+
+ wait
+
+ if ! diff -u "$TMPFILE2" "$TMPFILE3" ; then
+ echo "FAIL: lost packets during requeue?!" 1>&2
+ return
+ fi
+
+ echo "PASS: tcp via loopback and re-queueing"
+}
+
+test_icmp_vrf() {
+ if ! ip -net "$ns1" link add tvrf type vrf table 9876;then
+ echo "SKIP: Could not add vrf device"
+ return
+ fi
+
+ ip -net "$ns1" li set eth0 master tvrf
+ ip -net "$ns1" li set tvrf up
+
+ ip -net "$ns1" route add 10.0.2.0/24 via 10.0.1.1 dev eth0 table 9876
+ip netns exec "$ns1" nft -f /dev/stdin <<EOF
+flush ruleset
+table inet filter {
+ chain output {
+ type filter hook output priority 0; policy accept;
+ meta oifname "tvrf" icmp type echo-request counter queue num 1
+ meta oifname "eth0" icmp type echo-request counter queue num 1
+ }
+ chain post {
+ type filter hook postrouting priority 0; policy accept;
+ meta oifname "tvrf" icmp type echo-request counter queue num 1
+ meta oifname "eth0" icmp type echo-request counter queue num 1
+ }
+}
+EOF
+ ip netns exec "$ns1" ./nf_queue -q 1 -t "$timeout" &
+ local nfqpid=$!
+
+ busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$ns1" 1
+
+ ip netns exec "$ns1" ip vrf exec tvrf ping -c 1 10.0.2.99 > /dev/null
+
+ for n in output post; do
+ for d in tvrf eth0; do
+ if ! ip netns exec "$ns1" nft list chain inet filter "$n" | grep -q "oifname \"$d\" icmp type echo-request counter packets 1"; then
+ echo "FAIL: chain $n: icmp packet counter mismatch for device $d" 1>&2
+ ip netns exec "$ns1" nft list ruleset
+ ret=1
+ return
+ fi
+ done
+ done
+
+ wait "$nfqpid" && echo "PASS: icmp+nfqueue via vrf"
+ wait 2>/dev/null
+}
+
+test_queue_removal()
+{
+ read tainted_then < /proc/sys/kernel/tainted
+
+ ip netns exec "$ns1" nft -f - <<EOF
+flush ruleset
+table ip filter {
+ chain output {
+ type filter hook output priority 0; policy accept;
+ ip protocol icmp queue num 0
+ }
+}
+EOF
+ ip netns exec "$ns1" ./nf_queue -q 0 -d 30000 -t "$timeout" &
+ local nfqpid=$!
+
+ busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$ns1" 0
+
+ ip netns exec "$ns1" ping -w 2 -f -c 10 127.0.0.1 -q >/dev/null
+ kill $nfqpid
+
+ ip netns exec "$ns1" nft flush ruleset
+
+ if [ "$tainted_then" -ne 0 ];then
+ return
+ fi
+
+ read tainted_now < /proc/sys/kernel/tainted
+ if [ "$tainted_now" -eq 0 ];then
+ echo "PASS: queue program exiting while packets queued"
+ else
+ echo "TAINT: queue program exiting while packets queued"
+ ret=1
+ fi
+}
+
+ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+load_ruleset "filter" 0
+
+if test_ping; then
+ # queue bypass works (rules were skipped, no listener)
+ echo "PASS: ${ns1} can reach ${ns2}"
+else
+ echo "FAIL: ${ns1} cannot reach ${ns2}: $ret" 1>&2
+ exit $ret
+fi
+
+test_queue_blackhole ip
+test_queue_blackhole ip6
+
+# dummy ruleset to add base chains between the
+# queueing rules. We don't want the second reinject
+# to re-execute the old hooks.
+load_counter_ruleset 10
+
+# we are hooking all: prerouting/input/forward/output/postrouting.
+# we ping ${ns2} from ${ns1} via ${nsrouter} using ipv4 and ipv6, so:
+# 1x icmp prerouting,forward,postrouting -> 3 queue events (6 incl. reply).
+# 1x icmp prerouting,input,output postrouting -> 4 queue events incl. reply.
+# so we expect that userspace program receives 10 packets.
+test_queue 10
+
+# same. We queue to a second program as well.
+load_ruleset "filter2" 20
+test_queue 20
+
+test_tcp_forward
+test_tcp_localhost
+test_tcp_localhost_connectclose
+test_tcp_localhost_requeue
+test_icmp_vrf
+test_queue_removal
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/nft_synproxy.sh b/tools/testing/selftests/net/netfilter/nft_synproxy.sh
new file mode 100755
index 000000000000..293f667a6aec
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_synproxy.sh
@@ -0,0 +1,96 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+ret=0
+
+checktool "nft --version" "run test without nft tool"
+checktool "iperf3 --version" "run test without iperf3"
+
+setup_ns nsr ns1 ns2
+
+modprobe -q nf_conntrack
+
+cleanup() {
+ ip netns pids "$ns1" | xargs kill 2>/dev/null
+ ip netns pids "$ns2" | xargs kill 2>/dev/null
+
+ cleanup_all_ns
+}
+
+trap cleanup EXIT
+
+ip link add veth0 netns "$nsr" type veth peer name eth0 netns "$ns1"
+ip link add veth1 netns "$nsr" type veth peer name eth0 netns "$ns2"
+
+for dev in veth0 veth1; do
+ ip -net "$nsr" link set "$dev" up
+done
+
+ip -net "$nsr" addr add 10.0.1.1/24 dev veth0
+ip -net "$nsr" addr add 10.0.2.1/24 dev veth1
+
+ip netns exec "$nsr" sysctl -q net.ipv4.conf.veth0.forwarding=1
+ip netns exec "$nsr" sysctl -q net.ipv4.conf.veth1.forwarding=1
+ip netns exec "$nsr" sysctl -q net.netfilter.nf_conntrack_tcp_loose=0
+
+for n in $ns1 $ns2; do
+ ip -net "$n" link set eth0 up
+done
+ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
+ip -net "$ns2" addr add 10.0.2.99/24 dev eth0
+ip -net "$ns1" route add default via 10.0.1.1
+ip -net "$ns2" route add default via 10.0.2.1
+
+# test basic connectivity
+if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.99 > /dev/null; then
+ echo "ERROR: $ns1 cannot reach $ns2" 1>&2
+ exit 1
+fi
+
+if ! ip netns exec "$ns2" ping -c 1 -q 10.0.1.99 > /dev/null; then
+ echo "ERROR: $ns2 cannot reach $ns1" 1>&2
+ exit 1
+fi
+
+ip netns exec "$ns2" iperf3 -s > /dev/null 2>&1 &
+# ip netns exec $nsr tcpdump -vvv -n -i veth1 tcp | head -n 10 &
+
+sleep 1
+
+ip netns exec "$nsr" nft -f - <<EOF
+table inet filter {
+ chain prerouting {
+ type filter hook prerouting priority -300; policy accept;
+ meta iif veth0 tcp flags syn counter notrack
+ }
+
+ chain forward {
+ type filter hook forward priority 0; policy accept;
+
+ ct state new,established counter accept
+
+ meta iif veth0 meta l4proto tcp ct state untracked,invalid synproxy mss 1460 sack-perm timestamp
+
+ ct state invalid counter drop
+
+ # make ns2 unreachable w.o. tcp synproxy
+ tcp flags syn counter drop
+ }
+}
+EOF
+if [ $? -ne 0 ]; then
+ echo "SKIP: Cannot add nft synproxy"
+ exit $ksft_skip
+fi
+
+if ! ip netns exec "$ns1" timeout 5 iperf3 -c 10.0.2.99 -n $((1 * 1024 * 1024)) > /dev/null; then
+ echo "FAIL: iperf3 returned an error" 1>&2
+ ret=1
+ ip netns exec "$nsr" nft list ruleset
+else
+ echo "PASS: synproxy connection successful"
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_zones_many.sh b/tools/testing/selftests/net/netfilter/nft_zones_many.sh
index 5a8db0b48928..7db9982ba5a6 100755
--- a/tools/testing/selftests/netfilter/nft_zones_many.sh
+++ b/tools/testing/selftests/net/netfilter/nft_zones_many.sh
@@ -3,47 +3,34 @@
# Test insertion speed for packets with identical addresses/ports
# that are all placed in distinct conntrack zones.
-sfx=$(mktemp -u "XXXXXXXX")
-ns="ns-$sfx"
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
+source lib.sh
zones=2000
+[ "$KSFT_MACHINE_SLOW" = yes ] && zones=500
+
have_ct_tool=0
ret=0
cleanup()
{
- ip netns del $ns
-}
-
-checktool (){
- if ! $1 > /dev/null 2>&1; then
- echo "SKIP: Could not $2"
- exit $ksft_skip
- fi
+ cleanup_all_ns
}
checktool "nft --version" "run test without nft tool"
-checktool "ip -Version" "run test without ip tool"
checktool "socat -V" "run test without socat tool"
-checktool "ip netns add $ns" "create net namespace"
+
+setup_ns ns1
trap cleanup EXIT
-conntrack -V > /dev/null 2>&1
-if [ $? -eq 0 ];then
+if conntrack -V > /dev/null 2>&1; then
have_ct_tool=1
fi
-ip -net "$ns" link set lo up
-
test_zones() {
local max_zones=$1
-ip netns exec $ns sysctl -q net.netfilter.nf_conntrack_udp_timeout=3600
-ip netns exec $ns nft -f /dev/stdin<<EOF
+ip netns exec "$ns1" nft -f /dev/stdin<<EOF
flush ruleset
table inet raw {
map rndzone {
@@ -56,29 +43,39 @@ table inet raw {
}
}
EOF
+if [ "$?" -ne 0 ];then
+ echo "SKIP: Cannot add nftables rules"
+ exit $ksft_skip
+fi
+
+ ip netns exec "$ns1" sysctl -q net.netfilter.nf_conntrack_udp_timeout=3600
+
(
echo "add element inet raw rndzone {"
- for i in $(seq 1 $max_zones);do
+ for i in $(seq 1 "$max_zones");do
echo -n "$i : $i"
- if [ $i -lt $max_zones ]; then
+ if [ "$i" -lt "$max_zones" ]; then
echo ","
else
echo "}"
fi
done
- ) | ip netns exec $ns nft -f /dev/stdin
+ ) | ip netns exec "$ns1" nft -f /dev/stdin
local i=0
local j=0
- local outerstart=$(date +%s%3N)
- local stop=$outerstart
-
- while [ $i -lt $max_zones ]; do
- local start=$(date +%s%3N)
+ local outerstart
+ local stop
+ outerstart=$(date +%s%3N)
+ stop=$outerstart
+
+ while [ "$i" -lt "$max_zones" ]; do
+ local start
+ start=$(date +%s%3N)
i=$((i + 1000))
j=$((j + 1))
# nft rule in output places each packet in a different zone.
- dd if=/dev/zero of=/dev/stdout bs=8k count=1000 2>/dev/null | ip netns exec "$ns" socat STDIN UDP:127.0.0.1:12345,sourceport=12345
+ dd if=/dev/zero bs=8k count=1000 2>/dev/null | ip netns exec "$ns1" socat -u STDIN UDP:127.0.0.1:12345,sourceport=12345
if [ $? -ne 0 ] ;then
ret=1
break
@@ -89,14 +86,15 @@ EOF
echo "PASS: added 1000 entries in $duration ms (now $i total, loop $j)"
done
- if [ $have_ct_tool -eq 1 ]; then
- local count=$(ip netns exec "$ns" conntrack -C)
- local duration=$((stop-outerstart))
+ if [ "$have_ct_tool" -eq 1 ]; then
+ local count duration
+ count=$(ip netns exec "$ns1" conntrack -C)
+ duration=$((stop-outerstart))
- if [ $count -eq $max_zones ]; then
+ if [ "$count" -ge "$max_zones" ]; then
echo "PASS: inserted $count entries from packet path in $duration ms total"
else
- ip netns exec $ns conntrack -S 1>&2
+ ip netns exec "$ns1" conntrack -S 1>&2
echo "FAIL: inserted $count entries from packet path in $duration ms total, expected $max_zones entries"
ret=1
fi
@@ -110,18 +108,19 @@ EOF
test_conntrack_tool() {
local max_zones=$1
- ip netns exec $ns conntrack -F >/dev/null 2>/dev/null
+ ip netns exec "$ns1" conntrack -F >/dev/null 2>/dev/null
- local outerstart=$(date +%s%3N)
- local start=$(date +%s%3N)
- local stop=$start
- local i=0
- while [ $i -lt $max_zones ]; do
+ local outerstart start stop i
+ outerstart=$(date +%s%3N)
+ start=$(date +%s%3N)
+ stop="$start"
+ i=0
+ while [ "$i" -lt "$max_zones" ]; do
i=$((i + 1))
- ip netns exec "$ns" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \
+ ip netns exec "$ns1" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \
--timeout 3600 --state ESTABLISHED --sport 12345 --dport 1000 --zone $i >/dev/null 2>&1
if [ $? -ne 0 ];then
- ip netns exec "$ns" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \
+ ip netns exec "$ns1" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \
--timeout 3600 --state ESTABLISHED --sport 12345 --dport 1000 --zone $i > /dev/null
echo "FAIL: conntrack -I returned an error"
ret=1
@@ -137,13 +136,15 @@ test_conntrack_tool() {
fi
done
- local count=$(ip netns exec "$ns" conntrack -C)
- local duration=$((stop-outerstart))
+ local count
+ local duration
+ count=$(ip netns exec "$ns1" conntrack -C)
+ duration=$((stop-outerstart))
- if [ $count -eq $max_zones ]; then
+ if [ "$count" -eq "$max_zones" ]; then
echo "PASS: inserted $count entries via ctnetlink in $duration ms"
else
- ip netns exec $ns conntrack -S 1>&2
+ ip netns exec "$ns1" conntrack -S 1>&2
echo "FAIL: inserted $count entries via ctnetlink in $duration ms, expected $max_zones entries ($duration ms)"
ret=1
fi
@@ -151,7 +152,7 @@ test_conntrack_tool() {
test_zones $zones
-if [ $have_ct_tool -eq 1 ];then
+if [ "$have_ct_tool" -eq 1 ];then
test_conntrack_tool $zones
else
echo "SKIP: Could not run ctnetlink insertion test without conntrack tool"
diff --git a/tools/testing/selftests/net/netfilter/packetdrill/common.sh b/tools/testing/selftests/net/netfilter/packetdrill/common.sh
new file mode 100755
index 000000000000..ed36d535196d
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/packetdrill/common.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# for debugging set net.netfilter.nf_log_all_netns=1 in init_net
+# or do not use net namespaces.
+modprobe -q nf_conntrack
+sysctl -q net.netfilter.nf_conntrack_log_invalid=6
+
+# Flush old cached data (fastopen cookies).
+ip tcp_metrics flush all > /dev/null 2>&1
+
+# TCP min, default, and max receive and send buffer sizes.
+sysctl -q net.ipv4.tcp_rmem="4096 540000 $((15*1024*1024))"
+sysctl -q net.ipv4.tcp_wmem="4096 $((256*1024)) 4194304"
+
+# TCP congestion control.
+sysctl -q net.ipv4.tcp_congestion_control=cubic
+
+# TCP slow start after idle.
+sysctl -q net.ipv4.tcp_slow_start_after_idle=0
+
+# TCP Explicit Congestion Notification (ECN)
+sysctl -q net.ipv4.tcp_ecn=0
+
+sysctl -q net.ipv4.tcp_notsent_lowat=4294967295 > /dev/null 2>&1
+
+# Override the default qdisc on the tun device.
+# Many tests fail with timing errors if the default
+# is FQ and that paces their flows.
+tc qdisc add dev tun0 root pfifo
+
+# Enable conntrack
+$xtables -A INPUT -m conntrack --ctstate NEW -p tcp --syn
diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_ack_loss_stall.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_ack_loss_stall.pkt
new file mode 100644
index 000000000000..d755bd64c54f
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_ack_loss_stall.pkt
@@ -0,0 +1,118 @@
+// check that already-acked (retransmitted) packet is let through rather
+// than tagged as INVALID.
+
+`packetdrill/common.sh`
+
+// should set -P DROP but it disconnects VM w.o. extra netns
++0 `$xtables -A INPUT -m conntrack --ctstate INVALID -j DROP`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
++0 bind(3, ..., ...) = 0
++0 listen(3, 10) = 0
+
++0 < S 0:0(0) win 32792 <mss 1000>
++0 > S. 0:0(0) ack 1 <mss 1460>
++.01 < . 1:1(0) ack 1 win 65535
++0 accept(3, ..., ...) = 4
+
++0.0001 < P. 1:1461(1460) ack 1 win 257
++.0 > . 1:1(0) ack 1461 win 65535
++0.0001 < P. 1461:2921(1460) ack 1 win 257
++.0 > . 1:1(0) ack 2921 win 65535
++0.0001 < P. 2921:4381(1460) ack 1 win 257
++.0 > . 1:1(0) ack 4381 win 65535
++0.0001 < P. 4381:5841(1460) ack 1 win 257
++.0 > . 1:1(0) ack 5841 win 65535
++0.0001 < P. 5841:7301(1460) ack 1 win 257
++.0 > . 1:1(0) ack 7301 win 65535
++0.0001 < P. 7301:8761(1460) ack 1 win 257
++.0 > . 1:1(0) ack 8761 win 65535
++0.0001 < P. 8761:10221(1460) ack 1 win 257
++.0 > . 1:1(0) ack 10221 win 65535
++0.0001 < P. 10221:11681(1460) ack 1 win 257
++.0 > . 1:1(0) ack 11681 win 65535
++0.0001 < P. 11681:13141(1460) ack 1 win 257
++.0 > . 1:1(0) ack 13141 win 65535
++0.0001 < P. 13141:14601(1460) ack 1 win 257
++.0 > . 1:1(0) ack 14601 win 65535
++0.0001 < P. 14601:16061(1460) ack 1 win 257
++.0 > . 1:1(0) ack 16061 win 65535
++0.0001 < P. 16061:17521(1460) ack 1 win 257
++.0 > . 1:1(0) ack 17521 win 65535
++0.0001 < P. 17521:18981(1460) ack 1 win 257
++.0 > . 1:1(0) ack 18981 win 65535
++0.0001 < P. 18981:20441(1460) ack 1 win 257
++.0 > . 1:1(0) ack 20441 win 65535
++0.0001 < P. 20441:21901(1460) ack 1 win 257
++.0 > . 1:1(0) ack 21901 win 65535
++0.0001 < P. 21901:23361(1460) ack 1 win 257
++.0 > . 1:1(0) ack 23361 win 65535
++0.0001 < P. 23361:24821(1460) ack 1 win 257
+0.055 > . 1:1(0) ack 24821 win 65535
++0.0001 < P. 24821:26281(1460) ack 1 win 257
++.0 > . 1:1(0) ack 26281 win 65535
++0.0001 < P. 26281:27741(1460) ack 1 win 257
++.0 > . 1:1(0) ack 27741 win 65535
++0.0001 < P. 27741:29201(1460) ack 1 win 257
++.0 > . 1:1(0) ack 29201 win 65535
++0.0001 < P. 29201:30661(1460) ack 1 win 257
++.0 > . 1:1(0) ack 30661 win 65535
++0.0001 < P. 30661:32121(1460) ack 1 win 257
++.0 > . 1:1(0) ack 32121 win 65535
++0.0001 < P. 32121:33581(1460) ack 1 win 257
++.0 > . 1:1(0) ack 33581 win 65535
++0.0001 < P. 33581:35041(1460) ack 1 win 257
++.0 > . 1:1(0) ack 35041 win 65535
++0.0001 < P. 35041:36501(1460) ack 1 win 257
++.0 > . 1:1(0) ack 36501 win 65535
++0.0001 < P. 36501:37961(1460) ack 1 win 257
++.0 > . 1:1(0) ack 37961 win 65535
++0.0001 < P. 37961:39421(1460) ack 1 win 257
++.0 > . 1:1(0) ack 39421 win 65535
++0.0001 < P. 39421:40881(1460) ack 1 win 257
++.0 > . 1:1(0) ack 40881 win 65535
++0.0001 < P. 40881:42341(1460) ack 1 win 257
++.0 > . 1:1(0) ack 42341 win 65535
++0.0001 < P. 42341:43801(1460) ack 1 win 257
++.0 > . 1:1(0) ack 43801 win 65535
++0.0001 < P. 43801:45261(1460) ack 1 win 257
++.0 > . 1:1(0) ack 45261 win 65535
++0.0001 < P. 45261:46721(1460) ack 1 win 257
++.0 > . 1:1(0) ack 46721 win 65535
++0.0001 < P. 46721:48181(1460) ack 1 win 257
++.0 > . 1:1(0) ack 48181 win 65535
++0.0001 < P. 48181:49641(1460) ack 1 win 257
++.0 > . 1:1(0) ack 49641 win 65535
++0.0001 < P. 49641:51101(1460) ack 1 win 257
++.0 > . 1:1(0) ack 51101 win 65535
++0.0001 < P. 51101:52561(1460) ack 1 win 257
++.0 > . 1:1(0) ack 52561 win 65535
++0.0001 < P. 52561:54021(1460) ack 1 win 257
++.0 > . 1:1(0) ack 54021 win 65535
++0.0001 < P. 54021:55481(1460) ack 1 win 257
++.0 > . 1:1(0) ack 55481 win 65535
++0.0001 < P. 55481:56941(1460) ack 1 win 257
++.0 > . 1:1(0) ack 56941 win 65535
++0.0001 < P. 56941:58401(1460) ack 1 win 257
++.0 > . 1:1(0) ack 58401 win 65535
++0.0001 < P. 58401:59861(1460) ack 1 win 257
++.0 > . 1:1(0) ack 59861 win 65535
++0.0001 < P. 59861:61321(1460) ack 1 win 257
++.0 > . 1:1(0) ack 61321 win 65535
++0.0001 < P. 61321:62781(1460) ack 1 win 257
++.0 > . 1:1(0) ack 62781 win 65535
++0.0001 < P. 62781:64241(1460) ack 1 win 257
++.0 > . 1:1(0) ack 64241 win 65535
++0.0001 < P. 64241:65701(1460) ack 1 win 257
++.0 > . 1:1(0) ack 65701 win 65535
++0.0001 < P. 65701:67161(1460) ack 1 win 257
++.0 > . 1:1(0) ack 67161 win 65535
+
+// nf_ct_proto_6: SEQ is under the lower bound (already ACKed data retransmitted) IN=tun0 OUT= MAC= SRC=192.0.2.1 DST=192.168.24.72 LEN=1500 TOS=0x00 PREC=0x00 TTL=255 ID=0 PROTO=TCP SPT=34375 DPT=8080 SEQ=1 ACK=4162510439 WINDOW=257 RES=0x00 ACK PSH URGP=0
++0.0001 < P. 1:1461(1460) ack 1 win 257
+
+// only sent if above packet isn't flagged as invalid
++.0 > . 1:1(0) ack 67161 win 65535
+
++0 `$xtables -D INPUT -m conntrack --ctstate INVALID -j DROP`
diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_inexact_rst.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_inexact_rst.pkt
new file mode 100644
index 000000000000..dccdd4c009c6
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_inexact_rst.pkt
@@ -0,0 +1,62 @@
+// check RST packet that doesn't exactly match expected next sequence
+// number still transitions conntrack state to CLOSE iff its already in
+// FIN/CLOSE_WAIT.
+
+`packetdrill/common.sh`
+
+// 5.771921 server_ip > client_ip TLSv1.2 337 [Packet size limited during capture]
+// 5.771994 server_ip > client_ip TLSv1.2 337 [Packet size limited during capture]
+// 5.772212 client_ip > server_ip TCP 66 45020 > 443 [ACK] Seq=1905874048 Ack=781810658 Win=36352 Len=0 TSval=3317842872 TSecr=675936334
+// 5.787924 server_ip > client_ip TLSv1.2 1300 [Packet size limited during capture]
+// 5.788126 server_ip > client_ip TLSv1.2 90 Application Data
+// 5.788207 server_ip > client_ip TCP 66 443 > 45020 [FIN, ACK] Seq=781811916 Ack=1905874048 Win=31104 Len=0 TSval=675936350 TSecr=3317842872
+// 5.788447 client_ip > server_ip TLSv1.2 90 Application Data
+// 5.788479 client_ip > server_ip TCP 66 45020 > 443 [RST, ACK] Seq=1905874072 Ack=781811917 Win=39040 Len=0 TSval=3317842889 TSecr=675936350
+// 5.788581 server_ip > client_ip TCP 54 8443 > 45020 [RST] Seq=781811892 Win=0 Len=0
+
++0 `iptables -A INPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
++0 `iptables -A OUTPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+0.1 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+
+0.1 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8>
+
++0.1 < S. 1:1(0) ack 1 win 65535 <mss 1460>
+
++0 > . 1:1(0) ack 1 win 65535
++0 < . 1:1001(1000) ack 1 win 65535
++0 < . 1001:2001(1000) ack 1 win 65535
++0 < . 2001:3001(1000) ack 1 win 65535
+
++0 > . 1:1(0) ack 1001 win 65535
++0 > . 1:1(0) ack 2001 win 65535
++0 > . 1:1(0) ack 3001 win 65535
+
++0 write(3, ..., 1000) = 1000
+
++0.0 > P. 1:1001(1000) ack 3001 win 65535
+
++0.1 read(3, ..., 1000) = 1000
+
+// Conntrack should move to FIN_WAIT, then CLOSE_WAIT.
++0 < F. 3001:3001(0) ack 1001 win 65535
++0 > . 1001:1001(0) ack 3002 win 65535
+
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q CLOSE_WAIT`
+
++1 close(3) = 0
+// RST: unread data. FIN was seen, hence ack + 1
++0 > R. 1001:1001(0) ack 3002 win 65535
+// ... and then, CLOSE.
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q CLOSE\ `
+
+// Spurious RST from peer -- no sk state. Should NOT get
+// marked INVALID, because conntrack is already closing.
++0.1 < R 2001:2001(0) win 0
+
+// No packets should have been marked INVALID
++0 `iptables -v -S INPUT | grep INVALID | grep -q -- "-c 0 0"`
++0 `iptables -v -S OUTPUT | grep INVALID | grep -q -- "-c 0 0"`
diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_rst_invalid.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_rst_invalid.pkt
new file mode 100644
index 000000000000..686f18a3d9ef
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_rst_invalid.pkt
@@ -0,0 +1,59 @@
+// check that out of window resets are marked as INVALID and conntrack remains
+// in ESTABLISHED state.
+
+`packetdrill/common.sh`
+
++0 `$xtables -A INPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
++0 `$xtables -A OUTPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+0.1 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+
+0.1 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8>
+
++0.1 < S. 1:1(0) ack 1 win 65535 <mss 1460>
+
++0 > . 1:1(0) ack 1 win 65535
++0 < . 1:1001(1000) ack 1 win 65535
++0 < . 1001:2001(1000) ack 1 win 65535
++0 < . 2001:3001(1000) ack 1 win 65535
+
++0 > . 1:1(0) ack 1001 win 65535
++0 > . 1:1(0) ack 2001 win 65535
++0 > . 1:1(0) ack 3001 win 65535
+
++0 write(3, ..., 1000) = 1000
+
+// out of window
++0.0 < R 0:0(0) win 0
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q ESTABLISHED`
+
+// out of window
++0.0 < R 1000000:1000000(0) win 0
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q ESTABLISHED`
+
+// in-window but not exact match
++0.0 < R 42:42(0) win 0
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q ESTABLISHED`
+
++0.0 > P. 1:1001(1000) ack 3001 win 65535
+
++0.1 read(3, ..., 1000) = 1000
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q ESTABLISHED`
+
++0 < . 3001:3001(0) ack 1001 win 65535
+
++0.0 < R. 3000:3000(0) ack 1001 win 0
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q ESTABLISHED`
+
+// exact next sequence
++0.0 < R. 3001:3001(0) ack 1001 win 0
+// Conntrack should move to CLOSE
+
+// Expect four invalid RSTs
++0 `$xtables -v -S INPUT | grep INVALID | grep -q -- "-c 4 "`
++0 `$xtables -v -S OUTPUT | grep INVALID | grep -q -- "-c 0 0"`
+
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q CLOSE\ `
diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt
new file mode 100644
index 000000000000..3442cd29bc93
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt
@@ -0,0 +1,44 @@
+// Check connection re-use, i.e. peer that receives the SYN answers with
+// a challenge-ACK.
+// Check that conntrack lets all packets pass, including the challenge ack,
+// and that a new connection is established.
+
+`packetdrill/common.sh`
+
+// S >
+// . < (challnge-ack)
+// R. >
+// S >
+// S. <
+// Expected outcome: established connection.
+
++0 `$xtables -A INPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
++0 `$xtables -A OUTPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+0.1 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+0.1 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8>
+
+// Challenge ACK, old incarnation.
+0.1 < . 145824453:145824453(0) ack 643160523 win 240 <mss 1460,nop,nop,TS val 1 ecr 1,nop,wscale 0>
+
++0.01 > R 643160523:643160523(0) win 0
+
++0.01 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep UNREPLIED | grep -q SYN_SENT`
+
+// Must go through.
++0.01 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8>
+
+// correct synack
++0.1 < S. 0:0(0) ack 1 win 250 <mss 1460,nop,nop,TS val 1 ecr 1,nop,wscale 0>
+
+// 3whs completes.
++0.01 > . 1:1(0) ack 1 win 256 <nop,nop,TS val 1 ecr 1>
+
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep ESTABLISHED | grep -q ASSURED`
+
+// No packets should have been marked INVALID
++0 `$xtables -v -S INPUT | grep INVALID | grep -q -- "-c 0 0"`
++0 `$xtables -v -S OUTPUT | grep INVALID | grep -q -- "-c 0 0"`
diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_old.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_old.pkt
new file mode 100644
index 000000000000..3047160c4bf3
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_old.pkt
@@ -0,0 +1,51 @@
+// Check conntrack copes with syn/ack reply for a previous, old incarnation.
+
+// tcpdump with buggy sequence
+// 10.176.25.8.829 > 10.192.171.30.2049: Flags [S], seq 2375731741, win 29200, options [mss 1460,sackOK,TS val 2083107423 ecr 0,nop,wscale 7], length 0
+// OLD synack, for old/previous S
+// 10.192.171.30.2049 > 10.176.25.8.829: Flags [S.], seq 145824453, ack 643160523, win 65535, options [mss 8952,nop,wscale 5,TS val 3215437785 ecr 2082921663,nop,nop], length 0
+// This reset never makes it to the endpoint, elided in the packetdrill script
+// 10.192.171.30.2049 > 10.176.25.8.829: Flags [R.], seq 1, ack 1, win 65535, options [mss 8952,nop,wscale 5,TS val 3215443451 ecr 2082921663,nop,nop], length 0
+// Syn retransmit, no change
+// 10.176.25.8.829 > 10.192.171.30.2049: Flags [S], seq 2375731741, win 29200, options [mss 1460,sackOK,TS val 2083115583 ecr 0,nop,wscale 7], length 0
+// CORRECT synack, should be accepted, but conntrack classified this as INVALID:
+// SEQ is over the upper bound (over the window of the receiver) IN=tun0 OUT= MAC= SRC=192.0.2.1 DST=192.168.37.78 LEN=40 TOS=0x00 PREC=0x00 TTL=255 ID=0 PROTO=TCP SPT=8080 DPT=34500 SEQ=162602411 ACK=2124350315 ..
+// 10.192.171.30.2049 > 10.176.25.8.829: Flags [S.], seq 162602410, ack 2375731742, win 65535, options [mss 8952,nop,wscale 5,TS val 3215445754 ecr 2083115583,nop,nop], length 0
+
+`packetdrill/common.sh`
+
++0 `$xtables -A INPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
++0 `$xtables -A OUTPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+0.1 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+0.1 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8>
+
+// bogus/outdated synack, invalid ack value
+0.1 < S. 145824453:145824453(0) ack 643160523 win 240 <mss 1440,nop,nop,TS val 1 ecr 1,nop,wscale 0>
+
+// syn retransmitted
+1.01 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1015 ecr 0,nop,wscale 8>
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep UNREPLIED | grep -q SYN_SENT`
+
+// correct synack
++0 < S. 145758918:145758918(0) ack 1 win 250 <mss 1460,nop,nop,TS val 1 ecr 1,nop,wscale 0>
++0 write(3, ..., 1) = 1
+
+// with buggy conntrack above packet is dropped, so SYN rtx is seen:
+// script packet: 1.054007 . 1:1(0) ack 16777958 win 256 <nop,nop,TS val 1033 ecr 1>
+// actual packet: 3.010000 S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1015 ecr 0,nop,wscale 8>
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep ESTABLISHED | grep -q ASSURED`
+
++0 > P. 1:2(1) ack 4294901762 win 256 <nop,nop,TS val 1067 ecr 1>
+
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep ASSURED | grep -q ESTABLISHED`
+
+// No packets should have been marked INVALID in OUTPUT direction, 1 in INPUT
++0 `$xtables -v -S OUTPUT | grep INVALID | grep -q -- "-c 0 0"`
++0 `$xtables -v -S INPUT | grep INVALID | grep -q -- "-c 1 "`
+
++0 `$xtables -D INPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
++0 `$xtables -D OUTPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_reuse.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_reuse.pkt
new file mode 100644
index 000000000000..842242f8ccf7
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_reuse.pkt
@@ -0,0 +1,34 @@
+// Check reception of another SYN while we have an established conntrack state.
+// Challenge ACK is supposed to pass through, RST reply should clear conntrack
+// state and SYN retransmit should give us new 'SYN_RECV' connection state.
+
+`packetdrill/common.sh`
+
+// should show a match if bug is present:
++0 `iptables -A INPUT -m conntrack --ctstate INVALID -p tcp --tcp-flags SYN,ACK SYN,ACK`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
++0 bind(3, ..., ...) = 0
++0 listen(3, 10) = 0
+
++0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7, TS val 1 ecr 0,nop,nop>
++0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,TS val 100 ecr 1,nop,wscale 8>
++.01 < . 1:1(0) ack 1 win 257 <TS val 1 ecr 100,nop,nop>
++0 accept(3, ..., ...) = 4
+
++0 < P. 1:101(100) ack 1 win 257 <TS val 2 ecr 100,nop,nop>
++.001 > . 1:1(0) ack 101 win 256 <nop,nop,TS val 110 ecr 2>
++0 read(4, ..., 101) = 100
+
+1.0 < S 2000:2000(0) win 32792 <mss 1000,nop,wscale 7, TS val 233 ecr 0,nop,nop>
+// Won't expect this: challenge ack.
+
++0 > . 1:1(0) ack 101 win 256 <nop,nop,TS val 112 ecr 2>
++0 < R. 101:101(0) ack 1 win 257
++0 close(4) = 0
+
+1.5 < S 2000:2000(0) win 32792 <mss 1000,nop,wscale 0, TS val 233 ecr 0,nop,nop>
+
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep -q SYN_RECV`
++0 `iptables -v -S INPUT | grep INVALID | grep -q -- "-c 0 0"`
diff --git a/tools/testing/selftests/netfilter/rpath.sh b/tools/testing/selftests/net/netfilter/rpath.sh
index 5289c8447a41..4485fd7675ed 100755
--- a/tools/testing/selftests/netfilter/rpath.sh
+++ b/tools/testing/selftests/net/netfilter/rpath.sh
@@ -64,12 +64,18 @@ ip -net "$ns2" a a fec0:42::1/64 dev d0 nodad
# firewall matches to test
[ -n "$iptables" ] && {
common='-t raw -A PREROUTING -s 192.168.0.0/16'
- ip netns exec "$ns2" "$iptables" $common -m rpfilter
+ if ! ip netns exec "$ns2" "$iptables" $common -m rpfilter;then
+ echo "Cannot add rpfilter rule"
+ exit $ksft_skip
+ fi
ip netns exec "$ns2" "$iptables" $common -m rpfilter --invert
}
[ -n "$ip6tables" ] && {
common='-t raw -A PREROUTING -s fec0::/16'
- ip netns exec "$ns2" "$ip6tables" $common -m rpfilter
+ if ! ip netns exec "$ns2" "$ip6tables" $common -m rpfilter;then
+ echo "Cannot add rpfilter rule"
+ exit $ksft_skip
+ fi
ip netns exec "$ns2" "$ip6tables" $common -m rpfilter --invert
}
[ -n "$nft" ] && ip netns exec "$ns2" $nft -f - <<EOF
diff --git a/tools/testing/selftests/netfilter/sctp_collision.c b/tools/testing/selftests/net/netfilter/sctp_collision.c
index 21bb1cfd8a85..21bb1cfd8a85 100644
--- a/tools/testing/selftests/netfilter/sctp_collision.c
+++ b/tools/testing/selftests/net/netfilter/sctp_collision.c
diff --git a/tools/testing/selftests/net/netfilter/settings b/tools/testing/selftests/net/netfilter/settings
new file mode 100644
index 000000000000..abc5648b59ab
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/settings
@@ -0,0 +1 @@
+timeout=1800
diff --git a/tools/testing/selftests/netfilter/xt_string.sh b/tools/testing/selftests/net/netfilter/xt_string.sh
index 1802653a4728..8d401c69e317 100755
--- a/tools/testing/selftests/netfilter/xt_string.sh
+++ b/tools/testing/selftests/net/netfilter/xt_string.sh
@@ -5,53 +5,57 @@
ksft_skip=4
rc=0
-if ! iptables --version >/dev/null 2>&1; then
- echo "SKIP: Test needs iptables"
- exit $ksft_skip
-fi
-if ! ip -V >/dev/null 2>&1; then
- echo "SKIP: Test needs iproute2"
- exit $ksft_skip
-fi
-if ! nc -h >/dev/null 2>&1; then
- echo "SKIP: Test needs netcat"
- exit $ksft_skip
-fi
+source lib.sh
+
+checktool "socat -h" "run test without socat"
+checktool "iptables --version" "test needs iptables"
+
+infile=$(mktemp)
+
+cleanup()
+{
+ ip netns del "$netns"
+ rm -f "$infile"
+}
+
+trap cleanup EXIT
+
+setup_ns netns
+
+ip -net "$netns" link add d0 type dummy
+ip -net "$netns" link set d0 up
+ip -net "$netns" addr add 10.1.2.1/24 dev d0
pattern="foo bar baz"
patlen=11
hdrlen=$((20 + 8)) # IPv4 + UDP
-ns="ns-$(mktemp -u XXXXXXXX)"
-trap 'ip netns del $ns' EXIT
-ip netns add "$ns"
-ip -net "$ns" link add d0 type dummy
-ip -net "$ns" link set d0 up
-ip -net "$ns" addr add 10.1.2.1/24 dev d0
-
-#ip netns exec "$ns" tcpdump -npXi d0 &
+
+#ip netns exec "$netns" tcpdump -npXi d0 &
#tcpdump_pid=$!
-#trap 'kill $tcpdump_pid; ip netns del $ns' EXIT
+#trap 'kill $tcpdump_pid; ip netns del $netns' EXIT
add_rule() { # (alg, from, to)
- ip netns exec "$ns" \
+ ip netns exec "$netns" \
iptables -A OUTPUT -o d0 -m string \
- --string "$pattern" --algo $1 --from $2 --to $3
+ --string "$pattern" --algo "$1" --from "$2" --to "$3"
}
showrules() { # ()
- ip netns exec "$ns" iptables -v -S OUTPUT | grep '^-A'
+ ip netns exec "$netns" iptables -v -S OUTPUT | grep '^-A'
}
zerorules() {
- ip netns exec "$ns" iptables -Z OUTPUT
+ ip netns exec "$netns" iptables -Z OUTPUT
}
countrule() { # (pattern)
showrules | grep -c -- "$*"
}
send() { # (offset)
- ( for ((i = 0; i < $1 - $hdrlen; i++)); do
- printf " "
+ ( for ((i = 0; i < $1 - hdrlen; i++)); do
+ echo -n " "
done
- printf "$pattern"
- ) | ip netns exec "$ns" nc -w 1 -u 10.1.2.2 27374
+ echo -n "$pattern"
+ ) > "$infile"
+
+ ip netns exec "$netns" socat -t 1 -u STDIN UDP-SENDTO:10.1.2.2:27374 < "$infile"
}
add_rule bm 1000 1500
@@ -61,8 +65,8 @@ add_rule kmp 1400 1600
zerorules
send 0
-send $((1000 - $patlen))
-if [ $(countrule -c 0 0) -ne 4 ]; then
+send $((1000 - patlen))
+if [ "$(countrule -c 0 0)" -ne 4 ]; then
echo "FAIL: rules match data before --from"
showrules
((rc--))
@@ -70,16 +74,16 @@ fi
zerorules
send 1000
-send $((1400 - $patlen))
-if [ $(countrule -c 2) -ne 2 ]; then
+send $((1400 - patlen))
+if [ "$(countrule -c 2)" -ne 2 ]; then
echo "FAIL: only two rules should match at low offset"
showrules
((rc--))
fi
zerorules
-send $((1500 - $patlen))
-if [ $(countrule -c 1) -ne 4 ]; then
+send $((1500 - patlen))
+if [ "$(countrule -c 1)" -ne 4 ]; then
echo "FAIL: all rules should match at end of packet"
showrules
((rc--))
@@ -87,7 +91,7 @@ fi
zerorules
send 1495
-if [ $(countrule -c 1) -ne 1 ]; then
+if [ "$(countrule -c 1)" -ne 1 ]; then
echo "FAIL: only kmp with proper --to should match pattern spanning fragments"
showrules
((rc--))
@@ -95,23 +99,23 @@ fi
zerorules
send 1500
-if [ $(countrule -c 1) -ne 2 ]; then
+if [ "$(countrule -c 1)" -ne 2 ]; then
echo "FAIL: two rules should match pattern at start of second fragment"
showrules
((rc--))
fi
zerorules
-send $((1600 - $patlen))
-if [ $(countrule -c 1) -ne 2 ]; then
+send $((1600 - patlen))
+if [ "$(countrule -c 1)" -ne 2 ]; then
echo "FAIL: two rules should match pattern at end of largest --to"
showrules
((rc--))
fi
zerorules
-send $((1600 - $patlen + 1))
-if [ $(countrule -c 1) -ne 0 ]; then
+send $((1600 - patlen + 1))
+if [ "$(countrule -c 1)" -ne 0 ]; then
echo "FAIL: no rules should match pattern extending largest --to"
showrules
((rc--))
@@ -119,10 +123,11 @@ fi
zerorules
send 1600
-if [ $(countrule -c 1) -ne 0 ]; then
+if [ "$(countrule -c 1)" -ne 0 ]; then
echo "FAIL: no rule should match pattern past largest --to"
showrules
((rc--))
fi
+[ $rc -eq 0 ] && echo "PASS: string match tests"
exit $rc
diff --git a/tools/testing/selftests/net/netns-sysctl.sh b/tools/testing/selftests/net/netns-sysctl.sh
new file mode 100755
index 000000000000..45c34a3b9aae
--- /dev/null
+++ b/tools/testing/selftests/net/netns-sysctl.sh
@@ -0,0 +1,40 @@
+#!/bin/bash -e
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test checks that the network buffer sysctls are present
+# in a network namespaces, and that they are readonly.
+
+source lib.sh
+
+cleanup() {
+ cleanup_ns $test_ns
+}
+
+trap cleanup EXIT
+
+fail() {
+ echo "ERROR: $*" >&2
+ exit 1
+}
+
+setup_ns test_ns
+
+for sc in {r,w}mem_{default,max}; do
+ # check that this is writable in a netns
+ [ -w "/proc/sys/net/core/$sc" ] ||
+ fail "$sc isn't writable in the init netns!"
+
+ # change the value in the host netns
+ sysctl -qw "net.core.$sc=300000" ||
+ fail "Can't write $sc in init netns!"
+
+ # check that the value is read from the init netns
+ [ "$(ip netns exec $test_ns sysctl -n "net.core.$sc")" -eq 300000 ] ||
+ fail "Value for $sc mismatch!"
+
+ # check that this isn't writable in a netns
+ ip netns exec $test_ns [ -w "/proc/sys/net/core/$sc" ] &&
+ fail "$sc is writable in a netns!"
+done
+
+echo 'Test passed OK'
diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py
new file mode 100755
index 000000000000..93d9d914529b
--- /dev/null
+++ b/tools/testing/selftests/net/nl_netdev.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import time
+from lib.py import ksft_run, ksft_exit, ksft_pr
+from lib.py import ksft_eq, ksft_ge, ksft_busy_wait
+from lib.py import NetdevFamily, NetdevSimDev, ip
+
+
+def empty_check(nf) -> None:
+ devs = nf.dev_get({}, dump=True)
+ ksft_ge(len(devs), 1)
+
+
+def lo_check(nf) -> None:
+ lo_info = nf.dev_get({"ifindex": 1})
+ ksft_eq(len(lo_info['xdp-features']), 0)
+ ksft_eq(len(lo_info['xdp-rx-metadata-features']), 0)
+
+
+def page_pool_check(nf) -> None:
+ with NetdevSimDev() as nsimdev:
+ nsim = nsimdev.nsims[0]
+
+ def up():
+ ip(f"link set dev {nsim.ifname} up")
+
+ def down():
+ ip(f"link set dev {nsim.ifname} down")
+
+ def get_pp():
+ pp_list = nf.page_pool_get({}, dump=True)
+ return [pp for pp in pp_list if pp.get("ifindex") == nsim.ifindex]
+
+ # No page pools when down
+ down()
+ ksft_eq(len(get_pp()), 0)
+
+ # Up, empty page pool appears
+ up()
+ pp_list = get_pp()
+ ksft_ge(len(pp_list), 0)
+ refs = sum([pp["inflight"] for pp in pp_list])
+ ksft_eq(refs, 0)
+
+ # Down, it disappears, again
+ down()
+ pp_list = get_pp()
+ ksft_eq(len(pp_list), 0)
+
+ # Up, allocate a page
+ up()
+ nsim.dfs_write("pp_hold", "y")
+ pp_list = nf.page_pool_get({}, dump=True)
+ refs = sum([pp["inflight"] for pp in pp_list if pp.get("ifindex") == nsim.ifindex])
+ ksft_ge(refs, 1)
+
+ # Now let's leak a page
+ down()
+ pp_list = get_pp()
+ ksft_eq(len(pp_list), 1)
+ refs = sum([pp["inflight"] for pp in pp_list])
+ ksft_eq(refs, 1)
+ attached = [pp for pp in pp_list if "detach-time" not in pp]
+ ksft_eq(len(attached), 0)
+
+ # New pp can get created, and we'll have two
+ up()
+ pp_list = get_pp()
+ attached = [pp for pp in pp_list if "detach-time" not in pp]
+ detached = [pp for pp in pp_list if "detach-time" in pp]
+ ksft_eq(len(attached), 1)
+ ksft_eq(len(detached), 1)
+
+ # Free the old page and the old pp is gone
+ nsim.dfs_write("pp_hold", "n")
+ # Freeing check is once a second so we may need to retry
+ ksft_busy_wait(lambda: len(get_pp()) == 1, deadline=2)
+
+ # And down...
+ down()
+ ksft_eq(len(get_pp()), 0)
+
+ # Last, leave the page hanging for destroy, nothing to check
+ # we're trying to exercise the orphaning path in the kernel
+ up()
+ nsim.dfs_write("pp_hold", "y")
+
+
+def main() -> None:
+ nf = NetdevFamily()
+ ksft_run([empty_check, lo_check, page_pool_check],
+ args=(nf, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh
index 5cae53543849..cc0bfae2bafa 100755
--- a/tools/testing/selftests/net/openvswitch/openvswitch.sh
+++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# OVS kernel module self tests
@@ -11,6 +11,11 @@ ksft_skip=4
PAUSE_ON_FAIL=no
VERBOSE=0
TRACING=0
+WAIT_TIMEOUT=5
+
+if test "X$KSFT_MACHINE_SLOW" == "Xyes"; then
+ WAIT_TIMEOUT=10
+fi
tests="
arp_ping eth-arp: Basic arp ping between two NS
@@ -20,10 +25,37 @@ tests="
nat_related_v4 ip4-nat-related: ICMP related matches work with SNAT
netlink_checks ovsnl: validate netlink attrs and settings
upcall_interfaces ovs: test the upcall interfaces
- drop_reason drop: test drop reasons are emitted"
+ drop_reason drop: test drop reasons are emitted
+ psample psample: Sampling packets with psample"
info() {
- [ $VERBOSE = 0 ] || echo $*
+ [ "${ovs_dir}" != "" ] &&
+ echo "`date +"[%m-%d %H:%M:%S]"` $*" >> ${ovs_dir}/debug.log
+ [ $VERBOSE = 0 ] || echo $*
+}
+
+ovs_wait() {
+ info "waiting $WAIT_TIMEOUT s for: $@"
+
+ if "$@" ; then
+ info "wait succeeded immediately"
+ return 0
+ fi
+
+ # A quick re-check helps speed up small races in fast systems.
+ # However, fractional sleeps might not necessarily work.
+ local start=0
+ sleep 0.1 || { sleep 1; start=1; }
+
+ for (( i=start; i<WAIT_TIMEOUT; i++ )); do
+ if "$@" ; then
+ info "wait succeeded after $i seconds"
+ return 0
+ fi
+ sleep 1
+ done
+ info "wait failed after $i seconds"
+ return 1
}
ovs_base=`pwd`
@@ -65,7 +97,8 @@ ovs_setenv() {
ovs_sbx() {
if test "X$2" != X; then
- (ovs_setenv $1; shift; "$@" >> ${ovs_dir}/debug.log)
+ (ovs_setenv $1; shift;
+ info "run cmd: $@"; "$@" >> ${ovs_dir}/debug.log)
else
ovs_setenv $1
fi
@@ -102,12 +135,21 @@ ovs_netns_spawn_daemon() {
shift
netns=$1
shift
- info "spawning cmd: $*"
- ip netns exec $netns $* >> $ovs_dir/stdout 2>> $ovs_dir/stderr &
+ if [ "$netns" == "_default" ]; then
+ $* >> $ovs_dir/stdout 2>> $ovs_dir/stderr &
+ else
+ ip netns exec $netns $* >> $ovs_dir/stdout 2>> $ovs_dir/stderr &
+ fi
pid=$!
ovs_sbx "$sbx" on_exit "kill -TERM $pid 2>/dev/null"
}
+ovs_spawn_daemon() {
+ sbx=$1
+ shift
+ ovs_netns_spawn_daemon $sbx "_default" $*
+}
+
ovs_add_netns_and_veths () {
info "Adding netns attached: sbx:$1 dp:$2 {$3, $4, $5}"
ovs_sbx "$1" ip netns add "$3" || return 1
@@ -139,7 +181,7 @@ ovs_add_flow () {
info "Adding flow to DP: sbx:$1 br:$2 flow:$3 act:$4"
ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py add-flow "$2" "$3" "$4"
if [ $? -ne 0 ]; then
- echo "Flow [ $3 : $4 ] failed" >> ${ovs_dir}/debug.log
+ info "Flow [ $3 : $4 ] failed"
return 1
fi
return 0
@@ -170,6 +212,19 @@ ovs_drop_reason_count()
return `echo "$perf_output" | grep "$pattern" | wc -l`
}
+ovs_test_flow_fails () {
+ ERR_MSG="Flow actions may not be safe on all matching packets"
+
+ PRE_TEST=$(dmesg | grep -c "${ERR_MSG}")
+ ovs_add_flow $@ &> /dev/null $@ && return 1
+ POST_TEST=$(dmesg | grep -c "${ERR_MSG}")
+
+ if [ "$PRE_TEST" == "$POST_TEST" ]; then
+ return 1
+ fi
+ return 0
+}
+
usage() {
echo
echo "$0 [OPTIONS] [TEST]..."
@@ -184,6 +239,91 @@ usage() {
exit 1
}
+
+# psample test
+# - use psample to observe packets
+test_psample() {
+ sbx_add "test_psample" || return $?
+
+ # Add a datapath with per-vport dispatching.
+ ovs_add_dp "test_psample" psample -V 2:1 || return 1
+
+ info "create namespaces"
+ ovs_add_netns_and_veths "test_psample" "psample" \
+ client c0 c1 172.31.110.10/24 -u || return 1
+ ovs_add_netns_and_veths "test_psample" "psample" \
+ server s0 s1 172.31.110.20/24 -u || return 1
+
+ # Check if psample actions can be configured.
+ ovs_add_flow "test_psample" psample \
+ 'in_port(1),eth(),eth_type(0x0806),arp()' 'psample(group=1)' &> /dev/null
+ if [ $? == 1 ]; then
+ info "no support for psample - skipping"
+ ovs_exit_sig
+ return $ksft_skip
+ fi
+
+ ovs_del_flows "test_psample" psample
+
+ # Test action verification.
+ OLDIFS=$IFS
+ IFS='*'
+ min_key='in_port(1),eth(),eth_type(0x0800),ipv4()'
+ for testcase in \
+ "cookie to large"*"psample(group=1,cookie=1615141312111009080706050403020100)" \
+ "no group with cookie"*"psample(cookie=abcd)" \
+ "no group"*"psample()";
+ do
+ set -- $testcase;
+ ovs_test_flow_fails "test_psample" psample $min_key $2
+ if [ $? == 1 ]; then
+ info "failed - $1"
+ return 1
+ fi
+ done
+ IFS=$OLDIFS
+
+ ovs_del_flows "test_psample" psample
+ # Allow ARP
+ ovs_add_flow "test_psample" psample \
+ 'in_port(1),eth(),eth_type(0x0806),arp()' '2' || return 1
+ ovs_add_flow "test_psample" psample \
+ 'in_port(2),eth(),eth_type(0x0806),arp()' '1' || return 1
+
+ # Sample first 14 bytes of all traffic.
+ ovs_add_flow "test_psample" psample \
+ "in_port(1),eth(),eth_type(0x0800),ipv4()" \
+ "trunc(14),psample(group=1,cookie=c0ffee),2"
+
+ # Sample all traffic. In this case, use a sample() action with both
+ # psample and an upcall emulating simultaneous local sampling and
+ # sFlow / IPFIX.
+ nlpid=$(grep -E "listening on upcall packet handler" \
+ $ovs_dir/s0.out | cut -d ":" -f 2 | tr -d ' ')
+
+ ovs_add_flow "test_psample" psample \
+ "in_port(2),eth(),eth_type(0x0800),ipv4()" \
+ "sample(sample=100%,actions(psample(group=2,cookie=eeff0c),userspace(pid=${nlpid},userdata=eeff0c))),1"
+
+ # Record psample data.
+ ovs_spawn_daemon "test_psample" python3 $ovs_base/ovs-dpctl.py psample-events
+ ovs_wait grep -q "listening for psample events" ${ovs_dir}/stdout
+
+ # Send a single ping.
+ ovs_sbx "test_psample" ip netns exec client ping -I c1 172.31.110.20 -c 1 || return 1
+
+ # We should have received one userspace action upcall and 2 psample packets.
+ ovs_wait grep -q "userspace action command" $ovs_dir/s0.out || return 1
+
+ # client -> server samples should only contain the first 14 bytes of the packet.
+ ovs_wait grep -qE "rate:4294967295,group:1,cookie:c0ffee data:[0-9a-f]{28}$" \
+ $ovs_dir/stdout || return 1
+
+ ovs_wait grep -q "rate:4294967295,group:2,cookie:eeff0c" $ovs_dir/stdout || return 1
+
+ return 0
+}
+
# drop_reason test
# - drop packets and verify the right drop reason is reported
test_drop_reason() {
@@ -599,7 +739,8 @@ test_upcall_interfaces() {
ovs_add_netns_and_veths "test_upcall_interfaces" ui0 upc left0 l0 \
172.31.110.1/24 -u || return 1
- sleep 1
+ ovs_wait grep -q "listening on upcall packet handler" ${ovs_dir}/left0.out
+
info "sending arping"
ip netns exec upc arping -I l0 172.31.110.20 -c 1 \
>$ovs_dir/arping.stdout 2>$ovs_dir/arping.stderr
@@ -613,16 +754,20 @@ run_test() {
tname="$1"
tdesc="$2"
- if ! lsmod | grep openvswitch >/dev/null 2>&1; then
- stdbuf -o0 printf "TEST: %-60s [NOMOD]\n" "${tdesc}"
- return $ksft_skip
- fi
-
if python3 ovs-dpctl.py -h 2>&1 | \
grep -E "Need to (install|upgrade) the python" >/dev/null 2>&1; then
stdbuf -o0 printf "TEST: %-60s [PYLIB]\n" "${tdesc}"
return $ksft_skip
fi
+
+ python3 ovs-dpctl.py show >/dev/null 2>&1 || \
+ echo "[DPCTL] show exception."
+
+ if ! lsmod | grep openvswitch >/dev/null 2>&1; then
+ stdbuf -o0 printf "TEST: %-60s [NOMOD]\n" "${tdesc}"
+ return $ksft_skip
+ fi
+
printf "TEST: %-60s [START]\n" "${tname}"
unset IFS
diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
index 5e0e539a323d..8a0396bfaf99 100644
--- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
+++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
@@ -8,8 +8,10 @@ import argparse
import errno
import ipaddress
import logging
+import math
import multiprocessing
import re
+import socket
import struct
import sys
import time
@@ -26,13 +28,16 @@ try:
from pyroute2.netlink import genlmsg
from pyroute2.netlink import nla
from pyroute2.netlink import nlmsg_atoms
+ from pyroute2.netlink.event import EventSocket
from pyroute2.netlink.exceptions import NetlinkError
from pyroute2.netlink.generic import GenericNetlinkSocket
+ from pyroute2.netlink.nlsocket import Marshal
import pyroute2
+ import pyroute2.iproute
except ModuleNotFoundError:
print("Need to install the python pyroute2 package >= 0.6.")
- sys.exit(0)
+ sys.exit(1)
OVS_DATAPATH_FAMILY = "ovs_datapath"
@@ -58,6 +63,7 @@ OVS_FLOW_CMD_DEL = 2
OVS_FLOW_CMD_GET = 3
OVS_FLOW_CMD_SET = 4
+UINT32_MAX = 0xFFFFFFFF
def macstr(mac):
outstr = ":".join(["%02X" % i for i in mac])
@@ -198,6 +204,18 @@ def convert_ipv4(data):
return int(ipaddress.IPv4Address(ip)), int(ipaddress.IPv4Address(mask))
+def convert_ipv6(data):
+ ip, _, mask = data.partition('/')
+
+ if not ip:
+ ip = mask = 0
+ elif not mask:
+ mask = 'ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff'
+ elif mask.isdigit():
+ mask = ipaddress.IPv6Network("::/" + mask).hostmask
+
+ return ipaddress.IPv6Address(ip).packed, ipaddress.IPv6Address(mask).packed
+
def convert_int(size):
def convert_int_sized(data):
value, _, mask = data.partition('/')
@@ -267,6 +285,75 @@ def parse_extract_field(
return str_skipped, data
+def parse_attrs(actstr, attr_desc):
+ """Parses the given action string and returns a list of netlink
+ attributes based on a list of attribute descriptions.
+
+ Each element in the attribute description list is a tuple such as:
+ (name, attr_name, parse_func)
+ where:
+ name: is the string representing the attribute
+ attr_name: is the name of the attribute as defined in the uAPI.
+ parse_func: is a callable accepting a string and returning either
+ a single object (the parsed attribute value) or a tuple of
+ two values (the parsed attribute value and the remaining string)
+
+ Returns a list of attributes and the remaining string.
+ """
+ def parse_attr(actstr, key, func):
+ actstr = actstr[len(key) :]
+
+ if not func:
+ return None, actstr
+
+ delim = actstr[0]
+ actstr = actstr[1:]
+
+ if delim == "=":
+ pos = strcspn(actstr, ",)")
+ ret = func(actstr[:pos])
+ else:
+ ret = func(actstr)
+
+ if isinstance(ret, tuple):
+ (datum, actstr) = ret
+ else:
+ datum = ret
+ actstr = actstr[strcspn(actstr, ",)"):]
+
+ if delim == "(":
+ if not actstr or actstr[0] != ")":
+ raise ValueError("Action contains unbalanced parentheses")
+
+ actstr = actstr[1:]
+
+ actstr = actstr[strspn(actstr, ", ") :]
+
+ return datum, actstr
+
+ attrs = []
+ attr_desc = list(attr_desc)
+ while actstr and actstr[0] != ")" and attr_desc:
+ found = False
+ for i, (key, attr, func) in enumerate(attr_desc):
+ if actstr.startswith(key):
+ datum, actstr = parse_attr(actstr, key, func)
+ attrs.append([attr, datum])
+ found = True
+ del attr_desc[i]
+
+ if not found:
+ raise ValueError("Unknown attribute: '%s'" % actstr)
+
+ actstr = actstr[strspn(actstr, ", ") :]
+
+ if actstr[0] != ")":
+ raise ValueError("Action string contains extra garbage or has "
+ "unbalanced parenthesis: '%s'" % actstr)
+
+ return attrs, actstr[1:]
+
+
class ovs_dp_msg(genlmsg):
# include the OVS version
# We need a custom header rather than just being able to rely on
@@ -282,15 +369,15 @@ class ovsactions(nla):
("OVS_ACTION_ATTR_UNSPEC", "none"),
("OVS_ACTION_ATTR_OUTPUT", "uint32"),
("OVS_ACTION_ATTR_USERSPACE", "userspace"),
- ("OVS_ACTION_ATTR_SET", "none"),
+ ("OVS_ACTION_ATTR_SET", "ovskey"),
("OVS_ACTION_ATTR_PUSH_VLAN", "none"),
("OVS_ACTION_ATTR_POP_VLAN", "flag"),
- ("OVS_ACTION_ATTR_SAMPLE", "none"),
+ ("OVS_ACTION_ATTR_SAMPLE", "sample"),
("OVS_ACTION_ATTR_RECIRC", "uint32"),
("OVS_ACTION_ATTR_HASH", "none"),
("OVS_ACTION_ATTR_PUSH_MPLS", "none"),
("OVS_ACTION_ATTR_POP_MPLS", "flag"),
- ("OVS_ACTION_ATTR_SET_MASKED", "none"),
+ ("OVS_ACTION_ATTR_SET_MASKED", "ovskey"),
("OVS_ACTION_ATTR_CT", "ctact"),
("OVS_ACTION_ATTR_TRUNC", "uint32"),
("OVS_ACTION_ATTR_PUSH_ETH", "none"),
@@ -304,8 +391,85 @@ class ovsactions(nla):
("OVS_ACTION_ATTR_ADD_MPLS", "none"),
("OVS_ACTION_ATTR_DEC_TTL", "none"),
("OVS_ACTION_ATTR_DROP", "uint32"),
+ ("OVS_ACTION_ATTR_PSAMPLE", "psample"),
)
+ class psample(nla):
+ nla_flags = NLA_F_NESTED
+
+ nla_map = (
+ ("OVS_PSAMPLE_ATTR_UNSPEC", "none"),
+ ("OVS_PSAMPLE_ATTR_GROUP", "uint32"),
+ ("OVS_PSAMPLE_ATTR_COOKIE", "array(uint8)"),
+ )
+
+ def dpstr(self, more=False):
+ args = "group=%d" % self.get_attr("OVS_PSAMPLE_ATTR_GROUP")
+
+ cookie = self.get_attr("OVS_PSAMPLE_ATTR_COOKIE")
+ if cookie:
+ args += ",cookie(%s)" % \
+ "".join(format(x, "02x") for x in cookie)
+
+ return "psample(%s)" % args
+
+ def parse(self, actstr):
+ desc = (
+ ("group", "OVS_PSAMPLE_ATTR_GROUP", int),
+ ("cookie", "OVS_PSAMPLE_ATTR_COOKIE",
+ lambda x: list(bytearray.fromhex(x)))
+ )
+
+ attrs, actstr = parse_attrs(actstr, desc)
+
+ for attr in attrs:
+ self["attrs"].append(attr)
+
+ return actstr
+
+ class sample(nla):
+ nla_flags = NLA_F_NESTED
+
+ nla_map = (
+ ("OVS_SAMPLE_ATTR_UNSPEC", "none"),
+ ("OVS_SAMPLE_ATTR_PROBABILITY", "uint32"),
+ ("OVS_SAMPLE_ATTR_ACTIONS", "ovsactions"),
+ )
+
+ def dpstr(self, more=False):
+ args = []
+
+ args.append("sample={:.2f}%".format(
+ 100 * self.get_attr("OVS_SAMPLE_ATTR_PROBABILITY") /
+ UINT32_MAX))
+
+ actions = self.get_attr("OVS_SAMPLE_ATTR_ACTIONS")
+ if actions:
+ args.append("actions(%s)" % actions.dpstr(more))
+
+ return "sample(%s)" % ",".join(args)
+
+ def parse(self, actstr):
+ def parse_nested_actions(actstr):
+ subacts = ovsactions()
+ parsed_len = subacts.parse(actstr)
+ return subacts, actstr[parsed_len :]
+
+ def percent_to_rate(percent):
+ percent = float(percent.strip('%'))
+ return int(math.floor(UINT32_MAX * (percent / 100.0) + .5))
+
+ desc = (
+ ("sample", "OVS_SAMPLE_ATTR_PROBABILITY", percent_to_rate),
+ ("actions", "OVS_SAMPLE_ATTR_ACTIONS", parse_nested_actions),
+ )
+ attrs, actstr = parse_attrs(actstr, desc)
+
+ for attr in attrs:
+ self["attrs"].append(attr)
+
+ return actstr
+
class ctact(nla):
nla_flags = NLA_F_NESTED
@@ -427,50 +591,77 @@ class ovsactions(nla):
print_str += "userdata="
for f in self.get_attr("OVS_USERSPACE_ATTR_USERDATA"):
print_str += "%x." % f
- if self.get_attr("OVS_USERSPACE_ATTR_TUN_PORT") is not None:
+ if self.get_attr("OVS_USERSPACE_ATTR_EGRESS_TUN_PORT") is not None:
print_str += "egress_tun_port=%d" % self.get_attr(
- "OVS_USERSPACE_ATTR_TUN_PORT"
+ "OVS_USERSPACE_ATTR_EGRESS_TUN_PORT"
)
print_str += ")"
return print_str
+ def parse(self, actstr):
+ attrs_desc = (
+ ("pid", "OVS_USERSPACE_ATTR_PID", int),
+ ("userdata", "OVS_USERSPACE_ATTR_USERDATA",
+ lambda x: list(bytearray.fromhex(x))),
+ ("egress_tun_port", "OVS_USERSPACE_ATTR_EGRESS_TUN_PORT", int)
+ )
+
+ attrs, actstr = parse_attrs(actstr, attrs_desc)
+ for attr in attrs:
+ self["attrs"].append(attr)
+
+ return actstr
+
def dpstr(self, more=False):
print_str = ""
- for field in self.nla_map:
+ for field in self["attrs"]:
if field[1] == "none" or self.get_attr(field[0]) is None:
continue
if print_str != "":
print_str += ","
- if field[1] == "uint32":
- if field[0] == "OVS_ACTION_ATTR_OUTPUT":
- print_str += "%d" % int(self.get_attr(field[0]))
- elif field[0] == "OVS_ACTION_ATTR_RECIRC":
- print_str += "recirc(0x%x)" % int(self.get_attr(field[0]))
- elif field[0] == "OVS_ACTION_ATTR_TRUNC":
- print_str += "trunc(%d)" % int(self.get_attr(field[0]))
- elif field[0] == "OVS_ACTION_ATTR_DROP":
- print_str += "drop(%d)" % int(self.get_attr(field[0]))
- elif field[1] == "flag":
- if field[0] == "OVS_ACTION_ATTR_CT_CLEAR":
- print_str += "ct_clear"
- elif field[0] == "OVS_ACTION_ATTR_POP_VLAN":
- print_str += "pop_vlan"
- elif field[0] == "OVS_ACTION_ATTR_POP_ETH":
- print_str += "pop_eth"
- elif field[0] == "OVS_ACTION_ATTR_POP_NSH":
- print_str += "pop_nsh"
- elif field[0] == "OVS_ACTION_ATTR_POP_MPLS":
- print_str += "pop_mpls"
+ if field[0] == "OVS_ACTION_ATTR_OUTPUT":
+ print_str += "%d" % int(self.get_attr(field[0]))
+ elif field[0] == "OVS_ACTION_ATTR_RECIRC":
+ print_str += "recirc(0x%x)" % int(self.get_attr(field[0]))
+ elif field[0] == "OVS_ACTION_ATTR_TRUNC":
+ print_str += "trunc(%d)" % int(self.get_attr(field[0]))
+ elif field[0] == "OVS_ACTION_ATTR_DROP":
+ print_str += "drop(%d)" % int(self.get_attr(field[0]))
+ elif field[0] == "OVS_ACTION_ATTR_CT_CLEAR":
+ print_str += "ct_clear"
+ elif field[0] == "OVS_ACTION_ATTR_POP_VLAN":
+ print_str += "pop_vlan"
+ elif field[0] == "OVS_ACTION_ATTR_POP_ETH":
+ print_str += "pop_eth"
+ elif field[0] == "OVS_ACTION_ATTR_POP_NSH":
+ print_str += "pop_nsh"
+ elif field[0] == "OVS_ACTION_ATTR_POP_MPLS":
+ print_str += "pop_mpls"
else:
datum = self.get_attr(field[0])
if field[0] == "OVS_ACTION_ATTR_CLONE":
print_str += "clone("
print_str += datum.dpstr(more)
print_str += ")"
+ elif field[0] == "OVS_ACTION_ATTR_SET" or \
+ field[0] == "OVS_ACTION_ATTR_SET_MASKED":
+ print_str += "set"
+ field = datum
+ mask = None
+ if field[0] == "OVS_ACTION_ATTR_SET_MASKED":
+ print_str += "_masked"
+ field = datum[0]
+ mask = datum[1]
+ print_str += "("
+ print_str += field.dpstr(mask, more)
+ print_str += ")"
else:
- print_str += datum.dpstr(more)
+ try:
+ print_str += datum.dpstr(more)
+ except:
+ print_str += "{ATTR: %s not decoded}" % field[0]
return print_str
@@ -489,7 +680,7 @@ class ovsactions(nla):
actstr, reason = parse_extract_field(
actstr,
"drop(",
- "([0-9]+)",
+ r"([0-9]+)",
lambda x: int(x, 0),
False,
None,
@@ -502,9 +693,9 @@ class ovsactions(nla):
actstr = actstr[len("drop"): ]
return (totallen - len(actstr))
- elif parse_starts_block(actstr, "^(\d+)", False, True):
+ elif parse_starts_block(actstr, r"^(\d+)", False, True):
actstr, output = parse_extract_field(
- actstr, None, "(\d+)", lambda x: int(x), False, "0"
+ actstr, None, r"(\d+)", lambda x: int(x), False, "0"
)
self["attrs"].append(["OVS_ACTION_ATTR_OUTPUT", output])
parsed = True
@@ -512,7 +703,7 @@ class ovsactions(nla):
actstr, recircid = parse_extract_field(
actstr,
"recirc(",
- "([0-9a-fA-Fx]+)",
+ r"([0-9a-fA-Fx]+)",
lambda x: int(x, 0),
False,
0,
@@ -531,7 +722,7 @@ class ovsactions(nla):
for flat_act in parse_flat_map:
if parse_starts_block(actstr, flat_act[0], False):
actstr = actstr[len(flat_act[0]):]
- self["attrs"].append([flat_act[1]])
+ self["attrs"].append([flat_act[1], True])
actstr = actstr[strspn(actstr, ", ") :]
parsed = True
@@ -544,6 +735,25 @@ class ovsactions(nla):
self["attrs"].append(("OVS_ACTION_ATTR_CLONE", subacts))
actstr = actstr[parsedLen:]
parsed = True
+ elif parse_starts_block(actstr, "set(", False):
+ parencount += 1
+ k = ovskey()
+ actstr = actstr[len("set("):]
+ actstr = k.parse(actstr, None)
+ self["attrs"].append(("OVS_ACTION_ATTR_SET", k))
+ if not actstr.startswith(")"):
+ actstr = ")" + actstr
+ parsed = True
+ elif parse_starts_block(actstr, "set_masked(", False):
+ parencount += 1
+ k = ovskey()
+ m = ovskey()
+ actstr = actstr[len("set_masked("):]
+ actstr = k.parse(actstr, m)
+ self["attrs"].append(("OVS_ACTION_ATTR_SET_MASKED", [k, m]))
+ if not actstr.startswith(")"):
+ actstr = ")" + actstr
+ parsed = True
elif parse_starts_block(actstr, "ct(", False):
parencount += 1
actstr = actstr[len("ct(") :]
@@ -588,17 +798,17 @@ class ovsactions(nla):
actstr = actstr[3:]
actstr, ip_block_min = parse_extract_field(
- actstr, "=", "([0-9a-fA-F\.]+)", str, False
+ actstr, "=", r"([0-9a-fA-F\.]+)", str, False
)
actstr, ip_block_max = parse_extract_field(
- actstr, "-", "([0-9a-fA-F\.]+)", str, False
+ actstr, "-", r"([0-9a-fA-F\.]+)", str, False
)
actstr, proto_min = parse_extract_field(
- actstr, ":", "(\d+)", int, False
+ actstr, ":", r"(\d+)", int, False
)
actstr, proto_max = parse_extract_field(
- actstr, "-", "(\d+)", int, False
+ actstr, "-", r"(\d+)", int, False
)
if t is not None:
@@ -637,6 +847,37 @@ class ovsactions(nla):
self["attrs"].append(["OVS_ACTION_ATTR_CT", ctact])
parsed = True
+ elif parse_starts_block(actstr, "sample(", False):
+ sampleact = self.sample()
+ actstr = sampleact.parse(actstr[len("sample(") : ])
+ self["attrs"].append(["OVS_ACTION_ATTR_SAMPLE", sampleact])
+ parsed = True
+
+ elif parse_starts_block(actstr, "psample(", False):
+ psampleact = self.psample()
+ actstr = psampleact.parse(actstr[len("psample(") : ])
+ self["attrs"].append(["OVS_ACTION_ATTR_PSAMPLE", psampleact])
+ parsed = True
+
+ elif parse_starts_block(actstr, "userspace(", False):
+ uact = self.userspace()
+ actstr = uact.parse(actstr[len("userspace(") : ])
+ self["attrs"].append(["OVS_ACTION_ATTR_USERSPACE", uact])
+ parsed = True
+
+ elif parse_starts_block(actstr, "trunc(", False):
+ parencount += 1
+ actstr, val = parse_extract_field(
+ actstr,
+ "trunc(",
+ r"([0-9]+)",
+ int,
+ False,
+ None,
+ )
+ self["attrs"].append(["OVS_ACTION_ATTR_TRUNC", val])
+ parsed = True
+
actstr = actstr[strspn(actstr, ", ") :]
while parencount > 0:
parencount -= 1
@@ -675,7 +916,7 @@ class ovskey(nla):
("OVS_KEY_ATTR_ARP", "ovs_key_arp"),
("OVS_KEY_ATTR_ND", "ovs_key_nd"),
("OVS_KEY_ATTR_SKB_MARK", "uint32"),
- ("OVS_KEY_ATTR_TUNNEL", "none"),
+ ("OVS_KEY_ATTR_TUNNEL", "ovs_key_tunnel"),
("OVS_KEY_ATTR_SCTP", "ovs_key_sctp"),
("OVS_KEY_ATTR_TCP_FLAGS", "be16"),
("OVS_KEY_ATTR_DP_HASH", "uint32"),
@@ -907,21 +1148,21 @@ class ovskey(nla):
"src",
"src",
lambda x: str(ipaddress.IPv6Address(x)),
- lambda x: int.from_bytes(x, "big"),
- lambda x: ipaddress.IPv6Address(x),
+ lambda x: ipaddress.IPv6Address(x).packed if x else 0,
+ convert_ipv6,
),
(
"dst",
"dst",
lambda x: str(ipaddress.IPv6Address(x)),
- lambda x: int.from_bytes(x, "big"),
- lambda x: ipaddress.IPv6Address(x),
+ lambda x: ipaddress.IPv6Address(x).packed if x else 0,
+ convert_ipv6,
),
- ("label", "label", "%d", int),
- ("proto", "proto", "%d", int),
- ("tclass", "tclass", "%d", int),
- ("hlimit", "hlimit", "%d", int),
- ("frag", "frag", "%d", int),
+ ("label", "label", "%d", lambda x: int(x) if x else 0),
+ ("proto", "proto", "%d", lambda x: int(x) if x else 0),
+ ("tclass", "tclass", "%d", lambda x: int(x) if x else 0),
+ ("hlimit", "hlimit", "%d", lambda x: int(x) if x else 0),
+ ("frag", "frag", "%d", lambda x: int(x) if x else 0),
)
def __init__(
@@ -1119,7 +1360,7 @@ class ovskey(nla):
"target",
"target",
lambda x: str(ipaddress.IPv6Address(x)),
- lambda x: int.from_bytes(x, "big"),
+ convert_ipv6,
),
("sll", "sll", macstr, lambda x: int.from_bytes(x, "big")),
("tll", "tll", macstr, lambda x: int.from_bytes(x, "big")),
@@ -1204,13 +1445,13 @@ class ovskey(nla):
"src",
"src",
lambda x: str(ipaddress.IPv6Address(x)),
- lambda x: int.from_bytes(x, "big", convertmac),
+ convert_ipv6,
),
(
"dst",
"dst",
lambda x: str(ipaddress.IPv6Address(x)),
- lambda x: int.from_bytes(x, "big"),
+ convert_ipv6,
),
("tp_src", "tp_src", "%d", int),
("tp_dst", "tp_dst", "%d", int),
@@ -1235,6 +1476,163 @@ class ovskey(nla):
init=init,
)
+ class ovs_key_tunnel(nla):
+ nla_flags = NLA_F_NESTED
+
+ nla_map = (
+ ("OVS_TUNNEL_KEY_ATTR_ID", "be64"),
+ ("OVS_TUNNEL_KEY_ATTR_IPV4_SRC", "ipaddr"),
+ ("OVS_TUNNEL_KEY_ATTR_IPV4_DST", "ipaddr"),
+ ("OVS_TUNNEL_KEY_ATTR_TOS", "uint8"),
+ ("OVS_TUNNEL_KEY_ATTR_TTL", "uint8"),
+ ("OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT", "flag"),
+ ("OVS_TUNNEL_KEY_ATTR_CSUM", "flag"),
+ ("OVS_TUNNEL_KEY_ATTR_OAM", "flag"),
+ ("OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS", "array(uint32)"),
+ ("OVS_TUNNEL_KEY_ATTR_TP_SRC", "be16"),
+ ("OVS_TUNNEL_KEY_ATTR_TP_DST", "be16"),
+ ("OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS", "none"),
+ ("OVS_TUNNEL_KEY_ATTR_IPV6_SRC", "ipaddr"),
+ ("OVS_TUNNEL_KEY_ATTR_IPV6_DST", "ipaddr"),
+ ("OVS_TUNNEL_KEY_ATTR_PAD", "none"),
+ ("OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS", "none"),
+ ("OVS_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE", "flag"),
+ )
+
+ def parse(self, flowstr, mask=None):
+ if not flowstr.startswith("tunnel("):
+ return None, None
+
+ k = ovskey.ovs_key_tunnel()
+ if mask is not None:
+ mask = ovskey.ovs_key_tunnel()
+
+ flowstr = flowstr[len("tunnel("):]
+
+ v6_address = None
+
+ fields = [
+ ("tun_id=", r"(\d+)", int, "OVS_TUNNEL_KEY_ATTR_ID",
+ 0xffffffffffffffff, None, None),
+
+ ("src=", r"([0-9a-fA-F\.]+)", str,
+ "OVS_TUNNEL_KEY_ATTR_IPV4_SRC", "255.255.255.255", "0.0.0.0",
+ False),
+ ("dst=", r"([0-9a-fA-F\.]+)", str,
+ "OVS_TUNNEL_KEY_ATTR_IPV4_DST", "255.255.255.255", "0.0.0.0",
+ False),
+
+ ("ipv6_src=", r"([0-9a-fA-F:]+)", str,
+ "OVS_TUNNEL_KEY_ATTR_IPV6_SRC",
+ "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", "::", True),
+ ("ipv6_dst=", r"([0-9a-fA-F:]+)", str,
+ "OVS_TUNNEL_KEY_ATTR_IPV6_DST",
+ "ffff:ffff:ffff:ffff:ffff:ffff:ffff:ffff", "::", True),
+
+ ("tos=", r"(\d+)", int, "OVS_TUNNEL_KEY_ATTR_TOS", 255, 0,
+ None),
+ ("ttl=", r"(\d+)", int, "OVS_TUNNEL_KEY_ATTR_TTL", 255, 0,
+ None),
+
+ ("tp_src=", r"(\d+)", int, "OVS_TUNNEL_KEY_ATTR_TP_SRC",
+ 65535, 0, None),
+ ("tp_dst=", r"(\d+)", int, "OVS_TUNNEL_KEY_ATTR_TP_DST",
+ 65535, 0, None),
+ ]
+
+ forced_include = ["OVS_TUNNEL_KEY_ATTR_TTL"]
+
+ for prefix, regex, typ, attr_name, mask_val, default_val, v46_flag in fields:
+ flowstr, value = parse_extract_field(flowstr, prefix, regex, typ, False)
+ if not attr_name:
+ raise Exception("Bad list value in tunnel fields")
+
+ if value is None and attr_name in forced_include:
+ value = default_val
+ mask_val = default_val
+
+ if value is not None:
+ if v46_flag is not None:
+ if v6_address is None:
+ v6_address = v46_flag
+ if v46_flag != v6_address:
+ raise ValueError("Cannot mix v6 and v4 addresses")
+ k["attrs"].append([attr_name, value])
+ if mask is not None:
+ mask["attrs"].append([attr_name, mask_val])
+ else:
+ if v46_flag is not None:
+ if v6_address is None or v46_flag != v6_address:
+ continue
+ if mask is not None:
+ mask["attrs"].append([attr_name, default_val])
+
+ if k["attrs"][0][0] != "OVS_TUNNEL_KEY_ATTR_ID":
+ raise ValueError("Needs a tunid set")
+
+ if flowstr.startswith("flags("):
+ flowstr = flowstr[len("flags("):]
+ flagspos = flowstr.find(")")
+ flags = flowstr[:flagspos]
+ flowstr = flowstr[flagspos + 1:]
+
+ flag_attrs = {
+ "df": "OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT",
+ "csum": "OVS_TUNNEL_KEY_ATTR_CSUM",
+ "oam": "OVS_TUNNEL_KEY_ATTR_OAM"
+ }
+
+ for flag in flags.split("|"):
+ if flag in flag_attrs:
+ k["attrs"].append([flag_attrs[flag], True])
+ if mask is not None:
+ mask["attrs"].append([flag_attrs[flag], True])
+
+ flowstr = flowstr[strspn(flowstr, ", ") :]
+ return flowstr, k, mask
+
+ def dpstr(self, mask=None, more=False):
+ print_str = "tunnel("
+
+ flagsattrs = []
+ for k in self["attrs"]:
+ noprint = False
+ if k[0] == "OVS_TUNNEL_KEY_ATTR_ID":
+ print_str += "tun_id=%d" % k[1]
+ elif k[0] == "OVS_TUNNEL_KEY_ATTR_IPV4_SRC":
+ print_str += "src=%s" % k[1]
+ elif k[0] == "OVS_TUNNEL_KEY_ATTR_IPV4_DST":
+ print_str += "dst=%s" % k[1]
+ elif k[0] == "OVS_TUNNEL_KEY_ATTR_IPV6_SRC":
+ print_str += "ipv6_src=%s" % k[1]
+ elif k[0] == "OVS_TUNNEL_KEY_ATTR_IPV6_DST":
+ print_str += "ipv6_dst=%s" % k[1]
+ elif k[0] == "OVS_TUNNEL_KEY_ATTR_TOS":
+ print_str += "tos=%d" % k[1]
+ elif k[0] == "OVS_TUNNEL_KEY_ATTR_TTL":
+ print_str += "ttl=%d" % k[1]
+ elif k[0] == "OVS_TUNNEL_KEY_ATTR_TP_SRC":
+ print_str += "tp_src=%d" % k[1]
+ elif k[0] == "OVS_TUNNEL_KEY_ATTR_TP_DST":
+ print_str += "tp_dst=%d" % k[1]
+ elif k[0] == "OVS_TUNNEL_KEY_ATTR_DONT_FRAGMENT":
+ noprint = True
+ flagsattrs.append("df")
+ elif k[0] == "OVS_TUNNEL_KEY_ATTR_CSUM":
+ noprint = True
+ flagsattrs.append("csum")
+ elif k[0] == "OVS_TUNNEL_KEY_ATTR_OAM":
+ noprint = True
+ flagsattrs.append("oam")
+
+ if not noprint:
+ print_str += ","
+
+ if len(flagsattrs):
+ print_str += "flags(" + "|".join(flagsattrs) + ")"
+ print_str += ")"
+ return print_str
+
class ovs_key_mpls(nla):
fields = (("lse", ">I"),)
@@ -1243,6 +1641,7 @@ class ovskey(nla):
("OVS_KEY_ATTR_PRIORITY", "skb_priority", intparse),
("OVS_KEY_ATTR_SKB_MARK", "skb_mark", intparse),
("OVS_KEY_ATTR_RECIRC_ID", "recirc_id", intparse),
+ ("OVS_KEY_ATTR_TUNNEL", "tunnel", ovskey.ovs_key_tunnel),
("OVS_KEY_ATTR_DP_HASH", "dp_hash", intparse),
("OVS_KEY_ATTR_CT_STATE", "ct_state", parse_ct_state),
("OVS_KEY_ATTR_CT_ZONE", "ct_zone", intparse),
@@ -1309,7 +1708,7 @@ class ovskey(nla):
mask["attrs"].append([field[0], m])
self["attrs"].append([field[0], k])
- flowstr = flowstr[strspn(flowstr, "),") :]
+ flowstr = flowstr[strspn(flowstr, "), ") :]
return flowstr
@@ -1346,6 +1745,13 @@ class ovskey(nla):
True,
),
(
+ "OVS_KEY_ATTR_TUNNEL",
+ "tunnel",
+ None,
+ False,
+ False,
+ ),
+ (
"OVS_KEY_ATTR_CT_STATE",
"ct_state",
"0x%04x",
@@ -1617,7 +2023,7 @@ class OvsVport(GenericNetlinkSocket):
("OVS_VPORT_ATTR_PORT_NO", "uint32"),
("OVS_VPORT_ATTR_TYPE", "uint32"),
("OVS_VPORT_ATTR_NAME", "asciiz"),
- ("OVS_VPORT_ATTR_OPTIONS", "none"),
+ ("OVS_VPORT_ATTR_OPTIONS", "vportopts"),
("OVS_VPORT_ATTR_UPCALL_PID", "array(uint32)"),
("OVS_VPORT_ATTR_STATS", "vportstats"),
("OVS_VPORT_ATTR_PAD", "none"),
@@ -1625,6 +2031,13 @@ class OvsVport(GenericNetlinkSocket):
("OVS_VPORT_ATTR_NETNSID", "uint32"),
)
+ class vportopts(nla):
+ nla_map = (
+ ("OVS_TUNNEL_ATTR_UNSPEC", "none"),
+ ("OVS_TUNNEL_ATTR_DST_PORT", "uint16"),
+ ("OVS_TUNNEL_ATTR_EXTENSION", "none"),
+ )
+
class vportstats(nla):
fields = (
("rx_packets", "=Q"),
@@ -1693,7 +2106,7 @@ class OvsVport(GenericNetlinkSocket):
raise ne
return reply
- def attach(self, dpindex, vport_ifname, ptype):
+ def attach(self, dpindex, vport_ifname, ptype, dport, lwt):
msg = OvsVport.ovs_vport_msg()
msg["cmd"] = OVS_VPORT_CMD_NEW
@@ -1702,12 +2115,43 @@ class OvsVport(GenericNetlinkSocket):
msg["dpifindex"] = dpindex
port_type = OvsVport.str_to_type(ptype)
- msg["attrs"].append(["OVS_VPORT_ATTR_TYPE", port_type])
msg["attrs"].append(["OVS_VPORT_ATTR_NAME", vport_ifname])
msg["attrs"].append(
["OVS_VPORT_ATTR_UPCALL_PID", [self.upcall_packet.epid]]
)
+ TUNNEL_DEFAULTS = [("geneve", 6081),
+ ("vxlan", 4789)]
+
+ for tnl in TUNNEL_DEFAULTS:
+ if ptype == tnl[0]:
+ if not dport:
+ dport = tnl[1]
+
+ if not lwt:
+ vportopt = OvsVport.ovs_vport_msg.vportopts()
+ vportopt["attrs"].append(
+ ["OVS_TUNNEL_ATTR_DST_PORT", socket.htons(dport)]
+ )
+ msg["attrs"].append(
+ ["OVS_VPORT_ATTR_OPTIONS", vportopt]
+ )
+ else:
+ port_type = OvsVport.OVS_VPORT_TYPE_NETDEV
+ ipr = pyroute2.iproute.IPRoute()
+
+ if tnl[0] == "geneve":
+ ipr.link("add", ifname=vport_ifname, kind=tnl[0],
+ geneve_port=dport,
+ geneve_collect_metadata=True,
+ geneve_udp_zero_csum6_rx=1)
+ elif tnl[0] == "vxlan":
+ ipr.link("add", ifname=vport_ifname, kind=tnl[0],
+ vxlan_learning=0, vxlan_collect_metadata=1,
+ vxlan_udp_zero_csum6_rx=1, vxlan_port=dport)
+ break
+ msg["attrs"].append(["OVS_VPORT_ATTR_TYPE", port_type])
+
try:
reply = self.nlm_request(
msg, msg_type=self.prid, msg_flags=NLM_F_REQUEST | NLM_F_ACK
@@ -2018,10 +2462,71 @@ class OvsFlow(GenericNetlinkSocket):
print("MISS upcall[%d/%s]: %s" % (seq, pktpres, keystr), flush=True)
def execute(self, packetmsg):
- print("userspace execute command")
+ print("userspace execute command", flush=True)
def action(self, packetmsg):
- print("userspace action command")
+ print("userspace action command", flush=True)
+
+
+class psample_sample(genlmsg):
+ nla_map = (
+ ("PSAMPLE_ATTR_IIFINDEX", "none"),
+ ("PSAMPLE_ATTR_OIFINDEX", "none"),
+ ("PSAMPLE_ATTR_ORIGSIZE", "none"),
+ ("PSAMPLE_ATTR_SAMPLE_GROUP", "uint32"),
+ ("PSAMPLE_ATTR_GROUP_SEQ", "none"),
+ ("PSAMPLE_ATTR_SAMPLE_RATE", "uint32"),
+ ("PSAMPLE_ATTR_DATA", "array(uint8)"),
+ ("PSAMPLE_ATTR_GROUP_REFCOUNT", "none"),
+ ("PSAMPLE_ATTR_TUNNEL", "none"),
+ ("PSAMPLE_ATTR_PAD", "none"),
+ ("PSAMPLE_ATTR_OUT_TC", "none"),
+ ("PSAMPLE_ATTR_OUT_TC_OCC", "none"),
+ ("PSAMPLE_ATTR_LATENCY", "none"),
+ ("PSAMPLE_ATTR_TIMESTAMP", "none"),
+ ("PSAMPLE_ATTR_PROTO", "none"),
+ ("PSAMPLE_ATTR_USER_COOKIE", "array(uint8)"),
+ )
+
+ def dpstr(self):
+ fields = []
+ data = ""
+ for (attr, value) in self["attrs"]:
+ if attr == "PSAMPLE_ATTR_SAMPLE_GROUP":
+ fields.append("group:%d" % value)
+ if attr == "PSAMPLE_ATTR_SAMPLE_RATE":
+ fields.append("rate:%d" % value)
+ if attr == "PSAMPLE_ATTR_USER_COOKIE":
+ value = "".join(format(x, "02x") for x in value)
+ fields.append("cookie:%s" % value)
+ if attr == "PSAMPLE_ATTR_DATA" and len(value) > 0:
+ data = "data:%s" % "".join(format(x, "02x") for x in value)
+
+ return ("%s %s" % (",".join(fields), data)).strip()
+
+
+class psample_msg(Marshal):
+ PSAMPLE_CMD_SAMPLE = 0
+ PSAMPLE_CMD_GET_GROUP = 1
+ PSAMPLE_CMD_NEW_GROUP = 2
+ PSAMPLE_CMD_DEL_GROUP = 3
+ PSAMPLE_CMD_SET_FILTER = 4
+ msg_map = {PSAMPLE_CMD_SAMPLE: psample_sample}
+
+
+class PsampleEvent(EventSocket):
+ genl_family = "psample"
+ mcast_groups = ["packets"]
+ marshal_class = psample_msg
+
+ def read_samples(self):
+ print("listening for psample events", flush=True)
+ while True:
+ try:
+ for msg in self.get():
+ print(msg.dpstr(), flush=True)
+ except NetlinkError as ne:
+ raise ne
def print_ovsdp_full(dp_lookup_rep, ifindex, ndb=NDB(), vpl=OvsVport()):
@@ -2053,12 +2558,19 @@ def print_ovsdp_full(dp_lookup_rep, ifindex, ndb=NDB(), vpl=OvsVport()):
for iface in ndb.interfaces:
rep = vpl.info(iface.ifname, ifindex)
if rep is not None:
+ opts = ""
+ vpo = rep.get_attr("OVS_VPORT_ATTR_OPTIONS")
+ if vpo:
+ dpo = vpo.get_attr("OVS_TUNNEL_ATTR_DST_PORT")
+ if dpo:
+ opts += " tnl-dport:%s" % socket.ntohs(dpo)
print(
- " port %d: %s (%s)"
+ " port %d: %s (%s%s)"
% (
rep.get_attr("OVS_VPORT_ATTR_PORT_NO"),
rep.get_attr("OVS_VPORT_ATTR_NAME"),
OvsVport.type_to_str(rep.get_attr("OVS_VPORT_ATTR_TYPE")),
+ opts,
)
)
@@ -2081,7 +2593,7 @@ def main(argv):
help="Increment 'verbose' output counter.",
default=0,
)
- subparsers = parser.add_subparsers()
+ subparsers = parser.add_subparsers(dest="subcommand")
showdpcmd = subparsers.add_parser("show")
showdpcmd.add_argument(
@@ -2120,12 +2632,30 @@ def main(argv):
"--ptype",
type=str,
default="netdev",
- choices=["netdev", "internal"],
+ choices=["netdev", "internal", "geneve", "vxlan"],
help="Interface type (default netdev)",
)
+ addifcmd.add_argument(
+ "-p",
+ "--dport",
+ type=int,
+ default=0,
+ help="Destination port (0 for default)"
+ )
+ addifcmd.add_argument(
+ "-l",
+ "--lwt",
+ type=bool,
+ default=True,
+ help="Use LWT infrastructure instead of vport (default true)."
+ )
delifcmd = subparsers.add_parser("del-if")
delifcmd.add_argument("dpname", help="Datapath Name")
delifcmd.add_argument("delif", help="Interface name for adding")
+ delifcmd.add_argument("-d",
+ "--dellink",
+ type=bool, default=False,
+ help="Delete the link as well.")
dumpflcmd = subparsers.add_parser("dump-flows")
dumpflcmd.add_argument("dumpdp", help="Datapath Name")
@@ -2138,6 +2668,8 @@ def main(argv):
delfscmd = subparsers.add_parser("del-flows")
delfscmd.add_argument("flsbr", help="Datapath name")
+ subparsers.add_parser("psample-events")
+
args = parser.parse_args()
if args.verbose > 0:
@@ -2152,6 +2684,9 @@ def main(argv):
sys.setrecursionlimit(100000)
+ if args.subcommand == "psample-events":
+ PsampleEvent().read_samples()
+
if hasattr(args, "showdp"):
found = False
for iface in ndb.interfaces:
@@ -2186,7 +2721,8 @@ def main(argv):
print("DP '%s' not found." % args.dpname)
return 1
dpindex = rep["dpifindex"]
- rep = ovsvp.attach(rep["dpifindex"], args.addif, args.ptype)
+ rep = ovsvp.attach(rep["dpifindex"], args.addif, args.ptype,
+ args.dport, args.lwt)
msg = "vport '%s'" % args.addif
if rep and rep["header"]["error"] is None:
msg += " added."
@@ -2207,6 +2743,9 @@ def main(argv):
msg += " removed."
else:
msg += " failed to remove."
+ if args.dellink:
+ ipr = pyroute2.iproute.IPRoute()
+ ipr.link("del", index=ipr.link_lookup(ifname=args.delif)[0])
elif hasattr(args, "dumpdp"):
rep = ovsdp.info(args.dumpdp, 0)
if rep is None:
diff --git a/tools/testing/selftests/net/openvswitch/settings b/tools/testing/selftests/net/openvswitch/settings
new file mode 100644
index 000000000000..e2206265f67c
--- /dev/null
+++ b/tools/testing/selftests/net/openvswitch/settings
@@ -0,0 +1 @@
+timeout=900
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index cfc84958025a..5175c0c83a23 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -842,25 +842,97 @@ setup_bridge() {
run_cmd ${ns_a} ip link set veth_A-C master br0
}
+setup_ovs_via_internal_utility() {
+ type="${1}"
+ a_addr="${2}"
+ b_addr="${3}"
+ dport="${4}"
+
+ run_cmd python3 ./openvswitch/ovs-dpctl.py add-if ovs_br0 ${type}_a -t ${type} || return 1
+
+ ports=$(python3 ./openvswitch/ovs-dpctl.py show)
+ br0_port=$(echo "$ports" | grep -E "\sovs_br0" | sed -e 's@port @@' | cut -d: -f1 | xargs)
+ type_a_port=$(echo "$ports" | grep ${type}_a | sed -e 's@port @@' | cut -d: -f1 | xargs)
+ veth_a_port=$(echo "$ports" | grep veth_A | sed -e 's@port @@' | cut -d: -f1 | xargs)
+
+ v4_a_tun="${prefix4}.${a_r1}.1"
+ v4_b_tun="${prefix4}.${b_r1}.1"
+
+ v6_a_tun="${prefix6}:${a_r1}::1"
+ v6_b_tun="${prefix6}:${b_r1}::1"
+
+ if [ "${v4_a_tun}" = "${a_addr}" ]; then
+ run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \
+ "recirc_id(0),in_port(${veth_a_port}),eth(),eth_type(0x0800),ipv4()" \
+ "set(tunnel(tun_id=1,dst=${v4_b_tun},ttl=64,tp_dst=${dport},flags(df|csum))),${type_a_port}"
+ run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \
+ "recirc_id(0),in_port(${veth_a_port}),eth(),eth_type(0x86dd),ipv6()" \
+ "set(tunnel(tun_id=1,dst=${v4_b_tun},ttl=64,tp_dst=${dport},flags(df|csum))),${type_a_port}"
+ run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \
+ "recirc_id(0),tunnel(tun_id=1,src=${v4_b_tun},dst=${v4_a_tun}),in_port(${type_a_port}),eth(),eth_type(0x0800),ipv4()" \
+ "${veth_a_port}"
+ run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \
+ "recirc_id(0),tunnel(tun_id=1,src=${v4_b_tun},dst=${v4_a_tun}),in_port(${type_a_port}),eth(),eth_type(0x86dd),ipv6()" \
+ "${veth_a_port}"
+ run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \
+ "recirc_id(0),tunnel(tun_id=1,src=${v4_b_tun},dst=${v4_a_tun}),in_port(${type_a_port}),eth(),eth_type(0x0806),arp()" \
+ "${veth_a_port}"
+ run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \
+ "recirc_id(0),in_port(${veth_a_port}),eth(),eth_type(0x0806),arp(sip=${veth4_c_addr},tip=${tunnel4_b_addr})" \
+ "set(tunnel(tun_id=1,dst=${v4_b_tun},ttl=64,tp_dst=${dport},flags(df|csum))),${type_a_port}"
+ else
+ run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \
+ "recirc_id(0),in_port(${veth_a_port}),eth(),eth_type(0x0800),ipv4()" \
+ "set(tunnel(tun_id=1,ipv6_dst=${v6_b_tun},ttl=64,tp_dst=${dport},flags(df|csum))),${type_a_port}"
+ run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \
+ "recirc_id(0),in_port(${veth_a_port}),eth(),eth_type(0x86dd),ipv6()" \
+ "set(tunnel(tun_id=1,ipv6_dst=${v6_b_tun},ttl=64,tp_dst=${dport},flags(df|csum))),${type_a_port}"
+ run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \
+ "recirc_id(0),tunnel(tun_id=1,ipv6_src=${v6_b_tun},ipv6_dst=${v6_a_tun}),in_port(${type_a_port}),eth(),eth_type(0x0800),ipv4()" \
+ "${veth_a_port}"
+ run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \
+ "recirc_id(0),tunnel(tun_id=1,ipv6_src=${v6_b_tun},ipv6_dst=${v6_a_tun}),in_port(${type_a_port}),eth(),eth_type(0x86dd),ipv6()" \
+ "${veth_a_port}"
+ run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \
+ "recirc_id(0),tunnel(tun_id=1,ipv6_src=${v6_b_tun},ipv6_dst=${v6_a_tun}),in_port(${type_a_port}),eth(),eth_type(0x0806),arp()" \
+ "${veth_a_port}"
+ run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \
+ "recirc_id(0),in_port(${veth_a_port}),eth(),eth_type(0x0806),arp(sip=${veth4_c_addr},tip=${tunnel4_b_addr})" \
+ "set(tunnel(tun_id=1,ipv6_dst=${v6_b_tun},ttl=64,tp_dst=${dport},flags(df|csum))),${type_a_port}"
+ fi
+}
+
+setup_ovs_via_vswitchd() {
+ type="${1}"
+ b_addr="${2}"
+
+ run_cmd ovs-vsctl add-port ovs_br0 ${type}_a -- \
+ set interface ${type}_a type=${type} \
+ options:remote_ip=${b_addr} options:key=1 options:csum=true || return 1
+}
+
setup_ovs_vxlan_or_geneve() {
type="${1}"
a_addr="${2}"
b_addr="${3}"
+ dport="6081"
if [ "${type}" = "vxlan" ]; then
+ dport="4789"
opts="${opts} ttl 64 dstport 4789"
opts_b="local ${b_addr}"
fi
- run_cmd ovs-vsctl add-port ovs_br0 ${type}_a -- \
- set interface ${type}_a type=${type} \
- options:remote_ip=${b_addr} options:key=1 options:csum=true || return 1
+ setup_ovs_via_internal_utility "${type}" "${a_addr}" "${b_addr}" \
+ "${dport}" || \
+ setup_ovs_via_vswitchd "${type}" "${b_addr}" || return 1
run_cmd ${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} remote ${a_addr} ${opts} || return 1
run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b
run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b
+ run_cmd ip link set ${type}_a up
run_cmd ${ns_b} ip link set ${type}_b up
}
@@ -880,8 +952,24 @@ setup_ovs_vxlan6() {
setup_ovs_vxlan_or_geneve vxlan ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1
}
+setup_ovs_br_internal() {
+ run_cmd python3 ./openvswitch/ovs-dpctl.py add-dp ovs_br0 || \
+ return 1
+}
+
+setup_ovs_br_vswitchd() {
+ run_cmd ovs-vsctl add-br ovs_br0 || return 1
+}
+
+setup_ovs_add_if() {
+ ifname="${1}"
+ run_cmd python3 ./openvswitch/ovs-dpctl.py add-if ovs_br0 \
+ "${ifname}" || \
+ run_cmd ovs-vsctl add-port ovs_br0 "${ifname}"
+}
+
setup_ovs_bridge() {
- run_cmd ovs-vsctl add-br ovs_br0 || return $ksft_skip
+ setup_ovs_br_internal || setup_ovs_br_vswitchd || return $ksft_skip
run_cmd ip link set ovs_br0 up
run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C
@@ -891,7 +979,7 @@ setup_ovs_bridge() {
run_cmd ${ns_c} ip link set veth_C-A up
run_cmd ${ns_c} ip addr add ${veth4_c_addr}/${veth4_mask} dev veth_C-A
run_cmd ${ns_c} ip addr add ${veth6_c_addr}/${veth6_mask} dev veth_C-A
- run_cmd ovs-vsctl add-port ovs_br0 veth_A-C
+ setup_ovs_add_if veth_A-C
# Move veth_A-R1 to init
run_cmd ${ns_a} ip link set veth_A-R1 netns 1
@@ -922,6 +1010,18 @@ trace() {
sleep 1
}
+cleanup_del_ovs_internal() {
+ # squelch the output of the del-if commands since it can be wordy
+ python3 ./openvswitch/ovs-dpctl.py del-if ovs_br0 -d true vxlan_a >/dev/null 2>&1
+ python3 ./openvswitch/ovs-dpctl.py del-if ovs_br0 -d true geneve_a >/dev/null 2>&1
+ python3 ./openvswitch/ovs-dpctl.py del-dp ovs_br0 >/dev/null 2>&1
+}
+
+cleanup_del_ovs_vswitchd() {
+ ovs-vsctl --if-exists del-port vxlan_a 2>/dev/null
+ ovs-vsctl --if-exists del-br ovs_br0 2>/dev/null
+}
+
cleanup() {
for pid in ${tcpdump_pids}; do
kill ${pid}
@@ -940,10 +1040,10 @@ cleanup() {
cleanup_all_ns
- ip link del veth_A-C 2>/dev/null
- ip link del veth_A-R1 2>/dev/null
- ovs-vsctl --if-exists del-port vxlan_a 2>/dev/null
- ovs-vsctl --if-exists del-br ovs_br0 2>/dev/null
+ ip link del veth_A-C 2>/dev/null
+ ip link del veth_A-R1 2>/dev/null
+ cleanup_del_ovs_internal
+ cleanup_del_ovs_vswitchd
rm -f "$tmpoutfile"
}
@@ -1397,6 +1497,12 @@ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception() {
outer_family=${3}
ll_mtu=4000
+ if [ "${type}" = "vxlan" ]; then
+ tun_a="vxlan_sys_4789"
+ elif [ "${type}" = "geneve" ]; then
+ tun_a="genev_sys_6081"
+ fi
+
if [ ${outer_family} -eq 4 ]; then
setup namespaces routing ovs_bridge ovs_${type}4 || return $ksft_skip
# IPv4 header UDP header VXLAN/GENEVE header Ethernet header
@@ -1407,17 +1513,11 @@ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception() {
exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14))
fi
- if [ "${type}" = "vxlan" ]; then
- tun_a="vxlan_sys_4789"
- elif [ "${type}" = "geneve" ]; then
- tun_a="genev_sys_6081"
- fi
-
- trace "" "${tun_a}" "${ns_b}" ${type}_b \
- "" veth_A-R1 "${ns_r1}" veth_R1-A \
- "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B \
- "" ovs_br0 "" veth-A-C \
- "${ns_c}" veth_C-A
+ trace "" ${type}_a "${ns_b}" ${type}_b \
+ "" veth_A-R1 "${ns_r1}" veth_R1-A \
+ "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B \
+ "" ovs_br0 "" veth-A_C \
+ "${ns_c}" veth_C-A "" "${tun_a}"
if [ ${family} -eq 4 ]; then
ping=ping
@@ -1436,8 +1536,9 @@ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception() {
mtu "${ns_b}" veth_B-R1 ${ll_mtu}
mtu "${ns_r1}" veth_R1-B ${ll_mtu}
- mtu "" ${tun_a} $((${ll_mtu} + 1000))
- mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000))
+ mtu "" ${tun_a} $((${ll_mtu} + 1000)) 2>/dev/null || \
+ mtu "" ${type}_a $((${ll_mtu} + 1000)) 2>/dev/null
+ mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000))
run_cmd ${ns_c} ${ping} -q -M want -i 0.1 -c 20 -s $((${ll_mtu} + 500)) ${dst} || return 1
diff --git a/tools/testing/selftests/bpf/progs/sample_map_ret0.c b/tools/testing/selftests/net/sample_map_ret0.bpf.c
index 495990d355ef..43ca92594926 100644
--- a/tools/testing/selftests/bpf/progs/sample_map_ret0.c
+++ b/tools/testing/selftests/net/sample_map_ret0.bpf.c
@@ -17,7 +17,7 @@ struct {
} array SEC(".maps");
/* Sample program which should always load for testing control paths. */
-SEC(".text") int func()
+SEC("xdp") int func()
{
__u64 key64 = 0;
__u32 key = 0;
diff --git a/tools/testing/selftests/bpf/progs/sample_ret0.c b/tools/testing/selftests/net/sample_ret0.bpf.c
index fec99750d6ea..1df5ca98bb65 100644
--- a/tools/testing/selftests/bpf/progs/sample_ret0.c
+++ b/tools/testing/selftests/net/sample_ret0.bpf.c
@@ -1,6 +1,9 @@
/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */
+#define SEC(name) __attribute__((section(name), used))
+
/* Sample program which should always load for testing control paths. */
+SEC("xdp")
int func()
{
return 0;
diff --git a/tools/testing/selftests/net/srv6_end_dx4_netfilter_test.sh b/tools/testing/selftests/net/srv6_end_dx4_netfilter_test.sh
new file mode 100755
index 000000000000..e23210aa547f
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_end_dx4_netfilter_test.sh
@@ -0,0 +1,335 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Jianguo Wu <wujianguo@chinatelecom.cn>
+#
+# Mostly copied from tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh.
+#
+# This script is designed for testing the support of netfilter hooks for
+# SRv6 End.DX4 behavior.
+#
+# Hereafter a network diagram is shown, where one tenants (named 100) offer
+# IPv4 L3 VPN services allowing hosts to communicate with each other across
+# an IPv6 network.
+#
+# Routers rt-1 and rt-2 implement IPv4 L3 VPN services leveraging the SRv6
+# architecture. The key components for such VPNs are: a) SRv6 Encap behavior,
+# b) SRv6 End.DX4 behavior.
+#
+# To explain how an IPv4 L3 VPN based on SRv6 works, let us briefly consider an
+# example where, within the same domain of tenant 100, the host hs-1 pings
+# the host hs-2.
+#
+# First of all, L2 reachability of the host hs-2 is taken into account by
+# the router rt-1 which acts as an arp proxy.
+#
+# When the host hs-1 sends an IPv4 packet destined to hs-2, the router rt-1
+# receives the packet on the internal veth-t100 interface, rt-1 contains the
+# SRv6 Encap route for encapsulating the IPv4 packet in a IPv6 plus the Segment
+# Routing Header (SRH) packet. This packet is sent through the (IPv6) core
+# network up to the router rt-2 that receives it on veth0 interface.
+#
+# The rt-2 router uses the 'localsid' routing table to process incoming
+# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these
+# packets, the SRv6 End.DX4 behavior removes the outer IPv6+SRH headers and
+# routs the packet to the specified nexthop. Afterwards, the packet is sent to
+# the host hs-2 through the veth-t100 interface.
+#
+# The ping response follows the same processing but this time the role of rt-1
+# and rt-2 are swapped.
+#
+# And when net.netfilter.nf_hooks_lwtunnel is set to 1 in rt-1 or rt-2, and a
+# rpfilter iptables rule is added, SRv6 packets will go through netfilter PREROUTING
+# hooks.
+#
+#
+# +-------------------+ +-------------------+
+# | | | |
+# | hs-1 netns | | hs-2 netns |
+# | | | |
+# | +-------------+ | | +-------------+ |
+# | | veth0 | | | | veth0 | |
+# | | 10.0.0.1/24 | | | | 10.0.0.2/24 | |
+# | +-------------+ | | +-------------+ |
+# | . | | . |
+# +-------------------+ +-------------------+
+# . .
+# . .
+# . .
+# +-----------------------------------+ +-----------------------------------+
+# | . | | . |
+# | +---------------+ | | +---------------- |
+# | | veth-t100 | | | | veth-t100 | |
+# | | 10.0.0.11/24 | +----------+ | | +----------+ | 10.0.0.22/24 | |
+# | +-------+-------+ | route | | | | route | +-------+-------- |
+# | | table | | | | table | |
+# | +----------+ | | +----------+ |
+# | +--------------+ | | +--------------+ |
+# | | veth0 | | | | veth0 | |
+# | | 2001:11::1/64 |.|...|.| 2001:11::2/64 | |
+# | +--------------+ | | +--------------+ |
+# | | | |
+# | rt-1 netns | | rt-2 netns |
+# | | | |
+# +-----------------------------------+ +-----------------------------------+
+#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+# | Network configuration |
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# rt-1: localsid table
+# +----------------------------------------------------------------+
+# |SID |Action |
+# +----------------------------------------------------------------+
+# |fc00:21:100::6004|apply SRv6 End.DX4 nh4 10.0.0.1 dev veth-t100 |
+# +----------------------------------------------------------------+
+#
+# rt-1: route table
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |10.0.0.2 |apply seg6 encap segs fc00:12:100::6004|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth_t100 |
+# +---------------------------------------------------+
+#
+#
+# rt-2: localsid table
+# +---------------------------------------------------------------+
+# |SID |Action |
+# +---------------------------------------------------------------+
+# |fc00:12:100::6004|apply SRv6 End.DX4 nh4 10.0.0.2 dev veth-t100|
+# +---------------------------------------------------------------+
+#
+# rt-2: route table
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |10.0.0.1 |apply seg6 encap segs fc00:21:100::6004|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth_t100 |
+# +---------------------------------------------------+
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+readonly IPv6_RT_NETWORK=2001:11
+readonly IPv4_HS_NETWORK=10.0.0
+readonly SID_LOCATOR=fc00
+
+PING_TIMEOUT_SEC=4
+
+ret=0
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ nsuccess=$((nsuccess+1))
+ printf "\n TEST: %-60s [ OK ]\n" "${msg}"
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "\n TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+print_log_test_results()
+{
+ if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+ fi
+}
+
+log_section()
+{
+ echo
+ echo "################################################################################"
+ echo "TEST SECTION: $*"
+ echo "################################################################################"
+}
+
+cleanup()
+{
+ ip link del veth-rt-1 2>/dev/null || true
+ ip link del veth-rt-2 2>/dev/null || true
+
+ # destroy routers rt-* and hosts hs-*
+ for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do
+ ip netns del ${ns} || true
+ done
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+ local rt=$1
+ local nsname=rt-${rt}
+
+ ip netns add ${nsname}
+
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ ip link set veth-rt-${rt} netns ${nsname}
+ ip -netns ${nsname} link set veth-rt-${rt} name veth0
+
+ ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad
+ ip -netns ${nsname} link set veth0 up
+ ip -netns ${nsname} link set lo up
+
+ ip netns exec ${nsname} sysctl -wq net.ipv4.ip_forward=1
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1
+}
+
+setup_rt_netfilter()
+{
+ local rt=$1
+ local nsname=rt-${rt}
+
+ ip netns exec ${nsname} sysctl -wq net.netfilter.nf_hooks_lwtunnel=1
+ ip netns exec ${nsname} iptables -t raw -A PREROUTING -m rpfilter --invert -j DROP
+}
+
+setup_hs()
+{
+ local hs=$1
+ local rt=$2
+ local tid=$3
+ local hsname=hs-${hs}
+ local rtname=rt-${rt}
+ local rtveth=veth-t${tid}
+
+ # set the networking for the host
+ ip netns add ${hsname}
+
+ ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
+ ip -netns ${hsname} link set ${rtveth} netns ${rtname}
+ ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hs}/24 dev veth0
+ ip -netns ${hsname} link set veth0 up
+ ip -netns ${hsname} link set lo up
+
+ ip -netns ${rtname} addr add ${IPv4_HS_NETWORK}.${rt}${hs}/24 dev ${rtveth}
+ ip -netns ${rtname} link set ${rtveth} up
+
+ ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.proxy_arp=1
+}
+
+setup_vpn_config()
+{
+ local hssrc=$1
+ local rtsrc=$2
+ local hsdst=$3
+ local rtdst=$4
+ local tid=$5
+
+ local hssrc_name=hs-t${tid}-${hssrc}
+ local hsdst_name=hs-t${tid}-${hsdst}
+ local rtsrc_name=rt-${rtsrc}
+ local rtdst_name=rt-${rtdst}
+ local vpn_sid=${SID_LOCATOR}:${hssrc}${hsdst}:${tid}::6004
+
+ # set the encap route for encapsulating packets which arrive from the
+ # host hssrc and destined to the access router rtsrc.
+ ip -netns ${rtsrc_name} -4 route add ${IPv4_HS_NETWORK}.${hsdst}/32 \
+ encap seg6 mode encap segs ${vpn_sid} dev veth0
+ ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 \
+ via 2001:11::${rtdst} dev veth0
+
+ # set the decap route for decapsulating packets which arrive from
+ # the rtdst router and destined to the hsdst host.
+ ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 \
+ encap seg6local action End.DX4 nh4 ${IPv4_HS_NETWORK}.${hsdst} dev veth-t${tid}
+}
+
+setup()
+{
+ ip link add veth-rt-1 type veth peer name veth-rt-2
+ # setup the networking for router rt-1 and router rt-2
+ setup_rt_networking 1
+ setup_rt_networking 2
+
+ # setup two hosts for the tenant 100.
+ # - host hs-1 is directly connected to the router rt-1;
+ # - host hs-2 is directly connected to the router rt-2.
+ setup_hs 1 1 100
+ setup_hs 2 2 100
+
+ # setup the IPv4 L3 VPN which connects the host hs-1 and host hs-2.
+ setup_vpn_config 1 1 2 2 100 #args: src_host src_router dst_host dst_router tenant
+ setup_vpn_config 2 2 1 1 100
+}
+
+check_hs_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+
+ ip netns exec hs-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \
+ ${IPv4_HS_NETWORK}.${hsdst} >/dev/null 2>&1
+}
+
+check_and_log_hs_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+
+ check_hs_connectivity ${hssrc} ${hsdst} ${tid}
+ log_test $? 0 "Hosts connectivity: hs-${hssrc} -> hs-${hsdst} (tenant ${tid})"
+}
+
+host_tests()
+{
+ log_section "SRv6 VPN connectivity test among hosts in the same tenant"
+
+ check_and_log_hs_connectivity 1 2 100
+ check_and_log_hs_connectivity 2 1 100
+}
+
+router_netfilter_tests()
+{
+ log_section "SRv6 VPN connectivity test with netfilter enabled in routers"
+ setup_rt_netfilter 1
+ setup_rt_netfilter 2
+
+ check_and_log_hs_connectivity 1 2 100
+ check_and_log_hs_connectivity 2 1 100
+}
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+cleanup &>/dev/null
+
+setup
+
+host_tests
+router_netfilter_tests
+
+print_log_test_results
+
+cleanup &>/dev/null
+
+exit ${ret}
diff --git a/tools/testing/selftests/net/srv6_end_dx6_netfilter_test.sh b/tools/testing/selftests/net/srv6_end_dx6_netfilter_test.sh
new file mode 100755
index 000000000000..9e69a2ed5bc3
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_end_dx6_netfilter_test.sh
@@ -0,0 +1,340 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Jianguo Wu <wujianguo@chinatelecom.cn>
+#
+# Mostly copied from tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh.
+#
+# This script is designed for testing the support of netfilter hooks for
+# SRv6 End.DX4 behavior.
+#
+# Hereafter a network diagram is shown, where one tenants (named 100) offer
+# IPv6 L3 VPN services allowing hosts to communicate with each other across
+# an IPv6 network.
+#
+# Routers rt-1 and rt-2 implement IPv6 L3 VPN services leveraging the SRv6
+# architecture. The key components for such VPNs are: a) SRv6 Encap behavior,
+# b) SRv6 End.DX4 behavior.
+#
+# To explain how an IPv6 L3 VPN based on SRv6 works, let us briefly consider an
+# example where, within the same domain of tenant 100, the host hs-1 pings
+# the host hs-2.
+#
+# First of all, L2 reachability of the host hs-2 is taken into account by
+# the router rt-1 which acts as an arp proxy.
+#
+# When the host hs-1 sends an IPv6 packet destined to hs-2, the router rt-1
+# receives the packet on the internal veth-t100 interface, rt-1 contains the
+# SRv6 Encap route for encapsulating the IPv6 packet in a IPv6 plus the Segment
+# Routing Header (SRH) packet. This packet is sent through the (IPv6) core
+# network up to the router rt-2 that receives it on veth0 interface.
+#
+# The rt-2 router uses the 'localsid' routing table to process incoming
+# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these
+# packets, the SRv6 End.DX4 behavior removes the outer IPv6+SRH headers and
+# routs the packet to the specified nexthop. Afterwards, the packet is sent to
+# the host hs-2 through the veth-t100 interface.
+#
+# The ping response follows the same processing but this time the role of rt-1
+# and rt-2 are swapped.
+#
+# And when net.netfilter.nf_hooks_lwtunnel is set to 1 in rt-1 or rt-2, and a
+# rpfilter iptables rule is added, SRv6 packets will go through netfilter PREROUTING
+# hooks.
+#
+#
+# +-------------------+ +-------------------+
+# | | | |
+# | hs-1 netns | | hs-2 netns |
+# | | | |
+# | +-------------+ | | +-------------+ |
+# | | veth0 | | | | veth0 | |
+# | | cafe::1/64 | | | | cafe::2/64 | |
+# | +-------------+ | | +-------------+ |
+# | . | | . |
+# +-------------------+ +-------------------+
+# . .
+# . .
+# . .
+# +-----------------------------------+ +-----------------------------------+
+# | . | | . |
+# | +---------------+ | | +---------------- |
+# | | veth-t100 | | | | veth-t100 | |
+# | | cafe::11/64 | +----------+ | | +----------+ | cafe::22/64 | |
+# | +-------+-------+ | route | | | | route | +-------+-------- |
+# | | table | | | | table | |
+# | +----------+ | | +----------+ |
+# | +--------------+ | | +--------------+ |
+# | | veth0 | | | | veth0 | |
+# | | 2001:11::1/64 |.|...|.| 2001:11::2/64 | |
+# | +--------------+ | | +--------------+ |
+# | | | |
+# | rt-1 netns | | rt-2 netns |
+# | | | |
+# +-----------------------------------+ +-----------------------------------+
+#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+# | Network configuration |
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# rt-1: localsid table
+# +----------------------------------------------------------------+
+# |SID |Action |
+# +----------------------------------------------------------------+
+# |fc00:21:100::6004|apply SRv6 End.DX6 nh6 cafe::1 dev veth-t100 |
+# +----------------------------------------------------------------+
+#
+# rt-1: route table
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |cafe::2 |apply seg6 encap segs fc00:12:100::6004|
+# +---------------------------------------------------+
+# |cafe::/64 |forward to dev veth_t100 |
+# +---------------------------------------------------+
+#
+#
+# rt-2: localsid table
+# +---------------------------------------------------------------+
+# |SID |Action |
+# +---------------------------------------------------------------+
+# |fc00:12:100::6004|apply SRv6 End.DX6 nh6 cafe::2 dev veth-t100 |
+# +---------------------------------------------------------------+
+#
+# rt-2: route table
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |cafe::1 |apply seg6 encap segs fc00:21:100::6004|
+# +---------------------------------------------------+
+# |cafe::/64 |forward to dev veth_t100 |
+# +---------------------------------------------------+
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+readonly IPv6_RT_NETWORK=2001:11
+readonly IPv6_HS_NETWORK=cafe
+readonly SID_LOCATOR=fc00
+
+PING_TIMEOUT_SEC=4
+
+ret=0
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ nsuccess=$((nsuccess+1))
+ printf "\n TEST: %-60s [ OK ]\n" "${msg}"
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "\n TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+print_log_test_results()
+{
+ if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+ fi
+}
+
+log_section()
+{
+ echo
+ echo "################################################################################"
+ echo "TEST SECTION: $*"
+ echo "################################################################################"
+}
+
+cleanup()
+{
+ ip link del veth-rt-1 2>/dev/null || true
+ ip link del veth-rt-2 2>/dev/null || true
+
+ # destroy routers rt-* and hosts hs-*
+ for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do
+ ip netns del ${ns} || true
+ done
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+ local rt=$1
+ local nsname=rt-${rt}
+
+ ip netns add ${nsname}
+
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ ip link set veth-rt-${rt} netns ${nsname}
+ ip -netns ${nsname} link set veth-rt-${rt} name veth0
+
+ ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad
+ ip -netns ${nsname} link set veth0 up
+ ip -netns ${nsname} link set lo up
+
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1
+}
+
+setup_rt_netfilter()
+{
+ local rt=$1
+ local nsname=rt-${rt}
+
+ ip netns exec ${nsname} sysctl -wq net.netfilter.nf_hooks_lwtunnel=1
+ ip netns exec ${nsname} ip6tables -t raw -A PREROUTING -m rpfilter --invert -j DROP
+}
+
+setup_hs()
+{
+ local hs=$1
+ local rt=$2
+ local tid=$3
+ local hsname=hs-${hs}
+ local rtname=rt-${rt}
+ local rtveth=veth-t${tid}
+
+ # set the networking for the host
+ ip netns add ${hsname}
+
+ ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
+ ip -netns ${hsname} link set ${rtveth} netns ${rtname}
+ ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hs}/64 dev veth0 nodad
+ ip -netns ${hsname} link set veth0 up
+ ip -netns ${hsname} link set lo up
+
+ ip -netns ${rtname} addr add ${IPv6_HS_NETWORK}::${rt}${hs}/64 dev ${rtveth}
+ ip -netns ${rtname} link set ${rtveth} up
+
+ ip netns exec ${rtname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${rtname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ ip netns exec ${rtname} sysctl -wq net.ipv6.conf.${rtveth}.proxy_ndp=1
+}
+
+setup_vpn_config()
+{
+ local hssrc=$1
+ local rtsrc=$2
+ local hsdst=$3
+ local rtdst=$4
+ local tid=$5
+
+ local hssrc_name=hs-t${tid}-${hssrc}
+ local hsdst_name=hs-t${tid}-${hsdst}
+ local rtsrc_name=rt-${rtsrc}
+ local rtdst_name=rt-${rtdst}
+ local rtveth=veth-t${tid}
+ local vpn_sid=${SID_LOCATOR}:${hssrc}${hsdst}:${tid}::6004
+
+ ip -netns ${rtsrc_name} -6 neigh add proxy ${IPv6_HS_NETWORK}::${hsdst} dev ${rtveth}
+
+ # set the encap route for encapsulating packets which arrive from the
+ # host hssrc and destined to the access router rtsrc.
+ ip -netns ${rtsrc_name} -6 route add ${IPv6_HS_NETWORK}::${hsdst}/128 \
+ encap seg6 mode encap segs ${vpn_sid} dev veth0
+ ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 \
+ via 2001:11::${rtdst} dev veth0
+
+ # set the decap route for decapsulating packets which arrive from
+ # the rtdst router and destined to the hsdst host.
+ ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 \
+ encap seg6local action End.DX6 nh6 ${IPv6_HS_NETWORK}::${hsdst} dev veth-t${tid}
+}
+
+setup()
+{
+ ip link add veth-rt-1 type veth peer name veth-rt-2
+ # setup the networking for router rt-1 and router rt-2
+ setup_rt_networking 1
+ setup_rt_networking 2
+
+ # setup two hosts for the tenant 100.
+ # - host hs-1 is directly connected to the router rt-1;
+ # - host hs-2 is directly connected to the router rt-2.
+ setup_hs 1 1 100
+ setup_hs 2 2 100
+
+ # setup the IPv4 L3 VPN which connects the host hs-1 and host hs-2.
+ setup_vpn_config 1 1 2 2 100 #args: src_host src_router dst_host dst_router tenant
+ setup_vpn_config 2 2 1 1 100
+}
+
+check_hs_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+
+ ip netns exec hs-${hssrc} ping -6 -c 1 -W ${PING_TIMEOUT_SEC} \
+ ${IPv6_HS_NETWORK}::${hsdst} >/dev/null 2>&1
+}
+
+check_and_log_hs_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+
+ check_hs_connectivity ${hssrc} ${hsdst} ${tid}
+ log_test $? 0 "Hosts connectivity: hs-${hssrc} -> hs-${hsdst} (tenant ${tid})"
+}
+
+host_tests()
+{
+ log_section "SRv6 VPN connectivity test among hosts in the same tenant"
+
+ check_and_log_hs_connectivity 1 2 100
+ check_and_log_hs_connectivity 2 1 100
+}
+
+router_netfilter_tests()
+{
+ log_section "SRv6 VPN connectivity test with netfilter enabled in routers"
+ setup_rt_netfilter 1
+ setup_rt_netfilter 2
+
+ check_and_log_hs_connectivity 1 2 100
+ check_and_log_hs_connectivity 2 1 100
+}
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+cleanup &>/dev/null
+
+setup
+
+host_tests
+router_netfilter_tests
+
+print_log_test_results
+
+cleanup &>/dev/null
+
+exit ${ret}
diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile
index 522d991e310e..bd88b90b902b 100644
--- a/tools/testing/selftests/net/tcp_ao/Makefile
+++ b/tools/testing/selftests/net/tcp_ao/Makefile
@@ -26,7 +26,7 @@ LIB := $(LIBDIR)/libaotst.a
LDLIBS += $(LIB) -pthread
LIBDEPS := lib/aolib.h Makefile
-CFLAGS := -Wall -O2 -g -D_GNU_SOURCE -fno-strict-aliasing
+CFLAGS += -Wall -O2 -g -fno-strict-aliasing
CFLAGS += $(KHDR_INCLUDES)
CFLAGS += -iquote ./lib/ -I ../../../../include/
diff --git a/tools/testing/selftests/net/tcp_ao/self-connect.c b/tools/testing/selftests/net/tcp_ao/self-connect.c
index e154d9e198a9..a5698b0a3718 100644
--- a/tools/testing/selftests/net/tcp_ao/self-connect.c
+++ b/tools/testing/selftests/net/tcp_ao/self-connect.c
@@ -30,8 +30,6 @@ static void setup_lo_intf(const char *lo_intf)
static void tcp_self_connect(const char *tst, unsigned int port,
bool different_keyids, bool check_restore)
{
- uint64_t before_challenge_ack, after_challenge_ack;
- uint64_t before_syn_challenge, after_syn_challenge;
struct tcp_ao_counters before_ao, after_ao;
uint64_t before_aogood, after_aogood;
struct netstat *ns_before, *ns_after;
@@ -62,8 +60,6 @@ static void tcp_self_connect(const char *tst, unsigned int port,
ns_before = netstat_read();
before_aogood = netstat_get(ns_before, "TCPAOGood", NULL);
- before_challenge_ack = netstat_get(ns_before, "TCPChallengeACK", NULL);
- before_syn_challenge = netstat_get(ns_before, "TCPSYNChallenge", NULL);
if (test_get_tcp_ao_counters(sk, &before_ao))
test_error("test_get_tcp_ao_counters()");
@@ -82,8 +78,6 @@ static void tcp_self_connect(const char *tst, unsigned int port,
ns_after = netstat_read();
after_aogood = netstat_get(ns_after, "TCPAOGood", NULL);
- after_challenge_ack = netstat_get(ns_after, "TCPChallengeACK", NULL);
- after_syn_challenge = netstat_get(ns_after, "TCPSYNChallenge", NULL);
if (test_get_tcp_ao_counters(sk, &after_ao))
test_error("test_get_tcp_ao_counters()");
if (!check_restore) {
@@ -98,18 +92,6 @@ static void tcp_self_connect(const char *tst, unsigned int port,
close(sk);
return;
}
- if (after_challenge_ack <= before_challenge_ack ||
- after_syn_challenge <= before_syn_challenge) {
- /*
- * It's also meant to test simultaneous open, so check
- * these counters as well.
- */
- test_fail("%s: Didn't challenge SYN or ACK: %zu <= %zu OR %zu <= %zu",
- tst, after_challenge_ack, before_challenge_ack,
- after_syn_challenge, before_syn_challenge);
- close(sk);
- return;
- }
if (test_tcp_ao_counters_cmp(tst, &before_ao, &after_ao, TEST_CNT_GOOD)) {
close(sk);
diff --git a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh
index 8533393a4f18..02b986c9c247 100755
--- a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh
+++ b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh
@@ -154,17 +154,9 @@ setup_topo()
setup_topo_ns $ns
done
- ip link add name veth0 type veth peer name veth1
- ip link set dev veth0 netns $h1 name eth0
- ip link set dev veth1 netns $sw1 name swp1
-
- ip link add name veth0 type veth peer name veth1
- ip link set dev veth0 netns $sw1 name veth0
- ip link set dev veth1 netns $sw2 name veth0
-
- ip link add name veth0 type veth peer name veth1
- ip link set dev veth0 netns $h2 name eth0
- ip link set dev veth1 netns $sw2 name swp1
+ ip -n $h1 link add name eth0 type veth peer name swp1 netns $sw1
+ ip -n $sw1 link add name veth0 type veth peer name veth0 netns $sw2
+ ip -n $h2 link add name eth0 type veth peer name swp1 netns $sw2
}
setup_host_common()
diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh
index 8802604148dd..11a1ebda564f 100755
--- a/tools/testing/selftests/net/udpgro.sh
+++ b/tools/testing/selftests/net/udpgro.sh
@@ -7,7 +7,7 @@ source net_helper.sh
readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
-BPF_FILE="xdp_dummy.o"
+BPF_FILE="xdp_dummy.bpf.o"
# set global exit status, but never reset nonzero one.
check_err()
diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh
index 7080eae5312b..c51ea90a1395 100755
--- a/tools/testing/selftests/net/udpgro_bench.sh
+++ b/tools/testing/selftests/net/udpgro_bench.sh
@@ -7,7 +7,7 @@ source net_helper.sh
readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
-BPF_FILE="xdp_dummy.o"
+BPF_FILE="xdp_dummy.bpf.o"
cleanup() {
local -r jobs="$(jobs -p)"
diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh
index e1ff645bd3d1..17404f49cdb6 100755
--- a/tools/testing/selftests/net/udpgro_frglist.sh
+++ b/tools/testing/selftests/net/udpgro_frglist.sh
@@ -7,7 +7,7 @@ source net_helper.sh
readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
-BPF_FILE="xdp_dummy.o"
+BPF_FILE="xdp_dummy.bpf.o"
cleanup() {
local -r jobs="$(jobs -p)"
@@ -42,8 +42,8 @@ run_one() {
ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp
tc -n "${PEER_NS}" qdisc add dev veth1 clsact
- tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file nat6to4.o section schedcls/ingress6/nat_6 direct-action
- tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file nat6to4.o section schedcls/egress4/snat4 direct-action
+ tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file nat6to4.bpf.o section schedcls/ingress6/nat_6 direct-action
+ tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file nat6to4.bpf.o section schedcls/egress4/snat4 direct-action
echo ${rx_args}
ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r &
@@ -89,7 +89,7 @@ if [ ! -f ${BPF_FILE} ]; then
exit -1
fi
-if [ ! -f nat6to4.o ]; then
+if [ ! -f nat6to4.bpf.o ]; then
echo "Missing nat6to4 helper. Run 'make' first"
exit -1
fi
diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh
index 83ed987cff34..550d8eb3e224 100755
--- a/tools/testing/selftests/net/udpgro_fwd.sh
+++ b/tools/testing/selftests/net/udpgro_fwd.sh
@@ -3,7 +3,7 @@
source net_helper.sh
-BPF_FILE="xdp_dummy.o"
+BPF_FILE="xdp_dummy.bpf.o"
readonly BASE="ns-$(mktemp -u XXXXXX)"
readonly SRC=2
readonly DST=1
diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c
index 85b3baa3f7f3..3e74cfa1a2bf 100644
--- a/tools/testing/selftests/net/udpgso.c
+++ b/tools/testing/selftests/net/udpgso.c
@@ -53,6 +53,7 @@ static bool cfg_do_ipv6;
static bool cfg_do_connected;
static bool cfg_do_connectionless;
static bool cfg_do_msgmore;
+static bool cfg_do_recv = true;
static bool cfg_do_setsockopt;
static int cfg_specific_test_id = -1;
@@ -414,6 +415,9 @@ static void run_one(struct testcase *test, int fdt, int fdr,
if (!sent)
return;
+ if (!cfg_do_recv)
+ return;
+
if (test->gso_len)
mss = test->gso_len;
else
@@ -464,8 +468,10 @@ static void run_test(struct sockaddr *addr, socklen_t alen)
if (fdr == -1)
error(1, errno, "socket r");
- if (bind(fdr, addr, alen))
- error(1, errno, "bind");
+ if (cfg_do_recv) {
+ if (bind(fdr, addr, alen))
+ error(1, errno, "bind");
+ }
/* Have tests fail quickly instead of hang */
if (setsockopt(fdr, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
@@ -524,7 +530,7 @@ static void parse_opts(int argc, char **argv)
{
int c;
- while ((c = getopt(argc, argv, "46cCmst:")) != -1) {
+ while ((c = getopt(argc, argv, "46cCmRst:")) != -1) {
switch (c) {
case '4':
cfg_do_ipv4 = true;
@@ -541,6 +547,9 @@ static void parse_opts(int argc, char **argv)
case 'm':
cfg_do_msgmore = true;
break;
+ case 'R':
+ cfg_do_recv = false;
+ break;
case 's':
cfg_do_setsockopt = true;
break;
diff --git a/tools/testing/selftests/net/udpgso.sh b/tools/testing/selftests/net/udpgso.sh
index 6c63178086b0..85d1fa3c1ff7 100755
--- a/tools/testing/selftests/net/udpgso.sh
+++ b/tools/testing/selftests/net/udpgso.sh
@@ -27,6 +27,31 @@ test_route_mtu() {
ip route add local fd00::1/128 table local dev lo mtu 1500
}
+setup_dummy_sink() {
+ ip link add name sink mtu 1500 type dummy
+ ip addr add dev sink 10.0.0.0/24
+ ip addr add dev sink fd00::2/64 nodad
+ ip link set dev sink up
+}
+
+test_hw_gso_hw_csum() {
+ setup_dummy_sink
+ ethtool -K sink tx-checksum-ip-generic on >/dev/null
+ ethtool -K sink tx-udp-segmentation on >/dev/null
+}
+
+test_sw_gso_hw_csum() {
+ setup_dummy_sink
+ ethtool -K sink tx-checksum-ip-generic on >/dev/null
+ ethtool -K sink tx-udp-segmentation off >/dev/null
+}
+
+test_sw_gso_sw_csum() {
+ setup_dummy_sink
+ ethtool -K sink tx-checksum-ip-generic off >/dev/null
+ ethtool -K sink tx-udp-segmentation off >/dev/null
+}
+
if [ "$#" -gt 0 ]; then
"$1"
shift 2 # pop "test_*" arg and "--" delimiter
@@ -56,3 +81,21 @@ echo "ipv4 msg_more"
echo "ipv6 msg_more"
./in_netns.sh "$0" test_dev_mtu -- ./udpgso -6 -C -m
+
+echo "ipv4 hw-gso hw-csum"
+./in_netns.sh "$0" test_hw_gso_hw_csum -- ./udpgso -4 -C -R
+
+echo "ipv6 hw-gso hw-csum"
+./in_netns.sh "$0" test_hw_gso_hw_csum -- ./udpgso -6 -C -R
+
+echo "ipv4 sw-gso hw-csum"
+./in_netns.sh "$0" test_sw_gso_hw_csum -- ./udpgso -4 -C -R
+
+echo "ipv6 sw-gso hw-csum"
+./in_netns.sh "$0" test_sw_gso_hw_csum -- ./udpgso -6 -C -R
+
+echo "ipv4 sw-gso sw-csum"
+./in_netns.sh "$0" test_sw_gso_sw_csum -- ./udpgso -4 -C -R
+
+echo "ipv6 sw-gso sw-csum"
+./in_netns.sh "$0" test_sw_gso_sw_csum -- ./udpgso -6 -C -R
diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh
index 3a394b43e274..4f1edbafb946 100755
--- a/tools/testing/selftests/net/veth.sh
+++ b/tools/testing/selftests/net/veth.sh
@@ -1,7 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
-BPF_FILE="xdp_dummy.o"
+BPF_FILE="xdp_dummy.bpf.o"
readonly STATS="$(mktemp -p /tmp ns-XXXXXX)"
readonly BASE=`basename $STATS`
readonly SRC=2
diff --git a/tools/testing/selftests/net/vrf_route_leaking.sh b/tools/testing/selftests/net/vrf_route_leaking.sh
index 2da32f4c479b..152171fb1fc8 100755
--- a/tools/testing/selftests/net/vrf_route_leaking.sh
+++ b/tools/testing/selftests/net/vrf_route_leaking.sh
@@ -59,6 +59,7 @@
# while it is forwarded between different vrfs.
source lib.sh
+PATH=$PWD:$PWD/tools/testing/selftests/net:$PATH
VERBOSE=0
PAUSE_ON_FAIL=no
DEFAULT_TTYPE=sym
@@ -533,6 +534,86 @@ ipv6_ping_frag_asym()
ipv6_ping_frag asym
}
+ipv4_ping_local()
+{
+ log_section "IPv4 (sym route): VRF ICMP local error route lookup ping"
+
+ setup_sym
+
+ check_connectivity || return
+
+ run_cmd ip netns exec $r1 ip vrf exec blue ping -c1 -w1 ${H2_N2_IP}
+ log_test $? 0 "VRF ICMP local IPv4"
+}
+
+ipv4_tcp_local()
+{
+ log_section "IPv4 (sym route): VRF tcp local connection"
+
+ setup_sym
+
+ check_connectivity || return
+
+ run_cmd nettest -s -O "$h2" -l ${H2_N2_IP} -I eth0 -3 eth0 &
+ sleep 1
+ run_cmd nettest -N "$r1" -d blue -r ${H2_N2_IP}
+ log_test $? 0 "VRF tcp local connection IPv4"
+}
+
+ipv4_udp_local()
+{
+ log_section "IPv4 (sym route): VRF udp local connection"
+
+ setup_sym
+
+ check_connectivity || return
+
+ run_cmd nettest -s -D -O "$h2" -l ${H2_N2_IP} -I eth0 -3 eth0 &
+ sleep 1
+ run_cmd nettest -D -N "$r1" -d blue -r ${H2_N2_IP}
+ log_test $? 0 "VRF udp local connection IPv4"
+}
+
+ipv6_ping_local()
+{
+ log_section "IPv6 (sym route): VRF ICMP local error route lookup ping"
+
+ setup_sym
+
+ check_connectivity6 || return
+
+ run_cmd ip netns exec $r1 ip vrf exec blue ${ping6} -c1 -w1 ${H2_N2_IP6}
+ log_test $? 0 "VRF ICMP local IPv6"
+}
+
+ipv6_tcp_local()
+{
+ log_section "IPv6 (sym route): VRF tcp local connection"
+
+ setup_sym
+
+ check_connectivity6 || return
+
+ run_cmd nettest -s -6 -O "$h2" -l ${H2_N2_IP6} -I eth0 -3 eth0 &
+ sleep 1
+ run_cmd nettest -6 -N "$r1" -d blue -r ${H2_N2_IP6}
+ log_test $? 0 "VRF tcp local connection IPv6"
+}
+
+ipv6_udp_local()
+{
+ log_section "IPv6 (sym route): VRF udp local connection"
+
+ setup_sym
+
+ check_connectivity6 || return
+
+ run_cmd nettest -s -6 -D -O "$h2" -l ${H2_N2_IP6} -I eth0 -3 eth0 &
+ sleep 1
+ run_cmd nettest -6 -D -N "$r1" -d blue -r ${H2_N2_IP6}
+ log_test $? 0 "VRF udp local connection IPv6"
+}
+
################################################################################
# usage
@@ -555,8 +636,10 @@ EOF
# Some systems don't have a ping6 binary anymore
command -v ping6 > /dev/null 2>&1 && ping6=$(command -v ping6) || ping6=$(command -v ping)
-TESTS_IPV4="ipv4_ping_ttl ipv4_traceroute ipv4_ping_frag ipv4_ping_ttl_asym ipv4_traceroute_asym"
-TESTS_IPV6="ipv6_ping_ttl ipv6_traceroute ipv6_ping_ttl_asym ipv6_traceroute_asym"
+TESTS_IPV4="ipv4_ping_ttl ipv4_traceroute ipv4_ping_frag ipv4_ping_local ipv4_tcp_local
+ipv4_udp_local ipv4_ping_ttl_asym ipv4_traceroute_asym"
+TESTS_IPV6="ipv6_ping_ttl ipv6_traceroute ipv6_ping_local ipv6_tcp_local ipv6_udp_local
+ipv6_ping_ttl_asym ipv6_traceroute_asym"
ret=0
nsuccess=0
@@ -594,12 +677,18 @@ do
ipv4_traceroute|traceroute) ipv4_traceroute;;&
ipv4_traceroute_asym|traceroute) ipv4_traceroute_asym;;&
ipv4_ping_frag|ping) ipv4_ping_frag;;&
+ ipv4_ping_local|ping) ipv4_ping_local;;&
+ ipv4_tcp_local) ipv4_tcp_local;;&
+ ipv4_udp_local) ipv4_udp_local;;&
ipv6_ping_ttl|ping) ipv6_ping_ttl;;&
ipv6_ping_ttl_asym|ping) ipv6_ping_ttl_asym;;&
ipv6_traceroute|traceroute) ipv6_traceroute;;&
ipv6_traceroute_asym|traceroute) ipv6_traceroute_asym;;&
ipv6_ping_frag|ping) ipv6_ping_frag;;&
+ ipv6_ping_local|ping) ipv6_ping_local;;&
+ ipv6_tcp_local) ipv6_tcp_local;;&
+ ipv6_udp_local) ipv6_udp_local;;&
# setup namespaces and config, but do not run any tests
setup_sym|setup) setup_sym; exit 0;;
diff --git a/tools/testing/selftests/net/xdp_dummy.c b/tools/testing/selftests/net/xdp_dummy.bpf.c
index d988b2e0cee8..d988b2e0cee8 100644
--- a/tools/testing/selftests/net/xdp_dummy.c
+++ b/tools/testing/selftests/net/xdp_dummy.bpf.c
diff --git a/tools/testing/selftests/net/xfrm_policy.sh b/tools/testing/selftests/net/xfrm_policy.sh
index 457789530645..3eeeeffb4005 100755
--- a/tools/testing/selftests/net/xfrm_policy.sh
+++ b/tools/testing/selftests/net/xfrm_policy.sh
@@ -293,7 +293,7 @@ check_random_order()
local ns=$1
local log=$2
- for i in $(seq 100); do
+ for i in $(seq 50); do
ip -net $ns xfrm policy flush
for j in $(seq 0 16 255 | sort -R); do
ip -net $ns xfrm policy add dst $j.0.0.0/24 dir out priority 10 action allow
@@ -306,7 +306,7 @@ check_random_order()
done
done
- for i in $(seq 100); do
+ for i in $(seq 50); do
ip -net $ns xfrm policy flush
for j in $(seq 0 16 255 | sort -R); do
local addr=$(printf "e000:0000:%02x00::/56" $j)
diff --git a/tools/testing/selftests/net/ynl.mk b/tools/testing/selftests/net/ynl.mk
new file mode 100644
index 000000000000..59cb26cf3f73
--- /dev/null
+++ b/tools/testing/selftests/net/ynl.mk
@@ -0,0 +1,21 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# YNL selftest build snippet
+
+# Inputs:
+#
+# YNL_GENS: families we need in the selftests
+# YNL_PROGS: TEST_PROGS which need YNL (TODO, none exist, yet)
+# YNL_GEN_FILES: TEST_GEN_FILES which need YNL
+
+YNL_OUTPUTS := $(patsubst %,$(OUTPUT)/%,$(YNL_GEN_FILES))
+
+$(YNL_OUTPUTS): $(OUTPUT)/libynl.a
+$(YNL_OUTPUTS): CFLAGS += \
+ -I$(top_srcdir)/usr/include/ $(KHDR_INCLUDES) \
+ -I$(top_srcdir)/tools/net/ynl/lib/ \
+ -I$(top_srcdir)/tools/net/ynl/generated/
+
+$(OUTPUT)/libynl.a:
+ $(Q)$(MAKE) -C $(top_srcdir)/tools/net/ynl GENS="$(YNL_GENS)" libynl.a
+ $(Q)cp $(top_srcdir)/tools/net/ynl/libynl.a $(OUTPUT)/libynl.a
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
deleted file mode 100644
index 936c3085bb83..000000000000
--- a/tools/testing/selftests/netfilter/Makefile
+++ /dev/null
@@ -1,21 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-# Makefile for netfilter selftests
-
-TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \
- conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \
- nft_concat_range.sh nft_conntrack_helper.sh \
- nft_queue.sh nft_meta.sh nf_nat_edemux.sh \
- ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \
- conntrack_vrf.sh nft_synproxy.sh rpath.sh nft_audit.sh \
- conntrack_sctp_collision.sh xt_string.sh \
- bridge_netfilter.sh
-
-HOSTPKG_CONFIG := pkg-config
-
-CFLAGS += $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null)
-LDLIBS += $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl)
-
-TEST_GEN_FILES = nf-queue connect_close audit_logread sctp_collision \
- conntrack_dump_flush
-
-include ../lib.mk
diff --git a/tools/testing/selftests/netfilter/bridge_brouter.sh b/tools/testing/selftests/netfilter/bridge_brouter.sh
deleted file mode 100755
index 29f3955b9af7..000000000000
--- a/tools/testing/selftests/netfilter/bridge_brouter.sh
+++ /dev/null
@@ -1,146 +0,0 @@
-#!/bin/bash
-#
-# This test is for bridge 'brouting', i.e. make some packets being routed
-# rather than getting bridged even though they arrive on interface that is
-# part of a bridge.
-
-# eth0 br0 eth0
-# setup is: ns1 <-> ns0 <-> ns2
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-ret=0
-
-ebtables -V > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ebtables"
- exit $ksft_skip
-fi
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
-
-ip netns add ns0
-ip netns add ns1
-ip netns add ns2
-
-ip link add veth0 netns ns0 type veth peer name eth0 netns ns1
-if [ $? -ne 0 ]; then
- echo "SKIP: Can't create veth device"
- exit $ksft_skip
-fi
-ip link add veth1 netns ns0 type veth peer name eth0 netns ns2
-
-ip -net ns0 link set lo up
-ip -net ns0 link set veth0 up
-ip -net ns0 link set veth1 up
-
-ip -net ns0 link add br0 type bridge
-if [ $? -ne 0 ]; then
- echo "SKIP: Can't create bridge br0"
- exit $ksft_skip
-fi
-
-ip -net ns0 link set veth0 master br0
-ip -net ns0 link set veth1 master br0
-ip -net ns0 link set br0 up
-ip -net ns0 addr add 10.0.0.1/24 dev br0
-
-# place both in same subnet, ns1 and ns2 connected via ns0:br0
-for i in 1 2; do
- ip -net ns$i link set lo up
- ip -net ns$i link set eth0 up
- ip -net ns$i addr add 10.0.0.1$i/24 dev eth0
-done
-
-test_ebtables_broute()
-{
- local cipt
-
- # redirect is needed so the dstmac is rewritten to the bridge itself,
- # ip stack won't process OTHERHOST (foreign unicast mac) packets.
- ip netns exec ns0 ebtables -t broute -A BROUTING -p ipv4 --ip-protocol icmp -j redirect --redirect-target=DROP
- if [ $? -ne 0 ]; then
- echo "SKIP: Could not add ebtables broute redirect rule"
- return $ksft_skip
- fi
-
- # ping netns1, expected to not work (ip forwarding is off)
- ip netns exec ns1 ping -q -c 1 10.0.0.12 > /dev/null 2>&1
- if [ $? -eq 0 ]; then
- echo "ERROR: ping works, should have failed" 1>&2
- return 1
- fi
-
- # enable forwarding on both interfaces.
- # neither needs an ip address, but at least the bridge needs
- # an ip address in same network segment as ns1 and ns2 (ns0
- # needs to be able to determine route for to-be-forwarded packet).
- ip netns exec ns0 sysctl -q net.ipv4.conf.veth0.forwarding=1
- ip netns exec ns0 sysctl -q net.ipv4.conf.veth1.forwarding=1
-
- sleep 1
-
- ip netns exec ns1 ping -q -c 1 10.0.0.12 > /dev/null
- if [ $? -ne 0 ]; then
- echo "ERROR: ping did not work, but it should (broute+forward)" 1>&2
- return 1
- fi
-
- echo "PASS: ns1/ns2 connectivity with active broute rule"
- ip netns exec ns0 ebtables -t broute -F
-
- # ping netns1, expected to work (frames are bridged)
- ip netns exec ns1 ping -q -c 1 10.0.0.12 > /dev/null
- if [ $? -ne 0 ]; then
- echo "ERROR: ping did not work, but it should (bridged)" 1>&2
- return 1
- fi
-
- ip netns exec ns0 ebtables -t filter -A FORWARD -p ipv4 --ip-protocol icmp -j DROP
-
- # ping netns1, expected to not work (DROP in bridge forward)
- ip netns exec ns1 ping -q -c 1 10.0.0.12 > /dev/null 2>&1
- if [ $? -eq 0 ]; then
- echo "ERROR: ping works, should have failed (icmp forward drop)" 1>&2
- return 1
- fi
-
- # re-activate brouter
- ip netns exec ns0 ebtables -t broute -A BROUTING -p ipv4 --ip-protocol icmp -j redirect --redirect-target=DROP
-
- ip netns exec ns2 ping -q -c 1 10.0.0.11 > /dev/null
- if [ $? -ne 0 ]; then
- echo "ERROR: ping did not work, but it should (broute+forward 2)" 1>&2
- return 1
- fi
-
- echo "PASS: ns1/ns2 connectivity with active broute rule and bridge forward drop"
- return 0
-}
-
-# test basic connectivity
-ip netns exec ns1 ping -c 1 -q 10.0.0.12 > /dev/null
-if [ $? -ne 0 ]; then
- echo "ERROR: Could not reach ns2 from ns1" 1>&2
- ret=1
-fi
-
-ip netns exec ns2 ping -c 1 -q 10.0.0.11 > /dev/null
-if [ $? -ne 0 ]; then
- echo "ERROR: Could not reach ns1 from ns2" 1>&2
- ret=1
-fi
-
-if [ $ret -eq 0 ];then
- echo "PASS: netns connectivity: ns1 and ns2 can reach each other"
-fi
-
-test_ebtables_broute
-ret=$?
-for i in 0 1 2; do ip netns del ns$i;done
-
-exit $ret
diff --git a/tools/testing/selftests/netfilter/bridge_netfilter.sh b/tools/testing/selftests/netfilter/bridge_netfilter.sh
deleted file mode 100644
index 659b3ab02c8b..000000000000
--- a/tools/testing/selftests/netfilter/bridge_netfilter.sh
+++ /dev/null
@@ -1,188 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# Test bridge netfilter + conntrack, a combination that doesn't really work,
-# with multicast/broadcast packets racing for hash table insertion.
-
-# eth0 br0 eth0
-# setup is: ns1 <->,ns0 <-> ns3
-# ns2 <-' `'-> ns4
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-ret=0
-
-sfx=$(mktemp -u "XXXXXXXX")
-ns0="ns0-$sfx"
-ns1="ns1-$sfx"
-ns2="ns2-$sfx"
-ns3="ns3-$sfx"
-ns4="ns4-$sfx"
-
-ebtables -V > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ebtables"
- exit $ksft_skip
-fi
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
-
-for i in $(seq 0 4); do
- eval ip netns add \$ns$i
-done
-
-cleanup() {
- for i in $(seq 0 4); do eval ip netns del \$ns$i;done
-}
-
-trap cleanup EXIT
-
-do_ping()
-{
- fromns="$1"
- dstip="$2"
-
- ip netns exec $fromns ping -c 1 -q $dstip > /dev/null
- if [ $? -ne 0 ]; then
- echo "ERROR: ping from $fromns to $dstip"
- ip netns exec ${ns0} nft list ruleset
- ret=1
- fi
-}
-
-bcast_ping()
-{
- fromns="$1"
- dstip="$2"
-
- for i in $(seq 1 1000); do
- ip netns exec $fromns ping -q -f -b -c 1 -q $dstip > /dev/null 2>&1
- if [ $? -ne 0 ]; then
- echo "ERROR: ping -b from $fromns to $dstip"
- ip netns exec ${ns0} nft list ruleset
- fi
- done
-}
-
-ip link add veth1 netns ${ns0} type veth peer name eth0 netns ${ns1}
-if [ $? -ne 0 ]; then
- echo "SKIP: Can't create veth device"
- exit $ksft_skip
-fi
-
-ip link add veth2 netns ${ns0} type veth peer name eth0 netns $ns2
-ip link add veth3 netns ${ns0} type veth peer name eth0 netns $ns3
-ip link add veth4 netns ${ns0} type veth peer name eth0 netns $ns4
-
-ip -net ${ns0} link set lo up
-
-for i in $(seq 1 4); do
- ip -net ${ns0} link set veth$i up
-done
-
-ip -net ${ns0} link add br0 type bridge stp_state 0 forward_delay 0 nf_call_iptables 1 nf_call_ip6tables 1 nf_call_arptables 1
-if [ $? -ne 0 ]; then
- echo "SKIP: Can't create bridge br0"
- exit $ksft_skip
-fi
-
-# make veth0,1,2 part of bridge.
-for i in $(seq 1 3); do
- ip -net ${ns0} link set veth$i master br0
-done
-
-# add a macvlan on top of the bridge.
-MACVLAN_ADDR=ba:f3:13:37:42:23
-ip -net ${ns0} link add link br0 name macvlan0 type macvlan mode private
-ip -net ${ns0} link set macvlan0 address ${MACVLAN_ADDR}
-ip -net ${ns0} link set macvlan0 up
-ip -net ${ns0} addr add 10.23.0.1/24 dev macvlan0
-
-# add a macvlan on top of veth4.
-MACVLAN_ADDR=ba:f3:13:37:42:24
-ip -net ${ns0} link add link veth4 name macvlan4 type macvlan mode vepa
-ip -net ${ns0} link set macvlan4 address ${MACVLAN_ADDR}
-ip -net ${ns0} link set macvlan4 up
-
-# make the macvlan part of the bridge.
-# veth4 is not a bridge port, only the macvlan on top of it.
-ip -net ${ns0} link set macvlan4 master br0
-
-ip -net ${ns0} link set br0 up
-ip -net ${ns0} addr add 10.0.0.1/24 dev br0
-ip netns exec ${ns0} sysctl -q net.bridge.bridge-nf-call-iptables=1
-ret=$?
-if [ $ret -ne 0 ] ; then
- echo "SKIP: bridge netfilter not available"
- ret=$ksft_skip
-fi
-
-# for testing, so namespaces will reply to ping -b probes.
-ip netns exec ${ns0} sysctl -q net.ipv4.icmp_echo_ignore_broadcasts=0
-
-# enable conntrack in ns0 and drop broadcast packets in forward to
-# avoid them from getting confirmed in the postrouting hook before
-# the cloned skb is passed up the stack.
-ip netns exec ${ns0} nft -f - <<EOF
-table ip filter {
- chain input {
- type filter hook input priority 1; policy accept
- iifname br0 counter
- ct state new accept
- }
-}
-
-table bridge filter {
- chain forward {
- type filter hook forward priority 0; policy accept
- meta pkttype broadcast ip protocol icmp counter drop
- }
-}
-EOF
-
-# place 1, 2 & 3 in same subnet, connected via ns0:br0.
-# ns4 is placed in same subnet as well, but its not
-# part of the bridge: the corresponding veth4 is not
-# part of the bridge, only its macvlan interface.
-for i in $(seq 1 4); do
- eval ip -net \$ns$i link set lo up
- eval ip -net \$ns$i link set eth0 up
-done
-for i in $(seq 1 2); do
- eval ip -net \$ns$i addr add 10.0.0.1$i/24 dev eth0
-done
-
-ip -net ${ns3} addr add 10.23.0.13/24 dev eth0
-ip -net ${ns4} addr add 10.23.0.14/24 dev eth0
-
-# test basic connectivity
-do_ping ${ns1} 10.0.0.12
-do_ping ${ns3} 10.23.0.1
-do_ping ${ns4} 10.23.0.1
-
-if [ $ret -eq 0 ];then
- echo "PASS: netns connectivity: ns1 can reach ns2, ns3 and ns4 can reach ns0"
-fi
-
-bcast_ping ${ns1} 10.0.0.255
-
-# This should deliver broadcast to macvlan0, which is on top of ns0:br0.
-bcast_ping ${ns3} 10.23.0.255
-
-# same, this time via veth4:macvlan4.
-bcast_ping ${ns4} 10.23.0.255
-
-read t < /proc/sys/kernel/tainted
-
-if [ $t -eq 0 ];then
- echo PASS: kernel not tainted
-else
- echo ERROR: kernel is tainted
- ret=1
-fi
-
-exit $ret
diff --git a/tools/testing/selftests/netfilter/config b/tools/testing/selftests/netfilter/config
deleted file mode 100644
index 7c42b1b2c69b..000000000000
--- a/tools/testing/selftests/netfilter/config
+++ /dev/null
@@ -1,9 +0,0 @@
-CONFIG_NET_NS=y
-CONFIG_NF_TABLES_INET=y
-CONFIG_NFT_QUEUE=m
-CONFIG_NFT_NAT=m
-CONFIG_NFT_REDIR=m
-CONFIG_NFT_MASQ=m
-CONFIG_NFT_FLOW_OFFLOAD=m
-CONFIG_NF_CT_NETLINK=m
-CONFIG_AUDIT=y
diff --git a/tools/testing/selftests/netfilter/conntrack_sctp_collision.sh b/tools/testing/selftests/netfilter/conntrack_sctp_collision.sh
deleted file mode 100755
index a924e595cfd8..000000000000
--- a/tools/testing/selftests/netfilter/conntrack_sctp_collision.sh
+++ /dev/null
@@ -1,89 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# Testing For SCTP COLLISION SCENARIO as Below:
-#
-# 14:35:47.655279 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT] [init tag: 2017837359]
-# 14:35:48.353250 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT] [init tag: 1187206187]
-# 14:35:48.353275 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT ACK] [init tag: 2017837359]
-# 14:35:48.353283 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [COOKIE ECHO]
-# 14:35:48.353977 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [COOKIE ACK]
-# 14:35:48.855335 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT ACK] [init tag: 164579970]
-#
-# TOPO: SERVER_NS (link0)<--->(link1) ROUTER_NS (link2)<--->(link3) CLIENT_NS
-
-CLIENT_NS=$(mktemp -u client-XXXXXXXX)
-CLIENT_IP="198.51.200.1"
-CLIENT_PORT=1234
-
-SERVER_NS=$(mktemp -u server-XXXXXXXX)
-SERVER_IP="198.51.100.1"
-SERVER_PORT=1234
-
-ROUTER_NS=$(mktemp -u router-XXXXXXXX)
-CLIENT_GW="198.51.200.2"
-SERVER_GW="198.51.100.2"
-
-# setup the topo
-setup() {
- ip net add $CLIENT_NS
- ip net add $SERVER_NS
- ip net add $ROUTER_NS
- ip -n $SERVER_NS link add link0 type veth peer name link1 netns $ROUTER_NS
- ip -n $CLIENT_NS link add link3 type veth peer name link2 netns $ROUTER_NS
-
- ip -n $SERVER_NS link set link0 up
- ip -n $SERVER_NS addr add $SERVER_IP/24 dev link0
- ip -n $SERVER_NS route add $CLIENT_IP dev link0 via $SERVER_GW
-
- ip -n $ROUTER_NS link set link1 up
- ip -n $ROUTER_NS link set link2 up
- ip -n $ROUTER_NS addr add $SERVER_GW/24 dev link1
- ip -n $ROUTER_NS addr add $CLIENT_GW/24 dev link2
- ip net exec $ROUTER_NS sysctl -wq net.ipv4.ip_forward=1
-
- ip -n $CLIENT_NS link set link3 up
- ip -n $CLIENT_NS addr add $CLIENT_IP/24 dev link3
- ip -n $CLIENT_NS route add $SERVER_IP dev link3 via $CLIENT_GW
-
- # simulate the delay on OVS upcall by setting up a delay for INIT_ACK with
- # tc on $SERVER_NS side
- tc -n $SERVER_NS qdisc add dev link0 root handle 1: htb
- tc -n $SERVER_NS class add dev link0 parent 1: classid 1:1 htb rate 100mbit
- tc -n $SERVER_NS filter add dev link0 parent 1: protocol ip u32 match ip protocol 132 \
- 0xff match u8 2 0xff at 32 flowid 1:1
- tc -n $SERVER_NS qdisc add dev link0 parent 1:1 handle 10: netem delay 1200ms
-
- # simulate the ctstate check on OVS nf_conntrack
- ip net exec $ROUTER_NS iptables -A FORWARD -m state --state INVALID,UNTRACKED -j DROP
- ip net exec $ROUTER_NS iptables -A INPUT -p sctp -j DROP
-
- # use a smaller number for assoc's max_retrans to reproduce the issue
- modprobe sctp
- ip net exec $CLIENT_NS sysctl -wq net.sctp.association_max_retrans=3
-}
-
-cleanup() {
- ip net exec $CLIENT_NS pkill sctp_collision 2>&1 >/dev/null
- ip net exec $SERVER_NS pkill sctp_collision 2>&1 >/dev/null
- ip net del "$CLIENT_NS"
- ip net del "$SERVER_NS"
- ip net del "$ROUTER_NS"
-}
-
-do_test() {
- ip net exec $SERVER_NS ./sctp_collision server \
- $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT &
- ip net exec $CLIENT_NS ./sctp_collision client \
- $CLIENT_IP $CLIENT_PORT $SERVER_IP $SERVER_PORT
-}
-
-# NOTE: one way to work around the issue is set a smaller hb_interval
-# ip net exec $CLIENT_NS sysctl -wq net.sctp.hb_interval=3500
-
-# run the test case
-trap cleanup EXIT
-setup && \
-echo "Test for SCTP Collision in nf_conntrack:" && \
-do_test && echo "PASS!"
-exit $?
diff --git a/tools/testing/selftests/netfilter/conntrack_tcp_unreplied.sh b/tools/testing/selftests/netfilter/conntrack_tcp_unreplied.sh
deleted file mode 100755
index e7d7bf13cff5..000000000000
--- a/tools/testing/selftests/netfilter/conntrack_tcp_unreplied.sh
+++ /dev/null
@@ -1,167 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# Check that UNREPLIED tcp conntrack will eventually timeout.
-#
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-ret=0
-
-waittime=20
-sfx=$(mktemp -u "XXXXXXXX")
-ns1="ns1-$sfx"
-ns2="ns2-$sfx"
-
-nft --version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without nft tool"
- exit $ksft_skip
-fi
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
-
-cleanup() {
- ip netns pids $ns1 | xargs kill 2>/dev/null
- ip netns pids $ns2 | xargs kill 2>/dev/null
-
- ip netns del $ns1
- ip netns del $ns2
-}
-
-ipv4() {
- echo -n 192.168.$1.2
-}
-
-check_counter()
-{
- ns=$1
- name=$2
- expect=$3
- local lret=0
-
- cnt=$(ip netns exec $ns2 nft list counter inet filter "$name" | grep -q "$expect")
- if [ $? -ne 0 ]; then
- echo "ERROR: counter $name in $ns2 has unexpected value (expected $expect)" 1>&2
- ip netns exec $ns2 nft list counter inet filter "$name" 1>&2
- lret=1
- fi
-
- return $lret
-}
-
-# Create test namespaces
-ip netns add $ns1 || exit 1
-
-trap cleanup EXIT
-
-ip netns add $ns2 || exit 1
-
-# Connect the namespace to the host using a veth pair
-ip -net $ns1 link add name veth1 type veth peer name veth2
-ip -net $ns1 link set netns $ns2 dev veth2
-
-ip -net $ns1 link set up dev lo
-ip -net $ns2 link set up dev lo
-ip -net $ns1 link set up dev veth1
-ip -net $ns2 link set up dev veth2
-
-ip -net $ns2 addr add 10.11.11.2/24 dev veth2
-ip -net $ns2 route add default via 10.11.11.1
-
-ip netns exec $ns2 sysctl -q net.ipv4.conf.veth2.forwarding=1
-
-# add a rule inside NS so we enable conntrack
-ip netns exec $ns1 iptables -A INPUT -m state --state established,related -j ACCEPT
-
-ip -net $ns1 addr add 10.11.11.1/24 dev veth1
-ip -net $ns1 route add 10.99.99.99 via 10.11.11.2
-
-# Check connectivity works
-ip netns exec $ns1 ping -q -c 2 10.11.11.2 >/dev/null || exit 1
-
-ip netns exec $ns2 nc -l -p 8080 < /dev/null &
-
-# however, conntrack entries are there
-
-ip netns exec $ns2 nft -f - <<EOF
-table inet filter {
- counter connreq { }
- counter redir { }
- chain input {
- type filter hook input priority 0; policy accept;
- ct state new tcp flags syn ip daddr 10.99.99.99 tcp dport 80 counter name "connreq" accept
- ct state new ct status dnat tcp dport 8080 counter name "redir" accept
- }
-}
-EOF
-if [ $? -ne 0 ]; then
- echo "ERROR: Could not load nft rules"
- exit 1
-fi
-
-ip netns exec $ns2 sysctl -q net.netfilter.nf_conntrack_tcp_timeout_syn_sent=10
-
-echo "INFO: connect $ns1 -> $ns2 to the virtual ip"
-ip netns exec $ns1 bash -c 'while true ; do
- nc -p 60000 10.99.99.99 80
- sleep 1
- done' &
-
-sleep 1
-
-ip netns exec $ns2 nft -f - <<EOF
-table inet nat {
- chain prerouting {
- type nat hook prerouting priority 0; policy accept;
- ip daddr 10.99.99.99 tcp dport 80 redirect to :8080
- }
-}
-EOF
-if [ $? -ne 0 ]; then
- echo "ERROR: Could not load nat redirect"
- exit 1
-fi
-
-count=$(ip netns exec $ns2 conntrack -L -p tcp --dport 80 2>/dev/null | wc -l)
-if [ $count -eq 0 ]; then
- echo "ERROR: $ns2 did not pick up tcp connection from peer"
- exit 1
-fi
-
-echo "INFO: NAT redirect added in ns $ns2, waiting for $waittime seconds for nat to take effect"
-for i in $(seq 1 $waittime); do
- echo -n "."
-
- sleep 1
-
- count=$(ip netns exec $ns2 conntrack -L -p tcp --reply-port-src 8080 2>/dev/null | wc -l)
- if [ $count -gt 0 ]; then
- echo
- echo "PASS: redirection took effect after $i seconds"
- break
- fi
-
- m=$((i%20))
- if [ $m -eq 0 ]; then
- echo " waited for $i seconds"
- fi
-done
-
-expect="packets 1 bytes 60"
-check_counter "$ns2" "redir" "$expect"
-if [ $? -ne 0 ]; then
- ret=1
-fi
-
-if [ $ret -eq 0 ];then
- echo "PASS: redirection counter has expected values"
-else
- echo "ERROR: no tcp connection was redirected"
-fi
-
-exit $ret
diff --git a/tools/testing/selftests/netfilter/ipvs.sh b/tools/testing/selftests/netfilter/ipvs.sh
deleted file mode 100755
index c3b8f90c497e..000000000000
--- a/tools/testing/selftests/netfilter/ipvs.sh
+++ /dev/null
@@ -1,228 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-#
-# End-to-end ipvs test suite
-# Topology:
-#--------------------------------------------------------------+
-# | |
-# ns0 | ns1 |
-# ----------- | ----------- ----------- |
-# | veth01 | --------- | veth10 | | veth12 | |
-# ----------- peer ----------- ----------- |
-# | | | |
-# ----------- | | |
-# | br0 | |----------------- peer |--------------|
-# ----------- | | |
-# | | | |
-# ---------- peer ---------- ----------- |
-# | veth02 | --------- | veth20 | | veth21 | |
-# ---------- | ---------- ----------- |
-# | ns2 |
-# | |
-#--------------------------------------------------------------+
-#
-# We assume that all network driver are loaded
-#
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-ret=0
-GREEN='\033[0;92m'
-RED='\033[0;31m'
-NC='\033[0m' # No Color
-
-readonly port=8080
-
-readonly vip_v4=207.175.44.110
-readonly cip_v4=10.0.0.2
-readonly gip_v4=10.0.0.1
-readonly dip_v4=172.16.0.1
-readonly rip_v4=172.16.0.2
-readonly sip_v4=10.0.0.3
-
-readonly infile="$(mktemp)"
-readonly outfile="$(mktemp)"
-readonly datalen=32
-
-sysipvsnet="/proc/sys/net/ipv4/vs/"
-if [ ! -d $sysipvsnet ]; then
- modprobe -q ip_vs
- if [ $? -ne 0 ]; then
- echo "skip: could not run test without ipvs module"
- exit $ksft_skip
- fi
-fi
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ]; then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
-
-ipvsadm -v > /dev/null 2>&1
-if [ $? -ne 0 ]; then
- echo "SKIP: Could not run test without ipvsadm"
- exit $ksft_skip
-fi
-
-setup() {
- ip netns add ns0
- ip netns add ns1
- ip netns add ns2
-
- ip link add veth01 netns ns0 type veth peer name veth10 netns ns1
- ip link add veth02 netns ns0 type veth peer name veth20 netns ns2
- ip link add veth12 netns ns1 type veth peer name veth21 netns ns2
-
- ip netns exec ns0 ip link set veth01 up
- ip netns exec ns0 ip link set veth02 up
- ip netns exec ns0 ip link add br0 type bridge
- ip netns exec ns0 ip link set veth01 master br0
- ip netns exec ns0 ip link set veth02 master br0
- ip netns exec ns0 ip link set br0 up
- ip netns exec ns0 ip addr add ${cip_v4}/24 dev br0
-
- ip netns exec ns1 ip link set lo up
- ip netns exec ns1 ip link set veth10 up
- ip netns exec ns1 ip addr add ${gip_v4}/24 dev veth10
- ip netns exec ns1 ip link set veth12 up
- ip netns exec ns1 ip addr add ${dip_v4}/24 dev veth12
-
- ip netns exec ns2 ip link set lo up
- ip netns exec ns2 ip link set veth21 up
- ip netns exec ns2 ip addr add ${rip_v4}/24 dev veth21
- ip netns exec ns2 ip link set veth20 up
- ip netns exec ns2 ip addr add ${sip_v4}/24 dev veth20
-
- sleep 1
-
- dd if=/dev/urandom of="${infile}" bs="${datalen}" count=1 status=none
-}
-
-cleanup() {
- for i in 0 1 2
- do
- ip netns del ns$i > /dev/null 2>&1
- done
-
- if [ -f "${outfile}" ]; then
- rm "${outfile}"
- fi
- if [ -f "${infile}" ]; then
- rm "${infile}"
- fi
-}
-
-server_listen() {
- ip netns exec ns2 nc -l -p 8080 > "${outfile}" &
- server_pid=$!
- sleep 0.2
-}
-
-client_connect() {
- ip netns exec ns0 timeout 2 nc -w 1 ${vip_v4} ${port} < "${infile}"
-}
-
-verify_data() {
- wait "${server_pid}"
- cmp "$infile" "$outfile" 2>/dev/null
-}
-
-test_service() {
- server_listen
- client_connect
- verify_data
-}
-
-
-test_dr() {
- ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0
-
- ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1
- ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr
- ip netns exec ns1 ipvsadm -a -t ${vip_v4}:${port} -r ${rip_v4}:${port}
- ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1
-
- # avoid incorrect arp response
- ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_ignore=1
- ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_announce=2
- # avoid reverse route lookup
- ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0
- ip netns exec ns2 sysctl -qw net.ipv4.conf.veth21.rp_filter=0
- ip netns exec ns2 ip addr add ${vip_v4}/32 dev lo:1
-
- test_service
-}
-
-test_nat() {
- ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0
-
- ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1
- ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr
- ip netns exec ns1 ipvsadm -a -m -t ${vip_v4}:${port} -r ${rip_v4}:${port}
- ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1
-
- ip netns exec ns2 ip link del veth20
- ip netns exec ns2 ip route add default via ${dip_v4} dev veth21
-
- test_service
-}
-
-test_tun() {
- ip netns exec ns0 ip route add ${vip_v4} via ${gip_v4} dev br0
-
- ip netns exec ns1 modprobe ipip
- ip netns exec ns1 ip link set tunl0 up
- ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=0
- ip netns exec ns1 sysctl -qw net.ipv4.conf.all.send_redirects=0
- ip netns exec ns1 sysctl -qw net.ipv4.conf.default.send_redirects=0
- ip netns exec ns1 ipvsadm -A -t ${vip_v4}:${port} -s rr
- ip netns exec ns1 ipvsadm -a -i -t ${vip_v4}:${port} -r ${rip_v4}:${port}
- ip netns exec ns1 ip addr add ${vip_v4}/32 dev lo:1
-
- ip netns exec ns2 modprobe ipip
- ip netns exec ns2 ip link set tunl0 up
- ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_ignore=1
- ip netns exec ns2 sysctl -qw net.ipv4.conf.all.arp_announce=2
- ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0
- ip netns exec ns2 sysctl -qw net.ipv4.conf.tunl0.rp_filter=0
- ip netns exec ns2 sysctl -qw net.ipv4.conf.veth21.rp_filter=0
- ip netns exec ns2 ip addr add ${vip_v4}/32 dev lo:1
-
- test_service
-}
-
-run_tests() {
- local errors=
-
- echo "Testing DR mode..."
- cleanup
- setup
- test_dr
- errors=$(( $errors + $? ))
-
- echo "Testing NAT mode..."
- cleanup
- setup
- test_nat
- errors=$(( $errors + $? ))
-
- echo "Testing Tunnel mode..."
- cleanup
- setup
- test_tun
- errors=$(( $errors + $? ))
-
- return $errors
-}
-
-trap cleanup EXIT
-
-run_tests
-
-if [ $? -ne 0 ]; then
- echo -e "$(basename $0): ${RED}FAIL${NC}"
- exit 1
-fi
-echo -e "$(basename $0): ${GREEN}PASS${NC}"
-exit 0
diff --git a/tools/testing/selftests/netfilter/nf_nat_edemux.sh b/tools/testing/selftests/netfilter/nf_nat_edemux.sh
deleted file mode 100755
index a1aa8f4a5828..000000000000
--- a/tools/testing/selftests/netfilter/nf_nat_edemux.sh
+++ /dev/null
@@ -1,127 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# Test NAT source port clash resolution
-#
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-ret=0
-
-sfx=$(mktemp -u "XXXXXXXX")
-ns1="ns1-$sfx"
-ns2="ns2-$sfx"
-socatpid=0
-
-cleanup()
-{
- [ $socatpid -gt 0 ] && kill $socatpid
- ip netns del $ns1
- ip netns del $ns2
-}
-
-socat -h > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without socat"
- exit $ksft_skip
-fi
-
-iptables --version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without iptables"
- exit $ksft_skip
-fi
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
-
-ip netns add "$ns1"
-if [ $? -ne 0 ];then
- echo "SKIP: Could not create net namespace $ns1"
- exit $ksft_skip
-fi
-
-trap cleanup EXIT
-
-ip netns add $ns2
-
-# Connect the namespaces using a veth pair
-ip link add name veth2 type veth peer name veth1
-ip link set netns $ns1 dev veth1
-ip link set netns $ns2 dev veth2
-
-ip netns exec $ns1 ip link set up dev lo
-ip netns exec $ns1 ip link set up dev veth1
-ip netns exec $ns1 ip addr add 192.168.1.1/24 dev veth1
-
-ip netns exec $ns2 ip link set up dev lo
-ip netns exec $ns2 ip link set up dev veth2
-ip netns exec $ns2 ip addr add 192.168.1.2/24 dev veth2
-
-# Create a server in one namespace
-ip netns exec $ns1 socat -u TCP-LISTEN:5201,fork OPEN:/dev/null,wronly=1 &
-socatpid=$!
-
-# Restrict source port to just one so we don't have to exhaust
-# all others.
-ip netns exec $ns2 sysctl -q net.ipv4.ip_local_port_range="10000 10000"
-
-# add a virtual IP using DNAT
-ip netns exec $ns2 iptables -t nat -A OUTPUT -d 10.96.0.1/32 -p tcp --dport 443 -j DNAT --to-destination 192.168.1.1:5201
-
-# ... and route it to the other namespace
-ip netns exec $ns2 ip route add 10.96.0.1 via 192.168.1.1
-
-sleep 1
-
-# add a persistent connection from the other namespace
-ip netns exec $ns2 socat -t 10 - TCP:192.168.1.1:5201 > /dev/null &
-
-sleep 1
-
-# ip daddr:dport will be rewritten to 192.168.1.1 5201
-# NAT must reallocate source port 10000 because
-# 192.168.1.2:10000 -> 192.168.1.1:5201 is already in use
-echo test | ip netns exec $ns2 socat -t 3 -u STDIN TCP:10.96.0.1:443,connect-timeout=3 >/dev/null
-ret=$?
-
-# Check socat can connect to 10.96.0.1:443 (aka 192.168.1.1:5201).
-if [ $ret -eq 0 ]; then
- echo "PASS: socat can connect via NAT'd address"
-else
- echo "FAIL: socat cannot connect via NAT'd address"
-fi
-
-# check sport clashres.
-ip netns exec $ns1 iptables -t nat -A PREROUTING -p tcp --dport 5202 -j REDIRECT --to-ports 5201
-ip netns exec $ns1 iptables -t nat -A PREROUTING -p tcp --dport 5203 -j REDIRECT --to-ports 5201
-
-sleep 5 | ip netns exec $ns2 socat -t 5 -u STDIN TCP:192.168.1.1:5202,connect-timeout=5 >/dev/null &
-cpid1=$!
-sleep 1
-
-# if connect succeeds, client closes instantly due to EOF on stdin.
-# if connect hangs, it will time out after 5s.
-echo | ip netns exec $ns2 socat -t 3 -u STDIN TCP:192.168.1.1:5203,connect-timeout=5 >/dev/null &
-cpid2=$!
-
-time_then=$(date +%s)
-wait $cpid2
-rv=$?
-time_now=$(date +%s)
-
-# Check how much time has elapsed, expectation is for
-# 'cpid2' to connect and then exit (and no connect delay).
-delta=$((time_now - time_then))
-
-if [ $delta -lt 2 -a $rv -eq 0 ]; then
- echo "PASS: could connect to service via redirected ports"
-else
- echo "FAIL: socat cannot connect to service via redirect ($delta seconds elapsed, returned $rv)"
- ret=1
-fi
-
-exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_conntrack_helper.sh b/tools/testing/selftests/netfilter/nft_conntrack_helper.sh
deleted file mode 100755
index faa7778d7bd1..000000000000
--- a/tools/testing/selftests/netfilter/nft_conntrack_helper.sh
+++ /dev/null
@@ -1,197 +0,0 @@
-#!/bin/bash
-#
-# This tests connection tracking helper assignment:
-# 1. can attach ftp helper to a connection from nft ruleset.
-# 2. auto-assign still works.
-#
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-ret=0
-
-sfx=$(mktemp -u "XXXXXXXX")
-ns1="ns1-$sfx"
-ns2="ns2-$sfx"
-testipv6=1
-
-cleanup()
-{
- ip netns del ${ns1}
- ip netns del ${ns2}
-}
-
-nft --version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without nft tool"
- exit $ksft_skip
-fi
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
-
-conntrack -V > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without conntrack tool"
- exit $ksft_skip
-fi
-
-which nc >/dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without netcat tool"
- exit $ksft_skip
-fi
-
-trap cleanup EXIT
-
-ip netns add ${ns1}
-ip netns add ${ns2}
-
-ip link add veth0 netns ${ns1} type veth peer name veth0 netns ${ns2} > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: No virtual ethernet pair device support in kernel"
- exit $ksft_skip
-fi
-
-ip -net ${ns1} link set lo up
-ip -net ${ns1} link set veth0 up
-
-ip -net ${ns2} link set lo up
-ip -net ${ns2} link set veth0 up
-
-ip -net ${ns1} addr add 10.0.1.1/24 dev veth0
-ip -net ${ns1} addr add dead:1::1/64 dev veth0
-
-ip -net ${ns2} addr add 10.0.1.2/24 dev veth0
-ip -net ${ns2} addr add dead:1::2/64 dev veth0
-
-load_ruleset_family() {
- local family=$1
- local ns=$2
-
-ip netns exec ${ns} nft -f - <<EOF
-table $family raw {
- ct helper ftp {
- type "ftp" protocol tcp
- }
- chain pre {
- type filter hook prerouting priority 0; policy accept;
- tcp dport 2121 ct helper set "ftp"
- }
- chain output {
- type filter hook output priority 0; policy accept;
- tcp dport 2121 ct helper set "ftp"
- }
-}
-EOF
- return $?
-}
-
-check_for_helper()
-{
- local netns=$1
- local message=$2
- local port=$3
-
- if echo $message |grep -q 'ipv6';then
- local family="ipv6"
- else
- local family="ipv4"
- fi
-
- ip netns exec ${netns} conntrack -L -f $family -p tcp --dport $port 2> /dev/null |grep -q 'helper=ftp'
- if [ $? -ne 0 ] ; then
- if [ $autoassign -eq 0 ] ;then
- echo "FAIL: ${netns} did not show attached helper $message" 1>&2
- ret=1
- else
- echo "PASS: ${netns} did not show attached helper $message" 1>&2
- fi
- else
- if [ $autoassign -eq 0 ] ;then
- echo "PASS: ${netns} connection on port $port has ftp helper attached" 1>&2
- else
- echo "FAIL: ${netns} connection on port $port has ftp helper attached" 1>&2
- ret=1
- fi
- fi
-
- return 0
-}
-
-test_helper()
-{
- local port=$1
- local autoassign=$2
-
- if [ $autoassign -eq 0 ] ;then
- msg="set via ruleset"
- else
- msg="auto-assign"
- fi
-
- sleep 3 | ip netns exec ${ns2} nc -w 2 -l -p $port > /dev/null &
-
- sleep 1 | ip netns exec ${ns1} nc -w 2 10.0.1.2 $port > /dev/null &
- sleep 1
-
- check_for_helper "$ns1" "ip $msg" $port $autoassign
- check_for_helper "$ns2" "ip $msg" $port $autoassign
-
- wait
-
- if [ $testipv6 -eq 0 ] ;then
- return 0
- fi
-
- ip netns exec ${ns1} conntrack -F 2> /dev/null
- ip netns exec ${ns2} conntrack -F 2> /dev/null
-
- sleep 3 | ip netns exec ${ns2} nc -w 2 -6 -l -p $port > /dev/null &
-
- sleep 1 | ip netns exec ${ns1} nc -w 2 -6 dead:1::2 $port > /dev/null &
- sleep 1
-
- check_for_helper "$ns1" "ipv6 $msg" $port
- check_for_helper "$ns2" "ipv6 $msg" $port
-
- wait
-}
-
-load_ruleset_family ip ${ns1}
-if [ $? -ne 0 ];then
- echo "FAIL: ${ns1} cannot load ip ruleset" 1>&2
- exit 1
-fi
-
-load_ruleset_family ip6 ${ns1}
-if [ $? -ne 0 ];then
- echo "SKIP: ${ns1} cannot load ip6 ruleset" 1>&2
- testipv6=0
-fi
-
-load_ruleset_family inet ${ns2}
-if [ $? -ne 0 ];then
- echo "SKIP: ${ns1} cannot load inet ruleset" 1>&2
- load_ruleset_family ip ${ns2}
- if [ $? -ne 0 ];then
- echo "FAIL: ${ns2} cannot load ip ruleset" 1>&2
- exit 1
- fi
-
- if [ $testipv6 -eq 1 ] ;then
- load_ruleset_family ip6 ${ns2}
- if [ $? -ne 0 ];then
- echo "FAIL: ${ns2} cannot load ip6 ruleset" 1>&2
- exit 1
- fi
- fi
-fi
-
-test_helper 2121 0
-ip netns exec ${ns1} sysctl -qe 'net.netfilter.nf_conntrack_helper=1'
-ip netns exec ${ns2} sysctl -qe 'net.netfilter.nf_conntrack_helper=1'
-test_helper 21 1
-
-exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_fib.sh b/tools/testing/selftests/netfilter/nft_fib.sh
deleted file mode 100755
index dff476e45e77..000000000000
--- a/tools/testing/selftests/netfilter/nft_fib.sh
+++ /dev/null
@@ -1,273 +0,0 @@
-#!/bin/bash
-#
-# This tests the fib expression.
-#
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-ret=0
-
-sfx=$(mktemp -u "XXXXXXXX")
-ns1="ns1-$sfx"
-ns2="ns2-$sfx"
-nsrouter="nsrouter-$sfx"
-timeout=4
-
-log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)
-
-cleanup()
-{
- ip netns del ${ns1}
- ip netns del ${ns2}
- ip netns del ${nsrouter}
-
- [ $log_netns -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns
-}
-
-nft --version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without nft tool"
- exit $ksft_skip
-fi
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
-
-ip netns add ${nsrouter}
-if [ $? -ne 0 ];then
- echo "SKIP: Could not create net namespace"
- exit $ksft_skip
-fi
-
-trap cleanup EXIT
-
-dmesg | grep -q ' nft_rpfilter: '
-if [ $? -eq 0 ]; then
- dmesg -c | grep ' nft_rpfilter: '
- echo "WARN: a previous test run has failed" 1>&2
-fi
-
-sysctl -q net.netfilter.nf_log_all_netns=1
-ip netns add ${ns1}
-ip netns add ${ns2}
-
-load_ruleset() {
- local netns=$1
-
-ip netns exec ${netns} nft -f /dev/stdin <<EOF
-table inet filter {
- chain prerouting {
- type filter hook prerouting priority 0; policy accept;
- fib saddr . iif oif missing counter log prefix "$netns nft_rpfilter: " drop
- }
-}
-EOF
-}
-
-load_pbr_ruleset() {
- local netns=$1
-
-ip netns exec ${netns} nft -f /dev/stdin <<EOF
-table inet filter {
- chain forward {
- type filter hook forward priority raw;
- fib saddr . iif oif gt 0 accept
- log drop
- }
-}
-EOF
-}
-
-load_ruleset_count() {
- local netns=$1
-
-ip netns exec ${netns} nft -f /dev/stdin <<EOF
-table inet filter {
- chain prerouting {
- type filter hook prerouting priority 0; policy accept;
- ip daddr 1.1.1.1 fib saddr . iif oif missing counter drop
- ip6 daddr 1c3::c01d fib saddr . iif oif missing counter drop
- }
-}
-EOF
-}
-
-check_drops() {
- dmesg | grep -q ' nft_rpfilter: '
- if [ $? -eq 0 ]; then
- dmesg | grep ' nft_rpfilter: '
- echo "FAIL: rpfilter did drop packets"
- return 1
- fi
-
- return 0
-}
-
-check_fib_counter() {
- local want=$1
- local ns=$2
- local address=$3
-
- line=$(ip netns exec ${ns} nft list table inet filter | grep 'fib saddr . iif' | grep $address | grep "packets $want" )
- ret=$?
-
- if [ $ret -ne 0 ];then
- echo "Netns $ns fib counter doesn't match expected packet count of $want for $address" 1>&2
- ip netns exec ${ns} nft list table inet filter
- return 1
- fi
-
- if [ $want -gt 0 ]; then
- echo "PASS: fib expression did drop packets for $address"
- fi
-
- return 0
-}
-
-load_ruleset ${nsrouter}
-load_ruleset ${ns1}
-load_ruleset ${ns2}
-
-ip link add veth0 netns ${nsrouter} type veth peer name eth0 netns ${ns1} > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: No virtual ethernet pair device support in kernel"
- exit $ksft_skip
-fi
-ip link add veth1 netns ${nsrouter} type veth peer name eth0 netns ${ns2}
-
-ip -net ${nsrouter} link set lo up
-ip -net ${nsrouter} link set veth0 up
-ip -net ${nsrouter} addr add 10.0.1.1/24 dev veth0
-ip -net ${nsrouter} addr add dead:1::1/64 dev veth0
-
-ip -net ${nsrouter} link set veth1 up
-ip -net ${nsrouter} addr add 10.0.2.1/24 dev veth1
-ip -net ${nsrouter} addr add dead:2::1/64 dev veth1
-
-ip -net ${ns1} link set lo up
-ip -net ${ns1} link set eth0 up
-
-ip -net ${ns2} link set lo up
-ip -net ${ns2} link set eth0 up
-
-ip -net ${ns1} addr add 10.0.1.99/24 dev eth0
-ip -net ${ns1} addr add dead:1::99/64 dev eth0
-ip -net ${ns1} route add default via 10.0.1.1
-ip -net ${ns1} route add default via dead:1::1
-
-ip -net ${ns2} addr add 10.0.2.99/24 dev eth0
-ip -net ${ns2} addr add dead:2::99/64 dev eth0
-ip -net ${ns2} route add default via 10.0.2.1
-ip -net ${ns2} route add default via dead:2::1
-
-test_ping() {
- local daddr4=$1
- local daddr6=$2
-
- ip netns exec ${ns1} ping -c 1 -q $daddr4 > /dev/null
- ret=$?
- if [ $ret -ne 0 ];then
- check_drops
- echo "FAIL: ${ns1} cannot reach $daddr4, ret $ret" 1>&2
- return 1
- fi
-
- ip netns exec ${ns1} ping -c 3 -q $daddr6 > /dev/null
- ret=$?
- if [ $ret -ne 0 ];then
- check_drops
- echo "FAIL: ${ns1} cannot reach $daddr6, ret $ret" 1>&2
- return 1
- fi
-
- return 0
-}
-
-ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
-ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
-ip netns exec ${nsrouter} sysctl net.ipv4.conf.all.rp_filter=0 > /dev/null
-ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.rp_filter=0 > /dev/null
-
-sleep 3
-
-test_ping 10.0.2.1 dead:2::1 || exit 1
-check_drops || exit 1
-
-test_ping 10.0.2.99 dead:2::99 || exit 1
-check_drops || exit 1
-
-echo "PASS: fib expression did not cause unwanted packet drops"
-
-ip netns exec ${nsrouter} nft flush table inet filter
-
-ip -net ${ns1} route del default
-ip -net ${ns1} -6 route del default
-
-ip -net ${ns1} addr del 10.0.1.99/24 dev eth0
-ip -net ${ns1} addr del dead:1::99/64 dev eth0
-
-ip -net ${ns1} addr add 10.0.2.99/24 dev eth0
-ip -net ${ns1} addr add dead:2::99/64 dev eth0
-
-ip -net ${ns1} route add default via 10.0.2.1
-ip -net ${ns1} -6 route add default via dead:2::1
-
-ip -net ${nsrouter} addr add dead:2::1/64 dev veth0
-
-# switch to ruleset that doesn't log, this time
-# its expected that this does drop the packets.
-load_ruleset_count ${nsrouter}
-
-# ns1 has a default route, but nsrouter does not.
-# must not check return value, ping to 1.1.1.1 will
-# fail.
-check_fib_counter 0 ${nsrouter} 1.1.1.1 || exit 1
-check_fib_counter 0 ${nsrouter} 1c3::c01d || exit 1
-
-ip netns exec ${ns1} ping -c 1 -W 1 -q 1.1.1.1 > /dev/null
-check_fib_counter 1 ${nsrouter} 1.1.1.1 || exit 1
-
-sleep 2
-ip netns exec ${ns1} ping -c 3 -q 1c3::c01d > /dev/null
-check_fib_counter 3 ${nsrouter} 1c3::c01d || exit 1
-
-# delete all rules
-ip netns exec ${ns1} nft flush ruleset
-ip netns exec ${ns2} nft flush ruleset
-ip netns exec ${nsrouter} nft flush ruleset
-
-ip -net ${ns1} addr add 10.0.1.99/24 dev eth0
-ip -net ${ns1} addr add dead:1::99/64 dev eth0
-
-ip -net ${ns1} addr del 10.0.2.99/24 dev eth0
-ip -net ${ns1} addr del dead:2::99/64 dev eth0
-
-ip -net ${nsrouter} addr del dead:2::1/64 dev veth0
-
-# ... pbr ruleset for the router, check iif+oif.
-load_pbr_ruleset ${nsrouter}
-if [ $? -ne 0 ] ; then
- echo "SKIP: Could not load fib forward ruleset"
- exit $ksft_skip
-fi
-
-ip -net ${nsrouter} rule add from all table 128
-ip -net ${nsrouter} rule add from all iif veth0 table 129
-ip -net ${nsrouter} route add table 128 to 10.0.1.0/24 dev veth0
-ip -net ${nsrouter} route add table 129 to 10.0.2.0/24 dev veth1
-
-# drop main ipv4 table
-ip -net ${nsrouter} -4 rule delete table main
-
-test_ping 10.0.2.99 dead:2::99
-if [ $? -ne 0 ] ; then
- ip -net ${nsrouter} nft list ruleset
- echo "FAIL: fib mismatch in pbr setup"
- exit 1
-fi
-
-echo "PASS: fib expression forward check with policy based routing"
-exit 0
diff --git a/tools/testing/selftests/netfilter/nft_queue.sh b/tools/testing/selftests/netfilter/nft_queue.sh
deleted file mode 100755
index e12729753351..000000000000
--- a/tools/testing/selftests/netfilter/nft_queue.sh
+++ /dev/null
@@ -1,449 +0,0 @@
-#!/bin/bash
-#
-# This tests nf_queue:
-# 1. can process packets from all hooks
-# 2. support running nfqueue from more than one base chain
-#
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-ret=0
-
-sfx=$(mktemp -u "XXXXXXXX")
-ns1="ns1-$sfx"
-ns2="ns2-$sfx"
-nsrouter="nsrouter-$sfx"
-timeout=4
-
-cleanup()
-{
- ip netns pids ${ns1} | xargs kill 2>/dev/null
- ip netns pids ${ns2} | xargs kill 2>/dev/null
- ip netns pids ${nsrouter} | xargs kill 2>/dev/null
-
- ip netns del ${ns1}
- ip netns del ${ns2}
- ip netns del ${nsrouter}
- rm -f "$TMPFILE0"
- rm -f "$TMPFILE1"
- rm -f "$TMPFILE2" "$TMPFILE3"
-}
-
-nft --version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without nft tool"
- exit $ksft_skip
-fi
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
-
-ip netns add ${nsrouter}
-if [ $? -ne 0 ];then
- echo "SKIP: Could not create net namespace"
- exit $ksft_skip
-fi
-
-TMPFILE0=$(mktemp)
-TMPFILE1=$(mktemp)
-TMPFILE2=$(mktemp)
-TMPFILE3=$(mktemp)
-trap cleanup EXIT
-
-ip netns add ${ns1}
-ip netns add ${ns2}
-
-ip link add veth0 netns ${nsrouter} type veth peer name eth0 netns ${ns1} > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: No virtual ethernet pair device support in kernel"
- exit $ksft_skip
-fi
-ip link add veth1 netns ${nsrouter} type veth peer name eth0 netns ${ns2}
-
-ip -net ${nsrouter} link set lo up
-ip -net ${nsrouter} link set veth0 up
-ip -net ${nsrouter} addr add 10.0.1.1/24 dev veth0
-ip -net ${nsrouter} addr add dead:1::1/64 dev veth0
-
-ip -net ${nsrouter} link set veth1 up
-ip -net ${nsrouter} addr add 10.0.2.1/24 dev veth1
-ip -net ${nsrouter} addr add dead:2::1/64 dev veth1
-
-ip -net ${ns1} link set lo up
-ip -net ${ns1} link set eth0 up
-
-ip -net ${ns2} link set lo up
-ip -net ${ns2} link set eth0 up
-
-ip -net ${ns1} addr add 10.0.1.99/24 dev eth0
-ip -net ${ns1} addr add dead:1::99/64 dev eth0
-ip -net ${ns1} route add default via 10.0.1.1
-ip -net ${ns1} route add default via dead:1::1
-
-ip -net ${ns2} addr add 10.0.2.99/24 dev eth0
-ip -net ${ns2} addr add dead:2::99/64 dev eth0
-ip -net ${ns2} route add default via 10.0.2.1
-ip -net ${ns2} route add default via dead:2::1
-
-load_ruleset() {
- local name=$1
- local prio=$2
-
-ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
-table inet $name {
- chain nfq {
- ip protocol icmp queue bypass
- icmpv6 type { "echo-request", "echo-reply" } queue num 1 bypass
- }
- chain pre {
- type filter hook prerouting priority $prio; policy accept;
- jump nfq
- }
- chain input {
- type filter hook input priority $prio; policy accept;
- jump nfq
- }
- chain forward {
- type filter hook forward priority $prio; policy accept;
- tcp dport 12345 queue num 2
- jump nfq
- }
- chain output {
- type filter hook output priority $prio; policy accept;
- tcp dport 12345 queue num 3
- tcp sport 23456 queue num 3
- jump nfq
- }
- chain post {
- type filter hook postrouting priority $prio; policy accept;
- jump nfq
- }
-}
-EOF
-}
-
-load_counter_ruleset() {
- local prio=$1
-
-ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
-table inet countrules {
- chain pre {
- type filter hook prerouting priority $prio; policy accept;
- counter
- }
- chain input {
- type filter hook input priority $prio; policy accept;
- counter
- }
- chain forward {
- type filter hook forward priority $prio; policy accept;
- counter
- }
- chain output {
- type filter hook output priority $prio; policy accept;
- counter
- }
- chain post {
- type filter hook postrouting priority $prio; policy accept;
- counter
- }
-}
-EOF
-}
-
-test_ping() {
- ip netns exec ${ns1} ping -c 1 -q 10.0.2.99 > /dev/null
- if [ $? -ne 0 ];then
- return 1
- fi
-
- ip netns exec ${ns1} ping -c 1 -q dead:2::99 > /dev/null
- if [ $? -ne 0 ];then
- return 1
- fi
-
- return 0
-}
-
-test_ping_router() {
- ip netns exec ${ns1} ping -c 1 -q 10.0.2.1 > /dev/null
- if [ $? -ne 0 ];then
- return 1
- fi
-
- ip netns exec ${ns1} ping -c 1 -q dead:2::1 > /dev/null
- if [ $? -ne 0 ];then
- return 1
- fi
-
- return 0
-}
-
-test_queue_blackhole() {
- local proto=$1
-
-ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
-table $proto blackh {
- chain forward {
- type filter hook forward priority 0; policy accept;
- queue num 600
- }
-}
-EOF
- if [ $proto = "ip" ] ;then
- ip netns exec ${ns1} ping -W 2 -c 1 -q 10.0.2.99 > /dev/null
- lret=$?
- elif [ $proto = "ip6" ]; then
- ip netns exec ${ns1} ping -W 2 -c 1 -q dead:2::99 > /dev/null
- lret=$?
- else
- lret=111
- fi
-
- # queue without bypass keyword should drop traffic if no listener exists.
- if [ $lret -eq 0 ];then
- echo "FAIL: $proto expected failure, got $lret" 1>&2
- exit 1
- fi
-
- ip netns exec ${nsrouter} nft delete table $proto blackh
- if [ $? -ne 0 ] ;then
- echo "FAIL: $proto: Could not delete blackh table"
- exit 1
- fi
-
- echo "PASS: $proto: statement with no listener results in packet drop"
-}
-
-test_queue()
-{
- local expected=$1
- local last=""
-
- # spawn nf-queue listeners
- ip netns exec ${nsrouter} ./nf-queue -c -q 0 -t $timeout > "$TMPFILE0" &
- ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t $timeout > "$TMPFILE1" &
- sleep 1
- test_ping
- ret=$?
- if [ $ret -ne 0 ];then
- echo "FAIL: netns routing/connectivity with active listener on queue $queue: $ret" 1>&2
- exit $ret
- fi
-
- test_ping_router
- ret=$?
- if [ $ret -ne 0 ];then
- echo "FAIL: netns router unreachable listener on queue $queue: $ret" 1>&2
- exit $ret
- fi
-
- wait
- ret=$?
-
- for file in $TMPFILE0 $TMPFILE1; do
- last=$(tail -n1 "$file")
- if [ x"$last" != x"$expected packets total" ]; then
- echo "FAIL: Expected $expected packets total, but got $last" 1>&2
- cat "$file" 1>&2
-
- ip netns exec ${nsrouter} nft list ruleset
- exit 1
- fi
- done
-
- echo "PASS: Expected and received $last"
-}
-
-test_tcp_forward()
-{
- ip netns exec ${nsrouter} ./nf-queue -q 2 -t $timeout &
- local nfqpid=$!
-
- tmpfile=$(mktemp) || exit 1
- dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile
- ip netns exec ${ns2} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
- local rpid=$!
-
- sleep 1
- ip netns exec ${ns1} nc -w 5 10.0.2.99 12345 <"$tmpfile" >/dev/null &
-
- rm -f "$tmpfile"
-
- wait $rpid
- wait $lpid
- [ $? -eq 0 ] && echo "PASS: tcp and nfqueue in forward chain"
-}
-
-test_tcp_localhost()
-{
- tmpfile=$(mktemp) || exit 1
-
- dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile
- ip netns exec ${nsrouter} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
- local rpid=$!
-
- ip netns exec ${nsrouter} ./nf-queue -q 3 -t $timeout &
- local nfqpid=$!
-
- sleep 1
- ip netns exec ${nsrouter} nc -w 5 127.0.0.1 12345 <"$tmpfile" > /dev/null
- rm -f "$tmpfile"
-
- wait $rpid
- [ $? -eq 0 ] && echo "PASS: tcp via loopback"
- wait 2>/dev/null
-}
-
-test_tcp_localhost_connectclose()
-{
- tmpfile=$(mktemp) || exit 1
-
- ip netns exec ${nsrouter} ./connect_close -p 23456 -t $timeout &
-
- ip netns exec ${nsrouter} ./nf-queue -q 3 -t $timeout &
- local nfqpid=$!
-
- sleep 1
- rm -f "$tmpfile"
-
- wait $rpid
- [ $? -eq 0 ] && echo "PASS: tcp via loopback with connect/close"
- wait 2>/dev/null
-}
-
-test_tcp_localhost_requeue()
-{
-ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
-flush ruleset
-table inet filter {
- chain output {
- type filter hook output priority 0; policy accept;
- tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0
- }
- chain post {
- type filter hook postrouting priority 0; policy accept;
- tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0
- }
-}
-EOF
- tmpfile=$(mktemp) || exit 1
- dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile
- ip netns exec ${nsrouter} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
- local rpid=$!
-
- ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t $timeout > "$TMPFILE2" &
-
- # nfqueue 1 will be called via output hook. But this time,
- # re-queue the packet to nfqueue program on queue 2.
- ip netns exec ${nsrouter} ./nf-queue -G -d 150 -c -q 0 -Q 1 -t $timeout > "$TMPFILE3" &
-
- sleep 1
- ip netns exec ${nsrouter} nc -w 5 127.0.0.1 12345 <"$tmpfile" > /dev/null
- rm -f "$tmpfile"
-
- wait
-
- if ! diff -u "$TMPFILE2" "$TMPFILE3" ; then
- echo "FAIL: lost packets during requeue?!" 1>&2
- return
- fi
-
- echo "PASS: tcp via loopback and re-queueing"
-}
-
-test_icmp_vrf() {
- ip -net $ns1 link add tvrf type vrf table 9876
- if [ $? -ne 0 ];then
- echo "SKIP: Could not add vrf device"
- return
- fi
-
- ip -net $ns1 li set eth0 master tvrf
- ip -net $ns1 li set tvrf up
-
- ip -net $ns1 route add 10.0.2.0/24 via 10.0.1.1 dev eth0 table 9876
-ip netns exec ${ns1} nft -f /dev/stdin <<EOF
-flush ruleset
-table inet filter {
- chain output {
- type filter hook output priority 0; policy accept;
- meta oifname "tvrf" icmp type echo-request counter queue num 1
- meta oifname "eth0" icmp type echo-request counter queue num 1
- }
- chain post {
- type filter hook postrouting priority 0; policy accept;
- meta oifname "tvrf" icmp type echo-request counter queue num 1
- meta oifname "eth0" icmp type echo-request counter queue num 1
- }
-}
-EOF
- ip netns exec ${ns1} ./nf-queue -q 1 -t $timeout &
- local nfqpid=$!
-
- sleep 1
- ip netns exec ${ns1} ip vrf exec tvrf ping -c 1 10.0.2.99 > /dev/null
-
- for n in output post; do
- for d in tvrf eth0; do
- ip netns exec ${ns1} nft list chain inet filter $n | grep -q "oifname \"$d\" icmp type echo-request counter packets 1"
- if [ $? -ne 0 ] ; then
- echo "FAIL: chain $n: icmp packet counter mismatch for device $d" 1>&2
- ip netns exec ${ns1} nft list ruleset
- ret=1
- return
- fi
- done
- done
-
- wait $nfqpid
- [ $? -eq 0 ] && echo "PASS: icmp+nfqueue via vrf"
- wait 2>/dev/null
-}
-
-ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
-ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
-
-load_ruleset "filter" 0
-
-sleep 3
-
-test_ping
-ret=$?
-if [ $ret -eq 0 ];then
- # queue bypass works (rules were skipped, no listener)
- echo "PASS: ${ns1} can reach ${ns2}"
-else
- echo "FAIL: ${ns1} cannot reach ${ns2}: $ret" 1>&2
- exit $ret
-fi
-
-test_queue_blackhole ip
-test_queue_blackhole ip6
-
-# dummy ruleset to add base chains between the
-# queueing rules. We don't want the second reinject
-# to re-execute the old hooks.
-load_counter_ruleset 10
-
-# we are hooking all: prerouting/input/forward/output/postrouting.
-# we ping ${ns2} from ${ns1} via ${nsrouter} using ipv4 and ipv6, so:
-# 1x icmp prerouting,forward,postrouting -> 3 queue events (6 incl. reply).
-# 1x icmp prerouting,input,output postrouting -> 4 queue events incl. reply.
-# so we expect that userspace program receives 10 packets.
-test_queue 10
-
-# same. We queue to a second program as well.
-load_ruleset "filter2" 20
-test_queue 20
-
-test_tcp_forward
-test_tcp_localhost
-test_tcp_localhost_connectclose
-test_tcp_localhost_requeue
-test_icmp_vrf
-
-exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_synproxy.sh b/tools/testing/selftests/netfilter/nft_synproxy.sh
deleted file mode 100755
index b62933b680d6..000000000000
--- a/tools/testing/selftests/netfilter/nft_synproxy.sh
+++ /dev/null
@@ -1,117 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-ret=0
-
-rnd=$(mktemp -u XXXXXXXX)
-nsr="nsr-$rnd" # synproxy machine
-ns1="ns1-$rnd" # iperf client
-ns2="ns2-$rnd" # iperf server
-
-checktool (){
- if ! $1 > /dev/null 2>&1; then
- echo "SKIP: Could not $2"
- exit $ksft_skip
- fi
-}
-
-checktool "nft --version" "run test without nft tool"
-checktool "ip -Version" "run test without ip tool"
-checktool "iperf3 --version" "run test without iperf3"
-checktool "ip netns add $nsr" "create net namespace"
-
-modprobe -q nf_conntrack
-
-ip netns add $ns1
-ip netns add $ns2
-
-cleanup() {
- ip netns pids $ns1 | xargs kill 2>/dev/null
- ip netns pids $ns2 | xargs kill 2>/dev/null
- ip netns del $ns1
- ip netns del $ns2
-
- ip netns del $nsr
-}
-
-trap cleanup EXIT
-
-ip link add veth0 netns $nsr type veth peer name eth0 netns $ns1
-ip link add veth1 netns $nsr type veth peer name eth0 netns $ns2
-
-for dev in lo veth0 veth1; do
-ip -net $nsr link set $dev up
-done
-
-ip -net $nsr addr add 10.0.1.1/24 dev veth0
-ip -net $nsr addr add 10.0.2.1/24 dev veth1
-
-ip netns exec $nsr sysctl -q net.ipv4.conf.veth0.forwarding=1
-ip netns exec $nsr sysctl -q net.ipv4.conf.veth1.forwarding=1
-ip netns exec $nsr sysctl -q net.netfilter.nf_conntrack_tcp_loose=0
-
-for n in $ns1 $ns2; do
- ip -net $n link set lo up
- ip -net $n link set eth0 up
-done
-ip -net $ns1 addr add 10.0.1.99/24 dev eth0
-ip -net $ns2 addr add 10.0.2.99/24 dev eth0
-ip -net $ns1 route add default via 10.0.1.1
-ip -net $ns2 route add default via 10.0.2.1
-
-# test basic connectivity
-if ! ip netns exec $ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then
- echo "ERROR: $ns1 cannot reach $ns2" 1>&2
- exit 1
-fi
-
-if ! ip netns exec $ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then
- echo "ERROR: $ns2 cannot reach $ns1" 1>&2
- exit 1
-fi
-
-ip netns exec $ns2 iperf3 -s > /dev/null 2>&1 &
-# ip netns exec $nsr tcpdump -vvv -n -i veth1 tcp | head -n 10 &
-
-sleep 1
-
-ip netns exec $nsr nft -f - <<EOF
-table inet filter {
- chain prerouting {
- type filter hook prerouting priority -300; policy accept;
- meta iif veth0 tcp flags syn counter notrack
- }
-
- chain forward {
- type filter hook forward priority 0; policy accept;
-
- ct state new,established counter accept
-
- meta iif veth0 meta l4proto tcp ct state untracked,invalid synproxy mss 1460 sack-perm timestamp
-
- ct state invalid counter drop
-
- # make ns2 unreachable w.o. tcp synproxy
- tcp flags syn counter drop
- }
-}
-EOF
-if [ $? -ne 0 ]; then
- echo "SKIP: Cannot add nft synproxy"
- exit $ksft_skip
-fi
-
-ip netns exec $ns1 timeout 5 iperf3 -c 10.0.2.99 -n $((1 * 1024 * 1024)) > /dev/null
-
-if [ $? -ne 0 ]; then
- echo "FAIL: iperf3 returned an error" 1>&2
- ret=$?
- ip netns exec $nsr nft list ruleset
-else
- echo "PASS: synproxy connection successful"
-fi
-
-exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_trans_stress.sh b/tools/testing/selftests/netfilter/nft_trans_stress.sh
deleted file mode 100755
index 2ffba45a78bf..000000000000
--- a/tools/testing/selftests/netfilter/nft_trans_stress.sh
+++ /dev/null
@@ -1,151 +0,0 @@
-#!/bin/bash
-#
-# This test is for stress-testing the nf_tables config plane path vs.
-# packet path processing: Make sure we never release rules that are
-# still visible to other cpus.
-#
-# set -e
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-
-testns=testns-$(mktemp -u "XXXXXXXX")
-tmp=""
-
-tables="foo bar baz quux"
-global_ret=0
-eret=0
-lret=0
-
-cleanup() {
- ip netns pids "$testns" | xargs kill 2>/dev/null
- ip netns del "$testns"
-
- rm -f "$tmp"
-}
-
-check_result()
-{
- local r=$1
- local OK="PASS"
-
- if [ $r -ne 0 ] ;then
- OK="FAIL"
- global_ret=$r
- fi
-
- echo "$OK: nft $2 test returned $r"
-
- eret=0
-}
-
-nft --version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without nft tool"
- exit $ksft_skip
-fi
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
-
-trap cleanup EXIT
-tmp=$(mktemp)
-
-for table in $tables; do
- echo add table inet "$table" >> "$tmp"
- echo flush table inet "$table" >> "$tmp"
-
- echo "add chain inet $table INPUT { type filter hook input priority 0; }" >> "$tmp"
- echo "add chain inet $table OUTPUT { type filter hook output priority 0; }" >> "$tmp"
- for c in $(seq 1 400); do
- chain=$(printf "chain%03u" "$c")
- echo "add chain inet $table $chain" >> "$tmp"
- done
-
- for c in $(seq 1 400); do
- chain=$(printf "chain%03u" "$c")
- for BASE in INPUT OUTPUT; do
- echo "add rule inet $table $BASE counter jump $chain" >> "$tmp"
- done
- echo "add rule inet $table $chain counter return" >> "$tmp"
- done
-done
-
-ip netns add "$testns"
-ip -netns "$testns" link set lo up
-
-lscpu | grep ^CPU\(s\): | ( read cpu cpunum ;
-cpunum=$((cpunum-1))
-for i in $(seq 0 $cpunum);do
- mask=$(printf 0x%x $((1<<$i)))
- ip netns exec "$testns" taskset $mask ping -4 127.0.0.1 -fq > /dev/null &
- ip netns exec "$testns" taskset $mask ping -6 ::1 -fq > /dev/null &
-done)
-
-sleep 1
-
-ip netns exec "$testns" nft -f "$tmp"
-for i in $(seq 1 10) ; do ip netns exec "$testns" nft -f "$tmp" & done
-
-for table in $tables;do
- randsleep=$((RANDOM%2))
- sleep $randsleep
- ip netns exec "$testns" nft delete table inet $table
- lret=$?
- if [ $lret -ne 0 ]; then
- eret=$lret
- fi
-done
-
-check_result $eret "add/delete"
-
-for i in $(seq 1 10) ; do
- (echo "flush ruleset"; cat "$tmp") | ip netns exec "$testns" nft -f /dev/stdin
-
- lret=$?
- if [ $lret -ne 0 ]; then
- eret=$lret
- fi
-done
-
-check_result $eret "reload"
-
-for i in $(seq 1 10) ; do
- (echo "flush ruleset"; cat "$tmp"
- echo "insert rule inet foo INPUT meta nftrace set 1"
- echo "insert rule inet foo OUTPUT meta nftrace set 1"
- ) | ip netns exec "$testns" nft -f /dev/stdin
- lret=$?
- if [ $lret -ne 0 ]; then
- eret=$lret
- fi
-
- (echo "flush ruleset"; cat "$tmp"
- ) | ip netns exec "$testns" nft -f /dev/stdin
-
- lret=$?
- if [ $lret -ne 0 ]; then
- eret=$lret
- fi
-done
-
-check_result $eret "add/delete with nftrace enabled"
-
-echo "insert rule inet foo INPUT meta nftrace set 1" >> $tmp
-echo "insert rule inet foo OUTPUT meta nftrace set 1" >> $tmp
-
-for i in $(seq 1 10) ; do
- (echo "flush ruleset"; cat "$tmp") | ip netns exec "$testns" nft -f /dev/stdin
-
- lret=$?
- if [ $lret -ne 0 ]; then
- eret=1
- fi
-done
-
-check_result $lret "add/delete with nftrace enabled"
-
-exit $global_ret
diff --git a/tools/testing/selftests/netfilter/settings b/tools/testing/selftests/netfilter/settings
deleted file mode 100644
index 6091b45d226b..000000000000
--- a/tools/testing/selftests/netfilter/settings
+++ /dev/null
@@ -1 +0,0 @@
-timeout=120
diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile
index 40dd95228051..3fbabab46958 100644
--- a/tools/testing/selftests/nolibc/Makefile
+++ b/tools/testing/selftests/nolibc/Makefile
@@ -152,7 +152,7 @@ CFLAGS_mips32be = -EB -mabi=32
CFLAGS_STACKPROTECTOR ?= $(call cc-option,-mstack-protector-guard=global $(call cc-option,-fstack-protector-all))
CFLAGS ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 -W -Wall -Wextra \
$(call cc-option,-fno-stack-protector) \
- $(CFLAGS_$(XARCH)) $(CFLAGS_STACKPROTECTOR)
+ $(CFLAGS_$(XARCH)) $(CFLAGS_STACKPROTECTOR) $(CFLAGS_EXTRA)
LDFLAGS :=
REPORT ?= awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{if (!f) printf("\n"); f++; print;} /\[SKIPPED\][\r]*$$/{s++} \
diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
index 6ba4f8275ac4..093d0512f4c5 100644
--- a/tools/testing/selftests/nolibc/nolibc-test.c
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -27,6 +27,7 @@
#include <sys/syscall.h>
#include <sys/sysmacros.h>
#include <sys/time.h>
+#include <sys/utsname.h>
#include <sys/wait.h>
#include <dirent.h>
#include <errno.h>
@@ -63,6 +64,14 @@ static const char *argv0;
/* will be used by constructor tests */
static int constructor_test_value;
+static const int is_nolibc =
+#ifdef NOLIBC
+ 1
+#else
+ 0
+#endif
+;
+
/* definition of a series of tests */
struct test {
const char *name; /* test name */
@@ -600,6 +609,70 @@ int expect_strne(const char *expr, int llen, const char *cmp)
return ret;
}
+#define EXPECT_STRBUFEQ(cond, expr, buf, val, cmp) \
+ do { if (!(cond)) result(llen, SKIPPED); else ret += expect_str_buf_eq(expr, buf, val, llen, cmp); } while (0)
+
+static __attribute__((unused))
+int expect_str_buf_eq(size_t expr, const char *buf, size_t val, int llen, const char *cmp)
+{
+ llen += printf(" = %lu <%s> ", (unsigned long)expr, buf);
+ if (strcmp(buf, cmp) != 0) {
+ result(llen, FAIL);
+ return 1;
+ }
+ if (expr != val) {
+ result(llen, FAIL);
+ return 1;
+ }
+
+ result(llen, OK);
+ return 0;
+}
+
+#define EXPECT_STRTOX(cond, func, input, base, expected, chars, expected_errno) \
+ do { if (!(cond)) result(llen, SKIPPED); else ret += expect_strtox(llen, func, input, base, expected, chars, expected_errno); } while (0)
+
+static __attribute__((unused))
+int expect_strtox(int llen, void *func, const char *input, int base, intmax_t expected, int expected_chars, int expected_errno)
+{
+ char *endptr;
+ int actual_errno, actual_chars;
+ intmax_t r;
+
+ errno = 0;
+ if (func == strtol) {
+ r = strtol(input, &endptr, base);
+ } else if (func == strtoul) {
+ r = strtoul(input, &endptr, base);
+ } else {
+ result(llen, FAIL);
+ return 1;
+ }
+ actual_errno = errno;
+ actual_chars = endptr - input;
+
+ llen += printf(" %lld = %lld", (long long)expected, (long long)r);
+ if (r != expected) {
+ result(llen, FAIL);
+ return 1;
+ }
+ if (expected_chars == -1) {
+ if (*endptr != '\0') {
+ result(llen, FAIL);
+ return 1;
+ }
+ } else if (expected_chars != actual_chars) {
+ result(llen, FAIL);
+ return 1;
+ }
+ if (actual_errno != expected_errno) {
+ result(llen, FAIL);
+ return 1;
+ }
+
+ result(llen, OK);
+ return 0;
+}
/* declare tests based on line numbers. There must be exactly one test per line. */
#define CASE_TEST(name) \
@@ -761,6 +834,45 @@ int test_stat_timestamps(void)
return 0;
}
+int test_uname(void)
+{
+ struct utsname buf;
+ char osrelease[sizeof(buf.release)];
+ ssize_t r;
+ int fd;
+
+ memset(&buf.domainname, 'P', sizeof(buf.domainname));
+
+ if (uname(&buf))
+ return 1;
+
+ if (strncmp("Linux", buf.sysname, sizeof(buf.sysname)))
+ return 1;
+
+ fd = open("/proc/sys/kernel/osrelease", O_RDONLY);
+ if (fd == -1)
+ return 1;
+
+ r = read(fd, osrelease, sizeof(osrelease));
+ if (r == -1)
+ return 1;
+
+ close(fd);
+
+ if (osrelease[r - 1] == '\n')
+ r--;
+
+ /* Validate one of the later fields to ensure field sizes are correct */
+ if (strncmp(osrelease, buf.release, r))
+ return 1;
+
+ /* Ensure the field domainname is set, it is missing from struct old_utsname */
+ if (strnlen(buf.domainname, sizeof(buf.domainname)) == sizeof(buf.domainname))
+ return 1;
+
+ return 0;
+}
+
int test_mmap_munmap(void)
{
int ret, fd, i, page_size;
@@ -883,6 +995,7 @@ int run_syscall(int min, int max)
int ret = 0;
void *p1, *p2;
int has_gettid = 1;
+ int has_brk;
/* <proc> indicates whether or not /proc is mounted */
proc = stat("/proc", &stat_buf) == 0;
@@ -895,6 +1008,9 @@ int run_syscall(int min, int max)
has_gettid = __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 30);
#endif
+ /* on musl setting brk()/sbrk() always fails */
+ has_brk = brk(0) == 0;
+
for (test = min; test >= 0 && test <= max; test++) {
int llen = 0; /* line length */
@@ -910,9 +1026,9 @@ int run_syscall(int min, int max)
CASE_TEST(kill_0); EXPECT_SYSZR(1, kill(getpid(), 0)); break;
CASE_TEST(kill_CONT); EXPECT_SYSZR(1, kill(getpid(), 0)); break;
CASE_TEST(kill_BADPID); EXPECT_SYSER(1, kill(INT_MAX, 0), -1, ESRCH); break;
- CASE_TEST(sbrk_0); EXPECT_PTRNE(1, sbrk(0), (void *)-1); break;
- CASE_TEST(sbrk); if ((p1 = p2 = sbrk(4096)) != (void *)-1) p2 = sbrk(-4096); EXPECT_SYSZR(1, (p2 == (void *)-1) || p2 == p1); break;
- CASE_TEST(brk); EXPECT_SYSZR(1, brk(sbrk(0))); break;
+ CASE_TEST(sbrk_0); EXPECT_PTRNE(has_brk, sbrk(0), (void *)-1); break;
+ CASE_TEST(sbrk); if ((p1 = p2 = sbrk(4096)) != (void *)-1) p2 = sbrk(-4096); EXPECT_SYSZR(has_brk, (p2 == (void *)-1) || p2 == p1); break;
+ CASE_TEST(brk); EXPECT_SYSZR(has_brk, brk(sbrk(0))); break;
CASE_TEST(chdir_root); EXPECT_SYSZR(1, chdir("/")); chdir(getenv("PWD")); break;
CASE_TEST(chdir_dot); EXPECT_SYSZR(1, chdir(".")); break;
CASE_TEST(chdir_blah); EXPECT_SYSER(1, chdir("/blah"), -1, ENOENT); break;
@@ -966,6 +1082,8 @@ int run_syscall(int min, int max)
CASE_TEST(stat_fault); EXPECT_SYSER(1, stat(NULL, &stat_buf), -1, EFAULT); break;
CASE_TEST(stat_timestamps); EXPECT_SYSZR(1, test_stat_timestamps()); break;
CASE_TEST(symlink_root); EXPECT_SYSER(1, symlink("/", "/"), -1, EEXIST); break;
+ CASE_TEST(uname); EXPECT_SYSZR(proc, test_uname()); break;
+ CASE_TEST(uname_fault); EXPECT_SYSER(1, uname(NULL), -1, EFAULT); break;
CASE_TEST(unlink_root); EXPECT_SYSER(1, unlink("/"), -1, EISDIR); break;
CASE_TEST(unlink_blah); EXPECT_SYSER(1, unlink("/proc/self/blah"), -1, ENOENT); break;
CASE_TEST(wait_child); EXPECT_SYSER(1, wait(&tmp), -1, ECHILD); break;
@@ -991,6 +1109,14 @@ int run_stdlib(int min, int max)
for (test = min; test >= 0 && test <= max; test++) {
int llen = 0; /* line length */
+ /* For functions that take a long buffer, like strlcat()
+ * Add some more chars after the \0, to test functions that overwrite the buffer set
+ * the \0 at the exact right position.
+ */
+ char buf[10] = "test123456";
+ buf[4] = '\0';
+
+
/* avoid leaving empty lines below, this will insert holes into
* test numbers.
*/
@@ -1007,6 +1133,17 @@ int run_stdlib(int min, int max)
CASE_TEST(strchr_foobar_z); EXPECT_STRZR(1, strchr("foobar", 'z')); break;
CASE_TEST(strrchr_foobar_o); EXPECT_STREQ(1, strrchr("foobar", 'o'), "obar"); break;
CASE_TEST(strrchr_foobar_z); EXPECT_STRZR(1, strrchr("foobar", 'z')); break;
+ CASE_TEST(strlcat_0); EXPECT_STRBUFEQ(is_nolibc, strlcat(buf, "bar", 0), buf, 3, "test"); break;
+ CASE_TEST(strlcat_1); EXPECT_STRBUFEQ(is_nolibc, strlcat(buf, "bar", 1), buf, 4, "test"); break;
+ CASE_TEST(strlcat_5); EXPECT_STRBUFEQ(is_nolibc, strlcat(buf, "bar", 5), buf, 7, "test"); break;
+ CASE_TEST(strlcat_6); EXPECT_STRBUFEQ(is_nolibc, strlcat(buf, "bar", 6), buf, 7, "testb"); break;
+ CASE_TEST(strlcat_7); EXPECT_STRBUFEQ(is_nolibc, strlcat(buf, "bar", 7), buf, 7, "testba"); break;
+ CASE_TEST(strlcat_8); EXPECT_STRBUFEQ(is_nolibc, strlcat(buf, "bar", 8), buf, 7, "testbar"); break;
+ CASE_TEST(strlcpy_0); EXPECT_STRBUFEQ(is_nolibc, strlcpy(buf, "bar", 0), buf, 3, "test"); break;
+ CASE_TEST(strlcpy_1); EXPECT_STRBUFEQ(is_nolibc, strlcpy(buf, "bar", 1), buf, 3, ""); break;
+ CASE_TEST(strlcpy_2); EXPECT_STRBUFEQ(is_nolibc, strlcpy(buf, "bar", 2), buf, 3, "b"); break;
+ CASE_TEST(strlcpy_3); EXPECT_STRBUFEQ(is_nolibc, strlcpy(buf, "bar", 3), buf, 3, "ba"); break;
+ CASE_TEST(strlcpy_4); EXPECT_STRBUFEQ(is_nolibc, strlcpy(buf, "bar", 4), buf, 3, "bar"); break;
CASE_TEST(memcmp_20_20); EXPECT_EQ(1, memcmp("aaa\x20", "aaa\x20", 4), 0); break;
CASE_TEST(memcmp_20_60); EXPECT_LT(1, memcmp("aaa\x20", "aaa\x60", 4), 0); break;
CASE_TEST(memcmp_60_20); EXPECT_GT(1, memcmp("aaa\x60", "aaa\x20", 4), 0); break;
@@ -1057,6 +1194,26 @@ int run_stdlib(int min, int max)
CASE_TEST(limit_ptrdiff_min); EXPECT_EQ(1, PTRDIFF_MIN, sizeof(long) == 8 ? (ptrdiff_t) 0x8000000000000000LL : (ptrdiff_t) 0x80000000); break;
CASE_TEST(limit_ptrdiff_max); EXPECT_EQ(1, PTRDIFF_MAX, sizeof(long) == 8 ? (ptrdiff_t) 0x7fffffffffffffffLL : (ptrdiff_t) 0x7fffffff); break;
CASE_TEST(limit_size_max); EXPECT_EQ(1, SIZE_MAX, sizeof(long) == 8 ? (size_t) 0xffffffffffffffffULL : (size_t) 0xffffffffU); break;
+ CASE_TEST(strtol_simple); EXPECT_STRTOX(1, strtol, "35", 10, 35, -1, 0); break;
+ CASE_TEST(strtol_positive); EXPECT_STRTOX(1, strtol, "+35", 10, 35, -1, 0); break;
+ CASE_TEST(strtol_negative); EXPECT_STRTOX(1, strtol, "-35", 10, -35, -1, 0); break;
+ CASE_TEST(strtol_hex_auto); EXPECT_STRTOX(1, strtol, "0xFF", 0, 255, -1, 0); break;
+ CASE_TEST(strtol_base36); EXPECT_STRTOX(1, strtol, "12yZ", 36, 50507, -1, 0); break;
+ CASE_TEST(strtol_cutoff); EXPECT_STRTOX(1, strtol, "1234567890", 8, 342391, 7, 0); break;
+ CASE_TEST(strtol_octal_auto); EXPECT_STRTOX(1, strtol, "011", 0, 9, -1, 0); break;
+ CASE_TEST(strtol_hex_00); EXPECT_STRTOX(1, strtol, "0x00", 16, 0, -1, 0); break;
+ CASE_TEST(strtol_hex_FF); EXPECT_STRTOX(1, strtol, "FF", 16, 255, -1, 0); break;
+ CASE_TEST(strtol_hex_ff); EXPECT_STRTOX(1, strtol, "ff", 16, 255, -1, 0); break;
+ CASE_TEST(strtol_hex_prefix); EXPECT_STRTOX(1, strtol, "0xFF", 16, 255, -1, 0); break;
+ CASE_TEST(strtol_trailer); EXPECT_STRTOX(1, strtol, "35foo", 10, 35, 2, 0); break;
+ CASE_TEST(strtol_overflow); EXPECT_STRTOX(1, strtol, "0x8000000000000000", 16, LONG_MAX, -1, ERANGE); break;
+ CASE_TEST(strtol_underflow); EXPECT_STRTOX(1, strtol, "-0x8000000000000001", 16, LONG_MIN, -1, ERANGE); break;
+ CASE_TEST(strtoul_negative); EXPECT_STRTOX(1, strtoul, "-0x1", 16, ULONG_MAX, 4, 0); break;
+ CASE_TEST(strtoul_overflow); EXPECT_STRTOX(1, strtoul, "0x10000000000000000", 16, ULONG_MAX, -1, ERANGE); break;
+ CASE_TEST(strerror_success); EXPECT_STREQ(is_nolibc, strerror(0), "errno=0"); break;
+ CASE_TEST(strerror_EINVAL); EXPECT_STREQ(is_nolibc, strerror(EINVAL), "errno=22"); break;
+ CASE_TEST(strerror_int_max); EXPECT_STREQ(is_nolibc, strerror(INT_MAX), "errno=2147483647"); break;
+ CASE_TEST(strerror_int_min); EXPECT_STREQ(is_nolibc, strerror(INT_MIN), "errno=-2147483648"); break;
case __LINE__:
return ret; /* must be last */
diff --git a/tools/testing/selftests/nolibc/run-tests.sh b/tools/testing/selftests/nolibc/run-tests.sh
index c0a5a7cea9fa..0446e6326a40 100755
--- a/tools/testing/selftests/nolibc/run-tests.sh
+++ b/tools/testing/selftests/nolibc/run-tests.sh
@@ -15,9 +15,10 @@ download_location="${cache_dir}/crosstools/"
build_location="$(realpath "${cache_dir}"/nolibc-tests/)"
perform_download=0
test_mode=system
+CFLAGS_EXTRA="-Werror"
archs="i386 x86_64 arm64 arm mips32le mips32be ppc ppc64 ppc64le riscv s390 loongarch"
-TEMP=$(getopt -o 'j:d:c:b:a:m:ph' -n "$0" -- "$@")
+TEMP=$(getopt -o 'j:d:c:b:a:m:peh' -n "$0" -- "$@")
eval set -- "$TEMP"
unset TEMP
@@ -40,6 +41,7 @@ Options:
-a [ARCH] Host architecture of toolchains to use (default: ${hostarch})
-b [DIR] Build location (default: ${build_location})
-m [MODE] Test mode user/system (default: ${test_mode})
+ -e Disable -Werror
EOF
}
@@ -66,6 +68,9 @@ while true; do
'-m')
test_mode="$2"
shift 2; continue ;;
+ '-e')
+ CFLAGS_EXTRA=""
+ shift; continue ;;
'-h')
print_usage
exit 0
@@ -153,7 +158,7 @@ test_arch() {
exit 1
esac
printf '%-15s' "$arch:"
- swallow_output "${MAKE[@]}" "$test_target" V=1
+ swallow_output "${MAKE[@]}" CFLAGS_EXTRA="$CFLAGS_EXTRA" "$test_target" V=1
cp run.out run.out."${arch}"
"${MAKE[@]}" report | grep passed
}
diff --git a/tools/testing/selftests/openat2/Makefile b/tools/testing/selftests/openat2/Makefile
index 254d676a2689..185dc76ebb5f 100644
--- a/tools/testing/selftests/openat2/Makefile
+++ b/tools/testing/selftests/openat2/Makefile
@@ -1,8 +1,18 @@
# SPDX-License-Identifier: GPL-2.0-or-later
-CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined -static-libasan
+CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined
TEST_GEN_PROGS := openat2_test resolve_test rename_attack_test
+# gcc requires -static-libasan in order to ensure that Address Sanitizer's
+# library is the first one loaded. However, clang already statically links the
+# Address Sanitizer if -fsanitize is specified. Therefore, simply omit
+# -static-libasan for clang builds.
+ifeq ($(LLVM),)
+ CFLAGS += -static-libasan
+endif
+
+LOCAL_HDRS += helpers.h
+
include ../lib.mk
-$(TEST_GEN_PROGS): helpers.c helpers.h
+$(TEST_GEN_PROGS): helpers.c
diff --git a/tools/testing/selftests/openat2/openat2_test.c b/tools/testing/selftests/openat2/openat2_test.c
index 9024754530b2..5790ab446527 100644
--- a/tools/testing/selftests/openat2/openat2_test.c
+++ b/tools/testing/selftests/openat2/openat2_test.c
@@ -5,6 +5,7 @@
*/
#define _GNU_SOURCE
+#define __SANE_USERSPACE_TYPES__ // Use ll64
#include <fcntl.h>
#include <sched.h>
#include <sys/stat.h>
diff --git a/tools/testing/selftests/perf_events/.gitignore b/tools/testing/selftests/perf_events/.gitignore
index 790c47001e77..ee93dc4969b8 100644
--- a/tools/testing/selftests/perf_events/.gitignore
+++ b/tools/testing/selftests/perf_events/.gitignore
@@ -1,3 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
sigtrap_threads
remove_on_exec
+watermark_signal
diff --git a/tools/testing/selftests/perf_events/Makefile b/tools/testing/selftests/perf_events/Makefile
index db93c4ff081a..70e3ff211278 100644
--- a/tools/testing/selftests/perf_events/Makefile
+++ b/tools/testing/selftests/perf_events/Makefile
@@ -2,5 +2,5 @@
CFLAGS += -Wl,-no-as-needed -Wall $(KHDR_INCLUDES)
LDFLAGS += -lpthread
-TEST_GEN_PROGS := sigtrap_threads remove_on_exec
+TEST_GEN_PROGS := sigtrap_threads remove_on_exec watermark_signal
include ../lib.mk
diff --git a/tools/testing/selftests/perf_events/watermark_signal.c b/tools/testing/selftests/perf_events/watermark_signal.c
new file mode 100644
index 000000000000..49dc1e831174
--- /dev/null
+++ b/tools/testing/selftests/perf_events/watermark_signal.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/perf_event.h>
+#include <stddef.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "../kselftest_harness.h"
+
+#define __maybe_unused __attribute__((__unused__))
+
+static int sigio_count;
+
+static void handle_sigio(int signum __maybe_unused,
+ siginfo_t *oh __maybe_unused,
+ void *uc __maybe_unused)
+{
+ ++sigio_count;
+}
+
+static void do_child(void)
+{
+ raise(SIGSTOP);
+
+ for (int i = 0; i < 20; ++i)
+ sleep(1);
+
+ raise(SIGSTOP);
+
+ exit(0);
+}
+
+TEST(watermark_signal)
+{
+ struct perf_event_attr attr;
+ struct perf_event_mmap_page *p = NULL;
+ struct sigaction previous_sigio, sigio = { 0 };
+ pid_t child = -1;
+ int child_status;
+ int fd = -1;
+ long page_size = sysconf(_SC_PAGE_SIZE);
+
+ sigio.sa_sigaction = handle_sigio;
+ EXPECT_EQ(sigaction(SIGIO, &sigio, &previous_sigio), 0);
+
+ memset(&attr, 0, sizeof(attr));
+ attr.size = sizeof(attr);
+ attr.type = PERF_TYPE_SOFTWARE;
+ attr.config = PERF_COUNT_SW_DUMMY;
+ attr.sample_period = 1;
+ attr.disabled = 1;
+ attr.watermark = 1;
+ attr.context_switch = 1;
+ attr.wakeup_watermark = 1;
+
+ child = fork();
+ EXPECT_GE(child, 0);
+ if (child == 0)
+ do_child();
+ else if (child < 0) {
+ perror("fork()");
+ goto cleanup;
+ }
+
+ if (waitpid(child, &child_status, WSTOPPED) != child ||
+ !(WIFSTOPPED(child_status) && WSTOPSIG(child_status) == SIGSTOP)) {
+ fprintf(stderr,
+ "failed to sycnhronize with child errno=%d status=%x\n",
+ errno,
+ child_status);
+ goto cleanup;
+ }
+
+ fd = syscall(__NR_perf_event_open, &attr, child, -1, -1,
+ PERF_FLAG_FD_CLOEXEC);
+ if (fd < 0) {
+ fprintf(stderr, "failed opening event %llx\n", attr.config);
+ goto cleanup;
+ }
+
+ if (fcntl(fd, F_SETFL, FASYNC)) {
+ perror("F_SETFL FASYNC");
+ goto cleanup;
+ }
+
+ if (fcntl(fd, F_SETOWN, getpid())) {
+ perror("F_SETOWN getpid()");
+ goto cleanup;
+ }
+
+ if (fcntl(fd, F_SETSIG, SIGIO)) {
+ perror("F_SETSIG SIGIO");
+ goto cleanup;
+ }
+
+ p = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if (p == NULL) {
+ perror("mmap");
+ goto cleanup;
+ }
+
+ if (ioctl(fd, PERF_EVENT_IOC_ENABLE, 0)) {
+ perror("PERF_EVENT_IOC_ENABLE");
+ goto cleanup;
+ }
+
+ if (kill(child, SIGCONT) < 0) {
+ perror("SIGCONT");
+ goto cleanup;
+ }
+
+ if (waitpid(child, &child_status, WSTOPPED) != -1 || errno != EINTR)
+ fprintf(stderr,
+ "expected SIGIO to terminate wait errno=%d status=%x\n%d",
+ errno,
+ child_status,
+ sigio_count);
+
+ EXPECT_GE(sigio_count, 1);
+
+cleanup:
+ if (p != NULL)
+ munmap(p, 2 * page_size);
+
+ if (fd >= 0)
+ close(fd);
+
+ if (child > 0) {
+ kill(child, SIGKILL);
+ waitpid(child, NULL, 0);
+ }
+
+ sigaction(SIGIO, &previous_sigio, NULL);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/pidfd/config b/tools/testing/selftests/pidfd/config
index f6f2965e17af..6133524710f7 100644
--- a/tools/testing/selftests/pidfd/config
+++ b/tools/testing/selftests/pidfd/config
@@ -3,5 +3,7 @@ CONFIG_IPC_NS=y
CONFIG_USER_NS=y
CONFIG_PID_NS=y
CONFIG_NET_NS=y
+CONFIG_TIME_NS=y
+CONFIG_GENERIC_VDSO_TIME_NS=y
CONFIG_CGROUPS=y
CONFIG_CHECKPOINT_RESTORE=y
diff --git a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c
index 01cc37bf611c..f062a986e382 100644
--- a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c
@@ -307,5 +307,5 @@ int main(int argc, char **argv)
test_pidfd_fdinfo_nspid();
test_pidfd_dead_fdinfo();
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/pidfd/pidfd_open_test.c b/tools/testing/selftests/pidfd/pidfd_open_test.c
index 8a59438ccc78..c62564c264b1 100644
--- a/tools/testing/selftests/pidfd/pidfd_open_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_open_test.c
@@ -159,5 +159,7 @@ on_error:
if (pidfd >= 0)
close(pidfd);
- return !ret ? ksft_exit_pass() : ksft_exit_fail();
+ if (ret)
+ ksft_exit_fail();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/pidfd/pidfd_poll_test.c b/tools/testing/selftests/pidfd/pidfd_poll_test.c
index 610811275357..55d74a50358f 100644
--- a/tools/testing/selftests/pidfd/pidfd_poll_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_poll_test.c
@@ -112,5 +112,5 @@ int main(int argc, char **argv)
}
ksft_test_result_pass("pidfd poll test: pass\n");
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c b/tools/testing/selftests/pidfd/pidfd_setns_test.c
index 6e2f2cd400ca..7c2a4349170a 100644
--- a/tools/testing/selftests/pidfd/pidfd_setns_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c
@@ -16,11 +16,56 @@
#include <unistd.h>
#include <sys/socket.h>
#include <sys/stat.h>
+#include <linux/ioctl.h>
#include "pidfd.h"
#include "../clone3/clone3_selftests.h"
#include "../kselftest_harness.h"
+#ifndef PIDFS_IOCTL_MAGIC
+#define PIDFS_IOCTL_MAGIC 0xFF
+#endif
+
+#ifndef PIDFD_GET_CGROUP_NAMESPACE
+#define PIDFD_GET_CGROUP_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 1)
+#endif
+
+#ifndef PIDFD_GET_IPC_NAMESPACE
+#define PIDFD_GET_IPC_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 2)
+#endif
+
+#ifndef PIDFD_GET_MNT_NAMESPACE
+#define PIDFD_GET_MNT_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 3)
+#endif
+
+#ifndef PIDFD_GET_NET_NAMESPACE
+#define PIDFD_GET_NET_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 4)
+#endif
+
+#ifndef PIDFD_GET_PID_NAMESPACE
+#define PIDFD_GET_PID_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 5)
+#endif
+
+#ifndef PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE
+#define PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 6)
+#endif
+
+#ifndef PIDFD_GET_TIME_NAMESPACE
+#define PIDFD_GET_TIME_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 7)
+#endif
+
+#ifndef PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE
+#define PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 8)
+#endif
+
+#ifndef PIDFD_GET_USER_NAMESPACE
+#define PIDFD_GET_USER_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 9)
+#endif
+
+#ifndef PIDFD_GET_UTS_NAMESPACE
+#define PIDFD_GET_UTS_NAMESPACE _IO(PIDFS_IOCTL_MAGIC, 10)
+#endif
+
enum {
PIDFD_NS_USER,
PIDFD_NS_MNT,
@@ -31,22 +76,25 @@ enum {
PIDFD_NS_CGROUP,
PIDFD_NS_PIDCLD,
PIDFD_NS_TIME,
+ PIDFD_NS_TIMECLD,
PIDFD_NS_MAX
};
const struct ns_info {
const char *name;
int flag;
+ unsigned int pidfd_ioctl;
} ns_info[] = {
- [PIDFD_NS_USER] = { "user", CLONE_NEWUSER, },
- [PIDFD_NS_MNT] = { "mnt", CLONE_NEWNS, },
- [PIDFD_NS_PID] = { "pid", CLONE_NEWPID, },
- [PIDFD_NS_UTS] = { "uts", CLONE_NEWUTS, },
- [PIDFD_NS_IPC] = { "ipc", CLONE_NEWIPC, },
- [PIDFD_NS_NET] = { "net", CLONE_NEWNET, },
- [PIDFD_NS_CGROUP] = { "cgroup", CLONE_NEWCGROUP, },
- [PIDFD_NS_PIDCLD] = { "pid_for_children", 0, },
- [PIDFD_NS_TIME] = { "time", CLONE_NEWTIME, },
+ [PIDFD_NS_USER] = { "user", CLONE_NEWUSER, PIDFD_GET_USER_NAMESPACE, },
+ [PIDFD_NS_MNT] = { "mnt", CLONE_NEWNS, PIDFD_GET_MNT_NAMESPACE, },
+ [PIDFD_NS_PID] = { "pid", CLONE_NEWPID, PIDFD_GET_PID_NAMESPACE, },
+ [PIDFD_NS_UTS] = { "uts", CLONE_NEWUTS, PIDFD_GET_UTS_NAMESPACE, },
+ [PIDFD_NS_IPC] = { "ipc", CLONE_NEWIPC, PIDFD_GET_IPC_NAMESPACE, },
+ [PIDFD_NS_NET] = { "net", CLONE_NEWNET, PIDFD_GET_NET_NAMESPACE, },
+ [PIDFD_NS_CGROUP] = { "cgroup", CLONE_NEWCGROUP, PIDFD_GET_CGROUP_NAMESPACE, },
+ [PIDFD_NS_TIME] = { "time", CLONE_NEWTIME, PIDFD_GET_TIME_NAMESPACE, },
+ [PIDFD_NS_PIDCLD] = { "pid_for_children", 0, PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE, },
+ [PIDFD_NS_TIMECLD] = { "time_for_children", 0, PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE, },
};
FIXTURE(current_nsset)
@@ -54,6 +102,7 @@ FIXTURE(current_nsset)
pid_t pid;
int pidfd;
int nsfds[PIDFD_NS_MAX];
+ int child_pidfd_derived_nsfds[PIDFD_NS_MAX];
pid_t child_pid_exited;
int child_pidfd_exited;
@@ -61,10 +110,12 @@ FIXTURE(current_nsset)
pid_t child_pid1;
int child_pidfd1;
int child_nsfds1[PIDFD_NS_MAX];
+ int child_pidfd_derived_nsfds1[PIDFD_NS_MAX];
pid_t child_pid2;
int child_pidfd2;
int child_nsfds2[PIDFD_NS_MAX];
+ int child_pidfd_derived_nsfds2[PIDFD_NS_MAX];
};
static int sys_waitid(int which, pid_t pid, int options)
@@ -128,9 +179,12 @@ FIXTURE_SETUP(current_nsset)
char c;
for (i = 0; i < PIDFD_NS_MAX; i++) {
- self->nsfds[i] = -EBADF;
- self->child_nsfds1[i] = -EBADF;
- self->child_nsfds2[i] = -EBADF;
+ self->nsfds[i] = -EBADF;
+ self->child_nsfds1[i] = -EBADF;
+ self->child_nsfds2[i] = -EBADF;
+ self->child_pidfd_derived_nsfds[i] = -EBADF;
+ self->child_pidfd_derived_nsfds1[i] = -EBADF;
+ self->child_pidfd_derived_nsfds2[i] = -EBADF;
}
proc_fd = open("/proc/self/ns", O_DIRECTORY | O_CLOEXEC);
@@ -139,6 +193,11 @@ FIXTURE_SETUP(current_nsset)
}
self->pid = getpid();
+ self->pidfd = sys_pidfd_open(self->pid, 0);
+ EXPECT_GT(self->pidfd, 0) {
+ TH_LOG("%m - Failed to open pidfd for process %d", self->pid);
+ }
+
for (i = 0; i < PIDFD_NS_MAX; i++) {
const struct ns_info *info = &ns_info[i];
self->nsfds[i] = openat(proc_fd, info->name, O_RDONLY | O_CLOEXEC);
@@ -148,20 +207,27 @@ FIXTURE_SETUP(current_nsset)
info->name, self->pid);
}
}
- }
- self->pidfd = sys_pidfd_open(self->pid, 0);
- EXPECT_GT(self->pidfd, 0) {
- TH_LOG("%m - Failed to open pidfd for process %d", self->pid);
+ self->child_pidfd_derived_nsfds[i] = ioctl(self->pidfd, info->pidfd_ioctl, 0);
+ if (self->child_pidfd_derived_nsfds[i] < 0) {
+ EXPECT_EQ(errno, EOPNOTSUPP) {
+ TH_LOG("%m - Failed to derive %s namespace from pidfd of process %d",
+ info->name, self->pid);
+ }
+ }
}
/* Create task that exits right away. */
- self->child_pid_exited = create_child(&self->child_pidfd_exited,
- CLONE_NEWUSER | CLONE_NEWNET);
- EXPECT_GT(self->child_pid_exited, 0);
+ self->child_pid_exited = create_child(&self->child_pidfd_exited, 0);
+ EXPECT_GE(self->child_pid_exited, 0);
- if (self->child_pid_exited == 0)
+ if (self->child_pid_exited == 0) {
+ if (self->nsfds[PIDFD_NS_USER] >= 0 && unshare(CLONE_NEWUSER) < 0)
+ _exit(EXIT_FAILURE);
+ if (self->nsfds[PIDFD_NS_NET] >= 0 && unshare(CLONE_NEWNET) < 0)
+ _exit(EXIT_FAILURE);
_exit(EXIT_SUCCESS);
+ }
ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED | WNOWAIT), 0);
@@ -174,18 +240,43 @@ FIXTURE_SETUP(current_nsset)
EXPECT_EQ(ret, 0);
/* Create tasks that will be stopped. */
- self->child_pid1 = create_child(&self->child_pidfd1,
- CLONE_NEWUSER | CLONE_NEWNS |
- CLONE_NEWCGROUP | CLONE_NEWIPC |
- CLONE_NEWUTS | CLONE_NEWPID |
- CLONE_NEWNET);
+ if (self->nsfds[PIDFD_NS_USER] >= 0 && self->nsfds[PIDFD_NS_PID] >= 0)
+ self->child_pid1 = create_child(&self->child_pidfd1, CLONE_NEWUSER | CLONE_NEWPID);
+ else if (self->nsfds[PIDFD_NS_PID] >= 0)
+ self->child_pid1 = create_child(&self->child_pidfd1, CLONE_NEWPID);
+ else if (self->nsfds[PIDFD_NS_USER] >= 0)
+ self->child_pid1 = create_child(&self->child_pidfd1, CLONE_NEWUSER);
+ else
+ self->child_pid1 = create_child(&self->child_pidfd1, 0);
EXPECT_GE(self->child_pid1, 0);
if (self->child_pid1 == 0) {
close(ipc_sockets[0]);
- if (!switch_timens())
+ if (self->nsfds[PIDFD_NS_MNT] >= 0 && unshare(CLONE_NEWNS) < 0) {
+ TH_LOG("%m - Failed to unshare mount namespace for process %d", self->pid);
_exit(EXIT_FAILURE);
+ }
+ if (self->nsfds[PIDFD_NS_CGROUP] >= 0 && unshare(CLONE_NEWCGROUP) < 0) {
+ TH_LOG("%m - Failed to unshare cgroup namespace for process %d", self->pid);
+ _exit(EXIT_FAILURE);
+ }
+ if (self->nsfds[PIDFD_NS_IPC] >= 0 && unshare(CLONE_NEWIPC) < 0) {
+ TH_LOG("%m - Failed to unshare ipc namespace for process %d", self->pid);
+ _exit(EXIT_FAILURE);
+ }
+ if (self->nsfds[PIDFD_NS_UTS] >= 0 && unshare(CLONE_NEWUTS) < 0) {
+ TH_LOG("%m - Failed to unshare uts namespace for process %d", self->pid);
+ _exit(EXIT_FAILURE);
+ }
+ if (self->nsfds[PIDFD_NS_NET] >= 0 && unshare(CLONE_NEWNET) < 0) {
+ TH_LOG("%m - Failed to unshare net namespace for process %d", self->pid);
+ _exit(EXIT_FAILURE);
+ }
+ if (self->nsfds[PIDFD_NS_TIME] >= 0 && !switch_timens()) {
+ TH_LOG("%m - Failed to unshare time namespace for process %d", self->pid);
+ _exit(EXIT_FAILURE);
+ }
if (write_nointr(ipc_sockets[1], "1", 1) < 0)
_exit(EXIT_FAILURE);
@@ -203,18 +294,43 @@ FIXTURE_SETUP(current_nsset)
ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
EXPECT_EQ(ret, 0);
- self->child_pid2 = create_child(&self->child_pidfd2,
- CLONE_NEWUSER | CLONE_NEWNS |
- CLONE_NEWCGROUP | CLONE_NEWIPC |
- CLONE_NEWUTS | CLONE_NEWPID |
- CLONE_NEWNET);
+ if (self->nsfds[PIDFD_NS_USER] >= 0 && self->nsfds[PIDFD_NS_PID] >= 0)
+ self->child_pid2 = create_child(&self->child_pidfd2, CLONE_NEWUSER | CLONE_NEWPID);
+ else if (self->nsfds[PIDFD_NS_PID] >= 0)
+ self->child_pid2 = create_child(&self->child_pidfd2, CLONE_NEWPID);
+ else if (self->nsfds[PIDFD_NS_USER] >= 0)
+ self->child_pid2 = create_child(&self->child_pidfd2, CLONE_NEWUSER);
+ else
+ self->child_pid2 = create_child(&self->child_pidfd2, 0);
EXPECT_GE(self->child_pid2, 0);
if (self->child_pid2 == 0) {
close(ipc_sockets[0]);
- if (!switch_timens())
+ if (self->nsfds[PIDFD_NS_MNT] >= 0 && unshare(CLONE_NEWNS) < 0) {
+ TH_LOG("%m - Failed to unshare mount namespace for process %d", self->pid);
+ _exit(EXIT_FAILURE);
+ }
+ if (self->nsfds[PIDFD_NS_CGROUP] >= 0 && unshare(CLONE_NEWCGROUP) < 0) {
+ TH_LOG("%m - Failed to unshare cgroup namespace for process %d", self->pid);
_exit(EXIT_FAILURE);
+ }
+ if (self->nsfds[PIDFD_NS_IPC] >= 0 && unshare(CLONE_NEWIPC) < 0) {
+ TH_LOG("%m - Failed to unshare ipc namespace for process %d", self->pid);
+ _exit(EXIT_FAILURE);
+ }
+ if (self->nsfds[PIDFD_NS_UTS] >= 0 && unshare(CLONE_NEWUTS) < 0) {
+ TH_LOG("%m - Failed to unshare uts namespace for process %d", self->pid);
+ _exit(EXIT_FAILURE);
+ }
+ if (self->nsfds[PIDFD_NS_NET] >= 0 && unshare(CLONE_NEWNET) < 0) {
+ TH_LOG("%m - Failed to unshare net namespace for process %d", self->pid);
+ _exit(EXIT_FAILURE);
+ }
+ if (self->nsfds[PIDFD_NS_TIME] >= 0 && !switch_timens()) {
+ TH_LOG("%m - Failed to unshare time namespace for process %d", self->pid);
+ _exit(EXIT_FAILURE);
+ }
if (write_nointr(ipc_sockets[1], "1", 1) < 0)
_exit(EXIT_FAILURE);
@@ -267,6 +383,22 @@ FIXTURE_SETUP(current_nsset)
info->name, self->child_pid1);
}
}
+
+ self->child_pidfd_derived_nsfds1[i] = ioctl(self->child_pidfd1, info->pidfd_ioctl, 0);
+ if (self->child_pidfd_derived_nsfds1[i] < 0) {
+ EXPECT_EQ(errno, EOPNOTSUPP) {
+ TH_LOG("%m - Failed to derive %s namespace from pidfd of process %d",
+ info->name, self->child_pid1);
+ }
+ }
+
+ self->child_pidfd_derived_nsfds2[i] = ioctl(self->child_pidfd2, info->pidfd_ioctl, 0);
+ if (self->child_pidfd_derived_nsfds2[i] < 0) {
+ EXPECT_EQ(errno, EOPNOTSUPP) {
+ TH_LOG("%m - Failed to derive %s namespace from pidfd of process %d",
+ info->name, self->child_pid2);
+ }
+ }
}
close(proc_fd);
@@ -288,6 +420,12 @@ FIXTURE_TEARDOWN(current_nsset)
close(self->child_nsfds1[i]);
if (self->child_nsfds2[i] >= 0)
close(self->child_nsfds2[i]);
+ if (self->child_pidfd_derived_nsfds[i] >= 0)
+ close(self->child_pidfd_derived_nsfds[i]);
+ if (self->child_pidfd_derived_nsfds1[i] >= 0)
+ close(self->child_pidfd_derived_nsfds1[i]);
+ if (self->child_pidfd_derived_nsfds2[i] >= 0)
+ close(self->child_pidfd_derived_nsfds2[i]);
}
if (self->child_pidfd1 >= 0)
@@ -446,6 +584,42 @@ TEST_F(current_nsset, nsfd_incremental_setns)
}
}
+TEST_F(current_nsset, pidfd_derived_nsfd_incremental_setns)
+{
+ int i;
+ pid_t pid;
+
+ pid = getpid();
+ for (i = 0; i < PIDFD_NS_MAX; i++) {
+ const struct ns_info *info = &ns_info[i];
+ int nsfd;
+
+ if (self->child_pidfd_derived_nsfds1[i] < 0)
+ continue;
+
+ if (info->flag) {
+ ASSERT_EQ(setns(self->child_pidfd_derived_nsfds1[i], info->flag), 0) {
+ TH_LOG("%m - Failed to setns to %s namespace of %d via nsfd %d",
+ info->name, self->child_pid1,
+ self->child_pidfd_derived_nsfds1[i]);
+ }
+ }
+
+ /* Verify that we have changed to the correct namespaces. */
+ if (info->flag == CLONE_NEWPID)
+ nsfd = self->child_pidfd_derived_nsfds[i];
+ else
+ nsfd = self->child_pidfd_derived_nsfds1[i];
+ ASSERT_EQ(in_same_namespace(nsfd, pid, info->name), 1) {
+ TH_LOG("setns failed to place us correctly into %s namespace of %d via nsfd %d",
+ info->name, self->child_pid1,
+ self->child_pidfd_derived_nsfds1[i]);
+ }
+ TH_LOG("Managed to correctly setns to %s namespace of %d via nsfd %d",
+ info->name, self->child_pid1, self->child_pidfd_derived_nsfds1[i]);
+ }
+}
+
TEST_F(current_nsset, pidfd_one_shot_setns)
{
unsigned flags = 0;
@@ -542,6 +716,28 @@ TEST_F(current_nsset, no_foul_play)
info->name, self->child_pid2,
self->child_nsfds2[i]);
}
+
+ /*
+ * Can't setns to a user namespace outside of our hierarchy since we
+ * don't have caps in there and didn't create it. That means that under
+ * no circumstances should we be able to setns to any of the other
+ * ones since they aren't owned by our user namespace.
+ */
+ for (i = 0; i < PIDFD_NS_MAX; i++) {
+ const struct ns_info *info = &ns_info[i];
+
+ if (self->child_pidfd_derived_nsfds2[i] < 0 || !info->flag)
+ continue;
+
+ ASSERT_NE(setns(self->child_pidfd_derived_nsfds2[i], info->flag), 0) {
+ TH_LOG("Managed to setns to %s namespace of %d via nsfd %d",
+ info->name, self->child_pid2,
+ self->child_pidfd_derived_nsfds2[i]);
+ }
+ TH_LOG("%m - Correctly failed to setns to %s namespace of %d via nsfd %d",
+ info->name, self->child_pid2,
+ self->child_pidfd_derived_nsfds2[i]);
+ }
}
TEST(setns_einval)
diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c
index c081ae91313a..9faa686f90e4 100644
--- a/tools/testing/selftests/pidfd/pidfd_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_test.c
@@ -572,5 +572,5 @@ int main(int argc, char **argv)
test_pidfd_send_signal_exited_fail();
test_pidfd_send_signal_recycled_pid_fail();
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/power_supply/test_power_supply_properties.sh b/tools/testing/selftests/power_supply/test_power_supply_properties.sh
index df272dfe1d2a..a66b1313ed88 100755
--- a/tools/testing/selftests/power_supply/test_power_supply_properties.sh
+++ b/tools/testing/selftests/power_supply/test_power_supply_properties.sh
@@ -23,7 +23,7 @@ count_tests() {
total_tests=0
for i in $SUPPLIES; do
- total_tests=$(("$total_tests" + "$NUM_TESTS"))
+ total_tests=$((total_tests + NUM_TESTS))
done
echo "$total_tests"
diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile
index c376151982c4..b175e94e1901 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -7,12 +7,6 @@ ARCH := $(shell echo $(ARCH) | sed -e s/ppc.*/powerpc/)
ifeq ($(ARCH),powerpc)
-GIT_VERSION = $(shell git describe --always --long --dirty || echo "unknown")
-
-CFLAGS := -std=gnu99 -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR)/include $(CFLAGS)
-
-export CFLAGS
-
SUB_DIRS = alignment \
benchmarks \
cache_shape \
@@ -46,6 +40,7 @@ $(SUB_DIRS):
BUILD_TARGET=$(OUTPUT)/$@; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $@ all
include ../lib.mk
+include ./flags.mk
override define RUN_TESTS
+@for TARGET in $(SUB_DIRS); do \
@@ -57,14 +52,14 @@ endef
override define INSTALL_RULE
+@for TARGET in $(SUB_DIRS); do \
BUILD_TARGET=$(OUTPUT)/$$TARGET; \
- $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install;\
+ $(MAKE) OUTPUT=$$BUILD_TARGET INSTALL_PATH=$$INSTALL_PATH/$$TARGET -C $$TARGET install;\
done;
endef
emit_tests:
+@for TARGET in $(SUB_DIRS); do \
BUILD_TARGET=$(OUTPUT)/$$TARGET; \
- $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET $@;\
+ $(MAKE) OUTPUT=$$BUILD_TARGET COLLECTION=$(COLLECTION)/$$TARGET -s -C $$TARGET $@;\
done;
override define CLEAN
diff --git a/tools/testing/selftests/powerpc/alignment/Makefile b/tools/testing/selftests/powerpc/alignment/Makefile
index 93e9af37449d..66d5d7aaeb20 100644
--- a/tools/testing/selftests/powerpc/alignment/Makefile
+++ b/tools/testing/selftests/powerpc/alignment/Makefile
@@ -3,5 +3,6 @@ TEST_GEN_PROGS := copy_first_unaligned alignment_handler
top_srcdir = ../../../../..
include ../../lib.mk
+include ../flags.mk
$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/benchmarks/Makefile b/tools/testing/selftests/powerpc/benchmarks/Makefile
index a32a6ab89914..1321922038d0 100644
--- a/tools/testing/selftests/powerpc/benchmarks/Makefile
+++ b/tools/testing/selftests/powerpc/benchmarks/Makefile
@@ -4,10 +4,11 @@ TEST_GEN_FILES := exec_target
TEST_FILES := settings
-CFLAGS += -O2
-
top_srcdir = ../../../../..
include ../../lib.mk
+include ../flags.mk
+
+CFLAGS += -O2
$(TEST_GEN_PROGS): ../harness.c
diff --git a/tools/testing/selftests/powerpc/cache_shape/Makefile b/tools/testing/selftests/powerpc/cache_shape/Makefile
index 689f6c8ebcd8..3a3ca956ac66 100644
--- a/tools/testing/selftests/powerpc/cache_shape/Makefile
+++ b/tools/testing/selftests/powerpc/cache_shape/Makefile
@@ -3,5 +3,6 @@ TEST_GEN_PROGS := cache_shape
top_srcdir = ../../../../..
include ../../lib.mk
+include ../flags.mk
$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile
index 77594e697f2f..42940f92d832 100644
--- a/tools/testing/selftests/powerpc/copyloops/Makefile
+++ b/tools/testing/selftests/powerpc/copyloops/Makefile
@@ -1,14 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
-# The loops are all 64-bit code
-CFLAGS += -m64
-CFLAGS += -I$(CURDIR)
-CFLAGS += -D SELFTEST
-CFLAGS += -maltivec
-CFLAGS += -mcpu=power4
-
-# Use our CFLAGS for the implicit .S rule & set the asm machine type
-ASFLAGS = $(CFLAGS) -Wa,-mpower4
-
TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \
copyuser_p7_t0 copyuser_p7_t1 \
memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \
@@ -20,6 +10,17 @@ EXTRA_SOURCES := validate.c ../harness.c stubs.S
top_srcdir = ../../../../..
include ../../lib.mk
+include ../flags.mk
+
+# The loops are all 64-bit code
+CFLAGS += -m64
+CFLAGS += -I$(CURDIR)
+CFLAGS += -D SELFTEST
+CFLAGS += -maltivec
+CFLAGS += -mcpu=power4
+
+# Use our CFLAGS for the implicit .S rule & set the asm machine type
+ASFLAGS = $(CFLAGS) -Wa,-mpower4
$(OUTPUT)/copyuser_64_t%: copyuser_64.S $(EXTRA_SOURCES)
$(CC) $(CPPFLAGS) $(CFLAGS) \
diff --git a/tools/testing/selftests/powerpc/dexcr/.gitignore b/tools/testing/selftests/powerpc/dexcr/.gitignore
index b82f45dd46b9..11eefb4b9fa4 100644
--- a/tools/testing/selftests/powerpc/dexcr/.gitignore
+++ b/tools/testing/selftests/powerpc/dexcr/.gitignore
@@ -1,2 +1,4 @@
+dexcr_test
hashchk_test
+chdexcr
lsdexcr
diff --git a/tools/testing/selftests/powerpc/dexcr/Makefile b/tools/testing/selftests/powerpc/dexcr/Makefile
index 76210f2bcec3..58cf9f722905 100644
--- a/tools/testing/selftests/powerpc/dexcr/Makefile
+++ b/tools/testing/selftests/powerpc/dexcr/Makefile
@@ -1,9 +1,12 @@
-TEST_GEN_PROGS := hashchk_test
-TEST_GEN_FILES := lsdexcr
+TEST_GEN_PROGS := dexcr_test hashchk_test
+TEST_GEN_FILES := lsdexcr chdexcr
include ../../lib.mk
+include ../flags.mk
-$(OUTPUT)/hashchk_test: CFLAGS += -fno-pie $(call cc-option,-mno-rop-protect)
+CFLAGS += $(KHDR_INCLUDES)
+
+$(OUTPUT)/hashchk_test: CFLAGS += -fno-pie -no-pie $(call cc-option,-mno-rop-protect)
$(TEST_GEN_PROGS): ../harness.c ../utils.c ./dexcr.c
$(TEST_GEN_FILES): ../utils.c ./dexcr.c
diff --git a/tools/testing/selftests/powerpc/dexcr/chdexcr.c b/tools/testing/selftests/powerpc/dexcr/chdexcr.c
new file mode 100644
index 000000000000..c548d7a5bb9b
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dexcr/chdexcr.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/prctl.h>
+
+#include "dexcr.h"
+#include "utils.h"
+
+static void die(const char *msg)
+{
+ printf("%s\n", msg);
+ exit(1);
+}
+
+static void help(void)
+{
+ printf("Invoke a provided program with a custom DEXCR on-exec reset value\n"
+ "\n"
+ "usage: chdexcr [CHDEXCR OPTIONS] -- PROGRAM [ARGS...]\n"
+ "\n"
+ "Each configurable DEXCR aspect is exposed as an option.\n"
+ "\n"
+ "The normal option sets the aspect in the DEXCR. The --no- variant\n"
+ "clears that aspect. For example, --ibrtpd sets the IBRTPD aspect bit,\n"
+ "so indirect branch prediction will be disabled in the provided program.\n"
+ "Conversely, --no-ibrtpd clears the aspect bit, so indirect branch\n"
+ "prediction may occur.\n"
+ "\n"
+ "CHDEXCR OPTIONS:\n");
+
+ for (int i = 0; i < ARRAY_SIZE(aspects); i++) {
+ const struct dexcr_aspect *aspect = &aspects[i];
+
+ if (aspect->prctl == -1)
+ continue;
+
+ printf(" --%-6s / --no-%-6s : %s\n", aspect->opt, aspect->opt, aspect->desc);
+ }
+}
+
+static const struct dexcr_aspect *opt_to_aspect(const char *opt)
+{
+ for (int i = 0; i < ARRAY_SIZE(aspects); i++)
+ if (aspects[i].prctl != -1 && !strcmp(aspects[i].opt, opt))
+ return &aspects[i];
+
+ return NULL;
+}
+
+static int apply_option(const char *option)
+{
+ const struct dexcr_aspect *aspect;
+ const char *opt = NULL;
+ const char *set_prefix = "--";
+ const char *clear_prefix = "--no-";
+ unsigned long ctrl = 0;
+ int err;
+
+ if (!strcmp(option, "-h") || !strcmp(option, "--help")) {
+ help();
+ exit(0);
+ }
+
+ /* Strip out --(no-) prefix and determine ctrl value */
+ if (!strncmp(option, clear_prefix, strlen(clear_prefix))) {
+ opt = &option[strlen(clear_prefix)];
+ ctrl |= PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC;
+ } else if (!strncmp(option, set_prefix, strlen(set_prefix))) {
+ opt = &option[strlen(set_prefix)];
+ ctrl |= PR_PPC_DEXCR_CTRL_SET_ONEXEC;
+ }
+
+ if (!opt || !*opt)
+ return 1;
+
+ aspect = opt_to_aspect(opt);
+ if (!aspect)
+ die("unknown aspect");
+
+ err = pr_set_dexcr(aspect->prctl, ctrl);
+ if (err)
+ die("failed to apply option");
+
+ return 0;
+}
+
+int main(int argc, char *const argv[])
+{
+ int i;
+
+ if (!dexcr_exists())
+ die("DEXCR not detected on this hardware");
+
+ for (i = 1; i < argc; i++)
+ if (apply_option(argv[i]))
+ break;
+
+ if (i < argc && !strcmp(argv[i], "--"))
+ i++;
+
+ if (i >= argc)
+ die("missing command");
+
+ execvp(argv[i], &argv[i]);
+ perror("execve");
+
+ return errno;
+}
diff --git a/tools/testing/selftests/powerpc/dexcr/dexcr.c b/tools/testing/selftests/powerpc/dexcr/dexcr.c
index 65ec5347de98..468fd0dc9912 100644
--- a/tools/testing/selftests/powerpc/dexcr/dexcr.c
+++ b/tools/testing/selftests/powerpc/dexcr/dexcr.c
@@ -3,6 +3,7 @@
#include <errno.h>
#include <setjmp.h>
#include <signal.h>
+#include <sys/prctl.h>
#include <sys/types.h>
#include <sys/wait.h>
@@ -43,6 +44,45 @@ out:
return exists;
}
+unsigned int pr_which_to_aspect(unsigned long which)
+{
+ switch (which) {
+ case PR_PPC_DEXCR_SBHE:
+ return DEXCR_PR_SBHE;
+ case PR_PPC_DEXCR_IBRTPD:
+ return DEXCR_PR_IBRTPD;
+ case PR_PPC_DEXCR_SRAPD:
+ return DEXCR_PR_SRAPD;
+ case PR_PPC_DEXCR_NPHIE:
+ return DEXCR_PR_NPHIE;
+ default:
+ FAIL_IF_EXIT_MSG(true, "unknown PR aspect");
+ }
+}
+
+int pr_get_dexcr(unsigned long which)
+{
+ return prctl(PR_PPC_GET_DEXCR, which, 0UL, 0UL, 0UL);
+}
+
+int pr_set_dexcr(unsigned long which, unsigned long ctrl)
+{
+ return prctl(PR_PPC_SET_DEXCR, which, ctrl, 0UL, 0UL);
+}
+
+bool pr_dexcr_aspect_supported(unsigned long which)
+{
+ if (pr_get_dexcr(which) == -1)
+ return errno == ENODEV;
+
+ return true;
+}
+
+bool pr_dexcr_aspect_editable(unsigned long which)
+{
+ return pr_get_dexcr(which) & PR_PPC_DEXCR_CTRL_EDITABLE;
+}
+
/*
* Just test if a bad hashchk triggers a signal, without checking
* for support or if the NPHIE aspect is enabled.
diff --git a/tools/testing/selftests/powerpc/dexcr/dexcr.h b/tools/testing/selftests/powerpc/dexcr/dexcr.h
index f55cbbc8643b..51e9ba3b0997 100644
--- a/tools/testing/selftests/powerpc/dexcr/dexcr.h
+++ b/tools/testing/selftests/powerpc/dexcr/dexcr.h
@@ -9,6 +9,7 @@
#define _SELFTESTS_POWERPC_DEXCR_DEXCR_H
#include <stdbool.h>
+#include <sys/prctl.h>
#include <sys/types.h>
#include "reg.h"
@@ -26,8 +27,64 @@
#define PPC_RAW_HASHCHK(b, i, a) \
str(.long (0x7C0005E4 | PPC_RAW_HASH_ARGS(b, i, a));)
+struct dexcr_aspect {
+ const char *name; /* Short display name */
+ const char *opt; /* Option name for chdexcr */
+ const char *desc; /* Expanded aspect meaning */
+ unsigned int index; /* Aspect bit index in DEXCR */
+ unsigned long prctl; /* 'which' value for get/set prctl */
+};
+
+static const struct dexcr_aspect aspects[] = {
+ {
+ .name = "SBHE",
+ .opt = "sbhe",
+ .desc = "Speculative branch hint enable",
+ .index = 0,
+ .prctl = PR_PPC_DEXCR_SBHE,
+ },
+ {
+ .name = "IBRTPD",
+ .opt = "ibrtpd",
+ .desc = "Indirect branch recurrent target prediction disable",
+ .index = 3,
+ .prctl = PR_PPC_DEXCR_IBRTPD,
+ },
+ {
+ .name = "SRAPD",
+ .opt = "srapd",
+ .desc = "Subroutine return address prediction disable",
+ .index = 4,
+ .prctl = PR_PPC_DEXCR_SRAPD,
+ },
+ {
+ .name = "NPHIE",
+ .opt = "nphie",
+ .desc = "Non-privileged hash instruction enable",
+ .index = 5,
+ .prctl = PR_PPC_DEXCR_NPHIE,
+ },
+ {
+ .name = "PHIE",
+ .opt = "phie",
+ .desc = "Privileged hash instruction enable",
+ .index = 6,
+ .prctl = -1,
+ },
+};
+
bool dexcr_exists(void);
+bool pr_dexcr_aspect_supported(unsigned long which);
+
+bool pr_dexcr_aspect_editable(unsigned long which);
+
+int pr_get_dexcr(unsigned long pr_aspect);
+
+int pr_set_dexcr(unsigned long pr_aspect, unsigned long ctrl);
+
+unsigned int pr_which_to_aspect(unsigned long which);
+
bool hashchk_triggers(void);
enum dexcr_source {
diff --git a/tools/testing/selftests/powerpc/dexcr/dexcr_test.c b/tools/testing/selftests/powerpc/dexcr/dexcr_test.c
new file mode 100644
index 000000000000..7a8657164908
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dexcr/dexcr_test.c
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+
+#include "dexcr.h"
+#include "utils.h"
+
+/*
+ * Helper function for testing the behaviour of a newly exec-ed process
+ */
+static int dexcr_prctl_onexec_test_child(unsigned long which, const char *status)
+{
+ unsigned long dexcr = mfspr(SPRN_DEXCR_RO);
+ unsigned long aspect = pr_which_to_aspect(which);
+ int ctrl = pr_get_dexcr(which);
+
+ if (!strcmp(status, "set")) {
+ FAIL_IF_EXIT_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_SET),
+ "setting aspect across exec not applied");
+
+ FAIL_IF_EXIT_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_SET_ONEXEC),
+ "setting aspect across exec not inherited");
+
+ FAIL_IF_EXIT_MSG(!(aspect & dexcr), "setting aspect across exec did not take effect");
+ } else if (!strcmp(status, "clear")) {
+ FAIL_IF_EXIT_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_CLEAR),
+ "clearing aspect across exec not applied");
+
+ FAIL_IF_EXIT_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC),
+ "clearing aspect across exec not inherited");
+
+ FAIL_IF_EXIT_MSG(aspect & dexcr, "clearing aspect across exec did not take effect");
+ } else {
+ FAIL_IF_EXIT_MSG(true, "unknown expected status");
+ }
+
+ return 0;
+}
+
+/*
+ * Test that the given prctl value can be manipulated freely
+ */
+static int dexcr_prctl_aspect_test(unsigned long which)
+{
+ unsigned long aspect = pr_which_to_aspect(which);
+ pid_t pid;
+ int ctrl;
+ int err;
+ int errno_save;
+
+ SKIP_IF_MSG(!dexcr_exists(), "DEXCR not supported");
+ SKIP_IF_MSG(!pr_dexcr_aspect_supported(which), "DEXCR aspect not supported");
+ SKIP_IF_MSG(!pr_dexcr_aspect_editable(which), "DEXCR aspect not editable with prctl");
+
+ /* We reject invalid combinations of arguments */
+ err = pr_set_dexcr(which, PR_PPC_DEXCR_CTRL_SET | PR_PPC_DEXCR_CTRL_CLEAR);
+ errno_save = errno;
+ FAIL_IF_MSG(err != -1, "simultaneous set and clear should be rejected");
+ FAIL_IF_MSG(errno_save != EINVAL, "simultaneous set and clear should be rejected with EINVAL");
+
+ err = pr_set_dexcr(which, PR_PPC_DEXCR_CTRL_SET_ONEXEC | PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC);
+ errno_save = errno;
+ FAIL_IF_MSG(err != -1, "simultaneous set and clear on exec should be rejected");
+ FAIL_IF_MSG(errno_save != EINVAL, "simultaneous set and clear on exec should be rejected with EINVAL");
+
+ /* We set the aspect */
+ err = pr_set_dexcr(which, PR_PPC_DEXCR_CTRL_SET);
+ FAIL_IF_MSG(err, "PR_PPC_DEXCR_CTRL_SET failed");
+
+ ctrl = pr_get_dexcr(which);
+ FAIL_IF_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_SET), "config value not PR_PPC_DEXCR_CTRL_SET");
+ FAIL_IF_MSG(ctrl & PR_PPC_DEXCR_CTRL_CLEAR, "config value unexpected clear flag");
+ FAIL_IF_MSG(!(aspect & mfspr(SPRN_DEXCR_RO)), "setting aspect did not take effect");
+
+ /* We clear the aspect */
+ err = pr_set_dexcr(which, PR_PPC_DEXCR_CTRL_CLEAR);
+ FAIL_IF_MSG(err, "PR_PPC_DEXCR_CTRL_CLEAR failed");
+
+ ctrl = pr_get_dexcr(which);
+ FAIL_IF_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_CLEAR), "config value not PR_PPC_DEXCR_CTRL_CLEAR");
+ FAIL_IF_MSG(ctrl & PR_PPC_DEXCR_CTRL_SET, "config value unexpected set flag");
+ FAIL_IF_MSG(aspect & mfspr(SPRN_DEXCR_RO), "clearing aspect did not take effect");
+
+ /* We make it set on exec (doesn't change our current value) */
+ err = pr_set_dexcr(which, PR_PPC_DEXCR_CTRL_SET_ONEXEC);
+ FAIL_IF_MSG(err, "PR_PPC_DEXCR_CTRL_SET_ONEXEC failed");
+
+ ctrl = pr_get_dexcr(which);
+ FAIL_IF_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_CLEAR), "process aspect should still be cleared");
+ FAIL_IF_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_SET_ONEXEC), "config value not PR_PPC_DEXCR_CTRL_SET_ONEXEC");
+ FAIL_IF_MSG(ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC, "config value unexpected clear on exec flag");
+ FAIL_IF_MSG(aspect & mfspr(SPRN_DEXCR_RO), "scheduling aspect to set on exec should not change it now");
+
+ /* We make it clear on exec (doesn't change our current value) */
+ err = pr_set_dexcr(which, PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC);
+ FAIL_IF_MSG(err, "PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC failed");
+
+ ctrl = pr_get_dexcr(which);
+ FAIL_IF_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_CLEAR), "process aspect config should still be cleared");
+ FAIL_IF_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC), "config value not PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC");
+ FAIL_IF_MSG(ctrl & PR_PPC_DEXCR_CTRL_SET_ONEXEC, "config value unexpected set on exec flag");
+ FAIL_IF_MSG(aspect & mfspr(SPRN_DEXCR_RO), "process aspect should still be cleared");
+
+ /* We allow setting the current and on-exec value in a single call */
+ err = pr_set_dexcr(which, PR_PPC_DEXCR_CTRL_SET | PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC);
+ FAIL_IF_MSG(err, "PR_PPC_DEXCR_CTRL_SET | PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC failed");
+
+ ctrl = pr_get_dexcr(which);
+ FAIL_IF_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_SET), "config value not PR_PPC_DEXCR_CTRL_SET");
+ FAIL_IF_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC), "config value not PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC");
+ FAIL_IF_MSG(!(aspect & mfspr(SPRN_DEXCR_RO)), "process aspect should be set");
+
+ err = pr_set_dexcr(which, PR_PPC_DEXCR_CTRL_CLEAR | PR_PPC_DEXCR_CTRL_SET_ONEXEC);
+ FAIL_IF_MSG(err, "PR_PPC_DEXCR_CTRL_CLEAR | PR_PPC_DEXCR_CTRL_SET_ONEXEC failed");
+
+ ctrl = pr_get_dexcr(which);
+ FAIL_IF_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_CLEAR), "config value not PR_PPC_DEXCR_CTRL_CLEAR");
+ FAIL_IF_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_SET_ONEXEC), "config value not PR_PPC_DEXCR_CTRL_SET_ONEXEC");
+ FAIL_IF_MSG(aspect & mfspr(SPRN_DEXCR_RO), "process aspect should be clear");
+
+ /* Verify the onexec value is applied across exec */
+ pid = fork();
+ if (!pid) {
+ char which_str[32] = {};
+ char *args[] = { "dexcr_prctl_onexec_test_child", which_str, "set", NULL };
+ unsigned int ctrl = pr_get_dexcr(which);
+
+ sprintf(which_str, "%lu", which);
+
+ FAIL_IF_EXIT_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_SET_ONEXEC),
+ "setting aspect on exec not copied across fork");
+
+ FAIL_IF_EXIT_MSG(mfspr(SPRN_DEXCR_RO) & aspect,
+ "setting aspect on exec wrongly applied to fork");
+
+ execve("/proc/self/exe", args, NULL);
+ _exit(errno);
+ }
+ await_child_success(pid);
+
+ err = pr_set_dexcr(which, PR_PPC_DEXCR_CTRL_SET | PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC);
+ FAIL_IF_MSG(err, "PR_PPC_DEXCR_CTRL_SET | PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC failed");
+
+ pid = fork();
+ if (!pid) {
+ char which_str[32] = {};
+ char *args[] = { "dexcr_prctl_onexec_test_child", which_str, "clear", NULL };
+ unsigned int ctrl = pr_get_dexcr(which);
+
+ sprintf(which_str, "%lu", which);
+
+ FAIL_IF_EXIT_MSG(!(ctrl & PR_PPC_DEXCR_CTRL_CLEAR_ONEXEC),
+ "clearing aspect on exec not copied across fork");
+
+ FAIL_IF_EXIT_MSG(!(mfspr(SPRN_DEXCR_RO) & aspect),
+ "clearing aspect on exec wrongly applied to fork");
+
+ execve("/proc/self/exe", args, NULL);
+ _exit(errno);
+ }
+ await_child_success(pid);
+
+ return 0;
+}
+
+static int dexcr_prctl_ibrtpd_test(void)
+{
+ return dexcr_prctl_aspect_test(PR_PPC_DEXCR_IBRTPD);
+}
+
+static int dexcr_prctl_srapd_test(void)
+{
+ return dexcr_prctl_aspect_test(PR_PPC_DEXCR_SRAPD);
+}
+
+static int dexcr_prctl_nphie_test(void)
+{
+ return dexcr_prctl_aspect_test(PR_PPC_DEXCR_NPHIE);
+}
+
+int main(int argc, char *argv[])
+{
+ int err = 0;
+
+ /*
+ * Some tests require checking what happens across exec, so we may be
+ * invoked as the child of a particular test
+ */
+ if (argc > 1) {
+ if (argc == 3 && !strcmp(argv[0], "dexcr_prctl_onexec_test_child")) {
+ unsigned long which;
+
+ err = parse_ulong(argv[1], strlen(argv[1]), &which, 10);
+ FAIL_IF_MSG(err, "failed to parse which value for child");
+
+ return dexcr_prctl_onexec_test_child(which, argv[2]);
+ }
+
+ FAIL_IF_MSG(true, "unknown test case");
+ }
+
+ /*
+ * Otherwise we are the main test invocation and run the full suite
+ */
+ err |= test_harness(dexcr_prctl_ibrtpd_test, "dexcr_prctl_ibrtpd");
+ err |= test_harness(dexcr_prctl_srapd_test, "dexcr_prctl_srapd");
+ err |= test_harness(dexcr_prctl_nphie_test, "dexcr_prctl_nphie");
+
+ return err;
+}
diff --git a/tools/testing/selftests/powerpc/dexcr/hashchk_test.c b/tools/testing/selftests/powerpc/dexcr/hashchk_test.c
index 7d5658c9ebe4..645224bdc142 100644
--- a/tools/testing/selftests/powerpc/dexcr/hashchk_test.c
+++ b/tools/testing/selftests/powerpc/dexcr/hashchk_test.c
@@ -21,8 +21,14 @@
static int require_nphie(void)
{
SKIP_IF_MSG(!dexcr_exists(), "DEXCR not supported");
+
+ pr_set_dexcr(PR_PPC_DEXCR_NPHIE, PR_PPC_DEXCR_CTRL_SET | PR_PPC_DEXCR_CTRL_SET_ONEXEC);
+
+ if (get_dexcr(EFFECTIVE) & DEXCR_PR_NPHIE)
+ return 0;
+
SKIP_IF_MSG(!(get_dexcr(EFFECTIVE) & DEXCR_PR_NPHIE),
- "DEXCR[NPHIE] not enabled");
+ "Failed to enable DEXCR[NPHIE]");
return 0;
}
diff --git a/tools/testing/selftests/powerpc/dexcr/lsdexcr.c b/tools/testing/selftests/powerpc/dexcr/lsdexcr.c
index 94abbfcc389e..7588929180ab 100644
--- a/tools/testing/selftests/powerpc/dexcr/lsdexcr.c
+++ b/tools/testing/selftests/powerpc/dexcr/lsdexcr.c
@@ -1,9 +1,9 @@
// SPDX-License-Identifier: GPL-2.0+
-#include <errno.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>
+#include <sys/prctl.h>
#include "dexcr.h"
#include "utils.h"
@@ -12,40 +12,6 @@ static unsigned int dexcr;
static unsigned int hdexcr;
static unsigned int effective;
-struct dexcr_aspect {
- const char *name;
- const char *desc;
- unsigned int index;
-};
-
-static const struct dexcr_aspect aspects[] = {
- {
- .name = "SBHE",
- .desc = "Speculative branch hint enable",
- .index = 0,
- },
- {
- .name = "IBRTPD",
- .desc = "Indirect branch recurrent target prediction disable",
- .index = 3,
- },
- {
- .name = "SRAPD",
- .desc = "Subroutine return address prediction disable",
- .index = 4,
- },
- {
- .name = "NPHIE",
- .desc = "Non-privileged hash instruction enable",
- .index = 5,
- },
- {
- .name = "PHIE",
- .desc = "Privileged hash instruction enable",
- .index = 6,
- },
-};
-
static void print_list(const char *list[], size_t len)
{
for (size_t i = 0; i < len; i++) {
@@ -60,7 +26,7 @@ static void print_dexcr(char *name, unsigned int bits)
const char *enabled_aspects[ARRAY_SIZE(aspects) + 1] = {NULL};
size_t j = 0;
- printf("%s: %08x", name, bits);
+ printf("%s: 0x%08x", name, bits);
if (bits == 0) {
printf("\n");
@@ -103,6 +69,63 @@ static void print_aspect(const struct dexcr_aspect *aspect)
printf(" \t(%s)\n", aspect->desc);
}
+static void print_aspect_config(const struct dexcr_aspect *aspect)
+{
+ const char *reason = NULL;
+ const char *reason_hyp = NULL;
+ const char *reason_prctl = "no prctl";
+ bool actual = effective & DEXCR_PR_BIT(aspect->index);
+ bool expected = actual; /* Assume it's fine if we don't expect a specific set/clear value */
+
+ if (actual)
+ reason = "set by unknown";
+ else
+ reason = "cleared by unknown";
+
+ if (aspect->prctl != -1) {
+ int ctrl = pr_get_dexcr(aspect->prctl);
+
+ if (ctrl < 0) {
+ reason_prctl = "failed to read prctl";
+ } else {
+ if (ctrl & PR_PPC_DEXCR_CTRL_SET) {
+ reason_prctl = "set by prctl";
+ expected = true;
+ } else if (ctrl & PR_PPC_DEXCR_CTRL_CLEAR) {
+ reason_prctl = "cleared by prctl";
+ expected = false;
+ } else {
+ reason_prctl = "unknown prctl";
+ }
+
+ reason = reason_prctl;
+ }
+ }
+
+ if (hdexcr & DEXCR_PR_BIT(aspect->index)) {
+ reason_hyp = "set by hypervisor";
+ reason = reason_hyp;
+ expected = true;
+ } else {
+ reason_hyp = "not modified by hypervisor";
+ }
+
+ printf("%12s (%d): %-28s (%s, %s)\n",
+ aspect->name,
+ aspect->index,
+ reason,
+ reason_hyp,
+ reason_prctl);
+
+ /*
+ * The checks are not atomic, so this can technically trigger if the
+ * hypervisor makes a change while we are checking each source. It's
+ * far more likely to be a bug if we see this though.
+ */
+ if (actual != expected)
+ printf(" : ! actual %s does not match config\n", aspect->name);
+}
+
int main(int argc, char *argv[])
{
if (!dexcr_exists()) {
@@ -114,6 +137,8 @@ int main(int argc, char *argv[])
hdexcr = get_dexcr(HDEXCR);
effective = dexcr | hdexcr;
+ printf("current status:\n");
+
print_dexcr(" DEXCR", dexcr);
print_dexcr(" HDEXCR", hdexcr);
print_dexcr("Effective", effective);
@@ -136,6 +161,12 @@ int main(int argc, char *argv[])
else
printf("ignored\n");
}
+ printf("\n");
+
+ printf("configuration:\n");
+ for (size_t i = 0; i < ARRAY_SIZE(aspects); i++)
+ print_aspect_config(&aspects[i]);
+ printf("\n");
return 0;
}
diff --git a/tools/testing/selftests/powerpc/dscr/Makefile b/tools/testing/selftests/powerpc/dscr/Makefile
index 9289d5febe1e..9fa9cb5bd989 100644
--- a/tools/testing/selftests/powerpc/dscr/Makefile
+++ b/tools/testing/selftests/powerpc/dscr/Makefile
@@ -5,6 +5,7 @@ TEST_GEN_PROGS := dscr_default_test dscr_explicit_test dscr_user_test \
top_srcdir = ../../../../..
include ../../lib.mk
+include ../flags.mk
$(OUTPUT)/dscr_default_test: LDLIBS += -lpthread
$(OUTPUT)/dscr_explicit_test: LDLIBS += -lpthread
diff --git a/tools/testing/selftests/powerpc/eeh/Makefile b/tools/testing/selftests/powerpc/eeh/Makefile
index ae963eb2dc5b..70797716f2b5 100644
--- a/tools/testing/selftests/powerpc/eeh/Makefile
+++ b/tools/testing/selftests/powerpc/eeh/Makefile
@@ -7,3 +7,4 @@ TEST_FILES := eeh-functions.sh settings
top_srcdir = ../../../../..
include ../../lib.mk
+include ../flags.mk
diff --git a/tools/testing/selftests/powerpc/flags.mk b/tools/testing/selftests/powerpc/flags.mk
new file mode 100644
index 000000000000..abb9e58d95c4
--- /dev/null
+++ b/tools/testing/selftests/powerpc/flags.mk
@@ -0,0 +1,9 @@
+#This checks for any ENV variables and add those.
+
+ifeq ($(GIT_VERSION),)
+GIT_VERSION := $(shell git describe --always --long --dirty || echo "unknown")
+export GIT_VERSION
+endif
+
+CFLAGS := -std=gnu99 -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(selfdir)/powerpc/include $(USERCFLAGS)
+export CFLAGS
diff --git a/tools/testing/selftests/powerpc/math/Makefile b/tools/testing/selftests/powerpc/math/Makefile
index 3948f7c510aa..b14fd2e0c6a8 100644
--- a/tools/testing/selftests/powerpc/math/Makefile
+++ b/tools/testing/selftests/powerpc/math/Makefile
@@ -3,6 +3,7 @@ TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal fpu_denormal vmx_syscall vm
top_srcdir = ../../../../..
include ../../lib.mk
+include ../flags.mk
$(TEST_GEN_PROGS): ../harness.c
$(TEST_GEN_PROGS): CFLAGS += -O2 -g -pthread -m64 -maltivec
diff --git a/tools/testing/selftests/powerpc/mce/Makefile b/tools/testing/selftests/powerpc/mce/Makefile
index 2424513982d9..ce4ed679aaaf 100644
--- a/tools/testing/selftests/powerpc/mce/Makefile
+++ b/tools/testing/selftests/powerpc/mce/Makefile
@@ -3,5 +3,6 @@
TEST_GEN_PROGS := inject-ra-err
include ../../lib.mk
+include ../flags.mk
$(TEST_GEN_PROGS): ../harness.c
diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
index 4a6608beef0e..aab058ecb352 100644
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -13,6 +13,7 @@ TEST_GEN_FILES := tempfile
top_srcdir = ../../../../..
include ../../lib.mk
+include ../flags.mk
$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/nx-gzip/Makefile b/tools/testing/selftests/powerpc/nx-gzip/Makefile
index 0785c2e99d40..480d8ba94cf7 100644
--- a/tools/testing/selftests/powerpc/nx-gzip/Makefile
+++ b/tools/testing/selftests/powerpc/nx-gzip/Makefile
@@ -1,8 +1,9 @@
-CFLAGS = -O3 -m64 -I./include -I../include
-
TEST_GEN_FILES := gzfht_test gunz_test
TEST_PROGS := nx-gzip-test.sh
include ../../lib.mk
+include ../flags.mk
+
+CFLAGS = -O3 -m64 -I./include -I../include
$(TEST_GEN_FILES): gzip_vas.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/papr_attributes/Makefile b/tools/testing/selftests/powerpc/papr_attributes/Makefile
index e899712d49db..406429499022 100644
--- a/tools/testing/selftests/powerpc/papr_attributes/Makefile
+++ b/tools/testing/selftests/powerpc/papr_attributes/Makefile
@@ -3,5 +3,6 @@ TEST_GEN_PROGS := attr_test
top_srcdir = ../../../../..
include ../../lib.mk
+include ../flags.mk
-$(TEST_GEN_PROGS): ../harness.c ../utils.c \ No newline at end of file
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/papr_sysparm/Makefile b/tools/testing/selftests/powerpc/papr_sysparm/Makefile
index 7f79e437634a..fed4f2414dbf 100644
--- a/tools/testing/selftests/powerpc/papr_sysparm/Makefile
+++ b/tools/testing/selftests/powerpc/papr_sysparm/Makefile
@@ -6,6 +6,7 @@ TEST_GEN_PROGS := papr_sysparm
top_srcdir = ../../../../..
include ../../lib.mk
+include ../flags.mk
$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/papr_vpd/Makefile b/tools/testing/selftests/powerpc/papr_vpd/Makefile
index 06b719703bfd..b09852e40882 100644
--- a/tools/testing/selftests/powerpc/papr_vpd/Makefile
+++ b/tools/testing/selftests/powerpc/papr_vpd/Makefile
@@ -6,6 +6,7 @@ TEST_GEN_PROGS := papr_vpd
top_srcdir = ../../../../..
include ../../lib.mk
+include ../flags.mk
$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile
index a284fa874a9f..7e9dbf3d0d09 100644
--- a/tools/testing/selftests/powerpc/pmu/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/Makefile
@@ -7,8 +7,11 @@ EXTRA_SOURCES := ../harness.c event.c lib.c ../utils.c
top_srcdir = ../../../../..
include ../../lib.mk
+include ../flags.mk
-all: $(TEST_GEN_PROGS) ebb sampling_tests event_code_tests
+SUB_DIRS := ebb sampling_tests event_code_tests
+
+all: $(TEST_GEN_PROGS) $(SUB_DIRS)
$(TEST_GEN_PROGS): $(EXTRA_SOURCES)
@@ -22,12 +25,16 @@ $(OUTPUT)/count_stcx_fail: loop.S $(EXTRA_SOURCES)
$(OUTPUT)/per_event_excludes: ../utils.c
+$(SUB_DIRS):
+ BUILD_TARGET=$(OUTPUT)/$@; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $@ all
+
DEFAULT_RUN_TESTS := $(RUN_TESTS)
override define RUN_TESTS
$(DEFAULT_RUN_TESTS)
- +TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests
- +TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests
- +TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests
+ +@for TARGET in $(SUB_DIRS); do \
+ BUILD_TARGET=$(OUTPUT)/$$TARGET; \
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests; \
+ done;
endef
emit_tests:
@@ -35,34 +42,29 @@ emit_tests:
BASENAME_TEST=`basename $$TEST`; \
echo "$(COLLECTION):$$BASENAME_TEST"; \
done
- +TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests
- +TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests
- +TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests
+ +@for TARGET in $(SUB_DIRS); do \
+ BUILD_TARGET=$(OUTPUT)/$$TARGET; \
+ $(MAKE) OUTPUT=$$BUILD_TARGET COLLECTION=$(COLLECTION)/$$TARGET -s -C $$TARGET emit_tests; \
+ done;
DEFAULT_INSTALL_RULE := $(INSTALL_RULE)
override define INSTALL_RULE
$(DEFAULT_INSTALL_RULE)
- +TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install
- +TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install
- +TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install
+ +@for TARGET in $(SUB_DIRS); do \
+ BUILD_TARGET=$(OUTPUT)/$$TARGET; \
+ $(MAKE) OUTPUT=$$BUILD_TARGET INSTALL_PATH=$$INSTALL_PATH/$$TARGET -C $$TARGET install; \
+ done;
endef
DEFAULT_CLEAN := $(CLEAN)
override define CLEAN
$(DEFAULT_CLEAN)
$(RM) $(TEST_GEN_PROGS) $(OUTPUT)/loop.o
- +TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean
- +TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean
- +TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean
+ +@for TARGET in $(SUB_DIRS); do \
+ BUILD_TARGET=$(OUTPUT)/$$TARGET; \
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean; \
+ done;
endef
-ebb:
- TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all
-
-sampling_tests:
- TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all
-
-event_code_tests:
- TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all
.PHONY: all run_tests ebb sampling_tests event_code_tests emit_tests
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
index 010160690227..1b39af7c10db 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
@@ -4,16 +4,6 @@ include ../../../../../build/Build.include
noarg:
$(MAKE) -C ../../
-# The EBB handler is 64-bit code and everything links against it
-CFLAGS += -m64
-
-TMPOUT = $(OUTPUT)/TMPDIR/
-# Toolchains may build PIE by default which breaks the assembly
-no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \
- $(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie)
-
-LDFLAGS += $(no-pie-option)
-
TEST_GEN_PROGS := reg_access_test event_attributes_test cycles_test \
cycles_with_freeze_test pmc56_overflow_test \
ebb_vs_cpu_event_test cpu_event_vs_ebb_test \
@@ -28,6 +18,17 @@ TEST_GEN_PROGS := reg_access_test event_attributes_test cycles_test \
top_srcdir = ../../../../../..
include ../../../lib.mk
+include ../../flags.mk
+
+# The EBB handler is 64-bit code and everything links against it
+CFLAGS += -m64
+
+TMPOUT = $(OUTPUT)/TMPDIR/
+# Toolchains may build PIE by default which breaks the assembly
+no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \
+ $(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie)
+
+LDFLAGS += $(no-pie-option)
$(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c \
ebb.c ebb_handler.S trace.c busy_loop.S
diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile
index 4e07d7046457..fdb080b3fa65 100644
--- a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile
@@ -1,6 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
-CFLAGS += -m64
-
TEST_GEN_PROGS := group_constraint_pmc56_test group_pmc56_exclude_constraints_test group_constraint_pmc_count_test \
group_constraint_repeat_test group_constraint_radix_scope_qual_test reserved_bits_mmcra_sample_elig_mode_test \
group_constraint_mmcra_sample_test invalid_event_code_test reserved_bits_mmcra_thresh_ctl_test \
@@ -11,5 +9,8 @@ TEST_GEN_PROGS := group_constraint_pmc56_test group_pmc56_exclude_constraints_te
top_srcdir = ../../../../../..
include ../../../lib.mk
+include ../../flags.mk
+
+CFLAGS += -m64
$(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c ../sampling_tests/misc.h ../sampling_tests/misc.c
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
index 9e67351fb252..9f79bec5fce7 100644
--- a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
@@ -1,6 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
-CFLAGS += -m64
-
TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test \
mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test \
mmcr1_comb_test mmcr2_l2l3_test mmcr2_fcs_fch_test \
@@ -11,5 +9,8 @@ TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test
top_srcdir = ../../../../../..
include ../../../lib.mk
+include ../../flags.mk
+
+CFLAGS += -m64
$(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c misc.c misc.h ../loop.S ../branch_loops.S
diff --git a/tools/testing/selftests/powerpc/primitives/Makefile b/tools/testing/selftests/powerpc/primitives/Makefile
index 9b9491a63213..23bd9a7590dd 100644
--- a/tools/testing/selftests/powerpc/primitives/Makefile
+++ b/tools/testing/selftests/powerpc/primitives/Makefile
@@ -1,9 +1,10 @@
# SPDX-License-Identifier: GPL-2.0-only
-CFLAGS += -I$(CURDIR)
-
TEST_GEN_PROGS := load_unaligned_zeropad
top_srcdir = ../../../../..
include ../../lib.mk
+include ../flags.mk
+
+CFLAGS += -I$(CURDIR)
$(TEST_GEN_PROGS): ../harness.c
diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile
index 1b39b86849da..59ca01d8567e 100644
--- a/tools/testing/selftests/powerpc/ptrace/Makefile
+++ b/tools/testing/selftests/powerpc/ptrace/Makefile
@@ -26,6 +26,7 @@ LOCAL_HDRS += $(patsubst %,$(selfdir)/powerpc/ptrace/%,$(wildcard *.h))
top_srcdir = ../../../../..
include ../../lib.mk
+include ../flags.mk
TM_TESTS := $(patsubst %,$(OUTPUT)/%,$(TM_TESTS))
TESTS_64 := $(patsubst %,$(OUTPUT)/%,$(TESTS_64))
diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile
index e0d979ab0204..33286039724a 100644
--- a/tools/testing/selftests/powerpc/security/Makefile
+++ b/tools/testing/selftests/powerpc/security/Makefile
@@ -5,9 +5,10 @@ TEST_PROGS := mitigation-patching.sh
top_srcdir = ../../../../..
-CFLAGS += $(KHDR_INCLUDES)
-
include ../../lib.mk
+include ../flags.mk
+
+CFLAGS += $(KHDR_INCLUDES)
$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/signal/Makefile b/tools/testing/selftests/powerpc/signal/Makefile
index f679d260afc8..ece95bd52be9 100644
--- a/tools/testing/selftests/powerpc/signal/Makefile
+++ b/tools/testing/selftests/powerpc/signal/Makefile
@@ -3,7 +3,6 @@ TEST_GEN_PROGS := signal signal_tm sigfuz sigreturn_vdso sig_sc_double_restart
TEST_GEN_PROGS += sigreturn_kernel
TEST_GEN_PROGS += sigreturn_unaligned
-CFLAGS += -maltivec
$(OUTPUT)/signal_tm: CFLAGS += -mhtm
$(OUTPUT)/sigfuz: CFLAGS += -pthread -m64
@@ -11,5 +10,8 @@ TEST_FILES := settings
top_srcdir = ../../../../..
include ../../lib.mk
+include ../flags.mk
+
+CFLAGS += -maltivec
$(TEST_GEN_PROGS): ../harness.c ../utils.c signal.S
diff --git a/tools/testing/selftests/powerpc/stringloops/Makefile b/tools/testing/selftests/powerpc/stringloops/Makefile
index 9c39f55a58ff..4c9d9a58c9d1 100644
--- a/tools/testing/selftests/powerpc/stringloops/Makefile
+++ b/tools/testing/selftests/powerpc/stringloops/Makefile
@@ -1,7 +1,4 @@
# SPDX-License-Identifier: GPL-2.0
-# The loops are all 64-bit code
-CFLAGS += -I$(CURDIR)
-
EXTRA_SOURCES := ../harness.c
build_32bit = $(shell if ($(CC) $(CFLAGS) -m32 -o /dev/null memcmp.c >/dev/null 2>&1) then echo "1"; fi)
@@ -27,9 +24,13 @@ $(OUTPUT)/strlen_32: CFLAGS += -m32
TEST_GEN_PROGS += strlen_32
endif
-ASFLAGS = $(CFLAGS)
-
top_srcdir = ../../../../..
include ../../lib.mk
+include ../flags.mk
+
+# The loops are all 64-bit code
+CFLAGS += -I$(CURDIR)
+
+ASFLAGS = $(CFLAGS)
$(TEST_GEN_PROGS): $(EXTRA_SOURCES)
diff --git a/tools/testing/selftests/powerpc/switch_endian/Makefile b/tools/testing/selftests/powerpc/switch_endian/Makefile
index bdc081afedb0..0da2e0a74264 100644
--- a/tools/testing/selftests/powerpc/switch_endian/Makefile
+++ b/tools/testing/selftests/powerpc/switch_endian/Makefile
@@ -1,12 +1,13 @@
# SPDX-License-Identifier: GPL-2.0
TEST_GEN_PROGS := switch_endian_test
-ASFLAGS += -O2 -Wall -g -nostdlib -m64
-
EXTRA_CLEAN = $(OUTPUT)/*.o $(OUTPUT)/check-reversed.S
top_srcdir = ../../../../..
include ../../lib.mk
+include ../flags.mk
+
+ASFLAGS += -O2 -Wall -g -nostdlib -m64
$(OUTPUT)/switch_endian_test: ASFLAGS += -I $(OUTPUT)
$(OUTPUT)/switch_endian_test: $(OUTPUT)/check-reversed.S
diff --git a/tools/testing/selftests/powerpc/syscalls/Makefile b/tools/testing/selftests/powerpc/syscalls/Makefile
index ee1740ddfb0c..3bc07af88f0e 100644
--- a/tools/testing/selftests/powerpc/syscalls/Makefile
+++ b/tools/testing/selftests/powerpc/syscalls/Makefile
@@ -1,9 +1,10 @@
# SPDX-License-Identifier: GPL-2.0-only
TEST_GEN_PROGS := ipc_unmuxed rtas_filter
-CFLAGS += $(KHDR_INCLUDES)
-
top_srcdir = ../../../../..
include ../../lib.mk
+include ../flags.mk
+
+CFLAGS += $(KHDR_INCLUDES)
$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile
index 3876805c2f31..f13f0ab36007 100644
--- a/tools/testing/selftests/powerpc/tm/Makefile
+++ b/tools/testing/selftests/powerpc/tm/Makefile
@@ -11,6 +11,7 @@ TEST_FILES := settings
top_srcdir = ../../../../..
include ../../lib.mk
+include ../flags.mk
$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/vphn/Makefile b/tools/testing/selftests/powerpc/vphn/Makefile
index cf65cbf33085..61d519a076c6 100644
--- a/tools/testing/selftests/powerpc/vphn/Makefile
+++ b/tools/testing/selftests/powerpc/vphn/Makefile
@@ -1,10 +1,11 @@
# SPDX-License-Identifier: GPL-2.0-only
TEST_GEN_PROGS := test-vphn
-CFLAGS += -m64 -I$(CURDIR)
-
top_srcdir = ../../../../..
include ../../lib.mk
+include ../flags.mk
+
+CFLAGS += -m64 -I$(CURDIR)
$(TEST_GEN_PROGS): ../harness.c
diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore
index a156ac5dd2c6..973968f45bba 100644
--- a/tools/testing/selftests/proc/.gitignore
+++ b/tools/testing/selftests/proc/.gitignore
@@ -2,6 +2,7 @@
/fd-001-lookup
/fd-002-posix-eq
/fd-003-kthread
+/proc-2-is-kthread
/proc-fsconfig-hidepid
/proc-loadavg-001
/proc-multiple-procfs
@@ -9,6 +10,7 @@
/proc-pid-vm
/proc-self-map-files-001
/proc-self-map-files-002
+/proc-self-isnt-kthread
/proc-self-syscall
/proc-self-wchan
/proc-subset-pid
diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile
index cd95369254c0..b12921b9794b 100644
--- a/tools/testing/selftests/proc/Makefile
+++ b/tools/testing/selftests/proc/Makefile
@@ -1,17 +1,19 @@
# SPDX-License-Identifier: GPL-2.0-only
CFLAGS += -Wall -O2 -Wno-unused-function
-CFLAGS += -D_GNU_SOURCE
+CFLAGS += $(TOOLS_INCLUDES)
LDFLAGS += -pthread
TEST_GEN_PROGS :=
TEST_GEN_PROGS += fd-001-lookup
TEST_GEN_PROGS += fd-002-posix-eq
TEST_GEN_PROGS += fd-003-kthread
+TEST_GEN_PROGS += proc-2-is-kthread
TEST_GEN_PROGS += proc-loadavg-001
TEST_GEN_PROGS += proc-empty-vm
TEST_GEN_PROGS += proc-pid-vm
TEST_GEN_PROGS += proc-self-map-files-001
TEST_GEN_PROGS += proc-self-map-files-002
+TEST_GEN_PROGS += proc-self-isnt-kthread
TEST_GEN_PROGS += proc-self-syscall
TEST_GEN_PROGS += proc-self-wchan
TEST_GEN_PROGS += proc-subset-pid
diff --git a/tools/testing/selftests/proc/proc-2-is-kthread.c b/tools/testing/selftests/proc/proc-2-is-kthread.c
new file mode 100644
index 000000000000..f13668fb482e
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-2-is-kthread.c
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2024 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/* Test that kernel thread is reported as such. */
+#undef NDEBUG
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+
+int main(void)
+{
+ /*
+ * The following solutions don't really work:
+ *
+ * 1) jit kernel module which creates kernel thread:
+ * test becomes arch-specific,
+ * problems with mandatory module signing,
+ * problems with lockdown mode,
+ * doesn't work with CONFIG_MODULES=n at all,
+ * kthread creation API is formally unstable internal kernel API,
+ * need a mechanism to report test kernel thread's PID back,
+ *
+ * 2) ksoftirqd/0 and kswapd0 look like stable enough kernel threads,
+ * but their PIDs are unstable.
+ *
+ * Check against kthreadd which always seem to exist under pid 2.
+ */
+ int fd = open("/proc/2/status", O_RDONLY);
+ assert(fd >= 0);
+
+ char buf[4096];
+ ssize_t rv = read(fd, buf, sizeof(buf));
+ assert(0 <= rv && rv < sizeof(buf));
+ buf[rv] = '\0';
+
+ assert(strstr(buf, "Kthread:\t1\n"));
+
+ return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-empty-vm.c b/tools/testing/selftests/proc/proc-empty-vm.c
index 56198d4ca2bf..b3f898aab4ab 100644
--- a/tools/testing/selftests/proc/proc-empty-vm.c
+++ b/tools/testing/selftests/proc/proc-empty-vm.c
@@ -381,9 +381,6 @@ static int test_proc_pid_statm(pid_t pid)
assert(rv >= 0);
assert(rv <= sizeof(buf));
- if (0) {
- write(1, buf, rv);
- }
const char *p = buf;
const char *const end = p + rv;
diff --git a/tools/testing/selftests/proc/proc-pid-vm.c b/tools/testing/selftests/proc/proc-pid-vm.c
index cacbd2a4aec9..d04685771952 100644
--- a/tools/testing/selftests/proc/proc-pid-vm.c
+++ b/tools/testing/selftests/proc/proc-pid-vm.c
@@ -45,6 +45,7 @@
#include <linux/kdev_t.h>
#include <sys/time.h>
#include <sys/resource.h>
+#include <linux/fs.h>
#include "../kselftest.h"
@@ -492,6 +493,91 @@ int main(void)
assert(buf[13] == '\n');
}
+ /* Test PROCMAP_QUERY ioctl() for /proc/$PID/maps */
+ {
+ char path_buf[256], exp_path_buf[256];
+ struct procmap_query q;
+ int fd, err;
+
+ snprintf(path_buf, sizeof(path_buf), "/proc/%u/maps", pid);
+ fd = open(path_buf, O_RDONLY);
+ if (fd == -1)
+ return 1;
+
+ /* CASE 1: exact MATCH at VADDR */
+ memset(&q, 0, sizeof(q));
+ q.size = sizeof(q);
+ q.query_addr = VADDR;
+ q.query_flags = 0;
+ q.vma_name_addr = (__u64)(unsigned long)path_buf;
+ q.vma_name_size = sizeof(path_buf);
+
+ err = ioctl(fd, PROCMAP_QUERY, &q);
+ assert(err == 0);
+
+ assert(q.query_addr == VADDR);
+ assert(q.query_flags == 0);
+
+ assert(q.vma_flags == (PROCMAP_QUERY_VMA_READABLE | PROCMAP_QUERY_VMA_EXECUTABLE));
+ assert(q.vma_start == VADDR);
+ assert(q.vma_end == VADDR + PAGE_SIZE);
+ assert(q.vma_page_size == PAGE_SIZE);
+
+ assert(q.vma_offset == 0);
+ assert(q.inode == st.st_ino);
+ assert(q.dev_major == MAJOR(st.st_dev));
+ assert(q.dev_minor == MINOR(st.st_dev));
+
+ snprintf(exp_path_buf, sizeof(exp_path_buf),
+ "/tmp/#%llu (deleted)", (unsigned long long)st.st_ino);
+ assert(q.vma_name_size == strlen(exp_path_buf) + 1);
+ assert(strcmp(path_buf, exp_path_buf) == 0);
+
+ /* CASE 2: NO MATCH at VADDR-1 */
+ memset(&q, 0, sizeof(q));
+ q.size = sizeof(q);
+ q.query_addr = VADDR - 1;
+ q.query_flags = 0; /* exact match */
+
+ err = ioctl(fd, PROCMAP_QUERY, &q);
+ err = err < 0 ? -errno : 0;
+ assert(err == -ENOENT);
+
+ /* CASE 3: MATCH COVERING_OR_NEXT_VMA at VADDR - 1 */
+ memset(&q, 0, sizeof(q));
+ q.size = sizeof(q);
+ q.query_addr = VADDR - 1;
+ q.query_flags = PROCMAP_QUERY_COVERING_OR_NEXT_VMA;
+
+ err = ioctl(fd, PROCMAP_QUERY, &q);
+ assert(err == 0);
+
+ assert(q.query_addr == VADDR - 1);
+ assert(q.query_flags == PROCMAP_QUERY_COVERING_OR_NEXT_VMA);
+ assert(q.vma_start == VADDR);
+ assert(q.vma_end == VADDR + PAGE_SIZE);
+
+ /* CASE 4: NO MATCH at VADDR + PAGE_SIZE */
+ memset(&q, 0, sizeof(q));
+ q.size = sizeof(q);
+ q.query_addr = VADDR + PAGE_SIZE; /* point right after the VMA */
+ q.query_flags = PROCMAP_QUERY_COVERING_OR_NEXT_VMA;
+
+ err = ioctl(fd, PROCMAP_QUERY, &q);
+ err = err < 0 ? -errno : 0;
+ assert(err == -ENOENT);
+
+ /* CASE 5: NO MATCH WRITABLE at VADDR */
+ memset(&q, 0, sizeof(q));
+ q.size = sizeof(q);
+ q.query_addr = VADDR;
+ q.query_flags = PROCMAP_QUERY_VMA_WRITABLE;
+
+ err = ioctl(fd, PROCMAP_QUERY, &q);
+ err = err < 0 ? -errno : 0;
+ assert(err == -ENOENT);
+ }
+
return 0;
}
#else
diff --git a/tools/testing/selftests/proc/proc-self-isnt-kthread.c b/tools/testing/selftests/proc/proc-self-isnt-kthread.c
new file mode 100644
index 000000000000..e01f4e0a91b4
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-self-isnt-kthread.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2024 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/* Test that userspace program is not kernel thread. */
+#undef NDEBUG
+#include <assert.h>
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+
+int main(void)
+{
+ int fd = open("/proc/self/status", O_RDONLY);
+ assert(fd >= 0);
+
+ char buf[4096];
+ ssize_t rv = read(fd, buf, sizeof(buf));
+ assert(0 <= rv && rv < sizeof(buf));
+ buf[rv] = '\0';
+
+ /* This test is very much not kernel thread. */
+ assert(strstr(buf, "Kthread:\t0\n"));
+
+ return 0;
+}
diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index bbac5f4b03d0..990d24696fd3 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -391,7 +391,7 @@ __EOF__
forceflavor="`echo $flavor | sed -e 's/^CONFIG/CONFIG_FORCE/'`"
deselectedflavors="`grep -v $flavor $T/rcutasksflavors | tr '\012' ' ' | tr -s ' ' | sed -e 's/ *$//'`"
echo " --- Running RCU Tasks Trace flavor $flavor `date`" >> $rtfdir/log
- tools/testing/selftests/rcutorture/bin/kvm.sh --datestamp "$ds/results-rcutasksflavors/$flavor" --buildonly --configs "TINY01 TREE04" --kconfig "CONFIG_RCU_EXPERT=y CONFIG_RCU_SCALE_TEST=y $forceflavor=y $deselectedflavors" --trust-make > $T/$flavor.out 2>&1
+ tools/testing/selftests/rcutorture/bin/kvm.sh --datestamp "$ds/results-rcutasksflavors/$flavor" --buildonly --configs "TINY01 TREE04" --kconfig "CONFIG_RCU_EXPERT=y CONFIG_RCU_SCALE_TEST=y CONFIG_KPROBES=n CONFIG_RCU_TRACE=n CONFIG_TRACING=n CONFIG_BLK_DEV_IO_TRACE=n CONFIG_UPROBE_EVENTS=n $forceflavor=y $deselectedflavors" --trust-make > $T/$flavor.out 2>&1
retcode=$?
if test "$retcode" -ne 0
then
@@ -425,7 +425,7 @@ fi
if test "$do_scftorture" = "yes"
then
# Scale memory based on the number of CPUs.
- scfmem=$((2+HALF_ALLOTED_CPUS/16))
+ scfmem=$((3+HALF_ALLOTED_CPUS/16))
torture_bootargs="scftorture.nthreads=$HALF_ALLOTED_CPUS torture.disable_onoff_at_boot csdlock_debug=1"
torture_set "scftorture" tools/testing/selftests/rcutorture/bin/kvm.sh --torture scf --allcpus --duration "$duration_scftorture" --configs "$configs_scftorture" --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory ${scfmem}G --trust-make
fi
@@ -559,7 +559,7 @@ do_kcsan="$do_kcsan_save"
if test "$do_kvfree" = "yes"
then
torture_bootargs="rcuscale.kfree_rcu_test=1 rcuscale.kfree_nthreads=16 rcuscale.holdoff=20 rcuscale.kfree_loops=10000 torture.disable_onoff_at_boot"
- torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 10 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 2G --trust-make
+ torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration $duration_rcutorture --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 2G --trust-make
fi
if test "$do_clocksourcewd" = "yes"
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE09 b/tools/testing/selftests/rcutorture/configs/rcu/TREE09
index fc45645bb5f4..9ecd1b4e653d 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE09
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE09
@@ -10,8 +10,9 @@ CONFIG_NO_HZ_FULL=n
CONFIG_RCU_TRACE=n
CONFIG_RCU_NOCB_CPU=n
CONFIG_DEBUG_LOCK_ALLOC=n
-CONFIG_RCU_BOOST=n
+CONFIG_RCU_BOOST=y
+CONFIG_RCU_BOOST_DELAY=100
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
-#CHECK#CONFIG_RCU_EXPERT=n
+CONFIG_RCU_EXPERT=y
CONFIG_KPROBES=n
CONFIG_FTRACE=n
diff --git a/tools/testing/selftests/resctrl/Makefile b/tools/testing/selftests/resctrl/Makefile
index 2deac2031de9..f408bd6bfc3d 100644
--- a/tools/testing/selftests/resctrl/Makefile
+++ b/tools/testing/selftests/resctrl/Makefile
@@ -1,10 +1,12 @@
# SPDX-License-Identifier: GPL-2.0
-CFLAGS = -g -Wall -O2 -D_FORTIFY_SOURCE=2 -D_GNU_SOURCE
+CFLAGS = -g -Wall -O2 -D_FORTIFY_SOURCE=2
CFLAGS += $(KHDR_INCLUDES)
TEST_GEN_PROGS := resctrl_tests
+LOCAL_HDRS += $(wildcard *.h)
+
include ../lib.mk
-$(OUTPUT)/resctrl_tests: $(wildcard *.[ch])
+$(OUTPUT)/resctrl_tests: $(wildcard *.c)
diff --git a/tools/testing/selftests/resctrl/cache.c b/tools/testing/selftests/resctrl/cache.c
index 1b339d6bbff1..1ff1104e6575 100644
--- a/tools/testing/selftests/resctrl/cache.c
+++ b/tools/testing/selftests/resctrl/cache.c
@@ -101,12 +101,12 @@ static int get_llc_occu_resctrl(unsigned long *llc_occupancy)
*
* Return: 0 on success, < 0 on error.
*/
-static int print_results_cache(const char *filename, int bm_pid, __u64 llc_value)
+static int print_results_cache(const char *filename, pid_t bm_pid, __u64 llc_value)
{
FILE *fp;
if (strcmp(filename, "stdio") == 0 || strcmp(filename, "stderr") == 0) {
- printf("Pid: %d \t LLC_value: %llu\n", bm_pid, llc_value);
+ printf("Pid: %d \t LLC_value: %llu\n", (int)bm_pid, llc_value);
} else {
fp = fopen(filename, "a");
if (!fp) {
@@ -114,7 +114,7 @@ static int print_results_cache(const char *filename, int bm_pid, __u64 llc_value
return -1;
}
- fprintf(fp, "Pid: %d \t llc_value: %llu\n", bm_pid, llc_value);
+ fprintf(fp, "Pid: %d \t llc_value: %llu\n", (int)bm_pid, llc_value);
fclose(fp);
}
@@ -133,7 +133,7 @@ static int print_results_cache(const char *filename, int bm_pid, __u64 llc_value
* Return: =0 on success. <0 on failure.
*/
int perf_event_measure(int pe_fd, struct perf_event_read *pe_read,
- const char *filename, int bm_pid)
+ const char *filename, pid_t bm_pid)
{
int ret;
@@ -161,7 +161,7 @@ int perf_event_measure(int pe_fd, struct perf_event_read *pe_read,
*
* Return: =0 on success. <0 on failure.
*/
-int measure_llc_resctrl(const char *filename, int bm_pid)
+int measure_llc_resctrl(const char *filename, pid_t bm_pid)
{
unsigned long llc_occu_resc = 0;
int ret;
diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c
index 4cb991be8e31..742782438ca3 100644
--- a/tools/testing/selftests/resctrl/cat_test.c
+++ b/tools/testing/selftests/resctrl/cat_test.c
@@ -128,7 +128,7 @@ static int check_results(struct resctrl_val_param *param, const char *cache_type
return fail;
}
-void cat_test_cleanup(void)
+static void cat_test_cleanup(void)
{
remove(RESULT_FILE_NAME);
}
@@ -158,7 +158,6 @@ static int cat_test(const struct resctrl_test *test,
struct resctrl_val_param *param,
size_t span, unsigned long current_mask)
{
- char *resctrl_val = param->resctrl_val;
struct perf_event_read pe_read;
struct perf_event_attr pea;
cpu_set_t old_affinity;
@@ -178,8 +177,7 @@ static int cat_test(const struct resctrl_test *test,
return ret;
/* Write benchmark to specified con_mon grp, mon_grp in resctrl FS*/
- ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp,
- resctrl_val);
+ ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp);
if (ret)
goto reset_affinity;
@@ -272,7 +270,6 @@ static int cat_run_test(const struct resctrl_test *test, const struct user_param
start_mask = create_bit_mask(start, n);
struct resctrl_val_param param = {
- .resctrl_val = CAT_STR,
.ctrlgrp = "c1",
.filename = RESULT_FILE_NAME,
.num_of_runs = 0,
@@ -284,21 +281,37 @@ static int cat_run_test(const struct resctrl_test *test, const struct user_param
ret = cat_test(test, uparams, &param, span, start_mask);
if (ret)
- goto out;
+ return ret;
ret = check_results(&param, test->resource,
cache_total_size, full_cache_mask, start_mask);
-out:
- cat_test_cleanup();
-
return ret;
}
+static bool arch_supports_noncont_cat(const struct resctrl_test *test)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ /* AMD always supports non-contiguous CBM. */
+ if (get_vendor() == ARCH_AMD)
+ return true;
+
+ /* Intel support for non-contiguous CBM needs to be discovered. */
+ if (!strcmp(test->resource, "L3"))
+ __cpuid_count(0x10, 1, eax, ebx, ecx, edx);
+ else if (!strcmp(test->resource, "L2"))
+ __cpuid_count(0x10, 2, eax, ebx, ecx, edx);
+ else
+ return false;
+
+ return ((ecx >> 3) & 1);
+}
+
static int noncont_cat_run_test(const struct resctrl_test *test,
const struct user_params *uparams)
{
unsigned long full_cache_mask, cont_mask, noncont_mask;
- unsigned int eax, ebx, ecx, edx, sparse_masks;
+ unsigned int sparse_masks;
int bit_center, ret;
char schemata[64];
@@ -307,15 +320,8 @@ static int noncont_cat_run_test(const struct resctrl_test *test,
if (ret)
return ret;
- if (!strcmp(test->resource, "L3"))
- __cpuid_count(0x10, 1, eax, ebx, ecx, edx);
- else if (!strcmp(test->resource, "L2"))
- __cpuid_count(0x10, 2, eax, ebx, ecx, edx);
- else
- return -EINVAL;
-
- if (sparse_masks != ((ecx >> 3) & 1)) {
- ksft_print_msg("CPUID output doesn't match 'sparse_masks' file content!\n");
+ if (arch_supports_noncont_cat(test) != sparse_masks) {
+ ksft_print_msg("Hardware and kernel differ on non-contiguous CBM support!\n");
return 1;
}
@@ -373,6 +379,7 @@ struct resctrl_test l3_cat_test = {
.resource = "L3",
.feature_check = test_resource_feature_check,
.run_test = cat_run_test,
+ .cleanup = cat_test_cleanup,
};
struct resctrl_test l3_noncont_cat_test = {
diff --git a/tools/testing/selftests/resctrl/cmt_test.c b/tools/testing/selftests/resctrl/cmt_test.c
index a81f91222a89..0c045080d808 100644
--- a/tools/testing/selftests/resctrl/cmt_test.c
+++ b/tools/testing/selftests/resctrl/cmt_test.c
@@ -16,6 +16,17 @@
#define MAX_DIFF 2000000
#define MAX_DIFF_PERCENT 15
+#define CON_MON_LCC_OCCUP_PATH \
+ "%s/%s/mon_data/mon_L3_%02d/llc_occupancy"
+
+static int cmt_init(const struct resctrl_val_param *param, int domain_id)
+{
+ sprintf(llc_occup_path, CON_MON_LCC_OCCUP_PATH, RESCTRL_PATH,
+ param->ctrlgrp, domain_id);
+
+ return 0;
+}
+
static int cmt_setup(const struct resctrl_test *test,
const struct user_params *uparams,
struct resctrl_val_param *p)
@@ -29,6 +40,13 @@ static int cmt_setup(const struct resctrl_test *test,
return 0;
}
+static int cmt_measure(const struct user_params *uparams,
+ struct resctrl_val_param *param, pid_t bm_pid)
+{
+ sleep(1);
+ return measure_llc_resctrl(param->filename, bm_pid);
+}
+
static int show_results_info(unsigned long sum_llc_val, int no_of_bits,
unsigned long cache_span, unsigned long max_diff,
unsigned long max_diff_percent, unsigned long num_of_runs,
@@ -40,11 +58,11 @@ static int show_results_info(unsigned long sum_llc_val, int no_of_bits,
int ret;
avg_llc_val = sum_llc_val / num_of_runs;
- avg_diff = (long)abs(cache_span - avg_llc_val);
+ avg_diff = (long)(cache_span - avg_llc_val);
diff_percent = ((float)cache_span - avg_llc_val) / cache_span * 100;
ret = platform && abs((int)diff_percent) > max_diff_percent &&
- abs(avg_diff) > max_diff;
+ labs(avg_diff) > max_diff;
ksft_print_msg("%s Check cache miss rate within %lu%%\n",
ret ? "Fail:" : "Pass:", max_diff_percent);
@@ -91,7 +109,7 @@ static int check_results(struct resctrl_val_param *param, size_t span, int no_of
MAX_DIFF, MAX_DIFF_PERCENT, runs - 1, true);
}
-void cmt_test_cleanup(void)
+static void cmt_test_cleanup(void)
{
remove(RESULT_FILE_NAME);
}
@@ -126,13 +144,13 @@ static int cmt_run_test(const struct resctrl_test *test, const struct user_param
}
struct resctrl_val_param param = {
- .resctrl_val = CMT_STR,
.ctrlgrp = "c1",
- .mongrp = "m1",
.filename = RESULT_FILE_NAME,
.mask = ~(long_mask << n) & long_mask,
.num_of_runs = 0,
+ .init = cmt_init,
.setup = cmt_setup,
+ .measure = cmt_measure,
};
span = cache_portion_size(cache_total_size, param.mask, long_mask);
@@ -161,7 +179,6 @@ static int cmt_run_test(const struct resctrl_test *test, const struct user_param
ksft_print_msg("Intel CMT may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n");
out:
- cmt_test_cleanup();
free(span_str);
return ret;
@@ -178,4 +195,5 @@ struct resctrl_test cmt_test = {
.resource = "L3",
.feature_check = cmt_feature_check,
.run_test = cmt_run_test,
+ .cleanup = cmt_test_cleanup,
};
diff --git a/tools/testing/selftests/resctrl/mba_test.c b/tools/testing/selftests/resctrl/mba_test.c
index 7946e32e85c8..ab8496a4925b 100644
--- a/tools/testing/selftests/resctrl/mba_test.c
+++ b/tools/testing/selftests/resctrl/mba_test.c
@@ -17,6 +17,19 @@
#define ALLOCATION_MIN 10
#define ALLOCATION_STEP 10
+static int mba_init(const struct resctrl_val_param *param, int domain_id)
+{
+ int ret;
+
+ ret = initialize_mem_bw_imc();
+ if (ret)
+ return ret;
+
+ initialize_mem_bw_resctrl(param, domain_id);
+
+ return 0;
+}
+
/*
* Change schemata percentage from 100 to 10%. Write schemata to specified
* con_mon grp, mon_grp in resctrl FS.
@@ -51,6 +64,12 @@ static int mba_setup(const struct resctrl_test *test,
return 0;
}
+static int mba_measure(const struct user_params *uparams,
+ struct resctrl_val_param *param, pid_t bm_pid)
+{
+ return measure_mem_bw(uparams, param, bm_pid, "reads");
+}
+
static bool show_mba_info(unsigned long *bw_imc, unsigned long *bw_resc)
{
int allocation, runs;
@@ -60,8 +79,8 @@ static bool show_mba_info(unsigned long *bw_imc, unsigned long *bw_resc)
/* Memory bandwidth from 100% down to 10% */
for (allocation = 0; allocation < ALLOCATION_MAX / ALLOCATION_STEP;
allocation++) {
- unsigned long avg_bw_imc, avg_bw_resc;
unsigned long sum_bw_imc = 0, sum_bw_resc = 0;
+ long avg_bw_imc, avg_bw_resc;
int avg_diff_per;
float avg_diff;
@@ -137,7 +156,7 @@ static int check_results(void)
return show_mba_info(bw_imc, bw_resc);
}
-void mba_test_cleanup(void)
+static void mba_test_cleanup(void)
{
remove(RESULT_FILE_NAME);
}
@@ -145,12 +164,11 @@ void mba_test_cleanup(void)
static int mba_run_test(const struct resctrl_test *test, const struct user_params *uparams)
{
struct resctrl_val_param param = {
- .resctrl_val = MBA_STR,
.ctrlgrp = "c1",
- .mongrp = "m1",
.filename = RESULT_FILE_NAME,
- .bw_report = "reads",
- .setup = mba_setup
+ .init = mba_init,
+ .setup = mba_setup,
+ .measure = mba_measure,
};
int ret;
@@ -158,13 +176,10 @@ static int mba_run_test(const struct resctrl_test *test, const struct user_param
ret = resctrl_val(test, uparams, uparams->benchmark_cmd, &param);
if (ret)
- goto out;
+ return ret;
ret = check_results();
-out:
- mba_test_cleanup();
-
return ret;
}
@@ -180,4 +195,5 @@ struct resctrl_test mba_test = {
.vendor_specific = ARCH_INTEL,
.feature_check = mba_feature_check,
.run_test = mba_run_test,
+ .cleanup = mba_test_cleanup,
};
diff --git a/tools/testing/selftests/resctrl/mbm_test.c b/tools/testing/selftests/resctrl/mbm_test.c
index d67ffa3ec63a..6b5a3b52d861 100644
--- a/tools/testing/selftests/resctrl/mbm_test.c
+++ b/tools/testing/selftests/resctrl/mbm_test.c
@@ -17,8 +17,8 @@
static int
show_bw_info(unsigned long *bw_imc, unsigned long *bw_resc, size_t span)
{
- unsigned long avg_bw_imc = 0, avg_bw_resc = 0;
unsigned long sum_bw_imc = 0, sum_bw_resc = 0;
+ long avg_bw_imc = 0, avg_bw_resc = 0;
int runs, ret, avg_diff_per;
float avg_diff = 0;
@@ -86,6 +86,19 @@ static int check_results(size_t span)
return ret;
}
+static int mbm_init(const struct resctrl_val_param *param, int domain_id)
+{
+ int ret;
+
+ ret = initialize_mem_bw_imc();
+ if (ret)
+ return ret;
+
+ initialize_mem_bw_resctrl(param, domain_id);
+
+ return 0;
+}
+
static int mbm_setup(const struct resctrl_test *test,
const struct user_params *uparams,
struct resctrl_val_param *p)
@@ -105,7 +118,13 @@ static int mbm_setup(const struct resctrl_test *test,
return ret;
}
-void mbm_test_cleanup(void)
+static int mbm_measure(const struct user_params *uparams,
+ struct resctrl_val_param *param, pid_t bm_pid)
+{
+ return measure_mem_bw(uparams, param, bm_pid, "reads");
+}
+
+static void mbm_test_cleanup(void)
{
remove(RESULT_FILE_NAME);
}
@@ -113,12 +132,11 @@ void mbm_test_cleanup(void)
static int mbm_run_test(const struct resctrl_test *test, const struct user_params *uparams)
{
struct resctrl_val_param param = {
- .resctrl_val = MBM_STR,
.ctrlgrp = "c1",
- .mongrp = "m1",
.filename = RESULT_FILE_NAME,
- .bw_report = "reads",
- .setup = mbm_setup
+ .init = mbm_init,
+ .setup = mbm_setup,
+ .measure = mbm_measure,
};
int ret;
@@ -126,15 +144,12 @@ static int mbm_run_test(const struct resctrl_test *test, const struct user_param
ret = resctrl_val(test, uparams, uparams->benchmark_cmd, &param);
if (ret)
- goto out;
+ return ret;
ret = check_results(DEFAULT_SPAN);
if (ret && (get_vendor() == ARCH_INTEL))
ksft_print_msg("Intel MBM may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n");
-out:
- mbm_test_cleanup();
-
return ret;
}
@@ -150,4 +165,5 @@ struct resctrl_test mbm_test = {
.vendor_specific = ARCH_INTEL,
.feature_check = mbm_feature_check,
.run_test = mbm_run_test,
+ .cleanup = mbm_test_cleanup,
};
diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h
index 2051bd135e0d..2dda56084588 100644
--- a/tools/testing/selftests/resctrl/resctrl.h
+++ b/tools/testing/selftests/resctrl/resctrl.h
@@ -43,13 +43,6 @@
#define DEFAULT_SPAN (250 * MB)
-#define PARENT_EXIT() \
- do { \
- kill(ppid, SIGKILL); \
- umount_resctrlfs(); \
- exit(EXIT_FAILURE); \
- } while (0)
-
/*
* user_params: User supplied parameters
* @cpu: CPU number to which the benchmark will be bound to
@@ -72,6 +65,7 @@ struct user_params {
* @disabled: Test is disabled
* @feature_check: Callback to check required resctrl features
* @run_test: Callback to run the test
+ * @cleanup: Callback to cleanup after the test
*/
struct resctrl_test {
const char *name;
@@ -82,28 +76,32 @@ struct resctrl_test {
bool (*feature_check)(const struct resctrl_test *test);
int (*run_test)(const struct resctrl_test *test,
const struct user_params *uparams);
+ void (*cleanup)(void);
};
/*
* resctrl_val_param: resctrl test parameters
- * @resctrl_val: Resctrl feature (Eg: mbm, mba.. etc)
* @ctrlgrp: Name of the control monitor group (con_mon grp)
* @mongrp: Name of the monitor group (mon grp)
* @filename: Name of file to which the o/p should be written
- * @bw_report: Bandwidth report type (reads vs writes)
- * @setup: Call back function to setup test environment
+ * @init: Callback function to initialize test environment
+ * @setup: Callback function to setup per test run environment
+ * @measure: Callback that performs the measurement (a single test)
*/
struct resctrl_val_param {
- char *resctrl_val;
- char ctrlgrp[64];
- char mongrp[64];
+ const char *ctrlgrp;
+ const char *mongrp;
char filename[64];
- char *bw_report;
unsigned long mask;
int num_of_runs;
+ int (*init)(const struct resctrl_val_param *param,
+ int domain_id);
int (*setup)(const struct resctrl_test *test,
const struct user_params *uparams,
struct resctrl_val_param *param);
+ int (*measure)(const struct user_params *uparams,
+ struct resctrl_val_param *param,
+ pid_t bm_pid);
};
struct perf_event_read {
@@ -113,11 +111,6 @@ struct perf_event_read {
} values[2];
};
-#define MBM_STR "mbm"
-#define MBA_STR "mba"
-#define CMT_STR "cmt"
-#define CAT_STR "cat"
-
/*
* Memory location that consumes values compiler must not optimize away.
* Volatile ensures writes to this location cannot be optimized away by
@@ -125,8 +118,6 @@ struct perf_event_read {
*/
extern volatile int *value_sink;
-extern pid_t bm_pid, ppid;
-
extern char llc_occup_path[1024];
int get_vendor(void);
@@ -135,7 +126,7 @@ int filter_dmesg(void);
int get_domain_id(const char *resource, int cpu_no, int *domain_id);
int mount_resctrlfs(void);
int umount_resctrlfs(void);
-int validate_bw_report_request(char *bw_report);
+const char *get_bw_report_type(const char *bw_report);
bool resctrl_resource_exists(const char *resource);
bool resctrl_mon_feature_exists(const char *resource, const char *feature);
bool resource_info_file_exists(const char *resource, const char *file);
@@ -143,22 +134,25 @@ bool test_resource_feature_check(const struct resctrl_test *test);
char *fgrep(FILE *inf, const char *str);
int taskset_benchmark(pid_t bm_pid, int cpu_no, cpu_set_t *old_affinity);
int taskset_restore(pid_t bm_pid, cpu_set_t *old_affinity);
-int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, const char *resource);
-int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp,
- char *resctrl_val);
+int write_schemata(const char *ctrlgrp, char *schemata, int cpu_no,
+ const char *resource);
+int write_bm_pid_to_resctrl(pid_t bm_pid, const char *ctrlgrp, const char *mongrp);
int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu,
int group_fd, unsigned long flags);
unsigned char *alloc_buffer(size_t buf_size, int memflush);
void mem_flush(unsigned char *buf, size_t buf_size);
void fill_cache_read(unsigned char *buf, size_t buf_size, bool once);
int run_fill_buf(size_t buf_size, int memflush, int op, bool once);
+int initialize_mem_bw_imc(void);
+int measure_mem_bw(const struct user_params *uparams,
+ struct resctrl_val_param *param, pid_t bm_pid,
+ const char *bw_report);
+void initialize_mem_bw_resctrl(const struct resctrl_val_param *param,
+ int domain_id);
int resctrl_val(const struct resctrl_test *test,
const struct user_params *uparams,
const char * const *benchmark_cmd,
struct resctrl_val_param *param);
-void tests_cleanup(void);
-void mbm_test_cleanup(void);
-void mba_test_cleanup(void);
unsigned long create_bit_mask(unsigned int start, unsigned int len);
unsigned int count_contiguous_bits(unsigned long val, unsigned int *start);
int get_full_cbm(const char *cache_type, unsigned long *mask);
@@ -166,19 +160,17 @@ int get_mask_no_shareable(const char *cache_type, unsigned long *mask);
int get_cache_size(int cpu_no, const char *cache_type, unsigned long *cache_size);
int resource_info_unsigned_get(const char *resource, const char *filename, unsigned int *val);
void ctrlc_handler(int signum, siginfo_t *info, void *ptr);
-int signal_handler_register(void);
+int signal_handler_register(const struct resctrl_test *test);
void signal_handler_unregister(void);
-void cat_test_cleanup(void);
unsigned int count_bits(unsigned long n);
-void cmt_test_cleanup(void);
void perf_event_attr_initialize(struct perf_event_attr *pea, __u64 config);
void perf_event_initialize_read_format(struct perf_event_read *pe_read);
int perf_open(struct perf_event_attr *pea, pid_t pid, int cpu_no);
int perf_event_reset_enable(int pe_fd);
int perf_event_measure(int pe_fd, struct perf_event_read *pe_read,
- const char *filename, int bm_pid);
-int measure_llc_resctrl(const char *filename, int bm_pid);
+ const char *filename, pid_t bm_pid);
+int measure_llc_resctrl(const char *filename, pid_t bm_pid);
void show_cache_info(int no_of_bits, __u64 avg_llc_val, size_t cache_span, bool lines);
/*
diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c
index f3dc1b9696e7..ecbb7605a981 100644
--- a/tools/testing/selftests/resctrl/resctrl_tests.c
+++ b/tools/testing/selftests/resctrl/resctrl_tests.c
@@ -81,19 +81,11 @@ static void cmd_help(void)
printf("\t-h: help\n");
}
-void tests_cleanup(void)
-{
- mbm_test_cleanup();
- mba_test_cleanup();
- cmt_test_cleanup();
- cat_test_cleanup();
-}
-
-static int test_prepare(void)
+static int test_prepare(const struct resctrl_test *test)
{
int res;
- res = signal_handler_register();
+ res = signal_handler_register(test);
if (res) {
ksft_print_msg("Failed to register signal handler\n");
return res;
@@ -108,8 +100,10 @@ static int test_prepare(void)
return 0;
}
-static void test_cleanup(void)
+static void test_cleanup(const struct resctrl_test *test)
{
+ if (test->cleanup)
+ test->cleanup();
umount_resctrlfs();
signal_handler_unregister();
}
@@ -136,7 +130,7 @@ static void run_single_test(const struct resctrl_test *test, const struct user_p
ksft_print_msg("Starting %s test ...\n", test->name);
- if (test_prepare()) {
+ if (test_prepare(test)) {
ksft_exit_fail_msg("Abnormal failure when preparing for the test\n");
return;
}
@@ -151,7 +145,7 @@ static void run_single_test(const struct resctrl_test *test, const struct user_p
ksft_test_result(!ret, "%s: test\n", test->name);
cleanup:
- test_cleanup();
+ test_cleanup(test);
}
static void init_user_params(struct user_params *uparams)
@@ -253,13 +247,13 @@ last_arg:
* 2. We execute perf commands
*/
if (geteuid() != 0)
- return ksft_exit_skip("Not running as root. Skipping...\n");
+ ksft_exit_skip("Not running as root. Skipping...\n");
if (!check_resctrlfs_support())
- return ksft_exit_skip("resctrl FS does not exist. Enable X86_CPU_RESCTRL config option.\n");
+ ksft_exit_skip("resctrl FS does not exist. Enable X86_CPU_RESCTRL config option.\n");
if (umount_resctrlfs())
- return ksft_exit_skip("resctrl FS unmount failed.\n");
+ ksft_exit_skip("resctrl FS unmount failed.\n");
filter_dmesg();
diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c
index 5a49f07a6c85..8c275f6b4dd7 100644
--- a/tools/testing/selftests/resctrl/resctrl_val.c
+++ b/tools/testing/selftests/resctrl/resctrl_val.c
@@ -19,30 +19,10 @@
#define MAX_TOKENS 5
#define READ 0
#define WRITE 1
-#define CON_MON_MBM_LOCAL_BYTES_PATH \
- "%s/%s/mon_groups/%s/mon_data/mon_L3_%02d/mbm_local_bytes"
#define CON_MBM_LOCAL_BYTES_PATH \
"%s/%s/mon_data/mon_L3_%02d/mbm_local_bytes"
-#define MON_MBM_LOCAL_BYTES_PATH \
- "%s/mon_groups/%s/mon_data/mon_L3_%02d/mbm_local_bytes"
-
-#define MBM_LOCAL_BYTES_PATH \
- "%s/mon_data/mon_L3_%02d/mbm_local_bytes"
-
-#define CON_MON_LCC_OCCUP_PATH \
- "%s/%s/mon_groups/%s/mon_data/mon_L3_%02d/llc_occupancy"
-
-#define CON_LCC_OCCUP_PATH \
- "%s/%s/mon_data/mon_L3_%02d/llc_occupancy"
-
-#define MON_LCC_OCCUP_PATH \
- "%s/mon_groups/%s/mon_data/mon_L3_%02d/llc_occupancy"
-
-#define LCC_OCCUP_PATH \
- "%s/mon_data/mon_L3_%02d/llc_occupancy"
-
struct membw_read_format {
__u64 value; /* The value of the event */
__u64 time_enabled; /* if PERF_FORMAT_TOTAL_TIME_ENABLED */
@@ -62,6 +42,7 @@ struct imc_counter_config {
static char mbm_total_path[1024];
static int imcs;
static struct imc_counter_config imc_counters_config[MAX_IMCS][2];
+static const struct resctrl_test *current_test;
void membw_initialize_perf_event_attr(int i, int j)
{
@@ -275,7 +256,7 @@ static int num_of_imcs(void)
return count;
}
-static int initialize_mem_bw_imc(void)
+int initialize_mem_bw_imc(void)
{
int imc, j;
@@ -292,44 +273,93 @@ static int initialize_mem_bw_imc(void)
return 0;
}
+static void perf_close_imc_mem_bw(void)
+{
+ int mc;
+
+ for (mc = 0; mc < imcs; mc++) {
+ if (imc_counters_config[mc][READ].fd != -1)
+ close(imc_counters_config[mc][READ].fd);
+ if (imc_counters_config[mc][WRITE].fd != -1)
+ close(imc_counters_config[mc][WRITE].fd);
+ }
+}
+
/*
- * get_mem_bw_imc: Memory band width as reported by iMC counters
- * @cpu_no: CPU number that the benchmark PID is binded to
- * @bw_report: Bandwidth report type (reads, writes)
- *
- * Memory B/W utilized by a process on a socket can be calculated using
- * iMC counters. Perf events are used to read these counters.
+ * perf_open_imc_mem_bw - Open perf fds for IMCs
+ * @cpu_no: CPU number that the benchmark PID is bound to
*
* Return: = 0 on success. < 0 on failure.
*/
-static int get_mem_bw_imc(int cpu_no, char *bw_report, float *bw_imc)
+static int perf_open_imc_mem_bw(int cpu_no)
{
- float reads, writes, of_mul_read, of_mul_write;
- int imc, j, ret;
+ int imc, ret;
- /* Start all iMC counters to log values (both read and write) */
- reads = 0, writes = 0, of_mul_read = 1, of_mul_write = 1;
for (imc = 0; imc < imcs; imc++) {
- for (j = 0; j < 2; j++) {
- ret = open_perf_event(imc, cpu_no, j);
- if (ret)
- return -1;
- }
- for (j = 0; j < 2; j++)
- membw_ioctl_perf_event_ioc_reset_enable(imc, j);
+ imc_counters_config[imc][READ].fd = -1;
+ imc_counters_config[imc][WRITE].fd = -1;
+ }
+
+ for (imc = 0; imc < imcs; imc++) {
+ ret = open_perf_event(imc, cpu_no, READ);
+ if (ret)
+ goto close_fds;
+ ret = open_perf_event(imc, cpu_no, WRITE);
+ if (ret)
+ goto close_fds;
+ }
+
+ return 0;
+
+close_fds:
+ perf_close_imc_mem_bw();
+ return -1;
+}
+
+/*
+ * do_mem_bw_test - Perform memory bandwidth test
+ *
+ * Runs memory bandwidth test over one second period. Also, handles starting
+ * and stopping of the IMC perf counters around the test.
+ */
+static void do_imc_mem_bw_test(void)
+{
+ int imc;
+
+ for (imc = 0; imc < imcs; imc++) {
+ membw_ioctl_perf_event_ioc_reset_enable(imc, READ);
+ membw_ioctl_perf_event_ioc_reset_enable(imc, WRITE);
}
sleep(1);
/* Stop counters after a second to get results (both read and write) */
for (imc = 0; imc < imcs; imc++) {
- for (j = 0; j < 2; j++)
- membw_ioctl_perf_event_ioc_disable(imc, j);
+ membw_ioctl_perf_event_ioc_disable(imc, READ);
+ membw_ioctl_perf_event_ioc_disable(imc, WRITE);
}
+}
+
+/*
+ * get_mem_bw_imc - Memory bandwidth as reported by iMC counters
+ * @bw_report: Bandwidth report type (reads, writes)
+ *
+ * Memory bandwidth utilized by a process on a socket can be calculated
+ * using iMC counters. Perf events are used to read these counters.
+ *
+ * Return: = 0 on success. < 0 on failure.
+ */
+static int get_mem_bw_imc(const char *bw_report, float *bw_imc)
+{
+ float reads, writes, of_mul_read, of_mul_write;
+ int imc;
+
+ /* Start all iMC counters to log values (both read and write) */
+ reads = 0, writes = 0, of_mul_read = 1, of_mul_write = 1;
/*
* Get results which are stored in struct type imc_counter_config
- * Take over flow into consideration before calculating total b/w
+ * Take overflow into consideration before calculating total bandwidth.
*/
for (imc = 0; imc < imcs; imc++) {
struct imc_counter_config *r =
@@ -339,15 +369,13 @@ static int get_mem_bw_imc(int cpu_no, char *bw_report, float *bw_imc)
if (read(r->fd, &r->return_value,
sizeof(struct membw_read_format)) == -1) {
- ksft_perror("Couldn't get read b/w through iMC");
-
+ ksft_perror("Couldn't get read bandwidth through iMC");
return -1;
}
if (read(w->fd, &w->return_value,
sizeof(struct membw_read_format)) == -1) {
- ksft_perror("Couldn't get write bw through iMC");
-
+ ksft_perror("Couldn't get write bandwidth through iMC");
return -1;
}
@@ -368,11 +396,6 @@ static int get_mem_bw_imc(int cpu_no, char *bw_report, float *bw_imc)
writes += w->return_value.value * of_mul_write * SCALE;
}
- for (imc = 0; imc < imcs; imc++) {
- close(imc_counters_config[imc][READ].fd);
- close(imc_counters_config[imc][WRITE].fd);
- }
-
if (strcmp(bw_report, "reads") == 0) {
*bw_imc = reads;
return 0;
@@ -387,84 +410,45 @@ static int get_mem_bw_imc(int cpu_no, char *bw_report, float *bw_imc)
return 0;
}
-void set_mbm_path(const char *ctrlgrp, const char *mongrp, int domain_id)
+/*
+ * initialize_mem_bw_resctrl: Appropriately populate "mbm_total_path"
+ * @param: Parameters passed to resctrl_val()
+ * @domain_id: Domain ID (cache ID; for MB, L3 cache ID)
+ */
+void initialize_mem_bw_resctrl(const struct resctrl_val_param *param,
+ int domain_id)
{
- if (ctrlgrp && mongrp)
- sprintf(mbm_total_path, CON_MON_MBM_LOCAL_BYTES_PATH,
- RESCTRL_PATH, ctrlgrp, mongrp, domain_id);
- else if (!ctrlgrp && mongrp)
- sprintf(mbm_total_path, MON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH,
- mongrp, domain_id);
- else if (ctrlgrp && !mongrp)
- sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH,
- ctrlgrp, domain_id);
- else if (!ctrlgrp && !mongrp)
- sprintf(mbm_total_path, MBM_LOCAL_BYTES_PATH, RESCTRL_PATH,
- domain_id);
+ sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH,
+ param->ctrlgrp, domain_id);
}
/*
- * initialize_mem_bw_resctrl: Appropriately populate "mbm_total_path"
- * @ctrlgrp: Name of the control monitor group (con_mon grp)
- * @mongrp: Name of the monitor group (mon grp)
- * @cpu_no: CPU number that the benchmark PID is binded to
- * @resctrl_val: Resctrl feature (Eg: mbm, mba.. etc)
+ * Open file to read MBM local bytes from resctrl FS
*/
-static void initialize_mem_bw_resctrl(const char *ctrlgrp, const char *mongrp,
- int cpu_no, char *resctrl_val)
+static FILE *open_mem_bw_resctrl(const char *mbm_bw_file)
{
- int domain_id;
-
- if (get_domain_id("MB", cpu_no, &domain_id) < 0) {
- ksft_print_msg("Could not get domain ID\n");
- return;
- }
+ FILE *fp;
- if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)))
- set_mbm_path(ctrlgrp, mongrp, domain_id);
+ fp = fopen(mbm_bw_file, "r");
+ if (!fp)
+ ksft_perror("Failed to open total memory bandwidth file");
- if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) {
- if (ctrlgrp)
- sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH,
- RESCTRL_PATH, ctrlgrp, domain_id);
- else
- sprintf(mbm_total_path, MBM_LOCAL_BYTES_PATH,
- RESCTRL_PATH, domain_id);
- }
+ return fp;
}
/*
* Get MBM Local bytes as reported by resctrl FS
- * For MBM,
- * 1. If con_mon grp and mon grp are given, then read from con_mon grp's mon grp
- * 2. If only con_mon grp is given, then read from con_mon grp
- * 3. If both are not given, then read from root con_mon grp
- * For MBA,
- * 1. If con_mon grp is given, then read from it
- * 2. If con_mon grp is not given, then read from root con_mon grp
*/
-static int get_mem_bw_resctrl(unsigned long *mbm_total)
+static int get_mem_bw_resctrl(FILE *fp, unsigned long *mbm_total)
{
- FILE *fp;
-
- fp = fopen(mbm_total_path, "r");
- if (!fp) {
- ksft_perror("Failed to open total bw file");
-
+ if (fscanf(fp, "%lu\n", mbm_total) <= 0) {
+ ksft_perror("Could not get MBM local bytes");
return -1;
}
- if (fscanf(fp, "%lu", mbm_total) <= 0) {
- ksft_perror("Could not get mbm local bytes");
- fclose(fp);
-
- return -1;
- }
- fclose(fp);
-
return 0;
}
-pid_t bm_pid, ppid;
+static pid_t bm_pid, ppid;
void ctrlc_handler(int signum, siginfo_t *info, void *ptr)
{
@@ -472,7 +456,8 @@ void ctrlc_handler(int signum, siginfo_t *info, void *ptr)
if (bm_pid)
kill(bm_pid, SIGKILL);
umount_resctrlfs();
- tests_cleanup();
+ if (current_test && current_test->cleanup)
+ current_test->cleanup();
ksft_print_msg("Ending\n\n");
exit(EXIT_SUCCESS);
@@ -482,13 +467,14 @@ void ctrlc_handler(int signum, siginfo_t *info, void *ptr)
* Register CTRL-C handler for parent, as it has to kill
* child process before exiting.
*/
-int signal_handler_register(void)
+int signal_handler_register(const struct resctrl_test *test)
{
struct sigaction sigact = {};
int ret = 0;
bm_pid = 0;
+ current_test = test;
sigact.sa_sigaction = ctrlc_handler;
sigemptyset(&sigact.sa_mask);
sigact.sa_flags = SA_SIGINFO;
@@ -510,6 +496,7 @@ void signal_handler_unregister(void)
{
struct sigaction sigact = {};
+ current_test = NULL;
sigact.sa_handler = SIG_DFL;
sigemptyset(&sigact.sa_mask);
if (sigaction(SIGINT, &sigact, NULL) ||
@@ -519,6 +506,13 @@ void signal_handler_unregister(void)
}
}
+static void parent_exit(pid_t ppid)
+{
+ kill(ppid, SIGKILL);
+ umount_resctrlfs();
+ exit(EXIT_FAILURE);
+}
+
/*
* print_results_bw: the memory bandwidth results are stored in a file
* @filename: file that stores the results
@@ -528,14 +522,14 @@ void signal_handler_unregister(void)
*
* Return: 0 on success, < 0 on error.
*/
-static int print_results_bw(char *filename, int bm_pid, float bw_imc,
+static int print_results_bw(char *filename, pid_t bm_pid, float bw_imc,
unsigned long bw_resc)
{
unsigned long diff = fabs(bw_imc - bw_resc);
FILE *fp;
if (strcmp(filename, "stdio") == 0 || strcmp(filename, "stderr") == 0) {
- printf("Pid: %d \t Mem_BW_iMC: %f \t ", bm_pid, bw_imc);
+ printf("Pid: %d \t Mem_BW_iMC: %f \t ", (int)bm_pid, bw_imc);
printf("Mem_BW_resc: %lu \t Difference: %lu\n", bw_resc, diff);
} else {
fp = fopen(filename, "a");
@@ -545,7 +539,7 @@ static int print_results_bw(char *filename, int bm_pid, float bw_imc,
return -1;
}
if (fprintf(fp, "Pid: %d \t Mem_BW_iMC: %f \t Mem_BW_resc: %lu \t Difference: %lu\n",
- bm_pid, bw_imc, bw_resc, diff) <= 0) {
+ (int)bm_pid, bw_imc, bw_resc, diff) <= 0) {
ksft_print_msg("Could not log results\n");
fclose(fp);
@@ -557,73 +551,67 @@ static int print_results_bw(char *filename, int bm_pid, float bw_imc,
return 0;
}
-static void set_cmt_path(const char *ctrlgrp, const char *mongrp, char sock_num)
-{
- if (strlen(ctrlgrp) && strlen(mongrp))
- sprintf(llc_occup_path, CON_MON_LCC_OCCUP_PATH, RESCTRL_PATH,
- ctrlgrp, mongrp, sock_num);
- else if (!strlen(ctrlgrp) && strlen(mongrp))
- sprintf(llc_occup_path, MON_LCC_OCCUP_PATH, RESCTRL_PATH,
- mongrp, sock_num);
- else if (strlen(ctrlgrp) && !strlen(mongrp))
- sprintf(llc_occup_path, CON_LCC_OCCUP_PATH, RESCTRL_PATH,
- ctrlgrp, sock_num);
- else if (!strlen(ctrlgrp) && !strlen(mongrp))
- sprintf(llc_occup_path, LCC_OCCUP_PATH, RESCTRL_PATH, sock_num);
-}
-
/*
- * initialize_llc_occu_resctrl: Appropriately populate "llc_occup_path"
- * @ctrlgrp: Name of the control monitor group (con_mon grp)
- * @mongrp: Name of the monitor group (mon grp)
- * @cpu_no: CPU number that the benchmark PID is binded to
- * @resctrl_val: Resctrl feature (Eg: cat, cmt.. etc)
+ * measure_mem_bw - Measures memory bandwidth numbers while benchmark runs
+ * @uparams: User supplied parameters
+ * @param: Parameters passed to resctrl_val()
+ * @bm_pid: PID that runs the benchmark
+ * @bw_report: Bandwidth report type (reads, writes)
+ *
+ * Measure memory bandwidth from resctrl and from another source which is
+ * perf imc value or could be something else if perf imc event is not
+ * available. Compare the two values to validate resctrl value. It takes
+ * 1 sec to measure the data.
*/
-static void initialize_llc_occu_resctrl(const char *ctrlgrp, const char *mongrp,
- int cpu_no, char *resctrl_val)
+int measure_mem_bw(const struct user_params *uparams,
+ struct resctrl_val_param *param, pid_t bm_pid,
+ const char *bw_report)
{
- int domain_id;
+ unsigned long bw_resc, bw_resc_start, bw_resc_end;
+ FILE *mem_bw_fp;
+ float bw_imc;
+ int ret;
- if (get_domain_id("L3", cpu_no, &domain_id) < 0) {
- ksft_print_msg("Could not get domain ID\n");
- return;
- }
+ bw_report = get_bw_report_type(bw_report);
+ if (!bw_report)
+ return -1;
- if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)))
- set_cmt_path(ctrlgrp, mongrp, domain_id);
-}
+ mem_bw_fp = open_mem_bw_resctrl(mbm_total_path);
+ if (!mem_bw_fp)
+ return -1;
-static int measure_vals(const struct user_params *uparams,
- struct resctrl_val_param *param,
- unsigned long *bw_resc_start)
-{
- unsigned long bw_resc, bw_resc_end;
- float bw_imc;
- int ret;
+ ret = perf_open_imc_mem_bw(uparams->cpu);
+ if (ret < 0)
+ goto close_fp;
- /*
- * Measure memory bandwidth from resctrl and from
- * another source which is perf imc value or could
- * be something else if perf imc event is not available.
- * Compare the two values to validate resctrl value.
- * It takes 1sec to measure the data.
- */
- ret = get_mem_bw_imc(uparams->cpu, param->bw_report, &bw_imc);
+ ret = get_mem_bw_resctrl(mem_bw_fp, &bw_resc_start);
if (ret < 0)
- return ret;
+ goto close_imc;
+
+ rewind(mem_bw_fp);
+
+ do_imc_mem_bw_test();
- ret = get_mem_bw_resctrl(&bw_resc_end);
+ ret = get_mem_bw_resctrl(mem_bw_fp, &bw_resc_end);
if (ret < 0)
- return ret;
+ goto close_imc;
- bw_resc = (bw_resc_end - *bw_resc_start) / MB;
- ret = print_results_bw(param->filename, bm_pid, bw_imc, bw_resc);
- if (ret)
- return ret;
+ ret = get_mem_bw_imc(bw_report, &bw_imc);
+ if (ret < 0)
+ goto close_imc;
- *bw_resc_start = bw_resc_end;
+ perf_close_imc_mem_bw();
+ fclose(mem_bw_fp);
- return 0;
+ bw_resc = (bw_resc_end - bw_resc_start) / MB;
+
+ return print_results_bw(param->filename, bm_pid, bw_imc, bw_resc);
+
+close_imc:
+ perf_close_imc_mem_bw();
+close_fp:
+ fclose(mem_bw_fp);
+ return ret;
}
/*
@@ -650,7 +638,7 @@ static void run_benchmark(int signum, siginfo_t *info, void *ucontext)
fp = freopen("/dev/null", "w", stdout);
if (!fp) {
ksft_perror("Unable to direct benchmark status to /dev/null");
- PARENT_EXIT();
+ parent_exit(ppid);
}
if (strcmp(benchmark_cmd[0], "fill_buf") == 0) {
@@ -664,7 +652,7 @@ static void run_benchmark(int signum, siginfo_t *info, void *ucontext)
once = false;
} else {
ksft_print_msg("Invalid once parameter\n");
- PARENT_EXIT();
+ parent_exit(ppid);
}
if (run_fill_buf(span, memflush, operation, once))
@@ -678,7 +666,7 @@ static void run_benchmark(int signum, siginfo_t *info, void *ucontext)
fclose(stdout);
ksft_print_msg("Unable to run specified benchmark\n");
- PARENT_EXIT();
+ parent_exit(ppid);
}
/*
@@ -696,21 +684,19 @@ int resctrl_val(const struct resctrl_test *test,
const char * const *benchmark_cmd,
struct resctrl_val_param *param)
{
- char *resctrl_val = param->resctrl_val;
- unsigned long bw_resc_start = 0;
struct sigaction sigact;
int ret = 0, pipefd[2];
char pipe_message = 0;
union sigval value;
+ int domain_id;
if (strcmp(param->filename, "") == 0)
sprintf(param->filename, "stdio");
- if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR)) ||
- !strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) {
- ret = validate_bw_report_request(param->bw_report);
- if (ret)
- return ret;
+ ret = get_domain_id(test->resource, uparams->cpu, &domain_id);
+ if (ret < 0) {
+ ksft_print_msg("Could not get domain ID\n");
+ return ret;
}
/*
@@ -751,7 +737,7 @@ int resctrl_val(const struct resctrl_test *test,
/* Register for "SIGUSR1" signal from parent */
if (sigaction(SIGUSR1, &sigact, NULL)) {
ksft_perror("Can't register child for signal");
- PARENT_EXIT();
+ parent_exit(ppid);
}
/* Tell parent that child is ready */
@@ -769,10 +755,10 @@ int resctrl_val(const struct resctrl_test *test,
sigsuspend(&sigact.sa_mask);
ksft_perror("Child is done");
- PARENT_EXIT();
+ parent_exit(ppid);
}
- ksft_print_msg("Benchmark PID: %d\n", bm_pid);
+ ksft_print_msg("Benchmark PID: %d\n", (int)bm_pid);
/*
* The cast removes constness but nothing mutates benchmark_cmd within
@@ -788,22 +774,15 @@ int resctrl_val(const struct resctrl_test *test,
goto out;
/* Write benchmark to specified control&monitoring grp in resctrl FS */
- ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp,
- resctrl_val);
+ ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp);
if (ret)
goto out;
- if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) ||
- !strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) {
- ret = initialize_mem_bw_imc();
+ if (param->init) {
+ ret = param->init(param, domain_id);
if (ret)
goto out;
-
- initialize_mem_bw_resctrl(param->ctrlgrp, param->mongrp,
- uparams->cpu, resctrl_val);
- } else if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)))
- initialize_llc_occu_resctrl(param->ctrlgrp, param->mongrp,
- uparams->cpu, resctrl_val);
+ }
/* Parent waits for child to be ready. */
close(pipefd[1]);
@@ -837,17 +816,9 @@ int resctrl_val(const struct resctrl_test *test,
if (ret < 0)
break;
- if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) ||
- !strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) {
- ret = measure_vals(uparams, param, &bw_resc_start);
- if (ret)
- break;
- } else if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) {
- sleep(1);
- ret = measure_llc_resctrl(param->filename, bm_pid);
- if (ret)
- break;
- }
+ ret = param->measure(uparams, param, bm_pid);
+ if (ret)
+ break;
}
out:
diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c
index 1cade75176eb..250c320349a7 100644
--- a/tools/testing/selftests/resctrl/resctrlfs.c
+++ b/tools/testing/selftests/resctrl/resctrlfs.c
@@ -456,6 +456,9 @@ int taskset_restore(pid_t bm_pid, cpu_set_t *old_affinity)
* @grp: Full path and name of the group
* @parent_grp: Full path and name of the parent group
*
+ * Creates a group @grp_name if it does not exist yet. If @grp_name is NULL,
+ * it is interpreted as the root group which always results in success.
+ *
* Return: 0 on success, < 0 on error.
*/
static int create_grp(const char *grp_name, char *grp, const char *parent_grp)
@@ -464,12 +467,7 @@ static int create_grp(const char *grp_name, char *grp, const char *parent_grp)
struct dirent *ep;
DIR *dp;
- /*
- * At this point, we are guaranteed to have resctrl FS mounted and if
- * length of grp_name == 0, it means, user wants to use root con_mon
- * grp, so do nothing
- */
- if (strlen(grp_name) == 0)
+ if (!grp_name)
return 0;
/* Check if requested grp exists or not */
@@ -508,7 +506,7 @@ static int write_pid_to_tasks(char *tasks, pid_t pid)
return -1;
}
- if (fprintf(fp, "%d\n", pid) < 0) {
+ if (fprintf(fp, "%d\n", (int)pid) < 0) {
ksft_print_msg("Failed to write pid to tasks file\n");
fclose(fp);
@@ -524,7 +522,6 @@ static int write_pid_to_tasks(char *tasks, pid_t pid)
* @bm_pid: PID that should be written
* @ctrlgrp: Name of the control monitor group (con_mon grp)
* @mongrp: Name of the monitor group (mon grp)
- * @resctrl_val: Resctrl feature (Eg: mbm, mba.. etc)
*
* If a con_mon grp is requested, create it and write pid to it, otherwise
* write pid to root con_mon grp.
@@ -534,14 +531,13 @@ static int write_pid_to_tasks(char *tasks, pid_t pid)
*
* Return: 0 on success, < 0 on error.
*/
-int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp,
- char *resctrl_val)
+int write_bm_pid_to_resctrl(pid_t bm_pid, const char *ctrlgrp, const char *mongrp)
{
char controlgroup[128], monitorgroup[512], monitorgroup_p[256];
char tasks[1024];
int ret = 0;
- if (strlen(ctrlgrp))
+ if (ctrlgrp)
sprintf(controlgroup, "%s/%s", RESCTRL_PATH, ctrlgrp);
else
sprintf(controlgroup, "%s", RESCTRL_PATH);
@@ -555,22 +551,19 @@ int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp,
if (ret)
goto out;
- /* Create mon grp and write pid into it for "mbm" and "cmt" test */
- if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)) ||
- !strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) {
- if (strlen(mongrp)) {
- sprintf(monitorgroup_p, "%s/mon_groups", controlgroup);
- sprintf(monitorgroup, "%s/%s", monitorgroup_p, mongrp);
- ret = create_grp(mongrp, monitorgroup, monitorgroup_p);
- if (ret)
- goto out;
-
- sprintf(tasks, "%s/mon_groups/%s/tasks",
- controlgroup, mongrp);
- ret = write_pid_to_tasks(tasks, bm_pid);
- if (ret)
- goto out;
- }
+ /* Create monitor group and write pid into if it is used */
+ if (mongrp) {
+ sprintf(monitorgroup_p, "%s/mon_groups", controlgroup);
+ sprintf(monitorgroup, "%s/%s", monitorgroup_p, mongrp);
+ ret = create_grp(mongrp, monitorgroup, monitorgroup_p);
+ if (ret)
+ goto out;
+
+ sprintf(tasks, "%s/mon_groups/%s/tasks",
+ controlgroup, mongrp);
+ ret = write_pid_to_tasks(tasks, bm_pid);
+ if (ret)
+ goto out;
}
out:
@@ -593,7 +586,8 @@ out:
*
* Return: 0 on success, < 0 on error.
*/
-int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, const char *resource)
+int write_schemata(const char *ctrlgrp, char *schemata, int cpu_no,
+ const char *resource)
{
char controlgroup[1024], reason[128], schema[1024] = {};
int domain_id, fd, schema_len, ret = 0;
@@ -611,7 +605,7 @@ int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, const char *resour
goto out;
}
- if (strlen(ctrlgrp) != 0)
+ if (ctrlgrp)
sprintf(controlgroup, "%s/%s/schemata", RESCTRL_PATH, ctrlgrp);
else
sprintf(controlgroup, "%s/schemata", RESCTRL_PATH);
@@ -837,22 +831,21 @@ int filter_dmesg(void)
return 0;
}
-int validate_bw_report_request(char *bw_report)
+const char *get_bw_report_type(const char *bw_report)
{
if (strcmp(bw_report, "reads") == 0)
- return 0;
+ return bw_report;
if (strcmp(bw_report, "writes") == 0)
- return 0;
+ return bw_report;
if (strcmp(bw_report, "nt-writes") == 0) {
- strcpy(bw_report, "writes");
- return 0;
+ return "writes";
}
if (strcmp(bw_report, "total") == 0)
- return 0;
+ return bw_report;
- fprintf(stderr, "Requested iMC B/W report type unavailable\n");
+ fprintf(stderr, "Requested iMC bandwidth report type unavailable\n");
- return -1;
+ return NULL;
}
int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu,
diff --git a/tools/testing/selftests/ring-buffer/.gitignore b/tools/testing/selftests/ring-buffer/.gitignore
new file mode 100644
index 000000000000..3aed1a2a6c67
--- /dev/null
+++ b/tools/testing/selftests/ring-buffer/.gitignore
@@ -0,0 +1 @@
+map_test
diff --git a/tools/testing/selftests/ring-buffer/Makefile b/tools/testing/selftests/ring-buffer/Makefile
new file mode 100644
index 000000000000..23605782639e
--- /dev/null
+++ b/tools/testing/selftests/ring-buffer/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -Wl,-no-as-needed -Wall
+CFLAGS += $(KHDR_INCLUDES)
+
+TEST_GEN_PROGS = map_test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/ring-buffer/config b/tools/testing/selftests/ring-buffer/config
new file mode 100644
index 000000000000..d936f8f00e78
--- /dev/null
+++ b/tools/testing/selftests/ring-buffer/config
@@ -0,0 +1,2 @@
+CONFIG_FTRACE=y
+CONFIG_TRACER_SNAPSHOT=y
diff --git a/tools/testing/selftests/ring-buffer/map_test.c b/tools/testing/selftests/ring-buffer/map_test.c
new file mode 100644
index 000000000000..a9006fa7097e
--- /dev/null
+++ b/tools/testing/selftests/ring-buffer/map_test.c
@@ -0,0 +1,294 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Ring-buffer memory mapping tests
+ *
+ * Copyright (c) 2024 Vincent Donnefort <vdonnefort@google.com>
+ */
+#include <fcntl.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <linux/trace_mmap.h>
+
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+
+#include "../user_events/user_events_selftests.h" /* share tracefs setup */
+#include "../kselftest_harness.h"
+
+#define TRACEFS_ROOT "/sys/kernel/tracing"
+
+static int __tracefs_write(const char *path, const char *value)
+{
+ int fd, ret;
+
+ fd = open(path, O_WRONLY | O_TRUNC);
+ if (fd < 0)
+ return fd;
+
+ ret = write(fd, value, strlen(value));
+
+ close(fd);
+
+ return ret == -1 ? -errno : 0;
+}
+
+static int __tracefs_write_int(const char *path, int value)
+{
+ char *str;
+ int ret;
+
+ if (asprintf(&str, "%d", value) < 0)
+ return -1;
+
+ ret = __tracefs_write(path, str);
+
+ free(str);
+
+ return ret;
+}
+
+#define tracefs_write_int(path, value) \
+ ASSERT_EQ(__tracefs_write_int((path), (value)), 0)
+
+#define tracefs_write(path, value) \
+ ASSERT_EQ(__tracefs_write((path), (value)), 0)
+
+static int tracefs_reset(void)
+{
+ if (__tracefs_write_int(TRACEFS_ROOT"/tracing_on", 0))
+ return -1;
+ if (__tracefs_write(TRACEFS_ROOT"/trace", ""))
+ return -1;
+ if (__tracefs_write(TRACEFS_ROOT"/set_event", ""))
+ return -1;
+ if (__tracefs_write(TRACEFS_ROOT"/current_tracer", "nop"))
+ return -1;
+
+ return 0;
+}
+
+struct tracefs_cpu_map_desc {
+ struct trace_buffer_meta *meta;
+ int cpu_fd;
+};
+
+int tracefs_cpu_map(struct tracefs_cpu_map_desc *desc, int cpu)
+{
+ int page_size = getpagesize();
+ char *cpu_path;
+ void *map;
+
+ if (asprintf(&cpu_path,
+ TRACEFS_ROOT"/per_cpu/cpu%d/trace_pipe_raw",
+ cpu) < 0)
+ return -ENOMEM;
+
+ desc->cpu_fd = open(cpu_path, O_RDONLY | O_NONBLOCK);
+ free(cpu_path);
+ if (desc->cpu_fd < 0)
+ return -ENODEV;
+
+ map = mmap(NULL, page_size, PROT_READ, MAP_SHARED, desc->cpu_fd, 0);
+ if (map == MAP_FAILED)
+ return -errno;
+
+ desc->meta = (struct trace_buffer_meta *)map;
+
+ return 0;
+}
+
+void tracefs_cpu_unmap(struct tracefs_cpu_map_desc *desc)
+{
+ munmap(desc->meta, desc->meta->meta_page_size);
+ close(desc->cpu_fd);
+}
+
+FIXTURE(map) {
+ struct tracefs_cpu_map_desc map_desc;
+ bool umount;
+};
+
+FIXTURE_VARIANT(map) {
+ int subbuf_size;
+};
+
+FIXTURE_VARIANT_ADD(map, subbuf_size_4k) {
+ .subbuf_size = 4,
+};
+
+FIXTURE_VARIANT_ADD(map, subbuf_size_8k) {
+ .subbuf_size = 8,
+};
+
+FIXTURE_SETUP(map)
+{
+ int cpu = sched_getcpu();
+ cpu_set_t cpu_mask;
+ bool fail, umount;
+ char *message;
+
+ if (getuid() != 0)
+ SKIP(return, "Skipping: %s", "Please run the test as root");
+
+ if (!tracefs_enabled(&message, &fail, &umount)) {
+ if (fail) {
+ TH_LOG("Tracefs setup failed: %s", message);
+ ASSERT_FALSE(fail);
+ }
+ SKIP(return, "Skipping: %s", message);
+ }
+
+ self->umount = umount;
+
+ ASSERT_GE(cpu, 0);
+
+ ASSERT_EQ(tracefs_reset(), 0);
+
+ tracefs_write_int(TRACEFS_ROOT"/buffer_subbuf_size_kb", variant->subbuf_size);
+
+ ASSERT_EQ(tracefs_cpu_map(&self->map_desc, cpu), 0);
+
+ /*
+ * Ensure generated events will be found on this very same ring-buffer.
+ */
+ CPU_ZERO(&cpu_mask);
+ CPU_SET(cpu, &cpu_mask);
+ ASSERT_EQ(sched_setaffinity(0, sizeof(cpu_mask), &cpu_mask), 0);
+}
+
+FIXTURE_TEARDOWN(map)
+{
+ tracefs_reset();
+
+ if (self->umount)
+ tracefs_unmount();
+
+ tracefs_cpu_unmap(&self->map_desc);
+}
+
+TEST_F(map, meta_page_check)
+{
+ struct tracefs_cpu_map_desc *desc = &self->map_desc;
+ int cnt = 0;
+
+ ASSERT_EQ(desc->meta->entries, 0);
+ ASSERT_EQ(desc->meta->overrun, 0);
+ ASSERT_EQ(desc->meta->read, 0);
+
+ ASSERT_EQ(desc->meta->reader.id, 0);
+ ASSERT_EQ(desc->meta->reader.read, 0);
+
+ ASSERT_EQ(ioctl(desc->cpu_fd, TRACE_MMAP_IOCTL_GET_READER), 0);
+ ASSERT_EQ(desc->meta->reader.id, 0);
+
+ tracefs_write_int(TRACEFS_ROOT"/tracing_on", 1);
+ for (int i = 0; i < 16; i++)
+ tracefs_write_int(TRACEFS_ROOT"/trace_marker", i);
+again:
+ ASSERT_EQ(ioctl(desc->cpu_fd, TRACE_MMAP_IOCTL_GET_READER), 0);
+
+ ASSERT_EQ(desc->meta->entries, 16);
+ ASSERT_EQ(desc->meta->overrun, 0);
+ ASSERT_EQ(desc->meta->read, 16);
+
+ ASSERT_EQ(desc->meta->reader.id, 1);
+
+ if (!(cnt++))
+ goto again;
+}
+
+TEST_F(map, data_mmap)
+{
+ struct tracefs_cpu_map_desc *desc = &self->map_desc;
+ unsigned long meta_len, data_len;
+ void *data;
+
+ meta_len = desc->meta->meta_page_size;
+ data_len = desc->meta->subbuf_size * desc->meta->nr_subbufs;
+
+ /* Map all the available subbufs */
+ data = mmap(NULL, data_len, PROT_READ, MAP_SHARED,
+ desc->cpu_fd, meta_len);
+ ASSERT_NE(data, MAP_FAILED);
+ munmap(data, data_len);
+
+ /* Map all the available subbufs - 1 */
+ data_len -= desc->meta->subbuf_size;
+ data = mmap(NULL, data_len, PROT_READ, MAP_SHARED,
+ desc->cpu_fd, meta_len);
+ ASSERT_NE(data, MAP_FAILED);
+ munmap(data, data_len);
+
+ /* Overflow the available subbufs by 1 */
+ meta_len += desc->meta->subbuf_size * 2;
+ data = mmap(NULL, data_len, PROT_READ, MAP_SHARED,
+ desc->cpu_fd, meta_len);
+ ASSERT_EQ(data, MAP_FAILED);
+}
+
+FIXTURE(snapshot) {
+ bool umount;
+};
+
+FIXTURE_SETUP(snapshot)
+{
+ bool fail, umount;
+ struct stat sb;
+ char *message;
+
+ if (getuid() != 0)
+ SKIP(return, "Skipping: %s", "Please run the test as root");
+
+ if (stat(TRACEFS_ROOT"/snapshot", &sb))
+ SKIP(return, "Skipping: %s", "snapshot not available");
+
+ if (!tracefs_enabled(&message, &fail, &umount)) {
+ if (fail) {
+ TH_LOG("Tracefs setup failed: %s", message);
+ ASSERT_FALSE(fail);
+ }
+ SKIP(return, "Skipping: %s", message);
+ }
+
+ self->umount = umount;
+}
+
+FIXTURE_TEARDOWN(snapshot)
+{
+ __tracefs_write(TRACEFS_ROOT"/events/sched/sched_switch/trigger",
+ "!snapshot");
+ tracefs_reset();
+
+ if (self->umount)
+ tracefs_unmount();
+}
+
+TEST_F(snapshot, excludes_map)
+{
+ struct tracefs_cpu_map_desc map_desc;
+ int cpu = sched_getcpu();
+
+ ASSERT_GE(cpu, 0);
+ tracefs_write(TRACEFS_ROOT"/events/sched/sched_switch/trigger",
+ "snapshot");
+ ASSERT_EQ(tracefs_cpu_map(&map_desc, cpu), -EBUSY);
+}
+
+TEST_F(snapshot, excluded_by_map)
+{
+ struct tracefs_cpu_map_desc map_desc;
+ int cpu = sched_getcpu();
+
+ ASSERT_EQ(tracefs_cpu_map(&map_desc, cpu), 0);
+
+ ASSERT_EQ(__tracefs_write(TRACEFS_ROOT"/events/sched/sched_switch/trigger",
+ "snapshot"), -EBUSY);
+ ASSERT_EQ(__tracefs_write(TRACEFS_ROOT"/snapshot",
+ "1"), -EBUSY);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/riscv/Makefile b/tools/testing/selftests/riscv/Makefile
index 4a9ff515a3a0..7ce03d832b64 100644
--- a/tools/testing/selftests/riscv/Makefile
+++ b/tools/testing/selftests/riscv/Makefile
@@ -5,7 +5,7 @@
ARCH ?= $(shell uname -m 2>/dev/null || echo not)
ifneq (,$(filter $(ARCH),riscv))
-RISCV_SUBTARGETS ?= hwprobe vector mm
+RISCV_SUBTARGETS ?= hwprobe vector mm sigreturn
else
RISCV_SUBTARGETS :=
endif
diff --git a/tools/testing/selftests/riscv/hwprobe/.gitignore b/tools/testing/selftests/riscv/hwprobe/.gitignore
index 8113dc3bdd03..6e384e80ea1a 100644
--- a/tools/testing/selftests/riscv/hwprobe/.gitignore
+++ b/tools/testing/selftests/riscv/hwprobe/.gitignore
@@ -1 +1,3 @@
hwprobe
+cbo
+which-cpus
diff --git a/tools/testing/selftests/riscv/mm/Makefile b/tools/testing/selftests/riscv/mm/Makefile
index c333263f2b27..4664ed79e20b 100644
--- a/tools/testing/selftests/riscv/mm/Makefile
+++ b/tools/testing/selftests/riscv/mm/Makefile
@@ -3,7 +3,7 @@
# Originally tools/testing/arm64/abi/Makefile
# Additional include paths needed by kselftest.h and local headers
-CFLAGS += -D_GNU_SOURCE -std=gnu99 -I.
+CFLAGS += -std=gnu99 -I.
TEST_GEN_FILES := mmap_default mmap_bottomup
diff --git a/tools/testing/selftests/riscv/sigreturn/.gitignore b/tools/testing/selftests/riscv/sigreturn/.gitignore
new file mode 100644
index 000000000000..35002b8ae780
--- /dev/null
+++ b/tools/testing/selftests/riscv/sigreturn/.gitignore
@@ -0,0 +1 @@
+sigreturn
diff --git a/tools/testing/selftests/riscv/sigreturn/Makefile b/tools/testing/selftests/riscv/sigreturn/Makefile
new file mode 100644
index 000000000000..eb8bac9279a8
--- /dev/null
+++ b/tools/testing/selftests/riscv/sigreturn/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2021 ARM Limited
+# Originally tools/testing/arm64/abi/Makefile
+
+CFLAGS += -I$(top_srcdir)/tools/include
+
+TEST_GEN_PROGS := sigreturn
+
+include ../../lib.mk
+
+$(OUTPUT)/sigreturn: sigreturn.c
+ $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^
diff --git a/tools/testing/selftests/riscv/sigreturn/sigreturn.c b/tools/testing/selftests/riscv/sigreturn/sigreturn.c
new file mode 100644
index 000000000000..ed351a1cb917
--- /dev/null
+++ b/tools/testing/selftests/riscv/sigreturn/sigreturn.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <ucontext.h>
+#include <linux/ptrace.h>
+#include "../../kselftest_harness.h"
+
+#define RISCV_V_MAGIC 0x53465457
+#define DEFAULT_VALUE 2
+#define SIGNAL_HANDLER_OVERRIDE 3
+
+static void simple_handle(int sig_no, siginfo_t *info, void *vcontext)
+{
+ ucontext_t *context = vcontext;
+
+ context->uc_mcontext.__gregs[REG_PC] = context->uc_mcontext.__gregs[REG_PC] + 4;
+}
+
+static void vector_override(int sig_no, siginfo_t *info, void *vcontext)
+{
+ ucontext_t *context = vcontext;
+
+ // vector state
+ struct __riscv_extra_ext_header *ext;
+ struct __riscv_v_ext_state *v_ext_state;
+
+ /* Find the vector context. */
+ ext = (void *)(&context->uc_mcontext.__fpregs);
+ if (ext->hdr.magic != RISCV_V_MAGIC) {
+ fprintf(stderr, "bad vector magic: %x\n", ext->hdr.magic);
+ abort();
+ }
+
+ v_ext_state = (void *)((char *)(ext) + sizeof(*ext));
+
+ *(int *)v_ext_state->datap = SIGNAL_HANDLER_OVERRIDE;
+
+ context->uc_mcontext.__gregs[REG_PC] = context->uc_mcontext.__gregs[REG_PC] + 4;
+}
+
+static int vector_sigreturn(int data, void (*handler)(int, siginfo_t *, void *))
+{
+ int after_sigreturn;
+ struct sigaction sig_action = {
+ .sa_sigaction = handler,
+ .sa_flags = SA_SIGINFO
+ };
+
+ sigaction(SIGSEGV, &sig_action, 0);
+
+ asm(".option push \n\
+ .option arch, +v \n\
+ vsetivli x0, 1, e32, m1, ta, ma \n\
+ vmv.s.x v0, %1 \n\
+ # Generate SIGSEGV \n\
+ lw a0, 0(x0) \n\
+ vmv.x.s %0, v0 \n\
+ .option pop" : "=r" (after_sigreturn) : "r" (data));
+
+ return after_sigreturn;
+}
+
+TEST(vector_restore)
+{
+ int result;
+
+ result = vector_sigreturn(DEFAULT_VALUE, &simple_handle);
+
+ EXPECT_EQ(DEFAULT_VALUE, result);
+}
+
+TEST(vector_restore_signal_handler_override)
+{
+ int result;
+
+ result = vector_sigreturn(DEFAULT_VALUE, &vector_override);
+
+ EXPECT_EQ(SIGNAL_HANDLER_OVERRIDE, result);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/riscv/vector/vstate_prctl.c b/tools/testing/selftests/riscv/vector/vstate_prctl.c
index 27668fb3b6d0..895177f6bf4c 100644
--- a/tools/testing/selftests/riscv/vector/vstate_prctl.c
+++ b/tools/testing/selftests/riscv/vector/vstate_prctl.c
@@ -88,16 +88,16 @@ int main(void)
return -2;
}
- if (!(pair.value & RISCV_HWPROBE_IMA_V)) {
+ if (!(pair.value & RISCV_HWPROBE_EXT_ZVE32X)) {
rc = prctl(PR_RISCV_V_GET_CONTROL);
if (rc != -1 || errno != EINVAL) {
- ksft_test_result_fail("GET_CONTROL should fail on kernel/hw without V\n");
+ ksft_test_result_fail("GET_CONTROL should fail on kernel/hw without ZVE32X\n");
return -3;
}
rc = prctl(PR_RISCV_V_SET_CONTROL, PR_RISCV_V_VSTATE_CTRL_ON);
if (rc != -1 || errno != EINVAL) {
- ksft_test_result_fail("GET_CONTROL should fail on kernel/hw without V\n");
+ ksft_test_result_fail("SET_CONTROL should fail on kernel/hw without ZVE32X\n");
return -4;
}
diff --git a/tools/testing/selftests/sched/cs_prctl_test.c b/tools/testing/selftests/sched/cs_prctl_test.c
index 62fba7356af2..52d97fae4dbd 100644
--- a/tools/testing/selftests/sched/cs_prctl_test.c
+++ b/tools/testing/selftests/sched/cs_prctl_test.c
@@ -42,11 +42,11 @@ static pid_t gettid(void)
#ifndef PR_SCHED_CORE
#define PR_SCHED_CORE 62
-# define PR_SCHED_CORE_GET 0
-# define PR_SCHED_CORE_CREATE 1 /* create unique core_sched cookie */
-# define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */
-# define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie to pid */
-# define PR_SCHED_CORE_MAX 4
+#define PR_SCHED_CORE_GET 0
+#define PR_SCHED_CORE_CREATE 1 /* create unique core_sched cookie */
+#define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */
+#define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie to pid */
+#define PR_SCHED_CORE_MAX 4
#endif
#define MAX_PROCESSES 128
diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c
index b83099160fbc..94886c82ae60 100644
--- a/tools/testing/selftests/seccomp/seccomp_benchmark.c
+++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c
@@ -194,14 +194,14 @@ int main(int argc, char *argv[])
ksft_set_plan(7);
ksft_print_msg("Running on:\n");
- ksft_print_msg("");
+ ksft_print_msg("%s", "");
system("uname -a");
ksft_print_msg("Current BPF sysctl settings:\n");
/* Avoid using "sysctl" which may not be installed. */
- ksft_print_msg("");
+ ksft_print_msg("%s", "");
system("grep -H . /proc/sys/net/core/bpf_jit_enable");
- ksft_print_msg("");
+ ksft_print_msg("%s", "");
system("grep -H . /proc/sys/net/core/bpf_jit_harden");
affinity();
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 783ebce8c4de..8c3a73461475 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -60,7 +60,9 @@
#define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__)
#endif
+#ifndef MIN
#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
+#endif
#ifndef PR_SET_PTRACER
# define PR_SET_PTRACER 0x59616d61
@@ -3954,6 +3956,60 @@ TEST(user_notification_filter_empty)
EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0);
}
+TEST(user_ioctl_notification_filter_empty)
+{
+ pid_t pid;
+ long ret;
+ int status, p[2];
+ struct __clone_args args = {
+ .flags = CLONE_FILES,
+ .exit_signal = SIGCHLD,
+ };
+ struct seccomp_notif req = {};
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ if (__NR_clone3 < 0)
+ SKIP(return, "Test not built with clone3 support");
+
+ ASSERT_EQ(0, pipe(p));
+
+ pid = sys_clone3(&args, sizeof(args));
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ int listener;
+
+ listener = user_notif_syscall(__NR_mknodat, SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ if (listener < 0)
+ _exit(EXIT_FAILURE);
+
+ if (dup2(listener, 200) != 200)
+ _exit(EXIT_FAILURE);
+ close(p[1]);
+ close(listener);
+ sleep(1);
+
+ _exit(EXIT_SUCCESS);
+ }
+ if (read(p[0], &status, 1) != 0)
+ _exit(EXIT_SUCCESS);
+ close(p[0]);
+ /*
+ * The seccomp filter has become unused so we should be notified once
+ * the kernel gets around to cleaning up task struct.
+ */
+ EXPECT_EQ(ioctl(200, SECCOMP_IOCTL_NOTIF_RECV, &req), -1);
+ EXPECT_EQ(errno, ENOENT);
+
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
+ EXPECT_EQ(true, WIFEXITED(status));
+ EXPECT_EQ(0, WEXITSTATUS(status));
+}
+
static void *do_thread(void *data)
{
return NULL;
@@ -4755,6 +4811,83 @@ TEST(user_notification_wait_killable_fatal)
EXPECT_EQ(SIGTERM, WTERMSIG(status));
}
+struct tsync_vs_thread_leader_args {
+ pthread_t leader;
+};
+
+static void *tsync_vs_dead_thread_leader_sibling(void *_args)
+{
+ struct sock_filter allow_filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog allow_prog = {
+ .len = (unsigned short)ARRAY_SIZE(allow_filter),
+ .filter = allow_filter,
+ };
+ struct tsync_vs_thread_leader_args *args = _args;
+ void *retval;
+ long ret;
+
+ ret = pthread_join(args->leader, &retval);
+ if (ret)
+ exit(1);
+ if (retval != _args)
+ exit(2);
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC, &allow_prog);
+ if (ret)
+ exit(3);
+
+ exit(0);
+}
+
+/*
+ * Ensure that a dead thread leader doesn't prevent installing new filters with
+ * SECCOMP_FILTER_FLAG_TSYNC from other threads.
+ */
+TEST(tsync_vs_dead_thread_leader)
+{
+ int status;
+ pid_t pid;
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ struct sock_filter allow_filter[] = {
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog allow_prog = {
+ .len = (unsigned short)ARRAY_SIZE(allow_filter),
+ .filter = allow_filter,
+ };
+ struct tsync_vs_thread_leader_args *args;
+ pthread_t sibling;
+
+ args = malloc(sizeof(*args));
+ ASSERT_NE(NULL, args);
+ args->leader = pthread_self();
+
+ ret = pthread_create(&sibling, NULL,
+ tsync_vs_dead_thread_leader_sibling, args);
+ ASSERT_EQ(0, ret);
+
+ /* Install a new filter just to the leader thread. */
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog);
+ ASSERT_EQ(0, ret);
+ pthread_exit(args);
+ exit(1);
+ }
+
+ EXPECT_EQ(pid, waitpid(pid, &status, 0));
+ EXPECT_EQ(0, status);
+}
+
/*
* TODO:
* - expand NNP testing
diff --git a/tools/testing/selftests/sgx/Makefile b/tools/testing/selftests/sgx/Makefile
index 867f88ce2570..03b5e13b872b 100644
--- a/tools/testing/selftests/sgx/Makefile
+++ b/tools/testing/selftests/sgx/Makefile
@@ -12,7 +12,7 @@ OBJCOPY := $(CROSS_COMPILE)objcopy
endif
INCLUDES := -I$(top_srcdir)/tools/include
-HOST_CFLAGS := -Wall -Werror -g $(INCLUDES) -fPIC
+HOST_CFLAGS := -Wall -Werror -g $(INCLUDES) -fPIC $(CFLAGS)
HOST_LDFLAGS := -z noexecstack -lcrypto
ENCL_CFLAGS += -Wall -Werror -static-pie -nostdlib -ffreestanding -fPIE \
-fno-stack-protector -mrdrnd $(INCLUDES)
diff --git a/tools/testing/selftests/sigaltstack/current_stack_pointer.h b/tools/testing/selftests/sigaltstack/current_stack_pointer.h
index ea9bdf3a90b1..09da8f1011ce 100644
--- a/tools/testing/selftests/sigaltstack/current_stack_pointer.h
+++ b/tools/testing/selftests/sigaltstack/current_stack_pointer.h
@@ -8,7 +8,7 @@ register unsigned long sp asm("sp");
register unsigned long sp asm("esp");
#elif __loongarch64
register unsigned long sp asm("$sp");
-#elif __ppc__
+#elif __powerpc__
register unsigned long sp asm("r1");
#elif __s390x__
register unsigned long sp asm("%15");
diff --git a/tools/testing/selftests/sync/sync_test.c b/tools/testing/selftests/sync/sync_test.c
index 414a617db993..93db5aa246a3 100644
--- a/tools/testing/selftests/sync/sync_test.c
+++ b/tools/testing/selftests/sync/sync_test.c
@@ -109,6 +109,5 @@ int main(void)
ksft_exit_fail_msg("%d out of %d sync tests failed\n",
err, ksft_test_num());
- /* need this return to keep gcc happy */
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
index 12da0a939e3e..557fb074acf0 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
@@ -133,6 +133,50 @@
]
},
{
+ "id": "6f62",
+ "name": "Add taprio Qdisc with too short interval",
+ "category": [
+ "qdisc",
+ "taprio"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 8\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: taprio num_tc 2 queues 1@0 1@1 sched-entry S 01 300 sched-entry S 02 1700 clockid CLOCK_TAI",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc taprio 1: root refcnt",
+ "matchCount": "0",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "831f",
+ "name": "Add taprio Qdisc with too short cycle-time",
+ "category": [
+ "qdisc",
+ "taprio"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 8\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: taprio num_tc 2 queues 1@0 1@1 sched-entry S 01 200000 sched-entry S 02 200000 cycle-time 100 clockid CLOCK_TAI",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc taprio 1: root refcnt",
+ "matchCount": "0",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
"id": "3e1e",
"name": "Add taprio Qdisc with an invalid cycle-time",
"category": [
diff --git a/tools/testing/selftests/timens/exec.c b/tools/testing/selftests/timens/exec.c
index e40dc5be2f66..d12ff955de0d 100644
--- a/tools/testing/selftests/timens/exec.c
+++ b/tools/testing/selftests/timens/exec.c
@@ -30,7 +30,7 @@ int main(int argc, char *argv[])
for (i = 0; i < 2; i++) {
_gettime(CLOCK_MONOTONIC, &tst, i);
- if (abs(tst.tv_sec - now.tv_sec) > 5)
+ if (labs(tst.tv_sec - now.tv_sec) > 5)
return pr_fail("%ld %ld\n", now.tv_sec, tst.tv_sec);
}
return 0;
@@ -50,7 +50,7 @@ int main(int argc, char *argv[])
for (i = 0; i < 2; i++) {
_gettime(CLOCK_MONOTONIC, &tst, i);
- if (abs(tst.tv_sec - now.tv_sec) > 5)
+ if (labs(tst.tv_sec - now.tv_sec) > 5)
return pr_fail("%ld %ld\n",
now.tv_sec, tst.tv_sec);
}
@@ -70,7 +70,7 @@ int main(int argc, char *argv[])
/* Check that a child process is in the new timens. */
for (i = 0; i < 2; i++) {
_gettime(CLOCK_MONOTONIC, &tst, i);
- if (abs(tst.tv_sec - now.tv_sec - OFFSET) > 5)
+ if (labs(tst.tv_sec - now.tv_sec - OFFSET) > 5)
return pr_fail("%ld %ld\n",
now.tv_sec + OFFSET, tst.tv_sec);
}
diff --git a/tools/testing/selftests/timens/timer.c b/tools/testing/selftests/timens/timer.c
index 5e7f0051bd7b..5b939f59dfa4 100644
--- a/tools/testing/selftests/timens/timer.c
+++ b/tools/testing/selftests/timens/timer.c
@@ -56,7 +56,7 @@ int run_test(int clockid, struct timespec now)
return pr_perror("timerfd_gettime");
elapsed = new_value.it_value.tv_sec;
- if (abs(elapsed - 3600) > 60) {
+ if (llabs(elapsed - 3600) > 60) {
ksft_test_result_fail("clockid: %d elapsed: %lld\n",
clockid, elapsed);
return 1;
diff --git a/tools/testing/selftests/timens/timerfd.c b/tools/testing/selftests/timens/timerfd.c
index 9edd43d6b2c1..a4196bbd6e33 100644
--- a/tools/testing/selftests/timens/timerfd.c
+++ b/tools/testing/selftests/timens/timerfd.c
@@ -61,7 +61,7 @@ int run_test(int clockid, struct timespec now)
return pr_perror("timerfd_gettime(%d)", clockid);
elapsed = new_value.it_value.tv_sec;
- if (abs(elapsed - 3600) > 60) {
+ if (llabs(elapsed - 3600) > 60) {
ksft_test_result_fail("clockid: %d elapsed: %lld\n",
clockid, elapsed);
return 1;
diff --git a/tools/testing/selftests/timens/vfork_exec.c b/tools/testing/selftests/timens/vfork_exec.c
index beb7614941fb..5b8907bf451d 100644
--- a/tools/testing/selftests/timens/vfork_exec.c
+++ b/tools/testing/selftests/timens/vfork_exec.c
@@ -32,7 +32,7 @@ static void *tcheck(void *_args)
for (i = 0; i < 2; i++) {
_gettime(CLOCK_MONOTONIC, &tst, i);
- if (abs(tst.tv_sec - now->tv_sec) > 5) {
+ if (labs(tst.tv_sec - now->tv_sec) > 5) {
pr_fail("%s: in-thread: unexpected value: %ld (%ld)\n",
args->tst_name, tst.tv_sec, now->tv_sec);
return (void *)1UL;
@@ -64,7 +64,7 @@ static int check(char *tst_name, struct timespec *now)
for (i = 0; i < 2; i++) {
_gettime(CLOCK_MONOTONIC, &tst, i);
- if (abs(tst.tv_sec - now->tv_sec) > 5)
+ if (labs(tst.tv_sec - now->tv_sec) > 5)
return pr_fail("%s: unexpected value: %ld (%ld)\n",
tst_name, tst.tv_sec, now->tv_sec);
}
diff --git a/tools/testing/selftests/timers/adjtick.c b/tools/testing/selftests/timers/adjtick.c
index 47e05fdc32c5..205b76a4abb4 100644
--- a/tools/testing/selftests/timers/adjtick.c
+++ b/tools/testing/selftests/timers/adjtick.c
@@ -205,7 +205,7 @@ int main(int argc, char **argv)
adjtimex(&tx1);
if (err)
- return ksft_exit_fail();
+ ksft_exit_fail();
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/timers/alarmtimer-suspend.c b/tools/testing/selftests/timers/alarmtimer-suspend.c
index 4332b494103d..ad52e608b88e 100644
--- a/tools/testing/selftests/timers/alarmtimer-suspend.c
+++ b/tools/testing/selftests/timers/alarmtimer-suspend.c
@@ -173,6 +173,6 @@ int main(void)
timer_delete(tm1);
}
if (final_ret)
- return ksft_exit_fail();
- return ksft_exit_pass();
+ ksft_exit_fail();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/timers/change_skew.c b/tools/testing/selftests/timers/change_skew.c
index 992a77f2a74c..4421cd562c24 100644
--- a/tools/testing/selftests/timers/change_skew.c
+++ b/tools/testing/selftests/timers/change_skew.c
@@ -89,8 +89,8 @@ int main(int argc, char **argv)
if (ret) {
printf("[FAIL]");
- return ksft_exit_fail();
+ ksft_exit_fail();
}
printf("[OK]");
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/timers/freq-step.c b/tools/testing/selftests/timers/freq-step.c
index 4b76450d78d1..73b636f89fdc 100644
--- a/tools/testing/selftests/timers/freq-step.c
+++ b/tools/testing/selftests/timers/freq-step.c
@@ -257,7 +257,7 @@ int main(int argc, char **argv)
set_frequency(0.0);
if (fails)
- return ksft_exit_fail();
+ ksft_exit_fail();
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/timers/leap-a-day.c b/tools/testing/selftests/timers/leap-a-day.c
index 23eb398c8140..986abbdb1521 100644
--- a/tools/testing/selftests/timers/leap-a-day.c
+++ b/tools/testing/selftests/timers/leap-a-day.c
@@ -268,7 +268,7 @@ int main(int argc, char **argv)
if (ret < 0) {
printf("Error: Problem setting STA_INS/STA_DEL!: %s\n",
time_state_str(ret));
- return ksft_exit_fail();
+ ksft_exit_fail();
}
/* Validate STA_INS was set */
@@ -277,7 +277,7 @@ int main(int argc, char **argv)
if (tx.status != STA_INS && tx.status != STA_DEL) {
printf("Error: STA_INS/STA_DEL not set!: %s\n",
time_state_str(ret));
- return ksft_exit_fail();
+ ksft_exit_fail();
}
if (tai_time) {
@@ -295,7 +295,7 @@ int main(int argc, char **argv)
se.sigev_value.sival_int = 0;
if (timer_create(CLOCK_REALTIME, &se, &tm1) == -1) {
printf("Error: timer_create failed\n");
- return ksft_exit_fail();
+ ksft_exit_fail();
}
its1.it_value.tv_sec = next_leap;
its1.it_value.tv_nsec = 0;
@@ -366,7 +366,7 @@ int main(int argc, char **argv)
if (error_found) {
printf("Errors observed\n");
clear_time_state();
- return ksft_exit_fail();
+ ksft_exit_fail();
}
printf("\n");
if ((iterations != -1) && !(--iterations))
@@ -374,5 +374,5 @@ int main(int argc, char **argv)
}
clear_time_state();
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/timers/leapcrash.c b/tools/testing/selftests/timers/leapcrash.c
index f70802c5dd0d..8fd065eec904 100644
--- a/tools/testing/selftests/timers/leapcrash.c
+++ b/tools/testing/selftests/timers/leapcrash.c
@@ -87,7 +87,7 @@ int main(void)
tv.tv_usec = 0;
if (settimeofday(&tv, NULL)) {
printf("Error: You're likely not running with proper (ie: root) permissions\n");
- return ksft_exit_fail();
+ ksft_exit_fail();
}
tx.modes = 0;
adjtimex(&tx);
@@ -104,5 +104,5 @@ int main(void)
fflush(stdout);
}
printf("[OK]\n");
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/timers/mqueue-lat.c b/tools/testing/selftests/timers/mqueue-lat.c
index 7916cf5cc6ff..f3179a605bba 100644
--- a/tools/testing/selftests/timers/mqueue-lat.c
+++ b/tools/testing/selftests/timers/mqueue-lat.c
@@ -107,8 +107,8 @@ int main(int argc, char **argv)
ret = mqueue_lat_test();
if (ret < 0) {
printf("[FAILED]\n");
- return ksft_exit_fail();
+ ksft_exit_fail();
}
printf("[OK]\n");
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c
index c001dd79179d..07c81c0093c0 100644
--- a/tools/testing/selftests/timers/posix_timers.c
+++ b/tools/testing/selftests/timers/posix_timers.c
@@ -260,16 +260,16 @@ int main(int argc, char **argv)
ksft_print_msg("based timers if other threads run on the CPU...\n");
if (check_itimer(ITIMER_VIRTUAL) < 0)
- return ksft_exit_fail();
+ ksft_exit_fail();
if (check_itimer(ITIMER_PROF) < 0)
- return ksft_exit_fail();
+ ksft_exit_fail();
if (check_itimer(ITIMER_REAL) < 0)
- return ksft_exit_fail();
+ ksft_exit_fail();
if (check_timer_create(CLOCK_THREAD_CPUTIME_ID) < 0)
- return ksft_exit_fail();
+ ksft_exit_fail();
/*
* It's unfortunately hard to reliably test a timer expiration
@@ -281,10 +281,10 @@ int main(int argc, char **argv)
* find a better solution.
*/
if (check_timer_create(CLOCK_PROCESS_CPUTIME_ID) < 0)
- return ksft_exit_fail();
+ ksft_exit_fail();
if (check_timer_distribution() < 0)
- return ksft_exit_fail();
+ ksft_exit_fail();
ksft_finished();
}
diff --git a/tools/testing/selftests/timers/raw_skew.c b/tools/testing/selftests/timers/raw_skew.c
index 6eba203f9da7..030143eb09b4 100644
--- a/tools/testing/selftests/timers/raw_skew.c
+++ b/tools/testing/selftests/timers/raw_skew.c
@@ -137,11 +137,11 @@ int main(int argc, char **argv)
if (tx1.offset || tx2.offset ||
tx1.freq != tx2.freq || tx1.tick != tx2.tick) {
printf(" [SKIP]\n");
- return ksft_exit_skip("The clock was adjusted externally. Shutdown NTPd or other time sync daemons\n");
+ ksft_exit_skip("The clock was adjusted externally. Shutdown NTPd or other time sync daemons\n");
}
printf(" [FAILED]\n");
- return ksft_exit_fail();
+ ksft_exit_fail();
}
printf(" [OK]\n");
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/timers/rtcpie.c b/tools/testing/selftests/timers/rtcpie.c
index 4ef2184f1558..7c07edd0d450 100644
--- a/tools/testing/selftests/timers/rtcpie.c
+++ b/tools/testing/selftests/timers/rtcpie.c
@@ -29,7 +29,7 @@ static const char default_rtc[] = "/dev/rtc0";
int main(int argc, char **argv)
{
- int i, fd, retval, irqcount = 0;
+ int i, fd, retval;
unsigned long tmp, data, old_pie_rate;
const char *rtc = default_rtc;
struct timeval start, end, diff;
@@ -120,7 +120,6 @@ int main(int argc, char **argv)
fprintf(stderr, " %d",i);
fflush(stderr);
- irqcount++;
}
/* Disable periodic interrupts */
diff --git a/tools/testing/selftests/timers/set-2038.c b/tools/testing/selftests/timers/set-2038.c
index 688cfd81b531..f7d978721b9e 100644
--- a/tools/testing/selftests/timers/set-2038.c
+++ b/tools/testing/selftests/timers/set-2038.c
@@ -128,6 +128,6 @@ out:
/* restore clock */
settime(start);
if (ret)
- return ksft_exit_fail();
- return ksft_exit_pass();
+ ksft_exit_fail();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/timers/set-tai.c b/tools/testing/selftests/timers/set-tai.c
index 8c4179ee2ca2..5b67462efcd6 100644
--- a/tools/testing/selftests/timers/set-tai.c
+++ b/tools/testing/selftests/timers/set-tai.c
@@ -61,9 +61,9 @@ int main(int argc, char **argv)
ret = get_tai();
if (ret != i) {
printf("[FAILED] expected: %i got %i\n", i, ret);
- return ksft_exit_fail();
+ ksft_exit_fail();
}
}
printf("[OK]\n");
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/timers/set-timer-lat.c b/tools/testing/selftests/timers/set-timer-lat.c
index 50da45437daa..7ce240c89b21 100644
--- a/tools/testing/selftests/timers/set-timer-lat.c
+++ b/tools/testing/selftests/timers/set-timer-lat.c
@@ -278,6 +278,6 @@ int main(void)
ret |= do_timer_oneshot(clock_id, 0);
}
if (ret)
- return ksft_exit_fail();
- return ksft_exit_pass();
+ ksft_exit_fail();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/timers/set-tz.c b/tools/testing/selftests/timers/set-tz.c
index 62bd33eb16f0..20daaf1782b7 100644
--- a/tools/testing/selftests/timers/set-tz.c
+++ b/tools/testing/selftests/timers/set-tz.c
@@ -102,9 +102,9 @@ int main(int argc, char **argv)
printf("[OK]\n");
set_tz(min, dst);
- return ksft_exit_pass();
+ ksft_exit_pass();
err:
set_tz(min, dst);
- return ksft_exit_fail();
+ ksft_exit_fail();
}
diff --git a/tools/testing/selftests/timers/skew_consistency.c b/tools/testing/selftests/timers/skew_consistency.c
index 63913f75b384..c8e6bffe4e0a 100644
--- a/tools/testing/selftests/timers/skew_consistency.c
+++ b/tools/testing/selftests/timers/skew_consistency.c
@@ -70,8 +70,8 @@ int main(int argc, char **argv)
if (ret) {
printf("[FAILED]\n");
- return ksft_exit_fail();
+ ksft_exit_fail();
}
printf("[OK]\n");
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/timers/threadtest.c b/tools/testing/selftests/timers/threadtest.c
index 80aed4bf06fb..76b38e41d9c7 100644
--- a/tools/testing/selftests/timers/threadtest.c
+++ b/tools/testing/selftests/timers/threadtest.c
@@ -189,5 +189,5 @@ out:
/* die */
if (ret)
ksft_exit_fail();
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/timers/valid-adjtimex.c b/tools/testing/selftests/timers/valid-adjtimex.c
index d13ebde20322..d500884801d8 100644
--- a/tools/testing/selftests/timers/valid-adjtimex.c
+++ b/tools/testing/selftests/timers/valid-adjtimex.c
@@ -320,10 +320,10 @@ int validate_set_offset(void)
int main(int argc, char **argv)
{
if (validate_freq())
- return ksft_exit_fail();
+ ksft_exit_fail();
if (validate_set_offset())
- return ksft_exit_fail();
+ ksft_exit_fail();
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/tmpfs/Makefile b/tools/testing/selftests/tmpfs/Makefile
index aa11ccc92e5b..3be931e1193f 100644
--- a/tools/testing/selftests/tmpfs/Makefile
+++ b/tools/testing/selftests/tmpfs/Makefile
@@ -1,6 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
CFLAGS += -Wall -O2
-CFLAGS += -D_GNU_SOURCE
TEST_GEN_PROGS :=
TEST_GEN_PROGS += bug-link-o-tmpfile
diff --git a/tools/testing/selftests/tty/tty_tstamp_update.c b/tools/testing/selftests/tty/tty_tstamp_update.c
index 0ee97943dccc..9e1a40f5db17 100644
--- a/tools/testing/selftests/tty/tty_tstamp_update.c
+++ b/tools/testing/selftests/tty/tty_tstamp_update.c
@@ -47,42 +47,60 @@ int main(int argc, char **argv)
int r;
char tty[PATH_MAX] = {};
struct stat st1, st2;
+ int result = KSFT_FAIL;
ksft_print_header();
ksft_set_plan(1);
r = readlink("/proc/self/fd/0", tty, PATH_MAX);
- if (r < 0)
- ksft_exit_fail_msg("readlink on /proc/self/fd/0 failed: %m\n");
+ if (r < 0) {
+ ksft_print_msg("readlink on /proc/self/fd/0 failed: %m\n");
+ goto out;
+ }
+
+ if (!tty_valid(tty)) {
+ ksft_print_msg("invalid tty path '%s'\n", tty);
+ result = KSFT_SKIP;
+ goto out;
- if (!tty_valid(tty))
- ksft_exit_skip("invalid tty path '%s'\n", tty);
+ }
r = stat(tty, &st1);
- if (r < 0)
- ksft_exit_fail_msg("stat failed on tty path '%s': %m\n", tty);
+ if (r < 0) {
+ ksft_print_msg("stat failed on tty path '%s': %m\n", tty);
+ goto out;
+ }
/* We need to wait at least 8 seconds in order to observe timestamp change */
/* https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=fbf47635315ab308c9b58a1ea0906e711a9228de */
sleep(10);
r = write_dev_tty();
- if (r < 0)
- ksft_exit_fail_msg("failed to write to /dev/tty: %s\n",
- strerror(-r));
+ if (r < 0) {
+ ksft_print_msg("failed to write to /dev/tty: %s\n",
+ strerror(-r));
+ goto out;
+ }
r = stat(tty, &st2);
- if (r < 0)
- ksft_exit_fail_msg("stat failed on tty path '%s': %m\n", tty);
+ if (r < 0) {
+ ksft_print_msg("stat failed on tty path '%s': %m\n", tty);
+ goto out;
+ }
/* We wrote to the terminal so timestamps should have been updated */
if (st1.st_atim.tv_sec == st2.st_atim.tv_sec &&
st1.st_mtim.tv_sec == st2.st_mtim.tv_sec) {
- ksft_test_result_fail("tty timestamps not updated\n");
- ksft_exit_fail();
+ ksft_print_msg("tty timestamps not updated\n");
+ goto out;
}
- ksft_test_result_pass(
+ ksft_print_msg(
"timestamps of terminal '%s' updated after write to /dev/tty\n", tty);
- return EXIT_SUCCESS;
+ result = KSFT_PASS;
+
+out:
+ ksft_test_result_report(result, "tty_tstamp_update\n");
+
+ ksft_finished();
}
diff --git a/tools/testing/selftests/turbostat/added_perf_counters.py b/tools/testing/selftests/turbostat/added_perf_counters.py
new file mode 100755
index 000000000000..9ab4aaf45fb8
--- /dev/null
+++ b/tools/testing/selftests/turbostat/added_perf_counters.py
@@ -0,0 +1,178 @@
+#!/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import subprocess
+from shutil import which
+from os import pread
+
+class PerfCounterInfo:
+ def __init__(self, subsys, event):
+ self.subsys = subsys
+ self.event = event
+
+ def get_perf_event_name(self):
+ return f'{self.subsys}/{self.event}/'
+
+ def get_turbostat_perf_id(self, counter_scope, counter_type, column_name):
+ return f'perf/{self.subsys}/{self.event},{counter_scope},{counter_type},{column_name}'
+
+PERF_COUNTERS_CANDIDATES = [
+ PerfCounterInfo('msr', 'mperf'),
+ PerfCounterInfo('msr', 'aperf'),
+ PerfCounterInfo('msr', 'tsc'),
+ PerfCounterInfo('cstate_core', 'c1-residency'),
+ PerfCounterInfo('cstate_core', 'c6-residency'),
+ PerfCounterInfo('cstate_core', 'c7-residency'),
+ PerfCounterInfo('cstate_pkg', 'c2-residency'),
+ PerfCounterInfo('cstate_pkg', 'c3-residency'),
+ PerfCounterInfo('cstate_pkg', 'c6-residency'),
+ PerfCounterInfo('cstate_pkg', 'c7-residency'),
+ PerfCounterInfo('cstate_pkg', 'c8-residency'),
+ PerfCounterInfo('cstate_pkg', 'c9-residency'),
+ PerfCounterInfo('cstate_pkg', 'c10-residency'),
+]
+present_perf_counters = []
+
+def check_perf_access():
+ perf = which('perf')
+ if perf is None:
+ print('SKIP: Could not find perf binary, thus could not determine perf access.')
+ return False
+
+ def has_perf_counter_access(counter_name):
+ proc_perf = subprocess.run([perf, 'stat', '-e', counter_name, '--timeout', '10'],
+ capture_output = True)
+
+ if proc_perf.returncode != 0:
+ print(f'SKIP: Could not read {counter_name} perf counter.')
+ return False
+
+ if b'<not supported>' in proc_perf.stderr:
+ print(f'SKIP: Could not read {counter_name} perf counter.')
+ return False
+
+ return True
+
+ for counter in PERF_COUNTERS_CANDIDATES:
+ if has_perf_counter_access(counter.get_perf_event_name()):
+ present_perf_counters.append(counter)
+
+ if len(present_perf_counters) == 0:
+ print('SKIP: Could not read any perf counter.')
+ return False
+
+ if len(present_perf_counters) != len(PERF_COUNTERS_CANDIDATES):
+ print(f'WARN: Could not access all of the counters - some will be left untested')
+
+ return True
+
+if not check_perf_access():
+ exit(0)
+
+turbostat_counter_source_opts = ['']
+
+turbostat = which('turbostat')
+if turbostat is None:
+ print('Could not find turbostat binary')
+ exit(1)
+
+timeout = which('timeout')
+if timeout is None:
+ print('Could not find timeout binary')
+ exit(1)
+
+proc_turbostat = subprocess.run([turbostat, '--list'], capture_output = True)
+if proc_turbostat.returncode != 0:
+ print(f'turbostat failed with {proc_turbostat.returncode}')
+ exit(1)
+
+EXPECTED_COLUMNS_DEBUG_DEFAULT = [b'usec', b'Time_Of_Day_Seconds', b'APIC', b'X2APIC']
+
+expected_columns = [b'CPU']
+counters_argv = []
+for counter in present_perf_counters:
+ if counter.subsys == 'cstate_core':
+ counter_scope = 'core'
+ elif counter.subsys == 'cstate_pkg':
+ counter_scope = 'package'
+ else:
+ counter_scope = 'cpu'
+
+ counter_type = 'delta'
+ column_name = counter.event
+
+ cparams = counter.get_turbostat_perf_id(
+ counter_scope = counter_scope,
+ counter_type = counter_type,
+ column_name = column_name
+ )
+ expected_columns.append(column_name.encode())
+ counters_argv.extend(['--add', cparams])
+
+expected_columns_debug = EXPECTED_COLUMNS_DEBUG_DEFAULT + expected_columns
+
+def gen_user_friendly_cmdline(argv_):
+ argv = argv_[:]
+ ret = ''
+
+ while len(argv) != 0:
+ arg = argv.pop(0)
+ arg_next = ''
+
+ if arg in ('-i', '--show', '--add'):
+ arg_next = argv.pop(0) if len(argv) > 0 else ''
+
+ ret += f'{arg} {arg_next} \\\n\t'
+
+ # Remove the last separator and return
+ return ret[:-4]
+
+#
+# Run turbostat for some time and send SIGINT
+#
+timeout_argv = [timeout, '--preserve-status', '-s', 'SIGINT', '-k', '3', '0.2s']
+turbostat_argv = [turbostat, '-i', '0.50', '--show', 'CPU'] + counters_argv
+
+def check_columns_or_fail(expected_columns: list, actual_columns: list):
+ if len(actual_columns) != len(expected_columns):
+ print(f'turbostat column check failed\n{expected_columns=}\n{actual_columns=}')
+ exit(1)
+
+ failed = False
+ for expected_column in expected_columns:
+ if expected_column not in actual_columns:
+ print(f'turbostat column check failed: missing column {expected_column.decode()}')
+ failed = True
+
+ if failed:
+ exit(1)
+
+cmdline = gen_user_friendly_cmdline(turbostat_argv)
+print(f'Running turbostat with:\n\t{cmdline}\n... ', end = '', flush = True)
+proc_turbostat = subprocess.run(timeout_argv + turbostat_argv, capture_output = True)
+if proc_turbostat.returncode != 0:
+ print(f'turbostat failed with {proc_turbostat.returncode}')
+ exit(1)
+
+actual_columns = proc_turbostat.stdout.split(b'\n')[0].split(b'\t')
+check_columns_or_fail(expected_columns, actual_columns)
+print('OK')
+
+#
+# Same, but with --debug
+#
+# We explicitly specify '--show CPU' to make sure turbostat
+# don't show a bunch of default counters instead.
+#
+turbostat_argv.append('--debug')
+
+cmdline = gen_user_friendly_cmdline(turbostat_argv)
+print(f'Running turbostat (in debug mode) with:\n\t{cmdline}\n... ', end = '', flush = True)
+proc_turbostat = subprocess.run(timeout_argv + turbostat_argv, capture_output = True)
+if proc_turbostat.returncode != 0:
+ print(f'turbostat failed with {proc_turbostat.returncode}')
+ exit(1)
+
+actual_columns = proc_turbostat.stdout.split(b'\n')[0].split(b'\t')
+check_columns_or_fail(expected_columns_debug, actual_columns)
+print('OK')
diff --git a/tools/testing/selftests/turbostat/smi_aperf_mperf.py b/tools/testing/selftests/turbostat/smi_aperf_mperf.py
new file mode 100755
index 000000000000..6289cc47d5f0
--- /dev/null
+++ b/tools/testing/selftests/turbostat/smi_aperf_mperf.py
@@ -0,0 +1,157 @@
+#!/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import subprocess
+from shutil import which
+from os import pread
+
+# CDLL calls dlopen underneath.
+# Calling it with None (null), we get handle to the our own image (python interpreter).
+# We hope to find sched_getcpu() inside ;]
+# This is a bit ugly, but helps shipping working software, so..
+try:
+ import ctypes
+
+ this_image = ctypes.CDLL(None)
+ BASE_CPU = this_image.sched_getcpu()
+except:
+ BASE_CPU = 0 # If we fail, set to 0 and pray it's not offline.
+
+MSR_IA32_MPERF = 0x000000e7
+MSR_IA32_APERF = 0x000000e8
+
+def check_perf_access():
+ perf = which('perf')
+ if perf is None:
+ print('SKIP: Could not find perf binary, thus could not determine perf access.')
+ return False
+
+ def has_perf_counter_access(counter_name):
+ proc_perf = subprocess.run([perf, 'stat', '-e', counter_name, '--timeout', '10'],
+ capture_output = True)
+
+ if proc_perf.returncode != 0:
+ print(f'SKIP: Could not read {counter_name} perf counter, assuming no access.')
+ return False
+
+ if b'<not supported>' in proc_perf.stderr:
+ print(f'SKIP: Could not read {counter_name} perf counter, assuming no access.')
+ return False
+
+ return True
+
+ if not has_perf_counter_access('msr/mperf/'):
+ return False
+ if not has_perf_counter_access('msr/aperf/'):
+ return False
+ if not has_perf_counter_access('msr/smi/'):
+ return False
+
+ return True
+
+def check_msr_access():
+ try:
+ file_msr = open(f'/dev/cpu/{BASE_CPU}/msr', 'rb')
+ except:
+ return False
+
+ if len(pread(file_msr.fileno(), 8, MSR_IA32_MPERF)) != 8:
+ return False
+
+ if len(pread(file_msr.fileno(), 8, MSR_IA32_APERF)) != 8:
+ return False
+
+ return True
+
+has_perf_access = check_perf_access()
+has_msr_access = check_msr_access()
+
+turbostat_counter_source_opts = ['']
+
+if has_msr_access:
+ turbostat_counter_source_opts.append('--no-perf')
+else:
+ print('SKIP: doesn\'t have MSR access, skipping run with --no-perf')
+
+if has_perf_access:
+ turbostat_counter_source_opts.append('--no-msr')
+else:
+ print('SKIP: doesn\'t have perf access, skipping run with --no-msr')
+
+if not has_msr_access and not has_perf_access:
+ print('SKIP: No MSR nor perf access detected. Skipping the tests entirely')
+ exit(0)
+
+turbostat = which('turbostat')
+if turbostat is None:
+ print('Could not find turbostat binary')
+ exit(1)
+
+timeout = which('timeout')
+if timeout is None:
+ print('Could not find timeout binary')
+ exit(1)
+
+proc_turbostat = subprocess.run([turbostat, '--list'], capture_output = True)
+if proc_turbostat.returncode != 0:
+ print(f'turbostat failed with {proc_turbostat.returncode}')
+ exit(1)
+
+EXPECTED_COLUMNS_DEBUG_DEFAULT = b'usec\tTime_Of_Day_Seconds\tAPIC\tX2APIC'
+
+SMI_APERF_MPERF_DEPENDENT_BICS = [
+ 'SMI',
+ 'Avg_MHz',
+ 'Busy%',
+ 'Bzy_MHz',
+]
+if has_perf_access:
+ SMI_APERF_MPERF_DEPENDENT_BICS.append('IPC')
+
+for bic in SMI_APERF_MPERF_DEPENDENT_BICS:
+ for counter_source_opt in turbostat_counter_source_opts:
+
+ # Ugly special case, but it is what it is..
+ if counter_source_opt == '--no-perf' and bic == 'IPC':
+ continue
+
+ expected_columns = bic.encode()
+ expected_columns_debug = EXPECTED_COLUMNS_DEBUG_DEFAULT + f'\t{bic}'.encode()
+
+ #
+ # Run turbostat for some time and send SIGINT
+ #
+ timeout_argv = [timeout, '--preserve-status', '-s', 'SIGINT', '-k', '3', '0.2s']
+ turbostat_argv = [turbostat, '-i', '0.50', '--show', bic]
+
+ if counter_source_opt:
+ turbostat_argv.append(counter_source_opt)
+
+ print(f'Running turbostat with {turbostat_argv=}... ', end = '', flush = True)
+ proc_turbostat = subprocess.run(timeout_argv + turbostat_argv, capture_output = True)
+ if proc_turbostat.returncode != 0:
+ print(f'turbostat failed with {proc_turbostat.returncode}')
+ exit(1)
+
+ actual_columns = proc_turbostat.stdout.split(b'\n')[0]
+ if expected_columns != actual_columns:
+ print(f'turbostat column check failed\n{expected_columns=}\n{actual_columns=}')
+ exit(1)
+ print('OK')
+
+ #
+ # Same, but with --debug
+ #
+ turbostat_argv.append('--debug')
+
+ print(f'Running turbostat with {turbostat_argv=}... ', end = '', flush = True)
+ proc_turbostat = subprocess.run(timeout_argv + turbostat_argv, capture_output = True)
+ if proc_turbostat.returncode != 0:
+ print(f'turbostat failed with {proc_turbostat.returncode}')
+ exit(1)
+
+ actual_columns = proc_turbostat.stdout.split(b'\n')[0]
+ if expected_columns_debug != actual_columns:
+ print(f'turbostat column check failed\n{expected_columns_debug=}\n{actual_columns=}')
+ exit(1)
+ print('OK')
diff --git a/tools/testing/selftests/user_events/ftrace_test.c b/tools/testing/selftests/user_events/ftrace_test.c
index dcd7509fe2e0..0bb46793dcd4 100644
--- a/tools/testing/selftests/user_events/ftrace_test.c
+++ b/tools/testing/selftests/user_events/ftrace_test.c
@@ -261,6 +261,12 @@ TEST_F(user, register_events) {
ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
ASSERT_EQ(0, reg.write_index);
+ /* Register without separator spacing should still match */
+ reg.enable_bit = 29;
+ reg.name_args = (__u64)"__test_event u32 field1;u32 field2";
+ ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
+ ASSERT_EQ(0, reg.write_index);
+
/* Multiple registers to same name but different args should fail */
reg.enable_bit = 29;
reg.name_args = (__u64)"__test_event u32 field1;";
@@ -288,6 +294,8 @@ TEST_F(user, register_events) {
ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSUNREG, &unreg));
unreg.disable_bit = 30;
ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSUNREG, &unreg));
+ unreg.disable_bit = 29;
+ ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSUNREG, &unreg));
/* Delete should have been auto-done after close and unregister */
close(self->data_fd);
diff --git a/tools/testing/selftests/vDSO/.gitignore b/tools/testing/selftests/vDSO/.gitignore
index a8dc51af5a9c..30d5c8f0e5c7 100644
--- a/tools/testing/selftests/vDSO/.gitignore
+++ b/tools/testing/selftests/vDSO/.gitignore
@@ -6,3 +6,5 @@ vdso_test_correctness
vdso_test_gettimeofday
vdso_test_getcpu
vdso_standalone_test_x86
+vdso_test_getrandom
+vdso_test_chacha
diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
index d53a4d8008f9..3de8e7e052ae 100644
--- a/tools/testing/selftests/vDSO/Makefile
+++ b/tools/testing/selftests/vDSO/Makefile
@@ -1,35 +1,48 @@
# SPDX-License-Identifier: GPL-2.0
-include ../lib.mk
-
uname_M := $(shell uname -m 2>/dev/null || echo not)
ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+SODIUM := $(shell pkg-config --libs libsodium 2>/dev/null)
-TEST_GEN_PROGS := $(OUTPUT)/vdso_test_gettimeofday $(OUTPUT)/vdso_test_getcpu
-TEST_GEN_PROGS += $(OUTPUT)/vdso_test_abi
-TEST_GEN_PROGS += $(OUTPUT)/vdso_test_clock_getres
+TEST_GEN_PROGS := vdso_test_gettimeofday
+TEST_GEN_PROGS += vdso_test_getcpu
+TEST_GEN_PROGS += vdso_test_abi
+TEST_GEN_PROGS += vdso_test_clock_getres
ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64))
-TEST_GEN_PROGS += $(OUTPUT)/vdso_standalone_test_x86
+TEST_GEN_PROGS += vdso_standalone_test_x86
+endif
+TEST_GEN_PROGS += vdso_test_correctness
+ifeq ($(uname_M),x86_64)
+TEST_GEN_PROGS += vdso_test_getrandom
+ifneq ($(SODIUM),)
+TEST_GEN_PROGS += vdso_test_chacha
+endif
endif
-TEST_GEN_PROGS += $(OUTPUT)/vdso_test_correctness
CFLAGS := -std=gnu99
-CFLAGS_vdso_standalone_test_x86 := -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector
-LDFLAGS_vdso_test_correctness := -ldl
+
ifeq ($(CONFIG_X86_32),y)
LDLIBS += -lgcc_s
endif
-all: $(TEST_GEN_PROGS)
+include ../lib.mk
$(OUTPUT)/vdso_test_gettimeofday: parse_vdso.c vdso_test_gettimeofday.c
$(OUTPUT)/vdso_test_getcpu: parse_vdso.c vdso_test_getcpu.c
$(OUTPUT)/vdso_test_abi: parse_vdso.c vdso_test_abi.c
$(OUTPUT)/vdso_test_clock_getres: vdso_test_clock_getres.c
+
$(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c
- $(CC) $(CFLAGS) $(CFLAGS_vdso_standalone_test_x86) \
- vdso_standalone_test_x86.c parse_vdso.c \
- -o $@
+$(OUTPUT)/vdso_standalone_test_x86: CFLAGS +=-nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector
+
$(OUTPUT)/vdso_test_correctness: vdso_test_correctness.c
- $(CC) $(CFLAGS) \
- vdso_test_correctness.c \
- -o $@ \
- $(LDFLAGS_vdso_test_correctness)
+$(OUTPUT)/vdso_test_correctness: LDFLAGS += -ldl
+
+$(OUTPUT)/vdso_test_getrandom: parse_vdso.c
+$(OUTPUT)/vdso_test_getrandom: CFLAGS += -isystem $(top_srcdir)/tools/include \
+ -isystem $(top_srcdir)/include/uapi
+
+$(OUTPUT)/vdso_test_chacha: $(top_srcdir)/arch/$(ARCH)/entry/vdso/vgetrandom-chacha.S
+$(OUTPUT)/vdso_test_chacha: CFLAGS += -idirafter $(top_srcdir)/tools/include \
+ -isystem $(top_srcdir)/arch/$(ARCH)/include \
+ -isystem $(top_srcdir)/include \
+ -D__ASSEMBLY__ -DBULID_VDSO -DCONFIG_FUNCTION_ALIGNMENT=0 \
+ -Wa,--noexecstack $(SODIUM)
diff --git a/tools/testing/selftests/vDSO/parse_vdso.c b/tools/testing/selftests/vDSO/parse_vdso.c
index 413f75620a35..4ae417372e9e 100644
--- a/tools/testing/selftests/vDSO/parse_vdso.c
+++ b/tools/testing/selftests/vDSO/parse_vdso.c
@@ -55,14 +55,20 @@ static struct vdso_info
ELF(Verdef) *verdef;
} vdso_info;
-/* Straight from the ELF specification. */
-static unsigned long elf_hash(const unsigned char *name)
+/*
+ * Straight from the ELF specification...and then tweaked slightly, in order to
+ * avoid a few clang warnings.
+ */
+static unsigned long elf_hash(const char *name)
{
unsigned long h = 0, g;
- while (*name)
+ const unsigned char *uch_name = (const unsigned char *)name;
+
+ while (*uch_name)
{
- h = (h << 4) + *name++;
- if (g = h & 0xf0000000)
+ h = (h << 4) + *uch_name++;
+ g = h & 0xf0000000;
+ if (g)
h ^= g >> 24;
h &= ~g;
}
diff --git a/tools/testing/selftests/vDSO/vdso_config.h b/tools/testing/selftests/vDSO/vdso_config.h
index cdfed403ba13..7b543e7f04d7 100644
--- a/tools/testing/selftests/vDSO/vdso_config.h
+++ b/tools/testing/selftests/vDSO/vdso_config.h
@@ -53,15 +53,19 @@
#if __riscv_xlen == 32
#define VDSO_32BIT 1
#endif
+#elif defined(__loongarch__)
+#define VDSO_VERSION 6
+#define VDSO_NAMES 1
#endif
-static const char *versions[6] = {
+static const char *versions[7] = {
"LINUX_2.6",
"LINUX_2.6.15",
"LINUX_2.6.29",
"LINUX_2.6.39",
"LINUX_4",
"LINUX_4.15",
+ "LINUX_5.10"
};
static const char *names[2][6] = {
diff --git a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
index 8a44ff973ee1..27f6fdf11969 100644
--- a/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
+++ b/tools/testing/selftests/vDSO/vdso_standalone_test_x86.c
@@ -18,7 +18,7 @@
#include "parse_vdso.h"
-/* We need a libc functions... */
+/* We need some libc functions... */
int strcmp(const char *a, const char *b)
{
/* This implementation is buggy: it never returns -1. */
@@ -34,6 +34,20 @@ int strcmp(const char *a, const char *b)
return 0;
}
+/*
+ * The clang build needs this, although gcc does not.
+ * Stolen from lib/string.c.
+ */
+void *memcpy(void *dest, const void *src, size_t count)
+{
+ char *tmp = dest;
+ const char *s = src;
+
+ while (count--)
+ *tmp++ = *s++;
+ return dest;
+}
+
/* ...and two syscalls. This is x86-specific. */
static inline long x86_syscall3(long nr, long a0, long a1, long a2)
{
@@ -70,7 +84,7 @@ void to_base10(char *lastdig, time_t n)
}
}
-__attribute__((externally_visible)) void c_main(void **stack)
+void c_main(void **stack)
{
/* Parse the stack */
long argc = (long)*stack;
diff --git a/tools/testing/selftests/vDSO/vdso_test_chacha.c b/tools/testing/selftests/vDSO/vdso_test_chacha.c
new file mode 100644
index 000000000000..e38f44e5f803
--- /dev/null
+++ b/tools/testing/selftests/vDSO/vdso_test_chacha.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022-2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include <sodium/crypto_stream_chacha20.h>
+#include <sys/random.h>
+#include <string.h>
+#include <stdint.h>
+#include "../kselftest.h"
+
+extern void __arch_chacha20_blocks_nostack(uint8_t *dst_bytes, const uint8_t *key, uint32_t *counter, size_t nblocks);
+
+int main(int argc, char *argv[])
+{
+ enum { TRIALS = 1000, BLOCKS = 128, BLOCK_SIZE = 64 };
+ static const uint8_t nonce[8] = { 0 };
+ uint32_t counter[2];
+ uint8_t key[32];
+ uint8_t output1[BLOCK_SIZE * BLOCKS], output2[BLOCK_SIZE * BLOCKS];
+
+ ksft_print_header();
+ ksft_set_plan(1);
+
+ for (unsigned int trial = 0; trial < TRIALS; ++trial) {
+ if (getrandom(key, sizeof(key), 0) != sizeof(key)) {
+ printf("getrandom() failed!\n");
+ return KSFT_SKIP;
+ }
+ crypto_stream_chacha20(output1, sizeof(output1), nonce, key);
+ for (unsigned int split = 0; split < BLOCKS; ++split) {
+ memset(output2, 'X', sizeof(output2));
+ memset(counter, 0, sizeof(counter));
+ if (split)
+ __arch_chacha20_blocks_nostack(output2, key, counter, split);
+ __arch_chacha20_blocks_nostack(output2 + split * BLOCK_SIZE, key, counter, BLOCKS - split);
+ if (memcmp(output1, output2, sizeof(output1)))
+ return KSFT_FAIL;
+ }
+ }
+ ksft_test_result_pass("chacha: PASS\n");
+ return KSFT_PASS;
+}
diff --git a/tools/testing/selftests/vDSO/vdso_test_getcpu.c b/tools/testing/selftests/vDSO/vdso_test_getcpu.c
index 1df5d057d79f..b758f68c6c9c 100644
--- a/tools/testing/selftests/vDSO/vdso_test_getcpu.c
+++ b/tools/testing/selftests/vDSO/vdso_test_getcpu.c
@@ -13,13 +13,7 @@
#include "../kselftest.h"
#include "parse_vdso.h"
-
-#if defined(__riscv)
-const char *version = "LINUX_4.15";
-#else
-const char *version = "LINUX_2.6";
-#endif
-const char *name = "__vdso_getcpu";
+#include "vdso_config.h"
struct getcpu_cache;
typedef long (*getcpu_t)(unsigned int *, unsigned int *,
@@ -27,6 +21,8 @@ typedef long (*getcpu_t)(unsigned int *, unsigned int *,
int main(int argc, char **argv)
{
+ const char *version = versions[VDSO_VERSION];
+ const char **name = (const char **)&names[VDSO_NAMES];
unsigned long sysinfo_ehdr;
unsigned int cpu, node;
getcpu_t get_cpu;
@@ -40,9 +36,9 @@ int main(int argc, char **argv)
vdso_init_from_sysinfo_ehdr(getauxval(AT_SYSINFO_EHDR));
- get_cpu = (getcpu_t)vdso_sym(version, name);
+ get_cpu = (getcpu_t)vdso_sym(version, name[4]);
if (!get_cpu) {
- printf("Could not find %s\n", name);
+ printf("Could not find %s\n", name[4]);
return KSFT_SKIP;
}
@@ -50,7 +46,7 @@ int main(int argc, char **argv)
if (ret == 0) {
printf("Running on CPU %u node %u\n", cpu, node);
} else {
- printf("%s failed\n", name);
+ printf("%s failed\n", name[4]);
return KSFT_FAIL;
}
diff --git a/tools/testing/selftests/vDSO/vdso_test_getrandom.c b/tools/testing/selftests/vDSO/vdso_test_getrandom.c
new file mode 100644
index 000000000000..05122425a873
--- /dev/null
+++ b/tools/testing/selftests/vDSO/vdso_test_getrandom.c
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022-2024 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+ */
+
+#include <assert.h>
+#include <pthread.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <signal.h>
+#include <sys/auxv.h>
+#include <sys/mman.h>
+#include <sys/random.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <linux/random.h>
+
+#include "../kselftest.h"
+#include "parse_vdso.h"
+
+#ifndef timespecsub
+#define timespecsub(tsp, usp, vsp) \
+ do { \
+ (vsp)->tv_sec = (tsp)->tv_sec - (usp)->tv_sec; \
+ (vsp)->tv_nsec = (tsp)->tv_nsec - (usp)->tv_nsec; \
+ if ((vsp)->tv_nsec < 0) { \
+ (vsp)->tv_sec--; \
+ (vsp)->tv_nsec += 1000000000L; \
+ } \
+ } while (0)
+#endif
+
+static struct {
+ pthread_mutex_t lock;
+ void **states;
+ size_t len, cap;
+} grnd_allocator = {
+ .lock = PTHREAD_MUTEX_INITIALIZER
+};
+
+static struct {
+ ssize_t(*fn)(void *, size_t, unsigned long, void *, size_t);
+ pthread_key_t key;
+ pthread_once_t initialized;
+ struct vgetrandom_opaque_params params;
+} grnd_ctx = {
+ .initialized = PTHREAD_ONCE_INIT
+};
+
+static void *vgetrandom_get_state(void)
+{
+ void *state = NULL;
+
+ pthread_mutex_lock(&grnd_allocator.lock);
+ if (!grnd_allocator.len) {
+ size_t page_size = getpagesize();
+ size_t new_cap;
+ size_t alloc_size, num = sysconf(_SC_NPROCESSORS_ONLN); /* Just a decent heuristic. */
+ void *new_block, *new_states;
+
+ alloc_size = (num * grnd_ctx.params.size_of_opaque_state + page_size - 1) & (~(page_size - 1));
+ num = (page_size / grnd_ctx.params.size_of_opaque_state) * (alloc_size / page_size);
+ new_block = mmap(0, alloc_size, grnd_ctx.params.mmap_prot, grnd_ctx.params.mmap_flags, -1, 0);
+ if (new_block == MAP_FAILED)
+ goto out;
+
+ new_cap = grnd_allocator.cap + num;
+ new_states = reallocarray(grnd_allocator.states, new_cap, sizeof(*grnd_allocator.states));
+ if (!new_states)
+ goto unmap;
+ grnd_allocator.cap = new_cap;
+ grnd_allocator.states = new_states;
+
+ for (size_t i = 0; i < num; ++i) {
+ if (((uintptr_t)new_block & (page_size - 1)) + grnd_ctx.params.size_of_opaque_state > page_size)
+ new_block = (void *)(((uintptr_t)new_block + page_size - 1) & (~(page_size - 1)));
+ grnd_allocator.states[i] = new_block;
+ new_block += grnd_ctx.params.size_of_opaque_state;
+ }
+ grnd_allocator.len = num;
+ goto success;
+
+ unmap:
+ munmap(new_block, alloc_size);
+ goto out;
+ }
+success:
+ state = grnd_allocator.states[--grnd_allocator.len];
+
+out:
+ pthread_mutex_unlock(&grnd_allocator.lock);
+ return state;
+}
+
+static void vgetrandom_put_state(void *state)
+{
+ if (!state)
+ return;
+ pthread_mutex_lock(&grnd_allocator.lock);
+ grnd_allocator.states[grnd_allocator.len++] = state;
+ pthread_mutex_unlock(&grnd_allocator.lock);
+}
+
+static void vgetrandom_init(void)
+{
+ if (pthread_key_create(&grnd_ctx.key, vgetrandom_put_state) != 0)
+ return;
+ unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR);
+ if (!sysinfo_ehdr) {
+ printf("AT_SYSINFO_EHDR is not present!\n");
+ exit(KSFT_SKIP);
+ }
+ vdso_init_from_sysinfo_ehdr(sysinfo_ehdr);
+ grnd_ctx.fn = (__typeof__(grnd_ctx.fn))vdso_sym("LINUX_2.6", "__vdso_getrandom");
+ if (!grnd_ctx.fn) {
+ printf("__vdso_getrandom is missing!\n");
+ exit(KSFT_FAIL);
+ }
+ if (grnd_ctx.fn(NULL, 0, 0, &grnd_ctx.params, ~0UL) != 0) {
+ printf("failed to fetch vgetrandom params!\n");
+ exit(KSFT_FAIL);
+ }
+}
+
+static ssize_t vgetrandom(void *buf, size_t len, unsigned long flags)
+{
+ void *state;
+
+ pthread_once(&grnd_ctx.initialized, vgetrandom_init);
+ state = pthread_getspecific(grnd_ctx.key);
+ if (!state) {
+ state = vgetrandom_get_state();
+ if (pthread_setspecific(grnd_ctx.key, state) != 0) {
+ vgetrandom_put_state(state);
+ state = NULL;
+ }
+ if (!state) {
+ printf("vgetrandom_get_state failed!\n");
+ exit(KSFT_FAIL);
+ }
+ }
+ return grnd_ctx.fn(buf, len, flags, state, grnd_ctx.params.size_of_opaque_state);
+}
+
+enum { TRIALS = 25000000, THREADS = 256 };
+
+static void *test_vdso_getrandom(void *)
+{
+ for (size_t i = 0; i < TRIALS; ++i) {
+ unsigned int val;
+ ssize_t ret = vgetrandom(&val, sizeof(val), 0);
+ assert(ret == sizeof(val));
+ }
+ return NULL;
+}
+
+static void *test_libc_getrandom(void *)
+{
+ for (size_t i = 0; i < TRIALS; ++i) {
+ unsigned int val;
+ ssize_t ret = getrandom(&val, sizeof(val), 0);
+ assert(ret == sizeof(val));
+ }
+ return NULL;
+}
+
+static void *test_syscall_getrandom(void *)
+{
+ for (size_t i = 0; i < TRIALS; ++i) {
+ unsigned int val;
+ ssize_t ret = syscall(__NR_getrandom, &val, sizeof(val), 0);
+ assert(ret == sizeof(val));
+ }
+ return NULL;
+}
+
+static void bench_single(void)
+{
+ struct timespec start, end, diff;
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ test_vdso_getrandom(NULL);
+ clock_gettime(CLOCK_MONOTONIC, &end);
+ timespecsub(&end, &start, &diff);
+ printf(" vdso: %u times in %lu.%09lu seconds\n", TRIALS, diff.tv_sec, diff.tv_nsec);
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ test_libc_getrandom(NULL);
+ clock_gettime(CLOCK_MONOTONIC, &end);
+ timespecsub(&end, &start, &diff);
+ printf(" libc: %u times in %lu.%09lu seconds\n", TRIALS, diff.tv_sec, diff.tv_nsec);
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ test_syscall_getrandom(NULL);
+ clock_gettime(CLOCK_MONOTONIC, &end);
+ timespecsub(&end, &start, &diff);
+ printf("syscall: %u times in %lu.%09lu seconds\n", TRIALS, diff.tv_sec, diff.tv_nsec);
+}
+
+static void bench_multi(void)
+{
+ struct timespec start, end, diff;
+ pthread_t threads[THREADS];
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ for (size_t i = 0; i < THREADS; ++i)
+ assert(pthread_create(&threads[i], NULL, test_vdso_getrandom, NULL) == 0);
+ for (size_t i = 0; i < THREADS; ++i)
+ pthread_join(threads[i], NULL);
+ clock_gettime(CLOCK_MONOTONIC, &end);
+ timespecsub(&end, &start, &diff);
+ printf(" vdso: %u x %u times in %lu.%09lu seconds\n", TRIALS, THREADS, diff.tv_sec, diff.tv_nsec);
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ for (size_t i = 0; i < THREADS; ++i)
+ assert(pthread_create(&threads[i], NULL, test_libc_getrandom, NULL) == 0);
+ for (size_t i = 0; i < THREADS; ++i)
+ pthread_join(threads[i], NULL);
+ clock_gettime(CLOCK_MONOTONIC, &end);
+ timespecsub(&end, &start, &diff);
+ printf(" libc: %u x %u times in %lu.%09lu seconds\n", TRIALS, THREADS, diff.tv_sec, diff.tv_nsec);
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ for (size_t i = 0; i < THREADS; ++i)
+ assert(pthread_create(&threads[i], NULL, test_syscall_getrandom, NULL) == 0);
+ for (size_t i = 0; i < THREADS; ++i)
+ pthread_join(threads[i], NULL);
+ clock_gettime(CLOCK_MONOTONIC, &end);
+ timespecsub(&end, &start, &diff);
+ printf(" syscall: %u x %u times in %lu.%09lu seconds\n", TRIALS, THREADS, diff.tv_sec, diff.tv_nsec);
+}
+
+static void fill(void)
+{
+ uint8_t weird_size[323929];
+ for (;;)
+ vgetrandom(weird_size, sizeof(weird_size), 0);
+}
+
+static void kselftest(void)
+{
+ uint8_t weird_size[1263];
+
+ ksft_print_header();
+ ksft_set_plan(1);
+
+ for (size_t i = 0; i < 1000; ++i) {
+ ssize_t ret = vgetrandom(weird_size, sizeof(weird_size), 0);
+ if (ret != sizeof(weird_size))
+ exit(KSFT_FAIL);
+ }
+
+ ksft_test_result_pass("getrandom: PASS\n");
+ exit(KSFT_PASS);
+}
+
+static void usage(const char *argv0)
+{
+ fprintf(stderr, "Usage: %s [bench-single|bench-multi|fill]\n", argv0);
+}
+
+int main(int argc, char *argv[])
+{
+ if (argc == 1) {
+ kselftest();
+ return 0;
+ }
+
+ if (argc != 2) {
+ usage(argv[0]);
+ return 1;
+ }
+ if (!strcmp(argv[1], "bench-single"))
+ bench_single();
+ else if (!strcmp(argv[1], "bench-multi"))
+ bench_multi();
+ else if (!strcmp(argv[1], "fill"))
+ fill();
+ else {
+ usage(argv[0]);
+ return 1;
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c b/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c
index e411f287a426..ee4f1ca56a71 100644
--- a/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c
+++ b/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c
@@ -18,25 +18,13 @@
#include "../kselftest.h"
#include "parse_vdso.h"
-
-/*
- * ARM64's vDSO exports its gettimeofday() implementation with a different
- * name and version from other architectures, so we need to handle it as
- * a special case.
- */
-#if defined(__aarch64__)
-const char *version = "LINUX_2.6.39";
-const char *name = "__kernel_gettimeofday";
-#elif defined(__riscv)
-const char *version = "LINUX_4.15";
-const char *name = "__vdso_gettimeofday";
-#else
-const char *version = "LINUX_2.6";
-const char *name = "__vdso_gettimeofday";
-#endif
+#include "vdso_config.h"
int main(int argc, char **argv)
{
+ const char *version = versions[VDSO_VERSION];
+ const char **name = (const char **)&names[VDSO_NAMES];
+
unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR);
if (!sysinfo_ehdr) {
printf("AT_SYSINFO_EHDR is not present!\n");
@@ -47,10 +35,10 @@ int main(int argc, char **argv)
/* Find gettimeofday. */
typedef long (*gtod_t)(struct timeval *tv, struct timezone *tz);
- gtod_t gtod = (gtod_t)vdso_sym(version, name);
+ gtod_t gtod = (gtod_t)vdso_sym(version, name[0]);
if (!gtod) {
- printf("Could not find %s\n", name);
+ printf("Could not find %s\n", name[0]);
return KSFT_SKIP;
}
@@ -61,7 +49,7 @@ int main(int argc, char **argv)
printf("The time is %lld.%06lld\n",
(long long)tv.tv_sec, (long long)tv.tv_usec);
} else {
- printf("%s failed\n", name);
+ printf("%s failed\n", name[0]);
return KSFT_FAIL;
}
diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile
index e95bd56b332f..35856b11c143 100644
--- a/tools/testing/selftests/wireguard/qemu/Makefile
+++ b/tools/testing/selftests/wireguard/qemu/Makefile
@@ -109,9 +109,9 @@ KERNEL_ARCH := x86_64
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
QEMU_VPORT_RESULT := virtio-serial-device
ifeq ($(HOST_ARCH),$(ARCH))
-QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off -no-acpi
+QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off,acpi=off
else
-QEMU_MACHINE := -cpu max -machine microvm -no-acpi
+QEMU_MACHINE := -cpu max -machine microvm,acpi=off
endif
else ifeq ($(ARCH),i686)
CHOST := i686-linux-musl
@@ -120,9 +120,9 @@ KERNEL_ARCH := x86
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
QEMU_VPORT_RESULT := virtio-serial-device
ifeq ($(subst x86_64,i686,$(HOST_ARCH)),$(ARCH))
-QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off -no-acpi
+QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off,acpi=off
else
-QEMU_MACHINE := -cpu coreduo -machine microvm -no-acpi
+QEMU_MACHINE := -cpu coreduo -machine microvm,acpi=off
endif
else ifeq ($(ARCH),mips64)
CHOST := mips64-linux-musl
diff --git a/tools/testing/selftests/wireguard/qemu/arch/riscv32.config b/tools/testing/selftests/wireguard/qemu/arch/riscv32.config
index a7f8e8a95625..66290cf289a9 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/riscv32.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/riscv32.config
@@ -2,7 +2,7 @@ CONFIG_NONPORTABLE=y
CONFIG_ARCH_RV32I=y
CONFIG_MMU=y
CONFIG_FPU=y
-CONFIG_SOC_VIRT=y
+CONFIG_ARCH_VIRT=y
CONFIG_RISCV_ISA_FALLBACK=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
diff --git a/tools/testing/selftests/wireguard/qemu/arch/riscv64.config b/tools/testing/selftests/wireguard/qemu/arch/riscv64.config
index daeb3e5e0965..db1aa9f388b9 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/riscv64.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/riscv64.config
@@ -1,7 +1,7 @@
CONFIG_ARCH_RV64I=y
CONFIG_MMU=y
CONFIG_FPU=y
-CONFIG_SOC_VIRT=y
+CONFIG_ARCH_VIRT=y
CONFIG_RISCV_ISA_FALLBACK=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config
index 507555714b1d..f314d3789f17 100644
--- a/tools/testing/selftests/wireguard/qemu/kernel.config
+++ b/tools/testing/selftests/wireguard/qemu/kernel.config
@@ -41,7 +41,6 @@ CONFIG_KALLSYMS=y
CONFIG_BUG=y
CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y
CONFIG_JUMP_LABEL=y
-CONFIG_BASE_FULL=y
CONFIG_FUTEX=y
CONFIG_SHMEM=y
CONFIG_SLUB=y
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 0b872c0a42d2..5c8757a25998 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -40,6 +40,13 @@ CFLAGS := -O2 -g -std=gnu99 -pthread -Wall $(KHDR_INCLUDES)
# call32_from_64 in thunks.S uses absolute addresses.
ifeq ($(CAN_BUILD_WITH_NOPIE),1)
CFLAGS += -no-pie
+
+ifneq ($(LLVM),)
+# clang only wants to see -no-pie during linking. Here, we don't have a separate
+# linking stage, so a compiler warning is unavoidable without (wastefully)
+# restructuring the Makefile. Avoid this by simply disabling that warning.
+CFLAGS += -Wno-unused-command-line-argument
+endif
endif
define gen-target-rule-32
@@ -73,10 +80,10 @@ all_64: $(BINARIES_64)
EXTRA_CLEAN := $(BINARIES_32) $(BINARIES_64)
$(BINARIES_32): $(OUTPUT)/%_32: %.c helpers.h
- $(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl -lm
+ $(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $< $(EXTRA_FILES) -lrt -ldl -lm
$(BINARIES_64): $(OUTPUT)/%_64: %.c helpers.h
- $(CC) -m64 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl
+ $(CC) -m64 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $< $(EXTRA_FILES) -lrt -ldl
# x86_64 users should be encouraged to install 32-bit libraries
ifeq ($(CAN_BUILD_I386)$(CAN_BUILD_X86_64),01)
@@ -100,10 +107,22 @@ warn_32bit_failure:
exit 0;
endif
-# Some tests have additional dependencies.
-$(OUTPUT)/sysret_ss_attrs_64: thunks.S
-$(OUTPUT)/ptrace_syscall_32: raw_syscall_helper_32.S
-$(OUTPUT)/test_syscall_vdso_32: thunks_32.S
+# Add an additional file to the source file list for a given target, and also
+# add a Makefile dependency on that same file. However, do these separately, so
+# that the compiler invocation ("$(CC) file1.c file2.S") is not combined with
+# the dependencies ("header3.h"), because clang, unlike gcc, will not accept
+# header files as an input to the compiler invocation.
+define extra-files
+$(OUTPUT)/$(1): EXTRA_FILES := $(2)
+$(OUTPUT)/$(1): $(2)
+endef
+
+$(eval $(call extra-files,sysret_ss_attrs_64,thunks.S))
+$(eval $(call extra-files,ptrace_syscall_32,raw_syscall_helper_32.S))
+$(eval $(call extra-files,test_syscall_vdso_32,thunks_32.S))
+$(eval $(call extra-files,fsgsbase_restore_64,clang_helpers_64.S))
+$(eval $(call extra-files,fsgsbase_restore_32,clang_helpers_32.S))
+$(eval $(call extra-files,sysret_rip_64,clang_helpers_64.S))
# check_initial_reg_state is special: it needs a custom entry, and it
# needs to be static so that its interpreter doesn't destroy its initial
diff --git a/tools/testing/selftests/x86/amx.c b/tools/testing/selftests/x86/amx.c
index d884fd69dd51..1fdf35a4d7f6 100644
--- a/tools/testing/selftests/x86/amx.c
+++ b/tools/testing/selftests/x86/amx.c
@@ -39,16 +39,6 @@ struct xsave_buffer {
};
};
-static inline uint64_t xgetbv(uint32_t index)
-{
- uint32_t eax, edx;
-
- asm volatile("xgetbv;"
- : "=a" (eax), "=d" (edx)
- : "c" (index));
- return eax + ((uint64_t)edx << 32);
-}
-
static inline void xsave(struct xsave_buffer *xbuf, uint64_t rfbm)
{
uint32_t rfbm_lo = rfbm;
@@ -103,21 +93,6 @@ static void clearhandler(int sig)
#define CPUID_LEAF1_ECX_XSAVE_MASK (1 << 26)
#define CPUID_LEAF1_ECX_OSXSAVE_MASK (1 << 27)
-static inline void check_cpuid_xsave(void)
-{
- uint32_t eax, ebx, ecx, edx;
-
- /*
- * CPUID.1:ECX.XSAVE[bit 26] enumerates general
- * support for the XSAVE feature set, including
- * XGETBV.
- */
- __cpuid_count(1, 0, eax, ebx, ecx, edx);
- if (!(ecx & CPUID_LEAF1_ECX_XSAVE_MASK))
- fatal_error("cpuid: no CPU xsave support");
- if (!(ecx & CPUID_LEAF1_ECX_OSXSAVE_MASK))
- fatal_error("cpuid: no OS xsave support");
-}
static uint32_t xbuf_size;
@@ -179,12 +154,6 @@ static inline void clear_xstate_header(struct xsave_buffer *buffer)
memset(&buffer->header, 0, sizeof(buffer->header));
}
-static inline uint64_t get_xstatebv(struct xsave_buffer *buffer)
-{
- /* XSTATE_BV is at the beginning of the header: */
- return *(uint64_t *)&buffer->header;
-}
-
static inline void set_xstatebv(struct xsave_buffer *buffer, uint64_t bv)
{
/* XSTATE_BV is at the beginning of the header: */
@@ -350,6 +319,7 @@ enum expected_result { FAIL_EXPECTED, SUCCESS_EXPECTED };
/* arch_prctl() and sigaltstack() test */
+#define ARCH_GET_XCOMP_SUPP 0x1021
#define ARCH_GET_XCOMP_PERM 0x1022
#define ARCH_REQ_XCOMP_PERM 0x1023
@@ -928,8 +898,15 @@ static void test_ptrace(void)
int main(void)
{
- /* Check hardware availability at first */
- check_cpuid_xsave();
+ unsigned long features;
+ long rc;
+
+ rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_SUPP, &features);
+ if (rc || (features & XFEATURE_MASK_XTILE) != XFEATURE_MASK_XTILE) {
+ ksft_print_msg("no AMX support\n");
+ return KSFT_SKIP;
+ }
+
check_cpuid_xtiledata();
init_stashed_xsave();
diff --git a/tools/testing/selftests/x86/clang_helpers_32.S b/tools/testing/selftests/x86/clang_helpers_32.S
new file mode 100644
index 000000000000..dc16271bac70
--- /dev/null
+++ b/tools/testing/selftests/x86/clang_helpers_32.S
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * 32-bit assembly helpers for asm operations that lack support in both gcc and
+ * clang. For example, clang asm does not support segment prefixes.
+ */
+.global dereference_seg_base
+dereference_seg_base:
+ mov %fs:(0), %eax
+ ret
+
+.section .note.GNU-stack,"",%progbits
diff --git a/tools/testing/selftests/x86/clang_helpers_64.S b/tools/testing/selftests/x86/clang_helpers_64.S
new file mode 100644
index 000000000000..185a69dbf39c
--- /dev/null
+++ b/tools/testing/selftests/x86/clang_helpers_64.S
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * 64-bit assembly helpers for asm operations that lack support in both gcc and
+ * clang. For example, clang asm does not support segment prefixes.
+ */
+.global dereference_seg_base
+
+dereference_seg_base:
+ mov %gs:(0), %rax
+ ret
+
+.global test_page
+.global test_syscall_insn
+
+.pushsection ".text", "ax"
+.balign 4096
+test_page: .globl test_page
+ .fill 4094,1,0xcc
+
+test_syscall_insn:
+ syscall
+
+.ifne . - test_page - 4096
+ .error "test page is not one page long"
+.endif
+.popsection
+
+.section .note.GNU-stack,"",%progbits
diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c
index 8c780cce941d..50cf32de6313 100644
--- a/tools/testing/selftests/x86/fsgsbase.c
+++ b/tools/testing/selftests/x86/fsgsbase.c
@@ -109,11 +109,6 @@ static inline void wrgsbase(unsigned long gsbase)
asm volatile("wrgsbase %0" :: "r" (gsbase) : "memory");
}
-static inline void wrfsbase(unsigned long fsbase)
-{
- asm volatile("wrfsbase %0" :: "r" (fsbase) : "memory");
-}
-
enum which_base { FS, GS };
static unsigned long read_base(enum which_base which)
@@ -212,7 +207,6 @@ static void mov_0_gs(unsigned long initial_base, bool schedule)
}
static volatile unsigned long remote_base;
-static volatile bool remote_hard_zero;
static volatile unsigned int ftx;
/*
diff --git a/tools/testing/selftests/x86/fsgsbase_restore.c b/tools/testing/selftests/x86/fsgsbase_restore.c
index 6fffadc51579..224058c1e4b2 100644
--- a/tools/testing/selftests/x86/fsgsbase_restore.c
+++ b/tools/testing/selftests/x86/fsgsbase_restore.c
@@ -39,12 +39,11 @@
# define SEG "%fs"
#endif
-static unsigned int dereference_seg_base(void)
-{
- int ret;
- asm volatile ("mov %" SEG ":(0), %0" : "=rm" (ret));
- return ret;
-}
+/*
+ * Defined in clang_helpers_[32|64].S, because unlike gcc, clang inline asm does
+ * not support segmentation prefixes.
+ */
+unsigned int dereference_seg_base(void);
static void init_seg(void)
{
diff --git a/tools/testing/selftests/x86/lam.c b/tools/testing/selftests/x86/lam.c
index 215b8150b7cc..0ea4f6813930 100644
--- a/tools/testing/selftests/x86/lam.c
+++ b/tools/testing/selftests/x86/lam.c
@@ -1183,7 +1183,7 @@ int main(int argc, char **argv)
if (!cpu_has_lam()) {
ksft_print_msg("Unsupported LAM feature!\n");
- return -1;
+ return KSFT_SKIP;
}
while ((c = getopt(argc, argv, "ht:")) != -1) {
@@ -1237,5 +1237,5 @@ int main(int argc, char **argv)
ksft_set_plan(tests_cnt);
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c
index 5d7961a5f7f6..0b75b29f794b 100644
--- a/tools/testing/selftests/x86/sigreturn.c
+++ b/tools/testing/selftests/x86/sigreturn.c
@@ -487,7 +487,7 @@ static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
greg_t asm_ss = ctx->uc_mcontext.gregs[REG_CX];
if (asm_ss != sig_ss && sig == SIGTRAP) {
/* Sanity check failure. */
- printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, ax = %llx\n",
+ printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %x, ax = %llx\n",
ss, *ssptr(ctx), (unsigned long long)asm_ss);
nerrs++;
}
diff --git a/tools/testing/selftests/x86/syscall_arg_fault.c b/tools/testing/selftests/x86/syscall_arg_fault.c
index 461fa41a4d02..48ab065a76f9 100644
--- a/tools/testing/selftests/x86/syscall_arg_fault.c
+++ b/tools/testing/selftests/x86/syscall_arg_fault.c
@@ -29,7 +29,6 @@ static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
err(1, "sigaction");
}
-static volatile sig_atomic_t sig_traps;
static sigjmp_buf jmpbuf;
static volatile sig_atomic_t n_errs;
diff --git a/tools/testing/selftests/x86/sysret_rip.c b/tools/testing/selftests/x86/sysret_rip.c
index 84d74be1d902..b30de9aaa6d4 100644
--- a/tools/testing/selftests/x86/sysret_rip.c
+++ b/tools/testing/selftests/x86/sysret_rip.c
@@ -22,21 +22,13 @@
#include <sys/mman.h>
#include <assert.h>
-
-asm (
- ".pushsection \".text\", \"ax\"\n\t"
- ".balign 4096\n\t"
- "test_page: .globl test_page\n\t"
- ".fill 4094,1,0xcc\n\t"
- "test_syscall_insn:\n\t"
- "syscall\n\t"
- ".ifne . - test_page - 4096\n\t"
- ".error \"test page is not one page long\"\n\t"
- ".endif\n\t"
- ".popsection"
- );
-
+/*
+ * These items are in clang_helpers_64.S, in order to avoid clang inline asm
+ * limitations:
+ */
+void test_syscall_ins(void);
extern const char test_page[];
+
static void const *current_test_page_addr = test_page;
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
diff --git a/tools/testing/selftests/x86/test_FISTTP.c b/tools/testing/selftests/x86/test_FISTTP.c
index 09789c0ce3e9..b9ae9d8cebcb 100644
--- a/tools/testing/selftests/x86/test_FISTTP.c
+++ b/tools/testing/selftests/x86/test_FISTTP.c
@@ -25,7 +25,7 @@ int test(void)
feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
asm volatile ("\n"
" fld1""\n"
- " fisttp res16""\n"
+ " fisttps res16""\n"
" fld1""\n"
" fisttpl res32""\n"
" fld1""\n"
@@ -45,7 +45,7 @@ int test(void)
feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
asm volatile ("\n"
" fldpi""\n"
- " fisttp res16""\n"
+ " fisttps res16""\n"
" fldpi""\n"
" fisttpl res32""\n"
" fldpi""\n"
@@ -66,7 +66,7 @@ int test(void)
asm volatile ("\n"
" fldpi""\n"
" fchs""\n"
- " fisttp res16""\n"
+ " fisttps res16""\n"
" fldpi""\n"
" fchs""\n"
" fisttpl res32""\n"
@@ -88,7 +88,7 @@ int test(void)
feclearexcept(FE_DIVBYZERO|FE_INEXACT|FE_INVALID|FE_OVERFLOW|FE_UNDERFLOW);
asm volatile ("\n"
" fldln2""\n"
- " fisttp res16""\n"
+ " fisttps res16""\n"
" fldln2""\n"
" fisttpl res32""\n"
" fldln2""\n"
diff --git a/tools/testing/selftests/x86/test_mremap_vdso.c b/tools/testing/selftests/x86/test_mremap_vdso.c
index f0d876d48277..d53959e03593 100644
--- a/tools/testing/selftests/x86/test_mremap_vdso.c
+++ b/tools/testing/selftests/x86/test_mremap_vdso.c
@@ -19,6 +19,7 @@
#include <sys/auxv.h>
#include <sys/syscall.h>
#include <sys/wait.h>
+#include "../kselftest.h"
#define PAGE_SIZE 4096
@@ -29,13 +30,13 @@ static int try_to_remap(void *vdso_addr, unsigned long size)
/* Searching for memory location where to remap */
dest_addr = mmap(0, size, PROT_NONE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
if (dest_addr == MAP_FAILED) {
- printf("[WARN]\tmmap failed (%d): %m\n", errno);
+ ksft_print_msg("WARN: mmap failed (%d): %m\n", errno);
return 0;
}
- printf("[NOTE]\tMoving vDSO: [%p, %#lx] -> [%p, %#lx]\n",
- vdso_addr, (unsigned long)vdso_addr + size,
- dest_addr, (unsigned long)dest_addr + size);
+ ksft_print_msg("Moving vDSO: [%p, %#lx] -> [%p, %#lx]\n",
+ vdso_addr, (unsigned long)vdso_addr + size,
+ dest_addr, (unsigned long)dest_addr + size);
fflush(stdout);
new_addr = mremap(vdso_addr, size, size,
@@ -43,10 +44,10 @@ static int try_to_remap(void *vdso_addr, unsigned long size)
if ((unsigned long)new_addr == (unsigned long)-1) {
munmap(dest_addr, size);
if (errno == EINVAL) {
- printf("[NOTE]\tvDSO partial move failed, will try with bigger size\n");
+ ksft_print_msg("vDSO partial move failed, will try with bigger size\n");
return -1; /* Retry with larger */
}
- printf("[FAIL]\tmremap failed (%d): %m\n", errno);
+ ksft_print_msg("[FAIL]\tmremap failed (%d): %m\n", errno);
return 1;
}
@@ -58,11 +59,12 @@ int main(int argc, char **argv, char **envp)
{
pid_t child;
+ ksft_print_header();
+ ksft_set_plan(1);
+
child = fork();
- if (child == -1) {
- printf("[WARN]\tfailed to fork (%d): %m\n", errno);
- return 1;
- }
+ if (child == -1)
+ ksft_exit_fail_msg("failed to fork (%d): %m\n", errno);
if (child == 0) {
unsigned long vdso_size = PAGE_SIZE;
@@ -70,9 +72,9 @@ int main(int argc, char **argv, char **envp)
int ret = -1;
auxval = getauxval(AT_SYSINFO_EHDR);
- printf("\tAT_SYSINFO_EHDR is %#lx\n", auxval);
+ ksft_print_msg("AT_SYSINFO_EHDR is %#lx\n", auxval);
if (!auxval || auxval == -ENOENT) {
- printf("[WARN]\tgetauxval failed\n");
+ ksft_print_msg("WARN: getauxval failed\n");
return 0;
}
@@ -92,16 +94,13 @@ int main(int argc, char **argv, char **envp)
int status;
if (waitpid(child, &status, 0) != child ||
- !WIFEXITED(status)) {
- printf("[FAIL]\tmremap() of the vDSO does not work on this kernel!\n");
- return 1;
- } else if (WEXITSTATUS(status) != 0) {
- printf("[FAIL]\tChild failed with %d\n",
- WEXITSTATUS(status));
- return 1;
- }
- printf("[OK]\n");
+ !WIFEXITED(status))
+ ksft_test_result_fail("mremap() of the vDSO does not work on this kernel!\n");
+ else if (WEXITSTATUS(status) != 0)
+ ksft_test_result_fail("Child failed with %d\n", WEXITSTATUS(status));
+ else
+ ksft_test_result_pass("%s\n", __func__);
}
- return 0;
+ ksft_finished();
}
diff --git a/tools/testing/selftests/x86/test_shadow_stack.c b/tools/testing/selftests/x86/test_shadow_stack.c
index 757e6527f67e..21af54d5f4ea 100644
--- a/tools/testing/selftests/x86/test_shadow_stack.c
+++ b/tools/testing/selftests/x86/test_shadow_stack.c
@@ -34,6 +34,7 @@
#include <sys/ptrace.h>
#include <sys/signal.h>
#include <linux/elf.h>
+#include <linux/perf_event.h>
/*
* Define the ABI defines if needed, so people can run the tests
@@ -556,7 +557,7 @@ struct node {
* looked at the shadow stack gaps.
* 5. See if it landed in the gap.
*/
-int test_guard_gap(void)
+int test_guard_gap_other_gaps(void)
{
void *free_area, *shstk, *test_map = (void *)0xFFFFFFFFFFFFFFFF;
struct node *head = NULL, *cur;
@@ -593,11 +594,64 @@ int test_guard_gap(void)
if (shstk - test_map - PAGE_SIZE != PAGE_SIZE)
return 1;
- printf("[OK]\tGuard gap test\n");
+ printf("[OK]\tGuard gap test, other mapping's gaps\n");
return 0;
}
+/* Tests respecting the guard gap of the mapping getting placed */
+int test_guard_gap_new_mappings_gaps(void)
+{
+ void *free_area, *shstk_start, *test_map = (void *)0xFFFFFFFFFFFFFFFF;
+ struct node *head = NULL, *cur;
+ int ret = 0;
+
+ free_area = mmap(0, PAGE_SIZE * 4, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ munmap(free_area, PAGE_SIZE * 4);
+
+ /* Test letting map_shadow_stack find a free space */
+ shstk_start = mmap(free_area, PAGE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (shstk_start == MAP_FAILED || shstk_start != free_area)
+ return 1;
+
+ while (test_map > shstk_start) {
+ test_map = (void *)syscall(__NR_map_shadow_stack, 0, PAGE_SIZE, 0);
+ if (test_map == MAP_FAILED) {
+ printf("[INFO]\tmap_shadow_stack MAP_FAILED\n");
+ ret = 1;
+ break;
+ }
+
+ cur = malloc(sizeof(*cur));
+ cur->mapping = test_map;
+
+ cur->next = head;
+ head = cur;
+
+ if (test_map == free_area + PAGE_SIZE) {
+ printf("[INFO]\tNew mapping has other mapping in guard gap!\n");
+ ret = 1;
+ break;
+ }
+ }
+
+ while (head) {
+ cur = head;
+ head = cur->next;
+ munmap(cur->mapping, PAGE_SIZE);
+ free(cur);
+ }
+
+ munmap(shstk_start, PAGE_SIZE);
+
+ if (!ret)
+ printf("[OK]\tGuard gap test, placement mapping's gaps\n");
+
+ return ret;
+}
+
/*
* Too complicated to pull it out of the 32 bit header, but also get the
* 64 bit one needed above. Just define a copy here.
@@ -681,6 +735,144 @@ int test_32bit(void)
return !segv_triggered;
}
+static int parse_uint_from_file(const char *file, const char *fmt)
+{
+ int err, ret;
+ FILE *f;
+
+ f = fopen(file, "re");
+ if (!f) {
+ err = -errno;
+ printf("failed to open '%s': %d\n", file, err);
+ return err;
+ }
+ err = fscanf(f, fmt, &ret);
+ if (err != 1) {
+ err = err == EOF ? -EIO : -errno;
+ printf("failed to parse '%s': %d\n", file, err);
+ fclose(f);
+ return err;
+ }
+ fclose(f);
+ return ret;
+}
+
+static int determine_uprobe_perf_type(void)
+{
+ const char *file = "/sys/bus/event_source/devices/uprobe/type";
+
+ return parse_uint_from_file(file, "%d\n");
+}
+
+static int determine_uprobe_retprobe_bit(void)
+{
+ const char *file = "/sys/bus/event_source/devices/uprobe/format/retprobe";
+
+ return parse_uint_from_file(file, "config:%d\n");
+}
+
+static ssize_t get_uprobe_offset(const void *addr)
+{
+ size_t start, end, base;
+ char buf[256];
+ bool found = false;
+ FILE *f;
+
+ f = fopen("/proc/self/maps", "r");
+ if (!f)
+ return -errno;
+
+ while (fscanf(f, "%zx-%zx %s %zx %*[^\n]\n", &start, &end, buf, &base) == 4) {
+ if (buf[2] == 'x' && (uintptr_t)addr >= start && (uintptr_t)addr < end) {
+ found = true;
+ break;
+ }
+ }
+
+ fclose(f);
+
+ if (!found)
+ return -ESRCH;
+
+ return (uintptr_t)addr - start + base;
+}
+
+static __attribute__((noinline)) void uretprobe_trigger(void)
+{
+ asm volatile ("");
+}
+
+/*
+ * This test setups return uprobe, which is sensitive to shadow stack
+ * (crashes without extra fix). After executing the uretprobe we fail
+ * the test if we receive SIGSEGV, no crash means we're good.
+ *
+ * Helper functions above borrowed from bpf selftests.
+ */
+static int test_uretprobe(void)
+{
+ const size_t attr_sz = sizeof(struct perf_event_attr);
+ const char *file = "/proc/self/exe";
+ int bit, fd = 0, type, err = 1;
+ struct perf_event_attr attr;
+ struct sigaction sa = {};
+ ssize_t offset;
+
+ type = determine_uprobe_perf_type();
+ if (type < 0) {
+ if (type == -ENOENT)
+ printf("[SKIP]\tUretprobe test, uprobes are not available\n");
+ return 0;
+ }
+
+ offset = get_uprobe_offset(uretprobe_trigger);
+ if (offset < 0)
+ return 1;
+
+ bit = determine_uprobe_retprobe_bit();
+ if (bit < 0)
+ return 1;
+
+ sa.sa_sigaction = segv_gp_handler;
+ sa.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGSEGV, &sa, NULL))
+ return 1;
+
+ /* Setup return uprobe through perf event interface. */
+ memset(&attr, 0, attr_sz);
+ attr.size = attr_sz;
+ attr.type = type;
+ attr.config = 1 << bit;
+ attr.config1 = (__u64) (unsigned long) file;
+ attr.config2 = offset;
+
+ fd = syscall(__NR_perf_event_open, &attr, 0 /* pid */, -1 /* cpu */,
+ -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);
+ if (fd < 0)
+ goto out;
+
+ if (sigsetjmp(jmp_buffer, 1))
+ goto out;
+
+ ARCH_PRCTL(ARCH_SHSTK_ENABLE, ARCH_SHSTK_SHSTK);
+
+ /*
+ * This either segfaults and goes through sigsetjmp above
+ * or succeeds and we're good.
+ */
+ uretprobe_trigger();
+
+ printf("[OK]\tUretprobe test\n");
+ err = 0;
+
+out:
+ ARCH_PRCTL(ARCH_SHSTK_DISABLE, ARCH_SHSTK_SHSTK);
+ signal(SIGSEGV, SIG_DFL);
+ if (fd)
+ close(fd);
+ return err;
+}
+
void segv_handler_ptrace(int signum, siginfo_t *si, void *uc)
{
/* The SSP adjustment caused a segfault. */
@@ -850,9 +1042,15 @@ int main(int argc, char *argv[])
goto out;
}
- if (test_guard_gap()) {
+ if (test_guard_gap_other_gaps()) {
+ ret = 1;
+ printf("[FAIL]\tGuard gap test, other mappings' gaps\n");
+ goto out;
+ }
+
+ if (test_guard_gap_new_mappings_gaps()) {
ret = 1;
- printf("[FAIL]\tGuard gap test\n");
+ printf("[FAIL]\tGuard gap test, placement mapping's gaps\n");
goto out;
}
@@ -867,6 +1065,12 @@ int main(int argc, char *argv[])
goto out;
}
+ if (test_uretprobe()) {
+ ret = 1;
+ printf("[FAIL]\turetprobe test\n");
+ goto out;
+ }
+
return ret;
out:
diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
index 47cab972807c..6de11b4df458 100644
--- a/tools/testing/selftests/x86/test_vsyscall.c
+++ b/tools/testing/selftests/x86/test_vsyscall.c
@@ -21,6 +21,13 @@
#include <sys/uio.h>
#include "helpers.h"
+#include "../kselftest.h"
+
+#ifdef __x86_64__
+#define TOTAL_TESTS 13
+#else
+#define TOTAL_TESTS 8
+#endif
#ifdef __x86_64__
# define VSYS(x) (x)
@@ -39,18 +46,6 @@
/* max length of lines in /proc/self/maps - anything longer is skipped here */
#define MAPS_LINE_LEN 128
-static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
- int flags)
-{
- struct sigaction sa;
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = handler;
- sa.sa_flags = SA_SIGINFO | flags;
- sigemptyset(&sa.sa_mask);
- if (sigaction(sig, &sa, 0))
- err(1, "sigaction");
-}
-
/* vsyscalls and vDSO */
bool vsyscall_map_r = false, vsyscall_map_x = false;
@@ -75,83 +70,25 @@ static void init_vdso(void)
if (!vdso)
vdso = dlopen("linux-gate.so.1", RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
if (!vdso) {
- printf("[WARN]\tfailed to find vDSO\n");
+ ksft_print_msg("[WARN] failed to find vDSO\n");
return;
}
vdso_gtod = (gtod_t)dlsym(vdso, "__vdso_gettimeofday");
if (!vdso_gtod)
- printf("[WARN]\tfailed to find gettimeofday in vDSO\n");
+ ksft_print_msg("[WARN] failed to find gettimeofday in vDSO\n");
vdso_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
if (!vdso_gettime)
- printf("[WARN]\tfailed to find clock_gettime in vDSO\n");
+ ksft_print_msg("[WARN] failed to find clock_gettime in vDSO\n");
vdso_time = (time_func_t)dlsym(vdso, "__vdso_time");
if (!vdso_time)
- printf("[WARN]\tfailed to find time in vDSO\n");
+ ksft_print_msg("[WARN] failed to find time in vDSO\n");
vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
if (!vdso_getcpu)
- printf("[WARN]\tfailed to find getcpu in vDSO\n");
-}
-
-static int init_vsys(void)
-{
-#ifdef __x86_64__
- int nerrs = 0;
- FILE *maps;
- char line[MAPS_LINE_LEN];
- bool found = false;
-
- maps = fopen("/proc/self/maps", "r");
- if (!maps) {
- printf("[WARN]\tCould not open /proc/self/maps -- assuming vsyscall is r-x\n");
- vsyscall_map_r = true;
- return 0;
- }
-
- while (fgets(line, MAPS_LINE_LEN, maps)) {
- char r, x;
- void *start, *end;
- char name[MAPS_LINE_LEN];
-
- /* sscanf() is safe here as strlen(name) >= strlen(line) */
- if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
- &start, &end, &r, &x, name) != 5)
- continue;
-
- if (strcmp(name, "[vsyscall]"))
- continue;
-
- printf("\tvsyscall map: %s", line);
-
- if (start != (void *)0xffffffffff600000 ||
- end != (void *)0xffffffffff601000) {
- printf("[FAIL]\taddress range is nonsense\n");
- nerrs++;
- }
-
- printf("\tvsyscall permissions are %c-%c\n", r, x);
- vsyscall_map_r = (r == 'r');
- vsyscall_map_x = (x == 'x');
-
- found = true;
- break;
- }
-
- fclose(maps);
-
- if (!found) {
- printf("\tno vsyscall map in /proc/self/maps\n");
- vsyscall_map_r = false;
- vsyscall_map_x = false;
- }
-
- return nerrs;
-#else
- return 0;
-#endif
+ ksft_print_msg("[WARN] failed to find getcpu in vDSO\n");
}
/* syscalls */
@@ -160,11 +97,6 @@ static inline long sys_gtod(struct timeval *tv, struct timezone *tz)
return syscall(SYS_gettimeofday, tv, tz);
}
-static inline int sys_clock_gettime(clockid_t id, struct timespec *ts)
-{
- return syscall(SYS_clock_gettime, id, ts);
-}
-
static inline long sys_time(time_t *t)
{
return syscall(SYS_time, t);
@@ -176,98 +108,76 @@ static inline long sys_getcpu(unsigned * cpu, unsigned * node,
return syscall(SYS_getcpu, cpu, node, cache);
}
-static jmp_buf jmpbuf;
-static volatile unsigned long segv_err;
-
-static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
-{
- ucontext_t *ctx = (ucontext_t *)ctx_void;
-
- segv_err = ctx->uc_mcontext.gregs[REG_ERR];
- siglongjmp(jmpbuf, 1);
-}
-
static double tv_diff(const struct timeval *a, const struct timeval *b)
{
return (double)(a->tv_sec - b->tv_sec) +
(double)((int)a->tv_usec - (int)b->tv_usec) * 1e-6;
}
-static int check_gtod(const struct timeval *tv_sys1,
- const struct timeval *tv_sys2,
- const struct timezone *tz_sys,
- const char *which,
- const struct timeval *tv_other,
- const struct timezone *tz_other)
+static void check_gtod(const struct timeval *tv_sys1,
+ const struct timeval *tv_sys2,
+ const struct timezone *tz_sys,
+ const char *which,
+ const struct timeval *tv_other,
+ const struct timezone *tz_other)
{
- int nerrs = 0;
double d1, d2;
- if (tz_other && (tz_sys->tz_minuteswest != tz_other->tz_minuteswest || tz_sys->tz_dsttime != tz_other->tz_dsttime)) {
- printf("[FAIL] %s tz mismatch\n", which);
- nerrs++;
- }
+ if (tz_other && (tz_sys->tz_minuteswest != tz_other->tz_minuteswest ||
+ tz_sys->tz_dsttime != tz_other->tz_dsttime))
+ ksft_print_msg("%s tz mismatch\n", which);
d1 = tv_diff(tv_other, tv_sys1);
d2 = tv_diff(tv_sys2, tv_other);
- printf("\t%s time offsets: %lf %lf\n", which, d1, d2);
- if (d1 < 0 || d2 < 0) {
- printf("[FAIL]\t%s time was inconsistent with the syscall\n", which);
- nerrs++;
- } else {
- printf("[OK]\t%s gettimeofday()'s timeval was okay\n", which);
- }
+ ksft_print_msg("%s time offsets: %lf %lf\n", which, d1, d2);
- return nerrs;
+ ksft_test_result(!(d1 < 0 || d2 < 0), "%s gettimeofday()'s timeval\n", which);
}
-static int test_gtod(void)
+static void test_gtod(void)
{
struct timeval tv_sys1, tv_sys2, tv_vdso, tv_vsys;
struct timezone tz_sys, tz_vdso, tz_vsys;
long ret_vdso = -1;
long ret_vsys = -1;
- int nerrs = 0;
- printf("[RUN]\ttest gettimeofday()\n");
+ ksft_print_msg("test gettimeofday()\n");
if (sys_gtod(&tv_sys1, &tz_sys) != 0)
- err(1, "syscall gettimeofday");
+ ksft_exit_fail_msg("syscall gettimeofday: %s\n", strerror(errno));
if (vdso_gtod)
ret_vdso = vdso_gtod(&tv_vdso, &tz_vdso);
if (vsyscall_map_x)
ret_vsys = vgtod(&tv_vsys, &tz_vsys);
if (sys_gtod(&tv_sys2, &tz_sys) != 0)
- err(1, "syscall gettimeofday");
+ ksft_exit_fail_msg("syscall gettimeofday: %s\n", strerror(errno));
if (vdso_gtod) {
- if (ret_vdso == 0) {
- nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vDSO", &tv_vdso, &tz_vdso);
- } else {
- printf("[FAIL]\tvDSO gettimeofday() failed: %ld\n", ret_vdso);
- nerrs++;
- }
+ if (ret_vdso == 0)
+ check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vDSO", &tv_vdso, &tz_vdso);
+ else
+ ksft_test_result_fail("vDSO gettimeofday() failed: %ld\n", ret_vdso);
+ } else {
+ ksft_test_result_skip("vdso_gtod isn't set\n");
}
if (vsyscall_map_x) {
- if (ret_vsys == 0) {
- nerrs += check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vsyscall", &tv_vsys, &tz_vsys);
- } else {
- printf("[FAIL]\tvsys gettimeofday() failed: %ld\n", ret_vsys);
- nerrs++;
- }
+ if (ret_vsys == 0)
+ check_gtod(&tv_sys1, &tv_sys2, &tz_sys, "vsyscall", &tv_vsys, &tz_vsys);
+ else
+ ksft_test_result_fail("vsys gettimeofday() failed: %ld\n", ret_vsys);
+ } else {
+ ksft_test_result_skip("vsyscall_map_x isn't set\n");
}
-
- return nerrs;
}
-static int test_time(void) {
- int nerrs = 0;
-
- printf("[RUN]\ttest time()\n");
+static void test_time(void)
+{
long t_sys1, t_sys2, t_vdso = 0, t_vsys = 0;
long t2_sys1 = -1, t2_sys2 = -1, t2_vdso = -1, t2_vsys = -1;
+
+ ksft_print_msg("test time()\n");
t_sys1 = sys_time(&t2_sys1);
if (vdso_time)
t_vdso = vdso_time(&t2_vdso);
@@ -275,56 +185,60 @@ static int test_time(void) {
t_vsys = vtime(&t2_vsys);
t_sys2 = sys_time(&t2_sys2);
if (t_sys1 < 0 || t_sys1 != t2_sys1 || t_sys2 < 0 || t_sys2 != t2_sys2) {
- printf("[FAIL]\tsyscall failed (ret1:%ld output1:%ld ret2:%ld output2:%ld)\n", t_sys1, t2_sys1, t_sys2, t2_sys2);
- nerrs++;
- return nerrs;
+ ksft_print_msg("syscall failed (ret1:%ld output1:%ld ret2:%ld output2:%ld)\n",
+ t_sys1, t2_sys1, t_sys2, t2_sys2);
+ ksft_test_result_skip("vdso_time\n");
+ ksft_test_result_skip("vdso_time\n");
+ return;
}
if (vdso_time) {
- if (t_vdso < 0 || t_vdso != t2_vdso) {
- printf("[FAIL]\tvDSO failed (ret:%ld output:%ld)\n", t_vdso, t2_vdso);
- nerrs++;
- } else if (t_vdso < t_sys1 || t_vdso > t_sys2) {
- printf("[FAIL]\tvDSO returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vdso, t_sys2);
- nerrs++;
- } else {
- printf("[OK]\tvDSO time() is okay\n");
- }
+ if (t_vdso < 0 || t_vdso != t2_vdso)
+ ksft_test_result_fail("vDSO failed (ret:%ld output:%ld)\n",
+ t_vdso, t2_vdso);
+ else if (t_vdso < t_sys1 || t_vdso > t_sys2)
+ ksft_test_result_fail("vDSO returned the wrong time (%ld %ld %ld)\n",
+ t_sys1, t_vdso, t_sys2);
+ else
+ ksft_test_result_pass("vDSO time() is okay\n");
+ } else {
+ ksft_test_result_skip("vdso_time isn't set\n");
}
if (vsyscall_map_x) {
- if (t_vsys < 0 || t_vsys != t2_vsys) {
- printf("[FAIL]\tvsyscall failed (ret:%ld output:%ld)\n", t_vsys, t2_vsys);
- nerrs++;
- } else if (t_vsys < t_sys1 || t_vsys > t_sys2) {
- printf("[FAIL]\tvsyscall returned the wrong time (%ld %ld %ld)\n", t_sys1, t_vsys, t_sys2);
- nerrs++;
- } else {
- printf("[OK]\tvsyscall time() is okay\n");
- }
+ if (t_vsys < 0 || t_vsys != t2_vsys)
+ ksft_test_result_fail("vsyscall failed (ret:%ld output:%ld)\n",
+ t_vsys, t2_vsys);
+ else if (t_vsys < t_sys1 || t_vsys > t_sys2)
+ ksft_test_result_fail("vsyscall returned the wrong time (%ld %ld %ld)\n",
+ t_sys1, t_vsys, t_sys2);
+ else
+ ksft_test_result_pass("vsyscall time() is okay\n");
+ } else {
+ ksft_test_result_skip("vsyscall_map_x isn't set\n");
}
-
- return nerrs;
}
-static int test_getcpu(int cpu)
+static void test_getcpu(int cpu)
{
- int nerrs = 0;
+ unsigned int cpu_sys, cpu_vdso, cpu_vsys, node_sys, node_vdso, node_vsys;
long ret_sys, ret_vdso = -1, ret_vsys = -1;
+ unsigned int node = 0;
+ bool have_node = false;
+ cpu_set_t cpuset;
- printf("[RUN]\tgetcpu() on CPU %d\n", cpu);
+ ksft_print_msg("getcpu() on CPU %d\n", cpu);
- cpu_set_t cpuset;
CPU_ZERO(&cpuset);
CPU_SET(cpu, &cpuset);
if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) {
- printf("[SKIP]\tfailed to force CPU %d\n", cpu);
- return nerrs;
+ ksft_print_msg("failed to force CPU %d\n", cpu);
+ ksft_test_result_skip("vdso_getcpu\n");
+ ksft_test_result_skip("vsyscall_map_x\n");
+
+ return;
}
- unsigned cpu_sys, cpu_vdso, cpu_vsys, node_sys, node_vdso, node_vsys;
- unsigned node = 0;
- bool have_node = false;
ret_sys = sys_getcpu(&cpu_sys, &node_sys, 0);
if (vdso_getcpu)
ret_vdso = vdso_getcpu(&cpu_vdso, &node_vdso, 0);
@@ -332,10 +246,9 @@ static int test_getcpu(int cpu)
ret_vsys = vgetcpu(&cpu_vsys, &node_vsys, 0);
if (ret_sys == 0) {
- if (cpu_sys != cpu) {
- printf("[FAIL]\tsyscall reported CPU %hu but should be %d\n", cpu_sys, cpu);
- nerrs++;
- }
+ if (cpu_sys != cpu)
+ ksft_print_msg("syscall reported CPU %u but should be %d\n",
+ cpu_sys, cpu);
have_node = true;
node = node_sys;
@@ -343,63 +256,84 @@ static int test_getcpu(int cpu)
if (vdso_getcpu) {
if (ret_vdso) {
- printf("[FAIL]\tvDSO getcpu() failed\n");
- nerrs++;
+ ksft_test_result_fail("vDSO getcpu() failed\n");
} else {
if (!have_node) {
have_node = true;
node = node_vdso;
}
- if (cpu_vdso != cpu) {
- printf("[FAIL]\tvDSO reported CPU %hu but should be %d\n", cpu_vdso, cpu);
- nerrs++;
- } else {
- printf("[OK]\tvDSO reported correct CPU\n");
- }
-
- if (node_vdso != node) {
- printf("[FAIL]\tvDSO reported node %hu but should be %hu\n", node_vdso, node);
- nerrs++;
+ if (cpu_vdso != cpu || node_vdso != node) {
+ if (cpu_vdso != cpu)
+ ksft_print_msg("vDSO reported CPU %u but should be %d\n",
+ cpu_vdso, cpu);
+ if (node_vdso != node)
+ ksft_print_msg("vDSO reported node %u but should be %u\n",
+ node_vdso, node);
+ ksft_test_result_fail("Wrong values\n");
} else {
- printf("[OK]\tvDSO reported correct node\n");
+ ksft_test_result_pass("vDSO reported correct CPU and node\n");
}
}
+ } else {
+ ksft_test_result_skip("vdso_getcpu isn't set\n");
}
if (vsyscall_map_x) {
if (ret_vsys) {
- printf("[FAIL]\tvsyscall getcpu() failed\n");
- nerrs++;
+ ksft_test_result_fail("vsyscall getcpu() failed\n");
} else {
if (!have_node) {
have_node = true;
node = node_vsys;
}
- if (cpu_vsys != cpu) {
- printf("[FAIL]\tvsyscall reported CPU %hu but should be %d\n", cpu_vsys, cpu);
- nerrs++;
- } else {
- printf("[OK]\tvsyscall reported correct CPU\n");
- }
-
- if (node_vsys != node) {
- printf("[FAIL]\tvsyscall reported node %hu but should be %hu\n", node_vsys, node);
- nerrs++;
+ if (cpu_vsys != cpu || node_vsys != node) {
+ if (cpu_vsys != cpu)
+ ksft_print_msg("vsyscall reported CPU %u but should be %d\n",
+ cpu_vsys, cpu);
+ if (node_vsys != node)
+ ksft_print_msg("vsyscall reported node %u but should be %u\n",
+ node_vsys, node);
+ ksft_test_result_fail("Wrong values\n");
} else {
- printf("[OK]\tvsyscall reported correct node\n");
+ ksft_test_result_pass("vsyscall reported correct CPU and node\n");
}
}
+ } else {
+ ksft_test_result_skip("vsyscall_map_x isn't set\n");
}
+}
+
+#ifdef __x86_64__
+
+static jmp_buf jmpbuf;
+static volatile unsigned long segv_err;
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ ksft_exit_fail_msg("sigaction failed\n");
+}
+
+static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t *)ctx_void;
- return nerrs;
+ segv_err = ctx->uc_mcontext.gregs[REG_ERR];
+ siglongjmp(jmpbuf, 1);
}
-static int test_vsys_r(void)
+static void test_vsys_r(void)
{
-#ifdef __x86_64__
- printf("[RUN]\tChecking read access to the vsyscall page\n");
+ ksft_print_msg("Checking read access to the vsyscall page\n");
bool can_read;
if (sigsetjmp(jmpbuf, 1) == 0) {
*(volatile int *)0xffffffffff600000;
@@ -408,32 +342,25 @@ static int test_vsys_r(void)
can_read = false;
}
- if (can_read && !vsyscall_map_r) {
- printf("[FAIL]\tWe have read access, but we shouldn't\n");
- return 1;
- } else if (!can_read && vsyscall_map_r) {
- printf("[FAIL]\tWe don't have read access, but we should\n");
- return 1;
- } else if (can_read) {
- printf("[OK]\tWe have read access\n");
- } else {
- printf("[OK]\tWe do not have read access: #PF(0x%lx)\n",
- segv_err);
- }
-#endif
-
- return 0;
+ if (can_read && !vsyscall_map_r)
+ ksft_test_result_fail("We have read access, but we shouldn't\n");
+ else if (!can_read && vsyscall_map_r)
+ ksft_test_result_fail("We don't have read access, but we should\n");
+ else if (can_read)
+ ksft_test_result_pass("We have read access\n");
+ else
+ ksft_test_result_pass("We do not have read access: #PF(0x%lx)\n", segv_err);
}
-static int test_vsys_x(void)
+static void test_vsys_x(void)
{
-#ifdef __x86_64__
if (vsyscall_map_x) {
/* We already tested this adequately. */
- return 0;
+ ksft_test_result_pass("vsyscall_map_x is true\n");
+ return;
}
- printf("[RUN]\tMake sure that vsyscalls really page fault\n");
+ ksft_print_msg("Make sure that vsyscalls really page fault\n");
bool can_exec;
if (sigsetjmp(jmpbuf, 1) == 0) {
@@ -443,20 +370,14 @@ static int test_vsys_x(void)
can_exec = false;
}
- if (can_exec) {
- printf("[FAIL]\tExecuting the vsyscall did not page fault\n");
- return 1;
- } else if (segv_err & (1 << 4)) { /* INSTR */
- printf("[OK]\tExecuting the vsyscall page failed: #PF(0x%lx)\n",
- segv_err);
- } else {
- printf("[FAIL]\tExecution failed with the wrong error: #PF(0x%lx)\n",
- segv_err);
- return 1;
- }
-#endif
-
- return 0;
+ if (can_exec)
+ ksft_test_result_fail("Executing the vsyscall did not page fault\n");
+ else if (segv_err & (1 << 4)) /* INSTR */
+ ksft_test_result_pass("Executing the vsyscall page failed: #PF(0x%lx)\n",
+ segv_err);
+ else
+ ksft_test_result_fail("Execution failed with the wrong error: #PF(0x%lx)\n",
+ segv_err);
}
/*
@@ -470,14 +391,13 @@ static int test_vsys_x(void)
* fact that ptrace() ever worked was a nice courtesy of old kernels,
* but the code to support it is fairly gross.
*/
-static int test_process_vm_readv(void)
+static void test_process_vm_readv(void)
{
-#ifdef __x86_64__
char buf[4096];
struct iovec local, remote;
int ret;
- printf("[RUN]\tprocess_vm_readv() from vsyscall page\n");
+ ksft_print_msg("process_vm_readv() from vsyscall page\n");
local.iov_base = buf;
local.iov_len = 4096;
@@ -489,27 +409,71 @@ static int test_process_vm_readv(void)
* We expect process_vm_readv() to work if and only if the
* vsyscall page is readable.
*/
- printf("[%s]\tprocess_vm_readv() failed (ret = %d, errno = %d)\n", vsyscall_map_r ? "FAIL" : "OK", ret, errno);
- return vsyscall_map_r ? 1 : 0;
+ ksft_test_result(!vsyscall_map_r,
+ "process_vm_readv() failed (ret = %d, errno = %d)\n", ret, errno);
+ return;
}
- if (vsyscall_map_r) {
- if (!memcmp(buf, remote.iov_base, sizeof(buf))) {
- printf("[OK]\tIt worked and read correct data\n");
- } else {
- printf("[FAIL]\tIt worked but returned incorrect data\n");
- return 1;
+ if (vsyscall_map_r)
+ ksft_test_result(!memcmp(buf, remote.iov_base, sizeof(buf)), "Read data\n");
+ else
+ ksft_test_result_fail("process_rm_readv() succeeded, but it should have failed in this configuration\n");
+}
+
+static void init_vsys(void)
+{
+ int nerrs = 0;
+ FILE *maps;
+ char line[MAPS_LINE_LEN];
+ bool found = false;
+
+ maps = fopen("/proc/self/maps", "r");
+ if (!maps) {
+ ksft_test_result_skip("Could not open /proc/self/maps -- assuming vsyscall is r-x\n");
+ vsyscall_map_r = true;
+ return;
+ }
+
+ while (fgets(line, MAPS_LINE_LEN, maps)) {
+ char r, x;
+ void *start, *end;
+ char name[MAPS_LINE_LEN];
+
+ /* sscanf() is safe here as strlen(name) >= strlen(line) */
+ if (sscanf(line, "%p-%p %c-%cp %*x %*x:%*x %*u %s",
+ &start, &end, &r, &x, name) != 5)
+ continue;
+
+ if (strcmp(name, "[vsyscall]"))
+ continue;
+
+ ksft_print_msg("vsyscall map: %s", line);
+
+ if (start != (void *)0xffffffffff600000 ||
+ end != (void *)0xffffffffff601000) {
+ ksft_print_msg("address range is nonsense\n");
+ nerrs++;
}
- } else {
- printf("[FAIL]\tprocess_rm_readv() succeeded, but it should have failed in this configuration\n");
- return 1;
+
+ ksft_print_msg("vsyscall permissions are %c-%c\n", r, x);
+ vsyscall_map_r = (r == 'r');
+ vsyscall_map_x = (x == 'x');
+
+ found = true;
+ break;
}
-#endif
- return 0;
+ fclose(maps);
+
+ if (!found) {
+ ksft_print_msg("no vsyscall map in /proc/self/maps\n");
+ vsyscall_map_r = false;
+ vsyscall_map_x = false;
+ }
+
+ ksft_test_result(!nerrs, "vsyscall map\n");
}
-#ifdef __x86_64__
static volatile sig_atomic_t num_vsyscall_traps;
static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
@@ -521,15 +485,17 @@ static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
num_vsyscall_traps++;
}
-static int test_emulation(void)
+static void test_emulation(void)
{
time_t tmp;
bool is_native;
- if (!vsyscall_map_x)
- return 0;
+ if (!vsyscall_map_x) {
+ ksft_test_result_skip("vsyscall_map_x isn't set\n");
+ return;
+ }
- printf("[RUN]\tchecking that vsyscalls are emulated\n");
+ ksft_print_msg("checking that vsyscalls are emulated\n");
sethandler(SIGTRAP, sigtrap, 0);
set_eflags(get_eflags() | X86_EFLAGS_TF);
vtime(&tmp);
@@ -545,36 +511,35 @@ static int test_emulation(void)
*/
is_native = (num_vsyscall_traps > 1);
- printf("[%s]\tvsyscalls are %s (%d instructions in vsyscall page)\n",
- (is_native ? "FAIL" : "OK"),
- (is_native ? "native" : "emulated"),
- (int)num_vsyscall_traps);
-
- return is_native;
+ ksft_test_result(!is_native, "vsyscalls are %s (%d instructions in vsyscall page)\n",
+ (is_native ? "native" : "emulated"), (int)num_vsyscall_traps);
}
#endif
int main(int argc, char **argv)
{
- int nerrs = 0;
+ int total_tests = TOTAL_TESTS;
- init_vdso();
- nerrs += init_vsys();
+ ksft_print_header();
+ ksft_set_plan(total_tests);
- nerrs += test_gtod();
- nerrs += test_time();
- nerrs += test_getcpu(0);
- nerrs += test_getcpu(1);
-
- sethandler(SIGSEGV, sigsegv, 0);
- nerrs += test_vsys_r();
- nerrs += test_vsys_x();
+ init_vdso();
+#ifdef __x86_64__
+ init_vsys();
+#endif
- nerrs += test_process_vm_readv();
+ test_gtod();
+ test_time();
+ test_getcpu(0);
+ test_getcpu(1);
#ifdef __x86_64__
- nerrs += test_emulation();
+ sethandler(SIGSEGV, sigsegv, 0);
+ test_vsys_r();
+ test_vsys_x();
+ test_process_vm_readv();
+ test_emulation();
#endif
- return nerrs ? 1 : 0;
+ ksft_finished();
}
diff --git a/tools/testing/selftests/x86/vdso_restorer.c b/tools/testing/selftests/x86/vdso_restorer.c
index fe99f2434155..ac8d8e1e9805 100644
--- a/tools/testing/selftests/x86/vdso_restorer.c
+++ b/tools/testing/selftests/x86/vdso_restorer.c
@@ -92,4 +92,6 @@ int main()
printf("[FAIL]\t!SA_SIGINFO handler was not called\n");
nerrs++;
}
+
+ return nerrs;
}
diff --git a/tools/testing/vsock/Makefile b/tools/testing/vsock/Makefile
index a7f56a09ca9f..6e0b4e95e230 100644
--- a/tools/testing/vsock/Makefile
+++ b/tools/testing/vsock/Makefile
@@ -13,3 +13,16 @@ CFLAGS += -g -O2 -Werror -Wall -I. -I../../include -I../../../usr/include -Wno-p
clean:
${RM} *.o *.d vsock_test vsock_diag_test vsock_perf vsock_uring_test
-include *.d
+
+VSOCK_INSTALL_PATH ?=
+
+install: all
+ifdef VSOCK_INSTALL_PATH
+ mkdir -p $(VSOCK_INSTALL_PATH)
+ install -m 744 vsock_test $(VSOCK_INSTALL_PATH)
+ install -m 744 vsock_perf $(VSOCK_INSTALL_PATH)
+ install -m 744 vsock_diag_test $(VSOCK_INSTALL_PATH)
+ install -m 744 vsock_uring_test $(VSOCK_INSTALL_PATH)
+else
+ $(error Error: set VSOCK_INSTALL_PATH to use install)
+endif