summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/arch/ia64/include/asm/barrier.h3
-rw-r--r--tools/arch/x86/include/asm/inat.h2
-rw-r--r--tools/arch/x86/include/asm/insn.h46
-rw-r--r--tools/arch/x86/include/asm/msr-index.h2
-rw-r--r--tools/arch/x86/include/asm/nops.h81
-rw-r--r--tools/arch/x86/kcpuid/Makefile24
-rw-r--r--tools/arch/x86/kcpuid/cpuid.csv400
-rw-r--r--tools/arch/x86/kcpuid/kcpuid.c657
-rw-r--r--tools/arch/x86/lib/inat.c2
-rw-r--r--tools/arch/x86/lib/insn.c230
-rw-r--r--tools/bpf/Makefile.helpers60
-rw-r--r--tools/bpf/bpf_dbg.c2
-rw-r--r--tools/bpf/bpf_exp.y14
-rw-r--r--tools/bpf/bpftool/.gitignore1
-rw-r--r--tools/bpf/bpftool/Documentation/Makefile11
-rw-r--r--tools/bpf/bpftool/Documentation/bpftool-gen.rst78
-rw-r--r--tools/bpf/bpftool/bash-completion/bpftool17
-rw-r--r--tools/bpf/bpftool/btf.c41
-rw-r--r--tools/bpf/bpftool/btf_dumper.c1
-rw-r--r--tools/bpf/bpftool/common.c1
-rw-r--r--tools/bpf/bpftool/feature.c4
-rw-r--r--tools/bpf/bpftool/gen.c72
-rw-r--r--tools/bpf/bpftool/main.c3
-rw-r--r--tools/bpf/bpftool/map.c2
-rw-r--r--tools/bpf/bpftool/net.c2
-rw-r--r--tools/bpf/bpftool/prog.c1
-rw-r--r--tools/bpf/bpftool/xlated_dumper.c3
-rw-r--r--tools/bpf/resolve_btfids/main.c11
-rw-r--r--tools/bpf/runqslower/Makefile9
-rw-r--r--tools/bpf/runqslower/runqslower.bpf.c33
-rw-r--r--tools/build/Build.include24
-rw-r--r--tools/cgroup/memcg_slabinfo.py8
-rwxr-xr-xtools/debugging/kernel-chktaint2
-rw-r--r--tools/iio/Makefile1
-rw-r--r--tools/iio/iio_event_monitor.c69
-rw-r--r--tools/iio/iio_generic_buffer.c153
-rw-r--r--tools/iio/iio_utils.c18
-rw-r--r--tools/iio/iio_utils.h9
-rw-r--r--tools/include/linux/kconfig.h73
-rw-r--r--tools/include/linux/static_call_types.h18
-rw-r--r--tools/include/uapi/asm/errno.h2
-rw-r--r--tools/include/uapi/linux/bpf.h850
-rw-r--r--tools/include/uapi/linux/btf.h5
-rw-r--r--tools/include/uapi/linux/kvm.h13
-rw-r--r--tools/kvm/kvm_stat/kvm_stat.service1
-rw-r--r--tools/lib/bpf/Build2
-rw-r--r--tools/lib/bpf/Makefile5
-rw-r--r--tools/lib/bpf/bpf_core_read.h16
-rw-r--r--tools/lib/bpf/bpf_helpers.h40
-rw-r--r--tools/lib/bpf/bpf_tracing.h58
-rw-r--r--tools/lib/bpf/btf.c768
-rw-r--r--tools/lib/bpf/btf.h9
-rw-r--r--tools/lib/bpf/btf_dump.c14
-rw-r--r--tools/lib/bpf/libbpf.c908
-rw-r--r--tools/lib/bpf/libbpf.h20
-rw-r--r--tools/lib/bpf/libbpf.map12
-rw-r--r--tools/lib/bpf/libbpf_internal.h85
-rw-r--r--tools/lib/bpf/libbpf_util.h47
-rw-r--r--tools/lib/bpf/linker.c2883
-rw-r--r--tools/lib/bpf/netlink.c2
-rw-r--r--tools/lib/bpf/ringbuf.c2
-rw-r--r--tools/lib/bpf/strset.c176
-rw-r--r--tools/lib/bpf/strset.h21
-rw-r--r--tools/lib/bpf/xsk.c316
-rw-r--r--tools/lib/bpf/xsk.h87
-rw-r--r--tools/memory-model/Documentation/access-marking.txt479
-rw-r--r--tools/memory-model/Documentation/glossary.txt2
-rw-r--r--tools/memory-model/Documentation/simple.txt1
-rw-r--r--tools/objtool/arch/x86/decode.c424
-rw-r--r--tools/objtool/arch/x86/include/arch/cfi_regs.h12
-rw-r--r--tools/objtool/arch/x86/include/arch/special.h2
-rw-r--r--tools/objtool/builtin-check.c43
-rw-r--r--tools/objtool/builtin-orc.c5
-rw-r--r--tools/objtool/check.c236
-rw-r--r--tools/objtool/elf.c289
-rw-r--r--tools/objtool/include/objtool/arch.h5
-rw-r--r--tools/objtool/include/objtool/builtin.h5
-rw-r--r--tools/objtool/include/objtool/check.h3
-rw-r--r--tools/objtool/include/objtool/elf.h13
-rw-r--r--tools/objtool/include/objtool/objtool.h1
-rw-r--r--tools/objtool/objtool.c65
-rw-r--r--tools/objtool/orc_gen.c33
-rw-r--r--tools/objtool/special.c12
-rwxr-xr-xtools/objtool/sync-check.sh18
-rw-r--r--tools/perf/MANIFEST2
-rw-r--r--tools/perf/arch/x86/tests/insn-x86.c9
-rw-r--r--tools/perf/arch/x86/util/archinsn.c9
-rw-r--r--tools/perf/arch/x86/util/intel-pt.c6
-rw-r--r--tools/perf/builtin-daemon.c57
-rw-r--r--tools/perf/builtin-ftrace.c2
-rw-r--r--tools/perf/builtin-inject.c2
-rwxr-xr-xtools/perf/check-headers.sh15
-rw-r--r--tools/perf/tests/bpf.c9
-rwxr-xr-xtools/perf/tests/shell/daemon.sh2
-rwxr-xr-xtools/perf/trace/beauty/tracepoints/x86_msr.sh2
-rw-r--r--tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c4
-rw-r--r--tools/perf/util/auxtrace.c6
-rw-r--r--tools/perf/util/block-info.c6
-rw-r--r--tools/perf/util/bpf-event.c13
-rw-r--r--tools/perf/util/data.c5
-rw-r--r--tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c17
-rw-r--r--tools/perf/util/map.c7
-rw-r--r--tools/perf/util/parse-events.c3
-rw-r--r--tools/perf/util/pmu.c33
-rw-r--r--tools/perf/util/pmu.h3
-rw-r--r--tools/perf/util/synthetic-events.c11
-rw-r--r--tools/perf/util/vdso.c2
-rw-r--r--tools/power/acpi/common/cmfsize.c2
-rwxr-xr-xtools/power/pm-graph/sleepgraph.py2
-rw-r--r--tools/power/x86/intel-speed-select/isst-config.c30
-rw-r--r--tools/power/x86/intel-speed-select/isst-display.c12
-rw-r--r--tools/power/x86/turbostat/turbostat.c2
-rw-r--r--tools/scripts/Makefile.include15
-rw-r--r--tools/spi/Makefile5
-rw-r--r--tools/testing/kunit/configs/broken_on_uml.config2
-rwxr-xr-xtools/testing/kunit/kunit.py4
-rw-r--r--tools/testing/kunit/kunit_config.py2
-rw-r--r--tools/testing/kunit/kunit_kernel.py2
-rwxr-xr-xtools/testing/kunit/kunit_tool_test.py6
-rw-r--r--tools/testing/radix-tree/idr-test.c21
-rw-r--r--tools/testing/radix-tree/linux/compiler_types.h0
-rw-r--r--tools/testing/radix-tree/multiorder.c2
-rw-r--r--tools/testing/radix-tree/xarray.c2
-rw-r--r--tools/testing/selftests/arm64/Makefile2
-rw-r--r--tools/testing/selftests/arm64/bti/.gitignore2
-rw-r--r--tools/testing/selftests/arm64/bti/Makefile61
-rw-r--r--tools/testing/selftests/arm64/bti/assembler.h80
-rw-r--r--tools/testing/selftests/arm64/bti/btitest.h23
-rw-r--r--tools/testing/selftests/arm64/bti/compiler.h21
-rw-r--r--tools/testing/selftests/arm64/bti/gen/.gitignore2
-rw-r--r--tools/testing/selftests/arm64/bti/signal.c37
-rw-r--r--tools/testing/selftests/arm64/bti/signal.h21
-rw-r--r--tools/testing/selftests/arm64/bti/start.S14
-rw-r--r--tools/testing/selftests/arm64/bti/syscall.S23
-rw-r--r--tools/testing/selftests/arm64/bti/system.c22
-rw-r--r--tools/testing/selftests/arm64/bti/system.h28
-rw-r--r--tools/testing/selftests/arm64/bti/test.c234
-rw-r--r--tools/testing/selftests/arm64/bti/teststubs.S39
-rw-r--r--tools/testing/selftests/arm64/bti/trampoline.S29
-rw-r--r--tools/testing/selftests/arm64/fp/sve-test.S22
-rw-r--r--tools/testing/selftests/arm64/mte/Makefile15
-rw-r--r--tools/testing/selftests/arm64/mte/check_ksm_options.c5
-rw-r--r--tools/testing/selftests/arm64/mte/check_user_mem.c3
-rw-r--r--tools/testing/selftests/arm64/mte/mte_common_util.c39
-rw-r--r--tools/testing/selftests/bpf/.gitignore2
-rw-r--r--tools/testing/selftests/bpf/Makefile79
-rw-r--r--tools/testing/selftests/bpf/Makefile.docs82
-rw-r--r--tools/testing/selftests/bpf/README.rst71
-rw-r--r--tools/testing/selftests/bpf/bpf_tcp_helpers.h29
-rw-r--r--tools/testing/selftests/bpf/btf_helpers.c4
-rw-r--r--tools/testing/selftests/bpf/config2
-rw-r--r--tools/testing/selftests/bpf/get_cgroup_id_user.c6
-rw-r--r--tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c109
-rw-r--r--tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c158
-rw-r--r--tools/testing/selftests/bpf/prog_tests/attach_probe.c40
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c44
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf.c176
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_dump.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_endian.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_link.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/check_mtu.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_reloc.c52
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fentry_test.c52
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c58
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_sleep.c82
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_test.c52
-rw-r--r--tools/testing/selftests/bpf/prog_tests/for_each.c130
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kfree_skb.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kfunc_call.c59
-rw-r--r--tools/testing/selftests/bpf/prog_tests/linked_funcs.c42
-rw-r--r--tools/testing/selftests/bpf/prog_tests/linked_maps.c30
-rw-r--r--tools/testing/selftests/bpf/prog_tests/linked_vars.c43
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_ptr.c15
-rw-r--r--tools/testing/selftests/bpf/prog_tests/mmap.c24
-rw-r--r--tools/testing/selftests/bpf/prog_tests/module_attach.c23
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c51
-rw-r--r--tools/testing/selftests/bpf/prog_tests/resolve_btfids.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ringbuf.c17
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c37
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sk_lookup.c83
-rw-r--r--tools/testing/selftests/bpf/prog_tests/snprintf.c125
-rw-r--r--tools/testing/selftests/bpf/prog_tests/snprintf_btf.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_basic.c40
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_listen.c144
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt_sk.c65
-rw-r--r--tools/testing/selftests/bpf/prog_tests/static_linked.c40
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_local_storage.c92
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_ima.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_lsm.c61
-rw-r--r--tools/testing/selftests/bpf/progs/bind4_prog.c25
-rw-r--r--tools/testing/selftests/bpf/progs/bind6_prog.c25
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_cubic.c36
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_dctcp.c22
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c27
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_tcp_nogpl.c19
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_kind.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_value_type.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_kind.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_sz.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_type.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_struct_type.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_existence___wrong_field_defs.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c15
-rw-r--r--tools/testing/selftests/bpf/progs/core_reloc_types.h25
-rw-r--r--tools/testing/selftests/bpf/progs/fentry_test.c2
-rw-r--r--tools/testing/selftests/bpf/progs/fexit_sleep.c31
-rw-r--r--tools/testing/selftests/bpf/progs/fexit_test.c4
-rw-r--r--tools/testing/selftests/bpf/progs/for_each_array_map_elem.c61
-rw-r--r--tools/testing/selftests/bpf/progs/for_each_hash_map_elem.c95
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_test.c47
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c42
-rw-r--r--tools/testing/selftests/bpf/progs/linked_funcs1.c73
-rw-r--r--tools/testing/selftests/bpf/progs/linked_funcs2.c73
-rw-r--r--tools/testing/selftests/bpf/progs/linked_maps1.c82
-rw-r--r--tools/testing/selftests/bpf/progs/linked_maps2.c76
-rw-r--r--tools/testing/selftests/bpf/progs/linked_vars1.c54
-rw-r--r--tools/testing/selftests/bpf/progs/linked_vars2.c55
-rw-r--r--tools/testing/selftests/bpf/progs/loop6.c99
-rw-r--r--tools/testing/selftests/bpf/progs/map_ptr_kern.c4
-rw-r--r--tools/testing/selftests/bpf/progs/skb_pkt_end.c1
-rw-r--r--tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c12
-rw-r--r--tools/testing/selftests/bpf/progs/sockopt_sk.c11
-rw-r--r--tools/testing/selftests/bpf/progs/task_local_storage.c64
-rw-r--r--tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c32
-rw-r--r--tools/testing/selftests/bpf/progs/task_ls_recursion.c70
-rw-r--r--tools/testing/selftests/bpf/progs/test_check_mtu.c92
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_size.c3
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func10.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_mmap.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_ringbuf.c1
-rw-r--r--tools/testing/selftests/bpf/progs/test_ringbuf_multi.c12
-rw-r--r--tools/testing/selftests/bpf/progs/test_sk_lookup.c62
-rw-r--r--tools/testing/selftests/bpf/progs/test_snprintf.c73
-rw-r--r--tools/testing/selftests/bpf/progs/test_snprintf_single.c20
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_listen.c26
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c18
-rw-r--r--tools/testing/selftests/bpf/progs/test_static_linked1.c30
-rw-r--r--tools/testing/selftests/bpf/progs/test_static_linked2.c31
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_tunnel.c113
-rw-r--r--tools/testing/selftests/bpf/progs/test_tunnel_kern.c8
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool_build.sh21
-rw-r--r--tools/testing/selftests/bpf/test_btf.h3
-rwxr-xr-xtools/testing/selftests/bpf/test_doc_build.sh13
-rw-r--r--tools/testing/selftests/bpf/test_progs.h63
-rw-r--r--tools/testing/selftests/bpf/test_sockmap.c2
-rwxr-xr-xtools/testing/selftests/bpf/test_tc_tunnel.sh15
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c4
-rwxr-xr-xtools/testing/selftests/bpf/test_xsk.sh138
-rw-r--r--tools/testing/selftests/bpf/verifier/array_access.c2
-rw-r--r--tools/testing/selftests/bpf/verifier/bounds.c5
-rw-r--r--tools/testing/selftests/bpf/verifier/bounds_deduction.c30
-rw-r--r--tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c13
-rw-r--r--tools/testing/selftests/bpf/verifier/bpf_get_stack.c43
-rw-r--r--tools/testing/selftests/bpf/verifier/calls.c12
-rw-r--r--tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c1
-rw-r--r--tools/testing/selftests/bpf/verifier/dead_code.c10
-rw-r--r--tools/testing/selftests/bpf/verifier/map_ptr.c4
-rw-r--r--tools/testing/selftests/bpf/verifier/unpriv.c15
-rw-r--r--tools/testing/selftests/bpf/verifier/value_ptr_arith.c27
-rwxr-xr-xtools/testing/selftests/bpf/vmtest.sh59
-rw-r--r--tools/testing/selftests/bpf/xdpxceiver.c862
-rw-r--r--tools/testing/selftests/bpf/xdpxceiver.h98
-rwxr-xr-xtools/testing/selftests/bpf/xsk_prereqs.sh30
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh31
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh3
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/port_scale.sh6
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh82
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh4
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh7
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh77
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh4
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh4
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh6
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh21
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/tc_sample.sh657
-rw-r--r--tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh5
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh110
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/nexthop.sh620
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/psample.sh181
-rw-r--r--tools/testing/selftests/firmware/fw_namespace.c2
-rw-r--r--tools/testing/selftests/kvm/.gitignore3
-rw-r--r--tools/testing/selftests/kvm/Makefile5
-rw-r--r--tools/testing/selftests/kvm/hardware_disable_test.c10
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h2
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c7
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util_internal.h2
-rw-r--r--tools/testing/selftests/kvm/x86_64/get_msr_index_features.c134
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_clock.c269
-rw-r--r--tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c166
-rw-r--r--tools/testing/selftests/lib.mk7
-rw-r--r--tools/testing/selftests/lkdtm/.gitignore1
-rw-r--r--tools/testing/selftests/lkdtm/Makefile1
-rwxr-xr-xtools/testing/selftests/lkdtm/stack-entropy.sh36
-rw-r--r--tools/testing/selftests/net/Makefile4
-rwxr-xr-xtools/testing/selftests/net/fib_nexthops.sh564
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh152
-rwxr-xr-xtools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh366
-rw-r--r--tools/testing/selftests/net/forwarding/fib_offload_lib.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh361
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh14
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh2
-rw-r--r--tools/testing/selftests/net/forwarding/mirror_lib.sh19
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_mpath_nh_res.sh400
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_police.sh56
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh13
-rw-r--r--tools/testing/selftests/net/mptcp/Makefile2
-rwxr-xr-xtools/testing/selftests/net/mptcp/diag.sh55
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_connect.c77
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh51
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh278
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_sockopt.sh276
-rwxr-xr-xtools/testing/selftests/net/mptcp/pm_netlink.sh6
-rw-r--r--tools/testing/selftests/net/mptcp/pm_nl_ctl.c34
-rwxr-xr-xtools/testing/selftests/net/mptcp/simult_flows.sh13
-rw-r--r--tools/testing/selftests/net/reuseaddr_ports_exhausted.c32
-rw-r--r--tools/testing/selftests/net/settings1
-rw-r--r--tools/testing/selftests/net/so_txtime.c247
-rwxr-xr-xtools/testing/selftests/net/so_txtime.sh97
-rwxr-xr-xtools/testing/selftests/net/udpgro_fwd.sh251
-rwxr-xr-xtools/testing/selftests/net/veth.sh177
-rwxr-xr-xtools/testing/selftests/netfilter/nft_flowtable.sh82
-rw-r--r--tools/testing/selftests/perf_events/.gitignore3
-rw-r--r--tools/testing/selftests/perf_events/Makefile6
-rw-r--r--tools/testing/selftests/perf_events/config1
-rw-r--r--tools/testing/selftests/perf_events/remove_on_exec.c260
-rw-r--r--tools/testing/selftests/perf_events/settings1
-rw-r--r--tools/testing/selftests/perf_events/sigtrap_threads.c210
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/Makefile2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/cpus2use.sh1
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/jitter.sh20
-rw-r--r--tools/testing/selftests/rcutorture/bin/jitterstart.sh37
-rw-r--r--tools/testing/selftests/rcutorture/bin/jitterstop.sh23
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-again.sh199
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh67
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh176
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh218
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-transform.sh54
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm.sh98
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/torture.sh2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/CFLIST4
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-T (renamed from tools/testing/selftests/rcutorture/configs/rcu/SRCU-t)0
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-T.boot (renamed from tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot)0
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-U (renamed from tools/testing/selftests/rcutorture/configs/rcu/SRCU-u)0
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-U.boot (renamed from tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot)0
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh2
-rw-r--r--tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh2
-rw-r--r--tools/testing/selftests/resctrl/.gitignore2
-rw-r--r--tools/testing/selftests/resctrl/Makefile2
-rw-r--r--tools/testing/selftests/resctrl/README4
-rw-r--r--tools/testing/selftests/resctrl/cache.c52
-rw-r--r--tools/testing/selftests/resctrl/cat_test.c57
-rw-r--r--tools/testing/selftests/resctrl/cmt_test.c (renamed from tools/testing/selftests/resctrl/cqm_test.c)75
-rw-r--r--tools/testing/selftests/resctrl/config2
-rw-r--r--tools/testing/selftests/resctrl/fill_buf.c4
-rw-r--r--tools/testing/selftests/resctrl/mba_test.c43
-rw-r--r--tools/testing/selftests/resctrl/mbm_test.c42
-rw-r--r--tools/testing/selftests/resctrl/resctrl.h29
-rw-r--r--tools/testing/selftests/resctrl/resctrl_tests.c163
-rw-r--r--tools/testing/selftests/resctrl/resctrl_val.c95
-rw-r--r--tools/testing/selftests/resctrl/resctrlfs.c134
-rw-r--r--tools/testing/selftests/sgx/defines.h2
-rw-r--r--tools/testing/selftests/sgx/load.c69
-rw-r--r--tools/testing/selftests/sgx/main.c26
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/police.json48
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/simple.json83
-rw-r--r--tools/testing/selftests/timers/clocksource-switch.c4
-rw-r--r--tools/testing/selftests/timers/leap-a-day.c2
-rw-r--r--tools/testing/selftests/timers/leapcrash.c4
-rw-r--r--tools/testing/selftests/timers/threadtest.c2
-rw-r--r--tools/testing/selftests/vm/Makefile4
-rw-r--r--tools/testing/selftests/x86/thunks_32.S2
-rw-r--r--tools/thermal/tmon/Makefile2
-rw-r--r--tools/usb/usbip/doc/usbip.842
-rw-r--r--tools/usb/usbip/doc/usbipd.826
-rw-r--r--tools/usb/usbip/libsrc/list.h10
-rw-r--r--tools/usb/usbip/src/usbip_list.c3
383 files changed, 21596 insertions, 3581 deletions
diff --git a/tools/arch/ia64/include/asm/barrier.h b/tools/arch/ia64/include/asm/barrier.h
index 4d471d9511a5..6fffe5682713 100644
--- a/tools/arch/ia64/include/asm/barrier.h
+++ b/tools/arch/ia64/include/asm/barrier.h
@@ -39,9 +39,6 @@
* sequential memory pages only.
*/
-/* XXX From arch/ia64/include/uapi/asm/gcc_intrin.h */
-#define ia64_mf() asm volatile ("mf" ::: "memory")
-
#define mb() ia64_mf()
#define rmb() mb()
#define wmb() mb()
diff --git a/tools/arch/x86/include/asm/inat.h b/tools/arch/x86/include/asm/inat.h
index 877827b7c2c3..a61051400311 100644
--- a/tools/arch/x86/include/asm/inat.h
+++ b/tools/arch/x86/include/asm/inat.h
@@ -6,7 +6,7 @@
*
* Written by Masami Hiramatsu <mhiramat@redhat.com>
*/
-#include "inat_types.h"
+#include "inat_types.h" /* __ignore_sync_check__ */
/*
* Internal bits. Don't use bitmasks directly, because these bits are
diff --git a/tools/arch/x86/include/asm/insn.h b/tools/arch/x86/include/asm/insn.h
index cc777c185212..dc632b41f135 100644
--- a/tools/arch/x86/include/asm/insn.h
+++ b/tools/arch/x86/include/asm/insn.h
@@ -9,7 +9,7 @@
#include <asm/byteorder.h>
/* insn_attr_t is defined in inat.h */
-#include "inat.h"
+#include "inat.h" /* __ignore_sync_check__ */
#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN)
@@ -132,13 +132,25 @@ struct insn {
#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */
extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64);
-extern void insn_get_prefixes(struct insn *insn);
-extern void insn_get_opcode(struct insn *insn);
-extern void insn_get_modrm(struct insn *insn);
-extern void insn_get_sib(struct insn *insn);
-extern void insn_get_displacement(struct insn *insn);
-extern void insn_get_immediate(struct insn *insn);
-extern void insn_get_length(struct insn *insn);
+extern int insn_get_prefixes(struct insn *insn);
+extern int insn_get_opcode(struct insn *insn);
+extern int insn_get_modrm(struct insn *insn);
+extern int insn_get_sib(struct insn *insn);
+extern int insn_get_displacement(struct insn *insn);
+extern int insn_get_immediate(struct insn *insn);
+extern int insn_get_length(struct insn *insn);
+
+enum insn_mode {
+ INSN_MODE_32,
+ INSN_MODE_64,
+ /* Mode is determined by the current kernel build. */
+ INSN_MODE_KERN,
+ INSN_NUM_MODES,
+};
+
+extern int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m);
+
+#define insn_decode_kernel(_insn, _ptr) insn_decode((_insn), (_ptr), MAX_INSN_SIZE, INSN_MODE_KERN)
/* Attribute will be determined after getting ModRM (for opcode groups) */
static inline void insn_get_attribute(struct insn *insn)
@@ -149,17 +161,6 @@ static inline void insn_get_attribute(struct insn *insn)
/* Instruction uses RIP-relative addressing */
extern int insn_rip_relative(struct insn *insn);
-/* Init insn for kernel text */
-static inline void kernel_insn_init(struct insn *insn,
- const void *kaddr, int buf_len)
-{
-#ifdef CONFIG_X86_64
- insn_init(insn, kaddr, buf_len, 1);
-#else /* CONFIG_X86_32 */
- insn_init(insn, kaddr, buf_len, 0);
-#endif
-}
-
static inline int insn_is_avx(struct insn *insn)
{
if (!insn->prefixes.got)
@@ -179,13 +180,6 @@ static inline int insn_has_emulate_prefix(struct insn *insn)
return !!insn->emulate_prefix_size;
}
-/* Ensure this instruction is decoded completely */
-static inline int insn_complete(struct insn *insn)
-{
- return insn->opcode.got && insn->modrm.got && insn->sib.got &&
- insn->displacement.got && insn->immediate.got;
-}
-
static inline insn_byte_t insn_vex_m_bits(struct insn *insn)
{
if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */
diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h
index 546d6ecf0a35..45029354e0a8 100644
--- a/tools/arch/x86/include/asm/msr-index.h
+++ b/tools/arch/x86/include/asm/msr-index.h
@@ -628,8 +628,6 @@
#define MSR_IA32_APICBASE_ENABLE (1<<11)
#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
-#define MSR_IA32_TSCDEADLINE 0x000006e0
-
#define MSR_IA32_UCODE_WRITE 0x00000079
#define MSR_IA32_UCODE_REV 0x0000008b
diff --git a/tools/arch/x86/include/asm/nops.h b/tools/arch/x86/include/asm/nops.h
new file mode 100644
index 000000000000..c1e5e818ba16
--- /dev/null
+++ b/tools/arch/x86/include/asm/nops.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_NOPS_H
+#define _ASM_X86_NOPS_H
+
+/*
+ * Define nops for use with alternative() and for tracing.
+ */
+
+#ifndef CONFIG_64BIT
+
+/*
+ * Generic 32bit nops from GAS:
+ *
+ * 1: nop
+ * 2: movl %esi,%esi
+ * 3: leal 0x0(%esi),%esi
+ * 4: leal 0x0(%esi,%eiz,1),%esi
+ * 5: leal %ds:0x0(%esi,%eiz,1),%esi
+ * 6: leal 0x0(%esi),%esi
+ * 7: leal 0x0(%esi,%eiz,1),%esi
+ * 8: leal %ds:0x0(%esi,%eiz,1),%esi
+ *
+ * Except 5 and 8, which are DS prefixed 4 and 7 resp, where GAS would emit 2
+ * nop instructions.
+ */
+#define BYTES_NOP1 0x90
+#define BYTES_NOP2 0x89,0xf6
+#define BYTES_NOP3 0x8d,0x76,0x00
+#define BYTES_NOP4 0x8d,0x74,0x26,0x00
+#define BYTES_NOP5 0x3e,BYTES_NOP4
+#define BYTES_NOP6 0x8d,0xb6,0x00,0x00,0x00,0x00
+#define BYTES_NOP7 0x8d,0xb4,0x26,0x00,0x00,0x00,0x00
+#define BYTES_NOP8 0x3e,BYTES_NOP7
+
+#else
+
+/*
+ * Generic 64bit nops from GAS:
+ *
+ * 1: nop
+ * 2: osp nop
+ * 3: nopl (%eax)
+ * 4: nopl 0x00(%eax)
+ * 5: nopl 0x00(%eax,%eax,1)
+ * 6: osp nopl 0x00(%eax,%eax,1)
+ * 7: nopl 0x00000000(%eax)
+ * 8: nopl 0x00000000(%eax,%eax,1)
+ */
+#define BYTES_NOP1 0x90
+#define BYTES_NOP2 0x66,BYTES_NOP1
+#define BYTES_NOP3 0x0f,0x1f,0x00
+#define BYTES_NOP4 0x0f,0x1f,0x40,0x00
+#define BYTES_NOP5 0x0f,0x1f,0x44,0x00,0x00
+#define BYTES_NOP6 0x66,BYTES_NOP5
+#define BYTES_NOP7 0x0f,0x1f,0x80,0x00,0x00,0x00,0x00
+#define BYTES_NOP8 0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00
+
+#endif /* CONFIG_64BIT */
+
+#ifdef __ASSEMBLY__
+#define _ASM_MK_NOP(x) .byte x
+#else
+#define _ASM_MK_NOP(x) ".byte " __stringify(x) "\n"
+#endif
+
+#define ASM_NOP1 _ASM_MK_NOP(BYTES_NOP1)
+#define ASM_NOP2 _ASM_MK_NOP(BYTES_NOP2)
+#define ASM_NOP3 _ASM_MK_NOP(BYTES_NOP3)
+#define ASM_NOP4 _ASM_MK_NOP(BYTES_NOP4)
+#define ASM_NOP5 _ASM_MK_NOP(BYTES_NOP5)
+#define ASM_NOP6 _ASM_MK_NOP(BYTES_NOP6)
+#define ASM_NOP7 _ASM_MK_NOP(BYTES_NOP7)
+#define ASM_NOP8 _ASM_MK_NOP(BYTES_NOP8)
+
+#define ASM_NOP_MAX 8
+
+#ifndef __ASSEMBLY__
+extern const unsigned char * const x86_nops[];
+#endif
+
+#endif /* _ASM_X86_NOPS_H */
diff --git a/tools/arch/x86/kcpuid/Makefile b/tools/arch/x86/kcpuid/Makefile
new file mode 100644
index 000000000000..87b554fab14b
--- /dev/null
+++ b/tools/arch/x86/kcpuid/Makefile
@@ -0,0 +1,24 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for x86/kcpuid tool
+
+kcpuid : kcpuid.c
+
+CFLAGS = -Wextra
+
+BINDIR ?= /usr/sbin
+
+HWDATADIR ?= /usr/share/misc/
+
+override CFLAGS += -O2 -Wall -I../../../include
+
+%: %.c
+ $(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)
+
+.PHONY : clean
+clean :
+ @rm -f kcpuid
+
+install : kcpuid
+ install -d $(DESTDIR)$(BINDIR)
+ install -m 755 -p kcpuid $(DESTDIR)$(BINDIR)/kcpuid
+ install -m 444 -p cpuid.csv $(HWDATADIR)/cpuid.csv
diff --git a/tools/arch/x86/kcpuid/cpuid.csv b/tools/arch/x86/kcpuid/cpuid.csv
new file mode 100644
index 000000000000..4f1c4b0c29e9
--- /dev/null
+++ b/tools/arch/x86/kcpuid/cpuid.csv
@@ -0,0 +1,400 @@
+# The basic row format is:
+# LEAF, SUBLEAF, register_name, bits, short_name, long_description
+
+# Leaf 00H
+ 0, 0, EAX, 31:0, max_basic_leafs, Max input value for supported subleafs
+
+# Leaf 01H
+ 1, 0, EAX, 3:0, stepping, Stepping ID
+ 1, 0, EAX, 7:4, model, Model
+ 1, 0, EAX, 11:8, family, Family ID
+ 1, 0, EAX, 13:12, processor, Processor Type
+ 1, 0, EAX, 19:16, model_ext, Extended Model ID
+ 1, 0, EAX, 27:20, family_ext, Extended Family ID
+
+ 1, 0, EBX, 7:0, brand, Brand Index
+ 1, 0, EBX, 15:8, clflush_size, CLFLUSH line size (value * 8) in bytes
+ 1, 0, EBX, 23:16, max_cpu_id, Maxim number of addressable logic cpu in this package
+ 1, 0, EBX, 31:24, apic_id, Initial APIC ID
+
+ 1, 0, ECX, 0, sse3, Streaming SIMD Extensions 3(SSE3)
+ 1, 0, ECX, 1, pclmulqdq, PCLMULQDQ instruction supported
+ 1, 0, ECX, 2, dtes64, DS area uses 64-bit layout
+ 1, 0, ECX, 3, mwait, MONITOR/MWAIT supported
+ 1, 0, ECX, 4, ds_cpl, CPL Qualified Debug Store which allows for branch message storage qualified by CPL
+ 1, 0, ECX, 5, vmx, Virtual Machine Extensions supported
+ 1, 0, ECX, 6, smx, Safer Mode Extension supported
+ 1, 0, ECX, 7, eist, Enhanced Intel SpeedStep Technology
+ 1, 0, ECX, 8, tm2, Thermal Monitor 2
+ 1, 0, ECX, 9, ssse3, Supplemental Streaming SIMD Extensions 3 (SSSE3)
+ 1, 0, ECX, 10, l1_ctx_id, L1 data cache could be set to either adaptive mode or shared mode (check IA32_MISC_ENABLE bit 24 definition)
+ 1, 0, ECX, 11, sdbg, IA32_DEBUG_INTERFACE MSR for silicon debug supported
+ 1, 0, ECX, 12, fma, FMA extensions using YMM state supported
+ 1, 0, ECX, 13, cmpxchg16b, 'CMPXCHG16B - Compare and Exchange Bytes' supported
+ 1, 0, ECX, 14, xtpr_update, xTPR Update Control supported
+ 1, 0, ECX, 15, pdcm, Perfmon and Debug Capability present
+ 1, 0, ECX, 17, pcid, Process-Context Identifiers feature present
+ 1, 0, ECX, 18, dca, Prefetching data from a memory mapped device supported
+ 1, 0, ECX, 19, sse4_1, SSE4.1 feature present
+ 1, 0, ECX, 20, sse4_2, SSE4.2 feature present
+ 1, 0, ECX, 21, x2apic, x2APIC supported
+ 1, 0, ECX, 22, movbe, MOVBE instruction supported
+ 1, 0, ECX, 23, popcnt, POPCNT instruction supported
+ 1, 0, ECX, 24, tsc_deadline_timer, LAPIC supports one-shot operation using a TSC deadline value
+ 1, 0, ECX, 25, aesni, AESNI instruction supported
+ 1, 0, ECX, 26, xsave, XSAVE/XRSTOR processor extended states (XSETBV/XGETBV/XCR0)
+ 1, 0, ECX, 27, osxsave, OS has set CR4.OSXSAVE bit to enable XSETBV/XGETBV/XCR0
+ 1, 0, ECX, 28, avx, AVX instruction supported
+ 1, 0, ECX, 29, f16c, 16-bit floating-point conversion instruction supported
+ 1, 0, ECX, 30, rdrand, RDRAND instruction supported
+
+ 1, 0, EDX, 0, fpu, x87 FPU on chip
+ 1, 0, EDX, 1, vme, Virtual-8086 Mode Enhancement
+ 1, 0, EDX, 2, de, Debugging Extensions
+ 1, 0, EDX, 3, pse, Page Size Extensions
+ 1, 0, EDX, 4, tsc, Time Stamp Counter
+ 1, 0, EDX, 5, msr, RDMSR and WRMSR Support
+ 1, 0, EDX, 6, pae, Physical Address Extensions
+ 1, 0, EDX, 7, mce, Machine Check Exception
+ 1, 0, EDX, 8, cx8, CMPXCHG8B instr
+ 1, 0, EDX, 9, apic, APIC on Chip
+ 1, 0, EDX, 11, sep, SYSENTER and SYSEXIT instrs
+ 1, 0, EDX, 12, mtrr, Memory Type Range Registers
+ 1, 0, EDX, 13, pge, Page Global Bit
+ 1, 0, EDX, 14, mca, Machine Check Architecture
+ 1, 0, EDX, 15, cmov, Conditional Move Instrs
+ 1, 0, EDX, 16, pat, Page Attribute Table
+ 1, 0, EDX, 17, pse36, 36-Bit Page Size Extension
+ 1, 0, EDX, 18, psn, Processor Serial Number
+ 1, 0, EDX, 19, clflush, CLFLUSH instr
+# 1, 0, EDX, 20,
+ 1, 0, EDX, 21, ds, Debug Store
+ 1, 0, EDX, 22, acpi, Thermal Monitor and Software Controlled Clock Facilities
+ 1, 0, EDX, 23, mmx, Intel MMX Technology
+ 1, 0, EDX, 24, fxsr, XSAVE and FXRSTOR Instrs
+ 1, 0, EDX, 25, sse, SSE
+ 1, 0, EDX, 26, sse2, SSE2
+ 1, 0, EDX, 27, ss, Self Snoop
+ 1, 0, EDX, 28, hit, Max APIC IDs
+ 1, 0, EDX, 29, tm, Thermal Monitor
+# 1, 0, EDX, 30,
+ 1, 0, EDX, 31, pbe, Pending Break Enable
+
+# Leaf 02H
+# cache and TLB descriptor info
+
+# Leaf 03H
+# Precessor Serial Number, introduced on Pentium III, not valid for
+# latest models
+
+# Leaf 04H
+# thread/core and cache topology
+ 4, 0, EAX, 4:0, cache_type, Cache type like instr/data or unified
+ 4, 0, EAX, 7:5, cache_level, Cache Level (starts at 1)
+ 4, 0, EAX, 8, cache_self_init, Cache Self Initialization
+ 4, 0, EAX, 9, fully_associate, Fully Associative cache
+# 4, 0, EAX, 13:10, resvd, resvd
+ 4, 0, EAX, 25:14, max_logical_id, Max number of addressable IDs for logical processors sharing the cache
+ 4, 0, EAX, 31:26, max_phy_id, Max number of addressable IDs for processors in phy package
+
+ 4, 0, EBX, 11:0, cache_linesize, Size of a cache line in bytes
+ 4, 0, EBX, 21:12, cache_partition, Physical Line partitions
+ 4, 0, EBX, 31:22, cache_ways, Ways of associativity
+ 4, 0, ECX, 31:0, cache_sets, Number of Sets - 1
+ 4, 0, EDX, 0, c_wbinvd, 1 means WBINVD/INVD is not ganranteed to act upon lower level caches of non-originating threads sharing this cache
+ 4, 0, EDX, 1, c_incl, Whether cache is inclusive of lower cache level
+ 4, 0, EDX, 2, c_comp_index, Complex Cache Indexing
+
+# Leaf 05H
+# MONITOR/MWAIT
+ 5, 0, EAX, 15:0, min_mon_size, Smallest monitor line size in bytes
+ 5, 0, EBX, 15:0, max_mon_size, Largest monitor line size in bytes
+ 5, 0, ECX, 0, mwait_ext, Enum of Monitor-Mwait extensions supported
+ 5, 0, ECX, 1, mwait_irq_break, Largest monitor line size in bytes
+ 5, 0, EDX, 3:0, c0_sub_stats, Number of C0* sub C-states supported using MWAIT
+ 5, 0, EDX, 7:4, c1_sub_stats, Number of C1* sub C-states supported using MWAIT
+ 5, 0, EDX, 11:8, c2_sub_stats, Number of C2* sub C-states supported using MWAIT
+ 5, 0, EDX, 15:12, c3_sub_stats, Number of C3* sub C-states supported using MWAIT
+ 5, 0, EDX, 19:16, c4_sub_stats, Number of C4* sub C-states supported using MWAIT
+ 5, 0, EDX, 23:20, c5_sub_stats, Number of C5* sub C-states supported using MWAIT
+ 5, 0, EDX, 27:24, c6_sub_stats, Number of C6* sub C-states supported using MWAIT
+ 5, 0, EDX, 31:28, c7_sub_stats, Number of C7* sub C-states supported using MWAIT
+
+# Leaf 06H
+# Thermal & Power Management
+
+ 6, 0, EAX, 0, dig_temp, Digital temperature sensor supported
+ 6, 0, EAX, 1, turbo, Intel Turbo Boost
+ 6, 0, EAX, 2, arat, Always running APIC timer
+# 6, 0, EAX, 3, resv, Reserved
+ 6, 0, EAX, 4, pln, Power limit notifications supported
+ 6, 0, EAX, 5, ecmd, Clock modulation duty cycle extension supported
+ 6, 0, EAX, 6, ptm, Package thermal management supported
+ 6, 0, EAX, 7, hwp, HWP base register
+ 6, 0, EAX, 8, hwp_notify, HWP notification
+ 6, 0, EAX, 9, hwp_act_window, HWP activity window
+ 6, 0, EAX, 10, hwp_energy, HWP energy performance preference
+ 6, 0, EAX, 11, hwp_pkg_req, HWP package level request
+# 6, 0, EAX, 12, resv, Reserved
+ 6, 0, EAX, 13, hdc, HDC base registers supported
+ 6, 0, EAX, 14, turbo3, Turbo Boost Max 3.0
+ 6, 0, EAX, 15, hwp_cap, Highest Performance change supported
+ 6, 0, EAX, 16, hwp_peci, HWP PECI override is supported
+ 6, 0, EAX, 17, hwp_flex, Flexible HWP is supported
+ 6, 0, EAX, 18, hwp_fast, Fast access mode for the IA32_HWP_REQUEST MSR is supported
+# 6, 0, EAX, 19, resv, Reserved
+ 6, 0, EAX, 20, hwp_ignr, Ignoring Idle Logical Processor HWP request is supported
+
+ 6, 0, EBX, 3:0, therm_irq_thresh, Number of Interrupt Thresholds in Digital Thermal Sensor
+ 6, 0, ECX, 0, aperfmperf, Presence of IA32_MPERF and IA32_APERF
+ 6, 0, ECX, 3, energ_bias, Performance-energy bias preference supported
+
+# Leaf 07H
+# ECX == 0
+# AVX512 refers to https://en.wikipedia.org/wiki/AVX-512
+# XXX: Do we really need to enumerate each and every AVX512 sub features
+
+ 7, 0, EBX, 0, fsgsbase, RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE supported
+ 7, 0, EBX, 1, tsc_adjust, TSC_ADJUST MSR supported
+ 7, 0, EBX, 2, sgx, Software Guard Extensions
+ 7, 0, EBX, 3, bmi1, BMI1
+ 7, 0, EBX, 4, hle, Hardware Lock Elision
+ 7, 0, EBX, 5, avx2, AVX2
+# 7, 0, EBX, 6, fdp_excp_only, x87 FPU Data Pointer updated only on x87 exceptions
+ 7, 0, EBX, 7, smep, Supervisor-Mode Execution Prevention
+ 7, 0, EBX, 8, bmi2, BMI2
+ 7, 0, EBX, 9, rep_movsb, Enhanced REP MOVSB/STOSB
+ 7, 0, EBX, 10, invpcid, INVPCID instruction
+ 7, 0, EBX, 11, rtm, Restricted Transactional Memory
+ 7, 0, EBX, 12, rdt_m, Intel RDT Monitoring capability
+ 7, 0, EBX, 13, depc_fpu_cs_ds, Deprecates FPU CS and FPU DS
+ 7, 0, EBX, 14, mpx, Memory Protection Extensions
+ 7, 0, EBX, 15, rdt_a, Intel RDT Allocation capability
+ 7, 0, EBX, 16, avx512f, AVX512 Foundation instr
+ 7, 0, EBX, 17, avx512dq, AVX512 Double and Quadword AVX512 instr
+ 7, 0, EBX, 18, rdseed, RDSEED instr
+ 7, 0, EBX, 19, adx, ADX instr
+ 7, 0, EBX, 20, smap, Supervisor Mode Access Prevention
+ 7, 0, EBX, 21, avx512ifma, AVX512 Integer Fused Multiply Add
+# 7, 0, EBX, 22, resvd, resvd
+ 7, 0, EBX, 23, clflushopt, CLFLUSHOPT instr
+ 7, 0, EBX, 24, clwb, CLWB instr
+ 7, 0, EBX, 25, intel_pt, Intel Processor Trace instr
+ 7, 0, EBX, 26, avx512pf, Prefetch
+ 7, 0, EBX, 27, avx512er, AVX512 Exponent Reciproca instr
+ 7, 0, EBX, 28, avx512cd, AVX512 Conflict Detection instr
+ 7, 0, EBX, 29, sha, Intel Secure Hash Algorithm Extensions instr
+ 7, 0, EBX, 26, avx512bw, AVX512 Byte & Word instr
+ 7, 0, EBX, 28, avx512vl, AVX512 Vector Length Extentions (VL)
+ 7, 0, ECX, 0, prefetchwt1, X
+ 7, 0, ECX, 1, avx512vbmi, AVX512 Vector Byte Manipulation Instructions
+ 7, 0, ECX, 2, umip, User-mode Instruction Prevention
+
+ 7, 0, ECX, 3, pku, Protection Keys for User-mode pages
+ 7, 0, ECX, 4, ospke, CR4 PKE set to enable protection keys
+# 7, 0, ECX, 16:5, resvd, resvd
+ 7, 0, ECX, 21:17, mawau, The value of MAWAU used by the BNDLDX and BNDSTX instructions in 64-bit mode
+ 7, 0, ECX, 22, rdpid, RDPID and IA32_TSC_AUX
+# 7, 0, ECX, 29:23, resvd, resvd
+ 7, 0, ECX, 30, sgx_lc, SGX Launch Configuration
+# 7, 0, ECX, 31, resvd, resvd
+
+# Leaf 08H
+#
+
+
+# Leaf 09H
+# Direct Cache Access (DCA) information
+ 9, 0, ECX, 31:0, dca_cap, The value of IA32_PLATFORM_DCA_CAP
+
+# Leaf 0AH
+# Architectural Performance Monitoring
+#
+# Do we really need to print out the PMU related stuff?
+# Does normal user really care about it?
+#
+ 0xA, 0, EAX, 7:0, pmu_ver, Performance Monitoring Unit version
+ 0xA, 0, EAX, 15:8, pmu_gp_cnt_num, Numer of general-purose PMU counters per logical CPU
+ 0xA, 0, EAX, 23:16, pmu_cnt_bits, Bit wideth of PMU counter
+ 0xA, 0, EAX, 31:24, pmu_ebx_bits, Length of EBX bit vector to enumerate PMU events
+
+ 0xA, 0, EBX, 0, pmu_no_core_cycle_evt, Core cycle event not available
+ 0xA, 0, EBX, 1, pmu_no_instr_ret_evt, Instruction retired event not available
+ 0xA, 0, EBX, 2, pmu_no_ref_cycle_evt, Reference cycles event not available
+ 0xA, 0, EBX, 3, pmu_no_llc_ref_evt, Last-level cache reference event not available
+ 0xA, 0, EBX, 4, pmu_no_llc_mis_evt, Last-level cache misses event not available
+ 0xA, 0, EBX, 5, pmu_no_br_instr_ret_evt, Branch instruction retired event not available
+ 0xA, 0, EBX, 6, pmu_no_br_mispredict_evt, Branch mispredict retired event not available
+
+ 0xA, 0, ECX, 4:0, pmu_fixed_cnt_num, Performance Monitoring Unit version
+ 0xA, 0, ECX, 12:5, pmu_fixed_cnt_bits, Numer of PMU counters per logical CPU
+
+# Leaf 0BH
+# Extended Topology Enumeration Leaf
+#
+
+ 0xB, 0, EAX, 4:0, id_shift, Number of bits to shift right on x2APIC ID to get a unique topology ID of the next level type
+ 0xB, 0, EBX, 15:0, cpu_nr, Number of logical processors at this level type
+ 0xB, 0, ECX, 15:8, lvl_type, 0-Invalid 1-SMT 2-Core
+ 0xB, 0, EDX, 31:0, x2apic_id, x2APIC ID the current logical processor
+
+
+# Leaf 0DH
+# Processor Extended State
+
+ 0xD, 0, EAX, 0, x87, X87 state
+ 0xD, 0, EAX, 1, sse, SSE state
+ 0xD, 0, EAX, 2, avx, AVX state
+ 0xD, 0, EAX, 4:3, mpx, MPX state
+ 0xD, 0, EAX, 7:5, avx512, AVX-512 state
+ 0xD, 0, EAX, 9, pkru, PKRU state
+
+ 0xD, 0, EBX, 31:0, max_sz_xcr0, Maximum size (bytes) required by enabled features in XCR0
+ 0xD, 0, ECX, 31:0, max_sz_xsave, Maximum size (bytes) of the XSAVE/XRSTOR save area
+
+ 0xD, 1, EAX, 0, xsaveopt, XSAVEOPT available
+ 0xD, 1, EAX, 1, xsavec, XSAVEC and compacted form supported
+ 0xD, 1, EAX, 2, xgetbv, XGETBV supported
+ 0xD, 1, EAX, 3, xsaves, XSAVES/XRSTORS and IA32_XSS supported
+
+ 0xD, 1, EBX, 31:0, max_sz_xcr0, Maximum size (bytes) required by enabled features in XCR0
+ 0xD, 1, ECX, 8, pt, PT state
+ 0xD, 1, ECX, 11, cet_usr, CET user state
+ 0xD, 1, ECX, 12, cet_supv, CET supervisor state
+ 0xD, 1, ECX, 13, hdc, HDC state
+ 0xD, 1, ECX, 16, hwp, HWP state
+
+# Leaf 0FH
+# Intel RDT Monitoring
+
+ 0xF, 0, EBX, 31:0, rmid_range, Maximum range (zero-based) of RMID within this physical processor of all types
+ 0xF, 0, EDX, 1, l3c_rdt_mon, L3 Cache RDT Monitoring supported
+
+ 0xF, 1, ECX, 31:0, rmid_range, Maximum range (zero-based) of RMID of this types
+ 0xF, 1, EDX, 0, l3c_ocp_mon, L3 Cache occupancy Monitoring supported
+ 0xF, 1, EDX, 1, l3c_tbw_mon, L3 Cache Total Bandwidth Monitoring supported
+ 0xF, 1, EDX, 2, l3c_lbw_mon, L3 Cache Local Bandwidth Monitoring supported
+
+# Leaf 10H
+# Intel RDT Allocation
+
+ 0x10, 0, EBX, 1, l3c_rdt_alloc, L3 Cache Allocation supported
+ 0x10, 0, EBX, 2, l2c_rdt_alloc, L2 Cache Allocation supported
+ 0x10, 0, EBX, 3, mem_bw_alloc, Memory Bandwidth Allocation supported
+
+
+# Leaf 12H
+# SGX Capability
+#
+# Some detailed SGX features not added yet
+
+ 0x12, 0, EAX, 0, sgx1, L3 Cache Allocation supported
+ 0x12, 1, EAX, 0, sgx2, L3 Cache Allocation supported
+
+
+# Leaf 14H
+# Intel Processor Tracer
+#
+
+# Leaf 15H
+# Time Stamp Counter and Nominal Core Crystal Clock Information
+
+ 0x15, 0, EAX, 31:0, tsc_denominator, The denominator of the TSC/”core crystal clock” ratio
+ 0x15, 0, EBX, 31:0, tsc_numerator, The numerator of the TSC/”core crystal clock” ratio
+ 0x15, 0, ECX, 31:0, nom_freq, Nominal frequency of the core crystal clock in Hz
+
+# Leaf 16H
+# Processor Frequency Information
+
+ 0x16, 0, EAX, 15:0, cpu_base_freq, Processor Base Frequency in MHz
+ 0x16, 0, EBX, 15:0, cpu_max_freq, Maximum Frequency in MHz
+ 0x16, 0, ECX, 15:0, bus_freq, Bus (Reference) Frequency in MHz
+
+# Leaf 17H
+# System-On-Chip Vendor Attribute
+
+ 0x17, 0, EAX, 31:0, max_socid, Maximum input value of supported sub-leaf
+ 0x17, 0, EBX, 15:0, soc_vid, SOC Vendor ID
+ 0x17, 0, EBX, 16, std_vid, SOC Vendor ID is assigned via an industry standard scheme
+ 0x17, 0, ECX, 31:0, soc_pid, SOC Project ID assigned by vendor
+ 0x17, 0, EDX, 31:0, soc_sid, SOC Stepping ID
+
+# Leaf 18H
+# Deterministic Address Translation Parameters
+
+
+# Leaf 19H
+# Key Locker Leaf
+
+
+# Leaf 1AH
+# Hybrid Information
+
+ 0x1A, 0, EAX, 31:24, core_type, 20H-Intel_Atom 40H-Intel_Core
+
+
+# Leaf 1FH
+# V2 Extended Topology - A preferred superset to leaf 0BH
+
+
+# According to SDM
+# 40000000H - 4FFFFFFFH is invalid range
+
+
+# Leaf 80000001H
+# Extended Processor Signature and Feature Bits
+
+0x80000001, 0, ECX, 0, lahf_lm, LAHF/SAHF available in 64-bit mode
+0x80000001, 0, ECX, 5, lzcnt, LZCNT
+0x80000001, 0, ECX, 8, prefetchw, PREFETCHW
+
+0x80000001, 0, EDX, 11, sysret, SYSCALL/SYSRET supported
+0x80000001, 0, EDX, 20, exec_dis, Execute Disable Bit available
+0x80000001, 0, EDX, 26, 1gb_page, 1GB page supported
+0x80000001, 0, EDX, 27, rdtscp, RDTSCP and IA32_TSC_AUX are available
+#0x80000001, 0, EDX, 29, 64b, 64b Architecture supported
+
+# Leaf 80000002H/80000003H/80000004H
+# Processor Brand String
+
+# Leaf 80000005H
+# Reserved
+
+# Leaf 80000006H
+# Extended L2 Cache Features
+
+0x80000006, 0, ECX, 7:0, clsize, Cache Line size in bytes
+0x80000006, 0, ECX, 15:12, l2c_assoc, L2 Associativity
+0x80000006, 0, ECX, 31:16, csize, Cache size in 1K units
+
+
+# Leaf 80000007H
+
+0x80000007, 0, EDX, 8, nonstop_tsc, Invariant TSC available
+
+
+# Leaf 80000008H
+
+0x80000008, 0, EAX, 7:0, phy_adr_bits, Physical Address Bits
+0x80000008, 0, EAX, 15:8, lnr_adr_bits, Linear Address Bits
+0x80000007, 0, EBX, 9, wbnoinvd, WBNOINVD
+
+# 0x8000001E
+# EAX: Extended APIC ID
+0x8000001E, 0, EAX, 31:0, extended_apic_id, Extended APIC ID
+# EBX: Core Identifiers
+0x8000001E, 0, EBX, 7:0, core_id, Identifies the logical core ID
+0x8000001E, 0, EBX, 15:8, threads_per_core, The number of threads per core is threads_per_core + 1
+# ECX: Node Identifiers
+0x8000001E, 0, ECX, 7:0, node_id, Node ID
+0x8000001E, 0, ECX, 10:8, nodes_per_processor, Nodes per processor { 0: 1 node, else reserved }
+
+# 8000001F: AMD Secure Encryption
+0x8000001F, 0, EAX, 0, sme, Secure Memory Encryption
+0x8000001F, 0, EAX, 1, sev, Secure Encrypted Virtualization
+0x8000001F, 0, EAX, 2, vmpgflush, VM Page Flush MSR
+0x8000001F, 0, EAX, 3, seves, SEV Encrypted State
+0x8000001F, 0, EBX, 5:0, c-bit, Page table bit number used to enable memory encryption
+0x8000001F, 0, EBX, 11:6, mem_encrypt_physaddr_width, Reduction of physical address space in bits with SME enabled
+0x8000001F, 0, ECX, 31:0, num_encrypted_guests, Maximum ASID value that may be used for an SEV-enabled guest
+0x8000001F, 0, EDX, 31:0, minimum_sev_asid, Minimum ASID value that must be used for an SEV-enabled, SEV-ES-disabled guest
diff --git a/tools/arch/x86/kcpuid/kcpuid.c b/tools/arch/x86/kcpuid/kcpuid.c
new file mode 100644
index 000000000000..dae75511fef7
--- /dev/null
+++ b/tools/arch/x86/kcpuid/kcpuid.c
@@ -0,0 +1,657 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+typedef unsigned int u32;
+typedef unsigned long long u64;
+
+char *def_csv = "/usr/share/misc/cpuid.csv";
+char *user_csv;
+
+
+/* Cover both single-bit flag and multiple-bits fields */
+struct bits_desc {
+ /* start and end bits */
+ int start, end;
+ /* 0 or 1 for 1-bit flag */
+ int value;
+ char simp[32];
+ char detail[256];
+};
+
+/* descriptor info for eax/ebx/ecx/edx */
+struct reg_desc {
+ /* number of valid entries */
+ int nr;
+ struct bits_desc descs[32];
+};
+
+enum {
+ R_EAX = 0,
+ R_EBX,
+ R_ECX,
+ R_EDX,
+ NR_REGS
+};
+
+struct subleaf {
+ u32 index;
+ u32 sub;
+ u32 eax, ebx, ecx, edx;
+ struct reg_desc info[NR_REGS];
+};
+
+/* Represent one leaf (basic or extended) */
+struct cpuid_func {
+ /*
+ * Array of subleafs for this func, if there is no subleafs
+ * then the leafs[0] is the main leaf
+ */
+ struct subleaf *leafs;
+ int nr;
+};
+
+struct cpuid_range {
+ /* array of main leafs */
+ struct cpuid_func *funcs;
+ /* number of valid leafs */
+ int nr;
+ bool is_ext;
+};
+
+/*
+ * basic: basic functions range: [0... ]
+ * ext: extended functions range: [0x80000000... ]
+ */
+struct cpuid_range *leafs_basic, *leafs_ext;
+
+static int num_leafs;
+static bool is_amd;
+static bool show_details;
+static bool show_raw;
+static bool show_flags_only = true;
+static u32 user_index = 0xFFFFFFFF;
+static u32 user_sub = 0xFFFFFFFF;
+static int flines;
+
+static inline void cpuid(u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
+{
+ /* ecx is often an input as well as an output. */
+ asm volatile("cpuid"
+ : "=a" (*eax),
+ "=b" (*ebx),
+ "=c" (*ecx),
+ "=d" (*edx)
+ : "0" (*eax), "2" (*ecx));
+}
+
+static inline bool has_subleafs(u32 f)
+{
+ if (f == 0x7 || f == 0xd)
+ return true;
+
+ if (is_amd) {
+ if (f == 0x8000001d)
+ return true;
+ return false;
+ }
+
+ switch (f) {
+ case 0x4:
+ case 0xb:
+ case 0xf:
+ case 0x10:
+ case 0x14:
+ case 0x18:
+ case 0x1f:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static void leaf_print_raw(struct subleaf *leaf)
+{
+ if (has_subleafs(leaf->index)) {
+ if (leaf->sub == 0)
+ printf("0x%08x: subleafs:\n", leaf->index);
+
+ printf(" %2d: EAX=0x%08x, EBX=0x%08x, ECX=0x%08x, EDX=0x%08x\n",
+ leaf->sub, leaf->eax, leaf->ebx, leaf->ecx, leaf->edx);
+ } else {
+ printf("0x%08x: EAX=0x%08x, EBX=0x%08x, ECX=0x%08x, EDX=0x%08x\n",
+ leaf->index, leaf->eax, leaf->ebx, leaf->ecx, leaf->edx);
+ }
+}
+
+/* Return true is the input eax/ebx/ecx/edx are all zero */
+static bool cpuid_store(struct cpuid_range *range, u32 f, int subleaf,
+ u32 a, u32 b, u32 c, u32 d)
+{
+ struct cpuid_func *func;
+ struct subleaf *leaf;
+ int s = 0;
+
+ if (a == 0 && b == 0 && c == 0 && d == 0)
+ return true;
+
+ /*
+ * Cut off vendor-prefix from CPUID function as we're using it as an
+ * index into ->funcs.
+ */
+ func = &range->funcs[f & 0xffff];
+
+ if (!func->leafs) {
+ func->leafs = malloc(sizeof(struct subleaf));
+ if (!func->leafs)
+ perror("malloc func leaf");
+
+ func->nr = 1;
+ } else {
+ s = func->nr;
+ func->leafs = realloc(func->leafs, (s + 1) * sizeof(*leaf));
+ if (!func->leafs)
+ perror("realloc f->leafs");
+
+ func->nr++;
+ }
+
+ leaf = &func->leafs[s];
+
+ leaf->index = f;
+ leaf->sub = subleaf;
+ leaf->eax = a;
+ leaf->ebx = b;
+ leaf->ecx = c;
+ leaf->edx = d;
+
+ return false;
+}
+
+static void raw_dump_range(struct cpuid_range *range)
+{
+ u32 f;
+ int i;
+
+ printf("%s Leafs :\n", range->is_ext ? "Extended" : "Basic");
+ printf("================\n");
+
+ for (f = 0; (int)f < range->nr; f++) {
+ struct cpuid_func *func = &range->funcs[f];
+ u32 index = f;
+
+ if (range->is_ext)
+ index += 0x80000000;
+
+ /* Skip leaf without valid items */
+ if (!func->nr)
+ continue;
+
+ /* First item is the main leaf, followed by all subleafs */
+ for (i = 0; i < func->nr; i++)
+ leaf_print_raw(&func->leafs[i]);
+ }
+}
+
+#define MAX_SUBLEAF_NUM 32
+struct cpuid_range *setup_cpuid_range(u32 input_eax)
+{
+ u32 max_func, idx_func;
+ int subleaf;
+ struct cpuid_range *range;
+ u32 eax, ebx, ecx, edx;
+ u32 f = input_eax;
+ int max_subleaf;
+ bool allzero;
+
+ eax = input_eax;
+ ebx = ecx = edx = 0;
+
+ cpuid(&eax, &ebx, &ecx, &edx);
+ max_func = eax;
+ idx_func = (max_func & 0xffff) + 1;
+
+ range = malloc(sizeof(struct cpuid_range));
+ if (!range)
+ perror("malloc range");
+
+ if (input_eax & 0x80000000)
+ range->is_ext = true;
+ else
+ range->is_ext = false;
+
+ range->funcs = malloc(sizeof(struct cpuid_func) * idx_func);
+ if (!range->funcs)
+ perror("malloc range->funcs");
+
+ range->nr = idx_func;
+ memset(range->funcs, 0, sizeof(struct cpuid_func) * idx_func);
+
+ for (; f <= max_func; f++) {
+ eax = f;
+ subleaf = ecx = 0;
+
+ cpuid(&eax, &ebx, &ecx, &edx);
+ allzero = cpuid_store(range, f, subleaf, eax, ebx, ecx, edx);
+ if (allzero)
+ continue;
+ num_leafs++;
+
+ if (!has_subleafs(f))
+ continue;
+
+ max_subleaf = MAX_SUBLEAF_NUM;
+
+ /*
+ * Some can provide the exact number of subleafs,
+ * others have to be tried (0xf)
+ */
+ if (f == 0x7 || f == 0x14 || f == 0x17 || f == 0x18)
+ max_subleaf = (eax & 0xff) + 1;
+
+ if (f == 0xb)
+ max_subleaf = 2;
+
+ for (subleaf = 1; subleaf < max_subleaf; subleaf++) {
+ eax = f;
+ ecx = subleaf;
+
+ cpuid(&eax, &ebx, &ecx, &edx);
+ allzero = cpuid_store(range, f, subleaf,
+ eax, ebx, ecx, edx);
+ if (allzero)
+ continue;
+ num_leafs++;
+ }
+
+ }
+
+ return range;
+}
+
+/*
+ * The basic row format for cpuid.csv is
+ * LEAF,SUBLEAF,register_name,bits,short name,long description
+ *
+ * like:
+ * 0, 0, EAX, 31:0, max_basic_leafs, Max input value for supported subleafs
+ * 1, 0, ECX, 0, sse3, Streaming SIMD Extensions 3(SSE3)
+ */
+static int parse_line(char *line)
+{
+ char *str;
+ int i;
+ struct cpuid_range *range;
+ struct cpuid_func *func;
+ struct subleaf *leaf;
+ u32 index;
+ u32 sub;
+ char buffer[512];
+ char *buf;
+ /*
+ * Tokens:
+ * 1. leaf
+ * 2. subleaf
+ * 3. register
+ * 4. bits
+ * 5. short name
+ * 6. long detail
+ */
+ char *tokens[6];
+ struct reg_desc *reg;
+ struct bits_desc *bdesc;
+ int reg_index;
+ char *start, *end;
+
+ /* Skip comments and NULL line */
+ if (line[0] == '#' || line[0] == '\n')
+ return 0;
+
+ strncpy(buffer, line, 511);
+ buffer[511] = 0;
+ str = buffer;
+ for (i = 0; i < 5; i++) {
+ tokens[i] = strtok(str, ",");
+ if (!tokens[i])
+ goto err_exit;
+ str = NULL;
+ }
+ tokens[5] = strtok(str, "\n");
+ if (!tokens[5])
+ goto err_exit;
+
+ /* index/main-leaf */
+ index = strtoull(tokens[0], NULL, 0);
+
+ if (index & 0x80000000)
+ range = leafs_ext;
+ else
+ range = leafs_basic;
+
+ index &= 0x7FFFFFFF;
+ /* Skip line parsing for non-existing indexes */
+ if ((int)index >= range->nr)
+ return -1;
+
+ func = &range->funcs[index];
+
+ /* Return if the index has no valid item on this platform */
+ if (!func->nr)
+ return 0;
+
+ /* subleaf */
+ sub = strtoul(tokens[1], NULL, 0);
+ if ((int)sub > func->nr)
+ return -1;
+
+ leaf = &func->leafs[sub];
+ buf = tokens[2];
+
+ if (strcasestr(buf, "EAX"))
+ reg_index = R_EAX;
+ else if (strcasestr(buf, "EBX"))
+ reg_index = R_EBX;
+ else if (strcasestr(buf, "ECX"))
+ reg_index = R_ECX;
+ else if (strcasestr(buf, "EDX"))
+ reg_index = R_EDX;
+ else
+ goto err_exit;
+
+ reg = &leaf->info[reg_index];
+ bdesc = &reg->descs[reg->nr++];
+
+ /* bit flag or bits field */
+ buf = tokens[3];
+
+ end = strtok(buf, ":");
+ bdesc->end = strtoul(end, NULL, 0);
+ bdesc->start = bdesc->end;
+
+ /* start != NULL means it is bit fields */
+ start = strtok(NULL, ":");
+ if (start)
+ bdesc->start = strtoul(start, NULL, 0);
+
+ strcpy(bdesc->simp, tokens[4]);
+ strcpy(bdesc->detail, tokens[5]);
+ return 0;
+
+err_exit:
+ printf("Warning: wrong line format:\n");
+ printf("\tline[%d]: %s\n", flines, line);
+ return -1;
+}
+
+/* Parse csv file, and construct the array of all leafs and subleafs */
+static void parse_text(void)
+{
+ FILE *file;
+ char *filename, *line = NULL;
+ size_t len = 0;
+ int ret;
+
+ if (show_raw)
+ return;
+
+ filename = user_csv ? user_csv : def_csv;
+ file = fopen(filename, "r");
+ if (!file) {
+ /* Fallback to a csv in the same dir */
+ file = fopen("./cpuid.csv", "r");
+ }
+
+ if (!file) {
+ printf("Fail to open '%s'\n", filename);
+ return;
+ }
+
+ while (1) {
+ ret = getline(&line, &len, file);
+ flines++;
+ if (ret > 0)
+ parse_line(line);
+
+ if (feof(file))
+ break;
+ }
+
+ fclose(file);
+}
+
+
+/* Decode every eax/ebx/ecx/edx */
+static void decode_bits(u32 value, struct reg_desc *rdesc)
+{
+ struct bits_desc *bdesc;
+ int start, end, i;
+ u32 mask;
+
+ for (i = 0; i < rdesc->nr; i++) {
+ bdesc = &rdesc->descs[i];
+
+ start = bdesc->start;
+ end = bdesc->end;
+ if (start == end) {
+ /* single bit flag */
+ if (value & (1 << start))
+ printf("\t%-20s %s%s\n",
+ bdesc->simp,
+ show_details ? "-" : "",
+ show_details ? bdesc->detail : ""
+ );
+ } else {
+ /* bit fields */
+ if (show_flags_only)
+ continue;
+
+ mask = ((u64)1 << (end - start + 1)) - 1;
+ printf("\t%-20s\t: 0x%-8x\t%s%s\n",
+ bdesc->simp,
+ (value >> start) & mask,
+ show_details ? "-" : "",
+ show_details ? bdesc->detail : ""
+ );
+ }
+ }
+}
+
+static void show_leaf(struct subleaf *leaf)
+{
+ if (!leaf)
+ return;
+
+ if (show_raw)
+ leaf_print_raw(leaf);
+
+ decode_bits(leaf->eax, &leaf->info[R_EAX]);
+ decode_bits(leaf->ebx, &leaf->info[R_EBX]);
+ decode_bits(leaf->ecx, &leaf->info[R_ECX]);
+ decode_bits(leaf->edx, &leaf->info[R_EDX]);
+}
+
+static void show_func(struct cpuid_func *func)
+{
+ int i;
+
+ if (!func)
+ return;
+
+ for (i = 0; i < func->nr; i++)
+ show_leaf(&func->leafs[i]);
+}
+
+static void show_range(struct cpuid_range *range)
+{
+ int i;
+
+ for (i = 0; i < range->nr; i++)
+ show_func(&range->funcs[i]);
+}
+
+static inline struct cpuid_func *index_to_func(u32 index)
+{
+ struct cpuid_range *range;
+
+ range = (index & 0x80000000) ? leafs_ext : leafs_basic;
+ index &= 0x7FFFFFFF;
+
+ if (((index & 0xFFFF) + 1) > (u32)range->nr) {
+ printf("ERR: invalid input index (0x%x)\n", index);
+ return NULL;
+ }
+ return &range->funcs[index];
+}
+
+static void show_info(void)
+{
+ struct cpuid_func *func;
+
+ if (show_raw) {
+ /* Show all of the raw output of 'cpuid' instr */
+ raw_dump_range(leafs_basic);
+ raw_dump_range(leafs_ext);
+ return;
+ }
+
+ if (user_index != 0xFFFFFFFF) {
+ /* Only show specific leaf/subleaf info */
+ func = index_to_func(user_index);
+ if (!func)
+ return;
+
+ /* Dump the raw data also */
+ show_raw = true;
+
+ if (user_sub != 0xFFFFFFFF) {
+ if (user_sub + 1 <= (u32)func->nr) {
+ show_leaf(&func->leafs[user_sub]);
+ return;
+ }
+
+ printf("ERR: invalid input subleaf (0x%x)\n", user_sub);
+ }
+
+ show_func(func);
+ return;
+ }
+
+ printf("CPU features:\n=============\n\n");
+ show_range(leafs_basic);
+ show_range(leafs_ext);
+}
+
+static void setup_platform_cpuid(void)
+{
+ u32 eax, ebx, ecx, edx;
+
+ /* Check vendor */
+ eax = ebx = ecx = edx = 0;
+ cpuid(&eax, &ebx, &ecx, &edx);
+
+ /* "htuA" */
+ if (ebx == 0x68747541)
+ is_amd = true;
+
+ /* Setup leafs for the basic and extended range */
+ leafs_basic = setup_cpuid_range(0x0);
+ leafs_ext = setup_cpuid_range(0x80000000);
+}
+
+static void usage(void)
+{
+ printf("kcpuid [-abdfhr] [-l leaf] [-s subleaf]\n"
+ "\t-a|--all Show both bit flags and complex bit fields info\n"
+ "\t-b|--bitflags Show boolean flags only\n"
+ "\t-d|--detail Show details of the flag/fields (default)\n"
+ "\t-f|--flags Specify the cpuid csv file\n"
+ "\t-h|--help Show usage info\n"
+ "\t-l|--leaf=index Specify the leaf you want to check\n"
+ "\t-r|--raw Show raw cpuid data\n"
+ "\t-s|--subleaf=sub Specify the subleaf you want to check\n"
+ );
+}
+
+static struct option opts[] = {
+ { "all", no_argument, NULL, 'a' }, /* show both bit flags and fields */
+ { "bitflags", no_argument, NULL, 'b' }, /* only show bit flags, default on */
+ { "detail", no_argument, NULL, 'd' }, /* show detail descriptions */
+ { "file", required_argument, NULL, 'f' }, /* use user's cpuid file */
+ { "help", no_argument, NULL, 'h'}, /* show usage */
+ { "leaf", required_argument, NULL, 'l'}, /* only check a specific leaf */
+ { "raw", no_argument, NULL, 'r'}, /* show raw CPUID leaf data */
+ { "subleaf", required_argument, NULL, 's'}, /* check a specific subleaf */
+ { NULL, 0, NULL, 0 }
+};
+
+static int parse_options(int argc, char *argv[])
+{
+ int c;
+
+ while ((c = getopt_long(argc, argv, "abdf:hl:rs:",
+ opts, NULL)) != -1)
+ switch (c) {
+ case 'a':
+ show_flags_only = false;
+ break;
+ case 'b':
+ show_flags_only = true;
+ break;
+ case 'd':
+ show_details = true;
+ break;
+ case 'f':
+ user_csv = optarg;
+ break;
+ case 'h':
+ usage();
+ exit(1);
+ break;
+ case 'l':
+ /* main leaf */
+ user_index = strtoul(optarg, NULL, 0);
+ break;
+ case 'r':
+ show_raw = true;
+ break;
+ case 's':
+ /* subleaf */
+ user_sub = strtoul(optarg, NULL, 0);
+ break;
+ default:
+ printf("%s: Invalid option '%c'\n", argv[0], optopt);
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * Do 4 things in turn:
+ * 1. Parse user options
+ * 2. Parse and store all the CPUID leaf data supported on this platform
+ * 2. Parse the csv file, while skipping leafs which are not available
+ * on this platform
+ * 3. Print leafs info based on user options
+ */
+int main(int argc, char *argv[])
+{
+ if (parse_options(argc, argv))
+ return -1;
+
+ /* Setup the cpuid leafs of current platform */
+ setup_platform_cpuid();
+
+ /* Read and parse the 'cpuid.csv' */
+ parse_text();
+
+ show_info();
+ return 0;
+}
diff --git a/tools/arch/x86/lib/inat.c b/tools/arch/x86/lib/inat.c
index 4f5ed49e1b4e..dfbcc6405941 100644
--- a/tools/arch/x86/lib/inat.c
+++ b/tools/arch/x86/lib/inat.c
@@ -4,7 +4,7 @@
*
* Written by Masami Hiramatsu <mhiramat@redhat.com>
*/
-#include "../include/asm/insn.h"
+#include "../include/asm/insn.h" /* __ignore_sync_check__ */
/* Attribute tables are generated from opcode map */
#include "inat-tables.c"
diff --git a/tools/arch/x86/lib/insn.c b/tools/arch/x86/lib/insn.c
index 3d9355ed1246..c41f95815480 100644
--- a/tools/arch/x86/lib/insn.c
+++ b/tools/arch/x86/lib/insn.c
@@ -11,10 +11,13 @@
#else
#include <string.h>
#endif
-#include "../include/asm/inat.h"
-#include "../include/asm/insn.h"
+#include "../include/asm/inat.h" /* __ignore_sync_check__ */
+#include "../include/asm/insn.h" /* __ignore_sync_check__ */
-#include "../include/asm/emulate_prefix.h"
+#include <linux/errno.h>
+#include <linux/kconfig.h>
+
+#include "../include/asm/emulate_prefix.h" /* __ignore_sync_check__ */
#define leXX_to_cpu(t, r) \
({ \
@@ -51,6 +54,7 @@
* insn_init() - initialize struct insn
* @insn: &struct insn to be initialized
* @kaddr: address (in kernel memory) of instruction (or copy thereof)
+ * @buf_len: length of the insn buffer at @kaddr
* @x86_64: !0 for 64-bit kernel or 64-bit app
*/
void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
@@ -111,8 +115,12 @@ static void insn_get_emulate_prefix(struct insn *insn)
* Populates the @insn->prefixes bitmap, and updates @insn->next_byte
* to point to the (first) opcode. No effect if @insn->prefixes.got
* is already set.
+ *
+ * * Returns:
+ * 0: on success
+ * < 0: on error
*/
-void insn_get_prefixes(struct insn *insn)
+int insn_get_prefixes(struct insn *insn)
{
struct insn_field *prefixes = &insn->prefixes;
insn_attr_t attr;
@@ -120,7 +128,7 @@ void insn_get_prefixes(struct insn *insn)
int i, nb;
if (prefixes->got)
- return;
+ return 0;
insn_get_emulate_prefix(insn);
@@ -230,8 +238,10 @@ vex_end:
prefixes->got = 1;
+ return 0;
+
err_out:
- return;
+ return -ENODATA;
}
/**
@@ -243,16 +253,25 @@ err_out:
* If necessary, first collects any preceding (prefix) bytes.
* Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got
* is already 1.
+ *
+ * Returns:
+ * 0: on success
+ * < 0: on error
*/
-void insn_get_opcode(struct insn *insn)
+int insn_get_opcode(struct insn *insn)
{
struct insn_field *opcode = &insn->opcode;
+ int pfx_id, ret;
insn_byte_t op;
- int pfx_id;
+
if (opcode->got)
- return;
- if (!insn->prefixes.got)
- insn_get_prefixes(insn);
+ return 0;
+
+ if (!insn->prefixes.got) {
+ ret = insn_get_prefixes(insn);
+ if (ret)
+ return ret;
+ }
/* Get first opcode */
op = get_next(insn_byte_t, insn);
@@ -267,9 +286,13 @@ void insn_get_opcode(struct insn *insn)
insn->attr = inat_get_avx_attribute(op, m, p);
if ((inat_must_evex(insn->attr) && !insn_is_evex(insn)) ||
(!inat_accept_vex(insn->attr) &&
- !inat_is_group(insn->attr)))
- insn->attr = 0; /* This instruction is bad */
- goto end; /* VEX has only 1 byte for opcode */
+ !inat_is_group(insn->attr))) {
+ /* This instruction is bad */
+ insn->attr = 0;
+ return -EINVAL;
+ }
+ /* VEX has only 1 byte for opcode */
+ goto end;
}
insn->attr = inat_get_opcode_attribute(op);
@@ -280,13 +303,18 @@ void insn_get_opcode(struct insn *insn)
pfx_id = insn_last_prefix_id(insn);
insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr);
}
- if (inat_must_vex(insn->attr))
- insn->attr = 0; /* This instruction is bad */
+
+ if (inat_must_vex(insn->attr)) {
+ /* This instruction is bad */
+ insn->attr = 0;
+ return -EINVAL;
+ }
end:
opcode->got = 1;
+ return 0;
err_out:
- return;
+ return -ENODATA;
}
/**
@@ -296,15 +324,25 @@ err_out:
* Populates @insn->modrm and updates @insn->next_byte to point past the
* ModRM byte, if any. If necessary, first collects the preceding bytes
* (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1.
+ *
+ * Returns:
+ * 0: on success
+ * < 0: on error
*/
-void insn_get_modrm(struct insn *insn)
+int insn_get_modrm(struct insn *insn)
{
struct insn_field *modrm = &insn->modrm;
insn_byte_t pfx_id, mod;
+ int ret;
+
if (modrm->got)
- return;
- if (!insn->opcode.got)
- insn_get_opcode(insn);
+ return 0;
+
+ if (!insn->opcode.got) {
+ ret = insn_get_opcode(insn);
+ if (ret)
+ return ret;
+ }
if (inat_has_modrm(insn->attr)) {
mod = get_next(insn_byte_t, insn);
@@ -313,17 +351,22 @@ void insn_get_modrm(struct insn *insn)
pfx_id = insn_last_prefix_id(insn);
insn->attr = inat_get_group_attribute(mod, pfx_id,
insn->attr);
- if (insn_is_avx(insn) && !inat_accept_vex(insn->attr))
- insn->attr = 0; /* This is bad */
+ if (insn_is_avx(insn) && !inat_accept_vex(insn->attr)) {
+ /* Bad insn */
+ insn->attr = 0;
+ return -EINVAL;
+ }
}
}
if (insn->x86_64 && inat_is_force64(insn->attr))
insn->opnd_bytes = 8;
+
modrm->got = 1;
+ return 0;
err_out:
- return;
+ return -ENODATA;
}
@@ -337,11 +380,16 @@ err_out:
int insn_rip_relative(struct insn *insn)
{
struct insn_field *modrm = &insn->modrm;
+ int ret;
if (!insn->x86_64)
return 0;
- if (!modrm->got)
- insn_get_modrm(insn);
+
+ if (!modrm->got) {
+ ret = insn_get_modrm(insn);
+ if (ret)
+ return 0;
+ }
/*
* For rip-relative instructions, the mod field (top 2 bits)
* is zero and the r/m field (bottom 3 bits) is 0x5.
@@ -355,15 +403,25 @@ int insn_rip_relative(struct insn *insn)
*
* If necessary, first collects the instruction up to and including the
* ModRM byte.
+ *
+ * Returns:
+ * 0: if decoding succeeded
+ * < 0: otherwise.
*/
-void insn_get_sib(struct insn *insn)
+int insn_get_sib(struct insn *insn)
{
insn_byte_t modrm;
+ int ret;
if (insn->sib.got)
- return;
- if (!insn->modrm.got)
- insn_get_modrm(insn);
+ return 0;
+
+ if (!insn->modrm.got) {
+ ret = insn_get_modrm(insn);
+ if (ret)
+ return ret;
+ }
+
if (insn->modrm.nbytes) {
modrm = insn->modrm.bytes[0];
if (insn->addr_bytes != 2 &&
@@ -374,8 +432,10 @@ void insn_get_sib(struct insn *insn)
}
insn->sib.got = 1;
+ return 0;
+
err_out:
- return;
+ return -ENODATA;
}
@@ -386,15 +446,25 @@ err_out:
* If necessary, first collects the instruction up to and including the
* SIB byte.
* Displacement value is sign-expanded.
+ *
+ * * Returns:
+ * 0: if decoding succeeded
+ * < 0: otherwise.
*/
-void insn_get_displacement(struct insn *insn)
+int insn_get_displacement(struct insn *insn)
{
insn_byte_t mod, rm, base;
+ int ret;
if (insn->displacement.got)
- return;
- if (!insn->sib.got)
- insn_get_sib(insn);
+ return 0;
+
+ if (!insn->sib.got) {
+ ret = insn_get_sib(insn);
+ if (ret)
+ return ret;
+ }
+
if (insn->modrm.nbytes) {
/*
* Interpreting the modrm byte:
@@ -436,9 +506,10 @@ void insn_get_displacement(struct insn *insn)
}
out:
insn->displacement.got = 1;
+ return 0;
err_out:
- return;
+ return -ENODATA;
}
/* Decode moffset16/32/64. Return 0 if failed */
@@ -537,20 +608,30 @@ err_out:
}
/**
- * insn_get_immediate() - Get the immediates of instruction
+ * insn_get_immediate() - Get the immediate in an instruction
* @insn: &struct insn containing instruction
*
* If necessary, first collects the instruction up to and including the
* displacement bytes.
* Basically, most of immediates are sign-expanded. Unsigned-value can be
- * get by bit masking with ((1 << (nbytes * 8)) - 1)
+ * computed by bit masking with ((1 << (nbytes * 8)) - 1)
+ *
+ * Returns:
+ * 0: on success
+ * < 0: on error
*/
-void insn_get_immediate(struct insn *insn)
+int insn_get_immediate(struct insn *insn)
{
+ int ret;
+
if (insn->immediate.got)
- return;
- if (!insn->displacement.got)
- insn_get_displacement(insn);
+ return 0;
+
+ if (!insn->displacement.got) {
+ ret = insn_get_displacement(insn);
+ if (ret)
+ return ret;
+ }
if (inat_has_moffset(insn->attr)) {
if (!__get_moffset(insn))
@@ -597,9 +678,10 @@ void insn_get_immediate(struct insn *insn)
}
done:
insn->immediate.got = 1;
+ return 0;
err_out:
- return;
+ return -ENODATA;
}
/**
@@ -608,13 +690,65 @@ err_out:
*
* If necessary, first collects the instruction up to and including the
* immediates bytes.
- */
-void insn_get_length(struct insn *insn)
+ *
+ * Returns:
+ * - 0 on success
+ * - < 0 on error
+*/
+int insn_get_length(struct insn *insn)
{
+ int ret;
+
if (insn->length)
- return;
- if (!insn->immediate.got)
- insn_get_immediate(insn);
+ return 0;
+
+ if (!insn->immediate.got) {
+ ret = insn_get_immediate(insn);
+ if (ret)
+ return ret;
+ }
+
insn->length = (unsigned char)((unsigned long)insn->next_byte
- (unsigned long)insn->kaddr);
+
+ return 0;
+}
+
+/* Ensure this instruction is decoded completely */
+static inline int insn_complete(struct insn *insn)
+{
+ return insn->opcode.got && insn->modrm.got && insn->sib.got &&
+ insn->displacement.got && insn->immediate.got;
+}
+
+/**
+ * insn_decode() - Decode an x86 instruction
+ * @insn: &struct insn to be initialized
+ * @kaddr: address (in kernel memory) of instruction (or copy thereof)
+ * @buf_len: length of the insn buffer at @kaddr
+ * @m: insn mode, see enum insn_mode
+ *
+ * Returns:
+ * 0: if decoding succeeded
+ * < 0: otherwise.
+ */
+int insn_decode(struct insn *insn, const void *kaddr, int buf_len, enum insn_mode m)
+{
+ int ret;
+
+#define INSN_MODE_KERN (enum insn_mode)-1 /* __ignore_sync_check__ mode is only valid in the kernel */
+
+ if (m == INSN_MODE_KERN)
+ insn_init(insn, kaddr, buf_len, IS_ENABLED(CONFIG_X86_64));
+ else
+ insn_init(insn, kaddr, buf_len, m == INSN_MODE_64);
+
+ ret = insn_get_length(insn);
+ if (ret)
+ return ret;
+
+ if (insn_complete(insn))
+ return 0;
+
+ return -EINVAL;
}
diff --git a/tools/bpf/Makefile.helpers b/tools/bpf/Makefile.helpers
deleted file mode 100644
index 854d084026dd..000000000000
--- a/tools/bpf/Makefile.helpers
+++ /dev/null
@@ -1,60 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-ifndef allow-override
- include ../scripts/Makefile.include
- include ../scripts/utilities.mak
-else
- # Assume Makefile.helpers is being run from bpftool/Documentation
- # subdirectory. Go up two more directories to fetch bpf.h header and
- # associated script.
- UP2DIR := ../../
-endif
-
-INSTALL ?= install
-RM ?= rm -f
-RMDIR ?= rmdir --ignore-fail-on-non-empty
-
-ifeq ($(V),1)
- Q =
-else
- Q = @
-endif
-
-prefix ?= /usr/local
-mandir ?= $(prefix)/man
-man7dir = $(mandir)/man7
-
-HELPERS_RST = bpf-helpers.rst
-MAN7_RST = $(HELPERS_RST)
-
-_DOC_MAN7 = $(patsubst %.rst,%.7,$(MAN7_RST))
-DOC_MAN7 = $(addprefix $(OUTPUT),$(_DOC_MAN7))
-
-helpers: man7
-man7: $(DOC_MAN7)
-
-RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null)
-
-$(OUTPUT)$(HELPERS_RST): $(UP2DIR)../../include/uapi/linux/bpf.h
- $(QUIET_GEN)$(UP2DIR)../../scripts/bpf_helpers_doc.py --filename $< > $@
-
-$(OUTPUT)%.7: $(OUTPUT)%.rst
-ifndef RST2MAN_DEP
- $(error "rst2man not found, but required to generate man pages")
-endif
- $(QUIET_GEN)rst2man $< > $@
-
-helpers-clean:
- $(call QUIET_CLEAN, eBPF_helpers-manpage)
- $(Q)$(RM) $(DOC_MAN7) $(OUTPUT)$(HELPERS_RST)
-
-helpers-install: helpers
- $(call QUIET_INSTALL, eBPF_helpers-manpage)
- $(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(man7dir)
- $(Q)$(INSTALL) -m 644 $(DOC_MAN7) $(DESTDIR)$(man7dir)
-
-helpers-uninstall:
- $(call QUIET_UNINST, eBPF_helpers-manpage)
- $(Q)$(RM) $(addprefix $(DESTDIR)$(man7dir)/,$(_DOC_MAN7))
- $(Q)$(RMDIR) $(DESTDIR)$(man7dir)
-
-.PHONY: helpers helpers-clean helpers-install helpers-uninstall
diff --git a/tools/bpf/bpf_dbg.c b/tools/bpf/bpf_dbg.c
index a07dfc479270..00e560a17baf 100644
--- a/tools/bpf/bpf_dbg.c
+++ b/tools/bpf/bpf_dbg.c
@@ -1198,7 +1198,7 @@ static int cmd_run(char *num)
else
return CMD_OK;
bpf_reset();
- } while (pcap_next_pkt() && (!has_limit || (has_limit && ++i < pkts)));
+ } while (pcap_next_pkt() && (!has_limit || (++i < pkts)));
rl_printf("bpf passes:%u fails:%u\n", pass, fail);
diff --git a/tools/bpf/bpf_exp.y b/tools/bpf/bpf_exp.y
index 8d48e896be50..dfb7254a24e8 100644
--- a/tools/bpf/bpf_exp.y
+++ b/tools/bpf/bpf_exp.y
@@ -185,13 +185,13 @@ ldx
| OP_LDXB number '*' '(' '[' number ']' '&' number ')' {
if ($2 != 4 || $9 != 0xf) {
fprintf(stderr, "ldxb offset not supported!\n");
- exit(0);
+ exit(1);
} else {
bpf_set_curr_instr(BPF_LDX | BPF_MSH | BPF_B, 0, 0, $6); } }
| OP_LDX number '*' '(' '[' number ']' '&' number ')' {
if ($2 != 4 || $9 != 0xf) {
fprintf(stderr, "ldxb offset not supported!\n");
- exit(0);
+ exit(1);
} else {
bpf_set_curr_instr(BPF_LDX | BPF_MSH | BPF_B, 0, 0, $6); } }
;
@@ -472,7 +472,7 @@ static void bpf_assert_max(void)
{
if (curr_instr >= BPF_MAXINSNS) {
fprintf(stderr, "only max %u insns allowed!\n", BPF_MAXINSNS);
- exit(0);
+ exit(1);
}
}
@@ -522,7 +522,7 @@ static int bpf_find_insns_offset(const char *label)
if (ret == -ENOENT) {
fprintf(stderr, "no such label \'%s\'!\n", label);
- exit(0);
+ exit(1);
}
return ret;
@@ -549,9 +549,11 @@ static uint8_t bpf_encode_jt_jf_offset(int off, int i)
{
int delta = off - i - 1;
- if (delta < 0 || delta > 255)
- fprintf(stderr, "warning: insn #%d jumps to insn #%d, "
+ if (delta < 0 || delta > 255) {
+ fprintf(stderr, "error: insn #%d jumps to insn #%d, "
"which is out of range\n", i, off);
+ exit(1);
+ }
return (uint8_t) delta;
}
diff --git a/tools/bpf/bpftool/.gitignore b/tools/bpf/bpftool/.gitignore
index 944cb4b7c95d..05ce4446b780 100644
--- a/tools/bpf/bpftool/.gitignore
+++ b/tools/bpf/bpftool/.gitignore
@@ -3,7 +3,6 @@
/bootstrap/
/bpftool
bpftool*.8
-bpf-helpers.*
FEATURE-DUMP.bpftool
feature
libbpf
diff --git a/tools/bpf/bpftool/Documentation/Makefile b/tools/bpf/bpftool/Documentation/Makefile
index f33cb02de95c..c49487905ceb 100644
--- a/tools/bpf/bpftool/Documentation/Makefile
+++ b/tools/bpf/bpftool/Documentation/Makefile
@@ -16,15 +16,12 @@ prefix ?= /usr/local
mandir ?= $(prefix)/man
man8dir = $(mandir)/man8
-# Load targets for building eBPF helpers man page.
-include ../../Makefile.helpers
-
MAN8_RST = $(wildcard bpftool*.rst)
_DOC_MAN8 = $(patsubst %.rst,%.8,$(MAN8_RST))
DOC_MAN8 = $(addprefix $(OUTPUT),$(_DOC_MAN8))
-man: man8 helpers
+man: man8
man8: $(DOC_MAN8)
RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null)
@@ -46,16 +43,16 @@ ifndef RST2MAN_DEP
endif
$(QUIET_GEN)( cat $< ; printf "%b" $(call see_also,$<) ) | rst2man $(RST2MAN_OPTS) > $@
-clean: helpers-clean
+clean:
$(call QUIET_CLEAN, Documentation)
$(Q)$(RM) $(DOC_MAN8)
-install: man helpers-install
+install: man
$(call QUIET_INSTALL, Documentation-man)
$(Q)$(INSTALL) -d -m 755 $(DESTDIR)$(man8dir)
$(Q)$(INSTALL) -m 644 $(DOC_MAN8) $(DESTDIR)$(man8dir)
-uninstall: helpers-uninstall
+uninstall:
$(call QUIET_UNINST, Documentation-man)
$(Q)$(RM) $(addprefix $(DESTDIR)$(man8dir)/,$(_DOC_MAN8))
$(Q)$(RMDIR) $(DESTDIR)$(man8dir)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-gen.rst b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
index 84cf0639696f..7cd6681137f3 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-gen.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
@@ -14,16 +14,37 @@ SYNOPSIS
*OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] }
- *COMMAND* := { **skeleton** | **help** }
+ *COMMAND* := { **object** | **skeleton** | **help** }
GEN COMMANDS
=============
-| **bpftool** **gen skeleton** *FILE*
+| **bpftool** **gen object** *OUTPUT_FILE* *INPUT_FILE* [*INPUT_FILE*...]
+| **bpftool** **gen skeleton** *FILE* [**name** *OBJECT_NAME*]
| **bpftool** **gen help**
DESCRIPTION
===========
+ **bpftool gen object** *OUTPUT_FILE* *INPUT_FILE* [*INPUT_FILE*...]
+ Statically link (combine) together one or more *INPUT_FILE*'s
+ into a single resulting *OUTPUT_FILE*. All the files involved
+ are BPF ELF object files.
+
+ The rules of BPF static linking are mostly the same as for
+ user-space object files, but in addition to combining data
+ and instruction sections, .BTF and .BTF.ext (if present in
+ any of the input files) data are combined together. .BTF
+ data is deduplicated, so all the common types across
+ *INPUT_FILE*'s will only be represented once in the resulting
+ BTF information.
+
+ BPF static linking allows to partition BPF source code into
+ individually compiled files that are then linked into
+ a single resulting BPF object file, which can be used to
+ generated BPF skeleton (with **gen skeleton** command) or
+ passed directly into **libbpf** (using **bpf_object__open()**
+ family of APIs).
+
**bpftool gen skeleton** *FILE*
Generate BPF skeleton C header file for a given *FILE*.
@@ -75,10 +96,13 @@ DESCRIPTION
specific maps, programs, etc.
As part of skeleton, few custom functions are generated.
- Each of them is prefixed with object name, derived from
- object file name. I.e., if BPF object file name is
- **example.o**, BPF object name will be **example**. The
- following custom functions are provided in such case:
+ Each of them is prefixed with object name. Object name can
+ either be derived from object file name, i.e., if BPF object
+ file name is **example.o**, BPF object name will be
+ **example**. Object name can be also specified explicitly
+ through **name** *OBJECT_NAME* parameter. The following
+ custom functions are provided (assuming **example** as
+ the object name):
- **example__open** and **example__open_opts**.
These functions are used to instantiate skeleton. It
@@ -130,26 +154,19 @@ OPTIONS
EXAMPLES
========
-**$ cat example.c**
+**$ cat example1.bpf.c**
::
#include <stdbool.h>
#include <linux/ptrace.h>
#include <linux/bpf.h>
- #include "bpf_helpers.h"
+ #include <bpf/bpf_helpers.h>
const volatile int param1 = 42;
bool global_flag = true;
struct { int x; } data = {};
- struct {
- __uint(type, BPF_MAP_TYPE_HASH);
- __uint(max_entries, 128);
- __type(key, int);
- __type(value, long);
- } my_map SEC(".maps");
-
SEC("raw_tp/sys_enter")
int handle_sys_enter(struct pt_regs *ctx)
{
@@ -161,6 +178,21 @@ EXAMPLES
return 0;
}
+**$ cat example2.bpf.c**
+
+::
+
+ #include <linux/ptrace.h>
+ #include <linux/bpf.h>
+ #include <bpf/bpf_helpers.h>
+
+ struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 128);
+ __type(key, int);
+ __type(value, long);
+ } my_map SEC(".maps");
+
SEC("raw_tp/sys_exit")
int handle_sys_exit(struct pt_regs *ctx)
{
@@ -170,9 +202,17 @@ EXAMPLES
}
This is example BPF application with two BPF programs and a mix of BPF maps
-and global variables.
+and global variables. Source code is split across two source code files.
-**$ bpftool gen skeleton example.o**
+**$ clang -target bpf -g example1.bpf.c -o example1.bpf.o**
+**$ clang -target bpf -g example2.bpf.c -o example2.bpf.o**
+**$ bpftool gen object example.bpf.o example1.bpf.o example2.bpf.o**
+
+This set of commands compiles *example1.bpf.c* and *example2.bpf.c*
+individually and then statically links respective object files into the final
+BPF ELF object file *example.bpf.o*.
+
+**$ bpftool gen skeleton example.bpf.o name example | tee example.skel.h**
::
@@ -227,7 +267,7 @@ and global variables.
#endif /* __EXAMPLE_SKEL_H__ */
-**$ cat example_user.c**
+**$ cat example.c**
::
@@ -270,7 +310,7 @@ and global variables.
return err;
}
-**# ./example_user**
+**# ./example**
::
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index fdffbc64c65c..d67518bcbd44 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -981,12 +981,25 @@ _bpftool()
;;
gen)
case $command in
- skeleton)
+ object)
_filedir
+ return 0
+ ;;
+ skeleton)
+ case $prev in
+ $command)
+ _filedir
+ return 0
+ ;;
+ *)
+ _bpftool_once_attr 'name'
+ return 0
+ ;;
+ esac
;;
*)
[[ $prev == $object ]] && \
- COMPREPLY=( $( compgen -W 'skeleton help' -- "$cur" ) )
+ COMPREPLY=( $( compgen -W 'object skeleton help' -- "$cur" ) )
;;
esac
;;
diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index fe9e7b3a4b50..385d5c955cf3 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -36,6 +36,7 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
[BTF_KIND_FUNC_PROTO] = "FUNC_PROTO",
[BTF_KIND_VAR] = "VAR",
[BTF_KIND_DATASEC] = "DATASEC",
+ [BTF_KIND_FLOAT] = "FLOAT",
};
struct btf_attach_table {
@@ -70,7 +71,9 @@ static const char *btf_var_linkage_str(__u32 linkage)
case BTF_VAR_STATIC:
return "static";
case BTF_VAR_GLOBAL_ALLOCATED:
- return "global-alloc";
+ return "global";
+ case BTF_VAR_GLOBAL_EXTERN:
+ return "extern";
default:
return "(unknown)";
}
@@ -97,26 +100,28 @@ static const char *btf_str(const struct btf *btf, __u32 off)
return btf__name_by_offset(btf, off) ? : "(invalid)";
}
+static int btf_kind_safe(int kind)
+{
+ return kind <= BTF_KIND_MAX ? kind : BTF_KIND_UNKN;
+}
+
static int dump_btf_type(const struct btf *btf, __u32 id,
const struct btf_type *t)
{
json_writer_t *w = json_wtr;
- int kind, safe_kind;
-
- kind = BTF_INFO_KIND(t->info);
- safe_kind = kind <= BTF_KIND_MAX ? kind : BTF_KIND_UNKN;
+ int kind = btf_kind(t);
if (json_output) {
jsonw_start_object(w);
jsonw_uint_field(w, "id", id);
- jsonw_string_field(w, "kind", btf_kind_str[safe_kind]);
+ jsonw_string_field(w, "kind", btf_kind_str[btf_kind_safe(kind)]);
jsonw_string_field(w, "name", btf_str(btf, t->name_off));
} else {
- printf("[%u] %s '%s'", id, btf_kind_str[safe_kind],
+ printf("[%u] %s '%s'", id, btf_kind_str[btf_kind_safe(kind)],
btf_str(btf, t->name_off));
}
- switch (BTF_INFO_KIND(t->info)) {
+ switch (kind) {
case BTF_KIND_INT: {
__u32 v = *(__u32 *)(t + 1);
const char *enc;
@@ -299,7 +304,8 @@ static int dump_btf_type(const struct btf *btf, __u32 id,
break;
}
case BTF_KIND_DATASEC: {
- const struct btf_var_secinfo *v = (const void *)(t+1);
+ const struct btf_var_secinfo *v = (const void *)(t + 1);
+ const struct btf_type *vt;
__u16 vlen = BTF_INFO_VLEN(t->info);
int i;
@@ -321,12 +327,26 @@ static int dump_btf_type(const struct btf *btf, __u32 id,
} else {
printf("\n\ttype_id=%u offset=%u size=%u",
v->type, v->offset, v->size);
+
+ if (v->type <= btf__get_nr_types(btf)) {
+ vt = btf__type_by_id(btf, v->type);
+ printf(" (%s '%s')",
+ btf_kind_str[btf_kind_safe(btf_kind(vt))],
+ btf_str(btf, vt->name_off));
+ }
}
}
if (json_output)
jsonw_end_array(w);
break;
}
+ case BTF_KIND_FLOAT: {
+ if (json_output)
+ jsonw_uint_field(w, "size", t->size);
+ else
+ printf(" size=%u", t->size);
+ break;
+ }
default:
break;
}
@@ -538,6 +558,7 @@ static int do_dump(int argc, char **argv)
NEXT_ARG();
if (argc < 1) {
p_err("expecting value for 'format' option\n");
+ err = -EINVAL;
goto done;
}
if (strcmp(*argv, "c") == 0) {
@@ -547,11 +568,13 @@ static int do_dump(int argc, char **argv)
} else {
p_err("unrecognized format specifier: '%s', possible values: raw, c",
*argv);
+ err = -EINVAL;
goto done;
}
NEXT_ARG();
} else {
p_err("unrecognized option: '%s'", *argv);
+ err = -EINVAL;
goto done;
}
}
diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
index 0e9310727281..7ca54d046362 100644
--- a/tools/bpf/bpftool/btf_dumper.c
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -596,6 +596,7 @@ static int __btf_dumper_type_only(const struct btf *btf, __u32 type_id,
switch (BTF_INFO_KIND(t->info)) {
case BTF_KIND_INT:
case BTF_KIND_TYPEDEF:
+ case BTF_KIND_FLOAT:
BTF_PRINT_ARG("%s ", btf__name_by_offset(btf, t->name_off));
break;
case BTF_KIND_STRUCT:
diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index 65303664417e..1828bba19020 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -57,6 +57,7 @@ const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = {
[BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser",
[BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict",
+ [BPF_SK_SKB_VERDICT] = "sk_skb_verdict",
[BPF_SK_MSG_VERDICT] = "sk_msg_verdict",
[BPF_LIRC_MODE2] = "lirc_mode2",
[BPF_FLOW_DISSECTOR] = "flow_dissector",
diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index 359960a8f1de..40a88df275f9 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -336,6 +336,10 @@ static void probe_kernel_image_config(const char *define_prefix)
{ "CONFIG_BPF_JIT", },
/* Avoid compiling eBPF interpreter (use JIT only) */
{ "CONFIG_BPF_JIT_ALWAYS_ON", },
+ /* Kernel BTF debug information available */
+ { "CONFIG_DEBUG_INFO_BTF", },
+ /* Kernel module BTF debug information available */
+ { "CONFIG_DEBUG_INFO_BTF_MODULES", },
/* cgroups */
{ "CONFIG_CGROUPS", },
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 4033c46d83e7..31ade77f5ef8 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -273,7 +273,7 @@ static int do_skeleton(int argc, char **argv)
char header_guard[MAX_OBJ_NAME_LEN + sizeof("__SKEL_H__")];
size_t i, map_cnt = 0, prog_cnt = 0, file_sz, mmap_sz;
DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts);
- char obj_name[MAX_OBJ_NAME_LEN], *obj_data;
+ char obj_name[MAX_OBJ_NAME_LEN] = "", *obj_data;
struct bpf_object *obj = NULL;
const char *file, *ident;
struct bpf_program *prog;
@@ -288,6 +288,28 @@ static int do_skeleton(int argc, char **argv)
}
file = GET_ARG();
+ while (argc) {
+ if (!REQ_ARGS(2))
+ return -1;
+
+ if (is_prefix(*argv, "name")) {
+ NEXT_ARG();
+
+ if (obj_name[0] != '\0') {
+ p_err("object name already specified");
+ return -1;
+ }
+
+ strncpy(obj_name, *argv, MAX_OBJ_NAME_LEN - 1);
+ obj_name[MAX_OBJ_NAME_LEN - 1] = '\0';
+ } else {
+ p_err("unknown arg %s", *argv);
+ return -1;
+ }
+
+ NEXT_ARG();
+ }
+
if (argc) {
p_err("extra unknown arguments");
return -1;
@@ -310,7 +332,8 @@ static int do_skeleton(int argc, char **argv)
p_err("failed to mmap() %s: %s", file, strerror(errno));
goto out;
}
- get_obj_name(obj_name, file);
+ if (obj_name[0] == '\0')
+ get_obj_name(obj_name, file);
opts.object_name = obj_name;
obj = bpf_object__open_mem(obj_data, file_sz, &opts);
if (IS_ERR(obj)) {
@@ -591,6 +614,47 @@ out:
return err;
}
+static int do_object(int argc, char **argv)
+{
+ struct bpf_linker *linker;
+ const char *output_file, *file;
+ int err = 0;
+
+ if (!REQ_ARGS(2)) {
+ usage();
+ return -1;
+ }
+
+ output_file = GET_ARG();
+
+ linker = bpf_linker__new(output_file, NULL);
+ if (!linker) {
+ p_err("failed to create BPF linker instance");
+ return -1;
+ }
+
+ while (argc) {
+ file = GET_ARG();
+
+ err = bpf_linker__add_file(linker, file);
+ if (err) {
+ p_err("failed to link '%s': %s (%d)", file, strerror(err), err);
+ goto out;
+ }
+ }
+
+ err = bpf_linker__finalize(linker);
+ if (err) {
+ p_err("failed to finalize ELF file: %s (%d)", strerror(err), err);
+ goto out;
+ }
+
+ err = 0;
+out:
+ bpf_linker__free(linker);
+ return err;
+}
+
static int do_help(int argc, char **argv)
{
if (json_output) {
@@ -599,7 +663,8 @@ static int do_help(int argc, char **argv)
}
fprintf(stderr,
- "Usage: %1$s %2$s skeleton FILE\n"
+ "Usage: %1$s %2$s object OUTPUT_FILE INPUT_FILE [INPUT_FILE...]\n"
+ " %1$s %2$s skeleton FILE [name OBJECT_NAME]\n"
" %1$s %2$s help\n"
"\n"
" " HELP_SPEC_OPTIONS "\n"
@@ -610,6 +675,7 @@ static int do_help(int argc, char **argv)
}
static const struct cmd cmds[] = {
+ { "object", do_object },
{ "skeleton", do_skeleton },
{ "help", do_help },
{ 0 }
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index b86f450e6fce..d9afb730136a 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -276,7 +276,7 @@ static int do_batch(int argc, char **argv)
int n_argc;
FILE *fp;
char *cp;
- int err;
+ int err = 0;
int i;
if (argc < 2) {
@@ -370,7 +370,6 @@ static int do_batch(int argc, char **argv)
} else {
if (!json_output)
printf("processed %d commands\n", lines);
- err = 0;
}
err_close:
if (fp != stdin)
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index b400364ee054..09ae0381205b 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -100,7 +100,7 @@ static int do_dump_btf(const struct btf_dumper *d,
void *value)
{
__u32 value_id;
- int ret;
+ int ret = 0;
/* start of key-value pair */
jsonw_start_object(d->jw);
diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c
index ff3aa0cf3997..f836d115d7d6 100644
--- a/tools/bpf/bpftool/net.c
+++ b/tools/bpf/bpftool/net.c
@@ -157,7 +157,7 @@ static int netlink_recv(int sock, __u32 nl_pid, __u32 seq,
if (len == 0)
break;
- for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len);
+ for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, (unsigned int)len);
nh = NLMSG_NEXT(nh, len)) {
if (nh->nlmsg_pid != nl_pid) {
ret = -LIBBPF_ERRNO__WRNGPID;
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index f2b915b20546..3f067d2d7584 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -76,6 +76,7 @@ enum dump_mode {
static const char * const attach_type_strings[] = {
[BPF_SK_SKB_STREAM_PARSER] = "stream_parser",
[BPF_SK_SKB_STREAM_VERDICT] = "stream_verdict",
+ [BPF_SK_SKB_VERDICT] = "skb_verdict",
[BPF_SK_MSG_VERDICT] = "msg_verdict",
[BPF_FLOW_DISSECTOR] = "flow_dissector",
[__MAX_BPF_ATTACH_TYPE] = NULL,
diff --git a/tools/bpf/bpftool/xlated_dumper.c b/tools/bpf/bpftool/xlated_dumper.c
index 8608cd68cdd0..6fc3e6f7f40c 100644
--- a/tools/bpf/bpftool/xlated_dumper.c
+++ b/tools/bpf/bpftool/xlated_dumper.c
@@ -196,6 +196,9 @@ static const char *print_imm(void *private_data,
else if (insn->src_reg == BPF_PSEUDO_MAP_VALUE)
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
"map[id:%u][0]+%u", insn->imm, (insn + 1)->imm);
+ else if (insn->src_reg == BPF_PSEUDO_FUNC)
+ snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
+ "subprog[%+d]", insn->imm);
else
snprintf(dd->scratch_buff, sizeof(dd->scratch_buff),
"0x%llx", (unsigned long long)full_imm);
diff --git a/tools/bpf/resolve_btfids/main.c b/tools/bpf/resolve_btfids/main.c
index 80d966cfcaa1..7550fd9c3188 100644
--- a/tools/bpf/resolve_btfids/main.c
+++ b/tools/bpf/resolve_btfids/main.c
@@ -115,10 +115,10 @@ struct object {
static int verbose;
-int eprintf(int level, int var, const char *fmt, ...)
+static int eprintf(int level, int var, const char *fmt, ...)
{
va_list args;
- int ret;
+ int ret = 0;
if (var >= level) {
va_start(args, fmt);
@@ -385,7 +385,7 @@ static int elf_collect(struct object *obj)
static int symbols_collect(struct object *obj)
{
Elf_Scn *scn = NULL;
- int n, i, err = 0;
+ int n, i;
GElf_Shdr sh;
char *name;
@@ -402,11 +402,10 @@ static int symbols_collect(struct object *obj)
* Scan symbols and look for the ones starting with
* __BTF_ID__* over .BTF_ids section.
*/
- for (i = 0; !err && i < n; i++) {
- char *tmp, *prefix;
+ for (i = 0; i < n; i++) {
+ char *prefix;
struct btf_id *id;
GElf_Sym sym;
- int err = -1;
if (!gelf_getsym(obj->efile.symbols, i, &sym))
return -1;
diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile
index 9d9fb6209be1..3818ec511fd2 100644
--- a/tools/bpf/runqslower/Makefile
+++ b/tools/bpf/runqslower/Makefile
@@ -16,7 +16,10 @@ CFLAGS := -g -Wall
# Try to detect best kernel BTF source
KERNEL_REL := $(shell uname -r)
-VMLINUX_BTF_PATHS := /sys/kernel/btf/vmlinux /boot/vmlinux-$(KERNEL_REL)
+VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux) \
+ $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \
+ ../../../vmlinux /sys/kernel/btf/vmlinux \
+ /boot/vmlinux-$(KERNEL_REL)
VMLINUX_BTF_PATH := $(or $(VMLINUX_BTF),$(firstword \
$(wildcard $(VMLINUX_BTF_PATHS))))
@@ -66,12 +69,16 @@ $(OUTPUT) $(BPFOBJ_OUTPUT) $(BPFTOOL_OUTPUT):
$(QUIET_MKDIR)mkdir -p $@
$(OUTPUT)/vmlinux.h: $(VMLINUX_BTF_PATH) | $(OUTPUT) $(BPFTOOL)
+ifeq ($(VMLINUX_H),)
$(Q)if [ ! -e "$(VMLINUX_BTF_PATH)" ] ; then \
echo "Couldn't find kernel BTF; set VMLINUX_BTF to" \
"specify its location." >&2; \
exit 1;\
fi
$(QUIET_GEN)$(BPFTOOL) btf dump file $(VMLINUX_BTF_PATH) format c > $@
+else
+ $(Q)cp "$(VMLINUX_H)" $@
+endif
$(BPFOBJ): $(wildcard $(LIBBPF_SRC)/*.[ch] $(LIBBPF_SRC)/Makefile) | $(BPFOBJ_OUTPUT)
$(Q)$(MAKE) $(submake_extras) -C $(LIBBPF_SRC) OUTPUT=$(BPFOBJ_OUTPUT) $@
diff --git a/tools/bpf/runqslower/runqslower.bpf.c b/tools/bpf/runqslower/runqslower.bpf.c
index 1f18a409f044..645530ca7e98 100644
--- a/tools/bpf/runqslower/runqslower.bpf.c
+++ b/tools/bpf/runqslower/runqslower.bpf.c
@@ -11,9 +11,9 @@ const volatile __u64 min_us = 0;
const volatile pid_t targ_pid = 0;
struct {
- __uint(type, BPF_MAP_TYPE_HASH);
- __uint(max_entries, 10240);
- __type(key, u32);
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
__type(value, u64);
} start SEC(".maps");
@@ -25,15 +25,20 @@ struct {
/* record enqueue timestamp */
__always_inline
-static int trace_enqueue(u32 tgid, u32 pid)
+static int trace_enqueue(struct task_struct *t)
{
- u64 ts;
+ u32 pid = t->pid;
+ u64 *ptr;
if (!pid || (targ_pid && targ_pid != pid))
return 0;
- ts = bpf_ktime_get_ns();
- bpf_map_update_elem(&start, &pid, &ts, 0);
+ ptr = bpf_task_storage_get(&start, t, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!ptr)
+ return 0;
+
+ *ptr = bpf_ktime_get_ns();
return 0;
}
@@ -43,7 +48,7 @@ int handle__sched_wakeup(u64 *ctx)
/* TP_PROTO(struct task_struct *p) */
struct task_struct *p = (void *)ctx[0];
- return trace_enqueue(p->tgid, p->pid);
+ return trace_enqueue(p);
}
SEC("tp_btf/sched_wakeup_new")
@@ -52,7 +57,7 @@ int handle__sched_wakeup_new(u64 *ctx)
/* TP_PROTO(struct task_struct *p) */
struct task_struct *p = (void *)ctx[0];
- return trace_enqueue(p->tgid, p->pid);
+ return trace_enqueue(p);
}
SEC("tp_btf/sched_switch")
@@ -70,12 +75,16 @@ int handle__sched_switch(u64 *ctx)
/* ivcsw: treat like an enqueue event and store timestamp */
if (prev->state == TASK_RUNNING)
- trace_enqueue(prev->tgid, prev->pid);
+ trace_enqueue(prev);
pid = next->pid;
+ /* For pid mismatch, save a bpf_task_storage_get */
+ if (!pid || (targ_pid && targ_pid != pid))
+ return 0;
+
/* fetch timestamp and calculate delta */
- tsp = bpf_map_lookup_elem(&start, &pid);
+ tsp = bpf_task_storage_get(&start, next, 0, 0);
if (!tsp)
return 0; /* missed enqueue */
@@ -91,7 +100,7 @@ int handle__sched_switch(u64 *ctx)
bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
&event, sizeof(event));
- bpf_map_delete_elem(&start, &pid);
+ bpf_task_storage_delete(&start, next);
return 0;
}
diff --git a/tools/build/Build.include b/tools/build/Build.include
index 585486e40995..2cf3b1bde86e 100644
--- a/tools/build/Build.include
+++ b/tools/build/Build.include
@@ -100,3 +100,27 @@ cxx_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(CXXFLAGS) -D"BUILD_STR(s)=\#s" $(CXX
## HOSTCC C flags
host_c_flags = -Wp,-MD,$(depfile) -Wp,-MT,$@ $(KBUILD_HOSTCFLAGS) -D"BUILD_STR(s)=\#s" $(HOSTCFLAGS_$(basetarget).o) $(HOSTCFLAGS_$(obj))
+
+# output directory for tests below
+TMPOUT = .tmp_$$$$
+
+# try-run
+# Usage: option = $(call try-run, $(CC)...-o "$$TMP",option-ok,otherwise)
+# Exit code chooses option. "$$TMP" serves as a temporary file and is
+# automatically cleaned up.
+try-run = $(shell set -e; \
+ TMP=$(TMPOUT)/tmp; \
+ mkdir -p $(TMPOUT); \
+ trap "rm -rf $(TMPOUT)" EXIT; \
+ if ($(1)) >/dev/null 2>&1; \
+ then echo "$(2)"; \
+ else echo "$(3)"; \
+ fi)
+
+# cc-option
+# Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586)
+cc-option = $(call try-run, \
+ $(CC) -Werror $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2))
+
+# delete partially updated (i.e. corrupted) files on error
+.DELETE_ON_ERROR:
diff --git a/tools/cgroup/memcg_slabinfo.py b/tools/cgroup/memcg_slabinfo.py
index c4225ed63565..1600b17dbb8a 100644
--- a/tools/cgroup/memcg_slabinfo.py
+++ b/tools/cgroup/memcg_slabinfo.py
@@ -128,9 +128,9 @@ def detect_kernel_config():
cfg['nr_nodes'] = prog['nr_online_nodes'].value_()
- if prog.type('struct kmem_cache').members[1][1] == 'flags':
+ if prog.type('struct kmem_cache').members[1].name == 'flags':
cfg['allocator'] = 'SLUB'
- elif prog.type('struct kmem_cache').members[1][1] == 'batchcount':
+ elif prog.type('struct kmem_cache').members[1].name == 'batchcount':
cfg['allocator'] = 'SLAB'
else:
err('Can\'t determine the slab allocator')
@@ -193,7 +193,7 @@ def main():
# look over all slab pages, belonging to non-root memcgs
# and look for objects belonging to the given memory cgroup
for page in for_each_slab_page(prog):
- objcg_vec_raw = page.obj_cgroups.value_()
+ objcg_vec_raw = page.memcg_data.value_()
if objcg_vec_raw == 0:
continue
cache = page.slab_cache
@@ -202,7 +202,7 @@ def main():
addr = cache.value_()
caches[addr] = cache
# clear the lowest bit to get the true obj_cgroups
- objcg_vec = Object(prog, page.obj_cgroups.type_,
+ objcg_vec = Object(prog, 'struct obj_cgroup **',
value=objcg_vec_raw & ~1)
if addr not in stats:
diff --git a/tools/debugging/kernel-chktaint b/tools/debugging/kernel-chktaint
index 607b2b280945..719f18b1edf0 100755
--- a/tools/debugging/kernel-chktaint
+++ b/tools/debugging/kernel-chktaint
@@ -25,7 +25,7 @@ if [ "$1"x != "x" ]; then
elif [ $1 -ge 0 ] 2>/dev/null ; then
taint=$1
else
- echo "Error: Parameter '$1' not a positive interger. Aborting." >&2
+ echo "Error: Parameter '$1' not a positive integer. Aborting." >&2
exit 1
fi
else
diff --git a/tools/iio/Makefile b/tools/iio/Makefile
index 3de763d9ab70..5d12ac4e7f8f 100644
--- a/tools/iio/Makefile
+++ b/tools/iio/Makefile
@@ -27,6 +27,7 @@ include $(srctree)/tools/build/Makefile.include
#
$(OUTPUT)include/linux/iio: ../../include/uapi/linux/iio
mkdir -p $(OUTPUT)include/linux/iio 2>&1 || true
+ ln -sf $(CURDIR)/../../include/uapi/linux/iio/buffer.h $@
ln -sf $(CURDIR)/../../include/uapi/linux/iio/events.h $@
ln -sf $(CURDIR)/../../include/uapi/linux/iio/types.h $@
diff --git a/tools/iio/iio_event_monitor.c b/tools/iio/iio_event_monitor.c
index bb03859db89d..0076437f6e3f 100644
--- a/tools/iio/iio_event_monitor.c
+++ b/tools/iio/iio_event_monitor.c
@@ -14,6 +14,7 @@
#include <unistd.h>
#include <stdlib.h>
+#include <dirent.h>
#include <stdbool.h>
#include <stdio.h>
#include <errno.h>
@@ -280,22 +281,69 @@ static void print_event(struct iio_event_data *event)
printf("\n");
}
+/* Enable or disable events in sysfs if the knob is available */
+static void enable_events(char *dev_dir, int enable)
+{
+ const struct dirent *ent;
+ char evdir[256];
+ int ret;
+ DIR *dp;
+
+ snprintf(evdir, sizeof(evdir), FORMAT_EVENTS_DIR, dev_dir);
+ evdir[sizeof(evdir)-1] = '\0';
+
+ dp = opendir(evdir);
+ if (!dp) {
+ fprintf(stderr, "Enabling/disabling events: can't open %s\n",
+ evdir);
+ return;
+ }
+
+ while (ent = readdir(dp), ent) {
+ if (iioutils_check_suffix(ent->d_name, "_en")) {
+ printf("%sabling: %s\n",
+ enable ? "En" : "Dis",
+ ent->d_name);
+ ret = write_sysfs_int(ent->d_name, evdir,
+ enable);
+ if (ret < 0)
+ fprintf(stderr, "Failed to enable/disable %s\n",
+ ent->d_name);
+ }
+ }
+
+ if (closedir(dp) == -1) {
+ perror("Enabling/disabling channels: "
+ "Failed to close directory");
+ return;
+ }
+}
+
int main(int argc, char **argv)
{
struct iio_event_data event;
const char *device_name;
+ char *dev_dir_name = NULL;
char *chrdev_name;
int ret;
int dev_num;
int fd, event_fd;
-
- if (argc <= 1) {
- fprintf(stderr, "Usage: %s <device_name>\n", argv[0]);
+ bool all_events = false;
+
+ if (argc == 2) {
+ device_name = argv[1];
+ } else if (argc == 3) {
+ device_name = argv[2];
+ if (!strcmp(argv[1], "-a"))
+ all_events = true;
+ } else {
+ fprintf(stderr,
+ "Usage: iio_event_monitor [options] <device_name>\n"
+ "Listen and display events from IIO devices\n"
+ " -a Auto-activate all available events\n");
return -1;
}
- device_name = argv[1];
-
dev_num = find_type_by_name(device_name, "iio:device");
if (dev_num >= 0) {
printf("Found IIO device with name %s with device number %d\n",
@@ -303,6 +351,10 @@ int main(int argc, char **argv)
ret = asprintf(&chrdev_name, "/dev/iio:device%d", dev_num);
if (ret < 0)
return -ENOMEM;
+ /* Look up sysfs dir as well if we can */
+ ret = asprintf(&dev_dir_name, "%siio:device%d", iio_dir, dev_num);
+ if (ret < 0)
+ return -ENOMEM;
} else {
/*
* If we can't find an IIO device by name assume device_name is
@@ -313,6 +365,9 @@ int main(int argc, char **argv)
return -ENOMEM;
}
+ if (all_events && dev_dir_name)
+ enable_events(dev_dir_name, 1);
+
fd = open(chrdev_name, 0);
if (fd == -1) {
ret = -errno;
@@ -365,6 +420,10 @@ int main(int argc, char **argv)
perror("Failed to close event file");
error_free_chrdev_name:
+ /* Disable events after use */
+ if (all_events && dev_dir_name)
+ enable_events(dev_dir_name, 0);
+
free(chrdev_name);
return ret;
diff --git a/tools/iio/iio_generic_buffer.c b/tools/iio/iio_generic_buffer.c
index 34d63bcebcd2..2491c54a5e4f 100644
--- a/tools/iio/iio_generic_buffer.c
+++ b/tools/iio/iio_generic_buffer.c
@@ -30,6 +30,8 @@
#include <inttypes.h>
#include <stdbool.h>
#include <signal.h>
+#include <sys/ioctl.h>
+#include <linux/iio/buffer.h>
#include "iio_utils.h"
/**
@@ -49,7 +51,7 @@ enum autochan {
* Has the side effect of filling the channels[i].location values used
* in processing the buffer output.
**/
-int size_from_channelarray(struct iio_channel_info *channels, int num_channels)
+static int size_from_channelarray(struct iio_channel_info *channels, int num_channels)
{
int bytes = 0;
int i = 0;
@@ -68,7 +70,7 @@ int size_from_channelarray(struct iio_channel_info *channels, int num_channels)
return bytes;
}
-void print1byte(uint8_t input, struct iio_channel_info *info)
+static void print1byte(uint8_t input, struct iio_channel_info *info)
{
/*
* Shift before conversion to avoid sign extension
@@ -85,7 +87,7 @@ void print1byte(uint8_t input, struct iio_channel_info *info)
}
}
-void print2byte(uint16_t input, struct iio_channel_info *info)
+static void print2byte(uint16_t input, struct iio_channel_info *info)
{
/* First swap if incorrect endian */
if (info->be)
@@ -108,7 +110,7 @@ void print2byte(uint16_t input, struct iio_channel_info *info)
}
}
-void print4byte(uint32_t input, struct iio_channel_info *info)
+static void print4byte(uint32_t input, struct iio_channel_info *info)
{
/* First swap if incorrect endian */
if (info->be)
@@ -131,7 +133,7 @@ void print4byte(uint32_t input, struct iio_channel_info *info)
}
}
-void print8byte(uint64_t input, struct iio_channel_info *info)
+static void print8byte(uint64_t input, struct iio_channel_info *info)
{
/* First swap if incorrect endian */
if (info->be)
@@ -167,9 +169,8 @@ void print8byte(uint64_t input, struct iio_channel_info *info)
* to fill the location offsets.
* @num_channels: number of channels
**/
-void process_scan(char *data,
- struct iio_channel_info *channels,
- int num_channels)
+static void process_scan(char *data, struct iio_channel_info *channels,
+ int num_channels)
{
int k;
@@ -198,7 +199,7 @@ void process_scan(char *data,
printf("\n");
}
-static int enable_disable_all_channels(char *dev_dir_name, int enable)
+static int enable_disable_all_channels(char *dev_dir_name, int buffer_idx, int enable)
{
const struct dirent *ent;
char scanelemdir[256];
@@ -206,7 +207,7 @@ static int enable_disable_all_channels(char *dev_dir_name, int enable)
int ret;
snprintf(scanelemdir, sizeof(scanelemdir),
- FORMAT_SCAN_ELEMENTS_DIR, dev_dir_name);
+ FORMAT_SCAN_ELEMENTS_DIR, dev_dir_name, buffer_idx);
scanelemdir[sizeof(scanelemdir)-1] = '\0';
dp = opendir(scanelemdir);
@@ -238,12 +239,13 @@ static int enable_disable_all_channels(char *dev_dir_name, int enable)
return 0;
}
-void print_usage(void)
+static void print_usage(void)
{
fprintf(stderr, "Usage: generic_buffer [options]...\n"
"Capture, convert and output data from IIO device buffer\n"
" -a Auto-activate all available channels\n"
" -A Force-activate ALL channels\n"
+ " -b <n> The buffer which to open (by index), default 0\n"
" -c <n> Do n conversions, or loop forever if n < 0\n"
" -e Disable wait for event (new data)\n"
" -g Use trigger-less mode\n"
@@ -257,12 +259,13 @@ void print_usage(void)
" -w <n> Set delay between reads in us (event-less mode)\n");
}
-enum autochan autochannels = AUTOCHANNELS_DISABLED;
-char *dev_dir_name = NULL;
-char *buf_dir_name = NULL;
-bool current_trigger_set = false;
+static enum autochan autochannels = AUTOCHANNELS_DISABLED;
+static char *dev_dir_name = NULL;
+static char *buf_dir_name = NULL;
+static int buffer_idx = 0;
+static bool current_trigger_set = false;
-void cleanup(void)
+static void cleanup(void)
{
int ret;
@@ -287,21 +290,21 @@ void cleanup(void)
/* Disable channels if auto-enabled */
if (dev_dir_name && autochannels == AUTOCHANNELS_ACTIVE) {
- ret = enable_disable_all_channels(dev_dir_name, 0);
+ ret = enable_disable_all_channels(dev_dir_name, buffer_idx, 0);
if (ret)
fprintf(stderr, "Failed to disable all channels\n");
autochannels = AUTOCHANNELS_DISABLED;
}
}
-void sig_handler(int signum)
+static void sig_handler(int signum)
{
fprintf(stderr, "Caught signal %d\n", signum);
cleanup();
exit(-signum);
}
-void register_cleanup(void)
+static void register_cleanup(void)
{
struct sigaction sa = { .sa_handler = sig_handler };
const int signums[] = { SIGINT, SIGTERM, SIGABRT };
@@ -334,7 +337,9 @@ int main(int argc, char **argv)
unsigned long long j;
unsigned long toread;
int ret, c;
- int fp = -1;
+ struct stat st;
+ int fd = -1;
+ int buf_fd = -1;
int num_channels = 0;
char *trigger_name = NULL, *device_name = NULL;
@@ -353,7 +358,7 @@ int main(int argc, char **argv)
register_cleanup();
- while ((c = getopt_long(argc, argv, "aAc:egl:n:N:t:T:w:?", longopts,
+ while ((c = getopt_long(argc, argv, "aAb:c:egl:n:N:t:T:w:?", longopts,
NULL)) != -1) {
switch (c) {
case 'a':
@@ -362,7 +367,20 @@ int main(int argc, char **argv)
case 'A':
autochannels = AUTOCHANNELS_ENABLED;
force_autochannels = true;
- break;
+ break;
+ case 'b':
+ errno = 0;
+ buffer_idx = strtoll(optarg, &dummy, 10);
+ if (errno) {
+ ret = -errno;
+ goto error;
+ }
+ if (buffer_idx < 0) {
+ ret = -ERANGE;
+ goto error;
+ }
+
+ break;
case 'c':
errno = 0;
num_loops = strtoll(optarg, &dummy, 10);
@@ -519,7 +537,7 @@ int main(int argc, char **argv)
* Parse the files in scan_elements to identify what channels are
* present
*/
- ret = build_channel_array(dev_dir_name, &channels, &num_channels);
+ ret = build_channel_array(dev_dir_name, buffer_idx, &channels, &num_channels);
if (ret) {
fprintf(stderr, "Problem reading scan element information\n"
"diag %s\n", dev_dir_name);
@@ -536,7 +554,7 @@ int main(int argc, char **argv)
(autochannels == AUTOCHANNELS_ENABLED && force_autochannels)) {
fprintf(stderr, "Enabling all channels\n");
- ret = enable_disable_all_channels(dev_dir_name, 1);
+ ret = enable_disable_all_channels(dev_dir_name, buffer_idx, 1);
if (ret) {
fprintf(stderr, "Failed to enable all channels\n");
goto error;
@@ -545,7 +563,7 @@ int main(int argc, char **argv)
/* This flags that we need to disable the channels again */
autochannels = AUTOCHANNELS_ACTIVE;
- ret = build_channel_array(dev_dir_name, &channels,
+ ret = build_channel_array(dev_dir_name, buffer_idx, &channels,
&num_channels);
if (ret) {
fprintf(stderr, "Problem reading scan element "
@@ -566,7 +584,7 @@ int main(int argc, char **argv)
fprintf(stderr, "Enable channels manually in "
FORMAT_SCAN_ELEMENTS_DIR
"/*_en or pass -a to autoenable channels and "
- "try again.\n", dev_dir_name);
+ "try again.\n", dev_dir_name, buffer_idx);
ret = -ENOENT;
goto error;
}
@@ -577,12 +595,25 @@ int main(int argc, char **argv)
* be built rather than found.
*/
ret = asprintf(&buf_dir_name,
- "%siio:device%d/buffer", iio_dir, dev_num);
+ "%siio:device%d/buffer%d", iio_dir, dev_num, buffer_idx);
if (ret < 0) {
ret = -ENOMEM;
goto error;
}
+ if (stat(buf_dir_name, &st)) {
+ fprintf(stderr, "Could not stat() '%s', got error %d: %s\n",
+ buf_dir_name, errno, strerror(errno));
+ ret = -errno;
+ goto error;
+ }
+
+ if (!S_ISDIR(st.st_mode)) {
+ fprintf(stderr, "File '%s' is not a directory\n", buf_dir_name);
+ ret = -EFAULT;
+ goto error;
+ }
+
if (!notrigger) {
printf("%s %s\n", dev_dir_name, trigger_name);
/*
@@ -599,6 +630,35 @@ int main(int argc, char **argv)
}
}
+ ret = asprintf(&buffer_access, "/dev/iio:device%d", dev_num);
+ if (ret < 0) {
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ /* Attempt to open non blocking the access dev */
+ fd = open(buffer_access, O_RDONLY | O_NONBLOCK);
+ if (fd == -1) { /* TODO: If it isn't there make the node */
+ ret = -errno;
+ fprintf(stderr, "Failed to open %s\n", buffer_access);
+ goto error;
+ }
+
+ /* specify for which buffer index we want an FD */
+ buf_fd = buffer_idx;
+
+ ret = ioctl(fd, IIO_BUFFER_GET_FD_IOCTL, &buf_fd);
+ if (ret == -1 || buf_fd == -1) {
+ ret = -errno;
+ if (ret == -ENODEV || ret == -EINVAL)
+ fprintf(stderr,
+ "Device does not have this many buffers\n");
+ else
+ fprintf(stderr, "Failed to retrieve buffer fd\n");
+
+ goto error;
+ }
+
/* Setup ring buffer parameters */
ret = write_sysfs_int("length", buf_dir_name, buf_len);
if (ret < 0)
@@ -608,7 +668,8 @@ int main(int argc, char **argv)
ret = write_sysfs_int("enable", buf_dir_name, 1);
if (ret < 0) {
fprintf(stderr,
- "Failed to enable buffer: %s\n", strerror(-ret));
+ "Failed to enable buffer '%s': %s\n",
+ buf_dir_name, strerror(-ret));
goto error;
}
@@ -619,24 +680,30 @@ int main(int argc, char **argv)
goto error;
}
- ret = asprintf(&buffer_access, "/dev/iio:device%d", dev_num);
- if (ret < 0) {
- ret = -ENOMEM;
- goto error;
+ /**
+ * This check is being done here for sanity reasons, however it
+ * should be omitted under normal operation.
+ * If this is buffer0, we check that we get EBUSY after this point.
+ */
+ if (buffer_idx == 0) {
+ errno = 0;
+ read_size = read(fd, data, 1);
+ if (read_size > -1 || errno != EBUSY) {
+ ret = -EFAULT;
+ perror("Reading from '%s' should not be possible after ioctl()");
+ goto error;
+ }
}
- /* Attempt to open non blocking the access dev */
- fp = open(buffer_access, O_RDONLY | O_NONBLOCK);
- if (fp == -1) { /* TODO: If it isn't there make the node */
- ret = -errno;
- fprintf(stderr, "Failed to open %s\n", buffer_access);
- goto error;
- }
+ /* close now the main chardev FD and let the buffer FD work */
+ if (close(fd) == -1)
+ perror("Failed to close character device file");
+ fd = -1;
for (j = 0; j < num_loops || num_loops < 0; j++) {
if (!noevents) {
struct pollfd pfd = {
- .fd = fp,
+ .fd = buf_fd,
.events = POLLIN,
};
@@ -654,7 +721,7 @@ int main(int argc, char **argv)
toread = 64;
}
- read_size = read(fp, data, toread * scan_size);
+ read_size = read(buf_fd, data, toread * scan_size);
if (read_size < 0) {
if (errno == EAGAIN) {
fprintf(stderr, "nothing available\n");
@@ -671,7 +738,9 @@ int main(int argc, char **argv)
error:
cleanup();
- if (fp >= 0 && close(fp) == -1)
+ if (fd >= 0 && close(fd) == -1)
+ perror("Failed to close character device");
+ if (buf_fd >= 0 && close(buf_fd) == -1)
perror("Failed to close buffer");
free(buffer_access);
free(data);
diff --git a/tools/iio/iio_utils.c b/tools/iio/iio_utils.c
index 7399eb7f1378..aadee6d34c74 100644
--- a/tools/iio/iio_utils.c
+++ b/tools/iio/iio_utils.c
@@ -77,15 +77,17 @@ int iioutils_break_up_name(const char *full_name, char **generic_name)
* @mask: output a bit mask for the raw data
* @be: output if data in big endian
* @device_dir: the IIO device directory
+ * @buffer_idx: the IIO buffer index
* @name: the channel name
* @generic_name: the channel type name
*
* Returns a value >= 0 on success, otherwise a negative error code.
**/
-int iioutils_get_type(unsigned *is_signed, unsigned *bytes, unsigned *bits_used,
- unsigned *shift, uint64_t *mask, unsigned *be,
- const char *device_dir, const char *name,
- const char *generic_name)
+static int iioutils_get_type(unsigned int *is_signed, unsigned int *bytes,
+ unsigned int *bits_used, unsigned int *shift,
+ uint64_t *mask, unsigned int *be,
+ const char *device_dir, int buffer_idx,
+ const char *name, const char *generic_name)
{
FILE *sysfsfp;
int ret;
@@ -95,7 +97,7 @@ int iioutils_get_type(unsigned *is_signed, unsigned *bytes, unsigned *bits_used,
unsigned padint;
const struct dirent *ent;
- ret = asprintf(&scan_el_dir, FORMAT_SCAN_ELEMENTS_DIR, device_dir);
+ ret = asprintf(&scan_el_dir, FORMAT_SCAN_ELEMENTS_DIR, device_dir, buffer_idx);
if (ret < 0)
return -ENOMEM;
@@ -303,12 +305,13 @@ void bsort_channel_array_by_index(struct iio_channel_info *ci_array, int cnt)
/**
* build_channel_array() - function to figure out what channels are present
* @device_dir: the IIO device directory in sysfs
+ * @buffer_idx: the IIO buffer for this channel array
* @ci_array: output the resulting array of iio_channel_info
* @counter: output the amount of array elements
*
* Returns 0 on success, otherwise a negative error code.
**/
-int build_channel_array(const char *device_dir,
+int build_channel_array(const char *device_dir, int buffer_idx,
struct iio_channel_info **ci_array, int *counter)
{
DIR *dp;
@@ -321,7 +324,7 @@ int build_channel_array(const char *device_dir,
char *filename;
*counter = 0;
- ret = asprintf(&scan_el_dir, FORMAT_SCAN_ELEMENTS_DIR, device_dir);
+ ret = asprintf(&scan_el_dir, FORMAT_SCAN_ELEMENTS_DIR, device_dir, buffer_idx);
if (ret < 0)
return -ENOMEM;
@@ -502,6 +505,7 @@ int build_channel_array(const char *device_dir,
&current->mask,
&current->be,
device_dir,
+ buffer_idx,
current->name,
current->generic_name);
if (ret < 0)
diff --git a/tools/iio/iio_utils.h b/tools/iio/iio_utils.h
index 74bde4fde2c8..663c94a6c705 100644
--- a/tools/iio/iio_utils.h
+++ b/tools/iio/iio_utils.h
@@ -12,7 +12,8 @@
/* Made up value to limit allocation sizes */
#define IIO_MAX_NAME_LENGTH 64
-#define FORMAT_SCAN_ELEMENTS_DIR "%s/scan_elements"
+#define FORMAT_SCAN_ELEMENTS_DIR "%s/buffer%d"
+#define FORMAT_EVENTS_DIR "%s/events"
#define FORMAT_TYPE_FILE "%s_type"
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
@@ -57,15 +58,11 @@ static inline int iioutils_check_suffix(const char *str, const char *suffix)
}
int iioutils_break_up_name(const char *full_name, char **generic_name);
-int iioutils_get_type(unsigned *is_signed, unsigned *bytes, unsigned *bits_used,
- unsigned *shift, uint64_t *mask, unsigned *be,
- const char *device_dir, const char *name,
- const char *generic_name);
int iioutils_get_param_float(float *output, const char *param_name,
const char *device_dir, const char *name,
const char *generic_name);
void bsort_channel_array_by_index(struct iio_channel_info *ci_array, int cnt);
-int build_channel_array(const char *device_dir,
+int build_channel_array(const char *device_dir, int buffer_idx,
struct iio_channel_info **ci_array, int *counter);
int find_type_by_name(const char *name, const char *type);
int write_sysfs_int(const char *filename, const char *basedir, int val);
diff --git a/tools/include/linux/kconfig.h b/tools/include/linux/kconfig.h
new file mode 100644
index 000000000000..1555a0c4f345
--- /dev/null
+++ b/tools/include/linux/kconfig.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_LINUX_KCONFIG_H
+#define _TOOLS_LINUX_KCONFIG_H
+
+/* CONFIG_CC_VERSION_TEXT (Do not delete this comment. See help in Kconfig) */
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+#define __BIG_ENDIAN 4321
+#else
+#define __LITTLE_ENDIAN 1234
+#endif
+
+#define __ARG_PLACEHOLDER_1 0,
+#define __take_second_arg(__ignored, val, ...) val
+
+/*
+ * The use of "&&" / "||" is limited in certain expressions.
+ * The following enable to calculate "and" / "or" with macro expansion only.
+ */
+#define __and(x, y) ___and(x, y)
+#define ___and(x, y) ____and(__ARG_PLACEHOLDER_##x, y)
+#define ____and(arg1_or_junk, y) __take_second_arg(arg1_or_junk y, 0)
+
+#define __or(x, y) ___or(x, y)
+#define ___or(x, y) ____or(__ARG_PLACEHOLDER_##x, y)
+#define ____or(arg1_or_junk, y) __take_second_arg(arg1_or_junk 1, y)
+
+/*
+ * Helper macros to use CONFIG_ options in C/CPP expressions. Note that
+ * these only work with boolean and tristate options.
+ */
+
+/*
+ * Getting something that works in C and CPP for an arg that may or may
+ * not be defined is tricky. Here, if we have "#define CONFIG_BOOGER 1"
+ * we match on the placeholder define, insert the "0," for arg1 and generate
+ * the triplet (0, 1, 0). Then the last step cherry picks the 2nd arg (a one).
+ * When CONFIG_BOOGER is not defined, we generate a (... 1, 0) pair, and when
+ * the last step cherry picks the 2nd arg, we get a zero.
+ */
+#define __is_defined(x) ___is_defined(x)
+#define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val)
+#define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0)
+
+/*
+ * IS_BUILTIN(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'y', 0
+ * otherwise. For boolean options, this is equivalent to
+ * IS_ENABLED(CONFIG_FOO).
+ */
+#define IS_BUILTIN(option) __is_defined(option)
+
+/*
+ * IS_MODULE(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'm', 0
+ * otherwise.
+ */
+#define IS_MODULE(option) __is_defined(option##_MODULE)
+
+/*
+ * IS_REACHABLE(CONFIG_FOO) evaluates to 1 if the currently compiled
+ * code can call a function defined in code compiled based on CONFIG_FOO.
+ * This is similar to IS_ENABLED(), but returns false when invoked from
+ * built-in code when CONFIG_FOO is set to 'm'.
+ */
+#define IS_REACHABLE(option) __or(IS_BUILTIN(option), \
+ __and(IS_MODULE(option), __is_defined(MODULE)))
+
+/*
+ * IS_ENABLED(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'y' or 'm',
+ * 0 otherwise.
+ */
+#define IS_ENABLED(option) __or(IS_BUILTIN(option), IS_MODULE(option))
+
+#endif /* _TOOLS_LINUX_KCONFIG_H */
diff --git a/tools/include/linux/static_call_types.h b/tools/include/linux/static_call_types.h
index ae5662d368b9..5a00b8b2cf9f 100644
--- a/tools/include/linux/static_call_types.h
+++ b/tools/include/linux/static_call_types.h
@@ -58,11 +58,25 @@ struct static_call_site {
__raw_static_call(name); \
})
+struct static_call_key {
+ void *func;
+ union {
+ /* bit 0: 0 = mods, 1 = sites */
+ unsigned long type;
+ struct static_call_mod *mods;
+ struct static_call_site *sites;
+ };
+};
+
#else /* !CONFIG_HAVE_STATIC_CALL_INLINE */
#define __STATIC_CALL_ADDRESSABLE(name)
#define __static_call(name) __raw_static_call(name)
+struct static_call_key {
+ void *func;
+};
+
#endif /* CONFIG_HAVE_STATIC_CALL_INLINE */
#ifdef MODULE
@@ -77,6 +91,10 @@ struct static_call_site {
#else
+struct static_call_key {
+ void *func;
+};
+
#define static_call(name) \
((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func))
diff --git a/tools/include/uapi/asm/errno.h b/tools/include/uapi/asm/errno.h
index 637189ec1ab9..d30439b4b8ab 100644
--- a/tools/include/uapi/asm/errno.h
+++ b/tools/include/uapi/asm/errno.h
@@ -9,8 +9,6 @@
#include "../../../arch/alpha/include/uapi/asm/errno.h"
#elif defined(__mips__)
#include "../../../arch/mips/include/uapi/asm/errno.h"
-#elif defined(__ia64__)
-#include "../../../arch/ia64/include/uapi/asm/errno.h"
#elif defined(__xtensa__)
#include "../../../arch/xtensa/include/uapi/asm/errno.h"
#else
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 79c893310492..ec6d85a81744 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -93,7 +93,738 @@ union bpf_iter_link_info {
} map;
};
-/* BPF syscall commands, see bpf(2) man-page for details. */
+/* BPF syscall commands, see bpf(2) man-page for more details. */
+/**
+ * DOC: eBPF Syscall Preamble
+ *
+ * The operation to be performed by the **bpf**\ () system call is determined
+ * by the *cmd* argument. Each operation takes an accompanying argument,
+ * provided via *attr*, which is a pointer to a union of type *bpf_attr* (see
+ * below). The size argument is the size of the union pointed to by *attr*.
+ */
+/**
+ * DOC: eBPF Syscall Commands
+ *
+ * BPF_MAP_CREATE
+ * Description
+ * Create a map and return a file descriptor that refers to the
+ * map. The close-on-exec file descriptor flag (see **fcntl**\ (2))
+ * is automatically enabled for the new file descriptor.
+ *
+ * Applying **close**\ (2) to the file descriptor returned by
+ * **BPF_MAP_CREATE** will delete the map (but see NOTES).
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_MAP_LOOKUP_ELEM
+ * Description
+ * Look up an element with a given *key* in the map referred to
+ * by the file descriptor *map_fd*.
+ *
+ * The *flags* argument may be specified as one of the
+ * following:
+ *
+ * **BPF_F_LOCK**
+ * Look up the value of a spin-locked map without
+ * returning the lock. This must be specified if the
+ * elements contain a spinlock.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_MAP_UPDATE_ELEM
+ * Description
+ * Create or update an element (key/value pair) in a specified map.
+ *
+ * The *flags* argument should be specified as one of the
+ * following:
+ *
+ * **BPF_ANY**
+ * Create a new element or update an existing element.
+ * **BPF_NOEXIST**
+ * Create a new element only if it did not exist.
+ * **BPF_EXIST**
+ * Update an existing element.
+ * **BPF_F_LOCK**
+ * Update a spin_lock-ed map element.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**,
+ * **E2BIG**, **EEXIST**, or **ENOENT**.
+ *
+ * **E2BIG**
+ * The number of elements in the map reached the
+ * *max_entries* limit specified at map creation time.
+ * **EEXIST**
+ * If *flags* specifies **BPF_NOEXIST** and the element
+ * with *key* already exists in the map.
+ * **ENOENT**
+ * If *flags* specifies **BPF_EXIST** and the element with
+ * *key* does not exist in the map.
+ *
+ * BPF_MAP_DELETE_ELEM
+ * Description
+ * Look up and delete an element by key in a specified map.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_MAP_GET_NEXT_KEY
+ * Description
+ * Look up an element by key in a specified map and return the key
+ * of the next element. Can be used to iterate over all elements
+ * in the map.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * The following cases can be used to iterate over all elements of
+ * the map:
+ *
+ * * If *key* is not found, the operation returns zero and sets
+ * the *next_key* pointer to the key of the first element.
+ * * If *key* is found, the operation returns zero and sets the
+ * *next_key* pointer to the key of the next element.
+ * * If *key* is the last element, returns -1 and *errno* is set
+ * to **ENOENT**.
+ *
+ * May set *errno* to **ENOMEM**, **EFAULT**, **EPERM**, or
+ * **EINVAL** on error.
+ *
+ * BPF_PROG_LOAD
+ * Description
+ * Verify and load an eBPF program, returning a new file
+ * descriptor associated with the program.
+ *
+ * Applying **close**\ (2) to the file descriptor returned by
+ * **BPF_PROG_LOAD** will unload the eBPF program (but see NOTES).
+ *
+ * The close-on-exec file descriptor flag (see **fcntl**\ (2)) is
+ * automatically enabled for the new file descriptor.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_OBJ_PIN
+ * Description
+ * Pin an eBPF program or map referred by the specified *bpf_fd*
+ * to the provided *pathname* on the filesystem.
+ *
+ * The *pathname* argument must not contain a dot (".").
+ *
+ * On success, *pathname* retains a reference to the eBPF object,
+ * preventing deallocation of the object when the original
+ * *bpf_fd* is closed. This allow the eBPF object to live beyond
+ * **close**\ (\ *bpf_fd*\ ), and hence the lifetime of the parent
+ * process.
+ *
+ * Applying **unlink**\ (2) or similar calls to the *pathname*
+ * unpins the object from the filesystem, removing the reference.
+ * If no other file descriptors or filesystem nodes refer to the
+ * same object, it will be deallocated (see NOTES).
+ *
+ * The filesystem type for the parent directory of *pathname* must
+ * be **BPF_FS_MAGIC**.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_OBJ_GET
+ * Description
+ * Open a file descriptor for the eBPF object pinned to the
+ * specified *pathname*.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_PROG_ATTACH
+ * Description
+ * Attach an eBPF program to a *target_fd* at the specified
+ * *attach_type* hook.
+ *
+ * The *attach_type* specifies the eBPF attachment point to
+ * attach the program to, and must be one of *bpf_attach_type*
+ * (see below).
+ *
+ * The *attach_bpf_fd* must be a valid file descriptor for a
+ * loaded eBPF program of a cgroup, flow dissector, LIRC, sockmap
+ * or sock_ops type corresponding to the specified *attach_type*.
+ *
+ * The *target_fd* must be a valid file descriptor for a kernel
+ * object which depends on the attach type of *attach_bpf_fd*:
+ *
+ * **BPF_PROG_TYPE_CGROUP_DEVICE**,
+ * **BPF_PROG_TYPE_CGROUP_SKB**,
+ * **BPF_PROG_TYPE_CGROUP_SOCK**,
+ * **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**,
+ * **BPF_PROG_TYPE_CGROUP_SOCKOPT**,
+ * **BPF_PROG_TYPE_CGROUP_SYSCTL**,
+ * **BPF_PROG_TYPE_SOCK_OPS**
+ *
+ * Control Group v2 hierarchy with the eBPF controller
+ * enabled. Requires the kernel to be compiled with
+ * **CONFIG_CGROUP_BPF**.
+ *
+ * **BPF_PROG_TYPE_FLOW_DISSECTOR**
+ *
+ * Network namespace (eg /proc/self/ns/net).
+ *
+ * **BPF_PROG_TYPE_LIRC_MODE2**
+ *
+ * LIRC device path (eg /dev/lircN). Requires the kernel
+ * to be compiled with **CONFIG_BPF_LIRC_MODE2**.
+ *
+ * **BPF_PROG_TYPE_SK_SKB**,
+ * **BPF_PROG_TYPE_SK_MSG**
+ *
+ * eBPF map of socket type (eg **BPF_MAP_TYPE_SOCKHASH**).
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_PROG_DETACH
+ * Description
+ * Detach the eBPF program associated with the *target_fd* at the
+ * hook specified by *attach_type*. The program must have been
+ * previously attached using **BPF_PROG_ATTACH**.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_PROG_TEST_RUN
+ * Description
+ * Run the eBPF program associated with the *prog_fd* a *repeat*
+ * number of times against a provided program context *ctx_in* and
+ * data *data_in*, and return the modified program context
+ * *ctx_out*, *data_out* (for example, packet data), result of the
+ * execution *retval*, and *duration* of the test run.
+ *
+ * The sizes of the buffers provided as input and output
+ * parameters *ctx_in*, *ctx_out*, *data_in*, and *data_out* must
+ * be provided in the corresponding variables *ctx_size_in*,
+ * *ctx_size_out*, *data_size_in*, and/or *data_size_out*. If any
+ * of these parameters are not provided (ie set to NULL), the
+ * corresponding size field must be zero.
+ *
+ * Some program types have particular requirements:
+ *
+ * **BPF_PROG_TYPE_SK_LOOKUP**
+ * *data_in* and *data_out* must be NULL.
+ *
+ * **BPF_PROG_TYPE_XDP**
+ * *ctx_in* and *ctx_out* must be NULL.
+ *
+ * **BPF_PROG_TYPE_RAW_TRACEPOINT**,
+ * **BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE**
+ *
+ * *ctx_out*, *data_in* and *data_out* must be NULL.
+ * *repeat* must be zero.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * **ENOSPC**
+ * Either *data_size_out* or *ctx_size_out* is too small.
+ * **ENOTSUPP**
+ * This command is not supported by the program type of
+ * the program referred to by *prog_fd*.
+ *
+ * BPF_PROG_GET_NEXT_ID
+ * Description
+ * Fetch the next eBPF program currently loaded into the kernel.
+ *
+ * Looks for the eBPF program with an id greater than *start_id*
+ * and updates *next_id* on success. If no other eBPF programs
+ * remain with ids higher than *start_id*, returns -1 and sets
+ * *errno* to **ENOENT**.
+ *
+ * Return
+ * Returns zero on success. On error, or when no id remains, -1
+ * is returned and *errno* is set appropriately.
+ *
+ * BPF_MAP_GET_NEXT_ID
+ * Description
+ * Fetch the next eBPF map currently loaded into the kernel.
+ *
+ * Looks for the eBPF map with an id greater than *start_id*
+ * and updates *next_id* on success. If no other eBPF maps
+ * remain with ids higher than *start_id*, returns -1 and sets
+ * *errno* to **ENOENT**.
+ *
+ * Return
+ * Returns zero on success. On error, or when no id remains, -1
+ * is returned and *errno* is set appropriately.
+ *
+ * BPF_PROG_GET_FD_BY_ID
+ * Description
+ * Open a file descriptor for the eBPF program corresponding to
+ * *prog_id*.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_MAP_GET_FD_BY_ID
+ * Description
+ * Open a file descriptor for the eBPF map corresponding to
+ * *map_id*.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_OBJ_GET_INFO_BY_FD
+ * Description
+ * Obtain information about the eBPF object corresponding to
+ * *bpf_fd*.
+ *
+ * Populates up to *info_len* bytes of *info*, which will be in
+ * one of the following formats depending on the eBPF object type
+ * of *bpf_fd*:
+ *
+ * * **struct bpf_prog_info**
+ * * **struct bpf_map_info**
+ * * **struct bpf_btf_info**
+ * * **struct bpf_link_info**
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_PROG_QUERY
+ * Description
+ * Obtain information about eBPF programs associated with the
+ * specified *attach_type* hook.
+ *
+ * The *target_fd* must be a valid file descriptor for a kernel
+ * object which depends on the attach type of *attach_bpf_fd*:
+ *
+ * **BPF_PROG_TYPE_CGROUP_DEVICE**,
+ * **BPF_PROG_TYPE_CGROUP_SKB**,
+ * **BPF_PROG_TYPE_CGROUP_SOCK**,
+ * **BPF_PROG_TYPE_CGROUP_SOCK_ADDR**,
+ * **BPF_PROG_TYPE_CGROUP_SOCKOPT**,
+ * **BPF_PROG_TYPE_CGROUP_SYSCTL**,
+ * **BPF_PROG_TYPE_SOCK_OPS**
+ *
+ * Control Group v2 hierarchy with the eBPF controller
+ * enabled. Requires the kernel to be compiled with
+ * **CONFIG_CGROUP_BPF**.
+ *
+ * **BPF_PROG_TYPE_FLOW_DISSECTOR**
+ *
+ * Network namespace (eg /proc/self/ns/net).
+ *
+ * **BPF_PROG_TYPE_LIRC_MODE2**
+ *
+ * LIRC device path (eg /dev/lircN). Requires the kernel
+ * to be compiled with **CONFIG_BPF_LIRC_MODE2**.
+ *
+ * **BPF_PROG_QUERY** always fetches the number of programs
+ * attached and the *attach_flags* which were used to attach those
+ * programs. Additionally, if *prog_ids* is nonzero and the number
+ * of attached programs is less than *prog_cnt*, populates
+ * *prog_ids* with the eBPF program ids of the programs attached
+ * at *target_fd*.
+ *
+ * The following flags may alter the result:
+ *
+ * **BPF_F_QUERY_EFFECTIVE**
+ * Only return information regarding programs which are
+ * currently effective at the specified *target_fd*.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_RAW_TRACEPOINT_OPEN
+ * Description
+ * Attach an eBPF program to a tracepoint *name* to access kernel
+ * internal arguments of the tracepoint in their raw form.
+ *
+ * The *prog_fd* must be a valid file descriptor associated with
+ * a loaded eBPF program of type **BPF_PROG_TYPE_RAW_TRACEPOINT**.
+ *
+ * No ABI guarantees are made about the content of tracepoint
+ * arguments exposed to the corresponding eBPF program.
+ *
+ * Applying **close**\ (2) to the file descriptor returned by
+ * **BPF_RAW_TRACEPOINT_OPEN** will delete the map (but see NOTES).
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_BTF_LOAD
+ * Description
+ * Verify and load BPF Type Format (BTF) metadata into the kernel,
+ * returning a new file descriptor associated with the metadata.
+ * BTF is described in more detail at
+ * https://www.kernel.org/doc/html/latest/bpf/btf.html.
+ *
+ * The *btf* parameter must point to valid memory providing
+ * *btf_size* bytes of BTF binary metadata.
+ *
+ * The returned file descriptor can be passed to other **bpf**\ ()
+ * subcommands such as **BPF_PROG_LOAD** or **BPF_MAP_CREATE** to
+ * associate the BTF with those objects.
+ *
+ * Similar to **BPF_PROG_LOAD**, **BPF_BTF_LOAD** has optional
+ * parameters to specify a *btf_log_buf*, *btf_log_size* and
+ * *btf_log_level* which allow the kernel to return freeform log
+ * output regarding the BTF verification process.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_BTF_GET_FD_BY_ID
+ * Description
+ * Open a file descriptor for the BPF Type Format (BTF)
+ * corresponding to *btf_id*.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_TASK_FD_QUERY
+ * Description
+ * Obtain information about eBPF programs associated with the
+ * target process identified by *pid* and *fd*.
+ *
+ * If the *pid* and *fd* are associated with a tracepoint, kprobe
+ * or uprobe perf event, then the *prog_id* and *fd_type* will
+ * be populated with the eBPF program id and file descriptor type
+ * of type **bpf_task_fd_type**. If associated with a kprobe or
+ * uprobe, the *probe_offset* and *probe_addr* will also be
+ * populated. Optionally, if *buf* is provided, then up to
+ * *buf_len* bytes of *buf* will be populated with the name of
+ * the tracepoint, kprobe or uprobe.
+ *
+ * The resulting *prog_id* may be introspected in deeper detail
+ * using **BPF_PROG_GET_FD_BY_ID** and **BPF_OBJ_GET_INFO_BY_FD**.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_MAP_LOOKUP_AND_DELETE_ELEM
+ * Description
+ * Look up an element with the given *key* in the map referred to
+ * by the file descriptor *fd*, and if found, delete the element.
+ *
+ * The **BPF_MAP_TYPE_QUEUE** and **BPF_MAP_TYPE_STACK** map types
+ * implement this command as a "pop" operation, deleting the top
+ * element rather than one corresponding to *key*.
+ * The *key* and *key_len* parameters should be zeroed when
+ * issuing this operation for these map types.
+ *
+ * This command is only valid for the following map types:
+ * * **BPF_MAP_TYPE_QUEUE**
+ * * **BPF_MAP_TYPE_STACK**
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_MAP_FREEZE
+ * Description
+ * Freeze the permissions of the specified map.
+ *
+ * Write permissions may be frozen by passing zero *flags*.
+ * Upon success, no future syscall invocations may alter the
+ * map state of *map_fd*. Write operations from eBPF programs
+ * are still possible for a frozen map.
+ *
+ * Not supported for maps of type **BPF_MAP_TYPE_STRUCT_OPS**.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_BTF_GET_NEXT_ID
+ * Description
+ * Fetch the next BPF Type Format (BTF) object currently loaded
+ * into the kernel.
+ *
+ * Looks for the BTF object with an id greater than *start_id*
+ * and updates *next_id* on success. If no other BTF objects
+ * remain with ids higher than *start_id*, returns -1 and sets
+ * *errno* to **ENOENT**.
+ *
+ * Return
+ * Returns zero on success. On error, or when no id remains, -1
+ * is returned and *errno* is set appropriately.
+ *
+ * BPF_MAP_LOOKUP_BATCH
+ * Description
+ * Iterate and fetch multiple elements in a map.
+ *
+ * Two opaque values are used to manage batch operations,
+ * *in_batch* and *out_batch*. Initially, *in_batch* must be set
+ * to NULL to begin the batched operation. After each subsequent
+ * **BPF_MAP_LOOKUP_BATCH**, the caller should pass the resultant
+ * *out_batch* as the *in_batch* for the next operation to
+ * continue iteration from the current point.
+ *
+ * The *keys* and *values* are output parameters which must point
+ * to memory large enough to hold *count* items based on the key
+ * and value size of the map *map_fd*. The *keys* buffer must be
+ * of *key_size* * *count*. The *values* buffer must be of
+ * *value_size* * *count*.
+ *
+ * The *elem_flags* argument may be specified as one of the
+ * following:
+ *
+ * **BPF_F_LOCK**
+ * Look up the value of a spin-locked map without
+ * returning the lock. This must be specified if the
+ * elements contain a spinlock.
+ *
+ * On success, *count* elements from the map are copied into the
+ * user buffer, with the keys copied into *keys* and the values
+ * copied into the corresponding indices in *values*.
+ *
+ * If an error is returned and *errno* is not **EFAULT**, *count*
+ * is set to the number of successfully processed elements.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * May set *errno* to **ENOSPC** to indicate that *keys* or
+ * *values* is too small to dump an entire bucket during
+ * iteration of a hash-based map type.
+ *
+ * BPF_MAP_LOOKUP_AND_DELETE_BATCH
+ * Description
+ * Iterate and delete all elements in a map.
+ *
+ * This operation has the same behavior as
+ * **BPF_MAP_LOOKUP_BATCH** with two exceptions:
+ *
+ * * Every element that is successfully returned is also deleted
+ * from the map. This is at least *count* elements. Note that
+ * *count* is both an input and an output parameter.
+ * * Upon returning with *errno* set to **EFAULT**, up to
+ * *count* elements may be deleted without returning the keys
+ * and values of the deleted elements.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_MAP_UPDATE_BATCH
+ * Description
+ * Update multiple elements in a map by *key*.
+ *
+ * The *keys* and *values* are input parameters which must point
+ * to memory large enough to hold *count* items based on the key
+ * and value size of the map *map_fd*. The *keys* buffer must be
+ * of *key_size* * *count*. The *values* buffer must be of
+ * *value_size* * *count*.
+ *
+ * Each element specified in *keys* is sequentially updated to the
+ * value in the corresponding index in *values*. The *in_batch*
+ * and *out_batch* parameters are ignored and should be zeroed.
+ *
+ * The *elem_flags* argument should be specified as one of the
+ * following:
+ *
+ * **BPF_ANY**
+ * Create new elements or update a existing elements.
+ * **BPF_NOEXIST**
+ * Create new elements only if they do not exist.
+ * **BPF_EXIST**
+ * Update existing elements.
+ * **BPF_F_LOCK**
+ * Update spin_lock-ed map elements. This must be
+ * specified if the map value contains a spinlock.
+ *
+ * On success, *count* elements from the map are updated.
+ *
+ * If an error is returned and *errno* is not **EFAULT**, *count*
+ * is set to the number of successfully processed elements.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * May set *errno* to **EINVAL**, **EPERM**, **ENOMEM**, or
+ * **E2BIG**. **E2BIG** indicates that the number of elements in
+ * the map reached the *max_entries* limit specified at map
+ * creation time.
+ *
+ * May set *errno* to one of the following error codes under
+ * specific circumstances:
+ *
+ * **EEXIST**
+ * If *flags* specifies **BPF_NOEXIST** and the element
+ * with *key* already exists in the map.
+ * **ENOENT**
+ * If *flags* specifies **BPF_EXIST** and the element with
+ * *key* does not exist in the map.
+ *
+ * BPF_MAP_DELETE_BATCH
+ * Description
+ * Delete multiple elements in a map by *key*.
+ *
+ * The *keys* parameter is an input parameter which must point
+ * to memory large enough to hold *count* items based on the key
+ * size of the map *map_fd*, that is, *key_size* * *count*.
+ *
+ * Each element specified in *keys* is sequentially deleted. The
+ * *in_batch*, *out_batch*, and *values* parameters are ignored
+ * and should be zeroed.
+ *
+ * The *elem_flags* argument may be specified as one of the
+ * following:
+ *
+ * **BPF_F_LOCK**
+ * Look up the value of a spin-locked map without
+ * returning the lock. This must be specified if the
+ * elements contain a spinlock.
+ *
+ * On success, *count* elements from the map are updated.
+ *
+ * If an error is returned and *errno* is not **EFAULT**, *count*
+ * is set to the number of successfully processed elements. If
+ * *errno* is **EFAULT**, up to *count* elements may be been
+ * deleted.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_LINK_CREATE
+ * Description
+ * Attach an eBPF program to a *target_fd* at the specified
+ * *attach_type* hook and return a file descriptor handle for
+ * managing the link.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_LINK_UPDATE
+ * Description
+ * Update the eBPF program in the specified *link_fd* to
+ * *new_prog_fd*.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_LINK_GET_FD_BY_ID
+ * Description
+ * Open a file descriptor for the eBPF Link corresponding to
+ * *link_id*.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_LINK_GET_NEXT_ID
+ * Description
+ * Fetch the next eBPF link currently loaded into the kernel.
+ *
+ * Looks for the eBPF link with an id greater than *start_id*
+ * and updates *next_id* on success. If no other eBPF links
+ * remain with ids higher than *start_id*, returns -1 and sets
+ * *errno* to **ENOENT**.
+ *
+ * Return
+ * Returns zero on success. On error, or when no id remains, -1
+ * is returned and *errno* is set appropriately.
+ *
+ * BPF_ENABLE_STATS
+ * Description
+ * Enable eBPF runtime statistics gathering.
+ *
+ * Runtime statistics gathering for the eBPF runtime is disabled
+ * by default to minimize the corresponding performance overhead.
+ * This command enables statistics globally.
+ *
+ * Multiple programs may independently enable statistics.
+ * After gathering the desired statistics, eBPF runtime statistics
+ * may be disabled again by calling **close**\ (2) for the file
+ * descriptor returned by this function. Statistics will only be
+ * disabled system-wide when all outstanding file descriptors
+ * returned by prior calls for this subcommand are closed.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_ITER_CREATE
+ * Description
+ * Create an iterator on top of the specified *link_fd* (as
+ * previously created using **BPF_LINK_CREATE**) and return a
+ * file descriptor that can be used to trigger the iteration.
+ *
+ * If the resulting file descriptor is pinned to the filesystem
+ * using **BPF_OBJ_PIN**, then subsequent **read**\ (2) syscalls
+ * for that path will trigger the iterator to read kernel state
+ * using the eBPF program attached to *link_fd*.
+ *
+ * Return
+ * A new file descriptor (a nonnegative integer), or -1 if an
+ * error occurred (in which case, *errno* is set appropriately).
+ *
+ * BPF_LINK_DETACH
+ * Description
+ * Forcefully detach the specified *link_fd* from its
+ * corresponding attachment point.
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * BPF_PROG_BIND_MAP
+ * Description
+ * Bind a map to the lifetime of an eBPF program.
+ *
+ * The map identified by *map_fd* is bound to the program
+ * identified by *prog_fd* and only released when *prog_fd* is
+ * released. This may be used in cases where metadata should be
+ * associated with a program which otherwise does not contain any
+ * references to the map (for example, embedded in the eBPF
+ * program instructions).
+ *
+ * Return
+ * Returns zero on success. On error, -1 is returned and *errno*
+ * is set appropriately.
+ *
+ * NOTES
+ * eBPF objects (maps and programs) can be shared between processes.
+ *
+ * * After **fork**\ (2), the child inherits file descriptors
+ * referring to the same eBPF objects.
+ * * File descriptors referring to eBPF objects can be transferred over
+ * **unix**\ (7) domain sockets.
+ * * File descriptors referring to eBPF objects can be duplicated in the
+ * usual way, using **dup**\ (2) and similar calls.
+ * * File descriptors referring to eBPF objects can be pinned to the
+ * filesystem using the **BPF_OBJ_PIN** command of **bpf**\ (2).
+ *
+ * An eBPF object is deallocated only after all file descriptors referring
+ * to the object have been closed and no references remain pinned to the
+ * filesystem or attached (for example, bound to a program or device).
+ */
enum bpf_cmd {
BPF_MAP_CREATE,
BPF_MAP_LOOKUP_ELEM,
@@ -247,6 +978,7 @@ enum bpf_attach_type {
BPF_XDP_CPUMAP,
BPF_SK_LOOKUP,
BPF_XDP,
+ BPF_SK_SKB_VERDICT,
__MAX_BPF_ATTACH_TYPE
};
@@ -393,11 +1125,24 @@ enum bpf_link_type {
* is struct/union.
*/
#define BPF_PSEUDO_BTF_ID 3
+/* insn[0].src_reg: BPF_PSEUDO_FUNC
+ * insn[0].imm: insn offset to the func
+ * insn[1].imm: 0
+ * insn[0].off: 0
+ * insn[1].off: 0
+ * ldimm64 rewrite: address of the function
+ * verifier type: PTR_TO_FUNC.
+ */
+#define BPF_PSEUDO_FUNC 4
/* when bpf_call->src_reg == BPF_PSEUDO_CALL, bpf_call->imm == pc-relative
* offset to another bpf function
*/
#define BPF_PSEUDO_CALL 1
+/* when bpf_call->src_reg == BPF_PSEUDO_KFUNC_CALL,
+ * bpf_call->imm == btf_id of a BTF_KIND_FUNC in the running kernel
+ */
+#define BPF_PSEUDO_KFUNC_CALL 2
/* flags for BPF_MAP_UPDATE_ELEM command */
enum {
@@ -720,7 +1465,7 @@ union bpf_attr {
* parsed and used to produce a manual page. The workflow is the following,
* and requires the rst2man utility:
*
- * $ ./scripts/bpf_helpers_doc.py \
+ * $ ./scripts/bpf_doc.py \
* --filename include/uapi/linux/bpf.h > /tmp/bpf-helpers.rst
* $ rst2man /tmp/bpf-helpers.rst > /tmp/bpf-helpers.7
* $ man /tmp/bpf-helpers.7
@@ -1765,6 +2510,10 @@ union bpf_attr {
* Use with ENCAP_L3/L4 flags to further specify the tunnel
* type; *len* is the length of the inner MAC header.
*
+ * * **BPF_F_ADJ_ROOM_ENCAP_L2_ETH**:
+ * Use with BPF_F_ADJ_ROOM_ENCAP_L2 flag to further specify the
+ * L2 type as Ethernet.
+ *
* A call to this helper is susceptible to change the underlying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be
@@ -3333,12 +4082,20 @@ union bpf_attr {
* of new data availability is sent.
* If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification
* of new data availability is sent unconditionally.
+ * If **0** is specified in *flags*, an adaptive notification
+ * of new data availability is sent.
+ *
+ * An adaptive notification is a notification sent whenever the user-space
+ * process has caught up and consumed all available payloads. In case the user-space
+ * process is still processing a previous payload, then no notification is needed
+ * as it will process the newly added payload automatically.
* Return
* 0 on success, or a negative error in case of failure.
*
* void *bpf_ringbuf_reserve(void *ringbuf, u64 size, u64 flags)
* Description
* Reserve *size* bytes of payload in a ring buffer *ringbuf*.
+ * *flags* must be 0.
* Return
* Valid pointer with *size* bytes of memory available; NULL,
* otherwise.
@@ -3350,6 +4107,10 @@ union bpf_attr {
* of new data availability is sent.
* If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification
* of new data availability is sent unconditionally.
+ * If **0** is specified in *flags*, an adaptive notification
+ * of new data availability is sent.
+ *
+ * See 'bpf_ringbuf_output()' for the definition of adaptive notification.
* Return
* Nothing. Always succeeds.
*
@@ -3360,6 +4121,10 @@ union bpf_attr {
* of new data availability is sent.
* If **BPF_RB_FORCE_WAKEUP** is specified in *flags*, notification
* of new data availability is sent unconditionally.
+ * If **0** is specified in *flags*, an adaptive notification
+ * of new data availability is sent.
+ *
+ * See 'bpf_ringbuf_output()' for the definition of adaptive notification.
* Return
* Nothing. Always succeeds.
*
@@ -3850,7 +4615,7 @@ union bpf_attr {
*
* long bpf_check_mtu(void *ctx, u32 ifindex, u32 *mtu_len, s32 len_diff, u64 flags)
* Description
- * Check ctx packet size against exceeding MTU of net device (based
+ * Check packet size against exceeding MTU of net device (based
* on *ifindex*). This helper will likely be used in combination
* with helpers that adjust/change the packet size.
*
@@ -3867,6 +4632,14 @@ union bpf_attr {
* against the current net device. This is practical if this isn't
* used prior to redirect.
*
+ * On input *mtu_len* must be a valid pointer, else verifier will
+ * reject BPF program. If the value *mtu_len* is initialized to
+ * zero then the ctx packet size is use. When value *mtu_len* is
+ * provided as input this specify the L3 length that the MTU check
+ * is done against. Remember XDP and TC length operate at L2, but
+ * this value is L3 as this correlate to MTU and IP-header tot_len
+ * values which are L3 (similar behavior as bpf_fib_lookup).
+ *
* The Linux kernel route table can configure MTUs on a more
* specific per route level, which is not provided by this helper.
* For route level MTU checks use the **bpf_fib_lookup**\ ()
@@ -3891,11 +4664,9 @@ union bpf_attr {
*
* On return *mtu_len* pointer contains the MTU value of the net
* device. Remember the net device configured MTU is the L3 size,
- * which is returned here and XDP and TX length operate at L2.
+ * which is returned here and XDP and TC length operate at L2.
* Helper take this into account for you, but remember when using
- * MTU value in your BPF-code. On input *mtu_len* must be a valid
- * pointer and be initialized (to zero), else verifier will reject
- * BPF program.
+ * MTU value in your BPF-code.
*
* Return
* * 0 on success, and populate MTU value in *mtu_len* pointer.
@@ -3909,6 +4680,61 @@ union bpf_attr {
* * **BPF_MTU_CHK_RET_FRAG_NEEDED**
* * **BPF_MTU_CHK_RET_SEGS_TOOBIG**
*
+ * long bpf_for_each_map_elem(struct bpf_map *map, void *callback_fn, void *callback_ctx, u64 flags)
+ * Description
+ * For each element in **map**, call **callback_fn** function with
+ * **map**, **callback_ctx** and other map-specific parameters.
+ * The **callback_fn** should be a static function and
+ * the **callback_ctx** should be a pointer to the stack.
+ * The **flags** is used to control certain aspects of the helper.
+ * Currently, the **flags** must be 0.
+ *
+ * The following are a list of supported map types and their
+ * respective expected callback signatures:
+ *
+ * BPF_MAP_TYPE_HASH, BPF_MAP_TYPE_PERCPU_HASH,
+ * BPF_MAP_TYPE_LRU_HASH, BPF_MAP_TYPE_LRU_PERCPU_HASH,
+ * BPF_MAP_TYPE_ARRAY, BPF_MAP_TYPE_PERCPU_ARRAY
+ *
+ * long (\*callback_fn)(struct bpf_map \*map, const void \*key, void \*value, void \*ctx);
+ *
+ * For per_cpu maps, the map_value is the value on the cpu where the
+ * bpf_prog is running.
+ *
+ * If **callback_fn** return 0, the helper will continue to the next
+ * element. If return value is 1, the helper will skip the rest of
+ * elements and return. Other return values are not used now.
+ *
+ * Return
+ * The number of traversed map elements for success, **-EINVAL** for
+ * invalid **flags**.
+ *
+ * long bpf_snprintf(char *str, u32 str_size, const char *fmt, u64 *data, u32 data_len)
+ * Description
+ * Outputs a string into the **str** buffer of size **str_size**
+ * based on a format string stored in a read-only map pointed by
+ * **fmt**.
+ *
+ * Each format specifier in **fmt** corresponds to one u64 element
+ * in the **data** array. For strings and pointers where pointees
+ * are accessed, only the pointer values are stored in the *data*
+ * array. The *data_len* is the size of *data* in bytes.
+ *
+ * Formats **%s** and **%p{i,I}{4,6}** require to read kernel
+ * memory. Reading kernel memory may fail due to either invalid
+ * address or valid address but requiring a major memory fault. If
+ * reading kernel memory fails, the string for **%s** will be an
+ * empty string, and the ip address for **%p{i,I}{4,6}** will be 0.
+ * Not returning error to bpf program is consistent with what
+ * **bpf_trace_printk**\ () does for now.
+ *
+ * Return
+ * The strictly positive length of the formatted string, including
+ * the trailing zero character. If the return value is greater than
+ * **str_size**, **str** contains a truncated string, guaranteed to
+ * be zero-terminated except when **str_size** is 0.
+ *
+ * Or **-EBUSY** if the per-CPU memory copy buffer is busy.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -4075,6 +4901,8 @@ union bpf_attr {
FN(ima_inode_hash), \
FN(sock_from_file), \
FN(check_mtu), \
+ FN(for_each_map_elem), \
+ FN(snprintf), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -4168,6 +4996,7 @@ enum {
BPF_F_ADJ_ROOM_ENCAP_L4_GRE = (1ULL << 3),
BPF_F_ADJ_ROOM_ENCAP_L4_UDP = (1ULL << 4),
BPF_F_ADJ_ROOM_NO_CSUM_RESET = (1ULL << 5),
+ BPF_F_ADJ_ROOM_ENCAP_L2_ETH = (1ULL << 6),
};
enum {
@@ -4615,6 +5444,8 @@ struct bpf_link_info {
} raw_tracepoint;
struct {
__u32 attach_type;
+ __u32 target_obj_id; /* prog_id for PROG_EXT, otherwise btf object id */
+ __u32 target_btf_id; /* BTF type id inside the object */
} tracing;
struct {
__u64 cgroup_id;
@@ -5205,7 +6036,10 @@ struct bpf_pidns_info {
/* User accessible data for SK_LOOKUP programs. Add new fields at the end. */
struct bpf_sk_lookup {
- __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
+ union {
+ __bpf_md_ptr(struct bpf_sock *, sk); /* Selected socket */
+ __u64 cookie; /* Non-zero if socket was selected in PROG_TEST_RUN */
+ };
__u32 family; /* Protocol family (AF_INET, AF_INET6) */
__u32 protocol; /* IP protocol (IPPROTO_TCP, IPPROTO_UDP) */
diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h
index 5a667107ad2c..d27b1708efe9 100644
--- a/tools/include/uapi/linux/btf.h
+++ b/tools/include/uapi/linux/btf.h
@@ -52,7 +52,7 @@ struct btf_type {
};
};
-#define BTF_INFO_KIND(info) (((info) >> 24) & 0x0f)
+#define BTF_INFO_KIND(info) (((info) >> 24) & 0x1f)
#define BTF_INFO_VLEN(info) ((info) & 0xffff)
#define BTF_INFO_KFLAG(info) ((info) >> 31)
@@ -72,7 +72,8 @@ struct btf_type {
#define BTF_KIND_FUNC_PROTO 13 /* Function Proto */
#define BTF_KIND_VAR 14 /* Variable */
#define BTF_KIND_DATASEC 15 /* Section */
-#define BTF_KIND_MAX BTF_KIND_DATASEC
+#define BTF_KIND_FLOAT 16 /* Floating point */
+#define BTF_KIND_MAX BTF_KIND_FLOAT
#define NR_BTF_KINDS (BTF_KIND_MAX + 1)
/* For some specific BTF_KIND, "struct btf_type" is immediately
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 8b281f722e5b..f6afee209620 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -1154,6 +1154,7 @@ struct kvm_x86_mce {
#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0)
#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1)
#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2)
+#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3)
struct kvm_xen_hvm_config {
__u32 flags;
@@ -1621,12 +1622,24 @@ struct kvm_xen_vcpu_attr {
union {
__u64 gpa;
__u64 pad[8];
+ struct {
+ __u64 state;
+ __u64 state_entry_time;
+ __u64 time_running;
+ __u64 time_runnable;
+ __u64 time_blocked;
+ __u64 time_offline;
+ } runstate;
} u;
};
/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO 0x0
#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO 0x1
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR 0x2
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5
/* Secure Encrypted Virtualization command */
enum sev_cmd_id {
diff --git a/tools/kvm/kvm_stat/kvm_stat.service b/tools/kvm/kvm_stat/kvm_stat.service
index 71aabaffe779..8f13b843d5b4 100644
--- a/tools/kvm/kvm_stat/kvm_stat.service
+++ b/tools/kvm/kvm_stat/kvm_stat.service
@@ -9,6 +9,7 @@ Type=simple
ExecStart=/usr/bin/kvm_stat -dtcz -s 10 -L /var/log/kvm_stat.csv
ExecReload=/bin/kill -HUP $MAINPID
Restart=always
+RestartSec=60s
SyslogIdentifier=kvm_stat
SyslogLevel=debug
diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index 190366d05588..9b057cc7650a 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1,3 +1,3 @@
libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \
- btf_dump.o ringbuf.o
+ btf_dump.o ringbuf.o strset.o linker.o
diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index 887a494ad5fc..e43e1896cb4b 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -158,7 +158,7 @@ $(BPF_IN_STATIC): force $(BPF_HELPER_DEFS)
$(Q)$(MAKE) $(build)=libbpf OUTPUT=$(STATIC_OBJDIR)
$(BPF_HELPER_DEFS): $(srctree)/tools/include/uapi/linux/bpf.h
- $(QUIET_GEN)$(srctree)/scripts/bpf_helpers_doc.py --header \
+ $(QUIET_GEN)$(srctree)/scripts/bpf_doc.py --header \
--file $(srctree)/tools/include/uapi/linux/bpf.h > $(BPF_HELPER_DEFS)
$(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION)
@@ -215,7 +215,7 @@ define do_install
if [ ! -d '$(DESTDIR_SQ)$2' ]; then \
$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \
fi; \
- $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2'
+ $(INSTALL) $(if $3,-m $3,) $1 '$(DESTDIR_SQ)$2'
endef
install_lib: all_cmd
@@ -228,7 +228,6 @@ install_headers: $(BPF_HELPER_DEFS)
$(call do_install,bpf.h,$(prefix)/include/bpf,644); \
$(call do_install,libbpf.h,$(prefix)/include/bpf,644); \
$(call do_install,btf.h,$(prefix)/include/bpf,644); \
- $(call do_install,libbpf_util.h,$(prefix)/include/bpf,644); \
$(call do_install,libbpf_common.h,$(prefix)/include/bpf,644); \
$(call do_install,xsk.h,$(prefix)/include/bpf,644); \
$(call do_install,bpf_helpers.h,$(prefix)/include/bpf,644); \
diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h
index 53b3e199fb25..09ebe3db5f2f 100644
--- a/tools/lib/bpf/bpf_core_read.h
+++ b/tools/lib/bpf/bpf_core_read.h
@@ -88,11 +88,19 @@ enum bpf_enum_value_kind {
const void *p = (const void *)s + __CORE_RELO(s, field, BYTE_OFFSET); \
unsigned long long val; \
\
+ /* This is a so-called barrier_var() operation that makes specified \
+ * variable "a black box" for optimizing compiler. \
+ * It forces compiler to perform BYTE_OFFSET relocation on p and use \
+ * its calculated value in the switch below, instead of applying \
+ * the same relocation 4 times for each individual memory load. \
+ */ \
+ asm volatile("" : "=r"(p) : "0"(p)); \
+ \
switch (__CORE_RELO(s, field, BYTE_SIZE)) { \
- case 1: val = *(const unsigned char *)p; \
- case 2: val = *(const unsigned short *)p; \
- case 4: val = *(const unsigned int *)p; \
- case 8: val = *(const unsigned long long *)p; \
+ case 1: val = *(const unsigned char *)p; break; \
+ case 2: val = *(const unsigned short *)p; break; \
+ case 4: val = *(const unsigned int *)p; break; \
+ case 8: val = *(const unsigned long long *)p; break; \
} \
val <<= __CORE_RELO(s, field, LSHIFT_U64); \
if (__CORE_RELO(s, field, SIGNED)) \
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index ae6c975e0b87..9720dc0b4605 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -25,13 +25,21 @@
/*
* Helper macro to place programs, maps, license in
* different sections in elf_bpf file. Section names
- * are interpreted by elf_bpf loader
+ * are interpreted by libbpf depending on the context (BPF programs, BPF maps,
+ * extern variables, etc).
+ * To allow use of SEC() with externs (e.g., for extern .maps declarations),
+ * make sure __attribute__((unused)) doesn't trigger compilation warning.
*/
-#define SEC(NAME) __attribute__((section(NAME), used))
+#define SEC(name) \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wignored-attributes\"") \
+ __attribute__((section(name), used)) \
+ _Pragma("GCC diagnostic pop") \
-#ifndef __always_inline
+/* Avoid 'linux/stddef.h' definition of '__always_inline'. */
+#undef __always_inline
#define __always_inline inline __attribute__((always_inline))
-#endif
+
#ifndef __noinline
#define __noinline __attribute__((noinline))
#endif
@@ -40,7 +48,29 @@
#endif
/*
- * Helper macro to manipulate data structures
+ * Use __hidden attribute to mark a non-static BPF subprogram effectively
+ * static for BPF verifier's verification algorithm purposes, allowing more
+ * extensive and permissive BPF verification process, taking into account
+ * subprogram's caller context.
+ */
+#define __hidden __attribute__((visibility("hidden")))
+
+/* When utilizing vmlinux.h with BPF CO-RE, user BPF programs can't include
+ * any system-level headers (such as stddef.h, linux/version.h, etc), and
+ * commonly-used macros like NULL and KERNEL_VERSION aren't available through
+ * vmlinux.h. This just adds unnecessary hurdles and forces users to re-define
+ * them on their own. So as a convenience, provide such definitions here.
+ */
+#ifndef NULL
+#define NULL ((void *)0)
+#endif
+
+#ifndef KERNEL_VERSION
+#define KERNEL_VERSION(a, b, c) (((a) << 16) + ((b) << 8) + ((c) > 255 ? 255 : (c)))
+#endif
+
+/*
+ * Helper macros to manipulate data structures
*/
#ifndef offsetof
#define offsetof(TYPE, MEMBER) ((unsigned long)&((TYPE *)0)->MEMBER)
diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index f9ef37707888..8c954ebc0c7c 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -413,20 +413,56 @@ typeof(name(0)) name(struct pt_regs *ctx) \
} \
static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
+#define ___bpf_fill0(arr, p, x) do {} while (0)
+#define ___bpf_fill1(arr, p, x) arr[p] = x
+#define ___bpf_fill2(arr, p, x, args...) arr[p] = x; ___bpf_fill1(arr, p + 1, args)
+#define ___bpf_fill3(arr, p, x, args...) arr[p] = x; ___bpf_fill2(arr, p + 1, args)
+#define ___bpf_fill4(arr, p, x, args...) arr[p] = x; ___bpf_fill3(arr, p + 1, args)
+#define ___bpf_fill5(arr, p, x, args...) arr[p] = x; ___bpf_fill4(arr, p + 1, args)
+#define ___bpf_fill6(arr, p, x, args...) arr[p] = x; ___bpf_fill5(arr, p + 1, args)
+#define ___bpf_fill7(arr, p, x, args...) arr[p] = x; ___bpf_fill6(arr, p + 1, args)
+#define ___bpf_fill8(arr, p, x, args...) arr[p] = x; ___bpf_fill7(arr, p + 1, args)
+#define ___bpf_fill9(arr, p, x, args...) arr[p] = x; ___bpf_fill8(arr, p + 1, args)
+#define ___bpf_fill10(arr, p, x, args...) arr[p] = x; ___bpf_fill9(arr, p + 1, args)
+#define ___bpf_fill11(arr, p, x, args...) arr[p] = x; ___bpf_fill10(arr, p + 1, args)
+#define ___bpf_fill12(arr, p, x, args...) arr[p] = x; ___bpf_fill11(arr, p + 1, args)
+#define ___bpf_fill(arr, args...) \
+ ___bpf_apply(___bpf_fill, ___bpf_narg(args))(arr, 0, args)
+
/*
* BPF_SEQ_PRINTF to wrap bpf_seq_printf to-be-printed values
* in a structure.
*/
-#define BPF_SEQ_PRINTF(seq, fmt, args...) \
- ({ \
- _Pragma("GCC diagnostic push") \
- _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
- static const char ___fmt[] = fmt; \
- unsigned long long ___param[] = { args }; \
- _Pragma("GCC diagnostic pop") \
- int ___ret = bpf_seq_printf(seq, ___fmt, sizeof(___fmt), \
- ___param, sizeof(___param)); \
- ___ret; \
- })
+#define BPF_SEQ_PRINTF(seq, fmt, args...) \
+({ \
+ static const char ___fmt[] = fmt; \
+ unsigned long long ___param[___bpf_narg(args)]; \
+ \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
+ ___bpf_fill(___param, args); \
+ _Pragma("GCC diagnostic pop") \
+ \
+ bpf_seq_printf(seq, ___fmt, sizeof(___fmt), \
+ ___param, sizeof(___param)); \
+})
+
+/*
+ * BPF_SNPRINTF wraps the bpf_snprintf helper with variadic arguments instead of
+ * an array of u64.
+ */
+#define BPF_SNPRINTF(out, out_size, fmt, args...) \
+({ \
+ static const char ___fmt[] = fmt; \
+ unsigned long long ___param[___bpf_narg(args)]; \
+ \
+ _Pragma("GCC diagnostic push") \
+ _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \
+ ___bpf_fill(___param, args); \
+ _Pragma("GCC diagnostic pop") \
+ \
+ bpf_snprintf(out, out_size, ___fmt, \
+ ___param, sizeof(___param)); \
+})
#endif
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index d9c10830d749..d57e13a13798 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -21,6 +21,7 @@
#include "libbpf.h"
#include "libbpf_internal.h"
#include "hashmap.h"
+#include "strset.h"
#define BTF_MAX_NR_TYPES 0x7fffffffU
#define BTF_MAX_STR_OFFSET 0x7fffffffU
@@ -67,7 +68,7 @@ struct btf {
* | | |
* hdr | |
* types_data----+ |
- * strs_data------------------+
+ * strset__data(strs_set)-----+
*
* +----------+---------+-----------+
* | Header | Types | Strings |
@@ -105,20 +106,15 @@ struct btf {
*/
int start_str_off;
+ /* only one of strs_data or strs_set can be non-NULL, depending on
+ * whether BTF is in a modifiable state (strs_set is used) or not
+ * (strs_data points inside raw_data)
+ */
void *strs_data;
- size_t strs_data_cap; /* used size stored in hdr->str_len */
-
- /* lookup index for each unique string in strings section */
- struct hashmap *strs_hash;
+ /* a set of unique strings */
+ struct strset *strs_set;
/* whether strings are already deduplicated */
bool strs_deduped;
- /* extra indirection layer to make strings hashmap work with stable
- * string offsets and ability to transparently choose between
- * btf->strs_data or btf_dedup->strs_data as a source of strings.
- * This is used for BTF strings dedup to transfer deduplicated strings
- * data back to struct btf without re-building strings index.
- */
- void **strs_data_ptr;
/* BTF object FD, if loaded into kernel */
int fd;
@@ -142,8 +138,8 @@ static inline __u64 ptr_to_u64(const void *ptr)
* On success, memory pointer to the beginning of unused memory is returned.
* On error, NULL is returned.
*/
-void *btf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz,
- size_t cur_cnt, size_t max_cnt, size_t add_cnt)
+void *libbpf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz,
+ size_t cur_cnt, size_t max_cnt, size_t add_cnt)
{
size_t new_cnt;
void *new_data;
@@ -179,14 +175,14 @@ void *btf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz,
/* Ensure given dynamically allocated memory region has enough allocated space
* to accommodate *need_cnt* elements of size *elem_sz* bytes each
*/
-int btf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_cnt)
+int libbpf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_cnt)
{
void *p;
if (need_cnt <= *cap_cnt)
return 0;
- p = btf_add_mem(data, cap_cnt, elem_sz, *cap_cnt, SIZE_MAX, need_cnt - *cap_cnt);
+ p = libbpf_add_mem(data, cap_cnt, elem_sz, *cap_cnt, SIZE_MAX, need_cnt - *cap_cnt);
if (!p)
return -ENOMEM;
@@ -197,8 +193,8 @@ static int btf_add_type_idx_entry(struct btf *btf, __u32 type_off)
{
__u32 *p;
- p = btf_add_mem((void **)&btf->type_offs, &btf->type_offs_cap, sizeof(__u32),
- btf->nr_types, BTF_MAX_NR_TYPES, 1);
+ p = libbpf_add_mem((void **)&btf->type_offs, &btf->type_offs_cap, sizeof(__u32),
+ btf->nr_types, BTF_MAX_NR_TYPES, 1);
if (!p)
return -ENOMEM;
@@ -291,6 +287,7 @@ static int btf_type_size(const struct btf_type *t)
case BTF_KIND_PTR:
case BTF_KIND_TYPEDEF:
case BTF_KIND_FUNC:
+ case BTF_KIND_FLOAT:
return base_size;
case BTF_KIND_INT:
return base_size + sizeof(__u32);
@@ -338,6 +335,7 @@ static int btf_bswap_type_rest(struct btf_type *t)
case BTF_KIND_PTR:
case BTF_KIND_TYPEDEF:
case BTF_KIND_FUNC:
+ case BTF_KIND_FLOAT:
return 0;
case BTF_KIND_INT:
*(__u32 *)(t + 1) = bswap_32(*(__u32 *)(t + 1));
@@ -433,7 +431,7 @@ const struct btf *btf__base_btf(const struct btf *btf)
}
/* internal helper returning non-const pointer to a type */
-static struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id)
+struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id)
{
if (type_id == 0)
return &btf_void;
@@ -578,6 +576,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)
case BTF_KIND_UNION:
case BTF_KIND_ENUM:
case BTF_KIND_DATASEC:
+ case BTF_KIND_FLOAT:
size = t->size;
goto done;
case BTF_KIND_PTR:
@@ -621,6 +620,7 @@ int btf__align_of(const struct btf *btf, __u32 id)
switch (kind) {
case BTF_KIND_INT:
case BTF_KIND_ENUM:
+ case BTF_KIND_FLOAT:
return min(btf_ptr_sz(btf), (size_t)t->size);
case BTF_KIND_PTR:
return btf_ptr_sz(btf);
@@ -734,7 +734,7 @@ void btf__free(struct btf *btf)
*/
free(btf->hdr);
free(btf->types_data);
- free(btf->strs_data);
+ strset__free(btf->strs_set);
}
free(btf->raw_data);
free(btf->raw_data_swapped);
@@ -1242,6 +1242,11 @@ void btf__set_fd(struct btf *btf, int fd)
btf->fd = fd;
}
+static const void *btf_strs_data(const struct btf *btf)
+{
+ return btf->strs_data ? btf->strs_data : strset__data(btf->strs_set);
+}
+
static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian)
{
struct btf_header *hdr = btf->hdr;
@@ -1282,7 +1287,7 @@ static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endi
}
p += hdr->type_len;
- memcpy(p, btf->strs_data, hdr->str_len);
+ memcpy(p, btf_strs_data(btf), hdr->str_len);
p += hdr->str_len;
*size = data_sz;
@@ -1316,7 +1321,7 @@ const char *btf__str_by_offset(const struct btf *btf, __u32 offset)
if (offset < btf->start_str_off)
return btf__str_by_offset(btf->base_btf, offset);
else if (offset - btf->start_str_off < btf->hdr->str_len)
- return btf->strs_data + (offset - btf->start_str_off);
+ return btf_strs_data(btf) + (offset - btf->start_str_off);
else
return NULL;
}
@@ -1470,25 +1475,6 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,
return 0;
}
-static size_t strs_hash_fn(const void *key, void *ctx)
-{
- const struct btf *btf = ctx;
- const char *strs = *btf->strs_data_ptr;
- const char *str = strs + (long)key;
-
- return str_hash(str);
-}
-
-static bool strs_hash_equal_fn(const void *key1, const void *key2, void *ctx)
-{
- const struct btf *btf = ctx;
- const char *strs = *btf->strs_data_ptr;
- const char *str1 = strs + (long)key1;
- const char *str2 = strs + (long)key2;
-
- return strcmp(str1, str2) == 0;
-}
-
static void btf_invalidate_raw_data(struct btf *btf)
{
if (btf->raw_data) {
@@ -1507,10 +1493,9 @@ static void btf_invalidate_raw_data(struct btf *btf)
*/
static int btf_ensure_modifiable(struct btf *btf)
{
- void *hdr, *types, *strs, *strs_end, *s;
- struct hashmap *hash = NULL;
- long off;
- int err;
+ void *hdr, *types;
+ struct strset *set = NULL;
+ int err = -ENOMEM;
if (btf_is_modifiable(btf)) {
/* any BTF modification invalidates raw_data */
@@ -1521,44 +1506,25 @@ static int btf_ensure_modifiable(struct btf *btf)
/* split raw data into three memory regions */
hdr = malloc(btf->hdr->hdr_len);
types = malloc(btf->hdr->type_len);
- strs = malloc(btf->hdr->str_len);
- if (!hdr || !types || !strs)
+ if (!hdr || !types)
goto err_out;
memcpy(hdr, btf->hdr, btf->hdr->hdr_len);
memcpy(types, btf->types_data, btf->hdr->type_len);
- memcpy(strs, btf->strs_data, btf->hdr->str_len);
-
- /* make hashmap below use btf->strs_data as a source of strings */
- btf->strs_data_ptr = &btf->strs_data;
/* build lookup index for all strings */
- hash = hashmap__new(strs_hash_fn, strs_hash_equal_fn, btf);
- if (IS_ERR(hash)) {
- err = PTR_ERR(hash);
- hash = NULL;
+ set = strset__new(BTF_MAX_STR_OFFSET, btf->strs_data, btf->hdr->str_len);
+ if (IS_ERR(set)) {
+ err = PTR_ERR(set);
goto err_out;
}
- strs_end = strs + btf->hdr->str_len;
- for (off = 0, s = strs; s < strs_end; off += strlen(s) + 1, s = strs + off) {
- /* hashmap__add() returns EEXIST if string with the same
- * content already is in the hash map
- */
- err = hashmap__add(hash, (void *)off, (void *)off);
- if (err == -EEXIST)
- continue; /* duplicate */
- if (err)
- goto err_out;
- }
-
/* only when everything was successful, update internal state */
btf->hdr = hdr;
btf->types_data = types;
btf->types_data_cap = btf->hdr->type_len;
- btf->strs_data = strs;
- btf->strs_data_cap = btf->hdr->str_len;
- btf->strs_hash = hash;
+ btf->strs_data = NULL;
+ btf->strs_set = set;
/* if BTF was created from scratch, all strings are guaranteed to be
* unique and deduplicated
*/
@@ -1573,17 +1539,10 @@ static int btf_ensure_modifiable(struct btf *btf)
return 0;
err_out:
- hashmap__free(hash);
+ strset__free(set);
free(hdr);
free(types);
- free(strs);
- return -ENOMEM;
-}
-
-static void *btf_add_str_mem(struct btf *btf, size_t add_sz)
-{
- return btf_add_mem(&btf->strs_data, &btf->strs_data_cap, 1,
- btf->hdr->str_len, BTF_MAX_STR_OFFSET, add_sz);
+ return err;
}
/* Find an offset in BTF string section that corresponds to a given string *s*.
@@ -1594,34 +1553,23 @@ static void *btf_add_str_mem(struct btf *btf, size_t add_sz)
*/
int btf__find_str(struct btf *btf, const char *s)
{
- long old_off, new_off, len;
- void *p;
+ int off;
if (btf->base_btf) {
- int ret;
-
- ret = btf__find_str(btf->base_btf, s);
- if (ret != -ENOENT)
- return ret;
+ off = btf__find_str(btf->base_btf, s);
+ if (off != -ENOENT)
+ return off;
}
/* BTF needs to be in a modifiable state to build string lookup index */
if (btf_ensure_modifiable(btf))
return -ENOMEM;
- /* see btf__add_str() for why we do this */
- len = strlen(s) + 1;
- p = btf_add_str_mem(btf, len);
- if (!p)
- return -ENOMEM;
-
- new_off = btf->hdr->str_len;
- memcpy(p, s, len);
-
- if (hashmap__find(btf->strs_hash, (void *)new_off, (void **)&old_off))
- return btf->start_str_off + old_off;
+ off = strset__find_str(btf->strs_set, s);
+ if (off < 0)
+ return off;
- return -ENOENT;
+ return btf->start_str_off + off;
}
/* Add a string s to the BTF string section.
@@ -1631,61 +1579,30 @@ int btf__find_str(struct btf *btf, const char *s)
*/
int btf__add_str(struct btf *btf, const char *s)
{
- long old_off, new_off, len;
- void *p;
- int err;
+ int off;
if (btf->base_btf) {
- int ret;
-
- ret = btf__find_str(btf->base_btf, s);
- if (ret != -ENOENT)
- return ret;
+ off = btf__find_str(btf->base_btf, s);
+ if (off != -ENOENT)
+ return off;
}
if (btf_ensure_modifiable(btf))
return -ENOMEM;
- /* Hashmap keys are always offsets within btf->strs_data, so to even
- * look up some string from the "outside", we need to first append it
- * at the end, so that it can be addressed with an offset. Luckily,
- * until btf->hdr->str_len is incremented, that string is just a piece
- * of garbage for the rest of BTF code, so no harm, no foul. On the
- * other hand, if the string is unique, it's already appended and
- * ready to be used, only a simple btf->hdr->str_len increment away.
- */
- len = strlen(s) + 1;
- p = btf_add_str_mem(btf, len);
- if (!p)
- return -ENOMEM;
-
- new_off = btf->hdr->str_len;
- memcpy(p, s, len);
+ off = strset__add_str(btf->strs_set, s);
+ if (off < 0)
+ return off;
- /* Now attempt to add the string, but only if the string with the same
- * contents doesn't exist already (HASHMAP_ADD strategy). If such
- * string exists, we'll get its offset in old_off (that's old_key).
- */
- err = hashmap__insert(btf->strs_hash, (void *)new_off, (void *)new_off,
- HASHMAP_ADD, (const void **)&old_off, NULL);
- if (err == -EEXIST)
- return btf->start_str_off + old_off; /* duplicated string, return existing offset */
- if (err)
- return err;
+ btf->hdr->str_len = strset__data_size(btf->strs_set);
- btf->hdr->str_len += len; /* new unique string, adjust data length */
- return btf->start_str_off + new_off;
+ return btf->start_str_off + off;
}
static void *btf_add_type_mem(struct btf *btf, size_t add_sz)
{
- return btf_add_mem(&btf->types_data, &btf->types_data_cap, 1,
- btf->hdr->type_len, UINT_MAX, add_sz);
-}
-
-static __u32 btf_type_info(int kind, int vlen, int kflag)
-{
- return (kflag << 31) | (kind << 24) | vlen;
+ return libbpf_add_mem(&btf->types_data, &btf->types_data_cap, 1,
+ btf->hdr->type_len, UINT_MAX, add_sz);
}
static void btf_type_inc_vlen(struct btf_type *t)
@@ -1707,6 +1624,54 @@ static int btf_commit_type(struct btf *btf, int data_sz)
return btf->start_id + btf->nr_types - 1;
}
+struct btf_pipe {
+ const struct btf *src;
+ struct btf *dst;
+};
+
+static int btf_rewrite_str(__u32 *str_off, void *ctx)
+{
+ struct btf_pipe *p = ctx;
+ int off;
+
+ if (!*str_off) /* nothing to do for empty strings */
+ return 0;
+
+ off = btf__add_str(p->dst, btf__str_by_offset(p->src, *str_off));
+ if (off < 0)
+ return off;
+
+ *str_off = off;
+ return 0;
+}
+
+int btf__add_type(struct btf *btf, const struct btf *src_btf, const struct btf_type *src_type)
+{
+ struct btf_pipe p = { .src = src_btf, .dst = btf };
+ struct btf_type *t;
+ int sz, err;
+
+ sz = btf_type_size(src_type);
+ if (sz < 0)
+ return sz;
+
+ /* deconstruct BTF, if necessary, and invalidate raw_data */
+ if (btf_ensure_modifiable(btf))
+ return -ENOMEM;
+
+ t = btf_add_type_mem(btf, sz);
+ if (!t)
+ return -ENOMEM;
+
+ memcpy(t, src_type, sz);
+
+ err = btf_type_visit_str_offs(t, btf_rewrite_str, &p);
+ if (err)
+ return err;
+
+ return btf_commit_type(btf, sz);
+}
+
/*
* Append new BTF_KIND_INT type with:
* - *name* - non-empty, non-NULL type name;
@@ -1756,6 +1721,47 @@ int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding
return btf_commit_type(btf, sz);
}
+/*
+ * Append new BTF_KIND_FLOAT type with:
+ * - *name* - non-empty, non-NULL type name;
+ * - *sz* - size of the type, in bytes;
+ * Returns:
+ * - >0, type ID of newly added BTF type;
+ * - <0, on error.
+ */
+int btf__add_float(struct btf *btf, const char *name, size_t byte_sz)
+{
+ struct btf_type *t;
+ int sz, name_off;
+
+ /* non-empty name */
+ if (!name || !name[0])
+ return -EINVAL;
+
+ /* byte_sz must be one of the explicitly allowed values */
+ if (byte_sz != 2 && byte_sz != 4 && byte_sz != 8 && byte_sz != 12 &&
+ byte_sz != 16)
+ return -EINVAL;
+
+ if (btf_ensure_modifiable(btf))
+ return -ENOMEM;
+
+ sz = sizeof(struct btf_type);
+ t = btf_add_type_mem(btf, sz);
+ if (!t)
+ return -ENOMEM;
+
+ name_off = btf__add_str(btf, name);
+ if (name_off < 0)
+ return name_off;
+
+ t->name_off = name_off;
+ t->info = btf_type_info(BTF_KIND_FLOAT, 0, 0);
+ t->size = byte_sz;
+
+ return btf_commit_type(btf, sz);
+}
+
/* it's completely legal to append BTF types with type IDs pointing forward to
* types that haven't been appended yet, so we only make sure that id looks
* sane, we can't guarantee that ID will always be valid
@@ -1883,7 +1889,7 @@ static int btf_add_composite(struct btf *btf, int kind, const char *name, __u32
* - *byte_sz* - size of the struct, in bytes;
*
* Struct initially has no fields in it. Fields can be added by
- * btf__add_field() right after btf__add_struct() succeeds.
+ * btf__add_field() right after btf__add_struct() succeeds.
*
* Returns:
* - >0, type ID of newly added BTF type;
@@ -2971,10 +2977,7 @@ struct btf_dedup {
/* Various option modifying behavior of algorithm */
struct btf_dedup_opts opts;
/* temporary strings deduplication state */
- void *strs_data;
- size_t strs_cap;
- size_t strs_len;
- struct hashmap* strs_hash;
+ struct strset *strs_set;
};
static long hash_combine(long h, long value)
@@ -3110,95 +3113,28 @@ done:
return d;
}
-typedef int (*str_off_fn_t)(__u32 *str_off_ptr, void *ctx);
-
/*
* Iterate over all possible places in .BTF and .BTF.ext that can reference
* string and pass pointer to it to a provided callback `fn`.
*/
-static int btf_for_each_str_off(struct btf_dedup *d, str_off_fn_t fn, void *ctx)
+static int btf_for_each_str_off(struct btf_dedup *d, str_off_visit_fn fn, void *ctx)
{
- void *line_data_cur, *line_data_end;
- int i, j, r, rec_size;
- struct btf_type *t;
+ int i, r;
for (i = 0; i < d->btf->nr_types; i++) {
- t = btf_type_by_id(d->btf, d->btf->start_id + i);
- r = fn(&t->name_off, ctx);
+ struct btf_type *t = btf_type_by_id(d->btf, d->btf->start_id + i);
+
+ r = btf_type_visit_str_offs(t, fn, ctx);
if (r)
return r;
-
- switch (btf_kind(t)) {
- case BTF_KIND_STRUCT:
- case BTF_KIND_UNION: {
- struct btf_member *m = btf_members(t);
- __u16 vlen = btf_vlen(t);
-
- for (j = 0; j < vlen; j++) {
- r = fn(&m->name_off, ctx);
- if (r)
- return r;
- m++;
- }
- break;
- }
- case BTF_KIND_ENUM: {
- struct btf_enum *m = btf_enum(t);
- __u16 vlen = btf_vlen(t);
-
- for (j = 0; j < vlen; j++) {
- r = fn(&m->name_off, ctx);
- if (r)
- return r;
- m++;
- }
- break;
- }
- case BTF_KIND_FUNC_PROTO: {
- struct btf_param *m = btf_params(t);
- __u16 vlen = btf_vlen(t);
-
- for (j = 0; j < vlen; j++) {
- r = fn(&m->name_off, ctx);
- if (r)
- return r;
- m++;
- }
- break;
- }
- default:
- break;
- }
}
if (!d->btf_ext)
return 0;
- line_data_cur = d->btf_ext->line_info.info;
- line_data_end = d->btf_ext->line_info.info + d->btf_ext->line_info.len;
- rec_size = d->btf_ext->line_info.rec_size;
-
- while (line_data_cur < line_data_end) {
- struct btf_ext_info_sec *sec = line_data_cur;
- struct bpf_line_info_min *line_info;
- __u32 num_info = sec->num_info;
-
- r = fn(&sec->sec_name_off, ctx);
- if (r)
- return r;
-
- line_data_cur += sizeof(struct btf_ext_info_sec);
- for (i = 0; i < num_info; i++) {
- line_info = line_data_cur;
- r = fn(&line_info->file_name_off, ctx);
- if (r)
- return r;
- r = fn(&line_info->line_off, ctx);
- if (r)
- return r;
- line_data_cur += rec_size;
- }
- }
+ r = btf_ext_visit_str_offs(d->btf_ext, fn, ctx);
+ if (r)
+ return r;
return 0;
}
@@ -3207,10 +3143,8 @@ static int strs_dedup_remap_str_off(__u32 *str_off_ptr, void *ctx)
{
struct btf_dedup *d = ctx;
__u32 str_off = *str_off_ptr;
- long old_off, new_off, len;
const char *s;
- void *p;
- int err;
+ int off, err;
/* don't touch empty string or string in main BTF */
if (str_off == 0 || str_off < d->btf->start_str_off)
@@ -3227,29 +3161,11 @@ static int strs_dedup_remap_str_off(__u32 *str_off_ptr, void *ctx)
return err;
}
- len = strlen(s) + 1;
-
- new_off = d->strs_len;
- p = btf_add_mem(&d->strs_data, &d->strs_cap, 1, new_off, BTF_MAX_STR_OFFSET, len);
- if (!p)
- return -ENOMEM;
+ off = strset__add_str(d->strs_set, s);
+ if (off < 0)
+ return off;
- memcpy(p, s, len);
-
- /* Now attempt to add the string, but only if the string with the same
- * contents doesn't exist already (HASHMAP_ADD strategy). If such
- * string exists, we'll get its offset in old_off (that's old_key).
- */
- err = hashmap__insert(d->strs_hash, (void *)new_off, (void *)new_off,
- HASHMAP_ADD, (const void **)&old_off, NULL);
- if (err == -EEXIST) {
- *str_off_ptr = d->btf->start_str_off + old_off;
- } else if (err) {
- return err;
- } else {
- *str_off_ptr = d->btf->start_str_off + new_off;
- d->strs_len += len;
- }
+ *str_off_ptr = d->btf->start_str_off + off;
return 0;
}
@@ -3266,39 +3182,23 @@ static int strs_dedup_remap_str_off(__u32 *str_off_ptr, void *ctx)
*/
static int btf_dedup_strings(struct btf_dedup *d)
{
- char *s;
int err;
if (d->btf->strs_deduped)
return 0;
- /* temporarily switch to use btf_dedup's strs_data for strings for hash
- * functions; later we'll just transfer hashmap to struct btf as is,
- * along the strs_data
- */
- d->btf->strs_data_ptr = &d->strs_data;
-
- d->strs_hash = hashmap__new(strs_hash_fn, strs_hash_equal_fn, d->btf);
- if (IS_ERR(d->strs_hash)) {
- err = PTR_ERR(d->strs_hash);
- d->strs_hash = NULL;
+ d->strs_set = strset__new(BTF_MAX_STR_OFFSET, NULL, 0);
+ if (IS_ERR(d->strs_set)) {
+ err = PTR_ERR(d->strs_set);
goto err_out;
}
if (!d->btf->base_btf) {
- s = btf_add_mem(&d->strs_data, &d->strs_cap, 1, d->strs_len, BTF_MAX_STR_OFFSET, 1);
- if (!s)
- return -ENOMEM;
- /* initial empty string */
- s[0] = 0;
- d->strs_len = 1;
-
/* insert empty string; we won't be looking it up during strings
* dedup, but it's good to have it for generic BTF string lookups
*/
- err = hashmap__insert(d->strs_hash, (void *)0, (void *)0,
- HASHMAP_ADD, NULL, NULL);
- if (err)
+ err = strset__add_str(d->strs_set, "");
+ if (err < 0)
goto err_out;
}
@@ -3308,28 +3208,16 @@ static int btf_dedup_strings(struct btf_dedup *d)
goto err_out;
/* replace BTF string data and hash with deduped ones */
- free(d->btf->strs_data);
- hashmap__free(d->btf->strs_hash);
- d->btf->strs_data = d->strs_data;
- d->btf->strs_data_cap = d->strs_cap;
- d->btf->hdr->str_len = d->strs_len;
- d->btf->strs_hash = d->strs_hash;
- /* now point strs_data_ptr back to btf->strs_data */
- d->btf->strs_data_ptr = &d->btf->strs_data;
-
- d->strs_data = d->strs_hash = NULL;
- d->strs_len = d->strs_cap = 0;
+ strset__free(d->btf->strs_set);
+ d->btf->hdr->str_len = strset__data_size(d->strs_set);
+ d->btf->strs_set = d->strs_set;
+ d->strs_set = NULL;
d->btf->strs_deduped = true;
return 0;
err_out:
- free(d->strs_data);
- hashmap__free(d->strs_hash);
- d->strs_data = d->strs_hash = NULL;
- d->strs_len = d->strs_cap = 0;
-
- /* restore strings pointer for existing d->btf->strs_hash back */
- d->btf->strs_data_ptr = &d->strs_data;
+ strset__free(d->strs_set);
+ d->strs_set = NULL;
return err;
}
@@ -3626,6 +3514,7 @@ static int btf_dedup_prep(struct btf_dedup *d)
case BTF_KIND_FWD:
case BTF_KIND_TYPEDEF:
case BTF_KIND_FUNC:
+ case BTF_KIND_FLOAT:
h = btf_hash_common(t);
break;
case BTF_KIND_INT:
@@ -3722,6 +3611,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)
break;
case BTF_KIND_FWD:
+ case BTF_KIND_FLOAT:
h = btf_hash_common(t);
for_each_dedup_cand(d, hash_entry, h) {
cand_id = (__u32)(long)hash_entry->value;
@@ -3983,6 +3873,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,
return btf_compat_enum(cand_type, canon_type);
case BTF_KIND_FWD:
+ case BTF_KIND_FLOAT:
return btf_equal_common(cand_type, canon_type);
case BTF_KIND_CONST:
@@ -4450,15 +4341,18 @@ static int btf_dedup_compact_types(struct btf_dedup *d)
* then mapping it to a deduplicated type ID, stored in btf_dedup->hypot_map,
* which is populated during compaction phase.
*/
-static int btf_dedup_remap_type_id(struct btf_dedup *d, __u32 type_id)
+static int btf_dedup_remap_type_id(__u32 *type_id, void *ctx)
{
+ struct btf_dedup *d = ctx;
__u32 resolved_type_id, new_type_id;
- resolved_type_id = resolve_type_id(d, type_id);
+ resolved_type_id = resolve_type_id(d, *type_id);
new_type_id = d->hypot_map[resolved_type_id];
if (new_type_id > BTF_MAX_NR_TYPES)
return -EINVAL;
- return new_type_id;
+
+ *type_id = new_type_id;
+ return 0;
}
/*
@@ -4471,111 +4365,28 @@ static int btf_dedup_remap_type_id(struct btf_dedup *d, __u32 type_id)
* referenced from any BTF type (e.g., struct fields, func proto args, etc) to
* their final deduped type IDs.
*/
-static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id)
+static int btf_dedup_remap_types(struct btf_dedup *d)
{
- struct btf_type *t = btf_type_by_id(d->btf, type_id);
int i, r;
- switch (btf_kind(t)) {
- case BTF_KIND_INT:
- case BTF_KIND_ENUM:
- break;
-
- case BTF_KIND_FWD:
- case BTF_KIND_CONST:
- case BTF_KIND_VOLATILE:
- case BTF_KIND_RESTRICT:
- case BTF_KIND_PTR:
- case BTF_KIND_TYPEDEF:
- case BTF_KIND_FUNC:
- case BTF_KIND_VAR:
- r = btf_dedup_remap_type_id(d, t->type);
- if (r < 0)
- return r;
- t->type = r;
- break;
-
- case BTF_KIND_ARRAY: {
- struct btf_array *arr_info = btf_array(t);
+ for (i = 0; i < d->btf->nr_types; i++) {
+ struct btf_type *t = btf_type_by_id(d->btf, d->btf->start_id + i);
- r = btf_dedup_remap_type_id(d, arr_info->type);
- if (r < 0)
- return r;
- arr_info->type = r;
- r = btf_dedup_remap_type_id(d, arr_info->index_type);
- if (r < 0)
+ r = btf_type_visit_type_ids(t, btf_dedup_remap_type_id, d);
+ if (r)
return r;
- arr_info->index_type = r;
- break;
- }
-
- case BTF_KIND_STRUCT:
- case BTF_KIND_UNION: {
- struct btf_member *member = btf_members(t);
- __u16 vlen = btf_vlen(t);
-
- for (i = 0; i < vlen; i++) {
- r = btf_dedup_remap_type_id(d, member->type);
- if (r < 0)
- return r;
- member->type = r;
- member++;
- }
- break;
}
- case BTF_KIND_FUNC_PROTO: {
- struct btf_param *param = btf_params(t);
- __u16 vlen = btf_vlen(t);
-
- r = btf_dedup_remap_type_id(d, t->type);
- if (r < 0)
- return r;
- t->type = r;
-
- for (i = 0; i < vlen; i++) {
- r = btf_dedup_remap_type_id(d, param->type);
- if (r < 0)
- return r;
- param->type = r;
- param++;
- }
- break;
- }
-
- case BTF_KIND_DATASEC: {
- struct btf_var_secinfo *var = btf_var_secinfos(t);
- __u16 vlen = btf_vlen(t);
-
- for (i = 0; i < vlen; i++) {
- r = btf_dedup_remap_type_id(d, var->type);
- if (r < 0)
- return r;
- var->type = r;
- var++;
- }
- break;
- }
+ if (!d->btf_ext)
+ return 0;
- default:
- return -EINVAL;
- }
+ r = btf_ext_visit_type_ids(d->btf_ext, btf_dedup_remap_type_id, d);
+ if (r)
+ return r;
return 0;
}
-static int btf_dedup_remap_types(struct btf_dedup *d)
-{
- int i, r;
-
- for (i = 0; i < d->btf->nr_types; i++) {
- r = btf_dedup_remap_type(d, d->btf->start_id + i);
- if (r < 0)
- return r;
- }
- return 0;
-}
-
/*
* Probe few well-known locations for vmlinux kernel image and try to load BTF
* data out of it to use for target BTF.
@@ -4626,3 +4437,200 @@ struct btf *libbpf_find_kernel_btf(void)
pr_warn("failed to find valid kernel BTF\n");
return ERR_PTR(-ESRCH);
}
+
+int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ctx)
+{
+ int i, n, err;
+
+ switch (btf_kind(t)) {
+ case BTF_KIND_INT:
+ case BTF_KIND_FLOAT:
+ case BTF_KIND_ENUM:
+ return 0;
+
+ case BTF_KIND_FWD:
+ case BTF_KIND_CONST:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_RESTRICT:
+ case BTF_KIND_PTR:
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_FUNC:
+ case BTF_KIND_VAR:
+ return visit(&t->type, ctx);
+
+ case BTF_KIND_ARRAY: {
+ struct btf_array *a = btf_array(t);
+
+ err = visit(&a->type, ctx);
+ err = err ?: visit(&a->index_type, ctx);
+ return err;
+ }
+
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION: {
+ struct btf_member *m = btf_members(t);
+
+ for (i = 0, n = btf_vlen(t); i < n; i++, m++) {
+ err = visit(&m->type, ctx);
+ if (err)
+ return err;
+ }
+ return 0;
+ }
+
+ case BTF_KIND_FUNC_PROTO: {
+ struct btf_param *m = btf_params(t);
+
+ err = visit(&t->type, ctx);
+ if (err)
+ return err;
+ for (i = 0, n = btf_vlen(t); i < n; i++, m++) {
+ err = visit(&m->type, ctx);
+ if (err)
+ return err;
+ }
+ return 0;
+ }
+
+ case BTF_KIND_DATASEC: {
+ struct btf_var_secinfo *m = btf_var_secinfos(t);
+
+ for (i = 0, n = btf_vlen(t); i < n; i++, m++) {
+ err = visit(&m->type, ctx);
+ if (err)
+ return err;
+ }
+ return 0;
+ }
+
+ default:
+ return -EINVAL;
+ }
+}
+
+int btf_type_visit_str_offs(struct btf_type *t, str_off_visit_fn visit, void *ctx)
+{
+ int i, n, err;
+
+ err = visit(&t->name_off, ctx);
+ if (err)
+ return err;
+
+ switch (btf_kind(t)) {
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION: {
+ struct btf_member *m = btf_members(t);
+
+ for (i = 0, n = btf_vlen(t); i < n; i++, m++) {
+ err = visit(&m->name_off, ctx);
+ if (err)
+ return err;
+ }
+ break;
+ }
+ case BTF_KIND_ENUM: {
+ struct btf_enum *m = btf_enum(t);
+
+ for (i = 0, n = btf_vlen(t); i < n; i++, m++) {
+ err = visit(&m->name_off, ctx);
+ if (err)
+ return err;
+ }
+ break;
+ }
+ case BTF_KIND_FUNC_PROTO: {
+ struct btf_param *m = btf_params(t);
+
+ for (i = 0, n = btf_vlen(t); i < n; i++, m++) {
+ err = visit(&m->name_off, ctx);
+ if (err)
+ return err;
+ }
+ break;
+ }
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+int btf_ext_visit_type_ids(struct btf_ext *btf_ext, type_id_visit_fn visit, void *ctx)
+{
+ const struct btf_ext_info *seg;
+ struct btf_ext_info_sec *sec;
+ int i, err;
+
+ seg = &btf_ext->func_info;
+ for_each_btf_ext_sec(seg, sec) {
+ struct bpf_func_info_min *rec;
+
+ for_each_btf_ext_rec(seg, sec, i, rec) {
+ err = visit(&rec->type_id, ctx);
+ if (err < 0)
+ return err;
+ }
+ }
+
+ seg = &btf_ext->core_relo_info;
+ for_each_btf_ext_sec(seg, sec) {
+ struct bpf_core_relo *rec;
+
+ for_each_btf_ext_rec(seg, sec, i, rec) {
+ err = visit(&rec->type_id, ctx);
+ if (err < 0)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void *ctx)
+{
+ const struct btf_ext_info *seg;
+ struct btf_ext_info_sec *sec;
+ int i, err;
+
+ seg = &btf_ext->func_info;
+ for_each_btf_ext_sec(seg, sec) {
+ err = visit(&sec->sec_name_off, ctx);
+ if (err)
+ return err;
+ }
+
+ seg = &btf_ext->line_info;
+ for_each_btf_ext_sec(seg, sec) {
+ struct bpf_line_info_min *rec;
+
+ err = visit(&sec->sec_name_off, ctx);
+ if (err)
+ return err;
+
+ for_each_btf_ext_rec(seg, sec, i, rec) {
+ err = visit(&rec->file_name_off, ctx);
+ if (err)
+ return err;
+ err = visit(&rec->line_off, ctx);
+ if (err)
+ return err;
+ }
+ }
+
+ seg = &btf_ext->core_relo_info;
+ for_each_btf_ext_sec(seg, sec) {
+ struct bpf_core_relo *rec;
+
+ err = visit(&sec->sec_name_off, ctx);
+ if (err)
+ return err;
+
+ for_each_btf_ext_rec(seg, sec, i, rec) {
+ err = visit(&rec->access_str_off, ctx);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 1237bcd1dd17..b54f1c3ebd57 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -93,8 +93,11 @@ LIBBPF_API struct btf *libbpf_find_kernel_btf(void);
LIBBPF_API int btf__find_str(struct btf *btf, const char *s);
LIBBPF_API int btf__add_str(struct btf *btf, const char *s);
+LIBBPF_API int btf__add_type(struct btf *btf, const struct btf *src_btf,
+ const struct btf_type *src_type);
LIBBPF_API int btf__add_int(struct btf *btf, const char *name, size_t byte_sz, int encoding);
+LIBBPF_API int btf__add_float(struct btf *btf, const char *name, size_t byte_sz);
LIBBPF_API int btf__add_ptr(struct btf *btf, int ref_type_id);
LIBBPF_API int btf__add_array(struct btf *btf,
int index_type_id, int elem_type_id, __u32 nr_elems);
@@ -173,6 +176,7 @@ struct btf_dump_emit_type_decl_opts {
int indent_level;
/* strip all the const/volatile/restrict mods */
bool strip_mods;
+ size_t :0;
};
#define btf_dump_emit_type_decl_opts__last_field strip_mods
@@ -294,6 +298,11 @@ static inline bool btf_is_datasec(const struct btf_type *t)
return btf_kind(t) == BTF_KIND_DATASEC;
}
+static inline bool btf_is_float(const struct btf_type *t)
+{
+ return btf_kind(t) == BTF_KIND_FLOAT;
+}
+
static inline __u8 btf_int_encoding(const struct btf_type *t)
{
return BTF_INT_ENCODING(*(__u32 *)(t + 1));
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index 2f9d685bd522..5e2809d685bf 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -166,11 +166,11 @@ static int btf_dump_resize(struct btf_dump *d)
if (last_id <= d->last_id)
return 0;
- if (btf_ensure_mem((void **)&d->type_states, &d->type_states_cap,
- sizeof(*d->type_states), last_id + 1))
+ if (libbpf_ensure_mem((void **)&d->type_states, &d->type_states_cap,
+ sizeof(*d->type_states), last_id + 1))
return -ENOMEM;
- if (btf_ensure_mem((void **)&d->cached_names, &d->cached_names_cap,
- sizeof(*d->cached_names), last_id + 1))
+ if (libbpf_ensure_mem((void **)&d->cached_names, &d->cached_names_cap,
+ sizeof(*d->cached_names), last_id + 1))
return -ENOMEM;
if (d->last_id == 0) {
@@ -279,6 +279,7 @@ static int btf_dump_mark_referenced(struct btf_dump *d)
case BTF_KIND_INT:
case BTF_KIND_ENUM:
case BTF_KIND_FWD:
+ case BTF_KIND_FLOAT:
break;
case BTF_KIND_VOLATILE:
@@ -453,6 +454,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
switch (btf_kind(t)) {
case BTF_KIND_INT:
+ case BTF_KIND_FLOAT:
tstate->order_state = ORDERED;
return 0;
@@ -462,7 +464,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
return err;
case BTF_KIND_ARRAY:
- return btf_dump_order_type(d, btf_array(t)->type, through_ptr);
+ return btf_dump_order_type(d, btf_array(t)->type, false);
case BTF_KIND_STRUCT:
case BTF_KIND_UNION: {
@@ -1133,6 +1135,7 @@ skip_mod:
case BTF_KIND_STRUCT:
case BTF_KIND_UNION:
case BTF_KIND_TYPEDEF:
+ case BTF_KIND_FLOAT:
goto done;
default:
pr_warn("unexpected type in decl chain, kind:%u, id:[%u]\n",
@@ -1247,6 +1250,7 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,
switch (kind) {
case BTF_KIND_INT:
+ case BTF_KIND_FLOAT:
btf_dump_emit_mods(d, decls);
name = btf_name_of(d, t->name_off);
btf_dump_printf(d, "%s", name);
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index d43cc3f29dae..e2a3cf437814 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -55,10 +55,6 @@
#include "libbpf_internal.h"
#include "hashmap.h"
-#ifndef EM_BPF
-#define EM_BPF 247
-#endif
-
#ifndef BPF_FS_MAGIC
#define BPF_FS_MAGIC 0xcafe4a11
#endif
@@ -73,8 +69,7 @@
#define __printf(a, b) __attribute__((format(printf, a, b)))
static struct bpf_map *bpf_object__add_map(struct bpf_object *obj);
-static const struct btf_type *
-skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id);
+static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog);
static int __base_pr(enum libbpf_print_level level, const char *format,
va_list args)
@@ -178,6 +173,8 @@ enum kern_feature_id {
FEAT_PROG_BIND_MAP,
/* Kernel support for module BTFs */
FEAT_MODULE_BTF,
+ /* BTF_KIND_FLOAT support */
+ FEAT_BTF_FLOAT,
__FEAT_CNT,
};
@@ -187,7 +184,9 @@ enum reloc_type {
RELO_LD64,
RELO_CALL,
RELO_DATA,
- RELO_EXTERN,
+ RELO_EXTERN_VAR,
+ RELO_EXTERN_FUNC,
+ RELO_SUBPROG_ADDR,
};
struct reloc_desc {
@@ -195,7 +194,6 @@ struct reloc_desc {
int insn_idx;
int map_idx;
int sym_off;
- bool processed;
};
struct bpf_sec_def;
@@ -275,6 +273,7 @@ struct bpf_program {
bpf_program_clear_priv_t clear_priv;
bool load;
+ bool mark_btf_static;
enum bpf_prog_type type;
enum bpf_attach_type expected_attach_type;
int prog_ifindex;
@@ -501,8 +500,6 @@ static Elf_Scn *elf_sec_by_name(const struct bpf_object *obj, const char *name);
static int elf_sec_hdr(const struct bpf_object *obj, Elf_Scn *scn, GElf_Shdr *hdr);
static const char *elf_sec_name(const struct bpf_object *obj, Elf_Scn *scn);
static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn);
-static int elf_sym_by_sec_off(const struct bpf_object *obj, size_t sec_idx,
- size_t off, __u32 sym_type, GElf_Sym *sym);
void bpf_program__unload(struct bpf_program *prog)
{
@@ -574,6 +571,21 @@ static bool insn_is_subprog_call(const struct bpf_insn *insn)
insn->off == 0;
}
+static bool is_ldimm64_insn(struct bpf_insn *insn)
+{
+ return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
+}
+
+static bool is_call_insn(const struct bpf_insn *insn)
+{
+ return insn->code == (BPF_JMP | BPF_CALL);
+}
+
+static bool insn_is_pseudo_func(struct bpf_insn *insn)
+{
+ return is_ldimm64_insn(insn) && insn->src_reg == BPF_PSEUDO_FUNC;
+}
+
static int
bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
const char *name, size_t sec_idx, const char *sec_name,
@@ -628,25 +640,29 @@ static int
bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
const char *sec_name, int sec_idx)
{
+ Elf_Data *symbols = obj->efile.symbols;
struct bpf_program *prog, *progs;
void *data = sec_data->d_buf;
- size_t sec_sz = sec_data->d_size, sec_off, prog_sz;
- int nr_progs, err;
+ size_t sec_sz = sec_data->d_size, sec_off, prog_sz, nr_syms;
+ int nr_progs, err, i;
const char *name;
GElf_Sym sym;
progs = obj->programs;
nr_progs = obj->nr_programs;
+ nr_syms = symbols->d_size / sizeof(GElf_Sym);
sec_off = 0;
- while (sec_off < sec_sz) {
- if (elf_sym_by_sec_off(obj, sec_idx, sec_off, STT_FUNC, &sym)) {
- pr_warn("sec '%s': failed to find program symbol at offset %zu\n",
- sec_name, sec_off);
- return -LIBBPF_ERRNO__FORMAT;
- }
+ for (i = 0; i < nr_syms; i++) {
+ if (!gelf_getsym(symbols, i, &sym))
+ continue;
+ if (sym.st_shndx != sec_idx)
+ continue;
+ if (GELF_ST_TYPE(sym.st_info) != STT_FUNC)
+ continue;
prog_sz = sym.st_size;
+ sec_off = sym.st_value;
name = elf_sym_str(obj, sym.st_name);
if (!name) {
@@ -684,10 +700,17 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,
if (err)
return err;
+ /* if function is a global/weak symbol, but has hidden
+ * visibility (STV_HIDDEN), mark its BTF FUNC as static to
+ * enable more permissive BPF verification mode with more
+ * outside context available to BPF verifier
+ */
+ if (GELF_ST_BIND(sym.st_info) != STB_LOCAL
+ && GELF_ST_VISIBILITY(sym.st_other) == STV_HIDDEN)
+ prog->mark_btf_static = true;
+
nr_progs++;
obj->nr_programs = nr_progs;
-
- sec_off += prog_sz;
}
return 0;
@@ -1121,11 +1144,6 @@ static void bpf_object__elf_finish(struct bpf_object *obj)
obj->efile.obj_buf_sz = 0;
}
-/* if libelf is old and doesn't support mmap(), fall back to read() */
-#ifndef ELF_C_READ_MMAP
-#define ELF_C_READ_MMAP ELF_C_READ
-#endif
-
static int bpf_object__elf_init(struct bpf_object *obj)
{
int err = 0;
@@ -1181,7 +1199,8 @@ static int bpf_object__elf_init(struct bpf_object *obj)
if (!elf_rawdata(elf_getscn(obj->efile.elf, obj->efile.shstrndx), NULL)) {
pr_warn("elf: failed to get section names strings from %s: %s\n",
obj->path, elf_errmsg(-1));
- return -LIBBPF_ERRNO__FORMAT;
+ err = -LIBBPF_ERRNO__FORMAT;
+ goto errout;
}
/* Old LLVM set e_machine to EM_NONE */
@@ -1885,7 +1904,7 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)
return 0;
}
-static const struct btf_type *
+const struct btf_type *
skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)
{
const struct btf_type *t = btf__type_by_id(btf, id);
@@ -1916,9 +1935,9 @@ resolve_func_ptr(const struct btf *btf, __u32 id, __u32 *res_id)
return btf_is_func_proto(t) ? t : NULL;
}
-static const char *btf_kind_str(const struct btf_type *t)
+static const char *__btf_kind_str(__u16 kind)
{
- switch (btf_kind(t)) {
+ switch (kind) {
case BTF_KIND_UNKN: return "void";
case BTF_KIND_INT: return "int";
case BTF_KIND_PTR: return "ptr";
@@ -1935,10 +1954,16 @@ static const char *btf_kind_str(const struct btf_type *t)
case BTF_KIND_FUNC_PROTO: return "func_proto";
case BTF_KIND_VAR: return "var";
case BTF_KIND_DATASEC: return "datasec";
+ case BTF_KIND_FLOAT: return "float";
default: return "unknown";
}
}
+const char *btf_kind_str(const struct btf_type *t)
+{
+ return __btf_kind_str(btf_kind(t));
+}
+
/*
* Fetch integer attribute of BTF map definition. Such attributes are
* represented using a pointer to an array, in which dimensionality of array
@@ -1993,254 +2018,262 @@ static int build_map_pin_path(struct bpf_map *map, const char *path)
return bpf_map__set_pin_path(map, buf);
}
-
-static int parse_btf_map_def(struct bpf_object *obj,
- struct bpf_map *map,
- const struct btf_type *def,
- bool strict, bool is_inner,
- const char *pin_root_path)
+int parse_btf_map_def(const char *map_name, struct btf *btf,
+ const struct btf_type *def_t, bool strict,
+ struct btf_map_def *map_def, struct btf_map_def *inner_def)
{
const struct btf_type *t;
const struct btf_member *m;
+ bool is_inner = inner_def == NULL;
int vlen, i;
- vlen = btf_vlen(def);
- m = btf_members(def);
+ vlen = btf_vlen(def_t);
+ m = btf_members(def_t);
for (i = 0; i < vlen; i++, m++) {
- const char *name = btf__name_by_offset(obj->btf, m->name_off);
+ const char *name = btf__name_by_offset(btf, m->name_off);
if (!name) {
- pr_warn("map '%s': invalid field #%d.\n", map->name, i);
+ pr_warn("map '%s': invalid field #%d.\n", map_name, i);
return -EINVAL;
}
if (strcmp(name, "type") == 0) {
- if (!get_map_field_int(map->name, obj->btf, m,
- &map->def.type))
+ if (!get_map_field_int(map_name, btf, m, &map_def->map_type))
return -EINVAL;
- pr_debug("map '%s': found type = %u.\n",
- map->name, map->def.type);
+ map_def->parts |= MAP_DEF_MAP_TYPE;
} else if (strcmp(name, "max_entries") == 0) {
- if (!get_map_field_int(map->name, obj->btf, m,
- &map->def.max_entries))
+ if (!get_map_field_int(map_name, btf, m, &map_def->max_entries))
return -EINVAL;
- pr_debug("map '%s': found max_entries = %u.\n",
- map->name, map->def.max_entries);
+ map_def->parts |= MAP_DEF_MAX_ENTRIES;
} else if (strcmp(name, "map_flags") == 0) {
- if (!get_map_field_int(map->name, obj->btf, m,
- &map->def.map_flags))
+ if (!get_map_field_int(map_name, btf, m, &map_def->map_flags))
return -EINVAL;
- pr_debug("map '%s': found map_flags = %u.\n",
- map->name, map->def.map_flags);
+ map_def->parts |= MAP_DEF_MAP_FLAGS;
} else if (strcmp(name, "numa_node") == 0) {
- if (!get_map_field_int(map->name, obj->btf, m, &map->numa_node))
+ if (!get_map_field_int(map_name, btf, m, &map_def->numa_node))
return -EINVAL;
- pr_debug("map '%s': found numa_node = %u.\n", map->name, map->numa_node);
+ map_def->parts |= MAP_DEF_NUMA_NODE;
} else if (strcmp(name, "key_size") == 0) {
__u32 sz;
- if (!get_map_field_int(map->name, obj->btf, m, &sz))
+ if (!get_map_field_int(map_name, btf, m, &sz))
return -EINVAL;
- pr_debug("map '%s': found key_size = %u.\n",
- map->name, sz);
- if (map->def.key_size && map->def.key_size != sz) {
+ if (map_def->key_size && map_def->key_size != sz) {
pr_warn("map '%s': conflicting key size %u != %u.\n",
- map->name, map->def.key_size, sz);
+ map_name, map_def->key_size, sz);
return -EINVAL;
}
- map->def.key_size = sz;
+ map_def->key_size = sz;
+ map_def->parts |= MAP_DEF_KEY_SIZE;
} else if (strcmp(name, "key") == 0) {
__s64 sz;
- t = btf__type_by_id(obj->btf, m->type);
+ t = btf__type_by_id(btf, m->type);
if (!t) {
pr_warn("map '%s': key type [%d] not found.\n",
- map->name, m->type);
+ map_name, m->type);
return -EINVAL;
}
if (!btf_is_ptr(t)) {
pr_warn("map '%s': key spec is not PTR: %s.\n",
- map->name, btf_kind_str(t));
+ map_name, btf_kind_str(t));
return -EINVAL;
}
- sz = btf__resolve_size(obj->btf, t->type);
+ sz = btf__resolve_size(btf, t->type);
if (sz < 0) {
pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n",
- map->name, t->type, (ssize_t)sz);
+ map_name, t->type, (ssize_t)sz);
return sz;
}
- pr_debug("map '%s': found key [%u], sz = %zd.\n",
- map->name, t->type, (ssize_t)sz);
- if (map->def.key_size && map->def.key_size != sz) {
+ if (map_def->key_size && map_def->key_size != sz) {
pr_warn("map '%s': conflicting key size %u != %zd.\n",
- map->name, map->def.key_size, (ssize_t)sz);
+ map_name, map_def->key_size, (ssize_t)sz);
return -EINVAL;
}
- map->def.key_size = sz;
- map->btf_key_type_id = t->type;
+ map_def->key_size = sz;
+ map_def->key_type_id = t->type;
+ map_def->parts |= MAP_DEF_KEY_SIZE | MAP_DEF_KEY_TYPE;
} else if (strcmp(name, "value_size") == 0) {
__u32 sz;
- if (!get_map_field_int(map->name, obj->btf, m, &sz))
+ if (!get_map_field_int(map_name, btf, m, &sz))
return -EINVAL;
- pr_debug("map '%s': found value_size = %u.\n",
- map->name, sz);
- if (map->def.value_size && map->def.value_size != sz) {
+ if (map_def->value_size && map_def->value_size != sz) {
pr_warn("map '%s': conflicting value size %u != %u.\n",
- map->name, map->def.value_size, sz);
+ map_name, map_def->value_size, sz);
return -EINVAL;
}
- map->def.value_size = sz;
+ map_def->value_size = sz;
+ map_def->parts |= MAP_DEF_VALUE_SIZE;
} else if (strcmp(name, "value") == 0) {
__s64 sz;
- t = btf__type_by_id(obj->btf, m->type);
+ t = btf__type_by_id(btf, m->type);
if (!t) {
pr_warn("map '%s': value type [%d] not found.\n",
- map->name, m->type);
+ map_name, m->type);
return -EINVAL;
}
if (!btf_is_ptr(t)) {
pr_warn("map '%s': value spec is not PTR: %s.\n",
- map->name, btf_kind_str(t));
+ map_name, btf_kind_str(t));
return -EINVAL;
}
- sz = btf__resolve_size(obj->btf, t->type);
+ sz = btf__resolve_size(btf, t->type);
if (sz < 0) {
pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n",
- map->name, t->type, (ssize_t)sz);
+ map_name, t->type, (ssize_t)sz);
return sz;
}
- pr_debug("map '%s': found value [%u], sz = %zd.\n",
- map->name, t->type, (ssize_t)sz);
- if (map->def.value_size && map->def.value_size != sz) {
+ if (map_def->value_size && map_def->value_size != sz) {
pr_warn("map '%s': conflicting value size %u != %zd.\n",
- map->name, map->def.value_size, (ssize_t)sz);
+ map_name, map_def->value_size, (ssize_t)sz);
return -EINVAL;
}
- map->def.value_size = sz;
- map->btf_value_type_id = t->type;
+ map_def->value_size = sz;
+ map_def->value_type_id = t->type;
+ map_def->parts |= MAP_DEF_VALUE_SIZE | MAP_DEF_VALUE_TYPE;
}
else if (strcmp(name, "values") == 0) {
+ char inner_map_name[128];
int err;
if (is_inner) {
pr_warn("map '%s': multi-level inner maps not supported.\n",
- map->name);
+ map_name);
return -ENOTSUP;
}
if (i != vlen - 1) {
pr_warn("map '%s': '%s' member should be last.\n",
- map->name, name);
+ map_name, name);
return -EINVAL;
}
- if (!bpf_map_type__is_map_in_map(map->def.type)) {
+ if (!bpf_map_type__is_map_in_map(map_def->map_type)) {
pr_warn("map '%s': should be map-in-map.\n",
- map->name);
+ map_name);
return -ENOTSUP;
}
- if (map->def.value_size && map->def.value_size != 4) {
+ if (map_def->value_size && map_def->value_size != 4) {
pr_warn("map '%s': conflicting value size %u != 4.\n",
- map->name, map->def.value_size);
+ map_name, map_def->value_size);
return -EINVAL;
}
- map->def.value_size = 4;
- t = btf__type_by_id(obj->btf, m->type);
+ map_def->value_size = 4;
+ t = btf__type_by_id(btf, m->type);
if (!t) {
pr_warn("map '%s': map-in-map inner type [%d] not found.\n",
- map->name, m->type);
+ map_name, m->type);
return -EINVAL;
}
if (!btf_is_array(t) || btf_array(t)->nelems) {
pr_warn("map '%s': map-in-map inner spec is not a zero-sized array.\n",
- map->name);
+ map_name);
return -EINVAL;
}
- t = skip_mods_and_typedefs(obj->btf, btf_array(t)->type,
- NULL);
+ t = skip_mods_and_typedefs(btf, btf_array(t)->type, NULL);
if (!btf_is_ptr(t)) {
pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
- map->name, btf_kind_str(t));
+ map_name, btf_kind_str(t));
return -EINVAL;
}
- t = skip_mods_and_typedefs(obj->btf, t->type, NULL);
+ t = skip_mods_and_typedefs(btf, t->type, NULL);
if (!btf_is_struct(t)) {
pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",
- map->name, btf_kind_str(t));
+ map_name, btf_kind_str(t));
return -EINVAL;
}
- map->inner_map = calloc(1, sizeof(*map->inner_map));
- if (!map->inner_map)
- return -ENOMEM;
- map->inner_map->sec_idx = obj->efile.btf_maps_shndx;
- map->inner_map->name = malloc(strlen(map->name) +
- sizeof(".inner") + 1);
- if (!map->inner_map->name)
- return -ENOMEM;
- sprintf(map->inner_map->name, "%s.inner", map->name);
-
- err = parse_btf_map_def(obj, map->inner_map, t, strict,
- true /* is_inner */, NULL);
+ snprintf(inner_map_name, sizeof(inner_map_name), "%s.inner", map_name);
+ err = parse_btf_map_def(inner_map_name, btf, t, strict, inner_def, NULL);
if (err)
return err;
+
+ map_def->parts |= MAP_DEF_INNER_MAP;
} else if (strcmp(name, "pinning") == 0) {
__u32 val;
- int err;
if (is_inner) {
- pr_debug("map '%s': inner def can't be pinned.\n",
- map->name);
+ pr_warn("map '%s': inner def can't be pinned.\n", map_name);
return -EINVAL;
}
- if (!get_map_field_int(map->name, obj->btf, m, &val))
+ if (!get_map_field_int(map_name, btf, m, &val))
return -EINVAL;
- pr_debug("map '%s': found pinning = %u.\n",
- map->name, val);
-
- if (val != LIBBPF_PIN_NONE &&
- val != LIBBPF_PIN_BY_NAME) {
+ if (val != LIBBPF_PIN_NONE && val != LIBBPF_PIN_BY_NAME) {
pr_warn("map '%s': invalid pinning value %u.\n",
- map->name, val);
+ map_name, val);
return -EINVAL;
}
- if (val == LIBBPF_PIN_BY_NAME) {
- err = build_map_pin_path(map, pin_root_path);
- if (err) {
- pr_warn("map '%s': couldn't build pin path.\n",
- map->name);
- return err;
- }
- }
+ map_def->pinning = val;
+ map_def->parts |= MAP_DEF_PINNING;
} else {
if (strict) {
- pr_warn("map '%s': unknown field '%s'.\n",
- map->name, name);
+ pr_warn("map '%s': unknown field '%s'.\n", map_name, name);
return -ENOTSUP;
}
- pr_debug("map '%s': ignoring unknown field '%s'.\n",
- map->name, name);
+ pr_debug("map '%s': ignoring unknown field '%s'.\n", map_name, name);
}
}
- if (map->def.type == BPF_MAP_TYPE_UNSPEC) {
- pr_warn("map '%s': map type isn't specified.\n", map->name);
+ if (map_def->map_type == BPF_MAP_TYPE_UNSPEC) {
+ pr_warn("map '%s': map type isn't specified.\n", map_name);
return -EINVAL;
}
return 0;
}
+static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def)
+{
+ map->def.type = def->map_type;
+ map->def.key_size = def->key_size;
+ map->def.value_size = def->value_size;
+ map->def.max_entries = def->max_entries;
+ map->def.map_flags = def->map_flags;
+
+ map->numa_node = def->numa_node;
+ map->btf_key_type_id = def->key_type_id;
+ map->btf_value_type_id = def->value_type_id;
+
+ if (def->parts & MAP_DEF_MAP_TYPE)
+ pr_debug("map '%s': found type = %u.\n", map->name, def->map_type);
+
+ if (def->parts & MAP_DEF_KEY_TYPE)
+ pr_debug("map '%s': found key [%u], sz = %u.\n",
+ map->name, def->key_type_id, def->key_size);
+ else if (def->parts & MAP_DEF_KEY_SIZE)
+ pr_debug("map '%s': found key_size = %u.\n", map->name, def->key_size);
+
+ if (def->parts & MAP_DEF_VALUE_TYPE)
+ pr_debug("map '%s': found value [%u], sz = %u.\n",
+ map->name, def->value_type_id, def->value_size);
+ else if (def->parts & MAP_DEF_VALUE_SIZE)
+ pr_debug("map '%s': found value_size = %u.\n", map->name, def->value_size);
+
+ if (def->parts & MAP_DEF_MAX_ENTRIES)
+ pr_debug("map '%s': found max_entries = %u.\n", map->name, def->max_entries);
+ if (def->parts & MAP_DEF_MAP_FLAGS)
+ pr_debug("map '%s': found map_flags = %u.\n", map->name, def->map_flags);
+ if (def->parts & MAP_DEF_PINNING)
+ pr_debug("map '%s': found pinning = %u.\n", map->name, def->pinning);
+ if (def->parts & MAP_DEF_NUMA_NODE)
+ pr_debug("map '%s': found numa_node = %u.\n", map->name, def->numa_node);
+
+ if (def->parts & MAP_DEF_INNER_MAP)
+ pr_debug("map '%s': found inner map definition.\n", map->name);
+}
+
static int bpf_object__init_user_btf_map(struct bpf_object *obj,
const struct btf_type *sec,
int var_idx, int sec_idx,
const Elf_Data *data, bool strict,
const char *pin_root_path)
{
+ struct btf_map_def map_def = {}, inner_def = {};
const struct btf_type *var, *def;
const struct btf_var_secinfo *vi;
const struct btf_var *var_extra;
const char *map_name;
struct bpf_map *map;
+ int err;
vi = btf_var_secinfos(sec) + var_idx;
var = btf__type_by_id(obj->btf, vi->type);
@@ -2294,7 +2327,35 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
map_name, map->sec_idx, map->sec_offset);
- return parse_btf_map_def(obj, map, def, strict, false, pin_root_path);
+ err = parse_btf_map_def(map->name, obj->btf, def, strict, &map_def, &inner_def);
+ if (err)
+ return err;
+
+ fill_map_from_def(map, &map_def);
+
+ if (map_def.pinning == LIBBPF_PIN_BY_NAME) {
+ err = build_map_pin_path(map, pin_root_path);
+ if (err) {
+ pr_warn("map '%s': couldn't build pin path.\n", map->name);
+ return err;
+ }
+ }
+
+ if (map_def.parts & MAP_DEF_INNER_MAP) {
+ map->inner_map = calloc(1, sizeof(*map->inner_map));
+ if (!map->inner_map)
+ return -ENOMEM;
+ map->inner_map->fd = -1;
+ map->inner_map->sec_idx = sec_idx;
+ map->inner_map->name = malloc(strlen(map_name) + sizeof(".inner") + 1);
+ if (!map->inner_map->name)
+ return -ENOMEM;
+ sprintf(map->inner_map->name, "%s.inner", map_name);
+
+ fill_map_from_def(map->inner_map, &inner_def);
+ }
+
+ return 0;
}
static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
@@ -2384,15 +2445,17 @@ static bool btf_needs_sanitization(struct bpf_object *obj)
{
bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC);
bool has_datasec = kernel_supports(FEAT_BTF_DATASEC);
+ bool has_float = kernel_supports(FEAT_BTF_FLOAT);
bool has_func = kernel_supports(FEAT_BTF_FUNC);
- return !has_func || !has_datasec || !has_func_global;
+ return !has_func || !has_datasec || !has_func_global || !has_float;
}
static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
{
bool has_func_global = kernel_supports(FEAT_BTF_GLOBAL_FUNC);
bool has_datasec = kernel_supports(FEAT_BTF_DATASEC);
+ bool has_float = kernel_supports(FEAT_BTF_FLOAT);
bool has_func = kernel_supports(FEAT_BTF_FUNC);
struct btf_type *t;
int i, j, vlen;
@@ -2445,6 +2508,13 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)
} else if (!has_func_global && btf_is_func(t)) {
/* replace BTF_FUNC_GLOBAL with BTF_FUNC_STATIC */
t->info = BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0);
+ } else if (!has_float && btf_is_float(t)) {
+ /* replace FLOAT with an equally-sized empty STRUCT;
+ * since C compilers do not accept e.g. "float" as a
+ * valid struct name, make it anonymous
+ */
+ t->name_off = 0;
+ t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0);
}
}
}
@@ -2587,7 +2657,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
{
struct btf *kern_btf = obj->btf;
bool btf_mandatory, sanitize;
- int err = 0;
+ int i, err = 0;
if (!obj->btf)
return 0;
@@ -2601,6 +2671,38 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)
return 0;
}
+ /* Even though some subprogs are global/weak, user might prefer more
+ * permissive BPF verification process that BPF verifier performs for
+ * static functions, taking into account more context from the caller
+ * functions. In such case, they need to mark such subprogs with
+ * __attribute__((visibility("hidden"))) and libbpf will adjust
+ * corresponding FUNC BTF type to be marked as static and trigger more
+ * involved BPF verification process.
+ */
+ for (i = 0; i < obj->nr_programs; i++) {
+ struct bpf_program *prog = &obj->programs[i];
+ struct btf_type *t;
+ const char *name;
+ int j, n;
+
+ if (!prog->mark_btf_static || !prog_is_subprog(obj, prog))
+ continue;
+
+ n = btf__get_nr_types(obj->btf);
+ for (j = 1; j <= n; j++) {
+ t = btf_type_by_id(obj->btf, j);
+ if (!btf_is_func(t) || btf_func_linkage(t) != BTF_FUNC_GLOBAL)
+ continue;
+
+ name = btf__str_by_offset(obj->btf, t->name_off);
+ if (strcmp(name, prog->name) != 0)
+ continue;
+
+ t->info = btf_type_info(BTF_KIND_FUNC, BTF_FUNC_STATIC, 0);
+ break;
+ }
+ }
+
sanitize = btf_needs_sanitization(obj);
if (sanitize) {
const void *raw_data;
@@ -2751,26 +2853,6 @@ static Elf_Data *elf_sec_data(const struct bpf_object *obj, Elf_Scn *scn)
return data;
}
-static int elf_sym_by_sec_off(const struct bpf_object *obj, size_t sec_idx,
- size_t off, __u32 sym_type, GElf_Sym *sym)
-{
- Elf_Data *symbols = obj->efile.symbols;
- size_t n = symbols->d_size / sizeof(GElf_Sym);
- int i;
-
- for (i = 0; i < n; i++) {
- if (!gelf_getsym(symbols, i, sym))
- continue;
- if (sym->st_shndx != sec_idx || sym->st_value != off)
- continue;
- if (GELF_ST_TYPE(sym->st_info) != sym_type)
- continue;
- return 0;
- }
-
- return -ENOENT;
-}
-
static bool is_sec_name_dwarf(const char *name)
{
/* approximation, but the actual list is too long */
@@ -2784,7 +2866,7 @@ static bool ignore_elf_section(GElf_Shdr *hdr, const char *name)
return true;
/* ignore .llvm_addrsig section as well */
- if (hdr->sh_type == 0x6FFF4C03 /* SHT_LLVM_ADDRSIG */)
+ if (hdr->sh_type == SHT_LLVM_ADDRSIG)
return true;
/* no subprograms will lead to an empty .text section, ignore it */
@@ -2974,10 +3056,27 @@ static bool sym_is_extern(const GElf_Sym *sym)
GELF_ST_TYPE(sym->st_info) == STT_NOTYPE;
}
+static bool sym_is_subprog(const GElf_Sym *sym, int text_shndx)
+{
+ int bind = GELF_ST_BIND(sym->st_info);
+ int type = GELF_ST_TYPE(sym->st_info);
+
+ /* in .text section */
+ if (sym->st_shndx != text_shndx)
+ return false;
+
+ /* local function */
+ if (bind == STB_LOCAL && type == STT_SECTION)
+ return true;
+
+ /* global function */
+ return bind == STB_GLOBAL && type == STT_FUNC;
+}
+
static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
{
const struct btf_type *t;
- const char *var_name;
+ const char *tname;
int i, n;
if (!btf)
@@ -2987,14 +3086,18 @@ static int find_extern_btf_id(const struct btf *btf, const char *ext_name)
for (i = 1; i <= n; i++) {
t = btf__type_by_id(btf, i);
- if (!btf_is_var(t))
+ if (!btf_is_var(t) && !btf_is_func(t))
continue;
- var_name = btf__name_by_offset(btf, t->name_off);
- if (strcmp(var_name, ext_name))
+ tname = btf__name_by_offset(btf, t->name_off);
+ if (strcmp(tname, ext_name))
continue;
- if (btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
+ if (btf_is_var(t) &&
+ btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN)
+ return -EINVAL;
+
+ if (btf_is_func(t) && btf_func_linkage(t) != BTF_FUNC_EXTERN)
return -EINVAL;
return i;
@@ -3107,12 +3210,48 @@ static int find_int_btf_id(const struct btf *btf)
return 0;
}
+static int add_dummy_ksym_var(struct btf *btf)
+{
+ int i, int_btf_id, sec_btf_id, dummy_var_btf_id;
+ const struct btf_var_secinfo *vs;
+ const struct btf_type *sec;
+
+ sec_btf_id = btf__find_by_name_kind(btf, KSYMS_SEC,
+ BTF_KIND_DATASEC);
+ if (sec_btf_id < 0)
+ return 0;
+
+ sec = btf__type_by_id(btf, sec_btf_id);
+ vs = btf_var_secinfos(sec);
+ for (i = 0; i < btf_vlen(sec); i++, vs++) {
+ const struct btf_type *vt;
+
+ vt = btf__type_by_id(btf, vs->type);
+ if (btf_is_func(vt))
+ break;
+ }
+
+ /* No func in ksyms sec. No need to add dummy var. */
+ if (i == btf_vlen(sec))
+ return 0;
+
+ int_btf_id = find_int_btf_id(btf);
+ dummy_var_btf_id = btf__add_var(btf,
+ "dummy_ksym",
+ BTF_VAR_GLOBAL_ALLOCATED,
+ int_btf_id);
+ if (dummy_var_btf_id < 0)
+ pr_warn("cannot create a dummy_ksym var\n");
+
+ return dummy_var_btf_id;
+}
+
static int bpf_object__collect_externs(struct bpf_object *obj)
{
struct btf_type *sec, *kcfg_sec = NULL, *ksym_sec = NULL;
const struct btf_type *t;
struct extern_desc *ext;
- int i, n, off;
+ int i, n, off, dummy_var_btf_id;
const char *ext_name, *sec_name;
Elf_Scn *scn;
GElf_Shdr sh;
@@ -3124,6 +3263,10 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
if (elf_sec_hdr(obj, scn, &sh))
return -LIBBPF_ERRNO__FORMAT;
+ dummy_var_btf_id = add_dummy_ksym_var(obj->btf);
+ if (dummy_var_btf_id < 0)
+ return dummy_var_btf_id;
+
n = sh.sh_size / sh.sh_entsize;
pr_debug("looking for externs among %d symbols...\n", n);
@@ -3168,6 +3311,11 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
sec_name = btf__name_by_offset(obj->btf, sec->name_off);
if (strcmp(sec_name, KCONFIG_SEC) == 0) {
+ if (btf_is_func(t)) {
+ pr_warn("extern function %s is unsupported under %s section\n",
+ ext->name, KCONFIG_SEC);
+ return -ENOTSUP;
+ }
kcfg_sec = sec;
ext->type = EXT_KCFG;
ext->kcfg.sz = btf__resolve_size(obj->btf, t->type);
@@ -3189,6 +3337,11 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
return -ENOTSUP;
}
} else if (strcmp(sec_name, KSYMS_SEC) == 0) {
+ if (btf_is_func(t) && ext->is_weak) {
+ pr_warn("extern weak function %s is unsupported\n",
+ ext->name);
+ return -ENOTSUP;
+ }
ksym_sec = sec;
ext->type = EXT_KSYM;
skip_mods_and_typedefs(obj->btf, t->type,
@@ -3215,7 +3368,14 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
* extern variables in DATASEC
*/
int int_btf_id = find_int_btf_id(obj->btf);
+ /* For extern function, a dummy_var added earlier
+ * will be used to replace the vs->type and
+ * its name string will be used to refill
+ * the missing param's name.
+ */
+ const struct btf_type *dummy_var;
+ dummy_var = btf__type_by_id(obj->btf, dummy_var_btf_id);
for (i = 0; i < obj->nr_extern; i++) {
ext = &obj->externs[i];
if (ext->type != EXT_KSYM)
@@ -3234,12 +3394,32 @@ static int bpf_object__collect_externs(struct bpf_object *obj)
ext_name = btf__name_by_offset(obj->btf, vt->name_off);
ext = find_extern_by_name(obj, ext_name);
if (!ext) {
- pr_warn("failed to find extern definition for BTF var '%s'\n",
- ext_name);
+ pr_warn("failed to find extern definition for BTF %s '%s'\n",
+ btf_kind_str(vt), ext_name);
return -ESRCH;
}
- btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
- vt->type = int_btf_id;
+ if (btf_is_func(vt)) {
+ const struct btf_type *func_proto;
+ struct btf_param *param;
+ int j;
+
+ func_proto = btf__type_by_id(obj->btf,
+ vt->type);
+ param = btf_params(func_proto);
+ /* Reuse the dummy_var string if the
+ * func proto does not have param name.
+ */
+ for (j = 0; j < btf_vlen(func_proto); j++)
+ if (param[j].type && !param[j].name_off)
+ param[j].name_off =
+ dummy_var->name_off;
+ vs->type = dummy_var_btf_id;
+ vt->info &= ~0xffff;
+ vt->info |= BTF_FUNC_GLOBAL;
+ } else {
+ btf_var(vt)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
+ vt->type = int_btf_id;
+ }
vs->offset = off;
vs->size = sizeof(int);
}
@@ -3369,33 +3549,7 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
const char *sym_sec_name;
struct bpf_map *map;
- reloc_desc->processed = false;
-
- /* sub-program call relocation */
- if (insn->code == (BPF_JMP | BPF_CALL)) {
- if (insn->src_reg != BPF_PSEUDO_CALL) {
- pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name);
- return -LIBBPF_ERRNO__RELOC;
- }
- /* text_shndx can be 0, if no default "main" program exists */
- if (!shdr_idx || shdr_idx != obj->efile.text_shndx) {
- sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
- pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n",
- prog->name, sym_name, sym_sec_name);
- return -LIBBPF_ERRNO__RELOC;
- }
- if (sym->st_value % BPF_INSN_SZ) {
- pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n",
- prog->name, sym_name, (size_t)sym->st_value);
- return -LIBBPF_ERRNO__RELOC;
- }
- reloc_desc->type = RELO_CALL;
- reloc_desc->insn_idx = insn_idx;
- reloc_desc->sym_off = sym->st_value;
- return 0;
- }
-
- if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) {
+ if (!is_call_insn(insn) && !is_ldimm64_insn(insn)) {
pr_warn("prog '%s': invalid relo against '%s' for insns[%d].code 0x%x\n",
prog->name, sym_name, insn_idx, insn->code);
return -LIBBPF_ERRNO__RELOC;
@@ -3418,18 +3572,62 @@ static int bpf_program__record_reloc(struct bpf_program *prog,
}
pr_debug("prog '%s': found extern #%d '%s' (sym %d) for insn #%u\n",
prog->name, i, ext->name, ext->sym_idx, insn_idx);
- reloc_desc->type = RELO_EXTERN;
+ if (insn->code == (BPF_JMP | BPF_CALL))
+ reloc_desc->type = RELO_EXTERN_FUNC;
+ else
+ reloc_desc->type = RELO_EXTERN_VAR;
reloc_desc->insn_idx = insn_idx;
reloc_desc->sym_off = i; /* sym_off stores extern index */
return 0;
}
+ /* sub-program call relocation */
+ if (is_call_insn(insn)) {
+ if (insn->src_reg != BPF_PSEUDO_CALL) {
+ pr_warn("prog '%s': incorrect bpf_call opcode\n", prog->name);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ /* text_shndx can be 0, if no default "main" program exists */
+ if (!shdr_idx || shdr_idx != obj->efile.text_shndx) {
+ sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
+ pr_warn("prog '%s': bad call relo against '%s' in section '%s'\n",
+ prog->name, sym_name, sym_sec_name);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ if (sym->st_value % BPF_INSN_SZ) {
+ pr_warn("prog '%s': bad call relo against '%s' at offset %zu\n",
+ prog->name, sym_name, (size_t)sym->st_value);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+ reloc_desc->type = RELO_CALL;
+ reloc_desc->insn_idx = insn_idx;
+ reloc_desc->sym_off = sym->st_value;
+ return 0;
+ }
+
if (!shdr_idx || shdr_idx >= SHN_LORESERVE) {
pr_warn("prog '%s': invalid relo against '%s' in special section 0x%x; forgot to initialize global var?..\n",
prog->name, sym_name, shdr_idx);
return -LIBBPF_ERRNO__RELOC;
}
+ /* loading subprog addresses */
+ if (sym_is_subprog(sym, obj->efile.text_shndx)) {
+ /* global_func: sym->st_value = offset in the section, insn->imm = 0.
+ * local_func: sym->st_value = 0, insn->imm = offset in the section.
+ */
+ if ((sym->st_value % BPF_INSN_SZ) || (insn->imm % BPF_INSN_SZ)) {
+ pr_warn("prog '%s': bad subprog addr relo against '%s' at offset %zu+%d\n",
+ prog->name, sym_name, (size_t)sym->st_value, insn->imm);
+ return -LIBBPF_ERRNO__RELOC;
+ }
+
+ reloc_desc->type = RELO_SUBPROG_ADDR;
+ reloc_desc->insn_idx = insn_idx;
+ reloc_desc->sym_off = sym->st_value;
+ return 0;
+ }
+
type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx);
sym_sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, shdr_idx));
@@ -3533,11 +3731,16 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data
int err, i, nrels;
const char *sym_name;
__u32 insn_idx;
+ Elf_Scn *scn;
+ Elf_Data *scn_data;
GElf_Sym sym;
GElf_Rel rel;
+ scn = elf_sec_by_idx(obj, sec_idx);
+ scn_data = elf_sec_data(obj, scn);
+
relo_sec_name = elf_sec_str(obj, shdr->sh_name);
- sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
+ sec_name = elf_sec_name(obj, scn);
if (!relo_sec_name || !sec_name)
return -EINVAL;
@@ -3555,7 +3758,8 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data
relo_sec_name, (size_t)GELF_R_SYM(rel.r_info), i);
return -LIBBPF_ERRNO__FORMAT;
}
- if (rel.r_offset % BPF_INSN_SZ) {
+
+ if (rel.r_offset % BPF_INSN_SZ || rel.r_offset >= scn_data->d_size) {
pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n",
relo_sec_name, (size_t)GELF_R_SYM(rel.r_info), i);
return -LIBBPF_ERRNO__FORMAT;
@@ -3579,9 +3783,9 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, GElf_Shdr *shdr, Elf_Data
prog = find_prog_by_sec_insn(obj, sec_idx, insn_idx);
if (!prog) {
- pr_warn("sec '%s': relo #%d: program not found in section '%s' for insn #%u\n",
+ pr_debug("sec '%s': relo #%d: couldn't find program in section '%s' for insn #%u, probably overridden weak function, skipping...\n",
relo_sec_name, i, sec_name, insn_idx);
- return -LIBBPF_ERRNO__RELOC;
+ continue;
}
relos = libbpf_reallocarray(prog->reloc_desc,
@@ -3696,6 +3900,14 @@ __u32 bpf_map__max_entries(const struct bpf_map *map)
return map->def.max_entries;
}
+struct bpf_map *bpf_map__inner_map(struct bpf_map *map)
+{
+ if (!bpf_map_type__is_map_in_map(map->def.type))
+ return NULL;
+
+ return map->inner_map;
+}
+
int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
{
if (map->fd >= 0)
@@ -3882,6 +4094,18 @@ static int probe_kern_btf_datasec(void)
strs, sizeof(strs)));
}
+static int probe_kern_btf_float(void)
+{
+ static const char strs[] = "\0float";
+ __u32 types[] = {
+ /* float */
+ BTF_TYPE_FLOAT_ENC(1, 4),
+ };
+
+ return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types),
+ strs, sizeof(strs)));
+}
+
static int probe_kern_array_mmap(void)
{
struct bpf_create_map_attr attr = {
@@ -4061,6 +4285,9 @@ static struct kern_feature_desc {
[FEAT_MODULE_BTF] = {
"module BTF support", probe_module_btf,
},
+ [FEAT_BTF_FLOAT] = {
+ "BTF_KIND_FLOAT support", probe_kern_btf_float,
+ },
};
static bool kernel_supports(enum kern_feature_id feat_id)
@@ -4795,8 +5022,8 @@ static int load_module_btfs(struct bpf_object *obj)
goto err_out;
}
- err = btf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap,
- sizeof(*obj->btf_modules), obj->btf_module_cnt + 1);
+ err = libbpf_ensure_mem((void **)&obj->btf_modules, &obj->btf_module_cap,
+ sizeof(*obj->btf_modules), obj->btf_module_cnt + 1);
if (err)
goto err_out;
@@ -4888,6 +5115,7 @@ err_out:
* least one of enums should be anonymous;
* - for ENUMs, check sizes, names are ignored;
* - for INT, size and signedness are ignored;
+ * - any two FLOATs are always compatible;
* - for ARRAY, dimensionality is ignored, element types are checked for
* compatibility recursively;
* - everything else shouldn't be ever a target of relocation.
@@ -4914,6 +5142,7 @@ recur:
switch (btf_kind(local_type)) {
case BTF_KIND_PTR:
+ case BTF_KIND_FLOAT:
return 1;
case BTF_KIND_FWD:
case BTF_KIND_ENUM: {
@@ -5566,11 +5795,6 @@ static void bpf_core_poison_insn(struct bpf_program *prog, int relo_idx,
insn->imm = 195896080; /* => 0xbad2310 => "bad relo" */
}
-static bool is_ldimm64(struct bpf_insn *insn)
-{
- return insn->code == (BPF_LD | BPF_IMM | BPF_DW);
-}
-
static int insn_bpf_size_to_bytes(struct bpf_insn *insn)
{
switch (BPF_SIZE(insn->code)) {
@@ -5636,7 +5860,7 @@ poison:
/* poison second part of ldimm64 to avoid confusing error from
* verifier about "unknown opcode 00"
*/
- if (is_ldimm64(insn))
+ if (is_ldimm64_insn(insn))
bpf_core_poison_insn(prog, relo_idx, insn_idx + 1, insn + 1);
bpf_core_poison_insn(prog, relo_idx, insn_idx, insn);
return 0;
@@ -5712,7 +5936,7 @@ poison:
case BPF_LD: {
__u64 imm;
- if (!is_ldimm64(insn) ||
+ if (!is_ldimm64_insn(insn) ||
insn[0].src_reg != 0 || insn[0].off != 0 ||
insn_idx + 1 >= prog->insns_cnt ||
insn[1].code != 0 || insn[1].dst_reg != 0 ||
@@ -6023,8 +6247,8 @@ patch_insn:
/* bpf_core_patch_insn() should know how to handle missing targ_spec */
err = bpf_core_patch_insn(prog, relo, relo_idx, &targ_res);
if (err) {
- pr_warn("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n",
- prog->name, relo_idx, relo->insn_off, err);
+ pr_warn("prog '%s': relo #%d: failed to patch insn #%zu: %d\n",
+ prog->name, relo_idx, relo->insn_off / BPF_INSN_SZ, err);
return -EINVAL;
}
@@ -6146,15 +6370,13 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
case RELO_LD64:
insn[0].src_reg = BPF_PSEUDO_MAP_FD;
insn[0].imm = obj->maps[relo->map_idx].fd;
- relo->processed = true;
break;
case RELO_DATA:
insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
insn[1].imm = insn[0].imm + relo->sym_off;
insn[0].imm = obj->maps[relo->map_idx].fd;
- relo->processed = true;
break;
- case RELO_EXTERN:
+ case RELO_EXTERN_VAR:
ext = &obj->externs[relo->sym_off];
if (ext->type == EXT_KCFG) {
insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;
@@ -6170,7 +6392,15 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)
insn[1].imm = ext->ksym.addr >> 32;
}
}
- relo->processed = true;
+ break;
+ case RELO_EXTERN_FUNC:
+ ext = &obj->externs[relo->sym_off];
+ insn[0].src_reg = BPF_PSEUDO_KFUNC_CALL;
+ insn[0].imm = ext->ksym.kernel_btf_id;
+ break;
+ case RELO_SUBPROG_ADDR:
+ insn[0].src_reg = BPF_PSEUDO_FUNC;
+ /* will be handled as a follow up pass */
break;
case RELO_CALL:
/* will be handled as a follow up pass */
@@ -6358,11 +6588,11 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
for (insn_idx = 0; insn_idx < prog->sec_insn_cnt; insn_idx++) {
insn = &main_prog->insns[prog->sub_insn_off + insn_idx];
- if (!insn_is_subprog_call(insn))
+ if (!insn_is_subprog_call(insn) && !insn_is_pseudo_func(insn))
continue;
relo = find_prog_insn_relo(prog, insn_idx);
- if (relo && relo->type != RELO_CALL) {
+ if (relo && relo->type != RELO_CALL && relo->type != RELO_SUBPROG_ADDR) {
pr_warn("prog '%s': unexpected relo for insn #%zu, type %d\n",
prog->name, insn_idx, relo->type);
return -LIBBPF_ERRNO__RELOC;
@@ -6374,8 +6604,22 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
* call always has imm = -1, but for static functions
* relocation is against STT_SECTION and insn->imm
* points to a start of a static function
+ *
+ * for subprog addr relocation, the relo->sym_off + insn->imm is
+ * the byte offset in the corresponding section.
+ */
+ if (relo->type == RELO_CALL)
+ sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
+ else
+ sub_insn_idx = (relo->sym_off + insn->imm) / BPF_INSN_SZ;
+ } else if (insn_is_pseudo_func(insn)) {
+ /*
+ * RELO_SUBPROG_ADDR relo is always emitted even if both
+ * functions are in the same section, so it shouldn't reach here.
*/
- sub_insn_idx = relo->sym_off / BPF_INSN_SZ + insn->imm + 1;
+ pr_warn("prog '%s': missing subprog addr relo for insn #%zu\n",
+ prog->name, insn_idx);
+ return -LIBBPF_ERRNO__RELOC;
} else {
/* if subprogram call is to a static function within
* the same ELF section, there won't be any relocation
@@ -6438,9 +6682,6 @@ bpf_object__reloc_code(struct bpf_object *obj, struct bpf_program *main_prog,
* different main programs */
insn->imm = subprog->sub_insn_off - (prog->sub_insn_off + insn_idx) - 1;
- if (relo)
- relo->processed = true;
-
pr_debug("prog '%s': insn #%zu relocated, imm %d points to subprog '%s' (now at %zu offset)\n",
prog->name, insn_idx, insn->imm, subprog->name, subprog->sub_insn_off);
}
@@ -6533,7 +6774,7 @@ static int
bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
{
struct bpf_program *subprog;
- int i, j, err;
+ int i, err;
/* mark all subprogs as not relocated (yet) within the context of
* current main program
@@ -6544,9 +6785,6 @@ bpf_object__relocate_calls(struct bpf_object *obj, struct bpf_program *prog)
continue;
subprog->sub_insn_off = 0;
- for (j = 0; j < subprog->nr_reloc; j++)
- if (subprog->reloc_desc[j].type == RELO_CALL)
- subprog->reloc_desc[j].processed = false;
}
err = bpf_object__reloc_code(obj, prog, prog);
@@ -6793,7 +7031,7 @@ static bool insn_is_helper_call(struct bpf_insn *insn, enum bpf_func_id *func_id
return false;
}
-static int bpf_object__sanitize_prog(struct bpf_object* obj, struct bpf_program *prog)
+static int bpf_object__sanitize_prog(struct bpf_object *obj, struct bpf_program *prog)
{
struct bpf_insn *insn = prog->insns;
enum bpf_func_id func_id;
@@ -7274,6 +7512,7 @@ static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
{
char sym_type, sym_name[500];
unsigned long long sym_addr;
+ const struct btf_type *t;
struct extern_desc *ext;
int ret, err = 0;
FILE *f;
@@ -7300,6 +7539,10 @@ static int bpf_object__read_kallsyms_file(struct bpf_object *obj)
if (!ext || ext->type != EXT_KSYM)
continue;
+ t = btf__type_by_id(obj->btf, ext->btf_id);
+ if (!btf_is_var(t))
+ continue;
+
if (ext->is_set && ext->ksym.addr != sym_addr) {
pr_warn("extern (ksym) '%s' resolution is ambiguous: 0x%llx or 0x%llx\n",
sym_name, ext->ksym.addr, sym_addr);
@@ -7318,75 +7561,151 @@ out:
return err;
}
-static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
+static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name,
+ __u16 kind, struct btf **res_btf,
+ int *res_btf_fd)
{
- struct extern_desc *ext;
+ int i, id, btf_fd, err;
struct btf *btf;
- int i, j, id, btf_fd, err;
- for (i = 0; i < obj->nr_extern; i++) {
- const struct btf_type *targ_var, *targ_type;
- __u32 targ_type_id, local_type_id;
- const char *targ_var_name;
- int ret;
-
- ext = &obj->externs[i];
- if (ext->type != EXT_KSYM || !ext->ksym.type_id)
- continue;
+ btf = obj->btf_vmlinux;
+ btf_fd = 0;
+ id = btf__find_by_name_kind(btf, ksym_name, kind);
- btf = obj->btf_vmlinux;
- btf_fd = 0;
- id = btf__find_by_name_kind(btf, ext->name, BTF_KIND_VAR);
- if (id == -ENOENT) {
- err = load_module_btfs(obj);
- if (err)
- return err;
+ if (id == -ENOENT) {
+ err = load_module_btfs(obj);
+ if (err)
+ return err;
- for (j = 0; j < obj->btf_module_cnt; j++) {
- btf = obj->btf_modules[j].btf;
- /* we assume module BTF FD is always >0 */
- btf_fd = obj->btf_modules[j].fd;
- id = btf__find_by_name_kind(btf, ext->name, BTF_KIND_VAR);
- if (id != -ENOENT)
- break;
- }
- }
- if (id <= 0) {
- pr_warn("extern (ksym) '%s': failed to find BTF ID in kernel BTF(s).\n",
- ext->name);
- return -ESRCH;
+ for (i = 0; i < obj->btf_module_cnt; i++) {
+ btf = obj->btf_modules[i].btf;
+ /* we assume module BTF FD is always >0 */
+ btf_fd = obj->btf_modules[i].fd;
+ id = btf__find_by_name_kind(btf, ksym_name, kind);
+ if (id != -ENOENT)
+ break;
}
+ }
+ if (id <= 0) {
+ pr_warn("extern (%s ksym) '%s': failed to find BTF ID in kernel BTF(s).\n",
+ __btf_kind_str(kind), ksym_name);
+ return -ESRCH;
+ }
+
+ *res_btf = btf;
+ *res_btf_fd = btf_fd;
+ return id;
+}
- /* find local type_id */
- local_type_id = ext->ksym.type_id;
+static int bpf_object__resolve_ksym_var_btf_id(struct bpf_object *obj,
+ struct extern_desc *ext)
+{
+ const struct btf_type *targ_var, *targ_type;
+ __u32 targ_type_id, local_type_id;
+ const char *targ_var_name;
+ int id, btf_fd = 0, err;
+ struct btf *btf = NULL;
- /* find target type_id */
- targ_var = btf__type_by_id(btf, id);
- targ_var_name = btf__name_by_offset(btf, targ_var->name_off);
- targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id);
+ id = find_ksym_btf_id(obj, ext->name, BTF_KIND_VAR, &btf, &btf_fd);
+ if (id < 0)
+ return id;
- ret = bpf_core_types_are_compat(obj->btf, local_type_id,
- btf, targ_type_id);
- if (ret <= 0) {
- const struct btf_type *local_type;
- const char *targ_name, *local_name;
+ /* find local type_id */
+ local_type_id = ext->ksym.type_id;
- local_type = btf__type_by_id(obj->btf, local_type_id);
- local_name = btf__name_by_offset(obj->btf, local_type->name_off);
- targ_name = btf__name_by_offset(btf, targ_type->name_off);
+ /* find target type_id */
+ targ_var = btf__type_by_id(btf, id);
+ targ_var_name = btf__name_by_offset(btf, targ_var->name_off);
+ targ_type = skip_mods_and_typedefs(btf, targ_var->type, &targ_type_id);
- pr_warn("extern (ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n",
- ext->name, local_type_id,
- btf_kind_str(local_type), local_name, targ_type_id,
- btf_kind_str(targ_type), targ_name);
- return -EINVAL;
- }
+ err = bpf_core_types_are_compat(obj->btf, local_type_id,
+ btf, targ_type_id);
+ if (err <= 0) {
+ const struct btf_type *local_type;
+ const char *targ_name, *local_name;
+
+ local_type = btf__type_by_id(obj->btf, local_type_id);
+ local_name = btf__name_by_offset(obj->btf, local_type->name_off);
+ targ_name = btf__name_by_offset(btf, targ_type->name_off);
+
+ pr_warn("extern (var ksym) '%s': incompatible types, expected [%d] %s %s, but kernel has [%d] %s %s\n",
+ ext->name, local_type_id,
+ btf_kind_str(local_type), local_name, targ_type_id,
+ btf_kind_str(targ_type), targ_name);
+ return -EINVAL;
+ }
+
+ ext->is_set = true;
+ ext->ksym.kernel_btf_obj_fd = btf_fd;
+ ext->ksym.kernel_btf_id = id;
+ pr_debug("extern (var ksym) '%s': resolved to [%d] %s %s\n",
+ ext->name, id, btf_kind_str(targ_var), targ_var_name);
+
+ return 0;
+}
+
+static int bpf_object__resolve_ksym_func_btf_id(struct bpf_object *obj,
+ struct extern_desc *ext)
+{
+ int local_func_proto_id, kfunc_proto_id, kfunc_id;
+ const struct btf_type *kern_func;
+ struct btf *kern_btf = NULL;
+ int ret, kern_btf_fd = 0;
+
+ local_func_proto_id = ext->ksym.type_id;
+
+ kfunc_id = find_ksym_btf_id(obj, ext->name, BTF_KIND_FUNC,
+ &kern_btf, &kern_btf_fd);
+ if (kfunc_id < 0) {
+ pr_warn("extern (func ksym) '%s': not found in kernel BTF\n",
+ ext->name);
+ return kfunc_id;
+ }
+
+ if (kern_btf != obj->btf_vmlinux) {
+ pr_warn("extern (func ksym) '%s': function in kernel module is not supported\n",
+ ext->name);
+ return -ENOTSUP;
+ }
- ext->is_set = true;
- ext->ksym.kernel_btf_obj_fd = btf_fd;
- ext->ksym.kernel_btf_id = id;
- pr_debug("extern (ksym) '%s': resolved to [%d] %s %s\n",
- ext->name, id, btf_kind_str(targ_var), targ_var_name);
+ kern_func = btf__type_by_id(kern_btf, kfunc_id);
+ kfunc_proto_id = kern_func->type;
+
+ ret = bpf_core_types_are_compat(obj->btf, local_func_proto_id,
+ kern_btf, kfunc_proto_id);
+ if (ret <= 0) {
+ pr_warn("extern (func ksym) '%s': func_proto [%d] incompatible with kernel [%d]\n",
+ ext->name, local_func_proto_id, kfunc_proto_id);
+ return -EINVAL;
+ }
+
+ ext->is_set = true;
+ ext->ksym.kernel_btf_obj_fd = kern_btf_fd;
+ ext->ksym.kernel_btf_id = kfunc_id;
+ pr_debug("extern (func ksym) '%s': resolved to kernel [%d]\n",
+ ext->name, kfunc_id);
+
+ return 0;
+}
+
+static int bpf_object__resolve_ksyms_btf_id(struct bpf_object *obj)
+{
+ const struct btf_type *t;
+ struct extern_desc *ext;
+ int i, err;
+
+ for (i = 0; i < obj->nr_extern; i++) {
+ ext = &obj->externs[i];
+ if (ext->type != EXT_KSYM || !ext->ksym.type_id)
+ continue;
+
+ t = btf__type_by_id(obj->btf, ext->btf_id);
+ if (btf_is_var(t))
+ err = bpf_object__resolve_ksym_var_btf_id(obj, ext);
+ else
+ err = bpf_object__resolve_ksym_func_btf_id(obj, ext);
+ if (err)
+ return err;
}
return 0;
}
@@ -8193,6 +8512,16 @@ int bpf_object__btf_fd(const struct bpf_object *obj)
return obj->btf ? btf__fd(obj->btf) : -1;
}
+int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version)
+{
+ if (obj->loaded)
+ return -EINVAL;
+
+ obj->kern_version = kern_version;
+
+ return 0;
+}
+
int bpf_object__set_priv(struct bpf_object *obj, void *priv,
bpf_object_clear_priv_t clear_priv)
{
@@ -8381,7 +8710,7 @@ int bpf_program__nth_fd(const struct bpf_program *prog, int n)
return fd;
}
-enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog)
+enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog)
{
return prog->type;
}
@@ -8426,7 +8755,7 @@ BPF_PROG_TYPE_FNS(extension, BPF_PROG_TYPE_EXT);
BPF_PROG_TYPE_FNS(sk_lookup, BPF_PROG_TYPE_SK_LOOKUP);
enum bpf_attach_type
-bpf_program__get_expected_attach_type(struct bpf_program *prog)
+bpf_program__get_expected_attach_type(const struct bpf_program *prog)
{
return prog->expected_attach_type;
}
@@ -9202,6 +9531,7 @@ int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)
pr_warn("error: inner_map_fd already specified\n");
return -EINVAL;
}
+ zfree(&map->inner_map);
map->inner_map_fd = fd;
return 0;
}
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 3c35eb401931..bec4e6a6e31d 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -143,6 +143,7 @@ LIBBPF_API int bpf_object__unload(struct bpf_object *obj);
LIBBPF_API const char *bpf_object__name(const struct bpf_object *obj);
LIBBPF_API unsigned int bpf_object__kversion(const struct bpf_object *obj);
+LIBBPF_API int bpf_object__set_kversion(struct bpf_object *obj, __u32 kern_version);
struct btf;
LIBBPF_API struct btf *bpf_object__btf(const struct bpf_object *obj);
@@ -361,12 +362,12 @@ LIBBPF_API int bpf_program__set_struct_ops(struct bpf_program *prog);
LIBBPF_API int bpf_program__set_extension(struct bpf_program *prog);
LIBBPF_API int bpf_program__set_sk_lookup(struct bpf_program *prog);
-LIBBPF_API enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog);
+LIBBPF_API enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog);
LIBBPF_API void bpf_program__set_type(struct bpf_program *prog,
enum bpf_prog_type type);
LIBBPF_API enum bpf_attach_type
-bpf_program__get_expected_attach_type(struct bpf_program *prog);
+bpf_program__get_expected_attach_type(const struct bpf_program *prog);
LIBBPF_API void
bpf_program__set_expected_attach_type(struct bpf_program *prog,
enum bpf_attach_type type);
@@ -479,6 +480,7 @@ LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path);
LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path);
LIBBPF_API int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd);
+LIBBPF_API struct bpf_map *bpf_map__inner_map(struct bpf_map *map);
LIBBPF_API long libbpf_get_error(const void *ptr);
@@ -507,6 +509,7 @@ struct xdp_link_info {
struct bpf_xdp_set_link_opts {
size_t sz;
int old_fd;
+ size_t :0;
};
#define bpf_xdp_set_link_opts__last_field old_fd
@@ -759,6 +762,19 @@ enum libbpf_tristate {
TRI_MODULE = 2,
};
+struct bpf_linker_opts {
+ /* size of this struct, for forward/backward compatiblity */
+ size_t sz;
+};
+#define bpf_linker_opts__last_field sz
+
+struct bpf_linker;
+
+LIBBPF_API struct bpf_linker *bpf_linker__new(const char *filename, struct bpf_linker_opts *opts);
+LIBBPF_API int bpf_linker__add_file(struct bpf_linker *linker, const char *filename);
+LIBBPF_API int bpf_linker__finalize(struct bpf_linker *linker);
+LIBBPF_API void bpf_linker__free(struct bpf_linker *linker);
+
#ifdef __cplusplus
} /* extern "C" */
#endif
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 1c0fd2dd233a..b9b29baf1df8 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -350,3 +350,15 @@ LIBBPF_0.3.0 {
xsk_setup_xdp_prog;
xsk_socket__update_xskmap;
} LIBBPF_0.2.0;
+
+LIBBPF_0.4.0 {
+ global:
+ btf__add_float;
+ btf__add_type;
+ bpf_linker__add_file;
+ bpf_linker__finalize;
+ bpf_linker__free;
+ bpf_linker__new;
+ bpf_map__inner_map;
+ bpf_object__set_kversion;
+} LIBBPF_0.3.0;
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 969d0ac592ba..ee426226928f 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -19,6 +19,27 @@
#pragma GCC poison reallocarray
#include "libbpf.h"
+#include "btf.h"
+
+#ifndef EM_BPF
+#define EM_BPF 247
+#endif
+
+#ifndef R_BPF_64_64
+#define R_BPF_64_64 1
+#endif
+#ifndef R_BPF_64_32
+#define R_BPF_64_32 10
+#endif
+
+#ifndef SHT_LLVM_ADDRSIG
+#define SHT_LLVM_ADDRSIG 0x6FFF4C03
+#endif
+
+/* if libelf is old and doesn't support mmap(), fall back to read() */
+#ifndef ELF_C_READ_MMAP
+#define ELF_C_READ_MMAP ELF_C_READ
+#endif
#define BTF_INFO_ENC(kind, kind_flag, vlen) \
((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
@@ -31,6 +52,8 @@
#define BTF_MEMBER_ENC(name, type, bits_offset) (name), (type), (bits_offset)
#define BTF_PARAM_ENC(name, type) (name), (type)
#define BTF_VAR_SECINFO_ENC(type, offset, size) (type), (offset), (size)
+#define BTF_TYPE_FLOAT_ENC(name, sz) \
+ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 0), sz)
#ifndef likely
#define likely(x) __builtin_expect(!!(x), 1)
@@ -105,9 +128,58 @@ static inline void *libbpf_reallocarray(void *ptr, size_t nmemb, size_t size)
return realloc(ptr, total);
}
-void *btf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz,
- size_t cur_cnt, size_t max_cnt, size_t add_cnt);
-int btf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_cnt);
+struct btf;
+struct btf_type;
+
+struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id);
+const char *btf_kind_str(const struct btf_type *t);
+const struct btf_type *skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id);
+
+static inline enum btf_func_linkage btf_func_linkage(const struct btf_type *t)
+{
+ return (enum btf_func_linkage)(int)btf_vlen(t);
+}
+
+static inline __u32 btf_type_info(int kind, int vlen, int kflag)
+{
+ return (kflag << 31) | (kind << 24) | vlen;
+}
+
+enum map_def_parts {
+ MAP_DEF_MAP_TYPE = 0x001,
+ MAP_DEF_KEY_TYPE = 0x002,
+ MAP_DEF_KEY_SIZE = 0x004,
+ MAP_DEF_VALUE_TYPE = 0x008,
+ MAP_DEF_VALUE_SIZE = 0x010,
+ MAP_DEF_MAX_ENTRIES = 0x020,
+ MAP_DEF_MAP_FLAGS = 0x040,
+ MAP_DEF_NUMA_NODE = 0x080,
+ MAP_DEF_PINNING = 0x100,
+ MAP_DEF_INNER_MAP = 0x200,
+
+ MAP_DEF_ALL = 0x3ff, /* combination of all above */
+};
+
+struct btf_map_def {
+ enum map_def_parts parts;
+ __u32 map_type;
+ __u32 key_type_id;
+ __u32 key_size;
+ __u32 value_type_id;
+ __u32 value_size;
+ __u32 max_entries;
+ __u32 map_flags;
+ __u32 numa_node;
+ __u32 pinning;
+};
+
+int parse_btf_map_def(const char *map_name, struct btf *btf,
+ const struct btf_type *def_t, bool strict,
+ struct btf_map_def *map_def, struct btf_map_def *inner_def);
+
+void *libbpf_add_mem(void **data, size_t *cap_cnt, size_t elem_sz,
+ size_t cur_cnt, size_t max_cnt, size_t add_cnt);
+int libbpf_ensure_mem(void **data, size_t *cap_cnt, size_t elem_sz, size_t need_cnt);
static inline bool libbpf_validate_opts(const char *opts,
size_t opts_sz, size_t user_sz,
@@ -349,4 +421,11 @@ struct bpf_core_relo {
enum bpf_core_relo_kind kind;
};
+typedef int (*type_id_visit_fn)(__u32 *type_id, void *ctx);
+typedef int (*str_off_visit_fn)(__u32 *str_off, void *ctx);
+int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ctx);
+int btf_type_visit_str_offs(struct btf_type *t, str_off_visit_fn visit, void *ctx);
+int btf_ext_visit_type_ids(struct btf_ext *btf_ext, type_id_visit_fn visit, void *ctx);
+int btf_ext_visit_str_offs(struct btf_ext *btf_ext, str_off_visit_fn visit, void *ctx);
+
#endif /* __LIBBPF_LIBBPF_INTERNAL_H */
diff --git a/tools/lib/bpf/libbpf_util.h b/tools/lib/bpf/libbpf_util.h
deleted file mode 100644
index 59c779c5790c..000000000000
--- a/tools/lib/bpf/libbpf_util.h
+++ /dev/null
@@ -1,47 +0,0 @@
-/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
-/* Copyright (c) 2019 Facebook */
-
-#ifndef __LIBBPF_LIBBPF_UTIL_H
-#define __LIBBPF_LIBBPF_UTIL_H
-
-#include <stdbool.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/* Use these barrier functions instead of smp_[rw]mb() when they are
- * used in a libbpf header file. That way they can be built into the
- * application that uses libbpf.
- */
-#if defined(__i386__) || defined(__x86_64__)
-# define libbpf_smp_rmb() asm volatile("" : : : "memory")
-# define libbpf_smp_wmb() asm volatile("" : : : "memory")
-# define libbpf_smp_mb() \
- asm volatile("lock; addl $0,-4(%%rsp)" : : : "memory", "cc")
-/* Hinders stores to be observed before older loads. */
-# define libbpf_smp_rwmb() asm volatile("" : : : "memory")
-#elif defined(__aarch64__)
-# define libbpf_smp_rmb() asm volatile("dmb ishld" : : : "memory")
-# define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory")
-# define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory")
-# define libbpf_smp_rwmb() libbpf_smp_mb()
-#elif defined(__arm__)
-/* These are only valid for armv7 and above */
-# define libbpf_smp_rmb() asm volatile("dmb ish" : : : "memory")
-# define libbpf_smp_wmb() asm volatile("dmb ishst" : : : "memory")
-# define libbpf_smp_mb() asm volatile("dmb ish" : : : "memory")
-# define libbpf_smp_rwmb() libbpf_smp_mb()
-#else
-/* Architecture missing native barrier functions. */
-# define libbpf_smp_rmb() __sync_synchronize()
-# define libbpf_smp_wmb() __sync_synchronize()
-# define libbpf_smp_mb() __sync_synchronize()
-# define libbpf_smp_rwmb() __sync_synchronize()
-#endif
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-#endif
diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c
new file mode 100644
index 000000000000..9de084b1c699
--- /dev/null
+++ b/tools/lib/bpf/linker.c
@@ -0,0 +1,2883 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/*
+ * BPF static linker
+ *
+ * Copyright (c) 2021 Facebook
+ */
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <linux/err.h>
+#include <linux/btf.h>
+#include <elf.h>
+#include <libelf.h>
+#include <gelf.h>
+#include <fcntl.h>
+#include "libbpf.h"
+#include "btf.h"
+#include "libbpf_internal.h"
+#include "strset.h"
+
+#define BTF_EXTERN_SEC ".extern"
+
+struct src_sec {
+ const char *sec_name;
+ /* positional (not necessarily ELF) index in an array of sections */
+ int id;
+ /* positional (not necessarily ELF) index of a matching section in a final object file */
+ int dst_id;
+ /* section data offset in a matching output section */
+ int dst_off;
+ /* whether section is omitted from the final ELF file */
+ bool skipped;
+ /* whether section is an ephemeral section, not mapped to an ELF section */
+ bool ephemeral;
+
+ /* ELF info */
+ size_t sec_idx;
+ Elf_Scn *scn;
+ Elf64_Shdr *shdr;
+ Elf_Data *data;
+
+ /* corresponding BTF DATASEC type ID */
+ int sec_type_id;
+};
+
+struct src_obj {
+ const char *filename;
+ int fd;
+ Elf *elf;
+ /* Section header strings section index */
+ size_t shstrs_sec_idx;
+ /* SYMTAB section index */
+ size_t symtab_sec_idx;
+
+ struct btf *btf;
+ struct btf_ext *btf_ext;
+
+ /* List of sections (including ephemeral). Slot zero is unused. */
+ struct src_sec *secs;
+ int sec_cnt;
+
+ /* mapping of symbol indices from src to dst ELF */
+ int *sym_map;
+ /* mapping from the src BTF type IDs to dst ones */
+ int *btf_type_map;
+};
+
+/* single .BTF.ext data section */
+struct btf_ext_sec_data {
+ size_t rec_cnt;
+ __u32 rec_sz;
+ void *recs;
+};
+
+struct glob_sym {
+ /* ELF symbol index */
+ int sym_idx;
+ /* associated section id for .ksyms, .kconfig, etc, but not .extern */
+ int sec_id;
+ /* extern name offset in STRTAB */
+ int name_off;
+ /* optional associated BTF type ID */
+ int btf_id;
+ /* BTF type ID to which VAR/FUNC type is pointing to; used for
+ * rewriting types when extern VAR/FUNC is resolved to a concrete
+ * definition
+ */
+ int underlying_btf_id;
+ /* sec_var index in the corresponding dst_sec, if exists */
+ int var_idx;
+
+ /* extern or resolved/global symbol */
+ bool is_extern;
+ /* weak or strong symbol, never goes back from strong to weak */
+ bool is_weak;
+};
+
+struct dst_sec {
+ char *sec_name;
+ /* positional (not necessarily ELF) index in an array of sections */
+ int id;
+
+ bool ephemeral;
+
+ /* ELF info */
+ size_t sec_idx;
+ Elf_Scn *scn;
+ Elf64_Shdr *shdr;
+ Elf_Data *data;
+
+ /* final output section size */
+ int sec_sz;
+ /* final output contents of the section */
+ void *raw_data;
+
+ /* corresponding STT_SECTION symbol index in SYMTAB */
+ int sec_sym_idx;
+
+ /* section's DATASEC variable info, emitted on BTF finalization */
+ bool has_btf;
+ int sec_var_cnt;
+ struct btf_var_secinfo *sec_vars;
+
+ /* section's .BTF.ext data */
+ struct btf_ext_sec_data func_info;
+ struct btf_ext_sec_data line_info;
+ struct btf_ext_sec_data core_relo_info;
+};
+
+struct bpf_linker {
+ char *filename;
+ int fd;
+ Elf *elf;
+ Elf64_Ehdr *elf_hdr;
+
+ /* Output sections metadata */
+ struct dst_sec *secs;
+ int sec_cnt;
+
+ struct strset *strtab_strs; /* STRTAB unique strings */
+ size_t strtab_sec_idx; /* STRTAB section index */
+ size_t symtab_sec_idx; /* SYMTAB section index */
+
+ struct btf *btf;
+ struct btf_ext *btf_ext;
+
+ /* global (including extern) ELF symbols */
+ int glob_sym_cnt;
+ struct glob_sym *glob_syms;
+};
+
+#define pr_warn_elf(fmt, ...) \
+ libbpf_print(LIBBPF_WARN, "libbpf: " fmt ": %s\n", ##__VA_ARGS__, elf_errmsg(-1))
+
+static int init_output_elf(struct bpf_linker *linker, const char *file);
+
+static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, struct src_obj *obj);
+static int linker_sanity_check_elf(struct src_obj *obj);
+static int linker_sanity_check_elf_symtab(struct src_obj *obj, struct src_sec *sec);
+static int linker_sanity_check_elf_relos(struct src_obj *obj, struct src_sec *sec);
+static int linker_sanity_check_btf(struct src_obj *obj);
+static int linker_sanity_check_btf_ext(struct src_obj *obj);
+static int linker_fixup_btf(struct src_obj *obj);
+static int linker_append_sec_data(struct bpf_linker *linker, struct src_obj *obj);
+static int linker_append_elf_syms(struct bpf_linker *linker, struct src_obj *obj);
+static int linker_append_elf_sym(struct bpf_linker *linker, struct src_obj *obj,
+ Elf64_Sym *sym, const char *sym_name, int src_sym_idx);
+static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *obj);
+static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj);
+static int linker_append_btf_ext(struct bpf_linker *linker, struct src_obj *obj);
+
+static int finalize_btf(struct bpf_linker *linker);
+static int finalize_btf_ext(struct bpf_linker *linker);
+
+void bpf_linker__free(struct bpf_linker *linker)
+{
+ int i;
+
+ if (!linker)
+ return;
+
+ free(linker->filename);
+
+ if (linker->elf)
+ elf_end(linker->elf);
+
+ if (linker->fd >= 0)
+ close(linker->fd);
+
+ strset__free(linker->strtab_strs);
+
+ btf__free(linker->btf);
+ btf_ext__free(linker->btf_ext);
+
+ for (i = 1; i < linker->sec_cnt; i++) {
+ struct dst_sec *sec = &linker->secs[i];
+
+ free(sec->sec_name);
+ free(sec->raw_data);
+ free(sec->sec_vars);
+
+ free(sec->func_info.recs);
+ free(sec->line_info.recs);
+ free(sec->core_relo_info.recs);
+ }
+ free(linker->secs);
+
+ free(linker);
+}
+
+struct bpf_linker *bpf_linker__new(const char *filename, struct bpf_linker_opts *opts)
+{
+ struct bpf_linker *linker;
+ int err;
+
+ if (!OPTS_VALID(opts, bpf_linker_opts))
+ return NULL;
+
+ if (elf_version(EV_CURRENT) == EV_NONE) {
+ pr_warn_elf("libelf initialization failed");
+ return NULL;
+ }
+
+ linker = calloc(1, sizeof(*linker));
+ if (!linker)
+ return NULL;
+
+ linker->fd = -1;
+
+ err = init_output_elf(linker, filename);
+ if (err)
+ goto err_out;
+
+ return linker;
+
+err_out:
+ bpf_linker__free(linker);
+ return NULL;
+}
+
+static struct dst_sec *add_dst_sec(struct bpf_linker *linker, const char *sec_name)
+{
+ struct dst_sec *secs = linker->secs, *sec;
+ size_t new_cnt = linker->sec_cnt ? linker->sec_cnt + 1 : 2;
+
+ secs = libbpf_reallocarray(secs, new_cnt, sizeof(*secs));
+ if (!secs)
+ return NULL;
+
+ /* zero out newly allocated memory */
+ memset(secs + linker->sec_cnt, 0, (new_cnt - linker->sec_cnt) * sizeof(*secs));
+
+ linker->secs = secs;
+ linker->sec_cnt = new_cnt;
+
+ sec = &linker->secs[new_cnt - 1];
+ sec->id = new_cnt - 1;
+ sec->sec_name = strdup(sec_name);
+ if (!sec->sec_name)
+ return NULL;
+
+ return sec;
+}
+
+static Elf64_Sym *add_new_sym(struct bpf_linker *linker, size_t *sym_idx)
+{
+ struct dst_sec *symtab = &linker->secs[linker->symtab_sec_idx];
+ Elf64_Sym *syms, *sym;
+ size_t sym_cnt = symtab->sec_sz / sizeof(*sym);
+
+ syms = libbpf_reallocarray(symtab->raw_data, sym_cnt + 1, sizeof(*sym));
+ if (!syms)
+ return NULL;
+
+ sym = &syms[sym_cnt];
+ memset(sym, 0, sizeof(*sym));
+
+ symtab->raw_data = syms;
+ symtab->sec_sz += sizeof(*sym);
+ symtab->shdr->sh_size += sizeof(*sym);
+ symtab->data->d_size += sizeof(*sym);
+
+ if (sym_idx)
+ *sym_idx = sym_cnt;
+
+ return sym;
+}
+
+static int init_output_elf(struct bpf_linker *linker, const char *file)
+{
+ int err, str_off;
+ Elf64_Sym *init_sym;
+ struct dst_sec *sec;
+
+ linker->filename = strdup(file);
+ if (!linker->filename)
+ return -ENOMEM;
+
+ linker->fd = open(file, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+ if (linker->fd < 0) {
+ err = -errno;
+ pr_warn("failed to create '%s': %d\n", file, err);
+ return err;
+ }
+
+ linker->elf = elf_begin(linker->fd, ELF_C_WRITE, NULL);
+ if (!linker->elf) {
+ pr_warn_elf("failed to create ELF object");
+ return -EINVAL;
+ }
+
+ /* ELF header */
+ linker->elf_hdr = elf64_newehdr(linker->elf);
+ if (!linker->elf_hdr) {
+ pr_warn_elf("failed to create ELF header");
+ return -EINVAL;
+ }
+
+ linker->elf_hdr->e_machine = EM_BPF;
+ linker->elf_hdr->e_type = ET_REL;
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ linker->elf_hdr->e_ident[EI_DATA] = ELFDATA2LSB;
+#elif __BYTE_ORDER == __BIG_ENDIAN
+ linker->elf_hdr->e_ident[EI_DATA] = ELFDATA2MSB;
+#else
+#error "Unknown __BYTE_ORDER"
+#endif
+
+ /* STRTAB */
+ /* initialize strset with an empty string to conform to ELF */
+ linker->strtab_strs = strset__new(INT_MAX, "", sizeof(""));
+ if (libbpf_get_error(linker->strtab_strs))
+ return libbpf_get_error(linker->strtab_strs);
+
+ sec = add_dst_sec(linker, ".strtab");
+ if (!sec)
+ return -ENOMEM;
+
+ sec->scn = elf_newscn(linker->elf);
+ if (!sec->scn) {
+ pr_warn_elf("failed to create STRTAB section");
+ return -EINVAL;
+ }
+
+ sec->shdr = elf64_getshdr(sec->scn);
+ if (!sec->shdr)
+ return -EINVAL;
+
+ sec->data = elf_newdata(sec->scn);
+ if (!sec->data) {
+ pr_warn_elf("failed to create STRTAB data");
+ return -EINVAL;
+ }
+
+ str_off = strset__add_str(linker->strtab_strs, sec->sec_name);
+ if (str_off < 0)
+ return str_off;
+
+ sec->sec_idx = elf_ndxscn(sec->scn);
+ linker->elf_hdr->e_shstrndx = sec->sec_idx;
+ linker->strtab_sec_idx = sec->sec_idx;
+
+ sec->shdr->sh_name = str_off;
+ sec->shdr->sh_type = SHT_STRTAB;
+ sec->shdr->sh_flags = SHF_STRINGS;
+ sec->shdr->sh_offset = 0;
+ sec->shdr->sh_link = 0;
+ sec->shdr->sh_info = 0;
+ sec->shdr->sh_addralign = 1;
+ sec->shdr->sh_size = sec->sec_sz = 0;
+ sec->shdr->sh_entsize = 0;
+
+ /* SYMTAB */
+ sec = add_dst_sec(linker, ".symtab");
+ if (!sec)
+ return -ENOMEM;
+
+ sec->scn = elf_newscn(linker->elf);
+ if (!sec->scn) {
+ pr_warn_elf("failed to create SYMTAB section");
+ return -EINVAL;
+ }
+
+ sec->shdr = elf64_getshdr(sec->scn);
+ if (!sec->shdr)
+ return -EINVAL;
+
+ sec->data = elf_newdata(sec->scn);
+ if (!sec->data) {
+ pr_warn_elf("failed to create SYMTAB data");
+ return -EINVAL;
+ }
+
+ str_off = strset__add_str(linker->strtab_strs, sec->sec_name);
+ if (str_off < 0)
+ return str_off;
+
+ sec->sec_idx = elf_ndxscn(sec->scn);
+ linker->symtab_sec_idx = sec->sec_idx;
+
+ sec->shdr->sh_name = str_off;
+ sec->shdr->sh_type = SHT_SYMTAB;
+ sec->shdr->sh_flags = 0;
+ sec->shdr->sh_offset = 0;
+ sec->shdr->sh_link = linker->strtab_sec_idx;
+ /* sh_info should be one greater than the index of the last local
+ * symbol (i.e., binding is STB_LOCAL). But why and who cares?
+ */
+ sec->shdr->sh_info = 0;
+ sec->shdr->sh_addralign = 8;
+ sec->shdr->sh_entsize = sizeof(Elf64_Sym);
+
+ /* .BTF */
+ linker->btf = btf__new_empty();
+ err = libbpf_get_error(linker->btf);
+ if (err)
+ return err;
+
+ /* add the special all-zero symbol */
+ init_sym = add_new_sym(linker, NULL);
+ if (!init_sym)
+ return -EINVAL;
+
+ init_sym->st_name = 0;
+ init_sym->st_info = 0;
+ init_sym->st_other = 0;
+ init_sym->st_shndx = SHN_UNDEF;
+ init_sym->st_value = 0;
+ init_sym->st_size = 0;
+
+ return 0;
+}
+
+int bpf_linker__add_file(struct bpf_linker *linker, const char *filename)
+{
+ struct src_obj obj = {};
+ int err = 0;
+
+ if (!linker->elf)
+ return -EINVAL;
+
+ err = err ?: linker_load_obj_file(linker, filename, &obj);
+ err = err ?: linker_append_sec_data(linker, &obj);
+ err = err ?: linker_append_elf_syms(linker, &obj);
+ err = err ?: linker_append_elf_relos(linker, &obj);
+ err = err ?: linker_append_btf(linker, &obj);
+ err = err ?: linker_append_btf_ext(linker, &obj);
+
+ /* free up src_obj resources */
+ free(obj.btf_type_map);
+ btf__free(obj.btf);
+ btf_ext__free(obj.btf_ext);
+ free(obj.secs);
+ free(obj.sym_map);
+ if (obj.elf)
+ elf_end(obj.elf);
+ if (obj.fd >= 0)
+ close(obj.fd);
+
+ return err;
+}
+
+static bool is_dwarf_sec_name(const char *name)
+{
+ /* approximation, but the actual list is too long */
+ return strncmp(name, ".debug_", sizeof(".debug_") - 1) == 0;
+}
+
+static bool is_ignored_sec(struct src_sec *sec)
+{
+ Elf64_Shdr *shdr = sec->shdr;
+ const char *name = sec->sec_name;
+
+ /* no special handling of .strtab */
+ if (shdr->sh_type == SHT_STRTAB)
+ return true;
+
+ /* ignore .llvm_addrsig section as well */
+ if (shdr->sh_type == SHT_LLVM_ADDRSIG)
+ return true;
+
+ /* no subprograms will lead to an empty .text section, ignore it */
+ if (shdr->sh_type == SHT_PROGBITS && shdr->sh_size == 0 &&
+ strcmp(sec->sec_name, ".text") == 0)
+ return true;
+
+ /* DWARF sections */
+ if (is_dwarf_sec_name(sec->sec_name))
+ return true;
+
+ if (strncmp(name, ".rel", sizeof(".rel") - 1) == 0) {
+ name += sizeof(".rel") - 1;
+ /* DWARF section relocations */
+ if (is_dwarf_sec_name(name))
+ return true;
+
+ /* .BTF and .BTF.ext don't need relocations */
+ if (strcmp(name, BTF_ELF_SEC) == 0 ||
+ strcmp(name, BTF_EXT_ELF_SEC) == 0)
+ return true;
+ }
+
+ return false;
+}
+
+static struct src_sec *add_src_sec(struct src_obj *obj, const char *sec_name)
+{
+ struct src_sec *secs = obj->secs, *sec;
+ size_t new_cnt = obj->sec_cnt ? obj->sec_cnt + 1 : 2;
+
+ secs = libbpf_reallocarray(secs, new_cnt, sizeof(*secs));
+ if (!secs)
+ return NULL;
+
+ /* zero out newly allocated memory */
+ memset(secs + obj->sec_cnt, 0, (new_cnt - obj->sec_cnt) * sizeof(*secs));
+
+ obj->secs = secs;
+ obj->sec_cnt = new_cnt;
+
+ sec = &obj->secs[new_cnt - 1];
+ sec->id = new_cnt - 1;
+ sec->sec_name = sec_name;
+
+ return sec;
+}
+
+static int linker_load_obj_file(struct bpf_linker *linker, const char *filename, struct src_obj *obj)
+{
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ const int host_endianness = ELFDATA2LSB;
+#elif __BYTE_ORDER == __BIG_ENDIAN
+ const int host_endianness = ELFDATA2MSB;
+#else
+#error "Unknown __BYTE_ORDER"
+#endif
+ int err = 0;
+ Elf_Scn *scn;
+ Elf_Data *data;
+ Elf64_Ehdr *ehdr;
+ Elf64_Shdr *shdr;
+ struct src_sec *sec;
+
+ pr_debug("linker: adding object file '%s'...\n", filename);
+
+ obj->filename = filename;
+
+ obj->fd = open(filename, O_RDONLY);
+ if (obj->fd < 0) {
+ err = -errno;
+ pr_warn("failed to open file '%s': %d\n", filename, err);
+ return err;
+ }
+ obj->elf = elf_begin(obj->fd, ELF_C_READ_MMAP, NULL);
+ if (!obj->elf) {
+ err = -errno;
+ pr_warn_elf("failed to parse ELF file '%s'", filename);
+ return err;
+ }
+
+ /* Sanity check ELF file high-level properties */
+ ehdr = elf64_getehdr(obj->elf);
+ if (!ehdr) {
+ err = -errno;
+ pr_warn_elf("failed to get ELF header for %s", filename);
+ return err;
+ }
+ if (ehdr->e_ident[EI_DATA] != host_endianness) {
+ err = -EOPNOTSUPP;
+ pr_warn_elf("unsupported byte order of ELF file %s", filename);
+ return err;
+ }
+ if (ehdr->e_type != ET_REL
+ || ehdr->e_machine != EM_BPF
+ || ehdr->e_ident[EI_CLASS] != ELFCLASS64) {
+ err = -EOPNOTSUPP;
+ pr_warn_elf("unsupported kind of ELF file %s", filename);
+ return err;
+ }
+
+ if (elf_getshdrstrndx(obj->elf, &obj->shstrs_sec_idx)) {
+ err = -errno;
+ pr_warn_elf("failed to get SHSTRTAB section index for %s", filename);
+ return err;
+ }
+
+ scn = NULL;
+ while ((scn = elf_nextscn(obj->elf, scn)) != NULL) {
+ size_t sec_idx = elf_ndxscn(scn);
+ const char *sec_name;
+
+ shdr = elf64_getshdr(scn);
+ if (!shdr) {
+ err = -errno;
+ pr_warn_elf("failed to get section #%zu header for %s",
+ sec_idx, filename);
+ return err;
+ }
+
+ sec_name = elf_strptr(obj->elf, obj->shstrs_sec_idx, shdr->sh_name);
+ if (!sec_name) {
+ err = -errno;
+ pr_warn_elf("failed to get section #%zu name for %s",
+ sec_idx, filename);
+ return err;
+ }
+
+ data = elf_getdata(scn, 0);
+ if (!data) {
+ err = -errno;
+ pr_warn_elf("failed to get section #%zu (%s) data from %s",
+ sec_idx, sec_name, filename);
+ return err;
+ }
+
+ sec = add_src_sec(obj, sec_name);
+ if (!sec)
+ return -ENOMEM;
+
+ sec->scn = scn;
+ sec->shdr = shdr;
+ sec->data = data;
+ sec->sec_idx = elf_ndxscn(scn);
+
+ if (is_ignored_sec(sec)) {
+ sec->skipped = true;
+ continue;
+ }
+
+ switch (shdr->sh_type) {
+ case SHT_SYMTAB:
+ if (obj->symtab_sec_idx) {
+ err = -EOPNOTSUPP;
+ pr_warn("multiple SYMTAB sections found, not supported\n");
+ return err;
+ }
+ obj->symtab_sec_idx = sec_idx;
+ break;
+ case SHT_STRTAB:
+ /* we'll construct our own string table */
+ break;
+ case SHT_PROGBITS:
+ if (strcmp(sec_name, BTF_ELF_SEC) == 0) {
+ obj->btf = btf__new(data->d_buf, shdr->sh_size);
+ err = libbpf_get_error(obj->btf);
+ if (err) {
+ pr_warn("failed to parse .BTF from %s: %d\n", filename, err);
+ return err;
+ }
+ sec->skipped = true;
+ continue;
+ }
+ if (strcmp(sec_name, BTF_EXT_ELF_SEC) == 0) {
+ obj->btf_ext = btf_ext__new(data->d_buf, shdr->sh_size);
+ err = libbpf_get_error(obj->btf_ext);
+ if (err) {
+ pr_warn("failed to parse .BTF.ext from '%s': %d\n", filename, err);
+ return err;
+ }
+ sec->skipped = true;
+ continue;
+ }
+
+ /* data & code */
+ break;
+ case SHT_NOBITS:
+ /* BSS */
+ break;
+ case SHT_REL:
+ /* relocations */
+ break;
+ default:
+ pr_warn("unrecognized section #%zu (%s) in %s\n",
+ sec_idx, sec_name, filename);
+ err = -EINVAL;
+ return err;
+ }
+ }
+
+ err = err ?: linker_sanity_check_elf(obj);
+ err = err ?: linker_sanity_check_btf(obj);
+ err = err ?: linker_sanity_check_btf_ext(obj);
+ err = err ?: linker_fixup_btf(obj);
+
+ return err;
+}
+
+static bool is_pow_of_2(size_t x)
+{
+ return x && (x & (x - 1)) == 0;
+}
+
+static int linker_sanity_check_elf(struct src_obj *obj)
+{
+ struct src_sec *sec;
+ int i, err;
+
+ if (!obj->symtab_sec_idx) {
+ pr_warn("ELF is missing SYMTAB section in %s\n", obj->filename);
+ return -EINVAL;
+ }
+ if (!obj->shstrs_sec_idx) {
+ pr_warn("ELF is missing section headers STRTAB section in %s\n", obj->filename);
+ return -EINVAL;
+ }
+
+ for (i = 1; i < obj->sec_cnt; i++) {
+ sec = &obj->secs[i];
+
+ if (sec->sec_name[0] == '\0') {
+ pr_warn("ELF section #%zu has empty name in %s\n", sec->sec_idx, obj->filename);
+ return -EINVAL;
+ }
+
+ if (sec->shdr->sh_addralign && !is_pow_of_2(sec->shdr->sh_addralign))
+ return -EINVAL;
+ if (sec->shdr->sh_addralign != sec->data->d_align)
+ return -EINVAL;
+
+ if (sec->shdr->sh_size != sec->data->d_size)
+ return -EINVAL;
+
+ switch (sec->shdr->sh_type) {
+ case SHT_SYMTAB:
+ err = linker_sanity_check_elf_symtab(obj, sec);
+ if (err)
+ return err;
+ break;
+ case SHT_STRTAB:
+ break;
+ case SHT_PROGBITS:
+ if (sec->shdr->sh_flags & SHF_EXECINSTR) {
+ if (sec->shdr->sh_size % sizeof(struct bpf_insn) != 0)
+ return -EINVAL;
+ }
+ break;
+ case SHT_NOBITS:
+ break;
+ case SHT_REL:
+ err = linker_sanity_check_elf_relos(obj, sec);
+ if (err)
+ return err;
+ break;
+ case SHT_LLVM_ADDRSIG:
+ break;
+ default:
+ pr_warn("ELF section #%zu (%s) has unrecognized type %zu in %s\n",
+ sec->sec_idx, sec->sec_name, (size_t)sec->shdr->sh_type, obj->filename);
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int linker_sanity_check_elf_symtab(struct src_obj *obj, struct src_sec *sec)
+{
+ struct src_sec *link_sec;
+ Elf64_Sym *sym;
+ int i, n;
+
+ if (sec->shdr->sh_entsize != sizeof(Elf64_Sym))
+ return -EINVAL;
+ if (sec->shdr->sh_size % sec->shdr->sh_entsize != 0)
+ return -EINVAL;
+
+ if (!sec->shdr->sh_link || sec->shdr->sh_link >= obj->sec_cnt) {
+ pr_warn("ELF SYMTAB section #%zu points to missing STRTAB section #%zu in %s\n",
+ sec->sec_idx, (size_t)sec->shdr->sh_link, obj->filename);
+ return -EINVAL;
+ }
+ link_sec = &obj->secs[sec->shdr->sh_link];
+ if (link_sec->shdr->sh_type != SHT_STRTAB) {
+ pr_warn("ELF SYMTAB section #%zu points to invalid STRTAB section #%zu in %s\n",
+ sec->sec_idx, (size_t)sec->shdr->sh_link, obj->filename);
+ return -EINVAL;
+ }
+
+ n = sec->shdr->sh_size / sec->shdr->sh_entsize;
+ sym = sec->data->d_buf;
+ for (i = 0; i < n; i++, sym++) {
+ int sym_type = ELF64_ST_TYPE(sym->st_info);
+ int sym_bind = ELF64_ST_BIND(sym->st_info);
+ int sym_vis = ELF64_ST_VISIBILITY(sym->st_other);
+
+ if (i == 0) {
+ if (sym->st_name != 0 || sym->st_info != 0
+ || sym->st_other != 0 || sym->st_shndx != 0
+ || sym->st_value != 0 || sym->st_size != 0) {
+ pr_warn("ELF sym #0 is invalid in %s\n", obj->filename);
+ return -EINVAL;
+ }
+ continue;
+ }
+ if (sym_bind != STB_LOCAL && sym_bind != STB_GLOBAL && sym_bind != STB_WEAK) {
+ pr_warn("ELF sym #%d in section #%zu has unsupported symbol binding %d\n",
+ i, sec->sec_idx, sym_bind);
+ return -EINVAL;
+ }
+ if (sym_vis != STV_DEFAULT && sym_vis != STV_HIDDEN) {
+ pr_warn("ELF sym #%d in section #%zu has unsupported symbol visibility %d\n",
+ i, sec->sec_idx, sym_vis);
+ return -EINVAL;
+ }
+ if (sym->st_shndx == 0) {
+ if (sym_type != STT_NOTYPE || sym_bind == STB_LOCAL
+ || sym->st_value != 0 || sym->st_size != 0) {
+ pr_warn("ELF sym #%d is invalid extern symbol in %s\n",
+ i, obj->filename);
+
+ return -EINVAL;
+ }
+ continue;
+ }
+ if (sym->st_shndx < SHN_LORESERVE && sym->st_shndx >= obj->sec_cnt) {
+ pr_warn("ELF sym #%d in section #%zu points to missing section #%zu in %s\n",
+ i, sec->sec_idx, (size_t)sym->st_shndx, obj->filename);
+ return -EINVAL;
+ }
+ if (sym_type == STT_SECTION) {
+ if (sym->st_value != 0)
+ return -EINVAL;
+ continue;
+ }
+ }
+
+ return 0;
+}
+
+static int linker_sanity_check_elf_relos(struct src_obj *obj, struct src_sec *sec)
+{
+ struct src_sec *link_sec, *sym_sec;
+ Elf64_Rel *relo;
+ int i, n;
+
+ if (sec->shdr->sh_entsize != sizeof(Elf64_Rel))
+ return -EINVAL;
+ if (sec->shdr->sh_size % sec->shdr->sh_entsize != 0)
+ return -EINVAL;
+
+ /* SHT_REL's sh_link should point to SYMTAB */
+ if (sec->shdr->sh_link != obj->symtab_sec_idx) {
+ pr_warn("ELF relo section #%zu points to invalid SYMTAB section #%zu in %s\n",
+ sec->sec_idx, (size_t)sec->shdr->sh_link, obj->filename);
+ return -EINVAL;
+ }
+
+ /* SHT_REL's sh_info points to relocated section */
+ if (!sec->shdr->sh_info || sec->shdr->sh_info >= obj->sec_cnt) {
+ pr_warn("ELF relo section #%zu points to missing section #%zu in %s\n",
+ sec->sec_idx, (size_t)sec->shdr->sh_info, obj->filename);
+ return -EINVAL;
+ }
+ link_sec = &obj->secs[sec->shdr->sh_info];
+
+ /* .rel<secname> -> <secname> pattern is followed */
+ if (strncmp(sec->sec_name, ".rel", sizeof(".rel") - 1) != 0
+ || strcmp(sec->sec_name + sizeof(".rel") - 1, link_sec->sec_name) != 0) {
+ pr_warn("ELF relo section #%zu name has invalid name in %s\n",
+ sec->sec_idx, obj->filename);
+ return -EINVAL;
+ }
+
+ /* don't further validate relocations for ignored sections */
+ if (link_sec->skipped)
+ return 0;
+
+ /* relocatable section is data or instructions */
+ if (link_sec->shdr->sh_type != SHT_PROGBITS && link_sec->shdr->sh_type != SHT_NOBITS) {
+ pr_warn("ELF relo section #%zu points to invalid section #%zu in %s\n",
+ sec->sec_idx, (size_t)sec->shdr->sh_info, obj->filename);
+ return -EINVAL;
+ }
+
+ /* check sanity of each relocation */
+ n = sec->shdr->sh_size / sec->shdr->sh_entsize;
+ relo = sec->data->d_buf;
+ sym_sec = &obj->secs[obj->symtab_sec_idx];
+ for (i = 0; i < n; i++, relo++) {
+ size_t sym_idx = ELF64_R_SYM(relo->r_info);
+ size_t sym_type = ELF64_R_TYPE(relo->r_info);
+
+ if (sym_type != R_BPF_64_64 && sym_type != R_BPF_64_32) {
+ pr_warn("ELF relo #%d in section #%zu has unexpected type %zu in %s\n",
+ i, sec->sec_idx, sym_type, obj->filename);
+ return -EINVAL;
+ }
+
+ if (!sym_idx || sym_idx * sizeof(Elf64_Sym) >= sym_sec->shdr->sh_size) {
+ pr_warn("ELF relo #%d in section #%zu points to invalid symbol #%zu in %s\n",
+ i, sec->sec_idx, sym_idx, obj->filename);
+ return -EINVAL;
+ }
+
+ if (link_sec->shdr->sh_flags & SHF_EXECINSTR) {
+ if (relo->r_offset % sizeof(struct bpf_insn) != 0) {
+ pr_warn("ELF relo #%d in section #%zu points to missing symbol #%zu in %s\n",
+ i, sec->sec_idx, sym_idx, obj->filename);
+ return -EINVAL;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int check_btf_type_id(__u32 *type_id, void *ctx)
+{
+ struct btf *btf = ctx;
+
+ if (*type_id > btf__get_nr_types(btf))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int check_btf_str_off(__u32 *str_off, void *ctx)
+{
+ struct btf *btf = ctx;
+ const char *s;
+
+ s = btf__str_by_offset(btf, *str_off);
+
+ if (!s)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int linker_sanity_check_btf(struct src_obj *obj)
+{
+ struct btf_type *t;
+ int i, n, err = 0;
+
+ if (!obj->btf)
+ return 0;
+
+ n = btf__get_nr_types(obj->btf);
+ for (i = 1; i <= n; i++) {
+ t = btf_type_by_id(obj->btf, i);
+
+ err = err ?: btf_type_visit_type_ids(t, check_btf_type_id, obj->btf);
+ err = err ?: btf_type_visit_str_offs(t, check_btf_str_off, obj->btf);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int linker_sanity_check_btf_ext(struct src_obj *obj)
+{
+ int err = 0;
+
+ if (!obj->btf_ext)
+ return 0;
+
+ /* can't use .BTF.ext without .BTF */
+ if (!obj->btf)
+ return -EINVAL;
+
+ err = err ?: btf_ext_visit_type_ids(obj->btf_ext, check_btf_type_id, obj->btf);
+ err = err ?: btf_ext_visit_str_offs(obj->btf_ext, check_btf_str_off, obj->btf);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int init_sec(struct bpf_linker *linker, struct dst_sec *dst_sec, struct src_sec *src_sec)
+{
+ Elf_Scn *scn;
+ Elf_Data *data;
+ Elf64_Shdr *shdr;
+ int name_off;
+
+ dst_sec->sec_sz = 0;
+ dst_sec->sec_idx = 0;
+ dst_sec->ephemeral = src_sec->ephemeral;
+
+ /* ephemeral sections are just thin section shells lacking most parts */
+ if (src_sec->ephemeral)
+ return 0;
+
+ scn = elf_newscn(linker->elf);
+ if (!scn)
+ return -ENOMEM;
+ data = elf_newdata(scn);
+ if (!data)
+ return -ENOMEM;
+ shdr = elf64_getshdr(scn);
+ if (!shdr)
+ return -ENOMEM;
+
+ dst_sec->scn = scn;
+ dst_sec->shdr = shdr;
+ dst_sec->data = data;
+ dst_sec->sec_idx = elf_ndxscn(scn);
+
+ name_off = strset__add_str(linker->strtab_strs, src_sec->sec_name);
+ if (name_off < 0)
+ return name_off;
+
+ shdr->sh_name = name_off;
+ shdr->sh_type = src_sec->shdr->sh_type;
+ shdr->sh_flags = src_sec->shdr->sh_flags;
+ shdr->sh_size = 0;
+ /* sh_link and sh_info have different meaning for different types of
+ * sections, so we leave it up to the caller code to fill them in, if
+ * necessary
+ */
+ shdr->sh_link = 0;
+ shdr->sh_info = 0;
+ shdr->sh_addralign = src_sec->shdr->sh_addralign;
+ shdr->sh_entsize = src_sec->shdr->sh_entsize;
+
+ data->d_type = src_sec->data->d_type;
+ data->d_size = 0;
+ data->d_buf = NULL;
+ data->d_align = src_sec->data->d_align;
+ data->d_off = 0;
+
+ return 0;
+}
+
+static struct dst_sec *find_dst_sec_by_name(struct bpf_linker *linker, const char *sec_name)
+{
+ struct dst_sec *sec;
+ int i;
+
+ for (i = 1; i < linker->sec_cnt; i++) {
+ sec = &linker->secs[i];
+
+ if (strcmp(sec->sec_name, sec_name) == 0)
+ return sec;
+ }
+
+ return NULL;
+}
+
+static bool secs_match(struct dst_sec *dst, struct src_sec *src)
+{
+ if (dst->ephemeral || src->ephemeral)
+ return true;
+
+ if (dst->shdr->sh_type != src->shdr->sh_type) {
+ pr_warn("sec %s types mismatch\n", dst->sec_name);
+ return false;
+ }
+ if (dst->shdr->sh_flags != src->shdr->sh_flags) {
+ pr_warn("sec %s flags mismatch\n", dst->sec_name);
+ return false;
+ }
+ if (dst->shdr->sh_entsize != src->shdr->sh_entsize) {
+ pr_warn("sec %s entsize mismatch\n", dst->sec_name);
+ return false;
+ }
+
+ return true;
+}
+
+static bool sec_content_is_same(struct dst_sec *dst_sec, struct src_sec *src_sec)
+{
+ if (dst_sec->sec_sz != src_sec->shdr->sh_size)
+ return false;
+ if (memcmp(dst_sec->raw_data, src_sec->data->d_buf, dst_sec->sec_sz) != 0)
+ return false;
+ return true;
+}
+
+static int extend_sec(struct bpf_linker *linker, struct dst_sec *dst, struct src_sec *src)
+{
+ void *tmp;
+ size_t dst_align, src_align;
+ size_t dst_align_sz, dst_final_sz;
+ int err;
+
+ /* Ephemeral source section doesn't contribute anything to ELF
+ * section data.
+ */
+ if (src->ephemeral)
+ return 0;
+
+ /* Some sections (like .maps) can contain both externs (and thus be
+ * ephemeral) and non-externs (map definitions). So it's possible that
+ * it has to be "upgraded" from ephemeral to non-ephemeral when the
+ * first non-ephemeral entity appears. In such case, we add ELF
+ * section, data, etc.
+ */
+ if (dst->ephemeral) {
+ err = init_sec(linker, dst, src);
+ if (err)
+ return err;
+ }
+
+ dst_align = dst->shdr->sh_addralign;
+ src_align = src->shdr->sh_addralign;
+ if (dst_align == 0)
+ dst_align = 1;
+ if (dst_align < src_align)
+ dst_align = src_align;
+
+ dst_align_sz = (dst->sec_sz + dst_align - 1) / dst_align * dst_align;
+
+ /* no need to re-align final size */
+ dst_final_sz = dst_align_sz + src->shdr->sh_size;
+
+ if (src->shdr->sh_type != SHT_NOBITS) {
+ tmp = realloc(dst->raw_data, dst_final_sz);
+ if (!tmp)
+ return -ENOMEM;
+ dst->raw_data = tmp;
+
+ /* pad dst section, if it's alignment forced size increase */
+ memset(dst->raw_data + dst->sec_sz, 0, dst_align_sz - dst->sec_sz);
+ /* now copy src data at a properly aligned offset */
+ memcpy(dst->raw_data + dst_align_sz, src->data->d_buf, src->shdr->sh_size);
+ }
+
+ dst->sec_sz = dst_final_sz;
+ dst->shdr->sh_size = dst_final_sz;
+ dst->data->d_size = dst_final_sz;
+
+ dst->shdr->sh_addralign = dst_align;
+ dst->data->d_align = dst_align;
+
+ src->dst_off = dst_align_sz;
+
+ return 0;
+}
+
+static bool is_data_sec(struct src_sec *sec)
+{
+ if (!sec || sec->skipped)
+ return false;
+ /* ephemeral sections are data sections, e.g., .kconfig, .ksyms */
+ if (sec->ephemeral)
+ return true;
+ return sec->shdr->sh_type == SHT_PROGBITS || sec->shdr->sh_type == SHT_NOBITS;
+}
+
+static bool is_relo_sec(struct src_sec *sec)
+{
+ if (!sec || sec->skipped || sec->ephemeral)
+ return false;
+ return sec->shdr->sh_type == SHT_REL;
+}
+
+static int linker_append_sec_data(struct bpf_linker *linker, struct src_obj *obj)
+{
+ int i, err;
+
+ for (i = 1; i < obj->sec_cnt; i++) {
+ struct src_sec *src_sec;
+ struct dst_sec *dst_sec;
+
+ src_sec = &obj->secs[i];
+ if (!is_data_sec(src_sec))
+ continue;
+
+ dst_sec = find_dst_sec_by_name(linker, src_sec->sec_name);
+ if (!dst_sec) {
+ dst_sec = add_dst_sec(linker, src_sec->sec_name);
+ if (!dst_sec)
+ return -ENOMEM;
+ err = init_sec(linker, dst_sec, src_sec);
+ if (err) {
+ pr_warn("failed to init section '%s'\n", src_sec->sec_name);
+ return err;
+ }
+ } else {
+ if (!secs_match(dst_sec, src_sec)) {
+ pr_warn("ELF sections %s are incompatible\n", src_sec->sec_name);
+ return -1;
+ }
+
+ /* "license" and "version" sections are deduped */
+ if (strcmp(src_sec->sec_name, "license") == 0
+ || strcmp(src_sec->sec_name, "version") == 0) {
+ if (!sec_content_is_same(dst_sec, src_sec)) {
+ pr_warn("non-identical contents of section '%s' are not supported\n", src_sec->sec_name);
+ return -EINVAL;
+ }
+ src_sec->skipped = true;
+ src_sec->dst_id = dst_sec->id;
+ continue;
+ }
+ }
+
+ /* record mapped section index */
+ src_sec->dst_id = dst_sec->id;
+
+ err = extend_sec(linker, dst_sec, src_sec);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static int linker_append_elf_syms(struct bpf_linker *linker, struct src_obj *obj)
+{
+ struct src_sec *symtab = &obj->secs[obj->symtab_sec_idx];
+ Elf64_Sym *sym = symtab->data->d_buf;
+ int i, n = symtab->shdr->sh_size / symtab->shdr->sh_entsize, err;
+ int str_sec_idx = symtab->shdr->sh_link;
+ const char *sym_name;
+
+ obj->sym_map = calloc(n + 1, sizeof(*obj->sym_map));
+ if (!obj->sym_map)
+ return -ENOMEM;
+
+ for (i = 0; i < n; i++, sym++) {
+ /* We already validated all-zero symbol #0 and we already
+ * appended it preventively to the final SYMTAB, so skip it.
+ */
+ if (i == 0)
+ continue;
+
+ sym_name = elf_strptr(obj->elf, str_sec_idx, sym->st_name);
+ if (!sym_name) {
+ pr_warn("can't fetch symbol name for symbol #%d in '%s'\n", i, obj->filename);
+ return -EINVAL;
+ }
+
+ err = linker_append_elf_sym(linker, obj, sym, sym_name, i);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static Elf64_Sym *get_sym_by_idx(struct bpf_linker *linker, size_t sym_idx)
+{
+ struct dst_sec *symtab = &linker->secs[linker->symtab_sec_idx];
+ Elf64_Sym *syms = symtab->raw_data;
+
+ return &syms[sym_idx];
+}
+
+static struct glob_sym *find_glob_sym(struct bpf_linker *linker, const char *sym_name)
+{
+ struct glob_sym *glob_sym;
+ const char *name;
+ int i;
+
+ for (i = 0; i < linker->glob_sym_cnt; i++) {
+ glob_sym = &linker->glob_syms[i];
+ name = strset__data(linker->strtab_strs) + glob_sym->name_off;
+
+ if (strcmp(name, sym_name) == 0)
+ return glob_sym;
+ }
+
+ return NULL;
+}
+
+static struct glob_sym *add_glob_sym(struct bpf_linker *linker)
+{
+ struct glob_sym *syms, *sym;
+
+ syms = libbpf_reallocarray(linker->glob_syms, linker->glob_sym_cnt + 1,
+ sizeof(*linker->glob_syms));
+ if (!syms)
+ return NULL;
+
+ sym = &syms[linker->glob_sym_cnt];
+ memset(sym, 0, sizeof(*sym));
+ sym->var_idx = -1;
+
+ linker->glob_syms = syms;
+ linker->glob_sym_cnt++;
+
+ return sym;
+}
+
+static bool glob_sym_btf_matches(const char *sym_name, bool exact,
+ const struct btf *btf1, __u32 id1,
+ const struct btf *btf2, __u32 id2)
+{
+ const struct btf_type *t1, *t2;
+ bool is_static1, is_static2;
+ const char *n1, *n2;
+ int i, n;
+
+recur:
+ n1 = n2 = NULL;
+ t1 = skip_mods_and_typedefs(btf1, id1, &id1);
+ t2 = skip_mods_and_typedefs(btf2, id2, &id2);
+
+ /* check if only one side is FWD, otherwise handle with common logic */
+ if (!exact && btf_is_fwd(t1) != btf_is_fwd(t2)) {
+ n1 = btf__str_by_offset(btf1, t1->name_off);
+ n2 = btf__str_by_offset(btf2, t2->name_off);
+ if (strcmp(n1, n2) != 0) {
+ pr_warn("global '%s': incompatible forward declaration names '%s' and '%s'\n",
+ sym_name, n1, n2);
+ return false;
+ }
+ /* validate if FWD kind matches concrete kind */
+ if (btf_is_fwd(t1)) {
+ if (btf_kflag(t1) && btf_is_union(t2))
+ return true;
+ if (!btf_kflag(t1) && btf_is_struct(t2))
+ return true;
+ pr_warn("global '%s': incompatible %s forward declaration and concrete kind %s\n",
+ sym_name, btf_kflag(t1) ? "union" : "struct", btf_kind_str(t2));
+ } else {
+ if (btf_kflag(t2) && btf_is_union(t1))
+ return true;
+ if (!btf_kflag(t2) && btf_is_struct(t1))
+ return true;
+ pr_warn("global '%s': incompatible %s forward declaration and concrete kind %s\n",
+ sym_name, btf_kflag(t2) ? "union" : "struct", btf_kind_str(t1));
+ }
+ return false;
+ }
+
+ if (btf_kind(t1) != btf_kind(t2)) {
+ pr_warn("global '%s': incompatible BTF kinds %s and %s\n",
+ sym_name, btf_kind_str(t1), btf_kind_str(t2));
+ return false;
+ }
+
+ switch (btf_kind(t1)) {
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION:
+ case BTF_KIND_ENUM:
+ case BTF_KIND_FWD:
+ case BTF_KIND_FUNC:
+ case BTF_KIND_VAR:
+ n1 = btf__str_by_offset(btf1, t1->name_off);
+ n2 = btf__str_by_offset(btf2, t2->name_off);
+ if (strcmp(n1, n2) != 0) {
+ pr_warn("global '%s': incompatible %s names '%s' and '%s'\n",
+ sym_name, btf_kind_str(t1), n1, n2);
+ return false;
+ }
+ break;
+ default:
+ break;
+ }
+
+ switch (btf_kind(t1)) {
+ case BTF_KIND_UNKN: /* void */
+ case BTF_KIND_FWD:
+ return true;
+ case BTF_KIND_INT:
+ case BTF_KIND_FLOAT:
+ case BTF_KIND_ENUM:
+ /* ignore encoding for int and enum values for enum */
+ if (t1->size != t2->size) {
+ pr_warn("global '%s': incompatible %s '%s' size %u and %u\n",
+ sym_name, btf_kind_str(t1), n1, t1->size, t2->size);
+ return false;
+ }
+ return true;
+ case BTF_KIND_PTR:
+ /* just validate overall shape of the referenced type, so no
+ * contents comparison for struct/union, and allowd fwd vs
+ * struct/union
+ */
+ exact = false;
+ id1 = t1->type;
+ id2 = t2->type;
+ goto recur;
+ case BTF_KIND_ARRAY:
+ /* ignore index type and array size */
+ id1 = btf_array(t1)->type;
+ id2 = btf_array(t2)->type;
+ goto recur;
+ case BTF_KIND_FUNC:
+ /* extern and global linkages are compatible */
+ is_static1 = btf_func_linkage(t1) == BTF_FUNC_STATIC;
+ is_static2 = btf_func_linkage(t2) == BTF_FUNC_STATIC;
+ if (is_static1 != is_static2) {
+ pr_warn("global '%s': incompatible func '%s' linkage\n", sym_name, n1);
+ return false;
+ }
+
+ id1 = t1->type;
+ id2 = t2->type;
+ goto recur;
+ case BTF_KIND_VAR:
+ /* extern and global linkages are compatible */
+ is_static1 = btf_var(t1)->linkage == BTF_VAR_STATIC;
+ is_static2 = btf_var(t2)->linkage == BTF_VAR_STATIC;
+ if (is_static1 != is_static2) {
+ pr_warn("global '%s': incompatible var '%s' linkage\n", sym_name, n1);
+ return false;
+ }
+
+ id1 = t1->type;
+ id2 = t2->type;
+ goto recur;
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION: {
+ const struct btf_member *m1, *m2;
+
+ if (!exact)
+ return true;
+
+ if (btf_vlen(t1) != btf_vlen(t2)) {
+ pr_warn("global '%s': incompatible number of %s fields %u and %u\n",
+ sym_name, btf_kind_str(t1), btf_vlen(t1), btf_vlen(t2));
+ return false;
+ }
+
+ n = btf_vlen(t1);
+ m1 = btf_members(t1);
+ m2 = btf_members(t2);
+ for (i = 0; i < n; i++, m1++, m2++) {
+ n1 = btf__str_by_offset(btf1, m1->name_off);
+ n2 = btf__str_by_offset(btf2, m2->name_off);
+ if (strcmp(n1, n2) != 0) {
+ pr_warn("global '%s': incompatible field #%d names '%s' and '%s'\n",
+ sym_name, i, n1, n2);
+ return false;
+ }
+ if (m1->offset != m2->offset) {
+ pr_warn("global '%s': incompatible field #%d ('%s') offsets\n",
+ sym_name, i, n1);
+ return false;
+ }
+ if (!glob_sym_btf_matches(sym_name, exact, btf1, m1->type, btf2, m2->type))
+ return false;
+ }
+
+ return true;
+ }
+ case BTF_KIND_FUNC_PROTO: {
+ const struct btf_param *m1, *m2;
+
+ if (btf_vlen(t1) != btf_vlen(t2)) {
+ pr_warn("global '%s': incompatible number of %s params %u and %u\n",
+ sym_name, btf_kind_str(t1), btf_vlen(t1), btf_vlen(t2));
+ return false;
+ }
+
+ n = btf_vlen(t1);
+ m1 = btf_params(t1);
+ m2 = btf_params(t2);
+ for (i = 0; i < n; i++, m1++, m2++) {
+ /* ignore func arg names */
+ if (!glob_sym_btf_matches(sym_name, exact, btf1, m1->type, btf2, m2->type))
+ return false;
+ }
+
+ /* now check return type as well */
+ id1 = t1->type;
+ id2 = t2->type;
+ goto recur;
+ }
+
+ /* skip_mods_and_typedefs() make this impossible */
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_CONST:
+ case BTF_KIND_RESTRICT:
+ /* DATASECs are never compared with each other */
+ case BTF_KIND_DATASEC:
+ default:
+ pr_warn("global '%s': unsupported BTF kind %s\n",
+ sym_name, btf_kind_str(t1));
+ return false;
+ }
+}
+
+static bool map_defs_match(const char *sym_name,
+ const struct btf *main_btf,
+ const struct btf_map_def *main_def,
+ const struct btf_map_def *main_inner_def,
+ const struct btf *extra_btf,
+ const struct btf_map_def *extra_def,
+ const struct btf_map_def *extra_inner_def)
+{
+ const char *reason;
+
+ if (main_def->map_type != extra_def->map_type) {
+ reason = "type";
+ goto mismatch;
+ }
+
+ /* check key type/size match */
+ if (main_def->key_size != extra_def->key_size) {
+ reason = "key_size";
+ goto mismatch;
+ }
+ if (!!main_def->key_type_id != !!extra_def->key_type_id) {
+ reason = "key type";
+ goto mismatch;
+ }
+ if ((main_def->parts & MAP_DEF_KEY_TYPE)
+ && !glob_sym_btf_matches(sym_name, true /*exact*/,
+ main_btf, main_def->key_type_id,
+ extra_btf, extra_def->key_type_id)) {
+ reason = "key type";
+ goto mismatch;
+ }
+
+ /* validate value type/size match */
+ if (main_def->value_size != extra_def->value_size) {
+ reason = "value_size";
+ goto mismatch;
+ }
+ if (!!main_def->value_type_id != !!extra_def->value_type_id) {
+ reason = "value type";
+ goto mismatch;
+ }
+ if ((main_def->parts & MAP_DEF_VALUE_TYPE)
+ && !glob_sym_btf_matches(sym_name, true /*exact*/,
+ main_btf, main_def->value_type_id,
+ extra_btf, extra_def->value_type_id)) {
+ reason = "key type";
+ goto mismatch;
+ }
+
+ if (main_def->max_entries != extra_def->max_entries) {
+ reason = "max_entries";
+ goto mismatch;
+ }
+ if (main_def->map_flags != extra_def->map_flags) {
+ reason = "map_flags";
+ goto mismatch;
+ }
+ if (main_def->numa_node != extra_def->numa_node) {
+ reason = "numa_node";
+ goto mismatch;
+ }
+ if (main_def->pinning != extra_def->pinning) {
+ reason = "pinning";
+ goto mismatch;
+ }
+
+ if ((main_def->parts & MAP_DEF_INNER_MAP) != (extra_def->parts & MAP_DEF_INNER_MAP)) {
+ reason = "inner map";
+ goto mismatch;
+ }
+
+ if (main_def->parts & MAP_DEF_INNER_MAP) {
+ char inner_map_name[128];
+
+ snprintf(inner_map_name, sizeof(inner_map_name), "%s.inner", sym_name);
+
+ return map_defs_match(inner_map_name,
+ main_btf, main_inner_def, NULL,
+ extra_btf, extra_inner_def, NULL);
+ }
+
+ return true;
+
+mismatch:
+ pr_warn("global '%s': map %s mismatch\n", sym_name, reason);
+ return false;
+}
+
+static bool glob_map_defs_match(const char *sym_name,
+ struct bpf_linker *linker, struct glob_sym *glob_sym,
+ struct src_obj *obj, Elf64_Sym *sym, int btf_id)
+{
+ struct btf_map_def dst_def = {}, dst_inner_def = {};
+ struct btf_map_def src_def = {}, src_inner_def = {};
+ const struct btf_type *t;
+ int err;
+
+ t = btf__type_by_id(obj->btf, btf_id);
+ if (!btf_is_var(t)) {
+ pr_warn("global '%s': invalid map definition type [%d]\n", sym_name, btf_id);
+ return false;
+ }
+ t = skip_mods_and_typedefs(obj->btf, t->type, NULL);
+
+ err = parse_btf_map_def(sym_name, obj->btf, t, true /*strict*/, &src_def, &src_inner_def);
+ if (err) {
+ pr_warn("global '%s': invalid map definition\n", sym_name);
+ return false;
+ }
+
+ /* re-parse existing map definition */
+ t = btf__type_by_id(linker->btf, glob_sym->btf_id);
+ t = skip_mods_and_typedefs(linker->btf, t->type, NULL);
+ err = parse_btf_map_def(sym_name, linker->btf, t, true /*strict*/, &dst_def, &dst_inner_def);
+ if (err) {
+ /* this should not happen, because we already validated it */
+ pr_warn("global '%s': invalid dst map definition\n", sym_name);
+ return false;
+ }
+
+ /* Currently extern map definition has to be complete and match
+ * concrete map definition exactly. This restriction might be lifted
+ * in the future.
+ */
+ return map_defs_match(sym_name, linker->btf, &dst_def, &dst_inner_def,
+ obj->btf, &src_def, &src_inner_def);
+}
+
+static bool glob_syms_match(const char *sym_name,
+ struct bpf_linker *linker, struct glob_sym *glob_sym,
+ struct src_obj *obj, Elf64_Sym *sym, size_t sym_idx, int btf_id)
+{
+ const struct btf_type *src_t;
+
+ /* if we are dealing with externs, BTF types describing both global
+ * and extern VARs/FUNCs should be completely present in all files
+ */
+ if (!glob_sym->btf_id || !btf_id) {
+ pr_warn("BTF info is missing for global symbol '%s'\n", sym_name);
+ return false;
+ }
+
+ src_t = btf__type_by_id(obj->btf, btf_id);
+ if (!btf_is_var(src_t) && !btf_is_func(src_t)) {
+ pr_warn("only extern variables and functions are supported, but got '%s' for '%s'\n",
+ btf_kind_str(src_t), sym_name);
+ return false;
+ }
+
+ /* deal with .maps definitions specially */
+ if (glob_sym->sec_id && strcmp(linker->secs[glob_sym->sec_id].sec_name, MAPS_ELF_SEC) == 0)
+ return glob_map_defs_match(sym_name, linker, glob_sym, obj, sym, btf_id);
+
+ if (!glob_sym_btf_matches(sym_name, true /*exact*/,
+ linker->btf, glob_sym->btf_id, obj->btf, btf_id))
+ return false;
+
+ return true;
+}
+
+static bool btf_is_non_static(const struct btf_type *t)
+{
+ return (btf_is_var(t) && btf_var(t)->linkage != BTF_VAR_STATIC)
+ || (btf_is_func(t) && btf_func_linkage(t) != BTF_FUNC_STATIC);
+}
+
+static int find_glob_sym_btf(struct src_obj *obj, Elf64_Sym *sym, const char *sym_name,
+ int *out_btf_sec_id, int *out_btf_id)
+{
+ int i, j, n = btf__get_nr_types(obj->btf), m, btf_id = 0;
+ const struct btf_type *t;
+ const struct btf_var_secinfo *vi;
+ const char *name;
+
+ for (i = 1; i <= n; i++) {
+ t = btf__type_by_id(obj->btf, i);
+
+ /* some global and extern FUNCs and VARs might not be associated with any
+ * DATASEC, so try to detect them in the same pass
+ */
+ if (btf_is_non_static(t)) {
+ name = btf__str_by_offset(obj->btf, t->name_off);
+ if (strcmp(name, sym_name) != 0)
+ continue;
+
+ /* remember and still try to find DATASEC */
+ btf_id = i;
+ continue;
+ }
+
+ if (!btf_is_datasec(t))
+ continue;
+
+ vi = btf_var_secinfos(t);
+ for (j = 0, m = btf_vlen(t); j < m; j++, vi++) {
+ t = btf__type_by_id(obj->btf, vi->type);
+ name = btf__str_by_offset(obj->btf, t->name_off);
+
+ if (strcmp(name, sym_name) != 0)
+ continue;
+ if (btf_is_var(t) && btf_var(t)->linkage == BTF_VAR_STATIC)
+ continue;
+ if (btf_is_func(t) && btf_func_linkage(t) == BTF_FUNC_STATIC)
+ continue;
+
+ if (btf_id && btf_id != vi->type) {
+ pr_warn("global/extern '%s' BTF is ambiguous: both types #%d and #%u match\n",
+ sym_name, btf_id, vi->type);
+ return -EINVAL;
+ }
+
+ *out_btf_sec_id = i;
+ *out_btf_id = vi->type;
+
+ return 0;
+ }
+ }
+
+ /* free-floating extern or global FUNC */
+ if (btf_id) {
+ *out_btf_sec_id = 0;
+ *out_btf_id = btf_id;
+ return 0;
+ }
+
+ pr_warn("failed to find BTF info for global/extern symbol '%s'\n", sym_name);
+ return -ENOENT;
+}
+
+static struct src_sec *find_src_sec_by_name(struct src_obj *obj, const char *sec_name)
+{
+ struct src_sec *sec;
+ int i;
+
+ for (i = 1; i < obj->sec_cnt; i++) {
+ sec = &obj->secs[i];
+
+ if (strcmp(sec->sec_name, sec_name) == 0)
+ return sec;
+ }
+
+ return NULL;
+}
+
+static int complete_extern_btf_info(struct btf *dst_btf, int dst_id,
+ struct btf *src_btf, int src_id)
+{
+ struct btf_type *dst_t = btf_type_by_id(dst_btf, dst_id);
+ struct btf_type *src_t = btf_type_by_id(src_btf, src_id);
+ struct btf_param *src_p, *dst_p;
+ const char *s;
+ int i, n, off;
+
+ /* We already made sure that source and destination types (FUNC or
+ * VAR) match in terms of types and argument names.
+ */
+ if (btf_is_var(dst_t)) {
+ btf_var(dst_t)->linkage = BTF_VAR_GLOBAL_ALLOCATED;
+ return 0;
+ }
+
+ dst_t->info = btf_type_info(BTF_KIND_FUNC, BTF_FUNC_GLOBAL, 0);
+
+ /* now onto FUNC_PROTO types */
+ src_t = btf_type_by_id(src_btf, src_t->type);
+ dst_t = btf_type_by_id(dst_btf, dst_t->type);
+
+ /* Fill in all the argument names, which for extern FUNCs are missing.
+ * We'll end up with two copies of FUNCs/VARs for externs, but that
+ * will be taken care of by BTF dedup at the very end.
+ * It might be that BTF types for extern in one file has less/more BTF
+ * information (e.g., FWD instead of full STRUCT/UNION information),
+ * but that should be (in most cases, subject to BTF dedup rules)
+ * handled and resolved by BTF dedup algorithm as well, so we won't
+ * worry about it. Our only job is to make sure that argument names
+ * are populated on both sides, otherwise BTF dedup will pedantically
+ * consider them different.
+ */
+ src_p = btf_params(src_t);
+ dst_p = btf_params(dst_t);
+ for (i = 0, n = btf_vlen(dst_t); i < n; i++, src_p++, dst_p++) {
+ if (!src_p->name_off)
+ continue;
+
+ /* src_btf has more complete info, so add name to dst_btf */
+ s = btf__str_by_offset(src_btf, src_p->name_off);
+ off = btf__add_str(dst_btf, s);
+ if (off < 0)
+ return off;
+ dst_p->name_off = off;
+ }
+ return 0;
+}
+
+static void sym_update_bind(Elf64_Sym *sym, int sym_bind)
+{
+ sym->st_info = ELF64_ST_INFO(sym_bind, ELF64_ST_TYPE(sym->st_info));
+}
+
+static void sym_update_type(Elf64_Sym *sym, int sym_type)
+{
+ sym->st_info = ELF64_ST_INFO(ELF64_ST_BIND(sym->st_info), sym_type);
+}
+
+static void sym_update_visibility(Elf64_Sym *sym, int sym_vis)
+{
+ /* libelf doesn't provide setters for ST_VISIBILITY,
+ * but it is stored in the lower 2 bits of st_other
+ */
+ sym->st_other &= 0x03;
+ sym->st_other |= sym_vis;
+}
+
+static int linker_append_elf_sym(struct bpf_linker *linker, struct src_obj *obj,
+ Elf64_Sym *sym, const char *sym_name, int src_sym_idx)
+{
+ struct src_sec *src_sec = NULL;
+ struct dst_sec *dst_sec = NULL;
+ struct glob_sym *glob_sym = NULL;
+ int name_off, sym_type, sym_bind, sym_vis, err;
+ int btf_sec_id = 0, btf_id = 0;
+ size_t dst_sym_idx;
+ Elf64_Sym *dst_sym;
+ bool sym_is_extern;
+
+ sym_type = ELF64_ST_TYPE(sym->st_info);
+ sym_bind = ELF64_ST_BIND(sym->st_info);
+ sym_vis = ELF64_ST_VISIBILITY(sym->st_other);
+ sym_is_extern = sym->st_shndx == SHN_UNDEF;
+
+ if (sym_is_extern) {
+ if (!obj->btf) {
+ pr_warn("externs without BTF info are not supported\n");
+ return -ENOTSUP;
+ }
+ } else if (sym->st_shndx < SHN_LORESERVE) {
+ src_sec = &obj->secs[sym->st_shndx];
+ if (src_sec->skipped)
+ return 0;
+ dst_sec = &linker->secs[src_sec->dst_id];
+
+ /* allow only one STT_SECTION symbol per section */
+ if (sym_type == STT_SECTION && dst_sec->sec_sym_idx) {
+ obj->sym_map[src_sym_idx] = dst_sec->sec_sym_idx;
+ return 0;
+ }
+ }
+
+ if (sym_bind == STB_LOCAL)
+ goto add_sym;
+
+ /* find matching BTF info */
+ err = find_glob_sym_btf(obj, sym, sym_name, &btf_sec_id, &btf_id);
+ if (err)
+ return err;
+
+ if (sym_is_extern && btf_sec_id) {
+ const char *sec_name = NULL;
+ const struct btf_type *t;
+
+ t = btf__type_by_id(obj->btf, btf_sec_id);
+ sec_name = btf__str_by_offset(obj->btf, t->name_off);
+
+ /* Clang puts unannotated extern vars into
+ * '.extern' BTF DATASEC. Treat them the same
+ * as unannotated extern funcs (which are
+ * currently not put into any DATASECs).
+ * Those don't have associated src_sec/dst_sec.
+ */
+ if (strcmp(sec_name, BTF_EXTERN_SEC) != 0) {
+ src_sec = find_src_sec_by_name(obj, sec_name);
+ if (!src_sec) {
+ pr_warn("failed to find matching ELF sec '%s'\n", sec_name);
+ return -ENOENT;
+ }
+ dst_sec = &linker->secs[src_sec->dst_id];
+ }
+ }
+
+ glob_sym = find_glob_sym(linker, sym_name);
+ if (glob_sym) {
+ /* Preventively resolve to existing symbol. This is
+ * needed for further relocation symbol remapping in
+ * the next step of linking.
+ */
+ obj->sym_map[src_sym_idx] = glob_sym->sym_idx;
+
+ /* If both symbols are non-externs, at least one of
+ * them has to be STB_WEAK, otherwise they are in
+ * a conflict with each other.
+ */
+ if (!sym_is_extern && !glob_sym->is_extern
+ && !glob_sym->is_weak && sym_bind != STB_WEAK) {
+ pr_warn("conflicting non-weak symbol #%d (%s) definition in '%s'\n",
+ src_sym_idx, sym_name, obj->filename);
+ return -EINVAL;
+ }
+
+ if (!glob_syms_match(sym_name, linker, glob_sym, obj, sym, src_sym_idx, btf_id))
+ return -EINVAL;
+
+ dst_sym = get_sym_by_idx(linker, glob_sym->sym_idx);
+
+ /* If new symbol is strong, then force dst_sym to be strong as
+ * well; this way a mix of weak and non-weak extern
+ * definitions will end up being strong.
+ */
+ if (sym_bind == STB_GLOBAL) {
+ /* We still need to preserve type (NOTYPE or
+ * OBJECT/FUNC, depending on whether the symbol is
+ * extern or not)
+ */
+ sym_update_bind(dst_sym, STB_GLOBAL);
+ glob_sym->is_weak = false;
+ }
+
+ /* Non-default visibility is "contaminating", with stricter
+ * visibility overwriting more permissive ones, even if more
+ * permissive visibility comes from just an extern definition.
+ * Currently only STV_DEFAULT and STV_HIDDEN are allowed and
+ * ensured by ELF symbol sanity checks above.
+ */
+ if (sym_vis > ELF64_ST_VISIBILITY(dst_sym->st_other))
+ sym_update_visibility(dst_sym, sym_vis);
+
+ /* If the new symbol is extern, then regardless if
+ * existing symbol is extern or resolved global, just
+ * keep the existing one untouched.
+ */
+ if (sym_is_extern)
+ return 0;
+
+ /* If existing symbol is a strong resolved symbol, bail out,
+ * because we lost resolution battle have nothing to
+ * contribute. We already checked abover that there is no
+ * strong-strong conflict. We also already tightened binding
+ * and visibility, so nothing else to contribute at that point.
+ */
+ if (!glob_sym->is_extern && sym_bind == STB_WEAK)
+ return 0;
+
+ /* At this point, new symbol is strong non-extern,
+ * so overwrite glob_sym with new symbol information.
+ * Preserve binding and visibility.
+ */
+ sym_update_type(dst_sym, sym_type);
+ dst_sym->st_shndx = dst_sec->sec_idx;
+ dst_sym->st_value = src_sec->dst_off + sym->st_value;
+ dst_sym->st_size = sym->st_size;
+
+ /* see comment below about dst_sec->id vs dst_sec->sec_idx */
+ glob_sym->sec_id = dst_sec->id;
+ glob_sym->is_extern = false;
+
+ if (complete_extern_btf_info(linker->btf, glob_sym->btf_id,
+ obj->btf, btf_id))
+ return -EINVAL;
+
+ /* request updating VAR's/FUNC's underlying BTF type when appending BTF type */
+ glob_sym->underlying_btf_id = 0;
+
+ obj->sym_map[src_sym_idx] = glob_sym->sym_idx;
+ return 0;
+ }
+
+add_sym:
+ name_off = strset__add_str(linker->strtab_strs, sym_name);
+ if (name_off < 0)
+ return name_off;
+
+ dst_sym = add_new_sym(linker, &dst_sym_idx);
+ if (!dst_sym)
+ return -ENOMEM;
+
+ dst_sym->st_name = name_off;
+ dst_sym->st_info = sym->st_info;
+ dst_sym->st_other = sym->st_other;
+ dst_sym->st_shndx = dst_sec ? dst_sec->sec_idx : sym->st_shndx;
+ dst_sym->st_value = (src_sec ? src_sec->dst_off : 0) + sym->st_value;
+ dst_sym->st_size = sym->st_size;
+
+ obj->sym_map[src_sym_idx] = dst_sym_idx;
+
+ if (sym_type == STT_SECTION && dst_sym) {
+ dst_sec->sec_sym_idx = dst_sym_idx;
+ dst_sym->st_value = 0;
+ }
+
+ if (sym_bind != STB_LOCAL) {
+ glob_sym = add_glob_sym(linker);
+ if (!glob_sym)
+ return -ENOMEM;
+
+ glob_sym->sym_idx = dst_sym_idx;
+ /* we use dst_sec->id (and not dst_sec->sec_idx), because
+ * ephemeral sections (.kconfig, .ksyms, etc) don't have
+ * sec_idx (as they don't have corresponding ELF section), but
+ * still have id. .extern doesn't have even ephemeral section
+ * associated with it, so dst_sec->id == dst_sec->sec_idx == 0.
+ */
+ glob_sym->sec_id = dst_sec ? dst_sec->id : 0;
+ glob_sym->name_off = name_off;
+ /* we will fill btf_id in during BTF merging step */
+ glob_sym->btf_id = 0;
+ glob_sym->is_extern = sym_is_extern;
+ glob_sym->is_weak = sym_bind == STB_WEAK;
+ }
+
+ return 0;
+}
+
+static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *obj)
+{
+ struct src_sec *src_symtab = &obj->secs[obj->symtab_sec_idx];
+ struct dst_sec *dst_symtab = &linker->secs[linker->symtab_sec_idx];
+ int i, err;
+
+ for (i = 1; i < obj->sec_cnt; i++) {
+ struct src_sec *src_sec, *src_linked_sec;
+ struct dst_sec *dst_sec, *dst_linked_sec;
+ Elf64_Rel *src_rel, *dst_rel;
+ int j, n;
+
+ src_sec = &obj->secs[i];
+ if (!is_relo_sec(src_sec))
+ continue;
+
+ /* shdr->sh_info points to relocatable section */
+ src_linked_sec = &obj->secs[src_sec->shdr->sh_info];
+ if (src_linked_sec->skipped)
+ continue;
+
+ dst_sec = find_dst_sec_by_name(linker, src_sec->sec_name);
+ if (!dst_sec) {
+ dst_sec = add_dst_sec(linker, src_sec->sec_name);
+ if (!dst_sec)
+ return -ENOMEM;
+ err = init_sec(linker, dst_sec, src_sec);
+ if (err) {
+ pr_warn("failed to init section '%s'\n", src_sec->sec_name);
+ return err;
+ }
+ } else if (!secs_match(dst_sec, src_sec)) {
+ pr_warn("sections %s are not compatible\n", src_sec->sec_name);
+ return -1;
+ }
+
+ /* shdr->sh_link points to SYMTAB */
+ dst_sec->shdr->sh_link = linker->symtab_sec_idx;
+
+ /* shdr->sh_info points to relocated section */
+ dst_linked_sec = &linker->secs[src_linked_sec->dst_id];
+ dst_sec->shdr->sh_info = dst_linked_sec->sec_idx;
+
+ src_sec->dst_id = dst_sec->id;
+ err = extend_sec(linker, dst_sec, src_sec);
+ if (err)
+ return err;
+
+ src_rel = src_sec->data->d_buf;
+ dst_rel = dst_sec->raw_data + src_sec->dst_off;
+ n = src_sec->shdr->sh_size / src_sec->shdr->sh_entsize;
+ for (j = 0; j < n; j++, src_rel++, dst_rel++) {
+ size_t src_sym_idx = ELF64_R_SYM(src_rel->r_info);
+ size_t sym_type = ELF64_R_TYPE(src_rel->r_info);
+ Elf64_Sym *src_sym, *dst_sym;
+ size_t dst_sym_idx;
+
+ src_sym_idx = ELF64_R_SYM(src_rel->r_info);
+ src_sym = src_symtab->data->d_buf + sizeof(*src_sym) * src_sym_idx;
+
+ dst_sym_idx = obj->sym_map[src_sym_idx];
+ dst_sym = dst_symtab->raw_data + sizeof(*dst_sym) * dst_sym_idx;
+ dst_rel->r_offset += src_linked_sec->dst_off;
+ sym_type = ELF64_R_TYPE(src_rel->r_info);
+ dst_rel->r_info = ELF64_R_INFO(dst_sym_idx, sym_type);
+
+ if (ELF64_ST_TYPE(src_sym->st_info) == STT_SECTION) {
+ struct src_sec *sec = &obj->secs[src_sym->st_shndx];
+ struct bpf_insn *insn;
+
+ if (src_linked_sec->shdr->sh_flags & SHF_EXECINSTR) {
+ /* calls to the very first static function inside
+ * .text section at offset 0 will
+ * reference section symbol, not the
+ * function symbol. Fix that up,
+ * otherwise it won't be possible to
+ * relocate calls to two different
+ * static functions with the same name
+ * (rom two different object files)
+ */
+ insn = dst_linked_sec->raw_data + dst_rel->r_offset;
+ if (insn->code == (BPF_JMP | BPF_CALL))
+ insn->imm += sec->dst_off / sizeof(struct bpf_insn);
+ else
+ insn->imm += sec->dst_off;
+ } else {
+ pr_warn("relocation against STT_SECTION in non-exec section is not supported!\n");
+ return -EINVAL;
+ }
+ }
+
+ }
+ }
+
+ return 0;
+}
+
+static Elf64_Sym *find_sym_by_name(struct src_obj *obj, size_t sec_idx,
+ int sym_type, const char *sym_name)
+{
+ struct src_sec *symtab = &obj->secs[obj->symtab_sec_idx];
+ Elf64_Sym *sym = symtab->data->d_buf;
+ int i, n = symtab->shdr->sh_size / symtab->shdr->sh_entsize;
+ int str_sec_idx = symtab->shdr->sh_link;
+ const char *name;
+
+ for (i = 0; i < n; i++, sym++) {
+ if (sym->st_shndx != sec_idx)
+ continue;
+ if (ELF64_ST_TYPE(sym->st_info) != sym_type)
+ continue;
+
+ name = elf_strptr(obj->elf, str_sec_idx, sym->st_name);
+ if (!name)
+ return NULL;
+
+ if (strcmp(sym_name, name) != 0)
+ continue;
+
+ return sym;
+ }
+
+ return NULL;
+}
+
+static int linker_fixup_btf(struct src_obj *obj)
+{
+ const char *sec_name;
+ struct src_sec *sec;
+ int i, j, n, m;
+
+ if (!obj->btf)
+ return 0;
+
+ n = btf__get_nr_types(obj->btf);
+ for (i = 1; i <= n; i++) {
+ struct btf_var_secinfo *vi;
+ struct btf_type *t;
+
+ t = btf_type_by_id(obj->btf, i);
+ if (btf_kind(t) != BTF_KIND_DATASEC)
+ continue;
+
+ sec_name = btf__str_by_offset(obj->btf, t->name_off);
+ sec = find_src_sec_by_name(obj, sec_name);
+ if (sec) {
+ /* record actual section size, unless ephemeral */
+ if (sec->shdr)
+ t->size = sec->shdr->sh_size;
+ } else {
+ /* BTF can have some sections that are not represented
+ * in ELF, e.g., .kconfig, .ksyms, .extern, which are used
+ * for special extern variables.
+ *
+ * For all but one such special (ephemeral)
+ * sections, we pre-create "section shells" to be able
+ * to keep track of extra per-section metadata later
+ * (e.g., those BTF extern variables).
+ *
+ * .extern is even more special, though, because it
+ * contains extern variables that need to be resolved
+ * by static linker, not libbpf and kernel. When such
+ * externs are resolved, we are going to remove them
+ * from .extern BTF section and might end up not
+ * needing it at all. Each resolved extern should have
+ * matching non-extern VAR/FUNC in other sections.
+ *
+ * We do support leaving some of the externs
+ * unresolved, though, to support cases of building
+ * libraries, which will later be linked against final
+ * BPF applications. So if at finalization we still
+ * see unresolved externs, we'll create .extern
+ * section on our own.
+ */
+ if (strcmp(sec_name, BTF_EXTERN_SEC) == 0)
+ continue;
+
+ sec = add_src_sec(obj, sec_name);
+ if (!sec)
+ return -ENOMEM;
+
+ sec->ephemeral = true;
+ sec->sec_idx = 0; /* will match UNDEF shndx in ELF */
+ }
+
+ /* remember ELF section and its BTF type ID match */
+ sec->sec_type_id = i;
+
+ /* fix up variable offsets */
+ vi = btf_var_secinfos(t);
+ for (j = 0, m = btf_vlen(t); j < m; j++, vi++) {
+ const struct btf_type *vt = btf__type_by_id(obj->btf, vi->type);
+ const char *var_name = btf__str_by_offset(obj->btf, vt->name_off);
+ int var_linkage = btf_var(vt)->linkage;
+ Elf64_Sym *sym;
+
+ /* no need to patch up static or extern vars */
+ if (var_linkage != BTF_VAR_GLOBAL_ALLOCATED)
+ continue;
+
+ sym = find_sym_by_name(obj, sec->sec_idx, STT_OBJECT, var_name);
+ if (!sym) {
+ pr_warn("failed to find symbol for variable '%s' in section '%s'\n", var_name, sec_name);
+ return -ENOENT;
+ }
+
+ vi->offset = sym->st_value;
+ }
+ }
+
+ return 0;
+}
+
+static int remap_type_id(__u32 *type_id, void *ctx)
+{
+ int *id_map = ctx;
+ int new_id = id_map[*type_id];
+
+ /* Error out if the type wasn't remapped. Ignore VOID which stays VOID. */
+ if (new_id == 0 && *type_id != 0) {
+ pr_warn("failed to find new ID mapping for original BTF type ID %u\n", *type_id);
+ return -EINVAL;
+ }
+
+ *type_id = id_map[*type_id];
+
+ return 0;
+}
+
+static int linker_append_btf(struct bpf_linker *linker, struct src_obj *obj)
+{
+ const struct btf_type *t;
+ int i, j, n, start_id, id;
+ const char *name;
+
+ if (!obj->btf)
+ return 0;
+
+ start_id = btf__get_nr_types(linker->btf) + 1;
+ n = btf__get_nr_types(obj->btf);
+
+ obj->btf_type_map = calloc(n + 1, sizeof(int));
+ if (!obj->btf_type_map)
+ return -ENOMEM;
+
+ for (i = 1; i <= n; i++) {
+ struct glob_sym *glob_sym = NULL;
+
+ t = btf__type_by_id(obj->btf, i);
+
+ /* DATASECs are handled specially below */
+ if (btf_kind(t) == BTF_KIND_DATASEC)
+ continue;
+
+ if (btf_is_non_static(t)) {
+ /* there should be glob_sym already */
+ name = btf__str_by_offset(obj->btf, t->name_off);
+ glob_sym = find_glob_sym(linker, name);
+
+ /* VARs without corresponding glob_sym are those that
+ * belong to skipped/deduplicated sections (i.e.,
+ * license and version), so just skip them
+ */
+ if (!glob_sym)
+ continue;
+
+ /* linker_append_elf_sym() might have requested
+ * updating underlying type ID, if extern was resolved
+ * to strong symbol or weak got upgraded to non-weak
+ */
+ if (glob_sym->underlying_btf_id == 0)
+ glob_sym->underlying_btf_id = -t->type;
+
+ /* globals from previous object files that match our
+ * VAR/FUNC already have a corresponding associated
+ * BTF type, so just make sure to use it
+ */
+ if (glob_sym->btf_id) {
+ /* reuse existing BTF type for global var/func */
+ obj->btf_type_map[i] = glob_sym->btf_id;
+ continue;
+ }
+ }
+
+ id = btf__add_type(linker->btf, obj->btf, t);
+ if (id < 0) {
+ pr_warn("failed to append BTF type #%d from file '%s'\n", i, obj->filename);
+ return id;
+ }
+
+ obj->btf_type_map[i] = id;
+
+ /* record just appended BTF type for var/func */
+ if (glob_sym) {
+ glob_sym->btf_id = id;
+ glob_sym->underlying_btf_id = -t->type;
+ }
+ }
+
+ /* remap all the types except DATASECs */
+ n = btf__get_nr_types(linker->btf);
+ for (i = start_id; i <= n; i++) {
+ struct btf_type *dst_t = btf_type_by_id(linker->btf, i);
+
+ if (btf_type_visit_type_ids(dst_t, remap_type_id, obj->btf_type_map))
+ return -EINVAL;
+ }
+
+ /* Rewrite VAR/FUNC underlying types (i.e., FUNC's FUNC_PROTO and VAR's
+ * actual type), if necessary
+ */
+ for (i = 0; i < linker->glob_sym_cnt; i++) {
+ struct glob_sym *glob_sym = &linker->glob_syms[i];
+ struct btf_type *glob_t;
+
+ if (glob_sym->underlying_btf_id >= 0)
+ continue;
+
+ glob_sym->underlying_btf_id = obj->btf_type_map[-glob_sym->underlying_btf_id];
+
+ glob_t = btf_type_by_id(linker->btf, glob_sym->btf_id);
+ glob_t->type = glob_sym->underlying_btf_id;
+ }
+
+ /* append DATASEC info */
+ for (i = 1; i < obj->sec_cnt; i++) {
+ struct src_sec *src_sec;
+ struct dst_sec *dst_sec;
+ const struct btf_var_secinfo *src_var;
+ struct btf_var_secinfo *dst_var;
+
+ src_sec = &obj->secs[i];
+ if (!src_sec->sec_type_id || src_sec->skipped)
+ continue;
+ dst_sec = &linker->secs[src_sec->dst_id];
+
+ /* Mark section as having BTF regardless of the presence of
+ * variables. In some cases compiler might generate empty BTF
+ * with no variables information. E.g., when promoting local
+ * array/structure variable initial values and BPF object
+ * file otherwise has no read-only static variables in
+ * .rodata. We need to preserve such empty BTF and just set
+ * correct section size.
+ */
+ dst_sec->has_btf = true;
+
+ t = btf__type_by_id(obj->btf, src_sec->sec_type_id);
+ src_var = btf_var_secinfos(t);
+ n = btf_vlen(t);
+ for (j = 0; j < n; j++, src_var++) {
+ void *sec_vars = dst_sec->sec_vars;
+ int new_id = obj->btf_type_map[src_var->type];
+ struct glob_sym *glob_sym = NULL;
+
+ t = btf_type_by_id(linker->btf, new_id);
+ if (btf_is_non_static(t)) {
+ name = btf__str_by_offset(linker->btf, t->name_off);
+ glob_sym = find_glob_sym(linker, name);
+ if (glob_sym->sec_id != dst_sec->id) {
+ pr_warn("global '%s': section mismatch %d vs %d\n",
+ name, glob_sym->sec_id, dst_sec->id);
+ return -EINVAL;
+ }
+ }
+
+ /* If there is already a member (VAR or FUNC) mapped
+ * to the same type, don't add a duplicate entry.
+ * This will happen when multiple object files define
+ * the same extern VARs/FUNCs.
+ */
+ if (glob_sym && glob_sym->var_idx >= 0) {
+ __s64 sz;
+
+ dst_var = &dst_sec->sec_vars[glob_sym->var_idx];
+ /* Because underlying BTF type might have
+ * changed, so might its size have changed, so
+ * re-calculate and update it in sec_var.
+ */
+ sz = btf__resolve_size(linker->btf, glob_sym->underlying_btf_id);
+ if (sz < 0) {
+ pr_warn("global '%s': failed to resolve size of underlying type: %d\n",
+ name, (int)sz);
+ return -EINVAL;
+ }
+ dst_var->size = sz;
+ continue;
+ }
+
+ sec_vars = libbpf_reallocarray(sec_vars,
+ dst_sec->sec_var_cnt + 1,
+ sizeof(*dst_sec->sec_vars));
+ if (!sec_vars)
+ return -ENOMEM;
+
+ dst_sec->sec_vars = sec_vars;
+ dst_sec->sec_var_cnt++;
+
+ dst_var = &dst_sec->sec_vars[dst_sec->sec_var_cnt - 1];
+ dst_var->type = obj->btf_type_map[src_var->type];
+ dst_var->size = src_var->size;
+ dst_var->offset = src_sec->dst_off + src_var->offset;
+
+ if (glob_sym)
+ glob_sym->var_idx = dst_sec->sec_var_cnt - 1;
+ }
+ }
+
+ return 0;
+}
+
+static void *add_btf_ext_rec(struct btf_ext_sec_data *ext_data, const void *src_rec)
+{
+ void *tmp;
+
+ tmp = libbpf_reallocarray(ext_data->recs, ext_data->rec_cnt + 1, ext_data->rec_sz);
+ if (!tmp)
+ return NULL;
+ ext_data->recs = tmp;
+
+ tmp += ext_data->rec_cnt * ext_data->rec_sz;
+ memcpy(tmp, src_rec, ext_data->rec_sz);
+
+ ext_data->rec_cnt++;
+
+ return tmp;
+}
+
+static int linker_append_btf_ext(struct bpf_linker *linker, struct src_obj *obj)
+{
+ const struct btf_ext_info_sec *ext_sec;
+ const char *sec_name, *s;
+ struct src_sec *src_sec;
+ struct dst_sec *dst_sec;
+ int rec_sz, str_off, i;
+
+ if (!obj->btf_ext)
+ return 0;
+
+ rec_sz = obj->btf_ext->func_info.rec_size;
+ for_each_btf_ext_sec(&obj->btf_ext->func_info, ext_sec) {
+ struct bpf_func_info_min *src_rec, *dst_rec;
+
+ sec_name = btf__name_by_offset(obj->btf, ext_sec->sec_name_off);
+ src_sec = find_src_sec_by_name(obj, sec_name);
+ if (!src_sec) {
+ pr_warn("can't find section '%s' referenced from .BTF.ext\n", sec_name);
+ return -EINVAL;
+ }
+ dst_sec = &linker->secs[src_sec->dst_id];
+
+ if (dst_sec->func_info.rec_sz == 0)
+ dst_sec->func_info.rec_sz = rec_sz;
+ if (dst_sec->func_info.rec_sz != rec_sz) {
+ pr_warn("incompatible .BTF.ext record sizes for section '%s'\n", sec_name);
+ return -EINVAL;
+ }
+
+ for_each_btf_ext_rec(&obj->btf_ext->func_info, ext_sec, i, src_rec) {
+ dst_rec = add_btf_ext_rec(&dst_sec->func_info, src_rec);
+ if (!dst_rec)
+ return -ENOMEM;
+
+ dst_rec->insn_off += src_sec->dst_off;
+ dst_rec->type_id = obj->btf_type_map[dst_rec->type_id];
+ }
+ }
+
+ rec_sz = obj->btf_ext->line_info.rec_size;
+ for_each_btf_ext_sec(&obj->btf_ext->line_info, ext_sec) {
+ struct bpf_line_info_min *src_rec, *dst_rec;
+
+ sec_name = btf__name_by_offset(obj->btf, ext_sec->sec_name_off);
+ src_sec = find_src_sec_by_name(obj, sec_name);
+ if (!src_sec) {
+ pr_warn("can't find section '%s' referenced from .BTF.ext\n", sec_name);
+ return -EINVAL;
+ }
+ dst_sec = &linker->secs[src_sec->dst_id];
+
+ if (dst_sec->line_info.rec_sz == 0)
+ dst_sec->line_info.rec_sz = rec_sz;
+ if (dst_sec->line_info.rec_sz != rec_sz) {
+ pr_warn("incompatible .BTF.ext record sizes for section '%s'\n", sec_name);
+ return -EINVAL;
+ }
+
+ for_each_btf_ext_rec(&obj->btf_ext->line_info, ext_sec, i, src_rec) {
+ dst_rec = add_btf_ext_rec(&dst_sec->line_info, src_rec);
+ if (!dst_rec)
+ return -ENOMEM;
+
+ dst_rec->insn_off += src_sec->dst_off;
+
+ s = btf__str_by_offset(obj->btf, src_rec->file_name_off);
+ str_off = btf__add_str(linker->btf, s);
+ if (str_off < 0)
+ return -ENOMEM;
+ dst_rec->file_name_off = str_off;
+
+ s = btf__str_by_offset(obj->btf, src_rec->line_off);
+ str_off = btf__add_str(linker->btf, s);
+ if (str_off < 0)
+ return -ENOMEM;
+ dst_rec->line_off = str_off;
+
+ /* dst_rec->line_col is fine */
+ }
+ }
+
+ rec_sz = obj->btf_ext->core_relo_info.rec_size;
+ for_each_btf_ext_sec(&obj->btf_ext->core_relo_info, ext_sec) {
+ struct bpf_core_relo *src_rec, *dst_rec;
+
+ sec_name = btf__name_by_offset(obj->btf, ext_sec->sec_name_off);
+ src_sec = find_src_sec_by_name(obj, sec_name);
+ if (!src_sec) {
+ pr_warn("can't find section '%s' referenced from .BTF.ext\n", sec_name);
+ return -EINVAL;
+ }
+ dst_sec = &linker->secs[src_sec->dst_id];
+
+ if (dst_sec->core_relo_info.rec_sz == 0)
+ dst_sec->core_relo_info.rec_sz = rec_sz;
+ if (dst_sec->core_relo_info.rec_sz != rec_sz) {
+ pr_warn("incompatible .BTF.ext record sizes for section '%s'\n", sec_name);
+ return -EINVAL;
+ }
+
+ for_each_btf_ext_rec(&obj->btf_ext->core_relo_info, ext_sec, i, src_rec) {
+ dst_rec = add_btf_ext_rec(&dst_sec->core_relo_info, src_rec);
+ if (!dst_rec)
+ return -ENOMEM;
+
+ dst_rec->insn_off += src_sec->dst_off;
+ dst_rec->type_id = obj->btf_type_map[dst_rec->type_id];
+
+ s = btf__str_by_offset(obj->btf, src_rec->access_str_off);
+ str_off = btf__add_str(linker->btf, s);
+ if (str_off < 0)
+ return -ENOMEM;
+ dst_rec->access_str_off = str_off;
+
+ /* dst_rec->kind is fine */
+ }
+ }
+
+ return 0;
+}
+
+int bpf_linker__finalize(struct bpf_linker *linker)
+{
+ struct dst_sec *sec;
+ size_t strs_sz;
+ const void *strs;
+ int err, i;
+
+ if (!linker->elf)
+ return -EINVAL;
+
+ err = finalize_btf(linker);
+ if (err)
+ return err;
+
+ /* Finalize strings */
+ strs_sz = strset__data_size(linker->strtab_strs);
+ strs = strset__data(linker->strtab_strs);
+
+ sec = &linker->secs[linker->strtab_sec_idx];
+ sec->data->d_align = 1;
+ sec->data->d_off = 0LL;
+ sec->data->d_buf = (void *)strs;
+ sec->data->d_type = ELF_T_BYTE;
+ sec->data->d_size = strs_sz;
+ sec->shdr->sh_size = strs_sz;
+
+ for (i = 1; i < linker->sec_cnt; i++) {
+ sec = &linker->secs[i];
+
+ /* STRTAB is handled specially above */
+ if (sec->sec_idx == linker->strtab_sec_idx)
+ continue;
+
+ /* special ephemeral sections (.ksyms, .kconfig, etc) */
+ if (!sec->scn)
+ continue;
+
+ sec->data->d_buf = sec->raw_data;
+ }
+
+ /* Finalize ELF layout */
+ if (elf_update(linker->elf, ELF_C_NULL) < 0) {
+ err = -errno;
+ pr_warn_elf("failed to finalize ELF layout");
+ return err;
+ }
+
+ /* Write out final ELF contents */
+ if (elf_update(linker->elf, ELF_C_WRITE) < 0) {
+ err = -errno;
+ pr_warn_elf("failed to write ELF contents");
+ return err;
+ }
+
+ elf_end(linker->elf);
+ close(linker->fd);
+
+ linker->elf = NULL;
+ linker->fd = -1;
+
+ return 0;
+}
+
+static int emit_elf_data_sec(struct bpf_linker *linker, const char *sec_name,
+ size_t align, const void *raw_data, size_t raw_sz)
+{
+ Elf_Scn *scn;
+ Elf_Data *data;
+ Elf64_Shdr *shdr;
+ int name_off;
+
+ name_off = strset__add_str(linker->strtab_strs, sec_name);
+ if (name_off < 0)
+ return name_off;
+
+ scn = elf_newscn(linker->elf);
+ if (!scn)
+ return -ENOMEM;
+ data = elf_newdata(scn);
+ if (!data)
+ return -ENOMEM;
+ shdr = elf64_getshdr(scn);
+ if (!shdr)
+ return -EINVAL;
+
+ shdr->sh_name = name_off;
+ shdr->sh_type = SHT_PROGBITS;
+ shdr->sh_flags = 0;
+ shdr->sh_size = raw_sz;
+ shdr->sh_link = 0;
+ shdr->sh_info = 0;
+ shdr->sh_addralign = align;
+ shdr->sh_entsize = 0;
+
+ data->d_type = ELF_T_BYTE;
+ data->d_size = raw_sz;
+ data->d_buf = (void *)raw_data;
+ data->d_align = align;
+ data->d_off = 0;
+
+ return 0;
+}
+
+static int finalize_btf(struct bpf_linker *linker)
+{
+ struct btf *btf = linker->btf;
+ const void *raw_data;
+ int i, j, id, err;
+ __u32 raw_sz;
+
+ /* bail out if no BTF data was produced */
+ if (btf__get_nr_types(linker->btf) == 0)
+ return 0;
+
+ for (i = 1; i < linker->sec_cnt; i++) {
+ struct dst_sec *sec = &linker->secs[i];
+
+ if (!sec->has_btf)
+ continue;
+
+ id = btf__add_datasec(btf, sec->sec_name, sec->sec_sz);
+ if (id < 0) {
+ pr_warn("failed to add consolidated BTF type for datasec '%s': %d\n",
+ sec->sec_name, id);
+ return id;
+ }
+
+ for (j = 0; j < sec->sec_var_cnt; j++) {
+ struct btf_var_secinfo *vi = &sec->sec_vars[j];
+
+ if (btf__add_datasec_var_info(btf, vi->type, vi->offset, vi->size))
+ return -EINVAL;
+ }
+ }
+
+ err = finalize_btf_ext(linker);
+ if (err) {
+ pr_warn(".BTF.ext generation failed: %d\n", err);
+ return err;
+ }
+
+ err = btf__dedup(linker->btf, linker->btf_ext, NULL);
+ if (err) {
+ pr_warn("BTF dedup failed: %d\n", err);
+ return err;
+ }
+
+ /* Emit .BTF section */
+ raw_data = btf__get_raw_data(linker->btf, &raw_sz);
+ if (!raw_data)
+ return -ENOMEM;
+
+ err = emit_elf_data_sec(linker, BTF_ELF_SEC, 8, raw_data, raw_sz);
+ if (err) {
+ pr_warn("failed to write out .BTF ELF section: %d\n", err);
+ return err;
+ }
+
+ /* Emit .BTF.ext section */
+ if (linker->btf_ext) {
+ raw_data = btf_ext__get_raw_data(linker->btf_ext, &raw_sz);
+ if (!raw_data)
+ return -ENOMEM;
+
+ err = emit_elf_data_sec(linker, BTF_EXT_ELF_SEC, 8, raw_data, raw_sz);
+ if (err) {
+ pr_warn("failed to write out .BTF.ext ELF section: %d\n", err);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static int emit_btf_ext_data(struct bpf_linker *linker, void *output,
+ const char *sec_name, struct btf_ext_sec_data *sec_data)
+{
+ struct btf_ext_info_sec *sec_info;
+ void *cur = output;
+ int str_off;
+ size_t sz;
+
+ if (!sec_data->rec_cnt)
+ return 0;
+
+ str_off = btf__add_str(linker->btf, sec_name);
+ if (str_off < 0)
+ return -ENOMEM;
+
+ sec_info = cur;
+ sec_info->sec_name_off = str_off;
+ sec_info->num_info = sec_data->rec_cnt;
+ cur += sizeof(struct btf_ext_info_sec);
+
+ sz = sec_data->rec_cnt * sec_data->rec_sz;
+ memcpy(cur, sec_data->recs, sz);
+ cur += sz;
+
+ return cur - output;
+}
+
+static int finalize_btf_ext(struct bpf_linker *linker)
+{
+ size_t funcs_sz = 0, lines_sz = 0, core_relos_sz = 0, total_sz = 0;
+ size_t func_rec_sz = 0, line_rec_sz = 0, core_relo_rec_sz = 0;
+ struct btf_ext_header *hdr;
+ void *data, *cur;
+ int i, err, sz;
+
+ /* validate that all sections have the same .BTF.ext record sizes
+ * and calculate total data size for each type of data (func info,
+ * line info, core relos)
+ */
+ for (i = 1; i < linker->sec_cnt; i++) {
+ struct dst_sec *sec = &linker->secs[i];
+
+ if (sec->func_info.rec_cnt) {
+ if (func_rec_sz == 0)
+ func_rec_sz = sec->func_info.rec_sz;
+ if (func_rec_sz != sec->func_info.rec_sz) {
+ pr_warn("mismatch in func_info record size %zu != %u\n",
+ func_rec_sz, sec->func_info.rec_sz);
+ return -EINVAL;
+ }
+
+ funcs_sz += sizeof(struct btf_ext_info_sec) + func_rec_sz * sec->func_info.rec_cnt;
+ }
+ if (sec->line_info.rec_cnt) {
+ if (line_rec_sz == 0)
+ line_rec_sz = sec->line_info.rec_sz;
+ if (line_rec_sz != sec->line_info.rec_sz) {
+ pr_warn("mismatch in line_info record size %zu != %u\n",
+ line_rec_sz, sec->line_info.rec_sz);
+ return -EINVAL;
+ }
+
+ lines_sz += sizeof(struct btf_ext_info_sec) + line_rec_sz * sec->line_info.rec_cnt;
+ }
+ if (sec->core_relo_info.rec_cnt) {
+ if (core_relo_rec_sz == 0)
+ core_relo_rec_sz = sec->core_relo_info.rec_sz;
+ if (core_relo_rec_sz != sec->core_relo_info.rec_sz) {
+ pr_warn("mismatch in core_relo_info record size %zu != %u\n",
+ core_relo_rec_sz, sec->core_relo_info.rec_sz);
+ return -EINVAL;
+ }
+
+ core_relos_sz += sizeof(struct btf_ext_info_sec) + core_relo_rec_sz * sec->core_relo_info.rec_cnt;
+ }
+ }
+
+ if (!funcs_sz && !lines_sz && !core_relos_sz)
+ return 0;
+
+ total_sz += sizeof(struct btf_ext_header);
+ if (funcs_sz) {
+ funcs_sz += sizeof(__u32); /* record size prefix */
+ total_sz += funcs_sz;
+ }
+ if (lines_sz) {
+ lines_sz += sizeof(__u32); /* record size prefix */
+ total_sz += lines_sz;
+ }
+ if (core_relos_sz) {
+ core_relos_sz += sizeof(__u32); /* record size prefix */
+ total_sz += core_relos_sz;
+ }
+
+ cur = data = calloc(1, total_sz);
+ if (!data)
+ return -ENOMEM;
+
+ hdr = cur;
+ hdr->magic = BTF_MAGIC;
+ hdr->version = BTF_VERSION;
+ hdr->flags = 0;
+ hdr->hdr_len = sizeof(struct btf_ext_header);
+ cur += sizeof(struct btf_ext_header);
+
+ /* All offsets are in bytes relative to the end of this header */
+ hdr->func_info_off = 0;
+ hdr->func_info_len = funcs_sz;
+ hdr->line_info_off = funcs_sz;
+ hdr->line_info_len = lines_sz;
+ hdr->core_relo_off = funcs_sz + lines_sz;
+ hdr->core_relo_len = core_relos_sz;
+
+ if (funcs_sz) {
+ *(__u32 *)cur = func_rec_sz;
+ cur += sizeof(__u32);
+
+ for (i = 1; i < linker->sec_cnt; i++) {
+ struct dst_sec *sec = &linker->secs[i];
+
+ sz = emit_btf_ext_data(linker, cur, sec->sec_name, &sec->func_info);
+ if (sz < 0) {
+ err = sz;
+ goto out;
+ }
+
+ cur += sz;
+ }
+ }
+
+ if (lines_sz) {
+ *(__u32 *)cur = line_rec_sz;
+ cur += sizeof(__u32);
+
+ for (i = 1; i < linker->sec_cnt; i++) {
+ struct dst_sec *sec = &linker->secs[i];
+
+ sz = emit_btf_ext_data(linker, cur, sec->sec_name, &sec->line_info);
+ if (sz < 0) {
+ err = sz;
+ goto out;
+ }
+
+ cur += sz;
+ }
+ }
+
+ if (core_relos_sz) {
+ *(__u32 *)cur = core_relo_rec_sz;
+ cur += sizeof(__u32);
+
+ for (i = 1; i < linker->sec_cnt; i++) {
+ struct dst_sec *sec = &linker->secs[i];
+
+ sz = emit_btf_ext_data(linker, cur, sec->sec_name, &sec->core_relo_info);
+ if (sz < 0) {
+ err = sz;
+ goto out;
+ }
+
+ cur += sz;
+ }
+ }
+
+ linker->btf_ext = btf_ext__new(data, total_sz);
+ err = libbpf_get_error(linker->btf_ext);
+ if (err) {
+ linker->btf_ext = NULL;
+ pr_warn("failed to parse final .BTF.ext data: %d\n", err);
+ goto out;
+ }
+
+out:
+ free(data);
+ return err;
+}
diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
index 4dd73de00b6f..d2cb28e9ef52 100644
--- a/tools/lib/bpf/netlink.c
+++ b/tools/lib/bpf/netlink.c
@@ -40,7 +40,7 @@ static int libbpf_netlink_open(__u32 *nl_pid)
memset(&sa, 0, sizeof(sa));
sa.nl_family = AF_NETLINK;
- sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+ sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE);
if (sock < 0)
return -errno;
diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c
index 8caaafe7e312..e7a8d847161f 100644
--- a/tools/lib/bpf/ringbuf.c
+++ b/tools/lib/bpf/ringbuf.c
@@ -227,7 +227,7 @@ static int ringbuf_process_ring(struct ring* r)
if ((len & BPF_RINGBUF_DISCARD_BIT) == 0) {
sample = (void *)len_ptr + BPF_RINGBUF_HDR_SZ;
err = r->sample_cb(r->ctx, sample, len);
- if (err) {
+ if (err < 0) {
/* update consumer pos and bail out */
smp_store_release(r->consumer_pos,
cons_pos);
diff --git a/tools/lib/bpf/strset.c b/tools/lib/bpf/strset.c
new file mode 100644
index 000000000000..1fb8b49de1d6
--- /dev/null
+++ b/tools/lib/bpf/strset.c
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2021 Facebook */
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <linux/err.h>
+#include "hashmap.h"
+#include "libbpf_internal.h"
+#include "strset.h"
+
+struct strset {
+ void *strs_data;
+ size_t strs_data_len;
+ size_t strs_data_cap;
+ size_t strs_data_max_len;
+
+ /* lookup index for each unique string in strings set */
+ struct hashmap *strs_hash;
+};
+
+static size_t strset_hash_fn(const void *key, void *ctx)
+{
+ const struct strset *s = ctx;
+ const char *str = s->strs_data + (long)key;
+
+ return str_hash(str);
+}
+
+static bool strset_equal_fn(const void *key1, const void *key2, void *ctx)
+{
+ const struct strset *s = ctx;
+ const char *str1 = s->strs_data + (long)key1;
+ const char *str2 = s->strs_data + (long)key2;
+
+ return strcmp(str1, str2) == 0;
+}
+
+struct strset *strset__new(size_t max_data_sz, const char *init_data, size_t init_data_sz)
+{
+ struct strset *set = calloc(1, sizeof(*set));
+ struct hashmap *hash;
+ int err = -ENOMEM;
+
+ if (!set)
+ return ERR_PTR(-ENOMEM);
+
+ hash = hashmap__new(strset_hash_fn, strset_equal_fn, set);
+ if (IS_ERR(hash))
+ goto err_out;
+
+ set->strs_data_max_len = max_data_sz;
+ set->strs_hash = hash;
+
+ if (init_data) {
+ long off;
+
+ set->strs_data = malloc(init_data_sz);
+ if (!set->strs_data)
+ goto err_out;
+
+ memcpy(set->strs_data, init_data, init_data_sz);
+ set->strs_data_len = init_data_sz;
+ set->strs_data_cap = init_data_sz;
+
+ for (off = 0; off < set->strs_data_len; off += strlen(set->strs_data + off) + 1) {
+ /* hashmap__add() returns EEXIST if string with the same
+ * content already is in the hash map
+ */
+ err = hashmap__add(hash, (void *)off, (void *)off);
+ if (err == -EEXIST)
+ continue; /* duplicate */
+ if (err)
+ goto err_out;
+ }
+ }
+
+ return set;
+err_out:
+ strset__free(set);
+ return ERR_PTR(err);
+}
+
+void strset__free(struct strset *set)
+{
+ if (IS_ERR_OR_NULL(set))
+ return;
+
+ hashmap__free(set->strs_hash);
+ free(set->strs_data);
+}
+
+size_t strset__data_size(const struct strset *set)
+{
+ return set->strs_data_len;
+}
+
+const char *strset__data(const struct strset *set)
+{
+ return set->strs_data;
+}
+
+static void *strset_add_str_mem(struct strset *set, size_t add_sz)
+{
+ return libbpf_add_mem(&set->strs_data, &set->strs_data_cap, 1,
+ set->strs_data_len, set->strs_data_max_len, add_sz);
+}
+
+/* Find string offset that corresponds to a given string *s*.
+ * Returns:
+ * - >0 offset into string data, if string is found;
+ * - -ENOENT, if string is not in the string data;
+ * - <0, on any other error.
+ */
+int strset__find_str(struct strset *set, const char *s)
+{
+ long old_off, new_off, len;
+ void *p;
+
+ /* see strset__add_str() for why we do this */
+ len = strlen(s) + 1;
+ p = strset_add_str_mem(set, len);
+ if (!p)
+ return -ENOMEM;
+
+ new_off = set->strs_data_len;
+ memcpy(p, s, len);
+
+ if (hashmap__find(set->strs_hash, (void *)new_off, (void **)&old_off))
+ return old_off;
+
+ return -ENOENT;
+}
+
+/* Add a string s to the string data. If the string already exists, return its
+ * offset within string data.
+ * Returns:
+ * - > 0 offset into string data, on success;
+ * - < 0, on error.
+ */
+int strset__add_str(struct strset *set, const char *s)
+{
+ long old_off, new_off, len;
+ void *p;
+ int err;
+
+ /* Hashmap keys are always offsets within set->strs_data, so to even
+ * look up some string from the "outside", we need to first append it
+ * at the end, so that it can be addressed with an offset. Luckily,
+ * until set->strs_data_len is incremented, that string is just a piece
+ * of garbage for the rest of the code, so no harm, no foul. On the
+ * other hand, if the string is unique, it's already appended and
+ * ready to be used, only a simple set->strs_data_len increment away.
+ */
+ len = strlen(s) + 1;
+ p = strset_add_str_mem(set, len);
+ if (!p)
+ return -ENOMEM;
+
+ new_off = set->strs_data_len;
+ memcpy(p, s, len);
+
+ /* Now attempt to add the string, but only if the string with the same
+ * contents doesn't exist already (HASHMAP_ADD strategy). If such
+ * string exists, we'll get its offset in old_off (that's old_key).
+ */
+ err = hashmap__insert(set->strs_hash, (void *)new_off, (void *)new_off,
+ HASHMAP_ADD, (const void **)&old_off, NULL);
+ if (err == -EEXIST)
+ return old_off; /* duplicated string, return existing offset */
+ if (err)
+ return err;
+
+ set->strs_data_len += len; /* new unique string, adjust data length */
+ return new_off;
+}
diff --git a/tools/lib/bpf/strset.h b/tools/lib/bpf/strset.h
new file mode 100644
index 000000000000..b6ddf77a83c2
--- /dev/null
+++ b/tools/lib/bpf/strset.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/* Copyright (c) 2021 Facebook */
+#ifndef __LIBBPF_STRSET_H
+#define __LIBBPF_STRSET_H
+
+#include <stdbool.h>
+#include <stddef.h>
+
+struct strset;
+
+struct strset *strset__new(size_t max_data_sz, const char *init_data, size_t init_data_sz);
+void strset__free(struct strset *set);
+
+const char *strset__data(const struct strset *set);
+size_t strset__data_size(const struct strset *set);
+
+int strset__find_str(struct strset *set, const char *s);
+int strset__add_str(struct strset *set, const char *s);
+
+#endif /* __LIBBPF_STRSET_H */
diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index 526fc35c0b23..6061431ee04c 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -28,6 +28,7 @@
#include <sys/mman.h>
#include <sys/socket.h>
#include <sys/types.h>
+#include <linux/if_link.h>
#include "bpf.h"
#include "libbpf.h"
@@ -59,6 +60,8 @@ struct xsk_umem {
int fd;
int refcount;
struct list_head ctx_list;
+ bool rx_ring_setup_done;
+ bool tx_ring_setup_done;
};
struct xsk_ctx {
@@ -70,8 +73,10 @@ struct xsk_ctx {
int ifindex;
struct list_head list;
int prog_fd;
+ int link_fd;
int xsks_map_fd;
char ifname[IFNAMSIZ];
+ bool has_bpf_link;
};
struct xsk_socket {
@@ -409,7 +414,7 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
static const int log_buf_size = 16 * 1024;
struct xsk_ctx *ctx = xsk->ctx;
char log_buf[log_buf_size];
- int err, prog_fd;
+ int prog_fd;
/* This is the fallback C-program:
* SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)
@@ -499,14 +504,41 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)
return prog_fd;
}
- err = bpf_set_link_xdp_fd(xsk->ctx->ifindex, prog_fd,
- xsk->config.xdp_flags);
+ ctx->prog_fd = prog_fd;
+ return 0;
+}
+
+static int xsk_create_bpf_link(struct xsk_socket *xsk)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
+ struct xsk_ctx *ctx = xsk->ctx;
+ __u32 prog_id = 0;
+ int link_fd;
+ int err;
+
+ err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id, xsk->config.xdp_flags);
if (err) {
- close(prog_fd);
+ pr_warn("getting XDP prog id failed\n");
return err;
}
- ctx->prog_fd = prog_fd;
+ /* if there's a netlink-based XDP prog loaded on interface, bail out
+ * and ask user to do the removal by himself
+ */
+ if (prog_id) {
+ pr_warn("Netlink-based XDP prog detected, please unload it in order to launch AF_XDP prog\n");
+ return -EINVAL;
+ }
+
+ opts.flags = xsk->config.xdp_flags & ~(XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_REPLACE);
+
+ link_fd = bpf_link_create(ctx->prog_fd, ctx->ifindex, BPF_XDP, &opts);
+ if (link_fd < 0) {
+ pr_warn("bpf_link_create failed: %s\n", strerror(errno));
+ return link_fd;
+ }
+
+ ctx->link_fd = link_fd;
return 0;
}
@@ -625,7 +657,6 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
close(fd);
}
- err = 0;
if (ctx->xsks_map_fd == -1)
err = -ENOENT;
@@ -642,6 +673,98 @@ static int xsk_set_bpf_maps(struct xsk_socket *xsk)
&xsk->fd, 0);
}
+static int xsk_link_lookup(int ifindex, __u32 *prog_id, int *link_fd)
+{
+ struct bpf_link_info link_info;
+ __u32 link_len;
+ __u32 id = 0;
+ int err;
+ int fd;
+
+ while (true) {
+ err = bpf_link_get_next_id(id, &id);
+ if (err) {
+ if (errno == ENOENT) {
+ err = 0;
+ break;
+ }
+ pr_warn("can't get next link: %s\n", strerror(errno));
+ break;
+ }
+
+ fd = bpf_link_get_fd_by_id(id);
+ if (fd < 0) {
+ if (errno == ENOENT)
+ continue;
+ pr_warn("can't get link by id (%u): %s\n", id, strerror(errno));
+ err = -errno;
+ break;
+ }
+
+ link_len = sizeof(struct bpf_link_info);
+ memset(&link_info, 0, link_len);
+ err = bpf_obj_get_info_by_fd(fd, &link_info, &link_len);
+ if (err) {
+ pr_warn("can't get link info: %s\n", strerror(errno));
+ close(fd);
+ break;
+ }
+ if (link_info.type == BPF_LINK_TYPE_XDP) {
+ if (link_info.xdp.ifindex == ifindex) {
+ *link_fd = fd;
+ if (prog_id)
+ *prog_id = link_info.prog_id;
+ break;
+ }
+ }
+ close(fd);
+ }
+
+ return err;
+}
+
+static bool xsk_probe_bpf_link(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts,
+ .flags = XDP_FLAGS_SKB_MODE);
+ struct bpf_load_program_attr prog_attr;
+ struct bpf_insn insns[2] = {
+ BPF_MOV64_IMM(BPF_REG_0, XDP_PASS),
+ BPF_EXIT_INSN()
+ };
+ int prog_fd, link_fd = -1;
+ int ifindex_lo = 1;
+ bool ret = false;
+ int err;
+
+ err = xsk_link_lookup(ifindex_lo, NULL, &link_fd);
+ if (err)
+ return ret;
+
+ if (link_fd >= 0)
+ return true;
+
+ memset(&prog_attr, 0, sizeof(prog_attr));
+ prog_attr.prog_type = BPF_PROG_TYPE_XDP;
+ prog_attr.insns = insns;
+ prog_attr.insns_cnt = ARRAY_SIZE(insns);
+ prog_attr.license = "GPL";
+
+ prog_fd = bpf_load_program_xattr(&prog_attr, NULL, 0);
+ if (prog_fd < 0)
+ return ret;
+
+ link_fd = bpf_link_create(prog_fd, ifindex_lo, BPF_XDP, &opts);
+ close(prog_fd);
+
+ if (link_fd >= 0) {
+ ret = true;
+ close(link_fd);
+ }
+
+ return ret;
+}
+
static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk)
{
char ifname[IFNAMSIZ];
@@ -663,64 +786,108 @@ static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk)
ctx->ifname[IFNAMSIZ - 1] = 0;
xsk->ctx = ctx;
+ xsk->ctx->has_bpf_link = xsk_probe_bpf_link();
return 0;
}
-static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp,
- int *xsks_map_fd)
+static int xsk_init_xdp_res(struct xsk_socket *xsk,
+ int *xsks_map_fd)
{
- struct xsk_socket *xsk = _xdp;
struct xsk_ctx *ctx = xsk->ctx;
- __u32 prog_id = 0;
int err;
- err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id,
- xsk->config.xdp_flags);
+ err = xsk_create_bpf_maps(xsk);
if (err)
return err;
- if (!prog_id) {
- err = xsk_create_bpf_maps(xsk);
- if (err)
- return err;
+ err = xsk_load_xdp_prog(xsk);
+ if (err)
+ goto err_load_xdp_prog;
- err = xsk_load_xdp_prog(xsk);
- if (err) {
- goto err_load_xdp_prog;
- }
- } else {
- ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id);
- if (ctx->prog_fd < 0)
- return -errno;
- err = xsk_lookup_bpf_maps(xsk);
- if (err) {
- close(ctx->prog_fd);
- return err;
- }
- }
+ if (ctx->has_bpf_link)
+ err = xsk_create_bpf_link(xsk);
+ else
+ err = bpf_set_link_xdp_fd(xsk->ctx->ifindex, ctx->prog_fd,
+ xsk->config.xdp_flags);
- if (xsk->rx) {
- err = xsk_set_bpf_maps(xsk);
- if (err) {
- if (!prog_id) {
- goto err_set_bpf_maps;
- } else {
- close(ctx->prog_fd);
- return err;
- }
- }
- }
- if (xsks_map_fd)
- *xsks_map_fd = ctx->xsks_map_fd;
+ if (err)
+ goto err_attach_xdp_prog;
- return 0;
+ if (!xsk->rx)
+ return err;
+
+ err = xsk_set_bpf_maps(xsk);
+ if (err)
+ goto err_set_bpf_maps;
+
+ return err;
err_set_bpf_maps:
+ if (ctx->has_bpf_link)
+ close(ctx->link_fd);
+ else
+ bpf_set_link_xdp_fd(ctx->ifindex, -1, 0);
+err_attach_xdp_prog:
close(ctx->prog_fd);
- bpf_set_link_xdp_fd(ctx->ifindex, -1, 0);
err_load_xdp_prog:
xsk_delete_bpf_maps(xsk);
+ return err;
+}
+
+static int xsk_lookup_xdp_res(struct xsk_socket *xsk, int *xsks_map_fd, int prog_id)
+{
+ struct xsk_ctx *ctx = xsk->ctx;
+ int err;
+
+ ctx->prog_fd = bpf_prog_get_fd_by_id(prog_id);
+ if (ctx->prog_fd < 0) {
+ err = -errno;
+ goto err_prog_fd;
+ }
+ err = xsk_lookup_bpf_maps(xsk);
+ if (err)
+ goto err_lookup_maps;
+
+ if (!xsk->rx)
+ return err;
+
+ err = xsk_set_bpf_maps(xsk);
+ if (err)
+ goto err_set_maps;
+
+ return err;
+
+err_set_maps:
+ close(ctx->xsks_map_fd);
+err_lookup_maps:
+ close(ctx->prog_fd);
+err_prog_fd:
+ if (ctx->has_bpf_link)
+ close(ctx->link_fd);
+ return err;
+}
+
+static int __xsk_setup_xdp_prog(struct xsk_socket *_xdp, int *xsks_map_fd)
+{
+ struct xsk_socket *xsk = _xdp;
+ struct xsk_ctx *ctx = xsk->ctx;
+ __u32 prog_id = 0;
+ int err;
+
+ if (ctx->has_bpf_link)
+ err = xsk_link_lookup(ctx->ifindex, &prog_id, &ctx->link_fd);
+ else
+ err = bpf_get_link_xdp_id(ctx->ifindex, &prog_id, xsk->config.xdp_flags);
+
+ if (err)
+ return err;
+
+ err = !prog_id ? xsk_init_xdp_res(xsk, xsks_map_fd) :
+ xsk_lookup_xdp_res(xsk, xsks_map_fd, prog_id);
+
+ if (!err && xsks_map_fd)
+ *xsks_map_fd = ctx->xsks_map_fd;
return err;
}
@@ -743,26 +910,30 @@ static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex,
return NULL;
}
-static void xsk_put_ctx(struct xsk_ctx *ctx)
+static void xsk_put_ctx(struct xsk_ctx *ctx, bool unmap)
{
struct xsk_umem *umem = ctx->umem;
struct xdp_mmap_offsets off;
int err;
- if (--ctx->refcount == 0) {
- err = xsk_get_mmap_offsets(umem->fd, &off);
- if (!err) {
- munmap(ctx->fill->ring - off.fr.desc,
- off.fr.desc + umem->config.fill_size *
- sizeof(__u64));
- munmap(ctx->comp->ring - off.cr.desc,
- off.cr.desc + umem->config.comp_size *
- sizeof(__u64));
- }
+ if (--ctx->refcount)
+ return;
- list_del(&ctx->list);
- free(ctx);
- }
+ if (!unmap)
+ goto out_free;
+
+ err = xsk_get_mmap_offsets(umem->fd, &off);
+ if (err)
+ goto out_free;
+
+ munmap(ctx->fill->ring - off.fr.desc, off.fr.desc + umem->config.fill_size *
+ sizeof(__u64));
+ munmap(ctx->comp->ring - off.cr.desc, off.cr.desc + umem->config.comp_size *
+ sizeof(__u64));
+
+out_free:
+ list_del(&ctx->list);
+ free(ctx);
}
static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk,
@@ -797,8 +968,6 @@ static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk,
memcpy(ctx->ifname, ifname, IFNAMSIZ - 1);
ctx->ifname[IFNAMSIZ - 1] = '\0';
- umem->fill_save = NULL;
- umem->comp_save = NULL;
ctx->fill = fill;
ctx->comp = comp;
list_add(&ctx->list, &umem->ctx_list);
@@ -848,6 +1017,7 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
struct xsk_ring_cons *comp,
const struct xsk_socket_config *usr_config)
{
+ bool unmap, rx_setup_done = false, tx_setup_done = false;
void *rx_map = NULL, *tx_map = NULL;
struct sockaddr_xdp sxdp = {};
struct xdp_mmap_offsets off;
@@ -858,6 +1028,8 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
if (!umem || !xsk_ptr || !(rx || tx))
return -EFAULT;
+ unmap = umem->fill_save != fill;
+
xsk = calloc(1, sizeof(*xsk));
if (!xsk)
return -ENOMEM;
@@ -881,6 +1053,8 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
}
} else {
xsk->fd = umem->fd;
+ rx_setup_done = umem->rx_ring_setup_done;
+ tx_setup_done = umem->tx_ring_setup_done;
}
ctx = xsk_get_ctx(umem, ifindex, queue_id);
@@ -898,8 +1072,9 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
}
}
xsk->ctx = ctx;
+ xsk->ctx->has_bpf_link = xsk_probe_bpf_link();
- if (rx) {
+ if (rx && !rx_setup_done) {
err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
&xsk->config.rx_size,
sizeof(xsk->config.rx_size));
@@ -907,8 +1082,10 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
err = -errno;
goto out_put_ctx;
}
+ if (xsk->fd == umem->fd)
+ umem->rx_ring_setup_done = true;
}
- if (tx) {
+ if (tx && !tx_setup_done) {
err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING,
&xsk->config.tx_size,
sizeof(xsk->config.tx_size));
@@ -916,6 +1093,8 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
err = -errno;
goto out_put_ctx;
}
+ if (xsk->fd == umem->fd)
+ umem->rx_ring_setup_done = true;
}
err = xsk_get_mmap_offsets(xsk->fd, &off);
@@ -994,6 +1173,8 @@ int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
}
*xsk_ptr = xsk;
+ umem->fill_save = NULL;
+ umem->comp_save = NULL;
return 0;
out_mmap_tx:
@@ -1005,7 +1186,7 @@ out_mmap_rx:
munmap(rx_map, off.rx.desc +
xsk->config.rx_size * sizeof(struct xdp_desc));
out_put_ctx:
- xsk_put_ctx(ctx);
+ xsk_put_ctx(ctx, unmap);
out_socket:
if (--umem->refcount)
close(xsk->fd);
@@ -1019,6 +1200,9 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,
struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
const struct xsk_socket_config *usr_config)
{
+ if (!umem)
+ return -EFAULT;
+
return xsk_socket__create_shared(xsk_ptr, ifname, queue_id, umem,
rx, tx, umem->fill_save,
umem->comp_save, usr_config);
@@ -1054,6 +1238,8 @@ void xsk_socket__delete(struct xsk_socket *xsk)
if (ctx->prog_fd != -1) {
xsk_delete_bpf_maps(xsk);
close(ctx->prog_fd);
+ if (ctx->has_bpf_link)
+ close(ctx->link_fd);
}
err = xsk_get_mmap_offsets(xsk->fd, &off);
@@ -1068,7 +1254,7 @@ void xsk_socket__delete(struct xsk_socket *xsk)
}
}
- xsk_put_ctx(ctx);
+ xsk_put_ctx(ctx, true);
umem->refcount--;
/* Do not close an fd that also has an associated umem connected
diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h
index e9f121f5d129..01c12dca9c10 100644
--- a/tools/lib/bpf/xsk.h
+++ b/tools/lib/bpf/xsk.h
@@ -3,7 +3,8 @@
/*
* AF_XDP user-space access library.
*
- * Copyright(c) 2018 - 2019 Intel Corporation.
+ * Copyright (c) 2018 - 2019 Intel Corporation.
+ * Copyright (c) 2019 Facebook
*
* Author(s): Magnus Karlsson <magnus.karlsson@intel.com>
*/
@@ -13,15 +14,80 @@
#include <stdio.h>
#include <stdint.h>
+#include <stdbool.h>
#include <linux/if_xdp.h>
#include "libbpf.h"
-#include "libbpf_util.h"
#ifdef __cplusplus
extern "C" {
#endif
+/* Load-Acquire Store-Release barriers used by the XDP socket
+ * library. The following macros should *NOT* be considered part of
+ * the xsk.h API, and is subject to change anytime.
+ *
+ * LIBRARY INTERNAL
+ */
+
+#define __XSK_READ_ONCE(x) (*(volatile typeof(x) *)&x)
+#define __XSK_WRITE_ONCE(x, v) (*(volatile typeof(x) *)&x) = (v)
+
+#if defined(__i386__) || defined(__x86_64__)
+# define libbpf_smp_store_release(p, v) \
+ do { \
+ asm volatile("" : : : "memory"); \
+ __XSK_WRITE_ONCE(*p, v); \
+ } while (0)
+# define libbpf_smp_load_acquire(p) \
+ ({ \
+ typeof(*p) ___p1 = __XSK_READ_ONCE(*p); \
+ asm volatile("" : : : "memory"); \
+ ___p1; \
+ })
+#elif defined(__aarch64__)
+# define libbpf_smp_store_release(p, v) \
+ asm volatile ("stlr %w1, %0" : "=Q" (*p) : "r" (v) : "memory")
+# define libbpf_smp_load_acquire(p) \
+ ({ \
+ typeof(*p) ___p1; \
+ asm volatile ("ldar %w0, %1" \
+ : "=r" (___p1) : "Q" (*p) : "memory"); \
+ ___p1; \
+ })
+#elif defined(__riscv)
+# define libbpf_smp_store_release(p, v) \
+ do { \
+ asm volatile ("fence rw,w" : : : "memory"); \
+ __XSK_WRITE_ONCE(*p, v); \
+ } while (0)
+# define libbpf_smp_load_acquire(p) \
+ ({ \
+ typeof(*p) ___p1 = __XSK_READ_ONCE(*p); \
+ asm volatile ("fence r,rw" : : : "memory"); \
+ ___p1; \
+ })
+#endif
+
+#ifndef libbpf_smp_store_release
+#define libbpf_smp_store_release(p, v) \
+ do { \
+ __sync_synchronize(); \
+ __XSK_WRITE_ONCE(*p, v); \
+ } while (0)
+#endif
+
+#ifndef libbpf_smp_load_acquire
+#define libbpf_smp_load_acquire(p) \
+ ({ \
+ typeof(*p) ___p1 = __XSK_READ_ONCE(*p); \
+ __sync_synchronize(); \
+ ___p1; \
+ })
+#endif
+
+/* LIBRARY INTERNAL -- END */
+
/* Do not access these members directly. Use the functions below. */
#define DEFINE_XSK_RING(name) \
struct name { \
@@ -96,7 +162,8 @@ static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb)
* this function. Without this optimization it whould have been
* free_entries = r->cached_prod - r->cached_cons + r->size.
*/
- r->cached_cons = *r->consumer + r->size;
+ r->cached_cons = libbpf_smp_load_acquire(r->consumer);
+ r->cached_cons += r->size;
return r->cached_cons - r->cached_prod;
}
@@ -106,7 +173,7 @@ static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb)
__u32 entries = r->cached_prod - r->cached_cons;
if (entries == 0) {
- r->cached_prod = *r->producer;
+ r->cached_prod = libbpf_smp_load_acquire(r->producer);
entries = r->cached_prod - r->cached_cons;
}
@@ -129,9 +196,7 @@ static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, __u32 nb)
/* Make sure everything has been written to the ring before indicating
* this to the kernel by writing the producer pointer.
*/
- libbpf_smp_wmb();
-
- *prod->producer += nb;
+ libbpf_smp_store_release(prod->producer, *prod->producer + nb);
}
static inline __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __u32 *idx)
@@ -139,11 +204,6 @@ static inline __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __
__u32 entries = xsk_cons_nb_avail(cons, nb);
if (entries > 0) {
- /* Make sure we do not speculatively read the data before
- * we have received the packet buffers from the ring.
- */
- libbpf_smp_rmb();
-
*idx = cons->cached_cons;
cons->cached_cons += entries;
}
@@ -161,9 +221,8 @@ static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, __u32 nb)
/* Make sure data has been read before indicating we are done
* with the entries by updating the consumer pointer.
*/
- libbpf_smp_rwmb();
+ libbpf_smp_store_release(cons->consumer, *cons->consumer + nb);
- *cons->consumer += nb;
}
static inline void *xsk_umem__get_data(void *umem_area, __u64 addr)
diff --git a/tools/memory-model/Documentation/access-marking.txt b/tools/memory-model/Documentation/access-marking.txt
new file mode 100644
index 000000000000..1ab189f51f55
--- /dev/null
+++ b/tools/memory-model/Documentation/access-marking.txt
@@ -0,0 +1,479 @@
+MARKING SHARED-MEMORY ACCESSES
+==============================
+
+This document provides guidelines for marking intentionally concurrent
+normal accesses to shared memory, that is "normal" as in accesses that do
+not use read-modify-write atomic operations. It also describes how to
+document these accesses, both with comments and with special assertions
+processed by the Kernel Concurrency Sanitizer (KCSAN). This discussion
+builds on an earlier LWN article [1].
+
+
+ACCESS-MARKING OPTIONS
+======================
+
+The Linux kernel provides the following access-marking options:
+
+1. Plain C-language accesses (unmarked), for example, "a = b;"
+
+2. Data-race marking, for example, "data_race(a = b);"
+
+3. READ_ONCE(), for example, "a = READ_ONCE(b);"
+ The various forms of atomic_read() also fit in here.
+
+4. WRITE_ONCE(), for example, "WRITE_ONCE(a, b);"
+ The various forms of atomic_set() also fit in here.
+
+
+These may be used in combination, as shown in this admittedly improbable
+example:
+
+ WRITE_ONCE(a, b + data_race(c + d) + READ_ONCE(e));
+
+Neither plain C-language accesses nor data_race() (#1 and #2 above) place
+any sort of constraint on the compiler's choice of optimizations [2].
+In contrast, READ_ONCE() and WRITE_ONCE() (#3 and #4 above) restrict the
+compiler's use of code-motion and common-subexpression optimizations.
+Therefore, if a given access is involved in an intentional data race,
+using READ_ONCE() for loads and WRITE_ONCE() for stores is usually
+preferable to data_race(), which in turn is usually preferable to plain
+C-language accesses.
+
+KCSAN will complain about many types of data races involving plain
+C-language accesses, but marking all accesses involved in a given data
+race with one of data_race(), READ_ONCE(), or WRITE_ONCE(), will prevent
+KCSAN from complaining. Of course, lack of KCSAN complaints does not
+imply correct code. Therefore, please take a thoughtful approach
+when responding to KCSAN complaints. Churning the code base with
+ill-considered additions of data_race(), READ_ONCE(), and WRITE_ONCE()
+is unhelpful.
+
+In fact, the following sections describe situations where use of
+data_race() and even plain C-language accesses is preferable to
+READ_ONCE() and WRITE_ONCE().
+
+
+Use of the data_race() Macro
+----------------------------
+
+Here are some situations where data_race() should be used instead of
+READ_ONCE() and WRITE_ONCE():
+
+1. Data-racy loads from shared variables whose values are used only
+ for diagnostic purposes.
+
+2. Data-racy reads whose values are checked against marked reload.
+
+3. Reads whose values feed into error-tolerant heuristics.
+
+4. Writes setting values that feed into error-tolerant heuristics.
+
+
+Data-Racy Reads for Approximate Diagnostics
+
+Approximate diagnostics include lockdep reports, monitoring/statistics
+(including /proc and /sys output), WARN*()/BUG*() checks whose return
+values are ignored, and other situations where reads from shared variables
+are not an integral part of the core concurrency design.
+
+In fact, use of data_race() instead READ_ONCE() for these diagnostic
+reads can enable better checking of the remaining accesses implementing
+the core concurrency design. For example, suppose that the core design
+prevents any non-diagnostic reads from shared variable x from running
+concurrently with updates to x. Then using plain C-language writes
+to x allows KCSAN to detect reads from x from within regions of code
+that fail to exclude the updates. In this case, it is important to use
+data_race() for the diagnostic reads because otherwise KCSAN would give
+false-positive warnings about these diagnostic reads.
+
+In theory, plain C-language loads can also be used for this use case.
+However, in practice this will have the disadvantage of causing KCSAN
+to generate false positives because KCSAN will have no way of knowing
+that the resulting data race was intentional.
+
+
+Data-Racy Reads That Are Checked Against Marked Reload
+
+The values from some reads are not implicitly trusted. They are instead
+fed into some operation that checks the full value against a later marked
+load from memory, which means that the occasional arbitrarily bogus value
+is not a problem. For example, if a bogus value is fed into cmpxchg(),
+all that happens is that this cmpxchg() fails, which normally results
+in a retry. Unless the race condition that resulted in the bogus value
+recurs, this retry will with high probability succeed, so no harm done.
+
+However, please keep in mind that a data_race() load feeding into
+a cmpxchg_relaxed() might still be subject to load fusing on some
+architectures. Therefore, it is best to capture the return value from
+the failing cmpxchg() for the next iteration of the loop, an approach
+that provides the compiler much less scope for mischievous optimizations.
+Capturing the return value from cmpxchg() also saves a memory reference
+in many cases.
+
+In theory, plain C-language loads can also be used for this use case.
+However, in practice this will have the disadvantage of causing KCSAN
+to generate false positives because KCSAN will have no way of knowing
+that the resulting data race was intentional.
+
+
+Reads Feeding Into Error-Tolerant Heuristics
+
+Values from some reads feed into heuristics that can tolerate occasional
+errors. Such reads can use data_race(), thus allowing KCSAN to focus on
+the other accesses to the relevant shared variables. But please note
+that data_race() loads are subject to load fusing, which can result in
+consistent errors, which in turn are quite capable of breaking heuristics.
+Therefore use of data_race() should be limited to cases where some other
+code (such as a barrier() call) will force the occasional reload.
+
+In theory, plain C-language loads can also be used for this use case.
+However, in practice this will have the disadvantage of causing KCSAN
+to generate false positives because KCSAN will have no way of knowing
+that the resulting data race was intentional.
+
+
+Writes Setting Values Feeding Into Error-Tolerant Heuristics
+
+The values read into error-tolerant heuristics come from somewhere,
+for example, from sysfs. This means that some code in sysfs writes
+to this same variable, and these writes can also use data_race().
+After all, if the heuristic can tolerate the occasional bogus value
+due to compiler-mangled reads, it can also tolerate the occasional
+compiler-mangled write, at least assuming that the proper value is in
+place once the write completes.
+
+Plain C-language stores can also be used for this use case. However,
+in kernels built with CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n, this
+will have the disadvantage of causing KCSAN to generate false positives
+because KCSAN will have no way of knowing that the resulting data race
+was intentional.
+
+
+Use of Plain C-Language Accesses
+--------------------------------
+
+Here are some example situations where plain C-language accesses should
+used instead of READ_ONCE(), WRITE_ONCE(), and data_race():
+
+1. Accesses protected by mutual exclusion, including strict locking
+ and sequence locking.
+
+2. Initialization-time and cleanup-time accesses. This covers a
+ wide variety of situations, including the uniprocessor phase of
+ system boot, variables to be used by not-yet-spawned kthreads,
+ structures not yet published to reference-counted or RCU-protected
+ data structures, and the cleanup side of any of these situations.
+
+3. Per-CPU variables that are not accessed from other CPUs.
+
+4. Private per-task variables, including on-stack variables, some
+ fields in the task_struct structure, and task-private heap data.
+
+5. Any other loads for which there is not supposed to be a concurrent
+ store to that same variable.
+
+6. Any other stores for which there should be neither concurrent
+ loads nor concurrent stores to that same variable.
+
+ But note that KCSAN makes two explicit exceptions to this rule
+ by default, refraining from flagging plain C-language stores:
+
+ a. No matter what. You can override this default by building
+ with CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n.
+
+ b. When the store writes the value already contained in
+ that variable. You can override this default by building
+ with CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n.
+
+ c. When one of the stores is in an interrupt handler and
+ the other in the interrupted code. You can override this
+ default by building with CONFIG_KCSAN_INTERRUPT_WATCHER=y.
+
+Note that it is important to use plain C-language accesses in these cases,
+because doing otherwise prevents KCSAN from detecting violations of your
+code's synchronization rules.
+
+
+ACCESS-DOCUMENTATION OPTIONS
+============================
+
+It is important to comment marked accesses so that people reading your
+code, yourself included, are reminded of the synchronization design.
+However, it is even more important to comment plain C-language accesses
+that are intentionally involved in data races. Such comments are
+needed to remind people reading your code, again, yourself included,
+of how the compiler has been prevented from optimizing those accesses
+into concurrency bugs.
+
+It is also possible to tell KCSAN about your synchronization design.
+For example, ASSERT_EXCLUSIVE_ACCESS(foo) tells KCSAN that any
+concurrent access to variable foo by any other CPU is an error, even
+if that concurrent access is marked with READ_ONCE(). In addition,
+ASSERT_EXCLUSIVE_WRITER(foo) tells KCSAN that although it is OK for there
+to be concurrent reads from foo from other CPUs, it is an error for some
+other CPU to be concurrently writing to foo, even if that concurrent
+write is marked with data_race() or WRITE_ONCE().
+
+Note that although KCSAN will call out data races involving either
+ASSERT_EXCLUSIVE_ACCESS() or ASSERT_EXCLUSIVE_WRITER() on the one hand
+and data_race() writes on the other, KCSAN will not report the location
+of these data_race() writes.
+
+
+EXAMPLES
+========
+
+As noted earlier, the goal is to prevent the compiler from destroying
+your concurrent algorithm, to help the human reader, and to inform
+KCSAN of aspects of your concurrency design. This section looks at a
+few examples showing how this can be done.
+
+
+Lock Protection With Lockless Diagnostic Access
+-----------------------------------------------
+
+For example, suppose a shared variable "foo" is read only while a
+reader-writer spinlock is read-held, written only while that same
+spinlock is write-held, except that it is also read locklessly for
+diagnostic purposes. The code might look as follows:
+
+ int foo;
+ DEFINE_RWLOCK(foo_rwlock);
+
+ void update_foo(int newval)
+ {
+ write_lock(&foo_rwlock);
+ foo = newval;
+ do_something(newval);
+ write_unlock(&foo_rwlock);
+ }
+
+ int read_foo(void)
+ {
+ int ret;
+
+ read_lock(&foo_rwlock);
+ do_something_else();
+ ret = foo;
+ read_unlock(&foo_rwlock);
+ return ret;
+ }
+
+ int read_foo_diagnostic(void)
+ {
+ return data_race(foo);
+ }
+
+The reader-writer lock prevents the compiler from introducing concurrency
+bugs into any part of the main algorithm using foo, which means that
+the accesses to foo within both update_foo() and read_foo() can (and
+should) be plain C-language accesses. One benefit of making them be
+plain C-language accesses is that KCSAN can detect any erroneous lockless
+reads from or updates to foo. The data_race() in read_foo_diagnostic()
+tells KCSAN that data races are expected, and should be silently
+ignored. This data_race() also tells the human reading the code that
+read_foo_diagnostic() might sometimes return a bogus value.
+
+However, please note that your kernel must be built with
+CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n in order for KCSAN to
+detect a buggy lockless write. If you need KCSAN to detect such a
+write even if that write did not change the value of foo, you also
+need CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n. If you need KCSAN to
+detect such a write happening in an interrupt handler running on the
+same CPU doing the legitimate lock-protected write, you also need
+CONFIG_KCSAN_INTERRUPT_WATCHER=y. With some or all of these Kconfig
+options set properly, KCSAN can be quite helpful, although it is not
+necessarily a full replacement for hardware watchpoints. On the other
+hand, neither are hardware watchpoints a full replacement for KCSAN
+because it is not always easy to tell hardware watchpoint to conditionally
+trap on accesses.
+
+
+Lock-Protected Writes With Lockless Reads
+-----------------------------------------
+
+For another example, suppose a shared variable "foo" is updated only
+while holding a spinlock, but is read locklessly. The code might look
+as follows:
+
+ int foo;
+ DEFINE_SPINLOCK(foo_lock);
+
+ void update_foo(int newval)
+ {
+ spin_lock(&foo_lock);
+ WRITE_ONCE(foo, newval);
+ ASSERT_EXCLUSIVE_WRITER(foo);
+ do_something(newval);
+ spin_unlock(&foo_wlock);
+ }
+
+ int read_foo(void)
+ {
+ do_something_else();
+ return READ_ONCE(foo);
+ }
+
+Because foo is read locklessly, all accesses are marked. The purpose
+of the ASSERT_EXCLUSIVE_WRITER() is to allow KCSAN to check for a buggy
+concurrent lockless write.
+
+
+Lockless Reads and Writes
+-------------------------
+
+For another example, suppose a shared variable "foo" is both read and
+updated locklessly. The code might look as follows:
+
+ int foo;
+
+ int update_foo(int newval)
+ {
+ int ret;
+
+ ret = xchg(&foo, newval);
+ do_something(newval);
+ return ret;
+ }
+
+ int read_foo(void)
+ {
+ do_something_else();
+ return READ_ONCE(foo);
+ }
+
+Because foo is accessed locklessly, all accesses are marked. It does
+not make sense to use ASSERT_EXCLUSIVE_WRITER() in this case because
+there really can be concurrent lockless writers. KCSAN would
+flag any concurrent plain C-language reads from foo, and given
+CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n, also any concurrent plain
+C-language writes to foo.
+
+
+Lockless Reads and Writes, But With Single-Threaded Initialization
+------------------------------------------------------------------
+
+For yet another example, suppose that foo is initialized in a
+single-threaded manner, but that a number of kthreads are then created
+that locklessly and concurrently access foo. Some snippets of this code
+might look as follows:
+
+ int foo;
+
+ void initialize_foo(int initval, int nkthreads)
+ {
+ int i;
+
+ foo = initval;
+ ASSERT_EXCLUSIVE_ACCESS(foo);
+ for (i = 0; i < nkthreads; i++)
+ kthread_run(access_foo_concurrently, ...);
+ }
+
+ /* Called from access_foo_concurrently(). */
+ int update_foo(int newval)
+ {
+ int ret;
+
+ ret = xchg(&foo, newval);
+ do_something(newval);
+ return ret;
+ }
+
+ /* Also called from access_foo_concurrently(). */
+ int read_foo(void)
+ {
+ do_something_else();
+ return READ_ONCE(foo);
+ }
+
+The initialize_foo() uses a plain C-language write to foo because there
+are not supposed to be concurrent accesses during initialization. The
+ASSERT_EXCLUSIVE_ACCESS() allows KCSAN to flag buggy concurrent unmarked
+reads, and the ASSERT_EXCLUSIVE_ACCESS() call further allows KCSAN to
+flag buggy concurrent writes, even if: (1) Those writes are marked or
+(2) The kernel was built with CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=y.
+
+
+Checking Stress-Test Race Coverage
+----------------------------------
+
+When designing stress tests it is important to ensure that race conditions
+of interest really do occur. For example, consider the following code
+fragment:
+
+ int foo;
+
+ int update_foo(int newval)
+ {
+ return xchg(&foo, newval);
+ }
+
+ int xor_shift_foo(int shift, int mask)
+ {
+ int old, new, newold;
+
+ newold = data_race(foo); /* Checked by cmpxchg(). */
+ do {
+ old = newold;
+ new = (old << shift) ^ mask;
+ newold = cmpxchg(&foo, old, new);
+ } while (newold != old);
+ return old;
+ }
+
+ int read_foo(void)
+ {
+ return READ_ONCE(foo);
+ }
+
+If it is possible for update_foo(), xor_shift_foo(), and read_foo() to be
+invoked concurrently, the stress test should force this concurrency to
+actually happen. KCSAN can evaluate the stress test when the above code
+is modified to read as follows:
+
+ int foo;
+
+ int update_foo(int newval)
+ {
+ ASSERT_EXCLUSIVE_ACCESS(foo);
+ return xchg(&foo, newval);
+ }
+
+ int xor_shift_foo(int shift, int mask)
+ {
+ int old, new, newold;
+
+ newold = data_race(foo); /* Checked by cmpxchg(). */
+ do {
+ old = newold;
+ new = (old << shift) ^ mask;
+ ASSERT_EXCLUSIVE_ACCESS(foo);
+ newold = cmpxchg(&foo, old, new);
+ } while (newold != old);
+ return old;
+ }
+
+
+ int read_foo(void)
+ {
+ ASSERT_EXCLUSIVE_ACCESS(foo);
+ return READ_ONCE(foo);
+ }
+
+If a given stress-test run does not result in KCSAN complaints from
+each possible pair of ASSERT_EXCLUSIVE_ACCESS() invocations, the
+stress test needs improvement. If the stress test was to be evaluated
+on a regular basis, it would be wise to place the above instances of
+ASSERT_EXCLUSIVE_ACCESS() under #ifdef so that they did not result in
+false positives when not evaluating the stress test.
+
+
+REFERENCES
+==========
+
+[1] "Concurrency bugs should fear the big bad data-race detector (part 2)"
+ https://lwn.net/Articles/816854/
+
+[2] "Who's afraid of a big bad optimizing compiler?"
+ https://lwn.net/Articles/793253/
diff --git a/tools/memory-model/Documentation/glossary.txt b/tools/memory-model/Documentation/glossary.txt
index b2da6365be63..6f3d16dbf467 100644
--- a/tools/memory-model/Documentation/glossary.txt
+++ b/tools/memory-model/Documentation/glossary.txt
@@ -19,7 +19,7 @@ Address Dependency: When the address of a later memory access is computed
from the value returned by the rcu_dereference() on line 2, the
address dependency extends from that rcu_dereference() to that
"p->a". In rare cases, optimizing compilers can destroy address
- dependencies. Please see Documentation/RCU/rcu_dereference.txt
+ dependencies. Please see Documentation/RCU/rcu_dereference.rst
for more information.
See also "Control Dependency" and "Data Dependency".
diff --git a/tools/memory-model/Documentation/simple.txt b/tools/memory-model/Documentation/simple.txt
index 81e1a0ec5342..4c789ec8334f 100644
--- a/tools/memory-model/Documentation/simple.txt
+++ b/tools/memory-model/Documentation/simple.txt
@@ -189,7 +189,6 @@ Additional information may be found in these files:
Documentation/atomic_t.txt
Documentation/atomic_bitops.txt
-Documentation/core-api/atomic_ops.rst
Documentation/core-api/refcount-vs-atomic.rst
Reading code using these primitives is often also quite helpful.
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
index 549813cff8ab..cedf3ede7545 100644
--- a/tools/objtool/arch/x86/decode.c
+++ b/tools/objtool/arch/x86/decode.c
@@ -11,22 +11,15 @@
#include "../../../arch/x86/lib/inat.c"
#include "../../../arch/x86/lib/insn.c"
+#define CONFIG_64BIT 1
+#include <asm/nops.h>
+
#include <asm/orc_types.h>
#include <objtool/check.h>
#include <objtool/elf.h>
#include <objtool/arch.h>
#include <objtool/warn.h>
-
-static unsigned char op_to_cfi_reg[][2] = {
- {CFI_AX, CFI_R8},
- {CFI_CX, CFI_R9},
- {CFI_DX, CFI_R10},
- {CFI_BX, CFI_R11},
- {CFI_SP, CFI_R12},
- {CFI_BP, CFI_R13},
- {CFI_SI, CFI_R14},
- {CFI_DI, CFI_R15},
-};
+#include <arch/elf.h>
static int is_x86_64(const struct elf *elf)
{
@@ -83,6 +76,31 @@ unsigned long arch_jump_destination(struct instruction *insn)
return -1; \
else for (list_add_tail(&op->list, ops_list); op; op = NULL)
+/*
+ * Helpers to decode ModRM/SIB:
+ *
+ * r/m| AX CX DX BX | SP | BP | SI DI |
+ * | R8 R9 R10 R11 | R12 | R13 | R14 R15 |
+ * Mod+----------------+-----+-----+---------+
+ * 00 | [r/m] |[SIB]|[IP+]| [r/m] |
+ * 01 | [r/m + d8] |[S+d]| [r/m + d8] |
+ * 10 | [r/m + d32] |[S+D]| [r/m + d32] |
+ * 11 | r/ m |
+ */
+
+#define mod_is_mem() (modrm_mod != 3)
+#define mod_is_reg() (modrm_mod == 3)
+
+#define is_RIP() ((modrm_rm & 7) == CFI_BP && modrm_mod == 0)
+#define have_SIB() ((modrm_rm & 7) == CFI_SP && mod_is_mem())
+
+#define rm_is(reg) (have_SIB() ? \
+ sib_base == (reg) && sib_index == CFI_SP : \
+ modrm_rm == (reg))
+
+#define rm_is_mem(reg) (mod_is_mem() && !is_RIP() && rm_is(reg))
+#define rm_is_reg(reg) (mod_is_reg() && modrm_rm == (reg))
+
int arch_decode_instruction(const struct elf *elf, const struct section *sec,
unsigned long offset, unsigned int maxlen,
unsigned int *len, enum insn_type *type,
@@ -90,21 +108,22 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
struct list_head *ops_list)
{
struct insn insn;
- int x86_64, sign;
- unsigned char op1, op2, rex = 0, rex_b = 0, rex_r = 0, rex_w = 0,
- rex_x = 0, modrm = 0, modrm_mod = 0, modrm_rm = 0,
- modrm_reg = 0, sib = 0;
+ int x86_64, ret;
+ unsigned char op1, op2,
+ rex = 0, rex_b = 0, rex_r = 0, rex_w = 0, rex_x = 0,
+ modrm = 0, modrm_mod = 0, modrm_rm = 0, modrm_reg = 0,
+ sib = 0, /* sib_scale = 0, */ sib_index = 0, sib_base = 0;
struct stack_op *op = NULL;
struct symbol *sym;
+ u64 imm;
x86_64 = is_x86_64(elf);
if (x86_64 == -1)
return -1;
- insn_init(&insn, sec->data->d_buf + offset, maxlen, x86_64);
- insn_get_length(&insn);
-
- if (!insn_complete(&insn)) {
+ ret = insn_decode(&insn, sec->data->d_buf + offset, maxlen,
+ x86_64 ? INSN_MODE_64 : INSN_MODE_32);
+ if (ret < 0) {
WARN("can't decode instruction at %s:0x%lx", sec->name, offset);
return -1;
}
@@ -129,23 +148,27 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
if (insn.modrm.nbytes) {
modrm = insn.modrm.bytes[0];
modrm_mod = X86_MODRM_MOD(modrm);
- modrm_reg = X86_MODRM_REG(modrm);
- modrm_rm = X86_MODRM_RM(modrm);
+ modrm_reg = X86_MODRM_REG(modrm) + 8*rex_r;
+ modrm_rm = X86_MODRM_RM(modrm) + 8*rex_b;
}
- if (insn.sib.nbytes)
+ if (insn.sib.nbytes) {
sib = insn.sib.bytes[0];
+ /* sib_scale = X86_SIB_SCALE(sib); */
+ sib_index = X86_SIB_INDEX(sib) + 8*rex_x;
+ sib_base = X86_SIB_BASE(sib) + 8*rex_b;
+ }
switch (op1) {
case 0x1:
case 0x29:
- if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) {
+ if (rex_w && rm_is_reg(CFI_SP)) {
/* add/sub reg, %rsp */
ADD_OP(op) {
op->src.type = OP_SRC_ADD;
- op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ op->src.reg = modrm_reg;
op->dest.type = OP_DEST_REG;
op->dest.reg = CFI_SP;
}
@@ -157,7 +180,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
/* push reg */
ADD_OP(op) {
op->src.type = OP_SRC_REG;
- op->src.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
+ op->src.reg = (op1 & 0x7) + 8*rex_b;
op->dest.type = OP_DEST_PUSH;
}
@@ -169,7 +192,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
ADD_OP(op) {
op->src.type = OP_SRC_POP;
op->dest.type = OP_DEST_REG;
- op->dest.reg = op_to_cfi_reg[op1 & 0x7][rex_b];
+ op->dest.reg = (op1 & 0x7) + 8*rex_b;
}
break;
@@ -187,12 +210,54 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
*type = INSN_JUMP_CONDITIONAL;
break;
- case 0x81:
- case 0x83:
- if (rex != 0x48)
+ case 0x80 ... 0x83:
+ /*
+ * 1000 00sw : mod OP r/m : immediate
+ *
+ * s - sign extend immediate
+ * w - imm8 / imm32
+ *
+ * OP: 000 ADD 100 AND
+ * 001 OR 101 SUB
+ * 010 ADC 110 XOR
+ * 011 SBB 111 CMP
+ */
+
+ /* 64bit only */
+ if (!rex_w)
+ break;
+
+ /* %rsp target only */
+ if (!rm_is_reg(CFI_SP))
break;
- if (modrm == 0xe4) {
+ imm = insn.immediate.value;
+ if (op1 & 2) { /* sign extend */
+ if (op1 & 1) { /* imm32 */
+ imm <<= 32;
+ imm = (s64)imm >> 32;
+ } else { /* imm8 */
+ imm <<= 56;
+ imm = (s64)imm >> 56;
+ }
+ }
+
+ switch (modrm_reg & 7) {
+ case 5:
+ imm = -imm;
+ /* fallthrough */
+ case 0:
+ /* add/sub imm, %rsp */
+ ADD_OP(op) {
+ op->src.type = OP_SRC_ADD;
+ op->src.reg = CFI_SP;
+ op->src.offset = imm;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+ }
+ break;
+
+ case 4:
/* and imm, %rsp */
ADD_OP(op) {
op->src.type = OP_SRC_AND;
@@ -202,53 +267,48 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
op->dest.reg = CFI_SP;
}
break;
- }
- if (modrm == 0xc4)
- sign = 1;
- else if (modrm == 0xec)
- sign = -1;
- else
+ default:
+ /* WARN ? */
break;
-
- /* add/sub imm, %rsp */
- ADD_OP(op) {
- op->src.type = OP_SRC_ADD;
- op->src.reg = CFI_SP;
- op->src.offset = insn.immediate.value * sign;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = CFI_SP;
}
+
break;
case 0x89:
- if (rex_w && !rex_r && modrm_reg == 4) {
+ if (!rex_w)
+ break;
- if (modrm_mod == 3) {
+ if (modrm_reg == CFI_SP) {
+
+ if (mod_is_reg()) {
/* mov %rsp, reg */
ADD_OP(op) {
op->src.type = OP_SRC_REG;
op->src.reg = CFI_SP;
op->dest.type = OP_DEST_REG;
- op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b];
+ op->dest.reg = modrm_rm;
}
break;
} else {
- /* skip nontrivial SIB */
- if (modrm_rm == 4 && !(sib == 0x24 && rex_b == rex_x))
- break;
-
/* skip RIP relative displacement */
- if (modrm_rm == 5 && modrm_mod == 0)
+ if (is_RIP())
break;
+ /* skip nontrivial SIB */
+ if (have_SIB()) {
+ modrm_rm = sib_base;
+ if (sib_index != CFI_SP)
+ break;
+ }
+
/* mov %rsp, disp(%reg) */
ADD_OP(op) {
op->src.type = OP_SRC_REG;
op->src.reg = CFI_SP;
op->dest.type = OP_DEST_REG_INDIRECT;
- op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b];
+ op->dest.reg = modrm_rm;
op->dest.offset = insn.displacement.value;
}
break;
@@ -257,12 +317,12 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
break;
}
- if (rex_w && !rex_b && modrm_mod == 3 && modrm_rm == 4) {
+ if (rm_is_reg(CFI_SP)) {
/* mov reg, %rsp */
ADD_OP(op) {
op->src.type = OP_SRC_REG;
- op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ op->src.reg = modrm_reg;
op->dest.type = OP_DEST_REG;
op->dest.reg = CFI_SP;
}
@@ -271,13 +331,15 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
/* fallthrough */
case 0x88:
- if (!rex_b &&
- (modrm_mod == 1 || modrm_mod == 2) && modrm_rm == 5) {
+ if (!rex_w)
+ break;
+
+ if (rm_is_mem(CFI_BP)) {
/* mov reg, disp(%rbp) */
ADD_OP(op) {
op->src.type = OP_SRC_REG;
- op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ op->src.reg = modrm_reg;
op->dest.type = OP_DEST_REG_INDIRECT;
op->dest.reg = CFI_BP;
op->dest.offset = insn.displacement.value;
@@ -285,12 +347,12 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
break;
}
- if (rex_w && !rex_b && modrm_rm == 4 && sib == 0x24) {
+ if (rm_is_mem(CFI_SP)) {
/* mov reg, disp(%rsp) */
ADD_OP(op) {
op->src.type = OP_SRC_REG;
- op->src.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ op->src.reg = modrm_reg;
op->dest.type = OP_DEST_REG_INDIRECT;
op->dest.reg = CFI_SP;
op->dest.offset = insn.displacement.value;
@@ -301,7 +363,10 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
break;
case 0x8b:
- if (rex_w && !rex_b && modrm_mod == 1 && modrm_rm == 5) {
+ if (!rex_w)
+ break;
+
+ if (rm_is_mem(CFI_BP)) {
/* mov disp(%rbp), reg */
ADD_OP(op) {
@@ -309,11 +374,12 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
op->src.reg = CFI_BP;
op->src.offset = insn.displacement.value;
op->dest.type = OP_DEST_REG;
- op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ op->dest.reg = modrm_reg;
}
+ break;
+ }
- } else if (rex_w && !rex_b && sib == 0x24 &&
- modrm_mod != 3 && modrm_rm == 4) {
+ if (rm_is_mem(CFI_SP)) {
/* mov disp(%rsp), reg */
ADD_OP(op) {
@@ -321,75 +387,48 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
op->src.reg = CFI_SP;
op->src.offset = insn.displacement.value;
op->dest.type = OP_DEST_REG;
- op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
+ op->dest.reg = modrm_reg;
}
+ break;
}
break;
case 0x8d:
- if (sib == 0x24 && rex_w && !rex_b && !rex_x) {
-
- ADD_OP(op) {
- if (!insn.displacement.value) {
- /* lea (%rsp), reg */
- op->src.type = OP_SRC_REG;
- } else {
- /* lea disp(%rsp), reg */
- op->src.type = OP_SRC_ADD;
- op->src.offset = insn.displacement.value;
- }
- op->src.reg = CFI_SP;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = op_to_cfi_reg[modrm_reg][rex_r];
- }
-
- } else if (rex == 0x48 && modrm == 0x65) {
-
- /* lea disp(%rbp), %rsp */
- ADD_OP(op) {
- op->src.type = OP_SRC_ADD;
- op->src.reg = CFI_BP;
- op->src.offset = insn.displacement.value;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = CFI_SP;
- }
+ if (mod_is_reg()) {
+ WARN("invalid LEA encoding at %s:0x%lx", sec->name, offset);
+ break;
+ }
- } else if (rex == 0x49 && modrm == 0x62 &&
- insn.displacement.value == -8) {
+ /* skip non 64bit ops */
+ if (!rex_w)
+ break;
- /*
- * lea -0x8(%r10), %rsp
- *
- * Restoring rsp back to its original value after a
- * stack realignment.
- */
- ADD_OP(op) {
- op->src.type = OP_SRC_ADD;
- op->src.reg = CFI_R10;
- op->src.offset = -8;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = CFI_SP;
- }
+ /* skip RIP relative displacement */
+ if (is_RIP())
+ break;
- } else if (rex == 0x49 && modrm == 0x65 &&
- insn.displacement.value == -16) {
+ /* skip nontrivial SIB */
+ if (have_SIB()) {
+ modrm_rm = sib_base;
+ if (sib_index != CFI_SP)
+ break;
+ }
- /*
- * lea -0x10(%r13), %rsp
- *
- * Restoring rsp back to its original value after a
- * stack realignment.
- */
- ADD_OP(op) {
+ /* lea disp(%src), %dst */
+ ADD_OP(op) {
+ op->src.offset = insn.displacement.value;
+ if (!op->src.offset) {
+ /* lea (%src), %dst */
+ op->src.type = OP_SRC_REG;
+ } else {
+ /* lea disp(%src), %dst */
op->src.type = OP_SRC_ADD;
- op->src.reg = CFI_R13;
- op->src.offset = -16;
- op->dest.type = OP_DEST_REG;
- op->dest.reg = CFI_SP;
}
+ op->src.reg = modrm_rm;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = modrm_reg;
}
-
break;
case 0x8f:
@@ -476,9 +515,17 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
* mov bp, sp
* pop bp
*/
- ADD_OP(op)
- op->dest.type = OP_DEST_LEAVE;
-
+ ADD_OP(op) {
+ op->src.type = OP_SRC_REG;
+ op->src.reg = CFI_BP;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_SP;
+ }
+ ADD_OP(op) {
+ op->src.type = OP_SRC_POP;
+ op->dest.type = OP_DEST_REG;
+ op->dest.reg = CFI_BP;
+ }
break;
case 0xe3:
@@ -596,11 +643,11 @@ void arch_initial_func_cfi_state(struct cfi_init_state *state)
const char *arch_nop_insn(int len)
{
static const char nops[5][5] = {
- /* 1 */ { 0x90 },
- /* 2 */ { 0x66, 0x90 },
- /* 3 */ { 0x0f, 0x1f, 0x00 },
- /* 4 */ { 0x0f, 0x1f, 0x40, 0x00 },
- /* 5 */ { 0x0f, 0x1f, 0x44, 0x00, 0x00 },
+ { BYTES_NOP1 },
+ { BYTES_NOP2 },
+ { BYTES_NOP3 },
+ { BYTES_NOP4 },
+ { BYTES_NOP5 },
};
if (len < 1 || len > 5) {
@@ -611,6 +658,122 @@ const char *arch_nop_insn(int len)
return nops[len-1];
}
+/* asm/alternative.h ? */
+
+#define ALTINSTR_FLAG_INV (1 << 15)
+#define ALT_NOT(feat) ((feat) | ALTINSTR_FLAG_INV)
+
+struct alt_instr {
+ s32 instr_offset; /* original instruction */
+ s32 repl_offset; /* offset to replacement instruction */
+ u16 cpuid; /* cpuid bit set for replacement */
+ u8 instrlen; /* length of original instruction */
+ u8 replacementlen; /* length of new instruction */
+} __packed;
+
+static int elf_add_alternative(struct elf *elf,
+ struct instruction *orig, struct symbol *sym,
+ int cpuid, u8 orig_len, u8 repl_len)
+{
+ const int size = sizeof(struct alt_instr);
+ struct alt_instr *alt;
+ struct section *sec;
+ Elf_Scn *s;
+
+ sec = find_section_by_name(elf, ".altinstructions");
+ if (!sec) {
+ sec = elf_create_section(elf, ".altinstructions",
+ SHF_WRITE, size, 0);
+
+ if (!sec) {
+ WARN_ELF("elf_create_section");
+ return -1;
+ }
+ }
+
+ s = elf_getscn(elf->elf, sec->idx);
+ if (!s) {
+ WARN_ELF("elf_getscn");
+ return -1;
+ }
+
+ sec->data = elf_newdata(s);
+ if (!sec->data) {
+ WARN_ELF("elf_newdata");
+ return -1;
+ }
+
+ sec->data->d_size = size;
+ sec->data->d_align = 1;
+
+ alt = sec->data->d_buf = malloc(size);
+ if (!sec->data->d_buf) {
+ perror("malloc");
+ return -1;
+ }
+ memset(sec->data->d_buf, 0, size);
+
+ if (elf_add_reloc_to_insn(elf, sec, sec->sh.sh_size,
+ R_X86_64_PC32, orig->sec, orig->offset)) {
+ WARN("elf_create_reloc: alt_instr::instr_offset");
+ return -1;
+ }
+
+ if (elf_add_reloc(elf, sec, sec->sh.sh_size + 4,
+ R_X86_64_PC32, sym, 0)) {
+ WARN("elf_create_reloc: alt_instr::repl_offset");
+ return -1;
+ }
+
+ alt->cpuid = cpuid;
+ alt->instrlen = orig_len;
+ alt->replacementlen = repl_len;
+
+ sec->sh.sh_size += size;
+ sec->changed = true;
+
+ return 0;
+}
+
+#define X86_FEATURE_RETPOLINE ( 7*32+12)
+
+int arch_rewrite_retpolines(struct objtool_file *file)
+{
+ struct instruction *insn;
+ struct reloc *reloc;
+ struct symbol *sym;
+ char name[32] = "";
+
+ list_for_each_entry(insn, &file->retpoline_call_list, call_node) {
+
+ if (!strcmp(insn->sec->name, ".text.__x86.indirect_thunk"))
+ continue;
+
+ reloc = insn->reloc;
+
+ sprintf(name, "__x86_indirect_alt_%s_%s",
+ insn->type == INSN_JUMP_DYNAMIC ? "jmp" : "call",
+ reloc->sym->name + 21);
+
+ sym = find_symbol_by_name(file->elf, name);
+ if (!sym) {
+ sym = elf_create_undef_symbol(file->elf, name);
+ if (!sym) {
+ WARN("elf_create_undef_symbol");
+ return -1;
+ }
+ }
+
+ if (elf_add_alternative(file->elf, insn, sym,
+ ALT_NOT(X86_FEATURE_RETPOLINE), 5, 5)) {
+ WARN("elf_add_alternative");
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg)
{
struct cfi_reg *cfa = &insn->cfi.cfa;
@@ -646,3 +809,8 @@ int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg)
return 0;
}
+
+bool arch_is_retpoline(struct symbol *sym)
+{
+ return !strncmp(sym->name, "__x86_indirect_", 15);
+}
diff --git a/tools/objtool/arch/x86/include/arch/cfi_regs.h b/tools/objtool/arch/x86/include/arch/cfi_regs.h
index 79bc517efba8..0579d22c433c 100644
--- a/tools/objtool/arch/x86/include/arch/cfi_regs.h
+++ b/tools/objtool/arch/x86/include/arch/cfi_regs.h
@@ -4,13 +4,13 @@
#define _OBJTOOL_CFI_REGS_H
#define CFI_AX 0
-#define CFI_DX 1
-#define CFI_CX 2
+#define CFI_CX 1
+#define CFI_DX 2
#define CFI_BX 3
-#define CFI_SI 4
-#define CFI_DI 5
-#define CFI_BP 6
-#define CFI_SP 7
+#define CFI_SP 4
+#define CFI_BP 5
+#define CFI_SI 6
+#define CFI_DI 7
#define CFI_R8 8
#define CFI_R9 9
#define CFI_R10 10
diff --git a/tools/objtool/arch/x86/include/arch/special.h b/tools/objtool/arch/x86/include/arch/special.h
index d818b2bffa02..14271cca0c74 100644
--- a/tools/objtool/arch/x86/include/arch/special.h
+++ b/tools/objtool/arch/x86/include/arch/special.h
@@ -10,7 +10,7 @@
#define JUMP_ORIG_OFFSET 0
#define JUMP_NEW_OFFSET 4
-#define ALT_ENTRY_SIZE 13
+#define ALT_ENTRY_SIZE 12
#define ALT_ORIG_OFFSET 0
#define ALT_NEW_OFFSET 4
#define ALT_FEATURE_OFFSET 8
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
index c3a85d8f6c5c..8b38b5d6fec7 100644
--- a/tools/objtool/builtin-check.c
+++ b/tools/objtool/builtin-check.c
@@ -15,16 +15,23 @@
#include <subcmd/parse-options.h>
#include <string.h>
+#include <stdlib.h>
#include <objtool/builtin.h>
#include <objtool/objtool.h>
-bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux, mcount, noinstr;
+bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
+ validate_dup, vmlinux, mcount, noinstr, backup;
static const char * const check_usage[] = {
"objtool check [<options>] file.o",
NULL,
};
+static const char * const env_usage[] = {
+ "OBJTOOL_ARGS=\"<options>\"",
+ NULL,
+};
+
const struct option check_options[] = {
OPT_BOOLEAN('f', "no-fp", &no_fp, "Skip frame pointer validation"),
OPT_BOOLEAN('u', "no-unreachable", &no_unreachable, "Skip 'unreachable instruction' warnings"),
@@ -37,20 +44,44 @@ const struct option check_options[] = {
OPT_BOOLEAN('n', "noinstr", &noinstr, "noinstr validation for vmlinux.o"),
OPT_BOOLEAN('l', "vmlinux", &vmlinux, "vmlinux.o validation"),
OPT_BOOLEAN('M', "mcount", &mcount, "generate __mcount_loc"),
+ OPT_BOOLEAN('B', "backup", &backup, "create .orig files before modification"),
OPT_END(),
};
+int cmd_parse_options(int argc, const char **argv, const char * const usage[])
+{
+ const char *envv[16] = { };
+ char *env;
+ int envc;
+
+ env = getenv("OBJTOOL_ARGS");
+ if (env) {
+ envv[0] = "OBJTOOL_ARGS";
+ for (envc = 1; envc < ARRAY_SIZE(envv); ) {
+ envv[envc++] = env;
+ env = strchr(env, ' ');
+ if (!env)
+ break;
+ *env = '\0';
+ env++;
+ }
+
+ parse_options(envc, envv, check_options, env_usage, 0);
+ }
+
+ argc = parse_options(argc, argv, check_options, usage, 0);
+ if (argc != 1)
+ usage_with_options(usage, check_options);
+ return argc;
+}
+
int cmd_check(int argc, const char **argv)
{
const char *objname;
struct objtool_file *file;
int ret;
- argc = parse_options(argc, argv, check_options, check_usage, 0);
-
- if (argc != 1)
- usage_with_options(check_usage, check_options);
-
+ argc = cmd_parse_options(argc, argv, check_usage);
objname = argv[0];
file = objtool_open_read(objname);
diff --git a/tools/objtool/builtin-orc.c b/tools/objtool/builtin-orc.c
index 8273bbf7cebb..17f8b9307738 100644
--- a/tools/objtool/builtin-orc.c
+++ b/tools/objtool/builtin-orc.c
@@ -34,10 +34,7 @@ int cmd_orc(int argc, const char **argv)
struct objtool_file *file;
int ret;
- argc = parse_options(argc, argv, check_options, orc_usage, 0);
- if (argc != 1)
- usage_with_options(orc_usage, check_options);
-
+ argc = cmd_parse_options(argc, argv, orc_usage);
objname = argv[0];
file = objtool_open_read(objname);
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 5e5388a38e2a..9ed1a4cd00dc 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -108,6 +108,18 @@ static struct instruction *prev_insn_same_sym(struct objtool_file *file,
for (insn = next_insn_same_sec(file, insn); insn; \
insn = next_insn_same_sec(file, insn))
+static bool is_jump_table_jump(struct instruction *insn)
+{
+ struct alt_group *alt_group = insn->alt_group;
+
+ if (insn->jump_table)
+ return true;
+
+ /* Retpoline alternative for a jump table? */
+ return alt_group && alt_group->orig_group &&
+ alt_group->orig_group->first_insn->jump_table;
+}
+
static bool is_sibling_call(struct instruction *insn)
{
/*
@@ -120,7 +132,7 @@ static bool is_sibling_call(struct instruction *insn)
/* An indirect jump is either a sibling call or a jump to a table. */
if (insn->type == INSN_JUMP_DYNAMIC)
- return list_empty(&insn->alts);
+ return !is_jump_table_jump(insn);
/* add_jump_destinations() sets insn->call_dest for sibling calls. */
return (is_static_jump(insn) && insn->call_dest);
@@ -433,8 +445,7 @@ reachable:
static int create_static_call_sections(struct objtool_file *file)
{
- struct section *sec, *reloc_sec;
- struct reloc *reloc;
+ struct section *sec;
struct static_call_site *site;
struct instruction *insn;
struct symbol *key_sym;
@@ -452,7 +463,7 @@ static int create_static_call_sections(struct objtool_file *file)
return 0;
idx = 0;
- list_for_each_entry(insn, &file->static_call_list, static_call_node)
+ list_for_each_entry(insn, &file->static_call_list, call_node)
idx++;
sec = elf_create_section(file->elf, ".static_call_sites", SHF_WRITE,
@@ -460,36 +471,18 @@ static int create_static_call_sections(struct objtool_file *file)
if (!sec)
return -1;
- reloc_sec = elf_create_reloc_section(file->elf, sec, SHT_RELA);
- if (!reloc_sec)
- return -1;
-
idx = 0;
- list_for_each_entry(insn, &file->static_call_list, static_call_node) {
+ list_for_each_entry(insn, &file->static_call_list, call_node) {
site = (struct static_call_site *)sec->data->d_buf + idx;
memset(site, 0, sizeof(struct static_call_site));
/* populate reloc for 'addr' */
- reloc = malloc(sizeof(*reloc));
-
- if (!reloc) {
- perror("malloc");
- return -1;
- }
- memset(reloc, 0, sizeof(*reloc));
-
- insn_to_reloc_sym_addend(insn->sec, insn->offset, reloc);
- if (!reloc->sym) {
- WARN_FUNC("static call tramp: missing containing symbol",
- insn->sec, insn->offset);
+ if (elf_add_reloc_to_insn(file->elf, sec,
+ idx * sizeof(struct static_call_site),
+ R_X86_64_PC32,
+ insn->sec, insn->offset))
return -1;
- }
-
- reloc->type = R_X86_64_PC32;
- reloc->offset = idx * sizeof(struct static_call_site);
- reloc->sec = reloc_sec;
- elf_add_reloc(file->elf, reloc);
/* find key symbol */
key_name = strdup(insn->call_dest->name);
@@ -526,32 +519,21 @@ static int create_static_call_sections(struct objtool_file *file)
free(key_name);
/* populate reloc for 'key' */
- reloc = malloc(sizeof(*reloc));
- if (!reloc) {
- perror("malloc");
+ if (elf_add_reloc(file->elf, sec,
+ idx * sizeof(struct static_call_site) + 4,
+ R_X86_64_PC32, key_sym,
+ is_sibling_call(insn) * STATIC_CALL_SITE_TAIL))
return -1;
- }
- memset(reloc, 0, sizeof(*reloc));
- reloc->sym = key_sym;
- reloc->addend = is_sibling_call(insn) ? STATIC_CALL_SITE_TAIL : 0;
- reloc->type = R_X86_64_PC32;
- reloc->offset = idx * sizeof(struct static_call_site) + 4;
- reloc->sec = reloc_sec;
- elf_add_reloc(file->elf, reloc);
idx++;
}
- if (elf_rebuild_reloc_section(file->elf, reloc_sec))
- return -1;
-
return 0;
}
static int create_mcount_loc_sections(struct objtool_file *file)
{
- struct section *sec, *reloc_sec;
- struct reloc *reloc;
+ struct section *sec;
unsigned long *loc;
struct instruction *insn;
int idx;
@@ -574,49 +556,21 @@ static int create_mcount_loc_sections(struct objtool_file *file)
if (!sec)
return -1;
- reloc_sec = elf_create_reloc_section(file->elf, sec, SHT_RELA);
- if (!reloc_sec)
- return -1;
-
idx = 0;
list_for_each_entry(insn, &file->mcount_loc_list, mcount_loc_node) {
loc = (unsigned long *)sec->data->d_buf + idx;
memset(loc, 0, sizeof(unsigned long));
- reloc = malloc(sizeof(*reloc));
- if (!reloc) {
- perror("malloc");
+ if (elf_add_reloc_to_insn(file->elf, sec,
+ idx * sizeof(unsigned long),
+ R_X86_64_64,
+ insn->sec, insn->offset))
return -1;
- }
- memset(reloc, 0, sizeof(*reloc));
-
- if (insn->sec->sym) {
- reloc->sym = insn->sec->sym;
- reloc->addend = insn->offset;
- } else {
- reloc->sym = find_symbol_containing(insn->sec, insn->offset);
-
- if (!reloc->sym) {
- WARN("missing symbol for insn at offset 0x%lx\n",
- insn->offset);
- return -1;
- }
-
- reloc->addend = insn->offset - reloc->sym->offset;
- }
-
- reloc->type = R_X86_64_64;
- reloc->offset = idx * sizeof(unsigned long);
- reloc->sec = reloc_sec;
- elf_add_reloc(file->elf, reloc);
idx++;
}
- if (elf_rebuild_reloc_section(file->elf, reloc_sec))
- return -1;
-
return 0;
}
@@ -850,6 +804,30 @@ static int add_ignore_alternatives(struct objtool_file *file)
return 0;
}
+__weak bool arch_is_retpoline(struct symbol *sym)
+{
+ return false;
+}
+
+#define NEGATIVE_RELOC ((void *)-1L)
+
+static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn)
+{
+ if (insn->reloc == NEGATIVE_RELOC)
+ return NULL;
+
+ if (!insn->reloc) {
+ insn->reloc = find_reloc_by_dest_range(file->elf, insn->sec,
+ insn->offset, insn->len);
+ if (!insn->reloc) {
+ insn->reloc = NEGATIVE_RELOC;
+ return NULL;
+ }
+ }
+
+ return insn->reloc;
+}
+
/*
* Find the destination instructions for all jumps.
*/
@@ -864,16 +842,14 @@ static int add_jump_destinations(struct objtool_file *file)
if (!is_static_jump(insn))
continue;
- reloc = find_reloc_by_dest_range(file->elf, insn->sec,
- insn->offset, insn->len);
+ reloc = insn_reloc(file, insn);
if (!reloc) {
dest_sec = insn->sec;
dest_off = arch_jump_destination(insn);
} else if (reloc->sym->type == STT_SECTION) {
dest_sec = reloc->sym->sec;
dest_off = arch_dest_reloc_offset(reloc->addend);
- } else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21) ||
- !strncmp(reloc->sym->name, "__x86_retpoline_", 16)) {
+ } else if (arch_is_retpoline(reloc->sym)) {
/*
* Retpoline jumps are really dynamic jumps in
* disguise, so convert them accordingly.
@@ -883,13 +859,16 @@ static int add_jump_destinations(struct objtool_file *file)
else
insn->type = INSN_JUMP_DYNAMIC_CONDITIONAL;
+ list_add_tail(&insn->call_node,
+ &file->retpoline_call_list);
+
insn->retpoline_safe = true;
continue;
} else if (insn->func) {
/* internal or external sibling call (with reloc) */
insn->call_dest = reloc->sym;
if (insn->call_dest->static_call_tramp) {
- list_add_tail(&insn->static_call_node,
+ list_add_tail(&insn->call_node,
&file->static_call_list);
}
continue;
@@ -951,7 +930,7 @@ static int add_jump_destinations(struct objtool_file *file)
/* internal sibling call (without reloc) */
insn->call_dest = insn->jump_dest->func;
if (insn->call_dest->static_call_tramp) {
- list_add_tail(&insn->static_call_node,
+ list_add_tail(&insn->call_node,
&file->static_call_list);
}
}
@@ -995,8 +974,7 @@ static int add_call_destinations(struct objtool_file *file)
if (insn->type != INSN_CALL)
continue;
- reloc = find_reloc_by_dest_range(file->elf, insn->sec,
- insn->offset, insn->len);
+ reloc = insn_reloc(file, insn);
if (!reloc) {
dest_off = arch_jump_destination(insn);
insn->call_dest = find_call_destination(insn->sec, dest_off);
@@ -1026,9 +1004,29 @@ static int add_call_destinations(struct objtool_file *file)
dest_off);
return -1;
}
+
+ } else if (arch_is_retpoline(reloc->sym)) {
+ /*
+ * Retpoline calls are really dynamic calls in
+ * disguise, so convert them accordingly.
+ */
+ insn->type = INSN_CALL_DYNAMIC;
+ insn->retpoline_safe = true;
+
+ list_add_tail(&insn->call_node,
+ &file->retpoline_call_list);
+
+ remove_insn_ops(insn);
+ continue;
+
} else
insn->call_dest = reloc->sym;
+ if (insn->call_dest && insn->call_dest->static_call_tramp) {
+ list_add_tail(&insn->call_node,
+ &file->static_call_list);
+ }
+
/*
* Many compilers cannot disable KCOV with a function attribute
* so they need a little help, NOP out any KCOV calls from noinstr
@@ -1175,8 +1173,7 @@ static int handle_group_alt(struct objtool_file *file,
* alternatives code can adjust the relative offsets
* accordingly.
*/
- alt_reloc = find_reloc_by_dest_range(file->elf, insn->sec,
- insn->offset, insn->len);
+ alt_reloc = insn_reloc(file, insn);
if (alt_reloc &&
!arch_support_alt_relocation(special_alt, insn, alt_reloc)) {
@@ -1751,6 +1748,11 @@ static void mark_rodata(struct objtool_file *file)
file->rodata = found;
}
+__weak int arch_rewrite_retpolines(struct objtool_file *file)
+{
+ return 0;
+}
+
static int decode_sections(struct objtool_file *file)
{
int ret;
@@ -1772,10 +1774,17 @@ static int decode_sections(struct objtool_file *file)
if (ret)
return ret;
+ /*
+ * Must be before add_{jump_call}_destination.
+ */
ret = read_static_call_tramps(file);
if (ret)
return ret;
+ /*
+ * Must be before add_special_section_alts() as that depends on
+ * jump_dest being set.
+ */
ret = add_jump_destinations(file);
if (ret)
return ret;
@@ -1784,6 +1793,10 @@ static int decode_sections(struct objtool_file *file)
if (ret)
return ret;
+ /*
+ * Must be before add_call_destination(); it changes INSN_CALL to
+ * INSN_JUMP.
+ */
ret = read_intra_function_calls(file);
if (ret)
return ret;
@@ -1808,6 +1821,15 @@ static int decode_sections(struct objtool_file *file)
if (ret)
return ret;
+ /*
+ * Must be after add_special_section_alts(), since this will emit
+ * alternatives. Must be after add_{jump,call}_destination(), since
+ * those create the call insn lists.
+ */
+ ret = arch_rewrite_retpolines(file);
+ if (ret)
+ return ret;
+
return 0;
}
@@ -1959,8 +1981,9 @@ static void restore_reg(struct cfi_state *cfi, unsigned char reg)
* 41 5d pop %r13
* c3 retq
*/
-static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
- struct stack_op *op)
+static int update_cfi_state(struct instruction *insn,
+ struct instruction *next_insn,
+ struct cfi_state *cfi, struct stack_op *op)
{
struct cfi_reg *cfa = &cfi->cfa;
struct cfi_reg *regs = cfi->regs;
@@ -2019,7 +2042,7 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
}
else if (op->src.reg == CFI_BP && op->dest.reg == CFI_SP &&
- cfa->base == CFI_BP) {
+ (cfa->base == CFI_BP || cfa->base == cfi->drap_reg)) {
/*
* mov %rbp, %rsp
@@ -2161,7 +2184,7 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
break;
}
- if (op->dest.reg == cfi->cfa.base) {
+ if (op->dest.reg == cfi->cfa.base && !(next_insn && next_insn->hint)) {
WARN_FUNC("unsupported stack register modification",
insn->sec, insn->offset);
return -1;
@@ -2216,7 +2239,7 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
cfa->offset = 0;
cfi->drap_offset = -1;
- } else if (regs[op->dest.reg].offset == -cfi->stack_size) {
+ } else if (cfi->stack_size == -regs[op->dest.reg].offset) {
/* pop %reg */
restore_reg(cfi, op->dest.reg);
@@ -2357,26 +2380,6 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
break;
- case OP_DEST_LEAVE:
- if ((!cfi->drap && cfa->base != CFI_BP) ||
- (cfi->drap && cfa->base != cfi->drap_reg)) {
- WARN_FUNC("leave instruction with modified stack frame",
- insn->sec, insn->offset);
- return -1;
- }
-
- /* leave (mov %rbp, %rsp; pop %rbp) */
-
- cfi->stack_size = -cfi->regs[CFI_BP].offset - 8;
- restore_reg(cfi, CFI_BP);
-
- if (!cfi->drap) {
- cfa->base = CFI_SP;
- cfa->offset -= 8;
- }
-
- break;
-
case OP_DEST_MEM:
if (op->src.type != OP_SRC_POP && op->src.type != OP_SRC_POPF) {
WARN_FUNC("unknown stack-related memory operation",
@@ -2433,13 +2436,15 @@ static int propagate_alt_cfi(struct objtool_file *file, struct instruction *insn
return 0;
}
-static int handle_insn_ops(struct instruction *insn, struct insn_state *state)
+static int handle_insn_ops(struct instruction *insn,
+ struct instruction *next_insn,
+ struct insn_state *state)
{
struct stack_op *op;
list_for_each_entry(op, &insn->stack_ops, list) {
- if (update_cfi_state(insn, &state->cfi, op))
+ if (update_cfi_state(insn, next_insn, &state->cfi, op))
return 1;
if (!insn->alt_group)
@@ -2722,7 +2727,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
return 0;
}
- if (handle_insn_ops(insn, &state))
+ if (handle_insn_ops(insn, next_insn, &state))
return 1;
switch (insn->type) {
@@ -2746,11 +2751,6 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
if (dead_end_function(file, insn->call_dest))
return 0;
- if (insn->type == INSN_CALL && insn->call_dest->static_call_tramp) {
- list_add_tail(&insn->static_call_node,
- &file->static_call_list);
- }
-
break;
case INSN_JUMP_CONDITIONAL:
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
index 93fa833a49a5..d08f5f3670f8 100644
--- a/tools/objtool/elf.c
+++ b/tools/objtool/elf.c
@@ -211,32 +211,6 @@ struct reloc *find_reloc_by_dest(const struct elf *elf, struct section *sec, uns
return find_reloc_by_dest_range(elf, sec, offset, 1);
}
-void insn_to_reloc_sym_addend(struct section *sec, unsigned long offset,
- struct reloc *reloc)
-{
- if (sec->sym) {
- reloc->sym = sec->sym;
- reloc->addend = offset;
- return;
- }
-
- /*
- * The Clang assembler strips section symbols, so we have to reference
- * the function symbol instead:
- */
- reloc->sym = find_symbol_containing(sec, offset);
- if (!reloc->sym) {
- /*
- * Hack alert. This happens when we need to reference the NOP
- * pad insn immediately after the function.
- */
- reloc->sym = find_symbol_containing(sec, offset - 1);
- }
-
- if (reloc->sym)
- reloc->addend = offset - reloc->sym->offset;
-}
-
static int read_sections(struct elf *elf)
{
Elf_Scn *s = NULL;
@@ -316,12 +290,39 @@ static int read_sections(struct elf *elf)
return 0;
}
+static void elf_add_symbol(struct elf *elf, struct symbol *sym)
+{
+ struct list_head *entry;
+ struct rb_node *pnode;
+
+ sym->type = GELF_ST_TYPE(sym->sym.st_info);
+ sym->bind = GELF_ST_BIND(sym->sym.st_info);
+
+ sym->offset = sym->sym.st_value;
+ sym->len = sym->sym.st_size;
+
+ rb_add(&sym->node, &sym->sec->symbol_tree, symbol_to_offset);
+ pnode = rb_prev(&sym->node);
+ if (pnode)
+ entry = &rb_entry(pnode, struct symbol, node)->list;
+ else
+ entry = &sym->sec->symbol_list;
+ list_add(&sym->list, entry);
+ elf_hash_add(elf->symbol_hash, &sym->hash, sym->idx);
+ elf_hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name));
+
+ /*
+ * Don't store empty STT_NOTYPE symbols in the rbtree. They
+ * can exist within a function, confusing the sorting.
+ */
+ if (!sym->len)
+ rb_erase(&sym->node, &sym->sec->symbol_tree);
+}
+
static int read_symbols(struct elf *elf)
{
struct section *symtab, *symtab_shndx, *sec;
struct symbol *sym, *pfunc;
- struct list_head *entry;
- struct rb_node *pnode;
int symbols_nr, i;
char *coldstr;
Elf_Data *shndx_data = NULL;
@@ -366,9 +367,6 @@ static int read_symbols(struct elf *elf)
goto err;
}
- sym->type = GELF_ST_TYPE(sym->sym.st_info);
- sym->bind = GELF_ST_BIND(sym->sym.st_info);
-
if ((sym->sym.st_shndx > SHN_UNDEF &&
sym->sym.st_shndx < SHN_LORESERVE) ||
(shndx_data && sym->sym.st_shndx == SHN_XINDEX)) {
@@ -381,32 +379,14 @@ static int read_symbols(struct elf *elf)
sym->name);
goto err;
}
- if (sym->type == STT_SECTION) {
+ if (GELF_ST_TYPE(sym->sym.st_info) == STT_SECTION) {
sym->name = sym->sec->name;
sym->sec->sym = sym;
}
} else
sym->sec = find_section_by_index(elf, 0);
- sym->offset = sym->sym.st_value;
- sym->len = sym->sym.st_size;
-
- rb_add(&sym->node, &sym->sec->symbol_tree, symbol_to_offset);
- pnode = rb_prev(&sym->node);
- if (pnode)
- entry = &rb_entry(pnode, struct symbol, node)->list;
- else
- entry = &sym->sec->symbol_list;
- list_add(&sym->list, entry);
- elf_hash_add(elf->symbol_hash, &sym->hash, sym->idx);
- elf_hash_add(elf->symbol_name_hash, &sym->name_hash, str_hash(sym->name));
-
- /*
- * Don't store empty STT_NOTYPE symbols in the rbtree. They
- * can exist within a function, confusing the sorting.
- */
- if (!sym->len)
- rb_erase(&sym->node, &sym->sec->symbol_tree);
+ elf_add_symbol(elf, sym);
}
if (stats)
@@ -473,12 +453,73 @@ err:
return -1;
}
-void elf_add_reloc(struct elf *elf, struct reloc *reloc)
+static struct section *elf_create_reloc_section(struct elf *elf,
+ struct section *base,
+ int reltype);
+
+int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
+ unsigned int type, struct symbol *sym, int addend)
{
- struct section *sec = reloc->sec;
+ struct reloc *reloc;
- list_add_tail(&reloc->list, &sec->reloc_list);
+ if (!sec->reloc && !elf_create_reloc_section(elf, sec, SHT_RELA))
+ return -1;
+
+ reloc = malloc(sizeof(*reloc));
+ if (!reloc) {
+ perror("malloc");
+ return -1;
+ }
+ memset(reloc, 0, sizeof(*reloc));
+
+ reloc->sec = sec->reloc;
+ reloc->offset = offset;
+ reloc->type = type;
+ reloc->sym = sym;
+ reloc->addend = addend;
+
+ list_add_tail(&reloc->list, &sec->reloc->reloc_list);
elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc));
+
+ sec->reloc->changed = true;
+
+ return 0;
+}
+
+int elf_add_reloc_to_insn(struct elf *elf, struct section *sec,
+ unsigned long offset, unsigned int type,
+ struct section *insn_sec, unsigned long insn_off)
+{
+ struct symbol *sym;
+ int addend;
+
+ if (insn_sec->sym) {
+ sym = insn_sec->sym;
+ addend = insn_off;
+
+ } else {
+ /*
+ * The Clang assembler strips section symbols, so we have to
+ * reference the function symbol instead:
+ */
+ sym = find_symbol_containing(insn_sec, insn_off);
+ if (!sym) {
+ /*
+ * Hack alert. This happens when we need to reference
+ * the NOP pad insn immediately after the function.
+ */
+ sym = find_symbol_containing(insn_sec, insn_off - 1);
+ }
+
+ if (!sym) {
+ WARN("can't find symbol containing %s+0x%lx", insn_sec->name, insn_off);
+ return -1;
+ }
+
+ addend = insn_off - sym->offset;
+ }
+
+ return elf_add_reloc(elf, sec, offset, type, sym, addend);
}
static int read_rel_reloc(struct section *sec, int i, struct reloc *reloc, unsigned int *symndx)
@@ -558,7 +599,9 @@ static int read_relocs(struct elf *elf)
return -1;
}
- elf_add_reloc(elf, reloc);
+ list_add_tail(&reloc->list, &sec->reloc_list);
+ elf_hash_add(elf->reloc_hash, &reloc->hash, reloc_hash(reloc));
+
nr_reloc++;
}
max_reloc = max(max_reloc, nr_reloc);
@@ -636,13 +679,108 @@ err:
return NULL;
}
+static int elf_add_string(struct elf *elf, struct section *strtab, char *str)
+{
+ Elf_Data *data;
+ Elf_Scn *s;
+ int len;
+
+ if (!strtab)
+ strtab = find_section_by_name(elf, ".strtab");
+ if (!strtab) {
+ WARN("can't find .strtab section");
+ return -1;
+ }
+
+ s = elf_getscn(elf->elf, strtab->idx);
+ if (!s) {
+ WARN_ELF("elf_getscn");
+ return -1;
+ }
+
+ data = elf_newdata(s);
+ if (!data) {
+ WARN_ELF("elf_newdata");
+ return -1;
+ }
+
+ data->d_buf = str;
+ data->d_size = strlen(str) + 1;
+ data->d_align = 1;
+
+ len = strtab->len;
+ strtab->len += data->d_size;
+ strtab->changed = true;
+
+ return len;
+}
+
+struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name)
+{
+ struct section *symtab;
+ struct symbol *sym;
+ Elf_Data *data;
+ Elf_Scn *s;
+
+ sym = malloc(sizeof(*sym));
+ if (!sym) {
+ perror("malloc");
+ return NULL;
+ }
+ memset(sym, 0, sizeof(*sym));
+
+ sym->name = strdup(name);
+
+ sym->sym.st_name = elf_add_string(elf, NULL, sym->name);
+ if (sym->sym.st_name == -1)
+ return NULL;
+
+ sym->sym.st_info = GELF_ST_INFO(STB_GLOBAL, STT_NOTYPE);
+ // st_other 0
+ // st_shndx 0
+ // st_value 0
+ // st_size 0
+
+ symtab = find_section_by_name(elf, ".symtab");
+ if (!symtab) {
+ WARN("can't find .symtab");
+ return NULL;
+ }
+
+ s = elf_getscn(elf->elf, symtab->idx);
+ if (!s) {
+ WARN_ELF("elf_getscn");
+ return NULL;
+ }
+
+ data = elf_newdata(s);
+ if (!data) {
+ WARN_ELF("elf_newdata");
+ return NULL;
+ }
+
+ data->d_buf = &sym->sym;
+ data->d_size = sizeof(sym->sym);
+ data->d_align = 1;
+
+ sym->idx = symtab->len / sizeof(sym->sym);
+
+ symtab->len += data->d_size;
+ symtab->changed = true;
+
+ sym->sec = find_section_by_index(elf, 0);
+
+ elf_add_symbol(elf, sym);
+
+ return sym;
+}
+
struct section *elf_create_section(struct elf *elf, const char *name,
unsigned int sh_flags, size_t entsize, int nr)
{
struct section *sec, *shstrtab;
size_t size = entsize * nr;
Elf_Scn *s;
- Elf_Data *data;
sec = malloc(sizeof(*sec));
if (!sec) {
@@ -699,7 +837,6 @@ struct section *elf_create_section(struct elf *elf, const char *name,
sec->sh.sh_addralign = 1;
sec->sh.sh_flags = SHF_ALLOC | sh_flags;
-
/* Add section name to .shstrtab (or .strtab for Clang) */
shstrtab = find_section_by_name(elf, ".shstrtab");
if (!shstrtab)
@@ -708,27 +845,9 @@ struct section *elf_create_section(struct elf *elf, const char *name,
WARN("can't find .shstrtab or .strtab section");
return NULL;
}
-
- s = elf_getscn(elf->elf, shstrtab->idx);
- if (!s) {
- WARN_ELF("elf_getscn");
- return NULL;
- }
-
- data = elf_newdata(s);
- if (!data) {
- WARN_ELF("elf_newdata");
+ sec->sh.sh_name = elf_add_string(elf, shstrtab, sec->name);
+ if (sec->sh.sh_name == -1)
return NULL;
- }
-
- data->d_buf = sec->name;
- data->d_size = strlen(name) + 1;
- data->d_align = 1;
-
- sec->sh.sh_name = shstrtab->len;
-
- shstrtab->len += strlen(name) + 1;
- shstrtab->changed = true;
list_add_tail(&sec->list, &elf->sections);
elf_hash_add(elf->section_hash, &sec->hash, sec->idx);
@@ -799,7 +918,7 @@ static struct section *elf_create_rela_reloc_section(struct elf *elf, struct sec
return sec;
}
-struct section *elf_create_reloc_section(struct elf *elf,
+static struct section *elf_create_reloc_section(struct elf *elf,
struct section *base,
int reltype)
{
@@ -873,14 +992,11 @@ static int elf_rebuild_rela_reloc_section(struct section *sec, int nr)
return 0;
}
-int elf_rebuild_reloc_section(struct elf *elf, struct section *sec)
+static int elf_rebuild_reloc_section(struct elf *elf, struct section *sec)
{
struct reloc *reloc;
int nr;
- sec->changed = true;
- elf->changed = true;
-
nr = 0;
list_for_each_entry(reloc, &sec->reloc_list, list)
nr++;
@@ -944,9 +1060,15 @@ int elf_write(struct elf *elf)
struct section *sec;
Elf_Scn *s;
- /* Update section headers for changed sections: */
+ /* Update changed relocation sections and section headers: */
list_for_each_entry(sec, &elf->sections, list) {
if (sec->changed) {
+ if (sec->base &&
+ elf_rebuild_reloc_section(elf, sec)) {
+ WARN("elf_rebuild_reloc_section");
+ return -1;
+ }
+
s = elf_getscn(elf->elf, sec->idx);
if (!s) {
WARN_ELF("elf_getscn");
@@ -958,6 +1080,7 @@ int elf_write(struct elf *elf)
}
sec->changed = false;
+ elf->changed = true;
}
}
diff --git a/tools/objtool/include/objtool/arch.h b/tools/objtool/include/objtool/arch.h
index 6ff0685f5cc5..062bb6e9b865 100644
--- a/tools/objtool/include/objtool/arch.h
+++ b/tools/objtool/include/objtool/arch.h
@@ -35,7 +35,6 @@ enum op_dest_type {
OP_DEST_MEM,
OP_DEST_PUSH,
OP_DEST_PUSHF,
- OP_DEST_LEAVE,
};
struct op_dest {
@@ -86,4 +85,8 @@ const char *arch_nop_insn(int len);
int arch_decode_hint_reg(struct instruction *insn, u8 sp_reg);
+bool arch_is_retpoline(struct symbol *sym);
+
+int arch_rewrite_retpolines(struct objtool_file *file);
+
#endif /* _ARCH_H */
diff --git a/tools/objtool/include/objtool/builtin.h b/tools/objtool/include/objtool/builtin.h
index 2502bb27de17..15ac0b7d3d6a 100644
--- a/tools/objtool/include/objtool/builtin.h
+++ b/tools/objtool/include/objtool/builtin.h
@@ -8,7 +8,10 @@
#include <subcmd/parse-options.h>
extern const struct option check_options[];
-extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux, mcount, noinstr;
+extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats,
+ validate_dup, vmlinux, mcount, noinstr, backup;
+
+extern int cmd_parse_options(int argc, const char **argv, const char * const usage[]);
extern int cmd_check(int argc, const char **argv);
extern int cmd_orc(int argc, const char **argv);
diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h
index f5be798107bc..56d50bc50c10 100644
--- a/tools/objtool/include/objtool/check.h
+++ b/tools/objtool/include/objtool/check.h
@@ -39,7 +39,7 @@ struct alt_group {
struct instruction {
struct list_head list;
struct hlist_node hash;
- struct list_head static_call_node;
+ struct list_head call_node;
struct list_head mcount_loc_node;
struct section *sec;
unsigned long offset;
@@ -56,6 +56,7 @@ struct instruction {
struct instruction *jump_dest;
struct instruction *first_jump_src;
struct reloc *jump_table;
+ struct reloc *reloc;
struct list_head alts;
struct symbol *func;
struct list_head stack_ops;
diff --git a/tools/objtool/include/objtool/elf.h b/tools/objtool/include/objtool/elf.h
index e6890cc70a25..45e5ede363b0 100644
--- a/tools/objtool/include/objtool/elf.h
+++ b/tools/objtool/include/objtool/elf.h
@@ -122,12 +122,18 @@ static inline u32 reloc_hash(struct reloc *reloc)
struct elf *elf_open_read(const char *name, int flags);
struct section *elf_create_section(struct elf *elf, const char *name, unsigned int sh_flags, size_t entsize, int nr);
-struct section *elf_create_reloc_section(struct elf *elf, struct section *base, int reltype);
-void elf_add_reloc(struct elf *elf, struct reloc *reloc);
+
+int elf_add_reloc(struct elf *elf, struct section *sec, unsigned long offset,
+ unsigned int type, struct symbol *sym, int addend);
+int elf_add_reloc_to_insn(struct elf *elf, struct section *sec,
+ unsigned long offset, unsigned int type,
+ struct section *insn_sec, unsigned long insn_off);
+
int elf_write_insn(struct elf *elf, struct section *sec,
unsigned long offset, unsigned int len,
const char *insn);
int elf_write_reloc(struct elf *elf, struct reloc *reloc);
+struct symbol *elf_create_undef_symbol(struct elf *elf, const char *name);
int elf_write(struct elf *elf);
void elf_close(struct elf *elf);
@@ -140,9 +146,6 @@ struct reloc *find_reloc_by_dest(const struct elf *elf, struct section *sec, uns
struct reloc *find_reloc_by_dest_range(const struct elf *elf, struct section *sec,
unsigned long offset, unsigned int len);
struct symbol *find_func_containing(struct section *sec, unsigned long offset);
-void insn_to_reloc_sym_addend(struct section *sec, unsigned long offset,
- struct reloc *reloc);
-int elf_rebuild_reloc_section(struct elf *elf, struct section *sec);
#define for_each_sec(file, sec) \
list_for_each_entry(sec, &file->elf->sections, list)
diff --git a/tools/objtool/include/objtool/objtool.h b/tools/objtool/include/objtool/objtool.h
index e68e37476c15..e4084afb2304 100644
--- a/tools/objtool/include/objtool/objtool.h
+++ b/tools/objtool/include/objtool/objtool.h
@@ -18,6 +18,7 @@ struct objtool_file {
struct elf *elf;
struct list_head insn_list;
DECLARE_HASHTABLE(insn_hash, 20);
+ struct list_head retpoline_call_list;
struct list_head static_call_list;
struct list_head mcount_loc_list;
bool ignore_unreachables, c_file, hints, rodata;
diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
index 7b97ce499405..e21db8bce493 100644
--- a/tools/objtool/objtool.c
+++ b/tools/objtool/objtool.c
@@ -17,6 +17,7 @@
#include <stdbool.h>
#include <string.h>
#include <stdlib.h>
+#include <unistd.h>
#include <subcmd/exec-cmd.h>
#include <subcmd/pager.h>
#include <linux/kernel.h>
@@ -44,6 +45,64 @@ bool help;
const char *objname;
static struct objtool_file file;
+static bool objtool_create_backup(const char *_objname)
+{
+ int len = strlen(_objname);
+ char *buf, *base, *name = malloc(len+6);
+ int s, d, l, t;
+
+ if (!name) {
+ perror("failed backup name malloc");
+ return false;
+ }
+
+ strcpy(name, _objname);
+ strcpy(name + len, ".orig");
+
+ d = open(name, O_CREAT|O_WRONLY|O_TRUNC, 0644);
+ if (d < 0) {
+ perror("failed to create backup file");
+ return false;
+ }
+
+ s = open(_objname, O_RDONLY);
+ if (s < 0) {
+ perror("failed to open orig file");
+ return false;
+ }
+
+ buf = malloc(4096);
+ if (!buf) {
+ perror("failed backup data malloc");
+ return false;
+ }
+
+ while ((l = read(s, buf, 4096)) > 0) {
+ base = buf;
+ do {
+ t = write(d, base, l);
+ if (t < 0) {
+ perror("failed backup write");
+ return false;
+ }
+ base += t;
+ l -= t;
+ } while (l);
+ }
+
+ if (l < 0) {
+ perror("failed backup read");
+ return false;
+ }
+
+ free(name);
+ free(buf);
+ close(d);
+ close(s);
+
+ return true;
+}
+
struct objtool_file *objtool_open_read(const char *_objname)
{
if (objname) {
@@ -59,8 +118,14 @@ struct objtool_file *objtool_open_read(const char *_objname)
if (!file.elf)
return NULL;
+ if (backup && !objtool_create_backup(objname)) {
+ WARN("can't create backup file");
+ return NULL;
+ }
+
INIT_LIST_HEAD(&file.insn_list);
hash_init(file.insn_hash);
+ INIT_LIST_HEAD(&file.retpoline_call_list);
INIT_LIST_HEAD(&file.static_call_list);
INIT_LIST_HEAD(&file.mcount_loc_list);
file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment");
diff --git a/tools/objtool/orc_gen.c b/tools/objtool/orc_gen.c
index 738aa5021bc4..dc9b7dd314b0 100644
--- a/tools/objtool/orc_gen.c
+++ b/tools/objtool/orc_gen.c
@@ -82,12 +82,11 @@ static int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi)
}
static int write_orc_entry(struct elf *elf, struct section *orc_sec,
- struct section *ip_rsec, unsigned int idx,
+ struct section *ip_sec, unsigned int idx,
struct section *insn_sec, unsigned long insn_off,
struct orc_entry *o)
{
struct orc_entry *orc;
- struct reloc *reloc;
/* populate ORC data */
orc = (struct orc_entry *)orc_sec->data->d_buf + idx;
@@ -96,25 +95,9 @@ static int write_orc_entry(struct elf *elf, struct section *orc_sec,
orc->bp_offset = bswap_if_needed(orc->bp_offset);
/* populate reloc for ip */
- reloc = malloc(sizeof(*reloc));
- if (!reloc) {
- perror("malloc");
+ if (elf_add_reloc_to_insn(elf, ip_sec, idx * sizeof(int), R_X86_64_PC32,
+ insn_sec, insn_off))
return -1;
- }
- memset(reloc, 0, sizeof(*reloc));
-
- insn_to_reloc_sym_addend(insn_sec, insn_off, reloc);
- if (!reloc->sym) {
- WARN("missing symbol for insn at offset 0x%lx",
- insn_off);
- return -1;
- }
-
- reloc->type = R_X86_64_PC32;
- reloc->offset = idx * sizeof(int);
- reloc->sec = ip_rsec;
-
- elf_add_reloc(elf, reloc);
return 0;
}
@@ -153,7 +136,7 @@ static unsigned long alt_group_len(struct alt_group *alt_group)
int orc_create(struct objtool_file *file)
{
- struct section *sec, *ip_rsec, *orc_sec;
+ struct section *sec, *orc_sec;
unsigned int nr = 0, idx = 0;
struct orc_list_entry *entry;
struct list_head orc_list;
@@ -242,20 +225,14 @@ int orc_create(struct objtool_file *file)
sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), nr);
if (!sec)
return -1;
- ip_rsec = elf_create_reloc_section(file->elf, sec, SHT_RELA);
- if (!ip_rsec)
- return -1;
/* Write ORC entries to sections: */
list_for_each_entry(entry, &orc_list, list) {
- if (write_orc_entry(file->elf, orc_sec, ip_rsec, idx++,
+ if (write_orc_entry(file->elf, orc_sec, sec, idx++,
entry->insn_sec, entry->insn_off,
&entry->orc))
return -1;
}
- if (elf_rebuild_reloc_section(file->elf, ip_rsec))
- return -1;
-
return 0;
}
diff --git a/tools/objtool/special.c b/tools/objtool/special.c
index 2c7fbda7b055..07b21cfabf5c 100644
--- a/tools/objtool/special.c
+++ b/tools/objtool/special.c
@@ -106,6 +106,14 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry,
return -1;
}
+ /*
+ * Skip retpoline .altinstr_replacement... we already rewrite the
+ * instructions for retpolines anyway, see arch_is_retpoline()
+ * usage in add_{call,jump}_destinations().
+ */
+ if (arch_is_retpoline(new_reloc->sym))
+ return 1;
+
alt->new_sec = new_reloc->sym->sec;
alt->new_off = (unsigned int)new_reloc->addend;
@@ -154,7 +162,9 @@ int special_get_alts(struct elf *elf, struct list_head *alts)
memset(alt, 0, sizeof(*alt));
ret = get_alt_entry(elf, entry, sec, idx, alt);
- if (ret)
+ if (ret > 0)
+ continue;
+ if (ret < 0)
return ret;
list_add_tail(&alt->list, alts);
diff --git a/tools/objtool/sync-check.sh b/tools/objtool/sync-check.sh
index 606a4b5e929f..105a291ff8e7 100755
--- a/tools/objtool/sync-check.sh
+++ b/tools/objtool/sync-check.sh
@@ -10,17 +10,21 @@ FILES="include/linux/objtool.h"
if [ "$SRCARCH" = "x86" ]; then
FILES="$FILES
+arch/x86/include/asm/nops.h
arch/x86/include/asm/inat_types.h
arch/x86/include/asm/orc_types.h
arch/x86/include/asm/emulate_prefix.h
arch/x86/lib/x86-opcode-map.txt
arch/x86/tools/gen-insn-attr-x86.awk
include/linux/static_call_types.h
-arch/x86/include/asm/inat.h -I '^#include [\"<]\(asm/\)*inat_types.h[\">]'
-arch/x86/include/asm/insn.h -I '^#include [\"<]\(asm/\)*inat.h[\">]'
-arch/x86/lib/inat.c -I '^#include [\"<]\(../include/\)*asm/insn.h[\">]'
-arch/x86/lib/insn.c -I '^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]' -I '^#include [\"<]\(../include/\)*asm/emulate_prefix.h[\">]'
"
+
+SYNC_CHECK_FILES='
+arch/x86/include/asm/inat.h
+arch/x86/include/asm/insn.h
+arch/x86/lib/inat.c
+arch/x86/lib/insn.c
+'
fi
check_2 () {
@@ -63,3 +67,9 @@ while read -r file_entry; do
done <<EOF
$FILES
EOF
+
+if [ "$SRCARCH" = "x86" ]; then
+ for i in $SYNC_CHECK_FILES; do
+ check $i '-I "^.*\/\*.*__ignore_sync_check__.*\*\/.*$"'
+ done
+fi
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 5d7b947320fb..f05c4d48fd7e 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -20,4 +20,4 @@ tools/lib/bitmap.c
tools/lib/str_error_r.c
tools/lib/vsprintf.c
tools/lib/zalloc.c
-scripts/bpf_helpers_doc.py
+scripts/bpf_doc.py
diff --git a/tools/perf/arch/x86/tests/insn-x86.c b/tools/perf/arch/x86/tests/insn-x86.c
index 4f75ae990140..0262b0d8ccf5 100644
--- a/tools/perf/arch/x86/tests/insn-x86.c
+++ b/tools/perf/arch/x86/tests/insn-x86.c
@@ -96,13 +96,12 @@ static int get_branch(const char *branch_str)
static int test_data_item(struct test_data *dat, int x86_64)
{
struct intel_pt_insn intel_pt_insn;
+ int op, branch, ret;
struct insn insn;
- int op, branch;
- insn_init(&insn, dat->data, MAX_INSN_SIZE, x86_64);
- insn_get_length(&insn);
-
- if (!insn_complete(&insn)) {
+ ret = insn_decode(&insn, dat->data, MAX_INSN_SIZE,
+ x86_64 ? INSN_MODE_64 : INSN_MODE_32);
+ if (ret < 0) {
pr_debug("Failed to decode: %s\n", dat->asm_rep);
return -1;
}
diff --git a/tools/perf/arch/x86/util/archinsn.c b/tools/perf/arch/x86/util/archinsn.c
index 34d600c51044..546feda08428 100644
--- a/tools/perf/arch/x86/util/archinsn.c
+++ b/tools/perf/arch/x86/util/archinsn.c
@@ -11,7 +11,7 @@ void arch_fetch_insn(struct perf_sample *sample,
struct machine *machine)
{
struct insn insn;
- int len;
+ int len, ret;
bool is64bit = false;
if (!sample->ip)
@@ -19,8 +19,9 @@ void arch_fetch_insn(struct perf_sample *sample,
len = thread__memcpy(thread, machine, sample->insn, sample->ip, sizeof(sample->insn), &is64bit);
if (len <= 0)
return;
- insn_init(&insn, sample->insn, len, is64bit);
- insn_get_length(&insn);
- if (insn_complete(&insn) && insn.length <= len)
+
+ ret = insn_decode(&insn, sample->insn, len,
+ is64bit ? INSN_MODE_64 : INSN_MODE_32);
+ if (ret >= 0 && insn.length <= len)
sample->insn_len = insn.length;
}
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index a6420c647959..6df0dc00d73a 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -776,6 +776,12 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
}
}
+ if (!opts->auxtrace_snapshot_mode && !opts->auxtrace_sample_mode) {
+ u32 aux_watermark = opts->auxtrace_mmap_pages * page_size / 4;
+
+ intel_pt_evsel->core.attr.aux_watermark = aux_watermark;
+ }
+
intel_pt_parse_terms(intel_pt_pmu->name, &intel_pt_pmu->format,
"tsc", &tsc_bit);
diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c
index ace8772a4f03..7c4a9d424a64 100644
--- a/tools/perf/builtin-daemon.c
+++ b/tools/perf/builtin-daemon.c
@@ -402,35 +402,42 @@ static pid_t handle_signalfd(struct daemon *daemon)
int status;
pid_t pid;
+ /*
+ * Take signal fd data as pure signal notification and check all
+ * the sessions state. The reason is that multiple signals can get
+ * coalesced in kernel and we can receive only single signal even
+ * if multiple SIGCHLD were generated.
+ */
err = read(daemon->signal_fd, &si, sizeof(struct signalfd_siginfo));
- if (err != sizeof(struct signalfd_siginfo))
+ if (err != sizeof(struct signalfd_siginfo)) {
+ pr_err("failed to read signal fd\n");
return -1;
+ }
list_for_each_entry(session, &daemon->sessions, list) {
+ if (session->pid == -1)
+ continue;
- if (session->pid != (int) si.ssi_pid)
+ pid = waitpid(session->pid, &status, WNOHANG);
+ if (pid <= 0)
continue;
- pid = waitpid(session->pid, &status, 0);
- if (pid == session->pid) {
- if (WIFEXITED(status)) {
- pr_info("session '%s' exited, status=%d\n",
- session->name, WEXITSTATUS(status));
- } else if (WIFSIGNALED(status)) {
- pr_info("session '%s' killed (signal %d)\n",
- session->name, WTERMSIG(status));
- } else if (WIFSTOPPED(status)) {
- pr_info("session '%s' stopped (signal %d)\n",
- session->name, WSTOPSIG(status));
- } else {
- pr_info("session '%s' Unexpected status (0x%x)\n",
- session->name, status);
- }
+ if (WIFEXITED(status)) {
+ pr_info("session '%s' exited, status=%d\n",
+ session->name, WEXITSTATUS(status));
+ } else if (WIFSIGNALED(status)) {
+ pr_info("session '%s' killed (signal %d)\n",
+ session->name, WTERMSIG(status));
+ } else if (WIFSTOPPED(status)) {
+ pr_info("session '%s' stopped (signal %d)\n",
+ session->name, WSTOPSIG(status));
+ } else {
+ pr_info("session '%s' Unexpected status (0x%x)\n",
+ session->name, status);
}
session->state = KILL;
session->pid = -1;
- return pid;
}
return 0;
@@ -443,7 +450,6 @@ static int daemon_session__wait(struct daemon_session *session, struct daemon *d
.fd = daemon->signal_fd,
.events = POLLIN,
};
- pid_t wpid = 0, pid = session->pid;
time_t start;
start = time(NULL);
@@ -452,7 +458,7 @@ static int daemon_session__wait(struct daemon_session *session, struct daemon *d
int err = poll(&pollfd, 1, 1000);
if (err > 0) {
- wpid = handle_signalfd(daemon);
+ handle_signalfd(daemon);
} else if (err < 0) {
perror("failed: poll\n");
return -1;
@@ -460,7 +466,7 @@ static int daemon_session__wait(struct daemon_session *session, struct daemon *d
if (start + secs < time(NULL))
return -1;
- } while (wpid != pid);
+ } while (session->pid != -1);
return 0;
}
@@ -902,7 +908,9 @@ static void daemon_session__kill(struct daemon_session *session,
daemon_session__signal(session, SIGKILL);
break;
default:
- break;
+ pr_err("failed to wait for session %s\n",
+ session->name);
+ return;
}
how++;
@@ -955,7 +963,8 @@ static void daemon__kill(struct daemon *daemon)
daemon__signal(daemon, SIGKILL);
break;
default:
- break;
+ pr_err("failed to wait for sessions\n");
+ return;
}
how++;
@@ -1344,7 +1353,7 @@ out:
close(sock_fd);
if (conf_fd != -1)
close(conf_fd);
- if (conf_fd != -1)
+ if (signal_fd != -1)
close(signal_fd);
pr_info("daemon exited\n");
diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c
index d49448a1060c..87cb11a7a3ee 100644
--- a/tools/perf/builtin-ftrace.c
+++ b/tools/perf/builtin-ftrace.c
@@ -289,7 +289,7 @@ static int set_tracing_pid(struct perf_ftrace *ftrace)
for (i = 0; i < perf_thread_map__nr(ftrace->evlist->core.threads); i++) {
scnprintf(buf, sizeof(buf), "%d",
- ftrace->evlist->core.threads->map[i]);
+ perf_thread_map__pid(ftrace->evlist->core.threads, i));
if (append_tracing_file("set_ftrace_pid", buf) < 0)
return -1;
}
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 6fe44d97fde5..ddccc0eb7390 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -906,7 +906,7 @@ int cmd_inject(int argc, const char **argv)
}
data.path = inject.input_name;
- inject.session = perf_session__new(&data, true, &inject.tool);
+ inject.session = perf_session__new(&data, inject.output.is_pipe, &inject.tool);
if (IS_ERR(inject.session))
return PTR_ERR(inject.session);
diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh
index dded93a2bc89..07857dfb4d91 100755
--- a/tools/perf/check-headers.sh
+++ b/tools/perf/check-headers.sh
@@ -75,6 +75,13 @@ include/uapi/asm-generic/mman-common.h
include/uapi/asm-generic/unistd.h
'
+SYNC_CHECK_FILES='
+arch/x86/include/asm/inat.h
+arch/x86/include/asm/insn.h
+arch/x86/lib/inat.c
+arch/x86/lib/insn.c
+'
+
# These copies are under tools/perf/trace/beauty/ as they are not used to in
# building object files only by scripts in tools/perf/trace/beauty/ to generate
# tables that then gets included in .c files for things like id->string syscall
@@ -129,6 +136,10 @@ for i in $FILES; do
check $i -B
done
+for i in $SYNC_CHECK_FILES; do
+ check $i '-I "^.*\/\*.*__ignore_sync_check__.*\*\/.*$"'
+done
+
# diff with extra ignore lines
check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memcpy_\(erms\|orig\))"'
check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include <asm/export.h>" -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"'
@@ -137,10 +148,6 @@ check include/uapi/linux/mman.h '-I "^#include <\(uapi/\)*asm/mman.h>"'
check include/linux/build_bug.h '-I "^#\(ifndef\|endif\)\( \/\/\)* static_assert$"'
check include/linux/ctype.h '-I "isdigit("'
check lib/ctype.c '-I "^EXPORT_SYMBOL" -I "^#include <linux/export.h>" -B'
-check arch/x86/include/asm/inat.h '-I "^#include [\"<]\(asm/\)*inat_types.h[\">]"'
-check arch/x86/include/asm/insn.h '-I "^#include [\"<]\(asm/\)*inat.h[\">]"'
-check arch/x86/lib/inat.c '-I "^#include [\"<]\(../include/\)*asm/insn.h[\">]"'
-check arch/x86/lib/insn.c '-I "^#include [\"<]\(../include/\)*asm/in\(at\|sn\).h[\">]" -I "^#include [\"<]\(../include/\)*asm/emulate_prefix.h[\">]"'
# diff non-symmetric files
check_2 tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl
diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c
index f57e075b0ed2..c72adbd67386 100644
--- a/tools/perf/tests/bpf.c
+++ b/tools/perf/tests/bpf.c
@@ -86,7 +86,7 @@ static struct {
.msg_load_fail = "check your vmlinux setting?",
.target_func = &epoll_pwait_loop,
.expect_result = (NR_ITERS + 1) / 2,
- .pin = true,
+ .pin = true,
},
#ifdef HAVE_BPF_PROLOGUE
{
@@ -99,13 +99,6 @@ static struct {
.expect_result = (NR_ITERS + 1) / 4,
},
#endif
- {
- .prog_id = LLVM_TESTCASE_BPF_RELOCATION,
- .desc = "BPF relocation checker",
- .name = "[bpf_relocation_test]",
- .msg_compile_fail = "fix 'perf test LLVM' first",
- .msg_load_fail = "libbpf error when dealing with relocation",
- },
};
static int do_test(struct bpf_object *obj, int (*func)(void),
diff --git a/tools/perf/tests/shell/daemon.sh b/tools/perf/tests/shell/daemon.sh
index 5ad3ca8d681b..58984380b211 100755
--- a/tools/perf/tests/shell/daemon.sh
+++ b/tools/perf/tests/shell/daemon.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# daemon operations
# SPDX-License-Identifier: GPL-2.0
diff --git a/tools/perf/trace/beauty/tracepoints/x86_msr.sh b/tools/perf/trace/beauty/tracepoints/x86_msr.sh
index 27ee1ea1fe94..9b0614a87831 100755
--- a/tools/perf/trace/beauty/tracepoints/x86_msr.sh
+++ b/tools/perf/trace/beauty/tracepoints/x86_msr.sh
@@ -15,7 +15,7 @@ x86_msr_index=${arch_x86_header_dir}/msr-index.h
printf "static const char *x86_MSRs[] = {\n"
regex='^[[:space:]]*#[[:space:]]*define[[:space:]]+MSR_([[:alnum:]][[:alnum:]_]+)[[:space:]]+(0x00000[[:xdigit:]]+)[[:space:]]*.*'
-egrep $regex ${x86_msr_index} | egrep -v 'MSR_(ATOM|P[46]|IA32_(TSCDEADLINE|UCODE_REV)|IDT_FCR4)' | \
+egrep $regex ${x86_msr_index} | egrep -v 'MSR_(ATOM|P[46]|IA32_(TSC_DEADLINE|UCODE_REV)|IDT_FCR4)' | \
sed -r "s/$regex/\2 \1/g" | sort -n | \
xargs printf "\t[%s] = \"%s\",\n"
printf "};\n\n"
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
index f3ac9d40cebf..2e5eff4f8f03 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-pkt-decoder.c
@@ -210,8 +210,10 @@ static int arm_spe_do_get_packet(const unsigned char *buf, size_t len,
if ((hdr & SPE_HEADER0_MASK2) == SPE_HEADER0_EXTENDED) {
/* 16-bit extended format header */
- ext_hdr = 1;
+ if (len == 1)
+ return ARM_SPE_BAD_PACKET;
+ ext_hdr = 1;
hdr = buf[1];
if (hdr == SPE_HEADER1_ALIGNMENT)
return arm_spe_get_alignment(buf, len, packet);
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 953f4afacd3b..1b4091a3b508 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -298,10 +298,6 @@ static int auxtrace_queues__queue_buffer(struct auxtrace_queues *queues,
queue->set = true;
queue->tid = buffer->tid;
queue->cpu = buffer->cpu;
- } else if (buffer->cpu != queue->cpu || buffer->tid != queue->tid) {
- pr_err("auxtrace queue conflict: cpu %d, tid %d vs cpu %d, tid %d\n",
- queue->cpu, queue->tid, buffer->cpu, buffer->tid);
- return -EINVAL;
}
buffer->buffer_nr = queues->next_buffer_nr++;
@@ -638,7 +634,7 @@ int auxtrace_parse_snapshot_options(struct auxtrace_record *itr,
break;
}
- if (itr)
+ if (itr && itr->parse_snapshot_options)
return itr->parse_snapshot_options(itr, opts, str);
pr_err("No AUX area tracing to snapshot\n");
diff --git a/tools/perf/util/block-info.c b/tools/perf/util/block-info.c
index 423ec69bda6c..5ecd4f401f32 100644
--- a/tools/perf/util/block-info.c
+++ b/tools/perf/util/block-info.c
@@ -201,7 +201,7 @@ static int block_total_cycles_pct_entry(struct perf_hpp_fmt *fmt,
double ratio = 0.0;
if (block_fmt->total_cycles)
- ratio = (double)bi->cycles / (double)block_fmt->total_cycles;
+ ratio = (double)bi->cycles_aggr / (double)block_fmt->total_cycles;
return color_pct(hpp, block_fmt->width, 100.0 * ratio);
}
@@ -216,9 +216,9 @@ static int64_t block_total_cycles_pct_sort(struct perf_hpp_fmt *fmt,
double l, r;
if (block_fmt->total_cycles) {
- l = ((double)bi_l->cycles /
+ l = ((double)bi_l->cycles_aggr /
(double)block_fmt->total_cycles) * 100000.0;
- r = ((double)bi_r->cycles /
+ r = ((double)bi_r->cycles_aggr /
(double)block_fmt->total_cycles) * 100000.0;
return (int64_t)l - (int64_t)r;
}
diff --git a/tools/perf/util/bpf-event.c b/tools/perf/util/bpf-event.c
index 57d58c81a5f8..cdecda1ddd36 100644
--- a/tools/perf/util/bpf-event.c
+++ b/tools/perf/util/bpf-event.c
@@ -196,25 +196,32 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session,
}
if (info_linear->info_len < offsetof(struct bpf_prog_info, prog_tags)) {
+ free(info_linear);
pr_debug("%s: the kernel is too old, aborting\n", __func__);
return -2;
}
info = &info_linear->info;
+ if (!info->jited_ksyms) {
+ free(info_linear);
+ return -1;
+ }
/* number of ksyms, func_lengths, and tags should match */
sub_prog_cnt = info->nr_jited_ksyms;
if (sub_prog_cnt != info->nr_prog_tags ||
- sub_prog_cnt != info->nr_jited_func_lens)
+ sub_prog_cnt != info->nr_jited_func_lens) {
+ free(info_linear);
return -1;
+ }
/* check BTF func info support */
if (info->btf_id && info->nr_func_info && info->func_info_rec_size) {
/* btf func info number should be same as sub_prog_cnt */
if (sub_prog_cnt != info->nr_func_info) {
pr_debug("%s: mismatch in BPF sub program count and BTF function info count, aborting\n", __func__);
- err = -1;
- goto out;
+ free(info_linear);
+ return -1;
}
if (btf__get_from_id(info->btf_id, &btf)) {
pr_debug("%s: failed to get BTF of id %u, aborting\n", __func__, info->btf_id);
diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c
index f29af4fc3d09..8fca4779ae6a 100644
--- a/tools/perf/util/data.c
+++ b/tools/perf/util/data.c
@@ -35,7 +35,7 @@ void perf_data__close_dir(struct perf_data *data)
int perf_data__create_dir(struct perf_data *data, int nr)
{
struct perf_data_file *files = NULL;
- int i, ret = -1;
+ int i, ret;
if (WARN_ON(!data->is_dir))
return -EINVAL;
@@ -51,7 +51,8 @@ int perf_data__create_dir(struct perf_data *data, int nr)
for (i = 0; i < nr; i++) {
struct perf_data_file *file = &files[i];
- if (asprintf(&file->path, "%s/data.%d", data->path, i) < 0)
+ ret = asprintf(&file->path, "%s/data.%d", data->path, i);
+ if (ret < 0)
goto out_err;
ret = open(file->path, O_RDWR|O_CREAT|O_TRUNC, S_IRUSR|S_IWUSR);
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
index 2f6cc7eea251..593f20e9774c 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
@@ -169,11 +169,13 @@ int intel_pt_get_insn(const unsigned char *buf, size_t len, int x86_64,
struct intel_pt_insn *intel_pt_insn)
{
struct insn insn;
+ int ret;
- insn_init(&insn, buf, len, x86_64);
- insn_get_length(&insn);
- if (!insn_complete(&insn) || insn.length > len)
+ ret = insn_decode(&insn, buf, len,
+ x86_64 ? INSN_MODE_64 : INSN_MODE_32);
+ if (ret < 0 || insn.length > len)
return -1;
+
intel_pt_insn_decoder(&insn, intel_pt_insn);
if (insn.length < INTEL_PT_INSN_BUF_SZ)
memcpy(intel_pt_insn->buf, buf, insn.length);
@@ -194,12 +196,13 @@ const char *dump_insn(struct perf_insn *x, uint64_t ip __maybe_unused,
u8 *inbuf, int inlen, int *lenp)
{
struct insn insn;
- int n, i;
+ int n, i, ret;
int left;
- insn_init(&insn, inbuf, inlen, x->is64bit);
- insn_get_length(&insn);
- if (!insn_complete(&insn) || insn.length > inlen)
+ ret = insn_decode(&insn, inbuf, inlen,
+ x->is64bit ? INSN_MODE_64 : INSN_MODE_32);
+
+ if (ret < 0 || insn.length > inlen)
return "<bad>";
if (lenp)
*lenp = insn.length;
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index fbc40a2c17d4..8af693d9678c 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -840,15 +840,18 @@ out:
int maps__clone(struct thread *thread, struct maps *parent)
{
struct maps *maps = thread->maps;
- int err = -ENOMEM;
+ int err;
struct map *map;
down_read(&parent->lock);
maps__for_each_entry(parent, map) {
struct map *new = map__clone(map);
- if (new == NULL)
+
+ if (new == NULL) {
+ err = -ENOMEM;
goto out_unlock;
+ }
err = unwind__prepare_access(maps, new, NULL);
if (err)
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 42c84adeb2fb..c0c0fab22cb8 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -356,6 +356,9 @@ __add_event(struct list_head *list, int *idx,
struct perf_cpu_map *cpus = pmu ? perf_cpu_map__get(pmu->cpus) :
cpu_list ? perf_cpu_map__new(cpu_list) : NULL;
+ if (pmu && attr->type == PERF_TYPE_RAW)
+ perf_pmu__warn_invalid_config(pmu, attr->config, name);
+
if (init_attr)
event_attr_init(attr);
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 44ef28302fc7..46fd0f998484 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -1812,3 +1812,36 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu)
return nr_caps;
}
+
+void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config,
+ char *name)
+{
+ struct perf_pmu_format *format;
+ __u64 masks = 0, bits;
+ char buf[100];
+ unsigned int i;
+
+ list_for_each_entry(format, &pmu->format, list) {
+ if (format->value != PERF_PMU_FORMAT_VALUE_CONFIG)
+ continue;
+
+ for_each_set_bit(i, format->bits, PERF_PMU_FORMAT_BITS)
+ masks |= 1ULL << i;
+ }
+
+ /*
+ * Kernel doesn't export any valid format bits.
+ */
+ if (masks == 0)
+ return;
+
+ bits = config & ~masks;
+ if (bits == 0)
+ return;
+
+ bitmap_scnprintf((unsigned long *)&bits, sizeof(bits) * 8, buf, sizeof(buf));
+
+ pr_warning("WARNING: event '%s' not valid (bits %s of config "
+ "'%llx' not supported by kernel)!\n",
+ name ?: "N/A", buf, config);
+}
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 8164388478c6..160b0f561771 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -123,4 +123,7 @@ int perf_pmu__convert_scale(const char *scale, char **end, double *sval);
int perf_pmu__caps_parse(struct perf_pmu *pmu);
+void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config,
+ char *name);
+
#endif /* __PMU_H */
diff --git a/tools/perf/util/synthetic-events.c b/tools/perf/util/synthetic-events.c
index b698046ec2db..dff178103ce5 100644
--- a/tools/perf/util/synthetic-events.c
+++ b/tools/perf/util/synthetic-events.c
@@ -424,7 +424,7 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
while (!io.eof) {
static const char anonstr[] = "//anon";
- size_t size;
+ size_t size, aligned_size;
/* ensure null termination since stack will be reused. */
event->mmap2.filename[0] = '\0';
@@ -484,11 +484,12 @@ out:
}
size = strlen(event->mmap2.filename) + 1;
- size = PERF_ALIGN(size, sizeof(u64));
+ aligned_size = PERF_ALIGN(size, sizeof(u64));
event->mmap2.len -= event->mmap.start;
event->mmap2.header.size = (sizeof(event->mmap2) -
- (sizeof(event->mmap2.filename) - size));
- memset(event->mmap2.filename + size, 0, machine->id_hdr_size);
+ (sizeof(event->mmap2.filename) - aligned_size));
+ memset(event->mmap2.filename + size, 0, machine->id_hdr_size +
+ (aligned_size - size));
event->mmap2.header.size += machine->id_hdr_size;
event->mmap2.pid = tgid;
event->mmap2.tid = pid;
@@ -758,7 +759,7 @@ static int __event__synthesize_thread(union perf_event *comm_event,
for (i = 0; i < n; i++) {
char *end;
pid_t _pid;
- bool kernel_thread;
+ bool kernel_thread = false;
_pid = strtol(dirent[i]->d_name, &end, 10);
if (*end)
diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c
index 3cc91ad048ea..43beb169631d 100644
--- a/tools/perf/util/vdso.c
+++ b/tools/perf/util/vdso.c
@@ -133,6 +133,8 @@ static struct dso *__machine__addnew_vdso(struct machine *machine, const char *s
if (dso != NULL) {
__dsos__add(&machine->dsos, dso);
dso__set_long_name(dso, long_name, false);
+ /* Put dso here because __dsos_add already got it */
+ dso__put(dso);
}
return dso;
diff --git a/tools/power/acpi/common/cmfsize.c b/tools/power/acpi/common/cmfsize.c
index 9ea2c0aeb86c..185b8c588e1d 100644
--- a/tools/power/acpi/common/cmfsize.c
+++ b/tools/power/acpi/common/cmfsize.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
/******************************************************************************
*
- * Module Name: cfsize - Common get file size function
+ * Module Name: cmfsize - Common get file size function
*
* Copyright (C) 2000 - 2021, Intel Corp.
*
diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py
index 81f4b8abbdf7..ffd50953a024 100755
--- a/tools/power/pm-graph/sleepgraph.py
+++ b/tools/power/pm-graph/sleepgraph.py
@@ -6819,7 +6819,7 @@ if __name__ == '__main__':
sysvals.outdir = val
sysvals.notestrun = True
if(os.path.isdir(val) == False):
- doError('%s is not accesible' % val)
+ doError('%s is not accessible' % val)
elif(arg == '-filter'):
try:
val = next(args)
diff --git a/tools/power/x86/intel-speed-select/isst-config.c b/tools/power/x86/intel-speed-select/isst-config.c
index 582feb88eca3..ab940c508ef0 100644
--- a/tools/power/x86/intel-speed-select/isst-config.c
+++ b/tools/power/x86/intel-speed-select/isst-config.c
@@ -15,7 +15,7 @@ struct process_cmd_struct {
int arg;
};
-static const char *version_str = "v1.8";
+static const char *version_str = "v1.9";
static const int supported_api_ver = 1;
static struct isst_if_platform_info isst_platform_info;
static char *progname;
@@ -381,6 +381,18 @@ static void set_cpu_online_offline(int cpu, int state)
close(fd);
}
+static void force_all_cpus_online(void)
+{
+ int i;
+
+ fprintf(stderr, "Forcing all CPUs online\n");
+
+ for (i = 0; i < topo_max_cpus; ++i)
+ set_cpu_online_offline(i, 1);
+
+ unlink("/var/run/isst_cpu_topology.dat");
+}
+
#define MAX_PACKAGE_COUNT 8
#define MAX_DIE_PER_PACKAGE 2
static void for_each_online_package_in_set(void (*callback)(int, void *, void *,
@@ -959,6 +971,10 @@ static void isst_print_extended_platform_info(void)
fprintf(outf, "Intel(R) SST-BF (feature base-freq) is not supported\n");
ret = isst_read_pm_config(i, &cp_state, &cp_cap);
+ if (ret) {
+ fprintf(outf, "Intel(R) SST-CP (feature core-power) status is unknown\n");
+ return;
+ }
if (cp_cap)
fprintf(outf, "Intel(R) SST-CP (feature core-power) is supported\n");
else
@@ -2763,6 +2779,7 @@ static void usage(void)
printf("\t[-f|--format] : output format [json|text]. Default: text\n");
printf("\t[-h|--help] : Print help\n");
printf("\t[-i|--info] : Print platform information\n");
+ printf("\t[-a|--all-cpus-online] : Force online every CPU in the system\n");
printf("\t[-o|--out] : Output file\n");
printf("\t\t\tDefault : stderr\n");
printf("\t[-p|--pause] : Delay between two mail box commands in milliseconds\n");
@@ -2791,7 +2808,6 @@ static void usage(void)
static void print_version(void)
{
fprintf(outf, "Version %s\n", version_str);
- fprintf(outf, "Build date %s time %s\n", __DATE__, __TIME__);
exit(0);
}
@@ -2800,11 +2816,12 @@ static void cmdline(int argc, char **argv)
const char *pathname = "/dev/isst_interface";
char *ptr;
FILE *fp;
- int opt;
+ int opt, force_cpus_online = 0;
int option_index = 0;
int ret;
static struct option long_options[] = {
+ { "all-cpus-online", no_argument, 0, 'a' },
{ "cpu", required_argument, 0, 'c' },
{ "debug", no_argument, 0, 'd' },
{ "format", required_argument, 0, 'f' },
@@ -2840,9 +2857,12 @@ static void cmdline(int argc, char **argv)
}
progname = argv[0];
- while ((opt = getopt_long_only(argc, argv, "+c:df:hio:v", long_options,
+ while ((opt = getopt_long_only(argc, argv, "+c:df:hio:va", long_options,
&option_index)) != -1) {
switch (opt) {
+ case 'a':
+ force_cpus_online = 1;
+ break;
case 'c':
parse_cpu_command(optarg);
break;
@@ -2892,6 +2912,8 @@ static void cmdline(int argc, char **argv)
exit(0);
}
set_max_cpu_num();
+ if (force_cpus_online)
+ force_all_cpus_online();
store_cpu_topology();
set_cpu_present_cpu_mask();
set_cpu_target_cpu_mask();
diff --git a/tools/power/x86/intel-speed-select/isst-display.c b/tools/power/x86/intel-speed-select/isst-display.c
index 8e54ce47648e..3bf1820c0da1 100644
--- a/tools/power/x86/intel-speed-select/isst-display.c
+++ b/tools/power/x86/intel-speed-select/isst-display.c
@@ -25,10 +25,14 @@ static void printcpulist(int str_len, char *str, int mask_size,
index = snprintf(&str[curr_index],
str_len - curr_index, ",");
curr_index += index;
+ if (curr_index >= str_len)
+ break;
}
index = snprintf(&str[curr_index], str_len - curr_index, "%d",
i);
curr_index += index;
+ if (curr_index >= str_len)
+ break;
first = 0;
}
}
@@ -64,10 +68,14 @@ static void printcpumask(int str_len, char *str, int mask_size,
index = snprintf(&str[curr_index], str_len - curr_index, "%08x",
mask[i]);
curr_index += index;
+ if (curr_index >= str_len)
+ break;
if (i) {
strncat(&str[curr_index], ",", str_len - curr_index);
curr_index++;
}
+ if (curr_index >= str_len)
+ break;
}
free(mask);
@@ -185,7 +193,7 @@ static void _isst_pbf_display_information(int cpu, FILE *outf, int level,
int disp_level)
{
char header[256];
- char value[256];
+ char value[512];
snprintf(header, sizeof(header), "speed-select-base-freq-properties");
format_and_print(outf, disp_level, header, NULL);
@@ -349,7 +357,7 @@ void isst_ctdp_display_information(int cpu, FILE *outf, int tdp_level,
struct isst_pkg_ctdp *pkg_dev)
{
char header[256];
- char value[256];
+ char value[512];
static int level;
int i;
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index a7c4f0772e53..5939615265f1 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -2449,7 +2449,7 @@ dump_knl_turbo_ratio_limits(void)
fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n",
base_cpu, msr);
- /**
+ /*
* Turbo encoding in KNL is as follows:
* [0] -- Reserved
* [7:1] -- Base value of number of active cores of bucket 1.
diff --git a/tools/scripts/Makefile.include b/tools/scripts/Makefile.include
index a402f32a145c..25adfec2cb39 100644
--- a/tools/scripts/Makefile.include
+++ b/tools/scripts/Makefile.include
@@ -39,8 +39,6 @@ EXTRA_WARNINGS += -Wundef
EXTRA_WARNINGS += -Wwrite-strings
EXTRA_WARNINGS += -Wformat
-CC_NO_CLANG := $(shell $(CC) -dM -E -x c /dev/null | grep -Fq "__clang__"; echo $$?)
-
# Makefiles suck: This macro sets a default value of $(2) for the
# variable named by $(1), unless the variable has been set by
# environment or command line. This is necessary for CC and AR
@@ -52,12 +50,22 @@ define allow-override
$(eval $(1) = $(2)))
endef
+ifneq ($(LLVM),)
+$(call allow-override,CC,clang)
+$(call allow-override,AR,llvm-ar)
+$(call allow-override,LD,ld.lld)
+$(call allow-override,CXX,clang++)
+$(call allow-override,STRIP,llvm-strip)
+else
# Allow setting various cross-compile vars or setting CROSS_COMPILE as a prefix.
$(call allow-override,CC,$(CROSS_COMPILE)gcc)
$(call allow-override,AR,$(CROSS_COMPILE)ar)
$(call allow-override,LD,$(CROSS_COMPILE)ld)
$(call allow-override,CXX,$(CROSS_COMPILE)g++)
$(call allow-override,STRIP,$(CROSS_COMPILE)strip)
+endif
+
+CC_NO_CLANG := $(shell $(CC) -dM -E -x c /dev/null | grep -Fq "__clang__"; echo $$?)
ifneq ($(LLVM),)
HOSTAR ?= llvm-ar
@@ -86,7 +94,8 @@ endif
# in newer systems.
# Needed for the __raw_cmpxchg in tools/arch/x86/include/asm/cmpxchg.h
#
-# See https://lkml.org/lkml/2006/11/28/253 and https://gcc.gnu.org/gcc-4.8/changes.html,
+# See https://lore.kernel.org/lkml/9a8748490611281710g78402fbeh8ff7fcc162dbcbca@mail.gmail.com/
+# and https://gcc.gnu.org/gcc-4.8/changes.html,
# that takes into account Linus's comments (search for Wshadow) for the reasoning about
# -Wshadow not being interesting before gcc 4.8.
diff --git a/tools/spi/Makefile b/tools/spi/Makefile
index ada881afb489..0aa6dbd31fb8 100644
--- a/tools/spi/Makefile
+++ b/tools/spi/Makefile
@@ -25,11 +25,12 @@ include $(srctree)/tools/build/Makefile.include
#
# We need the following to be outside of kernel tree
#
-$(OUTPUT)include/linux/spi/spidev.h: ../../include/uapi/linux/spi/spidev.h
+$(OUTPUT)include/linux/spi: ../../include/uapi/linux/spi
mkdir -p $(OUTPUT)include/linux/spi 2>&1 || true
ln -sf $(CURDIR)/../../include/uapi/linux/spi/spidev.h $@
+ ln -sf $(CURDIR)/../../include/uapi/linux/spi/spi.h $@
-prepare: $(OUTPUT)include/linux/spi/spidev.h
+prepare: $(OUTPUT)include/linux/spi
#
# spidev_test
diff --git a/tools/testing/kunit/configs/broken_on_uml.config b/tools/testing/kunit/configs/broken_on_uml.config
index a7f0603d33f6..690870043ac0 100644
--- a/tools/testing/kunit/configs/broken_on_uml.config
+++ b/tools/testing/kunit/configs/broken_on_uml.config
@@ -40,3 +40,5 @@
# CONFIG_RESET_BRCMSTB_RESCAL is not set
# CONFIG_RESET_INTEL_GW is not set
# CONFIG_ADI_AXI_ADC is not set
+# CONFIG_DEBUG_PAGEALLOC is not set
+# CONFIG_PAGE_POISONING is not set
diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py
index d5144fcb03ac..5da8fb3762f9 100755
--- a/tools/testing/kunit/kunit.py
+++ b/tools/testing/kunit/kunit.py
@@ -184,7 +184,9 @@ def add_common_opts(parser) -> None:
help='Run all KUnit tests through allyesconfig',
action='store_true')
parser.add_argument('--kunitconfig',
- help='Path to Kconfig fragment that enables KUnit tests',
+ help='Path to Kconfig fragment that enables KUnit tests.'
+ ' If given a directory, (e.g. lib/kunit), "/.kunitconfig" '
+ 'will get automatically appended.',
metavar='kunitconfig')
def add_build_opts(parser) -> None:
diff --git a/tools/testing/kunit/kunit_config.py b/tools/testing/kunit/kunit_config.py
index 0b550cbd667d..1e2683dcc0e7 100644
--- a/tools/testing/kunit/kunit_config.py
+++ b/tools/testing/kunit/kunit_config.py
@@ -13,7 +13,7 @@ from typing import List, Set
CONFIG_IS_NOT_SET_PATTERN = r'^# CONFIG_(\w+) is not set$'
CONFIG_PATTERN = r'^CONFIG_(\w+)=(\S+|".*")$'
-KconfigEntryBase = collections.namedtuple('KconfigEntry', ['name', 'value'])
+KconfigEntryBase = collections.namedtuple('KconfigEntryBase', ['name', 'value'])
class KconfigEntry(KconfigEntryBase):
diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
index f309a33256cd..89a7d4024e87 100644
--- a/tools/testing/kunit/kunit_kernel.py
+++ b/tools/testing/kunit/kunit_kernel.py
@@ -132,6 +132,8 @@ class LinuxSourceTree(object):
return
if kunitconfig_path:
+ if os.path.isdir(kunitconfig_path):
+ kunitconfig_path = os.path.join(kunitconfig_path, KUNITCONFIG_PATH)
if not os.path.exists(kunitconfig_path):
raise ConfigError(f'Specified kunitconfig ({kunitconfig_path}) does not exist')
else:
diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py
index 1ad3049e9069..2e809dd956a7 100755
--- a/tools/testing/kunit/kunit_tool_test.py
+++ b/tools/testing/kunit/kunit_tool_test.py
@@ -251,6 +251,12 @@ class LinuxSourceTreeTest(unittest.TestCase):
with tempfile.NamedTemporaryFile('wt') as kunitconfig:
tree = kunit_kernel.LinuxSourceTree('', kunitconfig_path=kunitconfig.name)
+ def test_dir_kunitconfig(self):
+ with tempfile.TemporaryDirectory('') as dir:
+ with open(os.path.join(dir, '.kunitconfig'), 'w') as f:
+ pass
+ tree = kunit_kernel.LinuxSourceTree('', kunitconfig_path=dir)
+
# TODO: add more test cases.
diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
index 3b796dd5e577..ca24f6839d50 100644
--- a/tools/testing/radix-tree/idr-test.c
+++ b/tools/testing/radix-tree/idr-test.c
@@ -296,21 +296,34 @@ static void *idr_throbber(void *arg)
return NULL;
}
+/*
+ * There are always either 1 or 2 objects in the IDR. If we find nothing,
+ * or we find something at an ID we didn't expect, that's a bug.
+ */
void idr_find_test_1(int anchor_id, int throbber_id)
{
pthread_t throbber;
time_t start = time(NULL);
- pthread_create(&throbber, NULL, idr_throbber, &throbber_id);
-
BUG_ON(idr_alloc(&find_idr, xa_mk_value(anchor_id), anchor_id,
anchor_id + 1, GFP_KERNEL) != anchor_id);
+ pthread_create(&throbber, NULL, idr_throbber, &throbber_id);
+
+ rcu_read_lock();
do {
int id = 0;
void *entry = idr_get_next(&find_idr, &id);
- BUG_ON(entry != xa_mk_value(id));
+ rcu_read_unlock();
+ if ((id != anchor_id && id != throbber_id) ||
+ entry != xa_mk_value(id)) {
+ printf("%s(%d, %d): %p at %d\n", __func__, anchor_id,
+ throbber_id, entry, id);
+ abort();
+ }
+ rcu_read_lock();
} while (time(NULL) < start + 11);
+ rcu_read_unlock();
pthread_join(throbber, NULL);
@@ -577,6 +590,7 @@ void ida_tests(void)
int __weak main(void)
{
+ rcu_register_thread();
radix_tree_init();
idr_checks();
ida_tests();
@@ -584,5 +598,6 @@ int __weak main(void)
rcu_barrier();
if (nr_allocated)
printf("nr_allocated = %d\n", nr_allocated);
+ rcu_unregister_thread();
return 0;
}
diff --git a/tools/testing/radix-tree/linux/compiler_types.h b/tools/testing/radix-tree/linux/compiler_types.h
deleted file mode 100644
index e69de29bb2d1..000000000000
--- a/tools/testing/radix-tree/linux/compiler_types.h
+++ /dev/null
diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c
index 9eae0fb5a67d..e00520cc6349 100644
--- a/tools/testing/radix-tree/multiorder.c
+++ b/tools/testing/radix-tree/multiorder.c
@@ -224,7 +224,9 @@ void multiorder_checks(void)
int __weak main(void)
{
+ rcu_register_thread();
radix_tree_init();
multiorder_checks();
+ rcu_unregister_thread();
return 0;
}
diff --git a/tools/testing/radix-tree/xarray.c b/tools/testing/radix-tree/xarray.c
index e61e43efe463..f20e12cbbfd4 100644
--- a/tools/testing/radix-tree/xarray.c
+++ b/tools/testing/radix-tree/xarray.c
@@ -25,11 +25,13 @@ void xarray_tests(void)
int __weak main(void)
{
+ rcu_register_thread();
radix_tree_init();
xarray_tests();
radix_tree_cpu_dead(1);
rcu_barrier();
if (nr_allocated)
printf("nr_allocated = %d\n", nr_allocated);
+ rcu_unregister_thread();
return 0;
}
diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile
index 2c9d012797a7..ced910fb4019 100644
--- a/tools/testing/selftests/arm64/Makefile
+++ b/tools/testing/selftests/arm64/Makefile
@@ -4,7 +4,7 @@
ARCH ?= $(shell uname -m 2>/dev/null || echo not)
ifneq (,$(filter $(ARCH),aarch64 arm64))
-ARM64_SUBTARGETS ?= tags signal pauth fp mte
+ARM64_SUBTARGETS ?= tags signal pauth fp mte bti
else
ARM64_SUBTARGETS :=
endif
diff --git a/tools/testing/selftests/arm64/bti/.gitignore b/tools/testing/selftests/arm64/bti/.gitignore
new file mode 100644
index 000000000000..73869fabada4
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/.gitignore
@@ -0,0 +1,2 @@
+btitest
+nobtitest
diff --git a/tools/testing/selftests/arm64/bti/Makefile b/tools/testing/selftests/arm64/bti/Makefile
new file mode 100644
index 000000000000..73e013c082a6
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/Makefile
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TEST_GEN_PROGS := btitest nobtitest
+
+PROGS := $(patsubst %,gen/%,$(TEST_GEN_PROGS))
+
+# These tests are built as freestanding binaries since otherwise BTI
+# support in ld.so is required which is not currently widespread; when
+# it is available it will still be useful to test this separately as the
+# cases for statically linked and dynamically lined binaries are
+# slightly different.
+
+CFLAGS_NOBTI = -DBTI=0
+CFLAGS_BTI = -mbranch-protection=standard -DBTI=1
+
+CFLAGS_COMMON = -ffreestanding -Wall -Wextra $(CFLAGS)
+
+BTI_CC_COMMAND = $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -c -o $@ $<
+NOBTI_CC_COMMAND = $(CC) $(CFLAGS_NOBTI) $(CFLAGS_COMMON) -c -o $@ $<
+
+%-bti.o: %.c
+ $(BTI_CC_COMMAND)
+
+%-bti.o: %.S
+ $(BTI_CC_COMMAND)
+
+%-nobti.o: %.c
+ $(NOBTI_CC_COMMAND)
+
+%-nobti.o: %.S
+ $(NOBTI_CC_COMMAND)
+
+BTI_OBJS = \
+ test-bti.o \
+ signal-bti.o \
+ start-bti.o \
+ syscall-bti.o \
+ system-bti.o \
+ teststubs-bti.o \
+ trampoline-bti.o
+gen/btitest: $(BTI_OBJS)
+ $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -o $@ $^
+
+NOBTI_OBJS = \
+ test-nobti.o \
+ signal-nobti.o \
+ start-nobti.o \
+ syscall-nobti.o \
+ system-nobti.o \
+ teststubs-nobti.o \
+ trampoline-nobti.o
+gen/nobtitest: $(NOBTI_OBJS)
+ $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -o $@ $^
+
+# Including KSFT lib.mk here will also mangle the TEST_GEN_PROGS list
+# to account for any OUTPUT target-dirs optionally provided by
+# the toplevel makefile
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): $(PROGS)
+ cp $(PROGS) $(OUTPUT)/
diff --git a/tools/testing/selftests/arm64/bti/assembler.h b/tools/testing/selftests/arm64/bti/assembler.h
new file mode 100644
index 000000000000..04e7b72880ef
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/assembler.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#ifndef ASSEMBLER_H
+#define ASSEMBLER_H
+
+#define NT_GNU_PROPERTY_TYPE_0 5
+#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000
+
+/* Bits for GNU_PROPERTY_AARCH64_FEATURE_1_BTI */
+#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0)
+#define GNU_PROPERTY_AARCH64_FEATURE_1_PAC (1U << 1)
+
+
+.macro startfn name:req
+ .globl \name
+\name:
+ .macro endfn
+ .size \name, . - \name
+ .type \name, @function
+ .purgem endfn
+ .endm
+.endm
+
+.macro emit_aarch64_feature_1_and
+ .pushsection .note.gnu.property, "a"
+ .align 3
+ .long 2f - 1f
+ .long 6f - 3f
+ .long NT_GNU_PROPERTY_TYPE_0
+1: .string "GNU"
+2:
+ .align 3
+3: .long GNU_PROPERTY_AARCH64_FEATURE_1_AND
+ .long 5f - 4f
+4:
+#if BTI
+ .long GNU_PROPERTY_AARCH64_FEATURE_1_PAC | \
+ GNU_PROPERTY_AARCH64_FEATURE_1_BTI
+#else
+ .long 0
+#endif
+5:
+ .align 3
+6:
+ .popsection
+.endm
+
+.macro paciasp
+ hint 0x19
+.endm
+
+.macro autiasp
+ hint 0x1d
+.endm
+
+.macro __bti_
+ hint 0x20
+.endm
+
+.macro __bti_c
+ hint 0x22
+.endm
+
+.macro __bti_j
+ hint 0x24
+.endm
+
+.macro __bti_jc
+ hint 0x26
+.endm
+
+.macro bti what=
+ __bti_\what
+.endm
+
+#endif /* ! ASSEMBLER_H */
diff --git a/tools/testing/selftests/arm64/bti/btitest.h b/tools/testing/selftests/arm64/bti/btitest.h
new file mode 100644
index 000000000000..2aff9b10336e
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/btitest.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#ifndef BTITEST_H
+#define BTITEST_H
+
+/* Trampolines for calling the test stubs: */
+void call_using_br_x0(void (*)(void));
+void call_using_br_x16(void (*)(void));
+void call_using_blr(void (*)(void));
+
+/* Test stubs: */
+void nohint_func(void);
+void bti_none_func(void);
+void bti_c_func(void);
+void bti_j_func(void);
+void bti_jc_func(void);
+void paciasp_func(void);
+
+#endif /* !BTITEST_H */
diff --git a/tools/testing/selftests/arm64/bti/compiler.h b/tools/testing/selftests/arm64/bti/compiler.h
new file mode 100644
index 000000000000..ebb6204f447a
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/compiler.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#ifndef COMPILER_H
+#define COMPILER_H
+
+#define __always_unused __attribute__((__unused__))
+#define __noreturn __attribute__((__noreturn__))
+#define __unreachable() __builtin_unreachable()
+
+/* curse(e) has value e, but the compiler cannot assume so */
+#define curse(e) ({ \
+ __typeof__(e) __curse_e = (e); \
+ asm ("" : "+r" (__curse_e)); \
+ __curse_e; \
+})
+
+#endif /* ! COMPILER_H */
diff --git a/tools/testing/selftests/arm64/bti/gen/.gitignore b/tools/testing/selftests/arm64/bti/gen/.gitignore
new file mode 100644
index 000000000000..73869fabada4
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/gen/.gitignore
@@ -0,0 +1,2 @@
+btitest
+nobtitest
diff --git a/tools/testing/selftests/arm64/bti/signal.c b/tools/testing/selftests/arm64/bti/signal.c
new file mode 100644
index 000000000000..f3fd29b91141
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/signal.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#include "system.h"
+#include "signal.h"
+
+int sigemptyset(sigset_t *s)
+{
+ unsigned int i;
+
+ for (i = 0; i < _NSIG_WORDS; ++i)
+ s->sig[i] = 0;
+
+ return 0;
+}
+
+int sigaddset(sigset_t *s, int n)
+{
+ if (n < 1 || n > _NSIG)
+ return -EINVAL;
+
+ s->sig[(n - 1) / _NSIG_BPW] |= 1UL << (n - 1) % _NSIG_BPW;
+ return 0;
+}
+
+int sigaction(int n, struct sigaction *sa, const struct sigaction *old)
+{
+ return syscall(__NR_rt_sigaction, n, sa, old, sizeof(sa->sa_mask));
+}
+
+int sigprocmask(int how, const sigset_t *mask, sigset_t *old)
+{
+ return syscall(__NR_rt_sigprocmask, how, mask, old, sizeof(*mask));
+}
diff --git a/tools/testing/selftests/arm64/bti/signal.h b/tools/testing/selftests/arm64/bti/signal.h
new file mode 100644
index 000000000000..103457dc880e
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/signal.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#ifndef SIGNAL_H
+#define SIGNAL_H
+
+#include <linux/signal.h>
+
+#include "system.h"
+
+typedef __sighandler_t sighandler_t;
+
+int sigemptyset(sigset_t *s);
+int sigaddset(sigset_t *s, int n);
+int sigaction(int n, struct sigaction *sa, const struct sigaction *old);
+int sigprocmask(int how, const sigset_t *mask, sigset_t *old);
+
+#endif /* ! SIGNAL_H */
diff --git a/tools/testing/selftests/arm64/bti/start.S b/tools/testing/selftests/arm64/bti/start.S
new file mode 100644
index 000000000000..831f952e0572
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/start.S
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#include "assembler.h"
+
+startfn _start
+ mov x0, sp
+ b start
+endfn
+
+emit_aarch64_feature_1_and
diff --git a/tools/testing/selftests/arm64/bti/syscall.S b/tools/testing/selftests/arm64/bti/syscall.S
new file mode 100644
index 000000000000..8dde8b6f3db1
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/syscall.S
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#include "assembler.h"
+
+startfn syscall
+ bti c
+ mov w8, w0
+ mov x0, x1
+ mov x1, x2
+ mov x2, x3
+ mov x3, x4
+ mov x4, x5
+ mov x5, x6
+ mov x6, x7
+ svc #0
+ ret
+endfn
+
+emit_aarch64_feature_1_and
diff --git a/tools/testing/selftests/arm64/bti/system.c b/tools/testing/selftests/arm64/bti/system.c
new file mode 100644
index 000000000000..6385d8d4973b
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/system.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#include "system.h"
+
+#include <asm/unistd.h>
+
+#include "compiler.h"
+
+void __noreturn exit(int n)
+{
+ syscall(__NR_exit, n);
+ __unreachable();
+}
+
+ssize_t write(int fd, const void *buf, size_t size)
+{
+ return syscall(__NR_write, fd, buf, size);
+}
diff --git a/tools/testing/selftests/arm64/bti/system.h b/tools/testing/selftests/arm64/bti/system.h
new file mode 100644
index 000000000000..aca118589705
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/system.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#ifndef SYSTEM_H
+#define SYSTEM_H
+
+#include <linux/types.h>
+#include <linux/stddef.h>
+
+typedef __kernel_size_t size_t;
+typedef __kernel_ssize_t ssize_t;
+
+#include <linux/errno.h>
+#include <asm/hwcap.h>
+#include <asm/ptrace.h>
+#include <asm/unistd.h>
+
+#include "compiler.h"
+
+long syscall(int nr, ...);
+
+void __noreturn exit(int n);
+ssize_t write(int fd, const void *buf, size_t size);
+
+#endif /* ! SYSTEM_H */
diff --git a/tools/testing/selftests/arm64/bti/test.c b/tools/testing/selftests/arm64/bti/test.c
new file mode 100644
index 000000000000..656b04976ccc
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/test.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019,2021 Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#include "system.h"
+
+#include <linux/errno.h>
+#include <linux/auxvec.h>
+#include <linux/signal.h>
+#include <asm/sigcontext.h>
+#include <asm/ucontext.h>
+
+typedef struct ucontext ucontext_t;
+
+#include "btitest.h"
+#include "compiler.h"
+#include "signal.h"
+
+#define EXPECTED_TESTS 18
+
+static volatile unsigned int test_num = 1;
+static unsigned int test_passed;
+static unsigned int test_failed;
+static unsigned int test_skipped;
+
+static void fdputs(int fd, const char *str)
+{
+ size_t len = 0;
+ const char *p = str;
+
+ while (*p++)
+ ++len;
+
+ write(fd, str, len);
+}
+
+static void putstr(const char *str)
+{
+ fdputs(1, str);
+}
+
+static void putnum(unsigned int num)
+{
+ char c;
+
+ if (num / 10)
+ putnum(num / 10);
+
+ c = '0' + (num % 10);
+ write(1, &c, 1);
+}
+
+#define puttestname(test_name, trampoline_name) do { \
+ putstr(test_name); \
+ putstr("/"); \
+ putstr(trampoline_name); \
+} while (0)
+
+void print_summary(void)
+{
+ putstr("# Totals: pass:");
+ putnum(test_passed);
+ putstr(" fail:");
+ putnum(test_failed);
+ putstr(" xfail:0 xpass:0 skip:");
+ putnum(test_skipped);
+ putstr(" error:0\n");
+}
+
+static const char *volatile current_test_name;
+static const char *volatile current_trampoline_name;
+static volatile int sigill_expected, sigill_received;
+
+static void handler(int n, siginfo_t *si __always_unused,
+ void *uc_ __always_unused)
+{
+ ucontext_t *uc = uc_;
+
+ putstr("# \t[SIGILL in ");
+ puttestname(current_test_name, current_trampoline_name);
+ putstr(", BTYPE=");
+ write(1, &"00011011"[((uc->uc_mcontext.pstate & PSR_BTYPE_MASK)
+ >> PSR_BTYPE_SHIFT) * 2], 2);
+ if (!sigill_expected) {
+ putstr("]\n");
+ putstr("not ok ");
+ putnum(test_num);
+ putstr(" ");
+ puttestname(current_test_name, current_trampoline_name);
+ putstr("(unexpected SIGILL)\n");
+ print_summary();
+ exit(128 + n);
+ }
+
+ putstr(" (expected)]\n");
+ sigill_received = 1;
+ /* zap BTYPE so that resuming the faulting code will work */
+ uc->uc_mcontext.pstate &= ~PSR_BTYPE_MASK;
+}
+
+static int skip_all;
+
+static void __do_test(void (*trampoline)(void (*)(void)),
+ void (*fn)(void),
+ const char *trampoline_name,
+ const char *name,
+ int expect_sigill)
+{
+ if (skip_all) {
+ test_skipped++;
+ putstr("ok ");
+ putnum(test_num);
+ putstr(" ");
+ puttestname(name, trampoline_name);
+ putstr(" # SKIP\n");
+
+ return;
+ }
+
+ /* Branch Target exceptions should only happen in BTI binaries: */
+ if (!BTI)
+ expect_sigill = 0;
+
+ sigill_expected = expect_sigill;
+ sigill_received = 0;
+ current_test_name = name;
+ current_trampoline_name = trampoline_name;
+
+ trampoline(fn);
+
+ if (expect_sigill && !sigill_received) {
+ putstr("not ok ");
+ test_failed++;
+ } else {
+ putstr("ok ");
+ test_passed++;
+ }
+ putnum(test_num++);
+ putstr(" ");
+ puttestname(name, trampoline_name);
+ putstr("\n");
+}
+
+#define do_test(expect_sigill_br_x0, \
+ expect_sigill_br_x16, \
+ expect_sigill_blr, \
+ name) \
+do { \
+ __do_test(call_using_br_x0, name, "call_using_br_x0", #name, \
+ expect_sigill_br_x0); \
+ __do_test(call_using_br_x16, name, "call_using_br_x16", #name, \
+ expect_sigill_br_x16); \
+ __do_test(call_using_blr, name, "call_using_blr", #name, \
+ expect_sigill_blr); \
+} while (0)
+
+void start(int *argcp)
+{
+ struct sigaction sa;
+ void *const *p;
+ const struct auxv_entry {
+ unsigned long type;
+ unsigned long val;
+ } *auxv;
+ unsigned long hwcap = 0, hwcap2 = 0;
+
+ putstr("TAP version 13\n");
+ putstr("1..");
+ putnum(EXPECTED_TESTS);
+ putstr("\n");
+
+ /* Gross hack for finding AT_HWCAP2 from the initial process stack: */
+ p = (void *const *)argcp + 1 + *argcp + 1; /* start of environment */
+ /* step over environment */
+ while (*p++)
+ ;
+ for (auxv = (const struct auxv_entry *)p; auxv->type != AT_NULL; ++auxv) {
+ switch (auxv->type) {
+ case AT_HWCAP:
+ hwcap = auxv->val;
+ break;
+ case AT_HWCAP2:
+ hwcap2 = auxv->val;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (hwcap & HWCAP_PACA)
+ putstr("# HWCAP_PACA present\n");
+ else
+ putstr("# HWCAP_PACA not present\n");
+
+ if (hwcap2 & HWCAP2_BTI) {
+ putstr("# HWCAP2_BTI present\n");
+ if (!(hwcap & HWCAP_PACA))
+ putstr("# Bad hardware? Expect problems.\n");
+ } else {
+ putstr("# HWCAP2_BTI not present\n");
+ skip_all = 1;
+ }
+
+ putstr("# Test binary");
+ if (!BTI)
+ putstr(" not");
+ putstr(" built for BTI\n");
+
+ sa.sa_handler = (sighandler_t)(void *)handler;
+ sa.sa_flags = SA_SIGINFO;
+ sigemptyset(&sa.sa_mask);
+ sigaction(SIGILL, &sa, NULL);
+ sigaddset(&sa.sa_mask, SIGILL);
+ sigprocmask(SIG_UNBLOCK, &sa.sa_mask, NULL);
+
+ do_test(1, 1, 1, nohint_func);
+ do_test(1, 1, 1, bti_none_func);
+ do_test(1, 0, 0, bti_c_func);
+ do_test(0, 0, 1, bti_j_func);
+ do_test(0, 0, 0, bti_jc_func);
+ do_test(1, 0, 0, paciasp_func);
+
+ print_summary();
+
+ if (test_num - 1 != EXPECTED_TESTS)
+ putstr("# WARNING - EXPECTED TEST COUNT WRONG\n");
+
+ if (test_failed)
+ exit(1);
+ else
+ exit(0);
+}
diff --git a/tools/testing/selftests/arm64/bti/teststubs.S b/tools/testing/selftests/arm64/bti/teststubs.S
new file mode 100644
index 000000000000..b62c8c35f67e
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/teststubs.S
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#include "assembler.h"
+
+startfn bti_none_func
+ bti
+ ret
+endfn
+
+startfn bti_c_func
+ bti c
+ ret
+endfn
+
+startfn bti_j_func
+ bti j
+ ret
+endfn
+
+startfn bti_jc_func
+ bti jc
+ ret
+endfn
+
+startfn paciasp_func
+ paciasp
+ autiasp
+ ret
+endfn
+
+startfn nohint_func
+ ret
+endfn
+
+emit_aarch64_feature_1_and
diff --git a/tools/testing/selftests/arm64/bti/trampoline.S b/tools/testing/selftests/arm64/bti/trampoline.S
new file mode 100644
index 000000000000..09beb3f361f1
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/trampoline.S
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#include "assembler.h"
+
+startfn call_using_br_x0
+ bti c
+ br x0
+endfn
+
+startfn call_using_br_x16
+ bti c
+ mov x16, x0
+ br x16
+endfn
+
+startfn call_using_blr
+ paciasp
+ stp x29, x30, [sp, #-16]!
+ blr x0
+ ldp x29, x30, [sp], #16
+ autiasp
+ ret
+endfn
+
+emit_aarch64_feature_1_and
diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S
index 9210691aa998..e3e08d9c7020 100644
--- a/tools/testing/selftests/arm64/fp/sve-test.S
+++ b/tools/testing/selftests/arm64/fp/sve-test.S
@@ -284,16 +284,28 @@ endfunction
// Set up test pattern in the FFR
// x0: pid
// x2: generation
+//
+// We need to generate a canonical FFR value, which consists of a number of
+// low "1" bits, followed by a number of zeros. This gives us 17 unique values
+// per 16 bits of FFR, so we create a 4 bit signature out of the PID and
+// generation, and use that as the initial number of ones in the pattern.
+// We fill the upper lanes of FFR with zeros.
// Beware: corrupts P0.
function setup_ffr
mov x4, x30
- bl pattern
+ and w0, w0, #0x3
+ bfi w0, w2, #2, #2
+ mov w1, #1
+ lsl w1, w1, w0
+ sub w1, w1, #1
+
ldr x0, =ffrref
- ldr x1, =scratch
- rdvl x2, #1
- lsr x2, x2, #3
- bl memcpy
+ strh w1, [x0], 2
+ rdvl x1, #1
+ lsr x1, x1, #3
+ sub x1, x1, #2
+ bl memclr
mov x0, #0
ldr x1, =ffrref
diff --git a/tools/testing/selftests/arm64/mte/Makefile b/tools/testing/selftests/arm64/mte/Makefile
index 0b3af552632a..409e3e53d00a 100644
--- a/tools/testing/selftests/arm64/mte/Makefile
+++ b/tools/testing/selftests/arm64/mte/Makefile
@@ -1,14 +1,18 @@
# SPDX-License-Identifier: GPL-2.0
# Copyright (C) 2020 ARM Limited
-CFLAGS += -std=gnu99 -I. -lpthread
+# preserve CC value from top level Makefile
+ifeq ($(CC),cc)
+CC := $(CROSS_COMPILE)gcc
+endif
+
+CFLAGS += -std=gnu99 -I. -pthread
+LDFLAGS += -pthread
SRCS := $(filter-out mte_common_util.c,$(wildcard *.c))
PROGS := $(patsubst %.c,%,$(SRCS))
#Add mte compiler option
-ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep gcc),)
CFLAGS += -march=armv8.5-a+memtag
-endif
#check if the compiler works well
mte_cc_support := $(shell if ($(CC) $(CFLAGS) -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi)
@@ -19,11 +23,14 @@ TEST_GEN_PROGS := $(PROGS)
# Get Kernel headers installed and use them.
KSFT_KHDR_INSTALL := 1
+else
+ $(warning compiler "$(CC)" does not support the ARMv8.5 MTE extension.)
+ $(warning test program "mte" will not be created.)
endif
# Include KSFT lib.mk.
include ../../lib.mk
ifeq ($(mte_cc_support),1)
-$(TEST_GEN_PROGS): mte_common_util.c mte_common_util.h mte_helper.S
+$(TEST_GEN_PROGS): mte_common_util.c mte_helper.S
endif
diff --git a/tools/testing/selftests/arm64/mte/check_ksm_options.c b/tools/testing/selftests/arm64/mte/check_ksm_options.c
index 3b23c4d61d38..88c74bc46d4f 100644
--- a/tools/testing/selftests/arm64/mte/check_ksm_options.c
+++ b/tools/testing/selftests/arm64/mte/check_ksm_options.c
@@ -33,7 +33,10 @@ static unsigned long read_sysfs(char *str)
ksft_print_msg("ERR: missing %s\n", str);
return 0;
}
- fscanf(f, "%lu", &val);
+ if (fscanf(f, "%lu", &val) != 1) {
+ ksft_print_msg("ERR: parsing %s\n", str);
+ val = 0;
+ }
fclose(f);
return val;
}
diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c
index 4bfa80f2a8c3..1de7a0abd0ae 100644
--- a/tools/testing/selftests/arm64/mte/check_user_mem.c
+++ b/tools/testing/selftests/arm64/mte/check_user_mem.c
@@ -33,7 +33,8 @@ static int check_usermem_access_fault(int mem_type, int mode, int mapping)
if (fd == -1)
return KSFT_FAIL;
for (i = 0; i < len; i++)
- write(fd, &val, sizeof(val));
+ if (write(fd, &val, sizeof(val)) != sizeof(val))
+ return KSFT_FAIL;
lseek(fd, 0, 0);
ptr = mte_allocate_memory(len, mem_type, mapping, true);
if (check_allocated_memory(ptr, len, mem_type, true) != KSFT_PASS) {
diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c
index 39f8908988ea..f50ac31920d1 100644
--- a/tools/testing/selftests/arm64/mte/mte_common_util.c
+++ b/tools/testing/selftests/arm64/mte/mte_common_util.c
@@ -181,10 +181,17 @@ void *mte_allocate_file_memory(size_t size, int mem_type, int mapping, bool tags
}
/* Initialize the file for mappable size */
lseek(fd, 0, SEEK_SET);
- for (index = INIT_BUFFER_SIZE; index < size; index += INIT_BUFFER_SIZE)
- write(fd, buffer, INIT_BUFFER_SIZE);
+ for (index = INIT_BUFFER_SIZE; index < size; index += INIT_BUFFER_SIZE) {
+ if (write(fd, buffer, INIT_BUFFER_SIZE) != INIT_BUFFER_SIZE) {
+ perror("initialising buffer");
+ return NULL;
+ }
+ }
index -= INIT_BUFFER_SIZE;
- write(fd, buffer, size - index);
+ if (write(fd, buffer, size - index) != size - index) {
+ perror("initialising buffer");
+ return NULL;
+ }
return __mte_allocate_memory_range(size, mem_type, mapping, 0, 0, tags, fd);
}
@@ -202,9 +209,15 @@ void *mte_allocate_file_memory_tag_range(size_t size, int mem_type, int mapping,
/* Initialize the file for mappable size */
lseek(fd, 0, SEEK_SET);
for (index = INIT_BUFFER_SIZE; index < map_size; index += INIT_BUFFER_SIZE)
- write(fd, buffer, INIT_BUFFER_SIZE);
+ if (write(fd, buffer, INIT_BUFFER_SIZE) != INIT_BUFFER_SIZE) {
+ perror("initialising buffer");
+ return NULL;
+ }
index -= INIT_BUFFER_SIZE;
- write(fd, buffer, map_size - index);
+ if (write(fd, buffer, map_size - index) != map_size - index) {
+ perror("initialising buffer");
+ return NULL;
+ }
return __mte_allocate_memory_range(size, mem_type, mapping, range_before,
range_after, true, fd);
}
@@ -271,29 +284,20 @@ int mte_switch_mode(int mte_option, unsigned long incl_mask)
en |= (incl_mask << PR_MTE_TAG_SHIFT);
/* Enable address tagging ABI, mte error reporting mode and tag inclusion mask. */
- if (!prctl(PR_SET_TAGGED_ADDR_CTRL, en, 0, 0, 0) == 0) {
+ if (prctl(PR_SET_TAGGED_ADDR_CTRL, en, 0, 0, 0) != 0) {
ksft_print_msg("FAIL:prctl PR_SET_TAGGED_ADDR_CTRL for mte mode\n");
return -EINVAL;
}
return 0;
}
-#define ID_AA64PFR1_MTE_SHIFT 8
-#define ID_AA64PFR1_MTE 2
-
int mte_default_setup(void)
{
- unsigned long hwcaps = getauxval(AT_HWCAP);
+ unsigned long hwcaps2 = getauxval(AT_HWCAP2);
unsigned long en = 0;
int ret;
- if (!(hwcaps & HWCAP_CPUID)) {
- ksft_print_msg("FAIL: CPUID registers unavailable\n");
- return KSFT_FAIL;
- }
- /* Read ID_AA64PFR1_EL1 register */
- asm volatile("mrs %0, id_aa64pfr1_el1" : "=r"(hwcaps) : : "memory");
- if (((hwcaps >> ID_AA64PFR1_MTE_SHIFT) & MT_TAG_MASK) != ID_AA64PFR1_MTE) {
+ if (!(hwcaps2 & HWCAP2_MTE)) {
ksft_print_msg("FAIL: MTE features unavailable\n");
return KSFT_SKIP;
}
@@ -333,6 +337,7 @@ int create_temp_file(void)
/* Create a file in the tmpfs filesystem */
fd = mkstemp(&filename[0]);
if (fd == -1) {
+ perror(filename);
ksft_print_msg("FAIL: Unable to open temporary file\n");
return 0;
}
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index c0c48fdb9ac1..4866f6a21901 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -1,4 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
+bpf-helpers*
+bpf-syscall*
test_verifier
test_maps
test_lru_map
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 044bfdcf5b74..511259c2c6c5 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-include ../../../../scripts/Kbuild.include
+include ../../../build/Build.include
include ../../../scripts/Makefile.arch
include ../../../scripts/Makefile.include
@@ -21,13 +21,18 @@ endif
BPF_GCC ?= $(shell command -v bpf-gcc;)
SAN_CFLAGS ?=
-CFLAGS += -g -rdynamic -Wall -O2 $(GENFLAGS) $(SAN_CFLAGS) \
+CFLAGS += -g -O0 -rdynamic -Wall $(GENFLAGS) $(SAN_CFLAGS) \
-I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \
-I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT) \
-Dbpf_prog_load=bpf_prog_test_load \
-Dbpf_load_program=bpf_test_load_program
LDLIBS += -lcap -lelf -lz -lrt -lpthread
+# Silence some warnings when compiled with clang
+ifneq ($(LLVM),)
+CFLAGS += -Wno-unused-command-line-argument
+endif
+
# Order correspond to 'make run_tests' order
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
test_verifier_log test_dev_cgroup \
@@ -68,6 +73,7 @@ TEST_PROGS := test_kmod.sh \
test_bpftool_build.sh \
test_bpftool.sh \
test_bpftool_metadata.sh \
+ test_doc_build.sh \
test_xsk.sh
TEST_PROGS_EXTENDED := with_addr.sh \
@@ -103,6 +109,7 @@ override define CLEAN
$(call msg,CLEAN)
$(Q)$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
$(Q)$(MAKE) -C bpf_testmod clean
+ $(Q)$(MAKE) docs-clean
endef
include ../lib.mk
@@ -198,18 +205,27 @@ $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \
$(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/bpftool
$(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \
CC=$(HOSTCC) LD=$(HOSTLD) \
+ EXTRA_CFLAGS='-g -O0' \
OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \
prefix= DESTDIR=$(HOST_SCRATCH_DIR)/ install
- $(Q)mkdir -p $(BUILD_DIR)/bpftool/Documentation
- $(Q)RST2MAN_OPTS="--exit-status=1" $(MAKE) $(submake_extras) \
- -C $(BPFTOOLDIR)/Documentation \
- OUTPUT=$(BUILD_DIR)/bpftool/Documentation/ \
- prefix= DESTDIR=$(SCRATCH_DIR)/ install
+
+all: docs
+
+docs:
+ $(Q)RST2MAN_OPTS="--exit-status=1" $(MAKE) $(submake_extras) \
+ -f Makefile.docs \
+ prefix= OUTPUT=$(OUTPUT)/ DESTDIR=$(OUTPUT)/ $@
+
+docs-clean:
+ $(Q)$(MAKE) $(submake_extras) \
+ -f Makefile.docs \
+ prefix= OUTPUT=$(OUTPUT)/ DESTDIR=$(OUTPUT)/ $@
$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
../../../include/uapi/linux/bpf.h \
| $(INCLUDE_DIR) $(BUILD_DIR)/libbpf
$(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \
+ EXTRA_CFLAGS='-g -O0' \
DESTDIR=$(SCRATCH_DIR) prefix= all install_headers
ifneq ($(BPFOBJ),$(HOST_BPFOBJ))
@@ -217,11 +233,12 @@ $(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
../../../include/uapi/linux/bpf.h \
| $(INCLUDE_DIR) $(HOST_BUILD_DIR)/libbpf
$(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) \
- OUTPUT=$(HOST_BUILD_DIR)/libbpf/ CC=$(HOSTCC) LD=$(HOSTLD) \
+ EXTRA_CFLAGS='-g -O0' \
+ OUTPUT=$(HOST_BUILD_DIR)/libbpf/ CC=$(HOSTCC) LD=$(HOSTLD) \
DESTDIR=$(HOST_SCRATCH_DIR)/ prefix= all install_headers
endif
-$(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) | $(BPFTOOL) $(INCLUDE_DIR)
+$(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) | $(INCLUDE_DIR)
ifeq ($(VMLINUX_H),)
$(call msg,GEN,,$@)
$(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@
@@ -292,6 +309,16 @@ endef
SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c
+LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \
+ linked_vars.skel.h linked_maps.skel.h
+
+test_static_linked.skel.h-deps := test_static_linked1.o test_static_linked2.o
+linked_funcs.skel.h-deps := linked_funcs1.o linked_funcs2.o
+linked_vars.skel.h-deps := linked_vars1.o linked_vars2.o
+linked_maps.skel.h-deps := linked_maps1.o linked_maps2.o
+
+LINKED_BPF_SRCS := $(patsubst %.o,%.c,$(foreach skel,$(LINKED_SKELS),$($(skel)-deps)))
+
# Set up extra TRUNNER_XXX "temporary" variables in the environment (relies on
# $eval()) and pass control to DEFINE_TEST_RUNNER_RULES.
# Parameters:
@@ -310,8 +337,9 @@ TRUNNER_TESTS_HDR := $(TRUNNER_TESTS_DIR)/tests.h
TRUNNER_BPF_SRCS := $$(notdir $$(wildcard $(TRUNNER_BPF_PROGS_DIR)/*.c))
TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, $$(TRUNNER_BPF_SRCS))
TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h, \
- $$(filter-out $(SKEL_BLACKLIST), \
+ $$(filter-out $(SKEL_BLACKLIST) $(LINKED_BPF_SRCS),\
$$(TRUNNER_BPF_SRCS)))
+TRUNNER_BPF_SKELS_LINKED := $$(addprefix $$(TRUNNER_OUTPUT)/,$(LINKED_SKELS))
TEST_GEN_FILES += $$(TRUNNER_BPF_OBJS)
# Evaluate rules now with extra TRUNNER_XXX variables above already defined
@@ -344,11 +372,22 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \
$$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \
$(TRUNNER_BPF_CFLAGS))
-$(TRUNNER_BPF_SKELS): $(TRUNNER_OUTPUT)/%.skel.h: \
- $(TRUNNER_OUTPUT)/%.o \
- | $(BPFTOOL) $(TRUNNER_OUTPUT)
+$(TRUNNER_BPF_SKELS): %.skel.h: %.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
$$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@)
- $(Q)$$(BPFTOOL) gen skeleton $$< > $$@
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked1.o) $$<
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked2.o) $$(<:.o=.linked1.o)
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked3.o) $$(<:.o=.linked2.o)
+ $(Q)diff $$(<:.o=.linked2.o) $$(<:.o=.linked3.o)
+ $(Q)$$(BPFTOOL) gen skeleton $$(<:.o=.linked3.o) name $$(notdir $$(<:.o=)) > $$@
+
+$(TRUNNER_BPF_SKELS_LINKED): $(TRUNNER_BPF_OBJS) $(BPFTOOL) | $(TRUNNER_OUTPUT)
+ $$(call msg,LINK-BPF,$(TRUNNER_BINARY),$$(@:.skel.h=.o))
+ $(Q)$$(BPFTOOL) gen object $$(@:.skel.h=.linked1.o) $$(addprefix $(TRUNNER_OUTPUT)/,$$($$(@F)-deps))
+ $(Q)$$(BPFTOOL) gen object $$(@:.skel.h=.linked2.o) $$(@:.skel.h=.linked1.o)
+ $(Q)$$(BPFTOOL) gen object $$(@:.skel.h=.linked3.o) $$(@:.skel.h=.linked2.o)
+ $(Q)diff $$(@:.skel.h=.linked2.o) $$(@:.skel.h=.linked3.o)
+ $$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@)
+ $(Q)$$(BPFTOOL) gen skeleton $$(@:.skel.h=.linked3.o) name $$(notdir $$(@:.skel.h=)) > $$@
endif
# ensure we set up tests.h header generation rule just once
@@ -370,6 +409,7 @@ $(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o: \
$(TRUNNER_EXTRA_HDRS) \
$(TRUNNER_BPF_OBJS) \
$(TRUNNER_BPF_SKELS) \
+ $(TRUNNER_BPF_SKELS_LINKED) \
$$(BPFOBJ) | $(TRUNNER_OUTPUT)
$$(call msg,TEST-OBJ,$(TRUNNER_BINARY),$$@)
$(Q)cd $$(@D) && $$(CC) -I. $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F)
@@ -382,11 +422,12 @@ $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \
$$(call msg,EXT-OBJ,$(TRUNNER_BINARY),$$@)
$(Q)$$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@
-# only copy extra resources if in flavored build
+# non-flavored in-srctree builds receive special treatment, in particular, we
+# do not need to copy extra resources (see e.g. test_btf_dump_case())
$(TRUNNER_BINARY)-extras: $(TRUNNER_EXTRA_FILES) | $(TRUNNER_OUTPUT)
-ifneq ($2,)
+ifneq ($2:$(OUTPUT),:$(shell pwd))
$$(call msg,EXT-COPY,$(TRUNNER_BINARY),$(TRUNNER_EXTRA_FILES))
- $(Q)cp -a $$^ $(TRUNNER_OUTPUT)/
+ $(Q)rsync -aq $$^ $(TRUNNER_OUTPUT)/
endif
$(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \
@@ -452,7 +493,7 @@ $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT)
# Make sure we are able to include and link libbpf against c++.
$(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ)
$(call msg,CXX,,$@)
- $(Q)$(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@
+ $(Q)$(CXX) $(CFLAGS) $(filter %.a %.o %.cpp,$^) $(LDLIBS) -o $@
# Benchmark runner
$(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h
@@ -476,3 +517,5 @@ EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \
prog_tests/tests.h map_tests/tests.h verifier/tests.h \
feature \
$(addprefix $(OUTPUT)/,*.o *.skel.h no_alu32 bpf_gcc bpf_testmod.ko)
+
+.PHONY: docs docs-clean
diff --git a/tools/testing/selftests/bpf/Makefile.docs b/tools/testing/selftests/bpf/Makefile.docs
new file mode 100644
index 000000000000..ccf260021e83
--- /dev/null
+++ b/tools/testing/selftests/bpf/Makefile.docs
@@ -0,0 +1,82 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+include ../../../scripts/Makefile.include
+include ../../../scripts/utilities.mak
+
+INSTALL ?= install
+RM ?= rm -f
+RMDIR ?= rmdir --ignore-fail-on-non-empty
+
+ifeq ($(V),1)
+ Q =
+else
+ Q = @
+endif
+
+prefix ?= /usr/local
+mandir ?= $(prefix)/man
+man2dir = $(mandir)/man2
+man7dir = $(mandir)/man7
+
+SYSCALL_RST = bpf-syscall.rst
+MAN2_RST = $(SYSCALL_RST)
+
+HELPERS_RST = bpf-helpers.rst
+MAN7_RST = $(HELPERS_RST)
+
+_DOC_MAN2 = $(patsubst %.rst,%.2,$(MAN2_RST))
+DOC_MAN2 = $(addprefix $(OUTPUT),$(_DOC_MAN2))
+
+_DOC_MAN7 = $(patsubst %.rst,%.7,$(MAN7_RST))
+DOC_MAN7 = $(addprefix $(OUTPUT),$(_DOC_MAN7))
+
+DOCTARGETS := helpers syscall
+
+docs: $(DOCTARGETS)
+syscall: man2
+helpers: man7
+man2: $(DOC_MAN2)
+man7: $(DOC_MAN7)
+
+RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null)
+
+# Configure make rules for the man page bpf-$1.$2.
+# $1 - target for scripts/bpf_doc.py
+# $2 - man page section to generate the troff file
+define DOCS_RULES =
+$(OUTPUT)bpf-$1.rst: ../../../../include/uapi/linux/bpf.h
+ $$(QUIET_GEN)../../../../scripts/bpf_doc.py $1 \
+ --filename $$< > $$@
+
+$(OUTPUT)%.$2: $(OUTPUT)%.rst
+ifndef RST2MAN_DEP
+ $$(error "rst2man not found, but required to generate man pages")
+endif
+ $$(QUIET_GEN)rst2man $$< > $$@
+
+docs-clean-$1:
+ $$(call QUIET_CLEAN, eBPF_$1-manpage)
+ $(Q)$(RM) $$(DOC_MAN$2) $(OUTPUT)bpf-$1.rst
+
+docs-install-$1: docs
+ $$(call QUIET_INSTALL, eBPF_$1-manpage)
+ $(Q)$(INSTALL) -d -m 755 $(DESTDIR)$$(man$2dir)
+ $(Q)$(INSTALL) -m 644 $$(DOC_MAN$2) $(DESTDIR)$$(man$2dir)
+
+docs-uninstall-$1:
+ $$(call QUIET_UNINST, eBPF_$1-manpage)
+ $(Q)$(RM) $$(addprefix $(DESTDIR)$$(man$2dir)/,$$(_DOC_MAN$2))
+ $(Q)$(RMDIR) $(DESTDIR)$$(man$2dir)
+
+.PHONY: $1 docs-clean-$1 docs-install-$1 docs-uninstall-$1
+endef
+
+# Create the make targets to generate manual pages by name and section
+$(eval $(call DOCS_RULES,helpers,7))
+$(eval $(call DOCS_RULES,syscall,2))
+
+docs-clean: $(foreach doctarget,$(DOCTARGETS), docs-clean-$(doctarget))
+docs-install: $(foreach doctarget,$(DOCTARGETS), docs-install-$(doctarget))
+docs-uninstall: $(foreach doctarget,$(DOCTARGETS), docs-uninstall-$(doctarget))
+
+.PHONY: docs docs-clean docs-install docs-uninstall man2 man7
diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
index fd148b8410fa..3353778c30f8 100644
--- a/tools/testing/selftests/bpf/README.rst
+++ b/tools/testing/selftests/bpf/README.rst
@@ -111,6 +111,45 @@ available in 10.0.1. The patch is available in llvm 11.0.0 trunk.
__ https://reviews.llvm.org/D78466
+bpf_verif_scale/loop6.o test failure with Clang 12
+==================================================
+
+With Clang 12, the following bpf_verif_scale test failed:
+ * ``bpf_verif_scale/loop6.o``
+
+The verifier output looks like
+
+.. code-block:: c
+
+ R1 type=ctx expected=fp
+ The sequence of 8193 jumps is too complex.
+
+The reason is compiler generating the following code
+
+.. code-block:: c
+
+ ; for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num); i++) {
+ 14: 16 05 40 00 00 00 00 00 if w5 == 0 goto +64 <LBB0_6>
+ 15: bc 51 00 00 00 00 00 00 w1 = w5
+ 16: 04 01 00 00 ff ff ff ff w1 += -1
+ 17: 67 05 00 00 20 00 00 00 r5 <<= 32
+ 18: 77 05 00 00 20 00 00 00 r5 >>= 32
+ 19: a6 01 01 00 05 00 00 00 if w1 < 5 goto +1 <LBB0_4>
+ 20: b7 05 00 00 06 00 00 00 r5 = 6
+ 00000000000000a8 <LBB0_4>:
+ 21: b7 02 00 00 00 00 00 00 r2 = 0
+ 22: b7 01 00 00 00 00 00 00 r1 = 0
+ ; for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num); i++) {
+ 23: 7b 1a e0 ff 00 00 00 00 *(u64 *)(r10 - 32) = r1
+ 24: 7b 5a c0 ff 00 00 00 00 *(u64 *)(r10 - 64) = r5
+
+Note that insn #15 has w1 = w5 and w1 is refined later but
+r5(w5) is eventually saved on stack at insn #24 for later use.
+This cause later verifier failure. The bug has been `fixed`__ in
+Clang 13.
+
+__ https://reviews.llvm.org/D97479
+
BPF CO-RE-based tests and Clang version
=======================================
@@ -131,3 +170,35 @@ failures:
.. _2: https://reviews.llvm.org/D85174
.. _3: https://reviews.llvm.org/D83878
.. _4: https://reviews.llvm.org/D83242
+
+Floating-point tests and Clang version
+======================================
+
+Certain selftests, e.g. core_reloc, require support for the floating-point
+types, which was introduced in `Clang 13`__. The older Clang versions will
+either crash when compiling these tests, or generate an incorrect BTF.
+
+__ https://reviews.llvm.org/D83289
+
+Kernel function call test and Clang version
+===========================================
+
+Some selftests (e.g. kfunc_call and bpf_tcp_ca) require a LLVM support
+to generate extern function in BTF. It was introduced in `Clang 13`__.
+
+Without it, the error from compiling bpf selftests looks like:
+
+.. code-block:: console
+
+ libbpf: failed to find BTF for extern 'tcp_slow_start' [25] section: -2
+
+__ https://reviews.llvm.org/D93563
+
+Clang dependencies for static linking tests
+===========================================
+
+linked_vars, linked_maps, and linked_funcs tests depend on `Clang fix`__ to
+generate valid BTF information for weak variables. Please make sure you use
+Clang that contains the fix.
+
+__ https://reviews.llvm.org/D100362
diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
index 91f0fac632f4..029589c008c9 100644
--- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
@@ -187,16 +187,6 @@ struct tcp_congestion_ops {
typeof(y) __y = (y); \
__x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
-static __always_inline __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked)
-{
- __u32 cwnd = min(tp->snd_cwnd + acked, tp->snd_ssthresh);
-
- acked -= cwnd - tp->snd_cwnd;
- tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp);
-
- return acked;
-}
-
static __always_inline bool tcp_in_slow_start(const struct tcp_sock *tp)
{
return tp->snd_cwnd < tp->snd_ssthresh;
@@ -213,22 +203,7 @@ static __always_inline bool tcp_is_cwnd_limited(const struct sock *sk)
return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited);
}
-static __always_inline void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked)
-{
- /* If credits accumulated at a higher w, apply them gently now. */
- if (tp->snd_cwnd_cnt >= w) {
- tp->snd_cwnd_cnt = 0;
- tp->snd_cwnd++;
- }
-
- tp->snd_cwnd_cnt += acked;
- if (tp->snd_cwnd_cnt >= w) {
- __u32 delta = tp->snd_cwnd_cnt / w;
-
- tp->snd_cwnd_cnt -= delta * w;
- tp->snd_cwnd += delta;
- }
- tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_cwnd_clamp);
-}
+extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym;
+extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym;
#endif
diff --git a/tools/testing/selftests/bpf/btf_helpers.c b/tools/testing/selftests/bpf/btf_helpers.c
index 48f90490f922..b692e6ead9b5 100644
--- a/tools/testing/selftests/bpf/btf_helpers.c
+++ b/tools/testing/selftests/bpf/btf_helpers.c
@@ -23,6 +23,7 @@ static const char * const btf_kind_str_mapping[] = {
[BTF_KIND_FUNC_PROTO] = "FUNC_PROTO",
[BTF_KIND_VAR] = "VAR",
[BTF_KIND_DATASEC] = "DATASEC",
+ [BTF_KIND_FLOAT] = "FLOAT",
};
static const char *btf_kind_str(__u16 kind)
@@ -173,6 +174,9 @@ int fprintf_btf_type_raw(FILE *out, const struct btf *btf, __u32 id)
}
break;
}
+ case BTF_KIND_FLOAT:
+ fprintf(out, " size=%u", t->size);
+ break;
default:
break;
}
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 37e1f303fc11..5192305159ec 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -44,3 +44,5 @@ CONFIG_SECURITYFS=y
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA_READ_POLICY=y
CONFIG_BLK_DEV_LOOP=y
+CONFIG_FUNCTION_TRACER=y
+CONFIG_DYNAMIC_FTRACE=y
diff --git a/tools/testing/selftests/bpf/get_cgroup_id_user.c b/tools/testing/selftests/bpf/get_cgroup_id_user.c
index b8d6aef99db4..99628e1a1e58 100644
--- a/tools/testing/selftests/bpf/get_cgroup_id_user.c
+++ b/tools/testing/selftests/bpf/get_cgroup_id_user.c
@@ -57,6 +57,10 @@ int main(int argc, char **argv)
__u32 key = 0, pid;
int exit_code = 1;
char buf[256];
+ const struct timespec req = {
+ .tv_sec = 1,
+ .tv_nsec = 0,
+ };
cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
if (CHECK(cgroup_fd < 0, "cgroup_setup_and_join", "err %d errno %d\n", cgroup_fd, errno))
@@ -115,7 +119,7 @@ int main(int argc, char **argv)
goto close_pmu;
/* trigger some syscalls */
- sleep(1);
+ syscall(__NR_nanosleep, &req, NULL);
err = bpf_map_lookup_elem(cgidmap_fd, &key, &kcgid);
if (CHECK(err, "bpf_map_lookup_elem", "err %d errno %d\n", err, errno))
diff --git a/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c
index f0a64d8ac59a..f4d870da7684 100644
--- a/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c
+++ b/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c
@@ -9,10 +9,13 @@
#include <test_maps.h>
+static int nr_cpus;
+
static void map_batch_update(int map_fd, __u32 max_entries, int *keys,
- int *values)
+ __s64 *values, bool is_pcpu)
{
- int i, err;
+ int i, j, err;
+ int cpu_offset = 0;
DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
.elem_flags = 0,
.flags = 0,
@@ -20,22 +23,41 @@ static void map_batch_update(int map_fd, __u32 max_entries, int *keys,
for (i = 0; i < max_entries; i++) {
keys[i] = i;
- values[i] = i + 1;
+ if (is_pcpu) {
+ cpu_offset = i * nr_cpus;
+ for (j = 0; j < nr_cpus; j++)
+ (values + cpu_offset)[j] = i + 1 + j;
+ } else {
+ values[i] = i + 1;
+ }
}
err = bpf_map_update_batch(map_fd, keys, values, &max_entries, &opts);
CHECK(err, "bpf_map_update_batch()", "error:%s\n", strerror(errno));
}
-static void map_batch_verify(int *visited, __u32 max_entries,
- int *keys, int *values)
+static void map_batch_verify(int *visited, __u32 max_entries, int *keys,
+ __s64 *values, bool is_pcpu)
{
- int i;
+ int i, j;
+ int cpu_offset = 0;
memset(visited, 0, max_entries * sizeof(*visited));
for (i = 0; i < max_entries; i++) {
- CHECK(keys[i] + 1 != values[i], "key/value checking",
- "error: i %d key %d value %d\n", i, keys[i], values[i]);
+ if (is_pcpu) {
+ cpu_offset = i * nr_cpus;
+ for (j = 0; j < nr_cpus; j++) {
+ __s64 value = (values + cpu_offset)[j];
+ CHECK(keys[i] + j + 1 != value,
+ "key/value checking",
+ "error: i %d j %d key %d value %lld\n", i,
+ j, keys[i], value);
+ }
+ } else {
+ CHECK(keys[i] + 1 != values[i], "key/value checking",
+ "error: i %d key %d value %lld\n", i, keys[i],
+ values[i]);
+ }
visited[i] = 1;
}
for (i = 0; i < max_entries; i++) {
@@ -44,20 +66,21 @@ static void map_batch_verify(int *visited, __u32 max_entries,
}
}
-void test_array_map_batch_ops(void)
+static void __test_map_lookup_and_update_batch(bool is_pcpu)
{
struct bpf_create_map_attr xattr = {
.name = "array_map",
- .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_type = is_pcpu ? BPF_MAP_TYPE_PERCPU_ARRAY :
+ BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(int),
- .value_size = sizeof(int),
+ .value_size = sizeof(__s64),
};
- int map_fd, *keys, *values, *visited;
+ int map_fd, *keys, *visited;
__u32 count, total, total_success;
const __u32 max_entries = 10;
- bool nospace_err;
__u64 batch = 0;
- int err, step;
+ int err, step, value_size;
+ void *values;
DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
.elem_flags = 0,
.flags = 0,
@@ -68,35 +91,35 @@ void test_array_map_batch_ops(void)
CHECK(map_fd == -1,
"bpf_create_map_xattr()", "error:%s\n", strerror(errno));
- keys = malloc(max_entries * sizeof(int));
- values = malloc(max_entries * sizeof(int));
- visited = malloc(max_entries * sizeof(int));
+ value_size = sizeof(__s64);
+ if (is_pcpu)
+ value_size *= nr_cpus;
+
+ keys = calloc(max_entries, sizeof(*keys));
+ values = calloc(max_entries, value_size);
+ visited = calloc(max_entries, sizeof(*visited));
CHECK(!keys || !values || !visited, "malloc()", "error:%s\n",
strerror(errno));
- /* populate elements to the map */
- map_batch_update(map_fd, max_entries, keys, values);
-
/* test 1: lookup in a loop with various steps. */
total_success = 0;
for (step = 1; step < max_entries; step++) {
- map_batch_update(map_fd, max_entries, keys, values);
- map_batch_verify(visited, max_entries, keys, values);
+ map_batch_update(map_fd, max_entries, keys, values, is_pcpu);
+ map_batch_verify(visited, max_entries, keys, values, is_pcpu);
memset(keys, 0, max_entries * sizeof(*keys));
- memset(values, 0, max_entries * sizeof(*values));
+ memset(values, 0, max_entries * value_size);
batch = 0;
total = 0;
/* iteratively lookup/delete elements with 'step'
* elements each.
*/
count = step;
- nospace_err = false;
while (true) {
err = bpf_map_lookup_batch(map_fd,
- total ? &batch : NULL, &batch,
- keys + total,
- values + total,
- &count, &opts);
+ total ? &batch : NULL,
+ &batch, keys + total,
+ values + total * value_size,
+ &count, &opts);
CHECK((err && errno != ENOENT), "lookup with steps",
"error: %s\n", strerror(errno));
@@ -107,13 +130,10 @@ void test_array_map_batch_ops(void)
}
- if (nospace_err == true)
- continue;
-
CHECK(total != max_entries, "lookup with steps",
"total = %u, max_entries = %u\n", total, max_entries);
- map_batch_verify(visited, max_entries, keys, values);
+ map_batch_verify(visited, max_entries, keys, values, is_pcpu);
total_success++;
}
@@ -121,9 +141,30 @@ void test_array_map_batch_ops(void)
CHECK(total_success == 0, "check total_success",
"unexpected failure\n");
- printf("%s:PASS\n", __func__);
-
free(keys);
free(values);
free(visited);
}
+
+static void array_map_batch_ops(void)
+{
+ __test_map_lookup_and_update_batch(false);
+ printf("test_%s:PASS\n", __func__);
+}
+
+static void array_percpu_map_batch_ops(void)
+{
+ __test_map_lookup_and_update_batch(true);
+ printf("test_%s:PASS\n", __func__);
+}
+
+void test_array_map_batch_ops(void)
+{
+ nr_cpus = libbpf_num_possible_cpus();
+
+ CHECK(nr_cpus < 0, "nr_cpus checking",
+ "error: get possible cpus failed");
+
+ array_map_batch_ops();
+ array_percpu_map_batch_ops();
+}
diff --git a/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c
new file mode 100644
index 000000000000..2e986e5e4cac
--- /dev/null
+++ b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <arpa/inet.h>
+#include <linux/bpf.h>
+#include <netinet/in.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include <test_maps.h>
+
+struct test_lpm_key {
+ __u32 prefix;
+ struct in_addr ipv4;
+};
+
+static void map_batch_update(int map_fd, __u32 max_entries,
+ struct test_lpm_key *keys, int *values)
+{
+ __u32 i;
+ int err;
+ char buff[16] = { 0 };
+ DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
+ .elem_flags = 0,
+ .flags = 0,
+ );
+
+ for (i = 0; i < max_entries; i++) {
+ keys[i].prefix = 32;
+ snprintf(buff, 16, "192.168.1.%d", i + 1);
+ inet_pton(AF_INET, buff, &keys[i].ipv4);
+ values[i] = i + 1;
+ }
+
+ err = bpf_map_update_batch(map_fd, keys, values, &max_entries, &opts);
+ CHECK(err, "bpf_map_update_batch()", "error:%s\n", strerror(errno));
+}
+
+static void map_batch_verify(int *visited, __u32 max_entries,
+ struct test_lpm_key *keys, int *values)
+{
+ char buff[16] = { 0 };
+ int lower_byte = 0;
+ __u32 i;
+
+ memset(visited, 0, max_entries * sizeof(*visited));
+ for (i = 0; i < max_entries; i++) {
+ inet_ntop(AF_INET, &keys[i].ipv4, buff, 32);
+ CHECK(sscanf(buff, "192.168.1.%d", &lower_byte) == EOF,
+ "sscanf()", "error: i %d\n", i);
+ CHECK(lower_byte != values[i], "key/value checking",
+ "error: i %d key %s value %d\n", i, buff, values[i]);
+ visited[i] = 1;
+ }
+ for (i = 0; i < max_entries; i++) {
+ CHECK(visited[i] != 1, "visited checking",
+ "error: keys array at index %d missing\n", i);
+ }
+}
+
+void test_lpm_trie_map_batch_ops(void)
+{
+ struct bpf_create_map_attr xattr = {
+ .name = "lpm_trie_map",
+ .map_type = BPF_MAP_TYPE_LPM_TRIE,
+ .key_size = sizeof(struct test_lpm_key),
+ .value_size = sizeof(int),
+ .map_flags = BPF_F_NO_PREALLOC,
+ };
+ struct test_lpm_key *keys, key;
+ int map_fd, *values, *visited;
+ __u32 step, count, total, total_success;
+ const __u32 max_entries = 10;
+ __u64 batch = 0;
+ int err;
+ DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
+ .elem_flags = 0,
+ .flags = 0,
+ );
+
+ xattr.max_entries = max_entries;
+ map_fd = bpf_create_map_xattr(&xattr);
+ CHECK(map_fd == -1, "bpf_create_map_xattr()", "error:%s\n",
+ strerror(errno));
+
+ keys = malloc(max_entries * sizeof(struct test_lpm_key));
+ values = malloc(max_entries * sizeof(int));
+ visited = malloc(max_entries * sizeof(int));
+ CHECK(!keys || !values || !visited, "malloc()", "error:%s\n",
+ strerror(errno));
+
+ total_success = 0;
+ for (step = 1; step < max_entries; step++) {
+ map_batch_update(map_fd, max_entries, keys, values);
+ map_batch_verify(visited, max_entries, keys, values);
+ memset(keys, 0, max_entries * sizeof(*keys));
+ memset(values, 0, max_entries * sizeof(*values));
+ batch = 0;
+ total = 0;
+ /* iteratively lookup/delete elements with 'step'
+ * elements each.
+ */
+ count = step;
+ while (true) {
+ err = bpf_map_lookup_batch(map_fd,
+ total ? &batch : NULL, &batch,
+ keys + total, values + total, &count, &opts);
+
+ CHECK((err && errno != ENOENT), "lookup with steps",
+ "error: %s\n", strerror(errno));
+
+ total += count;
+ if (err)
+ break;
+ }
+
+ CHECK(total != max_entries, "lookup with steps",
+ "total = %u, max_entries = %u\n", total, max_entries);
+
+ map_batch_verify(visited, max_entries, keys, values);
+
+ total = 0;
+ count = step;
+ while (total < max_entries) {
+ if (max_entries - total < step)
+ count = max_entries - total;
+ err = bpf_map_delete_batch(map_fd, keys + total, &count,
+ &opts);
+ CHECK((err && errno != ENOENT), "delete batch",
+ "error: %s\n", strerror(errno));
+ total += count;
+ if (err)
+ break;
+ }
+ CHECK(total != max_entries, "delete with steps",
+ "total = %u, max_entries = %u\n", total, max_entries);
+
+ /* check map is empty, errono == ENOENT */
+ err = bpf_map_get_next_key(map_fd, NULL, &key);
+ CHECK(!err || errno != ENOENT, "bpf_map_get_next_key()",
+ "error: %s\n", strerror(errno));
+
+ total_success++;
+ }
+
+ CHECK(total_success == 0, "check total_success",
+ "unexpected failure\n");
+
+ printf("%s:PASS\n", __func__);
+
+ free(keys);
+ free(values);
+ free(visited);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index a0ee87c8e1ea..9dc4e3dfbcf3 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -2,6 +2,44 @@
#include <test_progs.h>
#include "test_attach_probe.skel.h"
+#if defined(__powerpc64__) && defined(_CALL_ELF) && _CALL_ELF == 2
+
+#define OP_RT_RA_MASK 0xffff0000UL
+#define LIS_R2 0x3c400000UL
+#define ADDIS_R2_R12 0x3c4c0000UL
+#define ADDI_R2_R2 0x38420000UL
+
+static ssize_t get_offset(ssize_t addr, ssize_t base)
+{
+ u32 *insn = (u32 *) addr;
+
+ /*
+ * A PPC64 ABIv2 function may have a local and a global entry
+ * point. We need to use the local entry point when patching
+ * functions, so identify and step over the global entry point
+ * sequence.
+ *
+ * The global entry point sequence is always of the form:
+ *
+ * addis r2,r12,XXXX
+ * addi r2,r2,XXXX
+ *
+ * A linker optimisation may convert the addis to lis:
+ *
+ * lis r2,XXXX
+ * addi r2,r2,XXXX
+ */
+ if ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) ||
+ ((*insn & OP_RT_RA_MASK) == LIS_R2)) &&
+ ((*(insn + 1) & OP_RT_RA_MASK) == ADDI_R2_R2))
+ return (ssize_t)(insn + 2) - base;
+ else
+ return addr - base;
+}
+#else
+#define get_offset(addr, base) (addr - base)
+#endif
+
ssize_t get_base_addr() {
size_t start, offset;
char buf[256];
@@ -36,7 +74,7 @@ void test_attach_probe(void)
if (CHECK(base_addr < 0, "get_base_addr",
"failed to find base addr: %zd", base_addr))
return;
- uprobe_offset = (size_t)&get_base_addr - base_addr;
+ uprobe_offset = get_offset((size_t)&get_base_addr, base_addr);
skel = test_attach_probe__open_and_load();
if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 74c45d557a2b..2d3590cfb5e1 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -147,6 +147,7 @@ static void test_task_stack(void)
return;
do_dummy_read(skel->progs.dump_task_stack);
+ do_dummy_read(skel->progs.get_task_user_stacks);
bpf_iter_task_stack__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
index 37c5494a0381..e25917f04602 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
@@ -6,6 +6,7 @@
#include <test_progs.h>
#include "bpf_dctcp.skel.h"
#include "bpf_cubic.skel.h"
+#include "bpf_tcp_nogpl.skel.h"
#define min(a, b) ((a) < (b) ? (a) : (b))
@@ -227,10 +228,53 @@ static void test_dctcp(void)
bpf_dctcp__destroy(dctcp_skel);
}
+static char *err_str;
+static bool found;
+
+static int libbpf_debug_print(enum libbpf_print_level level,
+ const char *format, va_list args)
+{
+ char *log_buf;
+
+ if (level != LIBBPF_WARN ||
+ strcmp(format, "libbpf: \n%s\n")) {
+ vprintf(format, args);
+ return 0;
+ }
+
+ log_buf = va_arg(args, char *);
+ if (!log_buf)
+ goto out;
+ if (err_str && strstr(log_buf, err_str) != NULL)
+ found = true;
+out:
+ printf(format, log_buf);
+ return 0;
+}
+
+static void test_invalid_license(void)
+{
+ libbpf_print_fn_t old_print_fn;
+ struct bpf_tcp_nogpl *skel;
+
+ err_str = "struct ops programs must have a GPL compatible license";
+ found = false;
+ old_print_fn = libbpf_set_print(libbpf_debug_print);
+
+ skel = bpf_tcp_nogpl__open_and_load();
+ ASSERT_NULL(skel, "bpf_tcp_nogpl");
+ ASSERT_EQ(found, true, "expected_err_msg");
+
+ bpf_tcp_nogpl__destroy(skel);
+ libbpf_set_print(old_print_fn);
+}
+
void test_bpf_tcp_ca(void)
{
if (test__start_subtest("dctcp"))
test_dctcp();
if (test__start_subtest("cubic"))
test_cubic();
+ if (test__start_subtest("invalid_license"))
+ test_invalid_license();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
index e698ee6bb6c2..3d002c245d2b 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
@@ -76,6 +76,7 @@ void test_bpf_verif_scale(void)
{ "loop2.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
{ "loop4.o", BPF_PROG_TYPE_SCHED_CLS },
{ "loop5.o", BPF_PROG_TYPE_SCHED_CLS },
+ { "loop6.o", BPF_PROG_TYPE_KPROBE },
/* partial unroll. 19k insn in a loop.
* Total program size 20.8k insn.
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 6a7ee7420701..0457ae32b270 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -1903,7 +1903,7 @@ static struct btf_raw_test raw_tests[] = {
.raw_types = {
/* int */ /* [1] */
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
- BTF_TYPE_ENC(0, 0x10000000, 4),
+ BTF_TYPE_ENC(0, 0x20000000, 4),
BTF_END_RAW,
},
.str_sec = "",
@@ -3531,6 +3531,136 @@ static struct btf_raw_test raw_tests[] = {
.max_entries = 1,
},
+{
+ .descr = "float test #1, well-formed",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ /* [1] */
+ BTF_TYPE_FLOAT_ENC(NAME_TBD, 2), /* [2] */
+ BTF_TYPE_FLOAT_ENC(NAME_TBD, 4), /* [3] */
+ BTF_TYPE_FLOAT_ENC(NAME_TBD, 8), /* [4] */
+ BTF_TYPE_FLOAT_ENC(NAME_TBD, 12), /* [5] */
+ BTF_TYPE_FLOAT_ENC(NAME_TBD, 16), /* [6] */
+ BTF_STRUCT_ENC(NAME_TBD, 5, 48), /* [7] */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 0),
+ BTF_MEMBER_ENC(NAME_TBD, 3, 32),
+ BTF_MEMBER_ENC(NAME_TBD, 4, 64),
+ BTF_MEMBER_ENC(NAME_TBD, 5, 128),
+ BTF_MEMBER_ENC(NAME_TBD, 6, 256),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0int\0_Float16\0float\0double\0_Float80\0long_double"
+ "\0floats\0a\0b\0c\0d\0e"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "float_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 48,
+ .key_type_id = 1,
+ .value_type_id = 7,
+ .max_entries = 1,
+},
+{
+ .descr = "float test #2, invalid vlen",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ /* [1] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 1), 4),
+ /* [2] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0int\0float"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "float_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "vlen != 0",
+},
+{
+ .descr = "float test #3, invalid kind_flag",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ /* [1] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FLOAT, 1, 0), 4),
+ /* [2] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0int\0float"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "float_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid btf_info kind_flag",
+},
+{
+ .descr = "float test #4, member does not fit",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ /* [1] */
+ BTF_TYPE_FLOAT_ENC(NAME_TBD, 4), /* [2] */
+ BTF_STRUCT_ENC(NAME_TBD, 1, 2), /* [3] */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 0),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0int\0float\0floats\0x"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "float_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 3,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Member exceeds struct_size",
+},
+{
+ .descr = "float test #5, member is not properly aligned",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ /* [1] */
+ BTF_TYPE_FLOAT_ENC(NAME_TBD, 4), /* [2] */
+ BTF_STRUCT_ENC(NAME_TBD, 1, 8), /* [3] */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 8),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0int\0float\0floats\0x"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "float_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 3,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Member is not properly aligned",
+},
+{
+ .descr = "float test #6, invalid size",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ /* [1] */
+ BTF_TYPE_FLOAT_ENC(NAME_TBD, 6), /* [2] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0int\0float"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "float_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 6,
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid type_size",
+},
+
}; /* struct btf_raw_test raw_tests[] */
static const char *get_next_str(const char *start, const char *end)
@@ -6281,11 +6411,12 @@ const struct btf_dedup_test dedup_tests[] = {
/* int[16] */
BTF_TYPE_ARRAY_ENC(1, 1, 16), /* [2] */
/* struct s { */
- BTF_STRUCT_ENC(NAME_NTH(2), 4, 84), /* [3] */
+ BTF_STRUCT_ENC(NAME_NTH(2), 5, 88), /* [3] */
BTF_MEMBER_ENC(NAME_NTH(3), 4, 0), /* struct s *next; */
BTF_MEMBER_ENC(NAME_NTH(4), 5, 64), /* const int *a; */
BTF_MEMBER_ENC(NAME_NTH(5), 2, 128), /* int b[16]; */
BTF_MEMBER_ENC(NAME_NTH(6), 1, 640), /* int c; */
+ BTF_MEMBER_ENC(NAME_NTH(8), 13, 672), /* float d; */
/* ptr -> [3] struct s */
BTF_PTR_ENC(3), /* [4] */
/* ptr -> [6] const int */
@@ -6296,39 +6427,43 @@ const struct btf_dedup_test dedup_tests[] = {
/* full copy of the above */
BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), /* [7] */
BTF_TYPE_ARRAY_ENC(7, 7, 16), /* [8] */
- BTF_STRUCT_ENC(NAME_NTH(2), 4, 84), /* [9] */
+ BTF_STRUCT_ENC(NAME_NTH(2), 5, 88), /* [9] */
BTF_MEMBER_ENC(NAME_NTH(3), 10, 0),
BTF_MEMBER_ENC(NAME_NTH(4), 11, 64),
BTF_MEMBER_ENC(NAME_NTH(5), 8, 128),
BTF_MEMBER_ENC(NAME_NTH(6), 7, 640),
+ BTF_MEMBER_ENC(NAME_NTH(8), 13, 672),
BTF_PTR_ENC(9), /* [10] */
BTF_PTR_ENC(12), /* [11] */
BTF_CONST_ENC(7), /* [12] */
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(7), 4), /* [13] */
BTF_END_RAW,
},
- BTF_STR_SEC("\0int\0s\0next\0a\0b\0c\0"),
+ BTF_STR_SEC("\0int\0s\0next\0a\0b\0c\0float\0d"),
},
.expect = {
.raw_types = {
/* int */
- BTF_TYPE_INT_ENC(NAME_NTH(4), BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_INT_ENC(NAME_NTH(5), BTF_INT_SIGNED, 0, 32, 4), /* [1] */
/* int[16] */
BTF_TYPE_ARRAY_ENC(1, 1, 16), /* [2] */
/* struct s { */
- BTF_STRUCT_ENC(NAME_NTH(6), 4, 84), /* [3] */
- BTF_MEMBER_ENC(NAME_NTH(5), 4, 0), /* struct s *next; */
+ BTF_STRUCT_ENC(NAME_NTH(8), 5, 88), /* [3] */
+ BTF_MEMBER_ENC(NAME_NTH(7), 4, 0), /* struct s *next; */
BTF_MEMBER_ENC(NAME_NTH(1), 5, 64), /* const int *a; */
BTF_MEMBER_ENC(NAME_NTH(2), 2, 128), /* int b[16]; */
BTF_MEMBER_ENC(NAME_NTH(3), 1, 640), /* int c; */
+ BTF_MEMBER_ENC(NAME_NTH(4), 7, 672), /* float d; */
/* ptr -> [3] struct s */
BTF_PTR_ENC(3), /* [4] */
/* ptr -> [6] const int */
BTF_PTR_ENC(6), /* [5] */
/* const -> [1] int */
BTF_CONST_ENC(1), /* [6] */
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(7), 4), /* [7] */
BTF_END_RAW,
},
- BTF_STR_SEC("\0a\0b\0c\0int\0next\0s"),
+ BTF_STR_SEC("\0a\0b\0c\0d\0int\0float\0next\0s"),
},
.opts = {
.dont_resolve_fwds = false,
@@ -6449,9 +6584,10 @@ const struct btf_dedup_test dedup_tests[] = {
BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8),
BTF_FUNC_ENC(NAME_TBD, 12), /* [13] func */
+ BTF_TYPE_FLOAT_ENC(NAME_TBD, 2), /* [14] float */
BTF_END_RAW,
},
- BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M"),
+ BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N"),
},
.expect = {
.raw_types = {
@@ -6474,16 +6610,17 @@ const struct btf_dedup_test dedup_tests[] = {
BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8),
BTF_FUNC_ENC(NAME_TBD, 12), /* [13] func */
+ BTF_TYPE_FLOAT_ENC(NAME_TBD, 2), /* [14] float */
BTF_END_RAW,
},
- BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M"),
+ BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N"),
},
.opts = {
.dont_resolve_fwds = false,
},
},
{
- .descr = "dedup: no int duplicates",
+ .descr = "dedup: no int/float duplicates",
.input = {
.raw_types = {
BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 8),
@@ -6498,9 +6635,15 @@ const struct btf_dedup_test dedup_tests[] = {
BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 27, 8),
/* different byte size */
BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4),
+ /* all allowed sizes */
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 2),
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 4),
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 8),
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 12),
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 16),
BTF_END_RAW,
},
- BTF_STR_SEC("\0int\0some other int"),
+ BTF_STR_SEC("\0int\0some other int\0float"),
},
.expect = {
.raw_types = {
@@ -6516,9 +6659,15 @@ const struct btf_dedup_test dedup_tests[] = {
BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 27, 8),
/* different byte size */
BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4),
+ /* all allowed sizes */
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 2),
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 4),
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 8),
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 12),
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 16),
BTF_END_RAW,
},
- BTF_STR_SEC("\0int\0some other int"),
+ BTF_STR_SEC("\0int\0some other int\0float"),
},
.opts = {
.dont_resolve_fwds = false,
@@ -6630,6 +6779,7 @@ static int btf_type_size(const struct btf_type *t)
case BTF_KIND_PTR:
case BTF_KIND_TYPEDEF:
case BTF_KIND_FUNC:
+ case BTF_KIND_FLOAT:
return base_size;
case BTF_KIND_INT:
return base_size + sizeof(__u32);
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index c60091ee8a21..5e129dc2073c 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -77,7 +77,7 @@ static int test_btf_dump_case(int n, struct btf_dump_test_case *t)
snprintf(out_file, sizeof(out_file), "/tmp/%s.output.XXXXXX", t->file);
fd = mkstemp(out_file);
- if (CHECK(fd < 0, "create_tmp", "failed to create file: %d\n", fd)) {
+ if (!ASSERT_GE(fd, 0, "create_tmp")) {
err = fd;
goto done;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_endian.c b/tools/testing/selftests/bpf/prog_tests/btf_endian.c
index 8c52d72c876e..8ab5d3e358dd 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_endian.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_endian.c
@@ -6,8 +6,6 @@
#include <test_progs.h>
#include <bpf/btf.h>
-static int duration = 0;
-
void test_btf_endian() {
#if __BYTE_ORDER == __LITTLE_ENDIAN
enum btf_endianness endian = BTF_LITTLE_ENDIAN;
@@ -71,7 +69,7 @@ void test_btf_endian() {
/* now modify original BTF */
var_id = btf__add_var(btf, "some_var", BTF_VAR_GLOBAL_ALLOCATED, 1);
- CHECK(var_id <= 0, "var_id", "failed %d\n", var_id);
+ ASSERT_GT(var_id, 0, "var_id");
btf__free(swap_btf);
swap_btf = NULL;
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
index 4d9b514b3fd9..736796e56ed1 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
@@ -54,7 +54,7 @@ void test_cgroup_link(void)
for (i = 0; i < cg_nr; i++) {
cgs[i].fd = create_and_get_cgroup(cgs[i].path);
- if (CHECK(cgs[i].fd < 0, "cg_create", "fail: %d\n", cgs[i].fd))
+ if (!ASSERT_GE(cgs[i].fd, 0, "cg_create"))
goto cleanup;
}
diff --git a/tools/testing/selftests/bpf/prog_tests/check_mtu.c b/tools/testing/selftests/bpf/prog_tests/check_mtu.c
index 36af1c138faf..b62a39315336 100644
--- a/tools/testing/selftests/bpf/prog_tests/check_mtu.c
+++ b/tools/testing/selftests/bpf/prog_tests/check_mtu.c
@@ -128,6 +128,8 @@ static void test_check_mtu_xdp(__u32 mtu, __u32 ifindex)
test_check_mtu_run_xdp(skel, skel->progs.xdp_use_helper, mtu);
test_check_mtu_run_xdp(skel, skel->progs.xdp_exceed_mtu, mtu);
test_check_mtu_run_xdp(skel, skel->progs.xdp_minus_delta, mtu);
+ test_check_mtu_run_xdp(skel, skel->progs.xdp_input_len, mtu);
+ test_check_mtu_run_xdp(skel, skel->progs.xdp_input_len_exceed, mtu);
cleanup:
test_check_mtu__destroy(skel);
@@ -187,6 +189,8 @@ static void test_check_mtu_tc(__u32 mtu, __u32 ifindex)
test_check_mtu_run_tc(skel, skel->progs.tc_exceed_mtu, mtu);
test_check_mtu_run_tc(skel, skel->progs.tc_exceed_mtu_da, mtu);
test_check_mtu_run_tc(skel, skel->progs.tc_minus_delta, mtu);
+ test_check_mtu_run_tc(skel, skel->progs.tc_input_len, mtu);
+ test_check_mtu_run_tc(skel, skel->progs.tc_input_len_exceed, mtu);
cleanup:
test_check_mtu__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index 06eb956ff7bb..607710826dca 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -210,11 +210,6 @@ static int duration = 0;
.bpf_obj_file = "test_core_reloc_existence.o", \
.btf_src_file = "btf__core_reloc_" #name ".o" \
-#define FIELD_EXISTS_ERR_CASE(name) { \
- FIELD_EXISTS_CASE_COMMON(name), \
- .fails = true, \
-}
-
#define BITFIELDS_CASE_COMMON(objfile, test_name_prefix, name) \
.case_name = test_name_prefix#name, \
.bpf_obj_file = objfile, \
@@ -222,7 +217,7 @@ static int duration = 0;
#define BITFIELDS_CASE(name, ...) { \
BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_probed.o", \
- "direct:", name), \
+ "probed:", name), \
.input = STRUCT_TO_CHAR_PTR(core_reloc_##name) __VA_ARGS__, \
.input_len = sizeof(struct core_reloc_##name), \
.output = STRUCT_TO_CHAR_PTR(core_reloc_bitfields_output) \
@@ -230,7 +225,7 @@ static int duration = 0;
.output_len = sizeof(struct core_reloc_bitfields_output), \
}, { \
BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_direct.o", \
- "probed:", name), \
+ "direct:", name), \
.input = STRUCT_TO_CHAR_PTR(core_reloc_##name) __VA_ARGS__, \
.input_len = sizeof(struct core_reloc_##name), \
.output = STRUCT_TO_CHAR_PTR(core_reloc_bitfields_output) \
@@ -266,6 +261,7 @@ static int duration = 0;
.arr_elem_sz = sizeof(((type *)0)->arr_field[0]), \
.ptr_sz = 8, /* always 8-byte pointer for BPF */ \
.enum_sz = sizeof(((type *)0)->enum_field), \
+ .float_sz = sizeof(((type *)0)->float_field), \
}
#define SIZE_CASE(name) { \
@@ -550,8 +546,7 @@ static struct core_reloc_test_case test_cases[] = {
ARRAYS_ERR_CASE(arrays___err_too_small),
ARRAYS_ERR_CASE(arrays___err_too_shallow),
ARRAYS_ERR_CASE(arrays___err_non_array),
- ARRAYS_ERR_CASE(arrays___err_wrong_val_type1),
- ARRAYS_ERR_CASE(arrays___err_wrong_val_type2),
+ ARRAYS_ERR_CASE(arrays___err_wrong_val_type),
ARRAYS_ERR_CASE(arrays___err_bad_zero_sz_arr),
/* enum/ptr/int handling scenarios */
@@ -642,13 +637,25 @@ static struct core_reloc_test_case test_cases[] = {
},
.output_len = sizeof(struct core_reloc_existence_output),
},
-
- FIELD_EXISTS_ERR_CASE(existence__err_int_sz),
- FIELD_EXISTS_ERR_CASE(existence__err_int_type),
- FIELD_EXISTS_ERR_CASE(existence__err_int_kind),
- FIELD_EXISTS_ERR_CASE(existence__err_arr_kind),
- FIELD_EXISTS_ERR_CASE(existence__err_arr_value_type),
- FIELD_EXISTS_ERR_CASE(existence__err_struct_type),
+ {
+ FIELD_EXISTS_CASE_COMMON(existence___wrong_field_defs),
+ .input = STRUCT_TO_CHAR_PTR(core_reloc_existence___wrong_field_defs) {
+ },
+ .input_len = sizeof(struct core_reloc_existence___wrong_field_defs),
+ .output = STRUCT_TO_CHAR_PTR(core_reloc_existence_output) {
+ .a_exists = 0,
+ .b_exists = 0,
+ .c_exists = 0,
+ .arr_exists = 0,
+ .s_exists = 0,
+ .a_value = 0xff000001u,
+ .b_value = 0xff000002u,
+ .c_value = 0xff000003u,
+ .arr_value = 0xff000004u,
+ .s_value = 0xff000005u,
+ },
+ .output_len = sizeof(struct core_reloc_existence_output),
+ },
/* bitfield relocation checks */
BITFIELDS_CASE(bitfields, {
@@ -857,13 +864,20 @@ void test_core_reloc(void)
"prog '%s' not found\n", probe_name))
goto cleanup;
+
+ if (test_case->btf_src_file) {
+ err = access(test_case->btf_src_file, R_OK);
+ if (!ASSERT_OK(err, "btf_src_file"))
+ goto cleanup;
+ }
+
load_attr.obj = obj;
load_attr.log_level = 0;
load_attr.target_btf_path = test_case->btf_src_file;
err = bpf_object__load_xattr(&load_attr);
if (err) {
if (!test_case->fails)
- CHECK(false, "obj_load", "failed to load prog '%s': %d\n", probe_name, err);
+ ASSERT_OK(err, "obj_load");
goto cleanup;
}
@@ -902,10 +916,8 @@ void test_core_reloc(void)
goto cleanup;
}
- if (test_case->fails) {
- CHECK(false, "obj_load_fail", "should fail to load prog '%s'\n", probe_name);
+ if (!ASSERT_FALSE(test_case->fails, "obj_load_should_fail"))
goto cleanup;
- }
equal = memcmp(data->out, test_case->output,
test_case->output_len) == 0;
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
index 04ebbf1cb390..7cb111b11995 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
@@ -3,35 +3,57 @@
#include <test_progs.h>
#include "fentry_test.skel.h"
-void test_fentry_test(void)
+static int fentry_test(struct fentry_test *fentry_skel)
{
- struct fentry_test *fentry_skel = NULL;
int err, prog_fd, i;
__u32 duration = 0, retval;
+ struct bpf_link *link;
__u64 *result;
- fentry_skel = fentry_test__open_and_load();
- if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n"))
- goto cleanup;
-
err = fentry_test__attach(fentry_skel);
- if (CHECK(err, "fentry_attach", "fentry attach failed: %d\n", err))
- goto cleanup;
+ if (!ASSERT_OK(err, "fentry_attach"))
+ return err;
+
+ /* Check that already linked program can't be attached again. */
+ link = bpf_program__attach(fentry_skel->progs.test1);
+ if (!ASSERT_ERR_PTR(link, "fentry_attach_link"))
+ return -1;
prog_fd = bpf_program__fd(fentry_skel->progs.test1);
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
- CHECK(err || retval, "test_run",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(retval, 0, "test_run");
result = (__u64 *)fentry_skel->bss;
- for (i = 0; i < 6; i++) {
- if (CHECK(result[i] != 1, "result",
- "fentry_test%d failed err %lld\n", i + 1, result[i]))
- goto cleanup;
+ for (i = 0; i < sizeof(*fentry_skel->bss) / sizeof(__u64); i++) {
+ if (!ASSERT_EQ(result[i], 1, "fentry_result"))
+ return -1;
}
+ fentry_test__detach(fentry_skel);
+
+ /* zero results for re-attach test */
+ memset(fentry_skel->bss, 0, sizeof(*fentry_skel->bss));
+ return 0;
+}
+
+void test_fentry_test(void)
+{
+ struct fentry_test *fentry_skel = NULL;
+ int err;
+
+ fentry_skel = fentry_test__open_and_load();
+ if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_load"))
+ goto cleanup;
+
+ err = fentry_test(fentry_skel);
+ if (!ASSERT_OK(err, "fentry_first_attach"))
+ goto cleanup;
+
+ err = fentry_test(fentry_skel);
+ ASSERT_OK(err, "fentry_second_attach");
+
cleanup:
fentry_test__destroy(fentry_skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
index 5c0448910426..63990842d20f 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
@@ -58,42 +58,73 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
test_cb cb)
{
struct bpf_object *obj = NULL, *tgt_obj;
+ __u32 retval, tgt_prog_id, info_len;
+ struct bpf_prog_info prog_info = {};
struct bpf_program **prog = NULL;
struct bpf_link **link = NULL;
- __u32 duration = 0, retval;
int err, tgt_fd, i;
+ struct btf *btf;
err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC,
&tgt_obj, &tgt_fd);
- if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n",
- target_obj_file, err, errno))
+ if (!ASSERT_OK(err, "tgt_prog_load"))
return;
DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
.attach_prog_fd = tgt_fd,
);
+ info_len = sizeof(prog_info);
+ err = bpf_obj_get_info_by_fd(tgt_fd, &prog_info, &info_len);
+ if (!ASSERT_OK(err, "tgt_fd_get_info"))
+ goto close_prog;
+
+ tgt_prog_id = prog_info.id;
+ btf = bpf_object__btf(tgt_obj);
+
link = calloc(sizeof(struct bpf_link *), prog_cnt);
+ if (!ASSERT_OK_PTR(link, "link_ptr"))
+ goto close_prog;
+
prog = calloc(sizeof(struct bpf_program *), prog_cnt);
- if (CHECK(!link || !prog, "alloc_memory", "failed to alloc memory"))
+ if (!ASSERT_OK_PTR(prog, "prog_ptr"))
goto close_prog;
obj = bpf_object__open_file(obj_file, &opts);
- if (CHECK(IS_ERR_OR_NULL(obj), "obj_open",
- "failed to open %s: %ld\n", obj_file,
- PTR_ERR(obj)))
+ if (!ASSERT_OK_PTR(obj, "obj_open"))
goto close_prog;
err = bpf_object__load(obj);
- if (CHECK(err, "obj_load", "err %d\n", err))
+ if (!ASSERT_OK(err, "obj_load"))
goto close_prog;
for (i = 0; i < prog_cnt; i++) {
+ struct bpf_link_info link_info;
+ char *tgt_name;
+ __s32 btf_id;
+
+ tgt_name = strstr(prog_name[i], "/");
+ if (!ASSERT_OK_PTR(tgt_name, "tgt_name"))
+ goto close_prog;
+ btf_id = btf__find_by_name_kind(btf, tgt_name + 1, BTF_KIND_FUNC);
+
prog[i] = bpf_object__find_program_by_title(obj, prog_name[i]);
- if (CHECK(!prog[i], "find_prog", "prog %s not found\n", prog_name[i]))
+ if (!ASSERT_OK_PTR(prog[i], prog_name[i]))
goto close_prog;
+
link[i] = bpf_program__attach_trace(prog[i]);
- if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n"))
+ if (!ASSERT_OK_PTR(link[i], "attach_trace"))
goto close_prog;
+
+ info_len = sizeof(link_info);
+ memset(&link_info, 0, sizeof(link_info));
+ err = bpf_obj_get_info_by_fd(bpf_link__fd(link[i]),
+ &link_info, &info_len);
+ ASSERT_OK(err, "link_fd_get_info");
+ ASSERT_EQ(link_info.tracing.attach_type,
+ bpf_program__get_expected_attach_type(prog[i]),
+ "link_attach_type");
+ ASSERT_EQ(link_info.tracing.target_obj_id, tgt_prog_id, "link_tgt_obj_id");
+ ASSERT_EQ(link_info.tracing.target_btf_id, btf_id, "link_tgt_btf_id");
}
if (cb) {
@@ -106,10 +137,9 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
goto close_prog;
err = bpf_prog_test_run(tgt_fd, 1, &pkt_v6, sizeof(pkt_v6),
- NULL, NULL, &retval, &duration);
- CHECK(err || retval, "ipv6",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration);
+ NULL, NULL, &retval, NULL);
+ ASSERT_OK(err, "prog_run");
+ ASSERT_EQ(retval, 0, "prog_run_ret");
if (check_data_map(obj, prog_cnt, false))
goto close_prog;
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c
new file mode 100644
index 000000000000..ccc7e8a34ab6
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <test_progs.h>
+#include <time.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include "fexit_sleep.skel.h"
+
+static int do_sleep(void *skel)
+{
+ struct fexit_sleep *fexit_skel = skel;
+ struct timespec ts1 = { .tv_nsec = 1 };
+ struct timespec ts2 = { .tv_sec = 10 };
+
+ fexit_skel->bss->pid = getpid();
+ (void)syscall(__NR_nanosleep, &ts1, NULL);
+ (void)syscall(__NR_nanosleep, &ts2, NULL);
+ return 0;
+}
+
+#define STACK_SIZE (1024 * 1024)
+static char child_stack[STACK_SIZE];
+
+void test_fexit_sleep(void)
+{
+ struct fexit_sleep *fexit_skel = NULL;
+ int wstatus, duration = 0;
+ pid_t cpid;
+ int err, fexit_cnt;
+
+ fexit_skel = fexit_sleep__open_and_load();
+ if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n"))
+ goto cleanup;
+
+ err = fexit_sleep__attach(fexit_skel);
+ if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err))
+ goto cleanup;
+
+ cpid = clone(do_sleep, child_stack + STACK_SIZE, CLONE_FILES | SIGCHLD, fexit_skel);
+ if (CHECK(cpid == -1, "clone", "%s\n", strerror(errno)))
+ goto cleanup;
+
+ /* wait until first sys_nanosleep ends and second sys_nanosleep starts */
+ while (READ_ONCE(fexit_skel->bss->fentry_cnt) != 2);
+ fexit_cnt = READ_ONCE(fexit_skel->bss->fexit_cnt);
+ if (CHECK(fexit_cnt != 1, "fexit_cnt", "%d", fexit_cnt))
+ goto cleanup;
+
+ /* close progs and detach them. That will trigger two nop5->jmp5 rewrites
+ * in the trampolines to skip nanosleep_fexit prog.
+ * The nanosleep_fentry prog will get detached first.
+ * The nanosleep_fexit prog will get detached second.
+ * Detaching will trigger freeing of both progs JITed images.
+ * There will be two dying bpf_tramp_image-s, but only the initial
+ * bpf_tramp_image (with both _fentry and _fexit progs will be stuck
+ * waiting for percpu_ref_kill to confirm). The other one
+ * will be freed quickly.
+ */
+ close(bpf_program__fd(fexit_skel->progs.nanosleep_fentry));
+ close(bpf_program__fd(fexit_skel->progs.nanosleep_fexit));
+ fexit_sleep__detach(fexit_skel);
+
+ /* kill the thread to unwind sys_nanosleep stack through the trampoline */
+ kill(cpid, 9);
+
+ if (CHECK(waitpid(cpid, &wstatus, 0) == -1, "waitpid", "%s\n", strerror(errno)))
+ goto cleanup;
+ if (CHECK(WEXITSTATUS(wstatus) != 0, "exitstatus", "failed"))
+ goto cleanup;
+
+ /* The bypassed nanosleep_fexit prog shouldn't have executed.
+ * Unlike progs the maps were not freed and directly accessible.
+ */
+ fexit_cnt = READ_ONCE(fexit_skel->bss->fexit_cnt);
+ if (CHECK(fexit_cnt != 1, "fexit_cnt", "%d", fexit_cnt))
+ goto cleanup;
+
+cleanup:
+ fexit_sleep__destroy(fexit_skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
index 78d7a2765c27..6792e41f7f69 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
@@ -3,35 +3,57 @@
#include <test_progs.h>
#include "fexit_test.skel.h"
-void test_fexit_test(void)
+static int fexit_test(struct fexit_test *fexit_skel)
{
- struct fexit_test *fexit_skel = NULL;
int err, prog_fd, i;
__u32 duration = 0, retval;
+ struct bpf_link *link;
__u64 *result;
- fexit_skel = fexit_test__open_and_load();
- if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n"))
- goto cleanup;
-
err = fexit_test__attach(fexit_skel);
- if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err))
- goto cleanup;
+ if (!ASSERT_OK(err, "fexit_attach"))
+ return err;
+
+ /* Check that already linked program can't be attached again. */
+ link = bpf_program__attach(fexit_skel->progs.test1);
+ if (!ASSERT_ERR_PTR(link, "fexit_attach_link"))
+ return -1;
prog_fd = bpf_program__fd(fexit_skel->progs.test1);
err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
NULL, NULL, &retval, &duration);
- CHECK(err || retval, "test_run",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(retval, 0, "test_run");
result = (__u64 *)fexit_skel->bss;
- for (i = 0; i < 6; i++) {
- if (CHECK(result[i] != 1, "result",
- "fexit_test%d failed err %lld\n", i + 1, result[i]))
- goto cleanup;
+ for (i = 0; i < sizeof(*fexit_skel->bss) / sizeof(__u64); i++) {
+ if (!ASSERT_EQ(result[i], 1, "fexit_result"))
+ return -1;
}
+ fexit_test__detach(fexit_skel);
+
+ /* zero results for re-attach test */
+ memset(fexit_skel->bss, 0, sizeof(*fexit_skel->bss));
+ return 0;
+}
+
+void test_fexit_test(void)
+{
+ struct fexit_test *fexit_skel = NULL;
+ int err;
+
+ fexit_skel = fexit_test__open_and_load();
+ if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_load"))
+ goto cleanup;
+
+ err = fexit_test(fexit_skel);
+ if (!ASSERT_OK(err, "fexit_first_attach"))
+ goto cleanup;
+
+ err = fexit_test(fexit_skel);
+ ASSERT_OK(err, "fexit_second_attach");
+
cleanup:
fexit_test__destroy(fexit_skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/for_each.c b/tools/testing/selftests/bpf/prog_tests/for_each.c
new file mode 100644
index 000000000000..68eb12a287d4
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/for_each.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "for_each_hash_map_elem.skel.h"
+#include "for_each_array_map_elem.skel.h"
+
+static unsigned int duration;
+
+static void test_hash_map(void)
+{
+ int i, err, hashmap_fd, max_entries, percpu_map_fd;
+ struct for_each_hash_map_elem *skel;
+ __u64 *percpu_valbuf = NULL;
+ __u32 key, num_cpus, retval;
+ __u64 val;
+
+ skel = for_each_hash_map_elem__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "for_each_hash_map_elem__open_and_load"))
+ return;
+
+ hashmap_fd = bpf_map__fd(skel->maps.hashmap);
+ max_entries = bpf_map__max_entries(skel->maps.hashmap);
+ for (i = 0; i < max_entries; i++) {
+ key = i;
+ val = i + 1;
+ err = bpf_map_update_elem(hashmap_fd, &key, &val, BPF_ANY);
+ if (!ASSERT_OK(err, "map_update"))
+ goto out;
+ }
+
+ num_cpus = bpf_num_possible_cpus();
+ percpu_map_fd = bpf_map__fd(skel->maps.percpu_map);
+ percpu_valbuf = malloc(sizeof(__u64) * num_cpus);
+ if (!ASSERT_OK_PTR(percpu_valbuf, "percpu_valbuf"))
+ goto out;
+
+ key = 1;
+ for (i = 0; i < num_cpus; i++)
+ percpu_valbuf[i] = i + 1;
+ err = bpf_map_update_elem(percpu_map_fd, &key, percpu_valbuf, BPF_ANY);
+ if (!ASSERT_OK(err, "percpu_map_update"))
+ goto out;
+
+ err = bpf_prog_test_run(bpf_program__fd(skel->progs.test_pkt_access),
+ 1, &pkt_v4, sizeof(pkt_v4), NULL, NULL,
+ &retval, &duration);
+ if (CHECK(err || retval, "ipv4", "err %d errno %d retval %d\n",
+ err, errno, retval))
+ goto out;
+
+ ASSERT_EQ(skel->bss->hashmap_output, 4, "hashmap_output");
+ ASSERT_EQ(skel->bss->hashmap_elems, max_entries, "hashmap_elems");
+
+ key = 1;
+ err = bpf_map_lookup_elem(hashmap_fd, &key, &val);
+ ASSERT_ERR(err, "hashmap_lookup");
+
+ ASSERT_EQ(skel->bss->percpu_called, 1, "percpu_called");
+ ASSERT_LT(skel->bss->cpu, num_cpus, "num_cpus");
+ ASSERT_EQ(skel->bss->percpu_map_elems, 1, "percpu_map_elems");
+ ASSERT_EQ(skel->bss->percpu_key, 1, "percpu_key");
+ ASSERT_EQ(skel->bss->percpu_val, skel->bss->cpu + 1, "percpu_val");
+ ASSERT_EQ(skel->bss->percpu_output, 100, "percpu_output");
+out:
+ free(percpu_valbuf);
+ for_each_hash_map_elem__destroy(skel);
+}
+
+static void test_array_map(void)
+{
+ __u32 key, num_cpus, max_entries, retval;
+ int i, arraymap_fd, percpu_map_fd, err;
+ struct for_each_array_map_elem *skel;
+ __u64 *percpu_valbuf = NULL;
+ __u64 val, expected_total;
+
+ skel = for_each_array_map_elem__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "for_each_array_map_elem__open_and_load"))
+ return;
+
+ arraymap_fd = bpf_map__fd(skel->maps.arraymap);
+ expected_total = 0;
+ max_entries = bpf_map__max_entries(skel->maps.arraymap);
+ for (i = 0; i < max_entries; i++) {
+ key = i;
+ val = i + 1;
+ /* skip the last iteration for expected total */
+ if (i != max_entries - 1)
+ expected_total += val;
+ err = bpf_map_update_elem(arraymap_fd, &key, &val, BPF_ANY);
+ if (!ASSERT_OK(err, "map_update"))
+ goto out;
+ }
+
+ num_cpus = bpf_num_possible_cpus();
+ percpu_map_fd = bpf_map__fd(skel->maps.percpu_map);
+ percpu_valbuf = malloc(sizeof(__u64) * num_cpus);
+ if (!ASSERT_OK_PTR(percpu_valbuf, "percpu_valbuf"))
+ goto out;
+
+ key = 0;
+ for (i = 0; i < num_cpus; i++)
+ percpu_valbuf[i] = i + 1;
+ err = bpf_map_update_elem(percpu_map_fd, &key, percpu_valbuf, BPF_ANY);
+ if (!ASSERT_OK(err, "percpu_map_update"))
+ goto out;
+
+ err = bpf_prog_test_run(bpf_program__fd(skel->progs.test_pkt_access),
+ 1, &pkt_v4, sizeof(pkt_v4), NULL, NULL,
+ &retval, &duration);
+ if (CHECK(err || retval, "ipv4", "err %d errno %d retval %d\n",
+ err, errno, retval))
+ goto out;
+
+ ASSERT_EQ(skel->bss->arraymap_output, expected_total, "array_output");
+ ASSERT_EQ(skel->bss->cpu + 1, skel->bss->percpu_val, "percpu_val");
+
+out:
+ free(percpu_valbuf);
+ for_each_array_map_elem__destroy(skel);
+}
+
+void test_for_each(void)
+{
+ if (test__start_subtest("hash_map"))
+ test_hash_map();
+ if (test__start_subtest("array_map"))
+ test_array_map();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
index 42c3a3103c26..d65107919998 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
@@ -134,7 +134,7 @@ void test_kfree_skb(void)
/* make sure kfree_skb program was triggered
* and it sent expected skb into ring buffer
*/
- CHECK_FAIL(!passed);
+ ASSERT_TRUE(passed, "passed");
err = bpf_map_lookup_elem(bpf_map__fd(global_data), &zero, test_ok);
if (CHECK(err, "get_result",
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
new file mode 100644
index 000000000000..7fc0951ee75f
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "kfunc_call_test.skel.h"
+#include "kfunc_call_test_subprog.skel.h"
+
+static void test_main(void)
+{
+ struct kfunc_call_test *skel;
+ int prog_fd, retval, err;
+
+ skel = kfunc_call_test__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel"))
+ return;
+
+ prog_fd = bpf_program__fd(skel->progs.kfunc_call_test1);
+ err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+ NULL, NULL, (__u32 *)&retval, NULL);
+ ASSERT_OK(err, "bpf_prog_test_run(test1)");
+ ASSERT_EQ(retval, 12, "test1-retval");
+
+ prog_fd = bpf_program__fd(skel->progs.kfunc_call_test2);
+ err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+ NULL, NULL, (__u32 *)&retval, NULL);
+ ASSERT_OK(err, "bpf_prog_test_run(test2)");
+ ASSERT_EQ(retval, 3, "test2-retval");
+
+ kfunc_call_test__destroy(skel);
+}
+
+static void test_subprog(void)
+{
+ struct kfunc_call_test_subprog *skel;
+ int prog_fd, retval, err;
+
+ skel = kfunc_call_test_subprog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel"))
+ return;
+
+ prog_fd = bpf_program__fd(skel->progs.kfunc_call_test1);
+ err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+ NULL, NULL, (__u32 *)&retval, NULL);
+ ASSERT_OK(err, "bpf_prog_test_run(test1)");
+ ASSERT_EQ(retval, 10, "test1-retval");
+ ASSERT_NEQ(skel->data->active_res, -1, "active_res");
+ ASSERT_EQ(skel->data->sk_state, BPF_TCP_CLOSE, "sk_state");
+
+ kfunc_call_test_subprog__destroy(skel);
+}
+
+void test_kfunc_call(void)
+{
+ if (test__start_subtest("main"))
+ test_main();
+
+ if (test__start_subtest("subprog"))
+ test_subprog();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_funcs.c b/tools/testing/selftests/bpf/prog_tests/linked_funcs.c
new file mode 100644
index 000000000000..e9916f2817ec
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/linked_funcs.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <test_progs.h>
+#include <sys/syscall.h>
+#include "linked_funcs.skel.h"
+
+void test_linked_funcs(void)
+{
+ int err;
+ struct linked_funcs *skel;
+
+ skel = linked_funcs__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->rodata->my_tid = syscall(SYS_gettid);
+ skel->bss->syscall_id = SYS_getpgid;
+
+ err = linked_funcs__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ err = linked_funcs__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ /* trigger */
+ syscall(SYS_getpgid);
+
+ ASSERT_EQ(skel->bss->output_val1, 2000 + 2000, "output_val1");
+ ASSERT_EQ(skel->bss->output_ctx1, SYS_getpgid, "output_ctx1");
+ ASSERT_EQ(skel->bss->output_weak1, 42, "output_weak1");
+
+ ASSERT_EQ(skel->bss->output_val2, 2 * 1000 + 2 * (2 * 1000), "output_val2");
+ ASSERT_EQ(skel->bss->output_ctx2, SYS_getpgid, "output_ctx2");
+ /* output_weak2 should never be updated */
+ ASSERT_EQ(skel->bss->output_weak2, 0, "output_weak2");
+
+cleanup:
+ linked_funcs__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_maps.c b/tools/testing/selftests/bpf/prog_tests/linked_maps.c
new file mode 100644
index 000000000000..85dcaaaf2775
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/linked_maps.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <test_progs.h>
+#include <sys/syscall.h>
+#include "linked_maps.skel.h"
+
+void test_linked_maps(void)
+{
+ int err;
+ struct linked_maps *skel;
+
+ skel = linked_maps__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ err = linked_maps__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ /* trigger */
+ syscall(SYS_getpgid);
+
+ ASSERT_EQ(skel->bss->output_first1, 2000, "output_first1");
+ ASSERT_EQ(skel->bss->output_second1, 2, "output_second1");
+ ASSERT_EQ(skel->bss->output_weak1, 2, "output_weak1");
+
+cleanup:
+ linked_maps__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_vars.c b/tools/testing/selftests/bpf/prog_tests/linked_vars.c
new file mode 100644
index 000000000000..267166abe4c1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/linked_vars.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <test_progs.h>
+#include <sys/syscall.h>
+#include "linked_vars.skel.h"
+
+void test_linked_vars(void)
+{
+ int err;
+ struct linked_vars *skel;
+
+ skel = linked_vars__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->bss->input_bss1 = 1000;
+ skel->bss->input_bss2 = 2000;
+ skel->bss->input_bss_weak = 3000;
+
+ err = linked_vars__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ err = linked_vars__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ /* trigger */
+ syscall(SYS_getpgid);
+
+ ASSERT_EQ(skel->bss->output_bss1, 1000 + 2000 + 3000, "output_bss1");
+ ASSERT_EQ(skel->bss->output_bss2, 1000 + 2000 + 3000, "output_bss2");
+ /* 10 comes from "winner" input_data_weak in first obj file */
+ ASSERT_EQ(skel->bss->output_data1, 1 + 2 + 10, "output_bss1");
+ ASSERT_EQ(skel->bss->output_data2, 1 + 2 + 10, "output_bss2");
+ /* 100 comes from "winner" input_rodata_weak in first obj file */
+ ASSERT_EQ(skel->bss->output_rodata1, 11 + 22 + 100, "output_weak1");
+ ASSERT_EQ(skel->bss->output_rodata2, 11 + 22 + 100, "output_weak2");
+
+cleanup:
+ linked_vars__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/map_ptr.c b/tools/testing/selftests/bpf/prog_tests/map_ptr.c
index c230a573c373..4972f92205c7 100644
--- a/tools/testing/selftests/bpf/prog_tests/map_ptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/map_ptr.c
@@ -12,11 +12,22 @@ void test_map_ptr(void)
__u32 duration = 0, retval;
char buf[128];
int err;
+ int page_size = getpagesize();
- skel = map_ptr_kern__open_and_load();
- if (CHECK(!skel, "skel_open_load", "open_load failed\n"))
+ skel = map_ptr_kern__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
return;
+ err = bpf_map__set_max_entries(skel->maps.m_ringbuf, page_size);
+ if (!ASSERT_OK(err, "bpf_map__set_max_entries"))
+ goto cleanup;
+
+ err = map_ptr_kern__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ skel->bss->page_size = page_size;
+
err = bpf_prog_test_run(bpf_program__fd(skel->progs.cg_skb), 1, &pkt_v4,
sizeof(pkt_v4), buf, NULL, &retval, NULL);
diff --git a/tools/testing/selftests/bpf/prog_tests/mmap.c b/tools/testing/selftests/bpf/prog_tests/mmap.c
index 9c3c5c0f068f..37b002ca1167 100644
--- a/tools/testing/selftests/bpf/prog_tests/mmap.c
+++ b/tools/testing/selftests/bpf/prog_tests/mmap.c
@@ -29,22 +29,36 @@ void test_mmap(void)
struct test_mmap *skel;
__u64 val = 0;
- skel = test_mmap__open_and_load();
- if (CHECK(!skel, "skel_open_and_load", "skeleton open/load failed\n"))
+ skel = test_mmap__open();
+ if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
return;
+ err = bpf_map__set_max_entries(skel->maps.rdonly_map, page_size);
+ if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
+ goto cleanup;
+
+ /* at least 4 pages of data */
+ err = bpf_map__set_max_entries(skel->maps.data_map,
+ 4 * (page_size / sizeof(u64)));
+ if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
+ goto cleanup;
+
+ err = test_mmap__load(skel);
+ if (CHECK(err != 0, "skel_load", "skeleton load failed\n"))
+ goto cleanup;
+
bss_map = skel->maps.bss;
data_map = skel->maps.data_map;
data_map_fd = bpf_map__fd(data_map);
rdmap_fd = bpf_map__fd(skel->maps.rdonly_map);
- tmp1 = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, rdmap_fd, 0);
+ tmp1 = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rdmap_fd, 0);
if (CHECK(tmp1 != MAP_FAILED, "rdonly_write_mmap", "unexpected success\n")) {
- munmap(tmp1, 4096);
+ munmap(tmp1, page_size);
goto cleanup;
}
/* now double-check if it's mmap()'able at all */
- tmp1 = mmap(NULL, 4096, PROT_READ, MAP_SHARED, rdmap_fd, 0);
+ tmp1 = mmap(NULL, page_size, PROT_READ, MAP_SHARED, rdmap_fd, 0);
if (CHECK(tmp1 == MAP_FAILED, "rdonly_read_mmap", "failed: %d\n", errno))
goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c
index 5bc53d53d86e..d85a69b7ce44 100644
--- a/tools/testing/selftests/bpf/prog_tests/module_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c
@@ -45,12 +45,18 @@ static int trigger_module_test_write(int write_sz)
return 0;
}
+static int delete_module(const char *name, int flags)
+{
+ return syscall(__NR_delete_module, name, flags);
+}
+
void test_module_attach(void)
{
const int READ_SZ = 456;
const int WRITE_SZ = 457;
struct test_module_attach* skel;
struct test_module_attach__bss *bss;
+ struct bpf_link *link;
int err;
skel = test_module_attach__open();
@@ -84,6 +90,23 @@ void test_module_attach(void)
ASSERT_EQ(bss->fexit_ret, -EIO, "fexit_tet");
ASSERT_EQ(bss->fmod_ret_read_sz, READ_SZ, "fmod_ret");
+ test_module_attach__detach(skel);
+
+ /* attach fentry/fexit and make sure it get's module reference */
+ link = bpf_program__attach(skel->progs.handle_fentry);
+ if (!ASSERT_OK_PTR(link, "attach_fentry"))
+ goto cleanup;
+
+ ASSERT_ERR(delete_module("bpf_testmod", 0), "delete_module");
+ bpf_link__destroy(link);
+
+ link = bpf_program__attach(skel->progs.handle_fexit);
+ if (!ASSERT_OK_PTR(link, "attach_fexit"))
+ goto cleanup;
+
+ ASSERT_ERR(delete_module("bpf_testmod", 0), "delete_module");
+ bpf_link__destroy(link);
+
cleanup:
test_module_attach__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
index 31a3114906e2..2535788e135f 100644
--- a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
+++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
@@ -68,10 +68,10 @@ static void test_ns_current_pid_tgid_new_ns(void)
cpid = clone(test_current_pid_tgid, child_stack + STACK_SIZE,
CLONE_NEWPID | SIGCHLD, NULL);
- if (CHECK(cpid == -1, "clone", strerror(errno)))
+ if (CHECK(cpid == -1, "clone", "%s\n", strerror(errno)))
return;
- if (CHECK(waitpid(cpid, &wstatus, 0) == -1, "waitpid", strerror(errno)))
+ if (CHECK(waitpid(cpid, &wstatus, 0) == -1, "waitpid", "%s\n", strerror(errno)))
return;
if (CHECK(WEXITSTATUS(wstatus) != 0, "newns_pidtgid", "failed"))
diff --git a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
index 935a294f049a..131d7f7eeb42 100644
--- a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
+++ b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
@@ -2,12 +2,31 @@
#include <test_progs.h>
#include <network_helpers.h>
-void test_prog_run_xattr(void)
+#include "test_pkt_access.skel.h"
+
+static const __u32 duration;
+
+static void check_run_cnt(int prog_fd, __u64 run_cnt)
{
- const char *file = "./test_pkt_access.o";
- struct bpf_object *obj;
- char buf[10];
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
int err;
+
+ err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+ if (CHECK(err, "get_prog_info", "failed to get bpf_prog_info for fd %d\n", prog_fd))
+ return;
+
+ CHECK(run_cnt != info.run_cnt, "run_cnt",
+ "incorrect number of repetitions, want %llu have %llu\n", run_cnt, info.run_cnt);
+}
+
+void test_prog_run_xattr(void)
+{
+ struct test_pkt_access *skel;
+ int err, stats_fd = -1;
+ char buf[10] = {};
+ __u64 run_cnt = 0;
+
struct bpf_prog_test_run_attr tattr = {
.repeat = 1,
.data_in = &pkt_v4,
@@ -16,12 +35,15 @@ void test_prog_run_xattr(void)
.data_size_out = 5,
};
- err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj,
- &tattr.prog_fd);
- if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
+ stats_fd = bpf_enable_stats(BPF_STATS_RUN_TIME);
+ if (CHECK_ATTR(stats_fd < 0, "enable_stats", "failed %d\n", errno))
return;
- memset(buf, 0, sizeof(buf));
+ skel = test_pkt_access__open_and_load();
+ if (CHECK_ATTR(!skel, "open_and_load", "failed\n"))
+ goto cleanup;
+
+ tattr.prog_fd = bpf_program__fd(skel->progs.test_pkt_access);
err = bpf_prog_test_run_xattr(&tattr);
CHECK_ATTR(err != -1 || errno != ENOSPC || tattr.retval, "run",
@@ -34,8 +56,12 @@ void test_prog_run_xattr(void)
CHECK_ATTR(buf[5] != 0, "overflow",
"BPF_PROG_TEST_RUN ignored size hint\n");
+ run_cnt += tattr.repeat;
+ check_run_cnt(tattr.prog_fd, run_cnt);
+
tattr.data_out = NULL;
tattr.data_size_out = 0;
+ tattr.repeat = 2;
errno = 0;
err = bpf_prog_test_run_xattr(&tattr);
@@ -46,5 +72,12 @@ void test_prog_run_xattr(void)
err = bpf_prog_test_run_xattr(&tattr);
CHECK_ATTR(err != -EINVAL, "run_wrong_size_out", "err %d\n", err);
- bpf_object__close(obj);
+ run_cnt += tattr.repeat;
+ check_run_cnt(tattr.prog_fd, run_cnt);
+
+cleanup:
+ if (skel)
+ test_pkt_access__destroy(skel);
+ if (stats_fd != -1)
+ close(stats_fd);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
index 6ace5e9efec1..d3c2de2c24d1 100644
--- a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
+++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
@@ -160,11 +160,8 @@ int test_resolve_btfids(void)
break;
if (i > 0) {
- ret = CHECK(test_set.ids[i - 1] > test_set.ids[i],
- "sort_check",
- "test_set is not sorted\n");
- if (ret)
- break;
+ if (!ASSERT_LE(test_set.ids[i - 1], test_set.ids[i], "sort_check"))
+ return -1;
}
}
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
index fddbc5db5d6a..de78617f6550 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -87,11 +87,20 @@ void test_ringbuf(void)
pthread_t thread;
long bg_ret = -1;
int err, cnt;
+ int page_size = getpagesize();
- skel = test_ringbuf__open_and_load();
- if (CHECK(!skel, "skel_open_load", "skeleton open&load failed\n"))
+ skel = test_ringbuf__open();
+ if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
return;
+ err = bpf_map__set_max_entries(skel->maps.ringbuf, page_size);
+ if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
+ goto cleanup;
+
+ err = test_ringbuf__load(skel);
+ if (CHECK(err != 0, "skel_load", "skeleton load failed\n"))
+ goto cleanup;
+
/* only trigger BPF program for current process */
skel->bss->pid = getpid();
@@ -110,9 +119,9 @@ void test_ringbuf(void)
CHECK(skel->bss->avail_data != 3 * rec_sz,
"err_avail_size", "exp %ld, got %ld\n",
3L * rec_sz, skel->bss->avail_data);
- CHECK(skel->bss->ring_size != 4096,
+ CHECK(skel->bss->ring_size != page_size,
"err_ring_size", "exp %ld, got %ld\n",
- 4096L, skel->bss->ring_size);
+ (long)page_size, skel->bss->ring_size);
CHECK(skel->bss->cons_pos != 0,
"err_cons_pos", "exp %ld, got %ld\n",
0L, skel->bss->cons_pos);
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
index d37161e59bb2..cef63e703924 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
@@ -41,13 +41,42 @@ static int process_sample(void *ctx, void *data, size_t len)
void test_ringbuf_multi(void)
{
struct test_ringbuf_multi *skel;
- struct ring_buffer *ringbuf;
+ struct ring_buffer *ringbuf = NULL;
int err;
+ int page_size = getpagesize();
+ int proto_fd = -1;
- skel = test_ringbuf_multi__open_and_load();
- if (CHECK(!skel, "skel_open_load", "skeleton open&load failed\n"))
+ skel = test_ringbuf_multi__open();
+ if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
return;
+ err = bpf_map__set_max_entries(skel->maps.ringbuf1, page_size);
+ if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
+ goto cleanup;
+
+ err = bpf_map__set_max_entries(skel->maps.ringbuf2, page_size);
+ if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
+ goto cleanup;
+
+ err = bpf_map__set_max_entries(bpf_map__inner_map(skel->maps.ringbuf_arr), page_size);
+ if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
+ goto cleanup;
+
+ proto_fd = bpf_create_map(BPF_MAP_TYPE_RINGBUF, 0, 0, page_size, 0);
+ if (CHECK(proto_fd == -1, "bpf_create_map", "bpf_create_map failed\n"))
+ goto cleanup;
+
+ err = bpf_map__set_inner_map_fd(skel->maps.ringbuf_hash, proto_fd);
+ if (CHECK(err != 0, "bpf_map__set_inner_map_fd", "bpf_map__set_inner_map_fd failed\n"))
+ goto cleanup;
+
+ err = test_ringbuf_multi__load(skel);
+ if (CHECK(err != 0, "skel_load", "skeleton load failed\n"))
+ goto cleanup;
+
+ close(proto_fd);
+ proto_fd = -1;
+
/* only trigger BPF program for current process */
skel->bss->pid = getpid();
@@ -97,6 +126,8 @@ void test_ringbuf_multi(void)
2L, skel->bss->total);
cleanup:
+ if (proto_fd >= 0)
+ close(proto_fd);
ring_buffer__free(ringbuf);
test_ringbuf_multi__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
index 9ff0412e1fd3..45c82db3c58c 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
@@ -241,6 +241,48 @@ fail:
return -1;
}
+static __u64 socket_cookie(int fd)
+{
+ __u64 cookie;
+ socklen_t cookie_len = sizeof(cookie);
+
+ if (CHECK(getsockopt(fd, SOL_SOCKET, SO_COOKIE, &cookie, &cookie_len) < 0,
+ "getsockopt(SO_COOKIE)", "%s\n", strerror(errno)))
+ return 0;
+ return cookie;
+}
+
+static int fill_sk_lookup_ctx(struct bpf_sk_lookup *ctx, const char *local_ip, __u16 local_port,
+ const char *remote_ip, __u16 remote_port)
+{
+ void *local, *remote;
+ int err;
+
+ memset(ctx, 0, sizeof(*ctx));
+ ctx->local_port = local_port;
+ ctx->remote_port = htons(remote_port);
+
+ if (is_ipv6(local_ip)) {
+ ctx->family = AF_INET6;
+ local = &ctx->local_ip6[0];
+ remote = &ctx->remote_ip6[0];
+ } else {
+ ctx->family = AF_INET;
+ local = &ctx->local_ip4;
+ remote = &ctx->remote_ip4;
+ }
+
+ err = inet_pton(ctx->family, local_ip, local);
+ if (CHECK(err != 1, "inet_pton", "local_ip failed\n"))
+ return 1;
+
+ err = inet_pton(ctx->family, remote_ip, remote);
+ if (CHECK(err != 1, "inet_pton", "remote_ip failed\n"))
+ return 1;
+
+ return 0;
+}
+
static int send_byte(int fd)
{
ssize_t n;
@@ -1009,18 +1051,27 @@ static void test_drop_on_reuseport(struct test_sk_lookup *skel)
static void run_sk_assign(struct test_sk_lookup *skel,
struct bpf_program *lookup_prog,
- const char *listen_ip, const char *connect_ip)
+ const char *remote_ip, const char *local_ip)
{
- int client_fd, peer_fd, server_fds[MAX_SERVERS] = { -1 };
- struct bpf_link *lookup_link;
+ int server_fds[MAX_SERVERS] = { -1 };
+ struct bpf_sk_lookup ctx;
+ __u64 server_cookie;
int i, err;
- lookup_link = attach_lookup_prog(lookup_prog);
- if (!lookup_link)
+ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .ctx_in = &ctx,
+ .ctx_size_in = sizeof(ctx),
+ .ctx_out = &ctx,
+ .ctx_size_out = sizeof(ctx),
+ );
+
+ if (fill_sk_lookup_ctx(&ctx, local_ip, EXT_PORT, remote_ip, INT_PORT))
return;
+ ctx.protocol = IPPROTO_TCP;
+
for (i = 0; i < ARRAY_SIZE(server_fds); i++) {
- server_fds[i] = make_server(SOCK_STREAM, listen_ip, 0, NULL);
+ server_fds[i] = make_server(SOCK_STREAM, local_ip, 0, NULL);
if (server_fds[i] < 0)
goto close_servers;
@@ -1030,23 +1081,25 @@ static void run_sk_assign(struct test_sk_lookup *skel,
goto close_servers;
}
- client_fd = make_client(SOCK_STREAM, connect_ip, EXT_PORT);
- if (client_fd < 0)
+ server_cookie = socket_cookie(server_fds[SERVER_B]);
+ if (!server_cookie)
+ return;
+
+ err = bpf_prog_test_run_opts(bpf_program__fd(lookup_prog), &opts);
+ if (CHECK(err, "test_run", "failed with error %d\n", errno))
+ goto close_servers;
+
+ if (CHECK(ctx.cookie == 0, "ctx.cookie", "no socket selected\n"))
goto close_servers;
- peer_fd = accept(server_fds[SERVER_B], NULL, NULL);
- if (CHECK(peer_fd < 0, "accept", "failed\n"))
- goto close_client;
+ CHECK(ctx.cookie != server_cookie, "ctx.cookie",
+ "selected sk %llu instead of %llu\n", ctx.cookie, server_cookie);
- close(peer_fd);
-close_client:
- close(client_fd);
close_servers:
for (i = 0; i < ARRAY_SIZE(server_fds); i++) {
if (server_fds[i] != -1)
close(server_fds[i]);
}
- bpf_link__destroy(lookup_link);
}
static void run_sk_assign_v4(struct test_sk_lookup *skel,
diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c
new file mode 100644
index 000000000000..a958c22aec75
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Google LLC. */
+
+#include <test_progs.h>
+#include "test_snprintf.skel.h"
+#include "test_snprintf_single.skel.h"
+
+#define EXP_NUM_OUT "-8 9 96 -424242 1337 DABBAD00"
+#define EXP_NUM_RET sizeof(EXP_NUM_OUT)
+
+#define EXP_IP_OUT "127.000.000.001 0000:0000:0000:0000:0000:0000:0000:0001"
+#define EXP_IP_RET sizeof(EXP_IP_OUT)
+
+/* The third specifier, %pB, depends on compiler inlining so don't check it */
+#define EXP_SYM_OUT "schedule schedule+0x0/"
+#define MIN_SYM_RET sizeof(EXP_SYM_OUT)
+
+/* The third specifier, %p, is a hashed pointer which changes on every reboot */
+#define EXP_ADDR_OUT "0000000000000000 ffff00000add4e55 "
+#define EXP_ADDR_RET sizeof(EXP_ADDR_OUT "unknownhashedptr")
+
+#define EXP_STR_OUT "str1 longstr"
+#define EXP_STR_RET sizeof(EXP_STR_OUT)
+
+#define EXP_OVER_OUT "%over"
+#define EXP_OVER_RET 10
+
+#define EXP_PAD_OUT " 4 000"
+#define EXP_PAD_RET 900007
+
+#define EXP_NO_ARG_OUT "simple case"
+#define EXP_NO_ARG_RET 12
+
+#define EXP_NO_BUF_RET 29
+
+void test_snprintf_positive(void)
+{
+ char exp_addr_out[] = EXP_ADDR_OUT;
+ char exp_sym_out[] = EXP_SYM_OUT;
+ struct test_snprintf *skel;
+
+ skel = test_snprintf__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ if (!ASSERT_OK(test_snprintf__attach(skel), "skel_attach"))
+ goto cleanup;
+
+ /* trigger tracepoint */
+ usleep(1);
+
+ ASSERT_STREQ(skel->bss->num_out, EXP_NUM_OUT, "num_out");
+ ASSERT_EQ(skel->bss->num_ret, EXP_NUM_RET, "num_ret");
+
+ ASSERT_STREQ(skel->bss->ip_out, EXP_IP_OUT, "ip_out");
+ ASSERT_EQ(skel->bss->ip_ret, EXP_IP_RET, "ip_ret");
+
+ ASSERT_OK(memcmp(skel->bss->sym_out, exp_sym_out,
+ sizeof(exp_sym_out) - 1), "sym_out");
+ ASSERT_LT(MIN_SYM_RET, skel->bss->sym_ret, "sym_ret");
+
+ ASSERT_OK(memcmp(skel->bss->addr_out, exp_addr_out,
+ sizeof(exp_addr_out) - 1), "addr_out");
+ ASSERT_EQ(skel->bss->addr_ret, EXP_ADDR_RET, "addr_ret");
+
+ ASSERT_STREQ(skel->bss->str_out, EXP_STR_OUT, "str_out");
+ ASSERT_EQ(skel->bss->str_ret, EXP_STR_RET, "str_ret");
+
+ ASSERT_STREQ(skel->bss->over_out, EXP_OVER_OUT, "over_out");
+ ASSERT_EQ(skel->bss->over_ret, EXP_OVER_RET, "over_ret");
+
+ ASSERT_STREQ(skel->bss->pad_out, EXP_PAD_OUT, "pad_out");
+ ASSERT_EQ(skel->bss->pad_ret, EXP_PAD_RET, "pad_ret");
+
+ ASSERT_STREQ(skel->bss->noarg_out, EXP_NO_ARG_OUT, "no_arg_out");
+ ASSERT_EQ(skel->bss->noarg_ret, EXP_NO_ARG_RET, "no_arg_ret");
+
+ ASSERT_EQ(skel->bss->nobuf_ret, EXP_NO_BUF_RET, "no_buf_ret");
+
+cleanup:
+ test_snprintf__destroy(skel);
+}
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+
+/* Loads an eBPF object calling bpf_snprintf with up to 10 characters of fmt */
+static int load_single_snprintf(char *fmt)
+{
+ struct test_snprintf_single *skel;
+ int ret;
+
+ skel = test_snprintf_single__open();
+ if (!skel)
+ return -EINVAL;
+
+ memcpy(skel->rodata->fmt, fmt, min(strlen(fmt) + 1, 10));
+
+ ret = test_snprintf_single__load(skel);
+ test_snprintf_single__destroy(skel);
+
+ return ret;
+}
+
+void test_snprintf_negative(void)
+{
+ ASSERT_OK(load_single_snprintf("valid %d"), "valid usage");
+
+ ASSERT_ERR(load_single_snprintf("0123456789"), "no terminating zero");
+ ASSERT_ERR(load_single_snprintf("%d %d"), "too many specifiers");
+ ASSERT_ERR(load_single_snprintf("%pi5"), "invalid specifier 1");
+ ASSERT_ERR(load_single_snprintf("%a"), "invalid specifier 2");
+ ASSERT_ERR(load_single_snprintf("%"), "invalid specifier 3");
+ ASSERT_ERR(load_single_snprintf("%12345678"), "invalid specifier 4");
+ ASSERT_ERR(load_single_snprintf("%--------"), "invalid specifier 5");
+ ASSERT_ERR(load_single_snprintf("\x80"), "non ascii character");
+ ASSERT_ERR(load_single_snprintf("\x1"), "non printable character");
+}
+
+void test_snprintf(void)
+{
+ if (test__start_subtest("snprintf_positive"))
+ test_snprintf_positive();
+ if (test__start_subtest("snprintf_negative"))
+ test_snprintf_negative();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c b/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c
index 686b40f11a45..76e1f5fe18fa 100644
--- a/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c
@@ -42,9 +42,7 @@ void test_snprintf_btf(void)
* and it set expected return values from bpf_trace_printk()s
* and all tests ran.
*/
- if (CHECK(bss->ret <= 0,
- "bpf_snprintf_btf: got return value",
- "ret <= 0 %ld test %d\n", bss->ret, bss->ran_subtests))
+ if (!ASSERT_GT(bss->ret, 0, "bpf_snprintf_ret"))
goto cleanup;
if (CHECK(bss->ran_subtests == 0, "check if subtests ran",
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index b8b48cac2ac3..ab77596b64e3 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -7,6 +7,7 @@
#include "test_skmsg_load_helpers.skel.h"
#include "test_sockmap_update.skel.h"
#include "test_sockmap_invalid_update.skel.h"
+#include "test_sockmap_skb_verdict_attach.skel.h"
#include "bpf_iter_sockmap.skel.h"
#define TCP_REPAIR 19 /* TCP sock is under repair right now */
@@ -281,6 +282,39 @@ out:
bpf_iter_sockmap__destroy(skel);
}
+static void test_sockmap_skb_verdict_attach(enum bpf_attach_type first,
+ enum bpf_attach_type second)
+{
+ struct test_sockmap_skb_verdict_attach *skel;
+ int err, map, verdict;
+
+ skel = test_sockmap_skb_verdict_attach__open_and_load();
+ if (CHECK_FAIL(!skel)) {
+ perror("test_sockmap_skb_verdict_attach__open_and_load");
+ return;
+ }
+
+ verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+ map = bpf_map__fd(skel->maps.sock_map);
+
+ err = bpf_prog_attach(verdict, map, first, 0);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_prog_attach");
+ goto out;
+ }
+
+ err = bpf_prog_attach(verdict, map, second, 0);
+ assert(err == -1 && errno == EBUSY);
+
+ err = bpf_prog_detach2(verdict, map, first);
+ if (CHECK_FAIL(err)) {
+ perror("bpf_prog_detach2");
+ goto out;
+ }
+out:
+ test_sockmap_skb_verdict_attach__destroy(skel);
+}
+
void test_sockmap_basic(void)
{
if (test__start_subtest("sockmap create_update_free"))
@@ -301,4 +335,10 @@ void test_sockmap_basic(void)
test_sockmap_copy(BPF_MAP_TYPE_SOCKMAP);
if (test__start_subtest("sockhash copy"))
test_sockmap_copy(BPF_MAP_TYPE_SOCKHASH);
+ if (test__start_subtest("sockmap skb_verdict attach")) {
+ test_sockmap_skb_verdict_attach(BPF_SK_SKB_VERDICT,
+ BPF_SK_SKB_STREAM_VERDICT);
+ test_sockmap_skb_verdict_attach(BPF_SK_SKB_STREAM_VERDICT,
+ BPF_SK_SKB_VERDICT);
+ }
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index d7d65a700799..648d9ae898d2 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -1014,8 +1014,8 @@ static void test_skb_redir_to_connected(struct test_sockmap_listen *skel,
struct bpf_map *inner_map, int family,
int sotype)
{
- int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
- int parser = bpf_program__fd(skel->progs.prog_skb_parser);
+ int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
+ int parser = bpf_program__fd(skel->progs.prog_stream_parser);
int verdict_map = bpf_map__fd(skel->maps.verdict_map);
int sock_map = bpf_map__fd(inner_map);
int err;
@@ -1125,8 +1125,8 @@ static void test_skb_redir_to_listening(struct test_sockmap_listen *skel,
struct bpf_map *inner_map, int family,
int sotype)
{
- int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
- int parser = bpf_program__fd(skel->progs.prog_skb_parser);
+ int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
+ int parser = bpf_program__fd(skel->progs.prog_stream_parser);
int verdict_map = bpf_map__fd(skel->maps.verdict_map);
int sock_map = bpf_map__fd(inner_map);
int err;
@@ -1603,6 +1603,141 @@ static void test_reuseport(struct test_sockmap_listen *skel,
}
}
+static void udp_redir_to_connected(int family, int sotype, int sock_mapfd,
+ int verd_mapfd, enum redir_mode mode)
+{
+ const char *log_prefix = redir_mode_str(mode);
+ struct sockaddr_storage addr;
+ int c0, c1, p0, p1;
+ unsigned int pass;
+ socklen_t len;
+ int err, n;
+ u64 value;
+ u32 key;
+ char b;
+
+ zero_verdict_count(verd_mapfd);
+
+ p0 = socket_loopback(family, sotype | SOCK_NONBLOCK);
+ if (p0 < 0)
+ return;
+ len = sizeof(addr);
+ err = xgetsockname(p0, sockaddr(&addr), &len);
+ if (err)
+ goto close_peer0;
+
+ c0 = xsocket(family, sotype | SOCK_NONBLOCK, 0);
+ if (c0 < 0)
+ goto close_peer0;
+ err = xconnect(c0, sockaddr(&addr), len);
+ if (err)
+ goto close_cli0;
+ err = xgetsockname(c0, sockaddr(&addr), &len);
+ if (err)
+ goto close_cli0;
+ err = xconnect(p0, sockaddr(&addr), len);
+ if (err)
+ goto close_cli0;
+
+ p1 = socket_loopback(family, sotype | SOCK_NONBLOCK);
+ if (p1 < 0)
+ goto close_cli0;
+ err = xgetsockname(p1, sockaddr(&addr), &len);
+ if (err)
+ goto close_cli0;
+
+ c1 = xsocket(family, sotype | SOCK_NONBLOCK, 0);
+ if (c1 < 0)
+ goto close_peer1;
+ err = xconnect(c1, sockaddr(&addr), len);
+ if (err)
+ goto close_cli1;
+ err = xgetsockname(c1, sockaddr(&addr), &len);
+ if (err)
+ goto close_cli1;
+ err = xconnect(p1, sockaddr(&addr), len);
+ if (err)
+ goto close_cli1;
+
+ key = 0;
+ value = p0;
+ err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ if (err)
+ goto close_cli1;
+
+ key = 1;
+ value = p1;
+ err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ if (err)
+ goto close_cli1;
+
+ n = write(c1, "a", 1);
+ if (n < 0)
+ FAIL_ERRNO("%s: write", log_prefix);
+ if (n == 0)
+ FAIL("%s: incomplete write", log_prefix);
+ if (n < 1)
+ goto close_cli1;
+
+ key = SK_PASS;
+ err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
+ if (err)
+ goto close_cli1;
+ if (pass != 1)
+ FAIL("%s: want pass count 1, have %d", log_prefix, pass);
+
+ n = read(mode == REDIR_INGRESS ? p0 : c0, &b, 1);
+ if (n < 0)
+ FAIL_ERRNO("%s: read", log_prefix);
+ if (n == 0)
+ FAIL("%s: incomplete read", log_prefix);
+
+close_cli1:
+ xclose(c1);
+close_peer1:
+ xclose(p1);
+close_cli0:
+ xclose(c0);
+close_peer0:
+ xclose(p0);
+}
+
+static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int family)
+{
+ int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+ int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+ int sock_map = bpf_map__fd(inner_map);
+ int err;
+
+ err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
+ if (err)
+ return;
+
+ skel->bss->test_ingress = false;
+ udp_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
+ REDIR_EGRESS);
+ skel->bss->test_ingress = true;
+ udp_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
+ REDIR_INGRESS);
+
+ xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
+}
+
+static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
+ int family)
+{
+ const char *family_name, *map_name;
+ char s[MAX_TEST_NAME];
+
+ family_name = family_str(family);
+ map_name = map_type_str(map);
+ snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
+ if (!test__start_subtest(s))
+ return;
+ udp_skb_redir_to_connected(skel, map, family);
+}
+
static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
int family)
{
@@ -1611,6 +1746,7 @@ static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
test_redir(skel, map, family, SOCK_STREAM);
test_reuseport(skel, map, family, SOCK_STREAM);
test_reuseport(skel, map, family, SOCK_DGRAM);
+ test_udp_redir(skel, map, family);
}
void test_sockmap_listen(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
index d5b44b135c00..4b937e5dbaca 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
@@ -3,6 +3,7 @@
#include "cgroup_helpers.h"
#include <linux/tcp.h>
+#include "sockopt_sk.skel.h"
#ifndef SOL_TCP
#define SOL_TCP IPPROTO_TCP
@@ -191,60 +192,30 @@ err:
return -1;
}
-static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title)
+static void run_test(int cgroup_fd)
{
- enum bpf_attach_type attach_type;
- enum bpf_prog_type prog_type;
- struct bpf_program *prog;
- int err;
+ struct sockopt_sk *skel;
- err = libbpf_prog_type_by_name(title, &prog_type, &attach_type);
- if (err) {
- log_err("Failed to deduct types for %s BPF program", title);
- return -1;
- }
+ skel = sockopt_sk__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
- prog = bpf_object__find_program_by_title(obj, title);
- if (!prog) {
- log_err("Failed to find %s BPF program", title);
- return -1;
- }
-
- err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd,
- attach_type, 0);
- if (err) {
- log_err("Failed to attach %s BPF program", title);
- return -1;
- }
-
- return 0;
-}
-
-static void run_test(int cgroup_fd)
-{
- struct bpf_prog_load_attr attr = {
- .file = "./sockopt_sk.o",
- };
- struct bpf_object *obj;
- int ignored;
- int err;
-
- err = bpf_prog_load_xattr(&attr, &obj, &ignored);
- if (CHECK_FAIL(err))
- return;
+ skel->bss->page_size = getpagesize();
- err = prog_attach(obj, cgroup_fd, "cgroup/getsockopt");
- if (CHECK_FAIL(err))
- goto close_bpf_object;
+ skel->links._setsockopt =
+ bpf_program__attach_cgroup(skel->progs._setsockopt, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links._setsockopt, "setsockopt_link"))
+ goto cleanup;
- err = prog_attach(obj, cgroup_fd, "cgroup/setsockopt");
- if (CHECK_FAIL(err))
- goto close_bpf_object;
+ skel->links._getsockopt =
+ bpf_program__attach_cgroup(skel->progs._getsockopt, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links._getsockopt, "getsockopt_link"))
+ goto cleanup;
- CHECK_FAIL(getsetsockopt());
+ ASSERT_OK(getsetsockopt(), "getsetsockopt");
-close_bpf_object:
- bpf_object__close(obj);
+cleanup:
+ sockopt_sk__destroy(skel);
}
void test_sockopt_sk(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/static_linked.c b/tools/testing/selftests/bpf/prog_tests/static_linked.c
new file mode 100644
index 000000000000..46556976dccc
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/static_linked.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+#include <test_progs.h>
+#include "test_static_linked.skel.h"
+
+void test_static_linked(void)
+{
+ int err;
+ struct test_static_linked* skel;
+
+ skel = test_static_linked__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->rodata->rovar1 = 1;
+ skel->bss->static_var1 = 2;
+ skel->bss->static_var11 = 3;
+
+ skel->rodata->rovar2 = 4;
+ skel->bss->static_var2 = 5;
+ skel->bss->static_var22 = 6;
+
+ err = test_static_linked__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ err = test_static_linked__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ /* trigger */
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->var1, 1 * 2 + 2 + 3, "var1");
+ ASSERT_EQ(skel->bss->var2, 4 * 3 + 5 + 6, "var2");
+
+cleanup:
+ test_static_linked__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
new file mode 100644
index 000000000000..035c263aab1b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#define _GNU_SOURCE /* See feature_test_macros(7) */
+#include <unistd.h>
+#include <sys/syscall.h> /* For SYS_xxx definitions */
+#include <sys/types.h>
+#include <test_progs.h>
+#include "task_local_storage.skel.h"
+#include "task_local_storage_exit_creds.skel.h"
+#include "task_ls_recursion.skel.h"
+
+static void test_sys_enter_exit(void)
+{
+ struct task_local_storage *skel;
+ int err;
+
+ skel = task_local_storage__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ skel->bss->target_pid = syscall(SYS_gettid);
+
+ err = task_local_storage__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
+
+ syscall(SYS_gettid);
+ syscall(SYS_gettid);
+
+ /* 3x syscalls: 1x attach and 2x gettid */
+ ASSERT_EQ(skel->bss->enter_cnt, 3, "enter_cnt");
+ ASSERT_EQ(skel->bss->exit_cnt, 3, "exit_cnt");
+ ASSERT_EQ(skel->bss->mismatch_cnt, 0, "mismatch_cnt");
+out:
+ task_local_storage__destroy(skel);
+}
+
+static void test_exit_creds(void)
+{
+ struct task_local_storage_exit_creds *skel;
+ int err;
+
+ skel = task_local_storage_exit_creds__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ err = task_local_storage_exit_creds__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
+
+ /* trigger at least one exit_creds() */
+ if (CHECK_FAIL(system("ls > /dev/null")))
+ goto out;
+
+ /* sync rcu to make sure exit_creds() is called for "ls" */
+ kern_sync_rcu();
+ ASSERT_EQ(skel->bss->valid_ptr_count, 0, "valid_ptr_count");
+ ASSERT_NEQ(skel->bss->null_ptr_count, 0, "null_ptr_count");
+out:
+ task_local_storage_exit_creds__destroy(skel);
+}
+
+static void test_recursion(void)
+{
+ struct task_ls_recursion *skel;
+ int err;
+
+ skel = task_ls_recursion__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ err = task_ls_recursion__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
+
+ /* trigger sys_enter, make sure it does not cause deadlock */
+ syscall(SYS_gettid);
+
+out:
+ task_ls_recursion__destroy(skel);
+}
+
+void test_task_local_storage(void)
+{
+ if (test__start_subtest("sys_enter_exit"))
+ test_sys_enter_exit();
+ if (test__start_subtest("exit_creds"))
+ test_exit_creds();
+ if (test__start_subtest("recursion"))
+ test_recursion();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_ima.c b/tools/testing/selftests/bpf/prog_tests/test_ima.c
index b54bc0c351b7..0252f61d611a 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_ima.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_ima.c
@@ -68,7 +68,8 @@ void test_test_ima(void)
goto close_prog;
snprintf(cmd, sizeof(cmd), "./ima_setup.sh setup %s", measured_dir);
- if (CHECK_FAIL(system(cmd)))
+ err = system(cmd);
+ if (CHECK(err, "failed to run command", "%s, errno = %d\n", cmd, errno))
goto close_clean;
err = run_measured_process(measured_dir, &skel->bss->monitored_pid);
@@ -81,7 +82,8 @@ void test_test_ima(void)
close_clean:
snprintf(cmd, sizeof(cmd), "./ima_setup.sh cleanup %s", measured_dir);
- CHECK_FAIL(system(cmd));
+ err = system(cmd);
+ CHECK(err, "failed to run command", "%s, errno = %d\n", cmd, errno);
close_prog:
ima__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_lsm.c b/tools/testing/selftests/bpf/prog_tests/test_lsm.c
index 2755e4f81499..244c01125126 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_lsm.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_lsm.c
@@ -51,43 +51,64 @@ int exec_cmd(int *monitored_pid)
return -EINVAL;
}
-void test_test_lsm(void)
+static int test_lsm(struct lsm *skel)
{
- struct lsm *skel = NULL;
- int err, duration = 0;
+ struct bpf_link *link;
int buf = 1234;
-
- skel = lsm__open_and_load();
- if (CHECK(!skel, "skel_load", "lsm skeleton failed\n"))
- goto close_prog;
+ int err;
err = lsm__attach(skel);
- if (CHECK(err, "attach", "lsm attach failed: %d\n", err))
- goto close_prog;
+ if (!ASSERT_OK(err, "attach"))
+ return err;
+
+ /* Check that already linked program can't be attached again. */
+ link = bpf_program__attach(skel->progs.test_int_hook);
+ if (!ASSERT_ERR_PTR(link, "attach_link"))
+ return -1;
err = exec_cmd(&skel->bss->monitored_pid);
- if (CHECK(err < 0, "exec_cmd", "err %d errno %d\n", err, errno))
- goto close_prog;
+ if (!ASSERT_OK(err, "exec_cmd"))
+ return err;
- CHECK(skel->bss->bprm_count != 1, "bprm_count", "bprm_count = %d\n",
- skel->bss->bprm_count);
+ ASSERT_EQ(skel->bss->bprm_count, 1, "bprm_count");
skel->bss->monitored_pid = getpid();
err = stack_mprotect();
- if (CHECK(errno != EPERM, "stack_mprotect", "want err=EPERM, got %d\n",
- errno))
- goto close_prog;
+ if (!ASSERT_EQ(errno, EPERM, "stack_mprotect"))
+ return err;
- CHECK(skel->bss->mprotect_count != 1, "mprotect_count",
- "mprotect_count = %d\n", skel->bss->mprotect_count);
+ ASSERT_EQ(skel->bss->mprotect_count, 1, "mprotect_count");
syscall(__NR_setdomainname, &buf, -2L);
syscall(__NR_setdomainname, 0, -3L);
syscall(__NR_setdomainname, ~0L, -4L);
- CHECK(skel->bss->copy_test != 3, "copy_test",
- "copy_test = %d\n", skel->bss->copy_test);
+ ASSERT_EQ(skel->bss->copy_test, 3, "copy_test");
+
+ lsm__detach(skel);
+
+ skel->bss->copy_test = 0;
+ skel->bss->bprm_count = 0;
+ skel->bss->mprotect_count = 0;
+ return 0;
+}
+
+void test_test_lsm(void)
+{
+ struct lsm *skel = NULL;
+ int err;
+
+ skel = lsm__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "lsm_skel_load"))
+ goto close_prog;
+
+ err = test_lsm(skel);
+ if (!ASSERT_OK(err, "test_lsm_first_attach"))
+ goto close_prog;
+
+ err = test_lsm(skel);
+ ASSERT_OK(err, "test_lsm_second_attach");
close_prog:
lsm__destroy(skel);
diff --git a/tools/testing/selftests/bpf/progs/bind4_prog.c b/tools/testing/selftests/bpf/progs/bind4_prog.c
index 115a3b0ad984..474c6a62078a 100644
--- a/tools/testing/selftests/bpf/progs/bind4_prog.c
+++ b/tools/testing/selftests/bpf/progs/bind4_prog.c
@@ -57,6 +57,27 @@ static __inline int bind_to_device(struct bpf_sock_addr *ctx)
return 0;
}
+static __inline int bind_reuseport(struct bpf_sock_addr *ctx)
+{
+ int val = 1;
+
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_REUSEPORT,
+ &val, sizeof(val)))
+ return 1;
+ if (bpf_getsockopt(ctx, SOL_SOCKET, SO_REUSEPORT,
+ &val, sizeof(val)) || !val)
+ return 1;
+ val = 0;
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_REUSEPORT,
+ &val, sizeof(val)))
+ return 1;
+ if (bpf_getsockopt(ctx, SOL_SOCKET, SO_REUSEPORT,
+ &val, sizeof(val)) || val)
+ return 1;
+
+ return 0;
+}
+
static __inline int misc_opts(struct bpf_sock_addr *ctx, int opt)
{
int old, tmp, new = 0xeb9f;
@@ -127,6 +148,10 @@ int bind_v4_prog(struct bpf_sock_addr *ctx)
if (misc_opts(ctx, SO_MARK) || misc_opts(ctx, SO_PRIORITY))
return 0;
+ /* Set reuseport and unset */
+ if (bind_reuseport(ctx))
+ return 0;
+
ctx->user_ip4 = bpf_htonl(SERV4_REWRITE_IP);
ctx->user_port = bpf_htons(SERV4_REWRITE_PORT);
diff --git a/tools/testing/selftests/bpf/progs/bind6_prog.c b/tools/testing/selftests/bpf/progs/bind6_prog.c
index 4c0d348034b9..c19cfa869f30 100644
--- a/tools/testing/selftests/bpf/progs/bind6_prog.c
+++ b/tools/testing/selftests/bpf/progs/bind6_prog.c
@@ -63,6 +63,27 @@ static __inline int bind_to_device(struct bpf_sock_addr *ctx)
return 0;
}
+static __inline int bind_reuseport(struct bpf_sock_addr *ctx)
+{
+ int val = 1;
+
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_REUSEPORT,
+ &val, sizeof(val)))
+ return 1;
+ if (bpf_getsockopt(ctx, SOL_SOCKET, SO_REUSEPORT,
+ &val, sizeof(val)) || !val)
+ return 1;
+ val = 0;
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_REUSEPORT,
+ &val, sizeof(val)))
+ return 1;
+ if (bpf_getsockopt(ctx, SOL_SOCKET, SO_REUSEPORT,
+ &val, sizeof(val)) || val)
+ return 1;
+
+ return 0;
+}
+
static __inline int misc_opts(struct bpf_sock_addr *ctx, int opt)
{
int old, tmp, new = 0xeb9f;
@@ -141,6 +162,10 @@ int bind_v6_prog(struct bpf_sock_addr *ctx)
if (misc_opts(ctx, SO_MARK) || misc_opts(ctx, SO_PRIORITY))
return 0;
+ /* Set reuseport and unset */
+ if (bind_reuseport(ctx))
+ return 0;
+
ctx->user_ip6[0] = bpf_htonl(SERV6_REWRITE_IP_0);
ctx->user_ip6[1] = bpf_htonl(SERV6_REWRITE_IP_1);
ctx->user_ip6[2] = bpf_htonl(SERV6_REWRITE_IP_2);
diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c
index 6939bfd8690f..f62df4d023f9 100644
--- a/tools/testing/selftests/bpf/progs/bpf_cubic.c
+++ b/tools/testing/selftests/bpf/progs/bpf_cubic.c
@@ -174,8 +174,8 @@ static __always_inline void bictcp_hystart_reset(struct sock *sk)
* as long as it is used in one of the func ptr
* under SEC(".struct_ops").
*/
-SEC("struct_ops/bictcp_init")
-void BPF_PROG(bictcp_init, struct sock *sk)
+SEC("struct_ops/bpf_cubic_init")
+void BPF_PROG(bpf_cubic_init, struct sock *sk)
{
struct bictcp *ca = inet_csk_ca(sk);
@@ -192,7 +192,7 @@ void BPF_PROG(bictcp_init, struct sock *sk)
* The remaining tcp-cubic functions have an easier way.
*/
SEC("no-sec-prefix-bictcp_cwnd_event")
-void BPF_PROG(bictcp_cwnd_event, struct sock *sk, enum tcp_ca_event event)
+void BPF_PROG(bpf_cubic_cwnd_event, struct sock *sk, enum tcp_ca_event event)
{
if (event == CA_EVENT_TX_START) {
struct bictcp *ca = inet_csk_ca(sk);
@@ -384,7 +384,7 @@ tcp_friendliness:
}
/* Or simply use the BPF_STRUCT_OPS to avoid the SEC boiler plate. */
-void BPF_STRUCT_OPS(bictcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
+void BPF_STRUCT_OPS(bpf_cubic_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
@@ -403,7 +403,7 @@ void BPF_STRUCT_OPS(bictcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
tcp_cong_avoid_ai(tp, ca->cnt, acked);
}
-__u32 BPF_STRUCT_OPS(bictcp_recalc_ssthresh, struct sock *sk)
+__u32 BPF_STRUCT_OPS(bpf_cubic_recalc_ssthresh, struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
@@ -420,7 +420,7 @@ __u32 BPF_STRUCT_OPS(bictcp_recalc_ssthresh, struct sock *sk)
return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
}
-void BPF_STRUCT_OPS(bictcp_state, struct sock *sk, __u8 new_state)
+void BPF_STRUCT_OPS(bpf_cubic_state, struct sock *sk, __u8 new_state)
{
if (new_state == TCP_CA_Loss) {
bictcp_reset(inet_csk_ca(sk));
@@ -496,7 +496,7 @@ static __always_inline void hystart_update(struct sock *sk, __u32 delay)
}
}
-void BPF_STRUCT_OPS(bictcp_acked, struct sock *sk,
+void BPF_STRUCT_OPS(bpf_cubic_acked, struct sock *sk,
const struct ack_sample *sample)
{
const struct tcp_sock *tp = tcp_sk(sk);
@@ -525,21 +525,21 @@ void BPF_STRUCT_OPS(bictcp_acked, struct sock *sk,
hystart_update(sk, delay);
}
-__u32 BPF_STRUCT_OPS(tcp_reno_undo_cwnd, struct sock *sk)
-{
- const struct tcp_sock *tp = tcp_sk(sk);
+extern __u32 tcp_reno_undo_cwnd(struct sock *sk) __ksym;
- return max(tp->snd_cwnd, tp->prior_cwnd);
+__u32 BPF_STRUCT_OPS(bpf_cubic_undo_cwnd, struct sock *sk)
+{
+ return tcp_reno_undo_cwnd(sk);
}
SEC(".struct_ops")
struct tcp_congestion_ops cubic = {
- .init = (void *)bictcp_init,
- .ssthresh = (void *)bictcp_recalc_ssthresh,
- .cong_avoid = (void *)bictcp_cong_avoid,
- .set_state = (void *)bictcp_state,
- .undo_cwnd = (void *)tcp_reno_undo_cwnd,
- .cwnd_event = (void *)bictcp_cwnd_event,
- .pkts_acked = (void *)bictcp_acked,
+ .init = (void *)bpf_cubic_init,
+ .ssthresh = (void *)bpf_cubic_recalc_ssthresh,
+ .cong_avoid = (void *)bpf_cubic_cong_avoid,
+ .set_state = (void *)bpf_cubic_state,
+ .undo_cwnd = (void *)bpf_cubic_undo_cwnd,
+ .cwnd_event = (void *)bpf_cubic_cwnd_event,
+ .pkts_acked = (void *)bpf_cubic_acked,
.name = "bpf_cubic",
};
diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
index 4dc1a967776a..fd42247da8b4 100644
--- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c
+++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
@@ -194,22 +194,12 @@ __u32 BPF_PROG(dctcp_cwnd_undo, struct sock *sk)
return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
}
-SEC("struct_ops/tcp_reno_cong_avoid")
-void BPF_PROG(tcp_reno_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
-{
- struct tcp_sock *tp = tcp_sk(sk);
-
- if (!tcp_is_cwnd_limited(sk))
- return;
+extern void tcp_reno_cong_avoid(struct sock *sk, __u32 ack, __u32 acked) __ksym;
- /* In "safe" area, increase. */
- if (tcp_in_slow_start(tp)) {
- acked = tcp_slow_start(tp, acked);
- if (!acked)
- return;
- }
- /* In dangerous area, increase slowly. */
- tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked);
+SEC("struct_ops/dctcp_reno_cong_avoid")
+void BPF_PROG(dctcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
+{
+ tcp_reno_cong_avoid(sk, ack, acked);
}
SEC(".struct_ops")
@@ -226,7 +216,7 @@ struct tcp_congestion_ops dctcp = {
.in_ack_event = (void *)dctcp_update_alpha,
.cwnd_event = (void *)dctcp_cwnd_event,
.ssthresh = (void *)dctcp_ssthresh,
- .cong_avoid = (void *)tcp_reno_cong_avoid,
+ .cong_avoid = (void *)dctcp_cong_avoid,
.undo_cwnd = (void *)dctcp_cwnd_undo,
.set_state = (void *)dctcp_state,
.flags = TCP_CONG_NEEDS_ECN,
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
index 50e59a2e142e..43c36f5f7649 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
@@ -35,3 +35,30 @@ int dump_task_stack(struct bpf_iter__task *ctx)
return 0;
}
+
+SEC("iter/task")
+int get_task_user_stacks(struct bpf_iter__task *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct task_struct *task = ctx->task;
+ uint64_t buf_sz = 0;
+ int64_t res;
+
+ if (task == (void *)0)
+ return 0;
+
+ res = bpf_get_task_stack(task, entries,
+ MAX_STACK_TRACE_DEPTH * SIZE_OF_ULONG, BPF_F_USER_STACK);
+ if (res <= 0)
+ return 0;
+
+ buf_sz += res;
+
+ /* If the verifier doesn't refine bpf_get_task_stack res, and instead
+ * assumes res is entirely unknown, this program will fail to load as
+ * the verifier will believe that max buf_sz value allows reading
+ * past the end of entries in bpf_seq_write call
+ */
+ bpf_seq_write(seq, &entries, buf_sz);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_tcp_nogpl.c b/tools/testing/selftests/bpf/progs/bpf_tcp_nogpl.c
new file mode 100644
index 000000000000..2ecd833dcd41
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_tcp_nogpl.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <linux/types.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "X";
+
+void BPF_STRUCT_OPS(nogpltcp_init, struct sock *sk)
+{
+}
+
+SEC(".struct_ops")
+struct tcp_congestion_ops bpf_nogpltcp = {
+ .init = (void *)nogpltcp_init,
+ .name = "bpf_nogpltcp",
+};
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_kind.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_kind.c
deleted file mode 100644
index dd0ffa518f36..000000000000
--- a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_kind.c
+++ /dev/null
@@ -1,3 +0,0 @@
-#include "core_reloc_types.h"
-
-void f(struct core_reloc_existence___err_wrong_arr_kind x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_value_type.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_value_type.c
deleted file mode 100644
index bc83372088ad..000000000000
--- a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_value_type.c
+++ /dev/null
@@ -1,3 +0,0 @@
-#include "core_reloc_types.h"
-
-void f(struct core_reloc_existence___err_wrong_arr_value_type x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_kind.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_kind.c
deleted file mode 100644
index 917bec41be08..000000000000
--- a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_kind.c
+++ /dev/null
@@ -1,3 +0,0 @@
-#include "core_reloc_types.h"
-
-void f(struct core_reloc_existence___err_wrong_int_kind x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_sz.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_sz.c
deleted file mode 100644
index 6ec7e6ec1c91..000000000000
--- a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_sz.c
+++ /dev/null
@@ -1,3 +0,0 @@
-#include "core_reloc_types.h"
-
-void f(struct core_reloc_existence___err_wrong_int_sz x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_type.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_type.c
deleted file mode 100644
index 7bbcacf2b0d1..000000000000
--- a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_type.c
+++ /dev/null
@@ -1,3 +0,0 @@
-#include "core_reloc_types.h"
-
-void f(struct core_reloc_existence___err_wrong_int_type x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_struct_type.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_struct_type.c
deleted file mode 100644
index f384dd38ec70..000000000000
--- a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_struct_type.c
+++ /dev/null
@@ -1,3 +0,0 @@
-#include "core_reloc_types.h"
-
-void f(struct core_reloc_existence___err_wrong_struct_type x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___wrong_field_defs.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___wrong_field_defs.c
new file mode 100644
index 000000000000..d14b496190c3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___wrong_field_defs.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_existence___wrong_field_defs x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
index 31975c96e2c9..8aaa24a00322 100644
--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
@@ -174,6 +174,12 @@ struct struct_in_struct {
};
};
+struct struct_in_array {};
+
+struct struct_in_array_typed {};
+
+typedef struct struct_in_array_typed struct_in_array_t[2];
+
struct struct_with_embedded_stuff {
int a;
struct {
@@ -203,6 +209,14 @@ struct struct_with_embedded_stuff {
} r[5];
struct struct_in_struct s[10];
int t[11];
+ struct struct_in_array (*u)[2];
+ struct_in_array_t *v;
+};
+
+struct float_struct {
+ float f;
+ const double *d;
+ volatile long double *ld;
};
struct root_struct {
@@ -219,6 +233,7 @@ struct root_struct {
union_fwd_t *_12;
union_fwd_ptr_t _13;
struct struct_with_embedded_stuff _14;
+ struct float_struct _15;
};
/* ------ END-EXPECTED-OUTPUT ------ */
diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h
index 9a2850850121..c95c0cabe951 100644
--- a/tools/testing/selftests/bpf/progs/core_reloc_types.h
+++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h
@@ -700,27 +700,11 @@ struct core_reloc_existence___minimal {
int a;
};
-struct core_reloc_existence___err_wrong_int_sz {
- short a;
-};
-
-struct core_reloc_existence___err_wrong_int_type {
+struct core_reloc_existence___wrong_field_defs {
+ void *a;
int b[1];
-};
-
-struct core_reloc_existence___err_wrong_int_kind {
struct{ int x; } c;
-};
-
-struct core_reloc_existence___err_wrong_arr_kind {
int arr;
-};
-
-struct core_reloc_existence___err_wrong_arr_value_type {
- short arr[1];
-};
-
-struct core_reloc_existence___err_wrong_struct_type {
int s;
};
@@ -807,6 +791,7 @@ struct core_reloc_size_output {
int arr_elem_sz;
int ptr_sz;
int enum_sz;
+ int float_sz;
};
struct core_reloc_size {
@@ -816,6 +801,7 @@ struct core_reloc_size {
int arr_field[4];
void *ptr_field;
enum { VALUE = 123 } enum_field;
+ float float_field;
};
struct core_reloc_size___diff_sz {
@@ -825,6 +811,7 @@ struct core_reloc_size___diff_sz {
char arr_field[10];
void *ptr_field;
enum { OTHER_VALUE = 0xFFFFFFFFFFFFFFFF } enum_field;
+ double float_field;
};
/* Error case of two candidates with the fields (int_field) at the same
@@ -839,6 +826,7 @@ struct core_reloc_size___err_ambiguous1 {
int arr_field[4];
void *ptr_field;
enum { VALUE___1 = 123 } enum_field;
+ float float_field;
};
struct core_reloc_size___err_ambiguous2 {
@@ -850,6 +838,7 @@ struct core_reloc_size___err_ambiguous2 {
int arr_field[4];
void *ptr_field;
enum { VALUE___2 = 123 } enum_field;
+ float float_field;
};
/*
diff --git a/tools/testing/selftests/bpf/progs/fentry_test.c b/tools/testing/selftests/bpf/progs/fentry_test.c
index 5f645fdaba6f..52a550d281d9 100644
--- a/tools/testing/selftests/bpf/progs/fentry_test.c
+++ b/tools/testing/selftests/bpf/progs/fentry_test.c
@@ -64,7 +64,7 @@ __u64 test7_result = 0;
SEC("fentry/bpf_fentry_test7")
int BPF_PROG(test7, struct bpf_fentry_test_t *arg)
{
- if (arg == 0)
+ if (!arg)
test7_result = 1;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/fexit_sleep.c b/tools/testing/selftests/bpf/progs/fexit_sleep.c
new file mode 100644
index 000000000000..03a672d76353
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/fexit_sleep.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char LICENSE[] SEC("license") = "GPL";
+
+int pid = 0;
+int fentry_cnt = 0;
+int fexit_cnt = 0;
+
+SEC("fentry/__x64_sys_nanosleep")
+int BPF_PROG(nanosleep_fentry, const struct pt_regs *regs)
+{
+ if ((int)bpf_get_current_pid_tgid() != pid)
+ return 0;
+
+ fentry_cnt++;
+ return 0;
+}
+
+SEC("fexit/__x64_sys_nanosleep")
+int BPF_PROG(nanosleep_fexit, const struct pt_regs *regs, int ret)
+{
+ if ((int)bpf_get_current_pid_tgid() != pid)
+ return 0;
+
+ fexit_cnt++;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/fexit_test.c b/tools/testing/selftests/bpf/progs/fexit_test.c
index 0952affb22a6..8f1ccb7302e1 100644
--- a/tools/testing/selftests/bpf/progs/fexit_test.c
+++ b/tools/testing/selftests/bpf/progs/fexit_test.c
@@ -65,7 +65,7 @@ __u64 test7_result = 0;
SEC("fexit/bpf_fentry_test7")
int BPF_PROG(test7, struct bpf_fentry_test_t *arg)
{
- if (arg == 0)
+ if (!arg)
test7_result = 1;
return 0;
}
@@ -74,7 +74,7 @@ __u64 test8_result = 0;
SEC("fexit/bpf_fentry_test8")
int BPF_PROG(test8, struct bpf_fentry_test_t *arg)
{
- if (arg->a == 0)
+ if (!arg->a)
test8_result = 1;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c b/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c
new file mode 100644
index 000000000000..75e8e1069fe7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 3);
+ __type(key, __u32);
+ __type(value, __u64);
+} arraymap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} percpu_map SEC(".maps");
+
+struct callback_ctx {
+ int output;
+};
+
+static __u64
+check_array_elem(struct bpf_map *map, __u32 *key, __u64 *val,
+ struct callback_ctx *data)
+{
+ data->output += *val;
+ if (*key == 1)
+ return 1; /* stop the iteration */
+ return 0;
+}
+
+__u32 cpu = 0;
+__u64 percpu_val = 0;
+
+static __u64
+check_percpu_elem(struct bpf_map *map, __u32 *key, __u64 *val,
+ struct callback_ctx *data)
+{
+ cpu = bpf_get_smp_processor_id();
+ percpu_val = *val;
+ return 0;
+}
+
+u32 arraymap_output = 0;
+
+SEC("classifier")
+int test_pkt_access(struct __sk_buff *skb)
+{
+ struct callback_ctx data;
+
+ data.output = 0;
+ bpf_for_each_map_elem(&arraymap, check_array_elem, &data, 0);
+ arraymap_output = data.output;
+
+ bpf_for_each_map_elem(&percpu_map, check_percpu_elem, (void *)0, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/for_each_hash_map_elem.c b/tools/testing/selftests/bpf/progs/for_each_hash_map_elem.c
new file mode 100644
index 000000000000..913dd91aafff
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/for_each_hash_map_elem.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 3);
+ __type(key, __u32);
+ __type(value, __u64);
+} hashmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} percpu_map SEC(".maps");
+
+struct callback_ctx {
+ struct __sk_buff *ctx;
+ int input;
+ int output;
+};
+
+static __u64
+check_hash_elem(struct bpf_map *map, __u32 *key, __u64 *val,
+ struct callback_ctx *data)
+{
+ struct __sk_buff *skb = data->ctx;
+ __u32 k;
+ __u64 v;
+
+ if (skb) {
+ k = *key;
+ v = *val;
+ if (skb->len == 10000 && k == 10 && v == 10)
+ data->output = 3; /* impossible path */
+ else
+ data->output = 4;
+ } else {
+ data->output = data->input;
+ bpf_map_delete_elem(map, key);
+ }
+
+ return 0;
+}
+
+__u32 cpu = 0;
+__u32 percpu_called = 0;
+__u32 percpu_key = 0;
+__u64 percpu_val = 0;
+int percpu_output = 0;
+
+static __u64
+check_percpu_elem(struct bpf_map *map, __u32 *key, __u64 *val,
+ struct callback_ctx *unused)
+{
+ struct callback_ctx data;
+
+ percpu_called++;
+ cpu = bpf_get_smp_processor_id();
+ percpu_key = *key;
+ percpu_val = *val;
+
+ data.ctx = 0;
+ data.input = 100;
+ data.output = 0;
+ bpf_for_each_map_elem(&hashmap, check_hash_elem, &data, 0);
+ percpu_output = data.output;
+
+ return 0;
+}
+
+int hashmap_output = 0;
+int hashmap_elems = 0;
+int percpu_map_elems = 0;
+
+SEC("classifier")
+int test_pkt_access(struct __sk_buff *skb)
+{
+ struct callback_ctx data;
+
+ data.ctx = skb;
+ data.input = 10;
+ data.output = 0;
+ hashmap_elems = bpf_for_each_map_elem(&hashmap, check_hash_elem, &data, 0);
+ hashmap_output = data.output;
+
+ percpu_map_elems = bpf_for_each_map_elem(&percpu_map, check_percpu_elem,
+ (void *)0, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test.c b/tools/testing/selftests/bpf/progs/kfunc_call_test.c
new file mode 100644
index 000000000000..470f8723e463
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_test.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_tcp_helpers.h"
+
+extern int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym;
+extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b,
+ __u32 c, __u64 d) __ksym;
+
+SEC("classifier")
+int kfunc_call_test2(struct __sk_buff *skb)
+{
+ struct bpf_sock *sk = skb->sk;
+
+ if (!sk)
+ return -1;
+
+ sk = bpf_sk_fullsock(sk);
+ if (!sk)
+ return -1;
+
+ return bpf_kfunc_call_test2((struct sock *)sk, 1, 2);
+}
+
+SEC("classifier")
+int kfunc_call_test1(struct __sk_buff *skb)
+{
+ struct bpf_sock *sk = skb->sk;
+ __u64 a = 1ULL << 32;
+ __u32 ret;
+
+ if (!sk)
+ return -1;
+
+ sk = bpf_sk_fullsock(sk);
+ if (!sk)
+ return -1;
+
+ a = bpf_kfunc_call_test1((struct sock *)sk, 1, a | 2, 3, a | 4);
+ ret = a >> 32; /* ret should be 2 */
+ ret += (__u32)a; /* ret should be 12 */
+
+ return ret;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
new file mode 100644
index 000000000000..b2dcb7d9cb03
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_tcp_helpers.h"
+
+extern const int bpf_prog_active __ksym;
+extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b,
+ __u32 c, __u64 d) __ksym;
+extern struct sock *bpf_kfunc_call_test3(struct sock *sk) __ksym;
+int active_res = -1;
+int sk_state = -1;
+
+int __noinline f1(struct __sk_buff *skb)
+{
+ struct bpf_sock *sk = skb->sk;
+ int *active;
+
+ if (!sk)
+ return -1;
+
+ sk = bpf_sk_fullsock(sk);
+ if (!sk)
+ return -1;
+
+ active = (int *)bpf_per_cpu_ptr(&bpf_prog_active,
+ bpf_get_smp_processor_id());
+ if (active)
+ active_res = *active;
+
+ sk_state = bpf_kfunc_call_test3((struct sock *)sk)->__sk_common.skc_state;
+
+ return (__u32)bpf_kfunc_call_test1((struct sock *)sk, 1, 2, 3, 4);
+}
+
+SEC("classifier")
+int kfunc_call_test1(struct __sk_buff *skb)
+{
+ return f1(skb);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_funcs1.c b/tools/testing/selftests/bpf/progs/linked_funcs1.c
new file mode 100644
index 000000000000..b964ec1390c2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_funcs1.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+/* weak and shared between two files */
+const volatile int my_tid __weak;
+long syscall_id __weak;
+
+int output_val1;
+int output_ctx1;
+int output_weak1;
+
+/* same "subprog" name in all files, but it's ok because they all are static */
+static __noinline int subprog(int x)
+{
+ /* but different formula */
+ return x * 1;
+}
+
+/* Global functions can't be void */
+int set_output_val1(int x)
+{
+ output_val1 = x + subprog(x);
+ return x;
+}
+
+/* This function can't be verified as global, as it assumes raw_tp/sys_enter
+ * context and accesses syscall id (second argument). So we mark it as
+ * __hidden, so that libbpf will mark it as static in the final object file,
+ * right before verifying it in the kernel.
+ *
+ * But we don't mark it as __hidden here, rather at extern site. __hidden is
+ * "contaminating" visibility, so it will get propagated from either extern or
+ * actual definition (including from the losing __weak definition).
+ */
+void set_output_ctx1(__u64 *ctx)
+{
+ output_ctx1 = ctx[1]; /* long id, same as in BPF_PROG below */
+}
+
+/* this weak instance should win because it's the first one */
+__weak int set_output_weak(int x)
+{
+ output_weak1 = x;
+ return x;
+}
+
+extern int set_output_val2(int x);
+
+/* here we'll force set_output_ctx2() to be __hidden in the final obj file */
+__hidden extern void set_output_ctx2(__u64 *ctx);
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(handler1, struct pt_regs *regs, long id)
+{
+ if (my_tid != (u32)bpf_get_current_pid_tgid() || id != syscall_id)
+ return 0;
+
+ set_output_val2(1000);
+ set_output_ctx2(ctx); /* ctx definition is hidden in BPF_PROG macro */
+
+ /* keep input value the same across both files to avoid dependency on
+ * handler call order; differentiate by output_weak1 vs output_weak2.
+ */
+ set_output_weak(42);
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_funcs2.c b/tools/testing/selftests/bpf/progs/linked_funcs2.c
new file mode 100644
index 000000000000..575e958e60b7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_funcs2.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+/* weak and shared between both files */
+const volatile int my_tid __weak;
+long syscall_id __weak;
+
+int output_val2;
+int output_ctx2;
+int output_weak2; /* should stay zero */
+
+/* same "subprog" name in all files, but it's ok because they all are static */
+static __noinline int subprog(int x)
+{
+ /* but different formula */
+ return x * 2;
+}
+
+/* Global functions can't be void */
+int set_output_val2(int x)
+{
+ output_val2 = 2 * x + 2 * subprog(x);
+ return 2 * x;
+}
+
+/* This function can't be verified as global, as it assumes raw_tp/sys_enter
+ * context and accesses syscall id (second argument). So we mark it as
+ * __hidden, so that libbpf will mark it as static in the final object file,
+ * right before verifying it in the kernel.
+ *
+ * But we don't mark it as __hidden here, rather at extern site. __hidden is
+ * "contaminating" visibility, so it will get propagated from either extern or
+ * actual definition (including from the losing __weak definition).
+ */
+void set_output_ctx2(__u64 *ctx)
+{
+ output_ctx2 = ctx[1]; /* long id, same as in BPF_PROG below */
+}
+
+/* this weak instance should lose, because it will be processed second */
+__weak int set_output_weak(int x)
+{
+ output_weak2 = x;
+ return 2 * x;
+}
+
+extern int set_output_val1(int x);
+
+/* here we'll force set_output_ctx1() to be __hidden in the final obj file */
+__hidden extern void set_output_ctx1(__u64 *ctx);
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(handler2, struct pt_regs *regs, long id)
+{
+ if (my_tid != (u32)bpf_get_current_pid_tgid() || id != syscall_id)
+ return 0;
+
+ set_output_val1(2000);
+ set_output_ctx1(ctx); /* ctx definition is hidden in BPF_PROG macro */
+
+ /* keep input value the same across both files to avoid dependency on
+ * handler call order; differentiate by output_weak1 vs output_weak2.
+ */
+ set_output_weak(42);
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_maps1.c b/tools/testing/selftests/bpf/progs/linked_maps1.c
new file mode 100644
index 000000000000..52291515cc72
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_maps1.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+struct my_key { long x; };
+struct my_value { long x; };
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, struct my_key);
+ __type(value, struct my_value);
+ __uint(max_entries, 16);
+} map1 SEC(".maps");
+
+ /* Matches map2 definition in linked_maps2.c. Order of the attributes doesn't
+ * matter.
+ */
+typedef struct {
+ __uint(max_entries, 8);
+ __type(key, int);
+ __type(value, int);
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+} map2_t;
+
+extern map2_t map2 SEC(".maps");
+
+/* This should be the winning map definition, but we have no way of verifying,
+ * so we just make sure that it links and works without errors
+ */
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 16);
+} map_weak __weak SEC(".maps");
+
+int output_first1;
+int output_second1;
+int output_weak1;
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(handler_enter1)
+{
+ /* update values with key = 1 */
+ int key = 1, val = 1;
+ struct my_key key_struct = { .x = 1 };
+ struct my_value val_struct = { .x = 1000 };
+
+ bpf_map_update_elem(&map1, &key_struct, &val_struct, 0);
+ bpf_map_update_elem(&map2, &key, &val, 0);
+ bpf_map_update_elem(&map_weak, &key, &val, 0);
+
+ return 0;
+}
+
+SEC("raw_tp/sys_exit")
+int BPF_PROG(handler_exit1)
+{
+ /* lookup values with key = 2, set in another file */
+ int key = 2, *val;
+ struct my_key key_struct = { .x = 2 };
+ struct my_value *value_struct;
+
+ value_struct = bpf_map_lookup_elem(&map1, &key_struct);
+ if (value_struct)
+ output_first1 = value_struct->x;
+
+ val = bpf_map_lookup_elem(&map2, &key);
+ if (val)
+ output_second1 = *val;
+
+ val = bpf_map_lookup_elem(&map_weak, &key);
+ if (val)
+ output_weak1 = *val;
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_maps2.c b/tools/testing/selftests/bpf/progs/linked_maps2.c
new file mode 100644
index 000000000000..0693687474ed
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_maps2.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+/* modifiers and typedefs are ignored when comparing key/value types */
+typedef struct my_key { long x; } key_type;
+typedef struct my_value { long x; } value_type;
+
+extern struct {
+ __uint(max_entries, 16);
+ __type(key, key_type);
+ __type(value, value_type);
+ __uint(type, BPF_MAP_TYPE_HASH);
+} map1 SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 8);
+} map2 SEC(".maps");
+
+/* this definition will lose, but it has to exactly match the winner */
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 16);
+} map_weak __weak SEC(".maps");
+
+int output_first2;
+int output_second2;
+int output_weak2;
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(handler_enter2)
+{
+ /* update values with key = 2 */
+ int key = 2, val = 2;
+ key_type key_struct = { .x = 2 };
+ value_type val_struct = { .x = 2000 };
+
+ bpf_map_update_elem(&map1, &key_struct, &val_struct, 0);
+ bpf_map_update_elem(&map2, &key, &val, 0);
+ bpf_map_update_elem(&map_weak, &key, &val, 0);
+
+ return 0;
+}
+
+SEC("raw_tp/sys_exit")
+int BPF_PROG(handler_exit2)
+{
+ /* lookup values with key = 1, set in another file */
+ int key = 1, *val;
+ key_type key_struct = { .x = 1 };
+ value_type *value_struct;
+
+ value_struct = bpf_map_lookup_elem(&map1, &key_struct);
+ if (value_struct)
+ output_first2 = value_struct->x;
+
+ val = bpf_map_lookup_elem(&map2, &key);
+ if (val)
+ output_second2 = *val;
+
+ val = bpf_map_lookup_elem(&map_weak, &key);
+ if (val)
+ output_weak2 = *val;
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_vars1.c b/tools/testing/selftests/bpf/progs/linked_vars1.c
new file mode 100644
index 000000000000..ef9e9d0bb0ca
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_vars1.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+extern int LINUX_KERNEL_VERSION __kconfig;
+/* this weak extern will be strict due to the other file's strong extern */
+extern bool CONFIG_BPF_SYSCALL __kconfig __weak;
+extern const void bpf_link_fops __ksym __weak;
+
+int input_bss1;
+int input_data1 = 1;
+const volatile int input_rodata1 = 11;
+
+int input_bss_weak __weak;
+/* these two definitions should win */
+int input_data_weak __weak = 10;
+const volatile int input_rodata_weak __weak = 100;
+
+extern int input_bss2;
+extern int input_data2;
+extern const int input_rodata2;
+
+int output_bss1;
+int output_data1;
+int output_rodata1;
+
+long output_sink1;
+
+static __noinline int get_bss_res(void)
+{
+ /* just make sure all the relocations work against .text as well */
+ return input_bss1 + input_bss2 + input_bss_weak;
+}
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(handler1)
+{
+ output_bss1 = get_bss_res();
+ output_data1 = input_data1 + input_data2 + input_data_weak;
+ output_rodata1 = input_rodata1 + input_rodata2 + input_rodata_weak;
+
+ /* make sure we actually use above special externs, otherwise compiler
+ * will optimize them out
+ */
+ output_sink1 = LINUX_KERNEL_VERSION
+ + CONFIG_BPF_SYSCALL
+ + (long)&bpf_link_fops;
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_vars2.c b/tools/testing/selftests/bpf/progs/linked_vars2.c
new file mode 100644
index 000000000000..e4f5bd388a3c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_vars2.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+extern int LINUX_KERNEL_VERSION __kconfig;
+/* when an extern is defined as both strong and weak, resulting symbol will be strong */
+extern bool CONFIG_BPF_SYSCALL __kconfig;
+extern const void __start_BTF __ksym;
+
+int input_bss2;
+int input_data2 = 2;
+const volatile int input_rodata2 = 22;
+
+int input_bss_weak __weak;
+/* these two weak variables should lose */
+int input_data_weak __weak = 20;
+const volatile int input_rodata_weak __weak = 200;
+
+extern int input_bss1;
+extern int input_data1;
+extern const int input_rodata1;
+
+int output_bss2;
+int output_data2;
+int output_rodata2;
+
+int output_sink2;
+
+static __noinline int get_data_res(void)
+{
+ /* just make sure all the relocations work against .text as well */
+ return input_data1 + input_data2 + input_data_weak;
+}
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(handler2)
+{
+ output_bss2 = input_bss1 + input_bss2 + input_bss_weak;
+ output_data2 = get_data_res();
+ output_rodata2 = input_rodata1 + input_rodata2 + input_rodata_weak;
+
+ /* make sure we actually use above special externs, otherwise compiler
+ * will optimize them out
+ */
+ output_sink2 = LINUX_KERNEL_VERSION
+ + CONFIG_BPF_SYSCALL
+ + (long)&__start_BTF;
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/loop6.c b/tools/testing/selftests/bpf/progs/loop6.c
new file mode 100644
index 000000000000..38de0331e6b4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/loop6.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/ptrace.h>
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+/* typically virtio scsi has max SGs of 6 */
+#define VIRTIO_MAX_SGS 6
+
+/* Verifier will fail with SG_MAX = 128. The failure can be
+ * workarounded with a smaller SG_MAX, e.g. 10.
+ */
+#define WORKAROUND
+#ifdef WORKAROUND
+#define SG_MAX 10
+#else
+/* typically virtio blk has max SEG of 128 */
+#define SG_MAX 128
+#endif
+
+#define SG_CHAIN 0x01UL
+#define SG_END 0x02UL
+
+struct scatterlist {
+ unsigned long page_link;
+ unsigned int offset;
+ unsigned int length;
+};
+
+#define sg_is_chain(sg) ((sg)->page_link & SG_CHAIN)
+#define sg_is_last(sg) ((sg)->page_link & SG_END)
+#define sg_chain_ptr(sg) \
+ ((struct scatterlist *) ((sg)->page_link & ~(SG_CHAIN | SG_END)))
+
+static inline struct scatterlist *__sg_next(struct scatterlist *sgp)
+{
+ struct scatterlist sg;
+
+ bpf_probe_read_kernel(&sg, sizeof(sg), sgp);
+ if (sg_is_last(&sg))
+ return NULL;
+
+ sgp++;
+
+ bpf_probe_read_kernel(&sg, sizeof(sg), sgp);
+ if (sg_is_chain(&sg))
+ sgp = sg_chain_ptr(&sg);
+
+ return sgp;
+}
+
+static inline struct scatterlist *get_sgp(struct scatterlist **sgs, int i)
+{
+ struct scatterlist *sgp;
+
+ bpf_probe_read_kernel(&sgp, sizeof(sgp), sgs + i);
+ return sgp;
+}
+
+int config = 0;
+int result = 0;
+
+SEC("kprobe/virtqueue_add_sgs")
+int BPF_KPROBE(trace_virtqueue_add_sgs, void *unused, struct scatterlist **sgs,
+ unsigned int out_sgs, unsigned int in_sgs)
+{
+ struct scatterlist *sgp = NULL;
+ __u64 length1 = 0, length2 = 0;
+ unsigned int i, n, len;
+
+ if (config != 0)
+ return 0;
+
+ for (i = 0; (i < VIRTIO_MAX_SGS) && (i < out_sgs); i++) {
+ for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX);
+ sgp = __sg_next(sgp)) {
+ bpf_probe_read_kernel(&len, sizeof(len), &sgp->length);
+ length1 += len;
+ n++;
+ }
+ }
+
+ for (i = 0; (i < VIRTIO_MAX_SGS) && (i < in_sgs); i++) {
+ for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX);
+ sgp = __sg_next(sgp)) {
+ bpf_probe_read_kernel(&len, sizeof(len), &sgp->length);
+ length2 += len;
+ n++;
+ }
+ }
+
+ config = 1;
+ result = length2 - length1;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
index d8850bc6a9f1..d1d304c980f0 100644
--- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c
+++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
@@ -12,6 +12,7 @@ _Static_assert(MAX_ENTRIES < LOOP_BOUND, "MAX_ENTRIES must be < LOOP_BOUND");
enum bpf_map_type g_map_type = BPF_MAP_TYPE_UNSPEC;
__u32 g_line = 0;
+int page_size = 0; /* userspace should set it */
#define VERIFY_TYPE(type, func) ({ \
g_map_type = type; \
@@ -635,7 +636,6 @@ struct bpf_ringbuf_map {
struct {
__uint(type, BPF_MAP_TYPE_RINGBUF);
- __uint(max_entries, 1 << 12);
} m_ringbuf SEC(".maps");
static inline int check_ringbuf(void)
@@ -643,7 +643,7 @@ static inline int check_ringbuf(void)
struct bpf_ringbuf_map *ringbuf = (struct bpf_ringbuf_map *)&m_ringbuf;
struct bpf_map *map = (struct bpf_map *)&m_ringbuf;
- VERIFY(check(&ringbuf->map, map, 0, 0, 1 << 12));
+ VERIFY(check(&ringbuf->map, map, 0, 0, page_size));
return 1;
}
diff --git a/tools/testing/selftests/bpf/progs/skb_pkt_end.c b/tools/testing/selftests/bpf/progs/skb_pkt_end.c
index cf6823f42e80..7f2eaa2f89f8 100644
--- a/tools/testing/selftests/bpf/progs/skb_pkt_end.c
+++ b/tools/testing/selftests/bpf/progs/skb_pkt_end.c
@@ -4,7 +4,6 @@
#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
-#define NULL 0
#define INLINE __always_inline
#define skb_shorter(skb, len) ((void *)(long)(skb)->data + (len) > (void *)(long)skb->data_end)
diff --git a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
index fdb4bf4408fa..eeaf6e75c9a2 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
@@ -8,18 +8,6 @@ int _version SEC("version") = 1;
SEC("sk_msg1")
int bpf_prog1(struct sk_msg_md *msg)
{
- void *data_end = (void *)(long) msg->data_end;
- void *data = (void *)(long) msg->data;
-
- char *d;
-
- if (data + 8 > data_end)
- return SK_DROP;
-
- bpf_printk("data length %i\n", (__u64)msg->data_end - (__u64)msg->data);
- d = (char *)data;
- bpf_printk("hello sendmsg hook %i %i\n", d[0], d[1]);
-
return SK_PASS;
}
diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c
index d3597f81e6e9..8acdb99b5959 100644
--- a/tools/testing/selftests/bpf/progs/sockopt_sk.c
+++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c
@@ -6,11 +6,8 @@
#include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1;
-#ifndef PAGE_SIZE
-#define PAGE_SIZE 4096
-#endif
+int page_size = 0; /* userspace should set it */
#ifndef SOL_TCP
#define SOL_TCP IPPROTO_TCP
@@ -90,7 +87,7 @@ int _getsockopt(struct bpf_sockopt *ctx)
* program can only see the first PAGE_SIZE
* bytes of data.
*/
- if (optval_end - optval != PAGE_SIZE)
+ if (optval_end - optval != page_size)
return 0; /* EPERM, unexpected data size */
return 1;
@@ -161,7 +158,7 @@ int _setsockopt(struct bpf_sockopt *ctx)
if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
/* Original optlen is larger than PAGE_SIZE. */
- if (ctx->optlen != PAGE_SIZE * 2)
+ if (ctx->optlen != page_size * 2)
return 0; /* EPERM, unexpected data size */
if (optval + 1 > optval_end)
@@ -175,7 +172,7 @@ int _setsockopt(struct bpf_sockopt *ctx)
* program can only see the first PAGE_SIZE
* bytes of data.
*/
- if (optval_end - optval != PAGE_SIZE)
+ if (optval_end - optval != page_size)
return 0; /* EPERM, unexpected data size */
return 1;
diff --git a/tools/testing/selftests/bpf/progs/task_local_storage.c b/tools/testing/selftests/bpf/progs/task_local_storage.c
new file mode 100644
index 000000000000..80a0a20db88d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_local_storage.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, long);
+} enter_id SEC(".maps");
+
+#define MAGIC_VALUE 0xabcd1234
+
+pid_t target_pid = 0;
+int mismatch_cnt = 0;
+int enter_cnt = 0;
+int exit_cnt = 0;
+
+SEC("tp_btf/sys_enter")
+int BPF_PROG(on_enter, struct pt_regs *regs, long id)
+{
+ struct task_struct *task;
+ long *ptr;
+
+ task = bpf_get_current_task_btf();
+ if (task->pid != target_pid)
+ return 0;
+
+ ptr = bpf_task_storage_get(&enter_id, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!ptr)
+ return 0;
+
+ __sync_fetch_and_add(&enter_cnt, 1);
+ *ptr = MAGIC_VALUE + enter_cnt;
+
+ return 0;
+}
+
+SEC("tp_btf/sys_exit")
+int BPF_PROG(on_exit, struct pt_regs *regs, long id)
+{
+ struct task_struct *task;
+ long *ptr;
+
+ task = bpf_get_current_task_btf();
+ if (task->pid != target_pid)
+ return 0;
+
+ ptr = bpf_task_storage_get(&enter_id, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!ptr)
+ return 0;
+
+ __sync_fetch_and_add(&exit_cnt, 1);
+ if (*ptr != MAGIC_VALUE + exit_cnt)
+ __sync_fetch_and_add(&mismatch_cnt, 1);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c b/tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c
new file mode 100644
index 000000000000..81758c0aef99
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, __u64);
+} task_storage SEC(".maps");
+
+int valid_ptr_count = 0;
+int null_ptr_count = 0;
+
+SEC("fentry/exit_creds")
+int BPF_PROG(trace_exit_creds, struct task_struct *task)
+{
+ __u64 *ptr;
+
+ ptr = bpf_task_storage_get(&task_storage, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr)
+ __sync_fetch_and_add(&valid_ptr_count, 1);
+ else
+ __sync_fetch_and_add(&null_ptr_count, 1);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/task_ls_recursion.c b/tools/testing/selftests/bpf/progs/task_ls_recursion.c
new file mode 100644
index 000000000000..564583dca7c8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_ls_recursion.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, long);
+} map_a SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, long);
+} map_b SEC(".maps");
+
+SEC("fentry/bpf_local_storage_lookup")
+int BPF_PROG(on_lookup)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+
+ bpf_task_storage_delete(&map_a, task);
+ bpf_task_storage_delete(&map_b, task);
+ return 0;
+}
+
+SEC("fentry/bpf_local_storage_update")
+int BPF_PROG(on_update)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+ long *ptr;
+
+ ptr = bpf_task_storage_get(&map_a, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr)
+ *ptr += 1;
+
+ ptr = bpf_task_storage_get(&map_b, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr)
+ *ptr += 1;
+
+ return 0;
+}
+
+SEC("tp_btf/sys_enter")
+int BPF_PROG(on_enter, struct pt_regs *regs, long id)
+{
+ struct task_struct *task;
+ long *ptr;
+
+ task = bpf_get_current_task_btf();
+ ptr = bpf_task_storage_get(&map_a, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr)
+ *ptr = 200;
+
+ ptr = bpf_task_storage_get(&map_b, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr)
+ *ptr = 100;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_check_mtu.c b/tools/testing/selftests/bpf/progs/test_check_mtu.c
index b7787b43f9db..c4a9bae96e75 100644
--- a/tools/testing/selftests/bpf/progs/test_check_mtu.c
+++ b/tools/testing/selftests/bpf/progs/test_check_mtu.c
@@ -105,6 +105,54 @@ int xdp_minus_delta(struct xdp_md *ctx)
return retval;
}
+SEC("xdp")
+int xdp_input_len(struct xdp_md *ctx)
+{
+ int retval = XDP_PASS; /* Expected retval on successful test */
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ __u32 ifindex = GLOBAL_USER_IFINDEX;
+ __u32 data_len = data_end - data;
+
+ /* API allow user give length to check as input via mtu_len param,
+ * resulting MTU value is still output in mtu_len param after call.
+ *
+ * Input len is L3, like MTU and iph->tot_len.
+ * Remember XDP data_len is L2.
+ */
+ __u32 mtu_len = data_len - ETH_HLEN;
+
+ if (bpf_check_mtu(ctx, ifindex, &mtu_len, 0, 0))
+ retval = XDP_ABORTED;
+
+ global_bpf_mtu_xdp = mtu_len;
+ return retval;
+}
+
+SEC("xdp")
+int xdp_input_len_exceed(struct xdp_md *ctx)
+{
+ int retval = XDP_ABORTED; /* Fail */
+ __u32 ifindex = GLOBAL_USER_IFINDEX;
+ int err;
+
+ /* API allow user give length to check as input via mtu_len param,
+ * resulting MTU value is still output in mtu_len param after call.
+ *
+ * Input length value is L3 size like MTU.
+ */
+ __u32 mtu_len = GLOBAL_USER_MTU;
+
+ mtu_len += 1; /* Exceed with 1 */
+
+ err = bpf_check_mtu(ctx, ifindex, &mtu_len, 0, 0);
+ if (err == BPF_MTU_CHK_RET_FRAG_NEEDED)
+ retval = XDP_PASS ; /* Success in exceeding MTU check */
+
+ global_bpf_mtu_xdp = mtu_len;
+ return retval;
+}
+
SEC("classifier")
int tc_use_helper(struct __sk_buff *ctx)
{
@@ -196,3 +244,47 @@ int tc_minus_delta(struct __sk_buff *ctx)
global_bpf_mtu_xdp = mtu_len;
return retval;
}
+
+SEC("classifier")
+int tc_input_len(struct __sk_buff *ctx)
+{
+ int retval = BPF_OK; /* Expected retval on successful test */
+ __u32 ifindex = GLOBAL_USER_IFINDEX;
+
+ /* API allow user give length to check as input via mtu_len param,
+ * resulting MTU value is still output in mtu_len param after call.
+ *
+ * Input length value is L3 size.
+ */
+ __u32 mtu_len = GLOBAL_USER_MTU;
+
+ if (bpf_check_mtu(ctx, ifindex, &mtu_len, 0, 0))
+ retval = BPF_DROP;
+
+ global_bpf_mtu_xdp = mtu_len;
+ return retval;
+}
+
+SEC("classifier")
+int tc_input_len_exceed(struct __sk_buff *ctx)
+{
+ int retval = BPF_DROP; /* Fail */
+ __u32 ifindex = GLOBAL_USER_IFINDEX;
+ int err;
+
+ /* API allow user give length to check as input via mtu_len param,
+ * resulting MTU value is still output in mtu_len param after call.
+ *
+ * Input length value is L3 size like MTU.
+ */
+ __u32 mtu_len = GLOBAL_USER_MTU;
+
+ mtu_len += 1; /* Exceed with 1 */
+
+ err = bpf_check_mtu(ctx, ifindex, &mtu_len, 0, 0);
+ if (err == BPF_MTU_CHK_RET_FRAG_NEEDED)
+ retval = BPF_OK; /* Success in exceeding MTU check */
+
+ global_bpf_mtu_xdp = mtu_len;
+ return retval;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_size.c b/tools/testing/selftests/bpf/progs/test_core_reloc_size.c
index d7fb6cfc7891..7b2d576aeea1 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_size.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_size.c
@@ -21,6 +21,7 @@ struct core_reloc_size_output {
int arr_elem_sz;
int ptr_sz;
int enum_sz;
+ int float_sz;
};
struct core_reloc_size {
@@ -30,6 +31,7 @@ struct core_reloc_size {
int arr_field[4];
void *ptr_field;
enum { VALUE = 123 } enum_field;
+ float float_field;
};
SEC("raw_tracepoint/sys_enter")
@@ -45,6 +47,7 @@ int test_core_size(void *ctx)
out->arr_elem_sz = bpf_core_field_size(in->arr_field[0]);
out->ptr_sz = bpf_core_field_size(in->ptr_field);
out->enum_sz = bpf_core_field_size(in->enum_field);
+ out->float_sz = bpf_core_field_size(in->float_field);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func10.c b/tools/testing/selftests/bpf/progs/test_global_func10.c
index 61c2ae92ce41..97b7031d0e22 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func10.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func10.c
@@ -14,7 +14,7 @@ struct Big {
__noinline int foo(const struct Big *big)
{
- if (big == 0)
+ if (!big)
return 0;
return bpf_get_prandom_u32() < big->y;
diff --git a/tools/testing/selftests/bpf/progs/test_mmap.c b/tools/testing/selftests/bpf/progs/test_mmap.c
index 4eb42cff5fe9..5a5cc19a15bf 100644
--- a/tools/testing/selftests/bpf/progs/test_mmap.c
+++ b/tools/testing/selftests/bpf/progs/test_mmap.c
@@ -9,7 +9,6 @@ char _license[] SEC("license") = "GPL";
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
- __uint(max_entries, 4096);
__uint(map_flags, BPF_F_MMAPABLE | BPF_F_RDONLY_PROG);
__type(key, __u32);
__type(value, char);
@@ -17,7 +16,6 @@ struct {
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
- __uint(max_entries, 512 * 4); /* at least 4 pages of data */
__uint(map_flags, BPF_F_MMAPABLE);
__type(key, __u32);
__type(value, __u64);
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf.c b/tools/testing/selftests/bpf/progs/test_ringbuf.c
index 8ba9959b036b..6b3f288b7c63 100644
--- a/tools/testing/selftests/bpf/progs/test_ringbuf.c
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf.c
@@ -15,7 +15,6 @@ struct sample {
struct {
__uint(type, BPF_MAP_TYPE_RINGBUF);
- __uint(max_entries, 1 << 12);
} ringbuf SEC(".maps");
/* inputs */
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c
index edf3b6953533..197b86546dca 100644
--- a/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c
@@ -15,7 +15,6 @@ struct sample {
struct ringbuf_map {
__uint(type, BPF_MAP_TYPE_RINGBUF);
- __uint(max_entries, 1 << 12);
} ringbuf1 SEC(".maps"),
ringbuf2 SEC(".maps");
@@ -31,6 +30,17 @@ struct {
},
};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __array(values, struct ringbuf_map);
+} ringbuf_hash SEC(".maps") = {
+ .values = {
+ [0] = &ringbuf1,
+ },
+};
+
/* inputs */
int pid = 0;
int target_ring = 0;
diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup.c b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
index 1032b292af5b..ac6f7f205e25 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_lookup.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
@@ -64,6 +64,10 @@ static const int PROG_DONE = 1;
static const __u32 KEY_SERVER_A = SERVER_A;
static const __u32 KEY_SERVER_B = SERVER_B;
+static const __u16 SRC_PORT = bpf_htons(8008);
+static const __u32 SRC_IP4 = IP4(127, 0, 0, 2);
+static const __u32 SRC_IP6[] = IP6(0xfd000000, 0x0, 0x0, 0x00000002);
+
static const __u16 DST_PORT = 7007; /* Host byte order */
static const __u32 DST_IP4 = IP4(127, 0, 0, 1);
static const __u32 DST_IP6[] = IP6(0xfd000000, 0x0, 0x0, 0x00000001);
@@ -398,11 +402,12 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx)
if (LSW(ctx->protocol, 0) != IPPROTO_TCP)
return SK_DROP;
- /* Narrow loads from remote_port field. Expect non-0 value. */
- if (LSB(ctx->remote_port, 0) == 0 && LSB(ctx->remote_port, 1) == 0 &&
- LSB(ctx->remote_port, 2) == 0 && LSB(ctx->remote_port, 3) == 0)
+ /* Narrow loads from remote_port field. Expect SRC_PORT. */
+ if (LSB(ctx->remote_port, 0) != ((SRC_PORT >> 0) & 0xff) ||
+ LSB(ctx->remote_port, 1) != ((SRC_PORT >> 8) & 0xff) ||
+ LSB(ctx->remote_port, 2) != 0 || LSB(ctx->remote_port, 3) != 0)
return SK_DROP;
- if (LSW(ctx->remote_port, 0) == 0)
+ if (LSW(ctx->remote_port, 0) != SRC_PORT)
return SK_DROP;
/* Narrow loads from local_port field. Expect DST_PORT. */
@@ -415,11 +420,14 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx)
/* Narrow loads from IPv4 fields */
if (v4) {
- /* Expect non-0.0.0.0 in remote_ip4 */
- if (LSB(ctx->remote_ip4, 0) == 0 && LSB(ctx->remote_ip4, 1) == 0 &&
- LSB(ctx->remote_ip4, 2) == 0 && LSB(ctx->remote_ip4, 3) == 0)
+ /* Expect SRC_IP4 in remote_ip4 */
+ if (LSB(ctx->remote_ip4, 0) != ((SRC_IP4 >> 0) & 0xff) ||
+ LSB(ctx->remote_ip4, 1) != ((SRC_IP4 >> 8) & 0xff) ||
+ LSB(ctx->remote_ip4, 2) != ((SRC_IP4 >> 16) & 0xff) ||
+ LSB(ctx->remote_ip4, 3) != ((SRC_IP4 >> 24) & 0xff))
return SK_DROP;
- if (LSW(ctx->remote_ip4, 0) == 0 && LSW(ctx->remote_ip4, 1) == 0)
+ if (LSW(ctx->remote_ip4, 0) != ((SRC_IP4 >> 0) & 0xffff) ||
+ LSW(ctx->remote_ip4, 1) != ((SRC_IP4 >> 16) & 0xffff))
return SK_DROP;
/* Expect DST_IP4 in local_ip4 */
@@ -448,20 +456,32 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx)
/* Narrow loads from IPv6 fields */
if (!v4) {
- /* Expect non-:: IP in remote_ip6 */
- if (LSB(ctx->remote_ip6[0], 0) == 0 && LSB(ctx->remote_ip6[0], 1) == 0 &&
- LSB(ctx->remote_ip6[0], 2) == 0 && LSB(ctx->remote_ip6[0], 3) == 0 &&
- LSB(ctx->remote_ip6[1], 0) == 0 && LSB(ctx->remote_ip6[1], 1) == 0 &&
- LSB(ctx->remote_ip6[1], 2) == 0 && LSB(ctx->remote_ip6[1], 3) == 0 &&
- LSB(ctx->remote_ip6[2], 0) == 0 && LSB(ctx->remote_ip6[2], 1) == 0 &&
- LSB(ctx->remote_ip6[2], 2) == 0 && LSB(ctx->remote_ip6[2], 3) == 0 &&
- LSB(ctx->remote_ip6[3], 0) == 0 && LSB(ctx->remote_ip6[3], 1) == 0 &&
- LSB(ctx->remote_ip6[3], 2) == 0 && LSB(ctx->remote_ip6[3], 3) == 0)
+ /* Expect SRC_IP6 in remote_ip6 */
+ if (LSB(ctx->remote_ip6[0], 0) != ((SRC_IP6[0] >> 0) & 0xff) ||
+ LSB(ctx->remote_ip6[0], 1) != ((SRC_IP6[0] >> 8) & 0xff) ||
+ LSB(ctx->remote_ip6[0], 2) != ((SRC_IP6[0] >> 16) & 0xff) ||
+ LSB(ctx->remote_ip6[0], 3) != ((SRC_IP6[0] >> 24) & 0xff) ||
+ LSB(ctx->remote_ip6[1], 0) != ((SRC_IP6[1] >> 0) & 0xff) ||
+ LSB(ctx->remote_ip6[1], 1) != ((SRC_IP6[1] >> 8) & 0xff) ||
+ LSB(ctx->remote_ip6[1], 2) != ((SRC_IP6[1] >> 16) & 0xff) ||
+ LSB(ctx->remote_ip6[1], 3) != ((SRC_IP6[1] >> 24) & 0xff) ||
+ LSB(ctx->remote_ip6[2], 0) != ((SRC_IP6[2] >> 0) & 0xff) ||
+ LSB(ctx->remote_ip6[2], 1) != ((SRC_IP6[2] >> 8) & 0xff) ||
+ LSB(ctx->remote_ip6[2], 2) != ((SRC_IP6[2] >> 16) & 0xff) ||
+ LSB(ctx->remote_ip6[2], 3) != ((SRC_IP6[2] >> 24) & 0xff) ||
+ LSB(ctx->remote_ip6[3], 0) != ((SRC_IP6[3] >> 0) & 0xff) ||
+ LSB(ctx->remote_ip6[3], 1) != ((SRC_IP6[3] >> 8) & 0xff) ||
+ LSB(ctx->remote_ip6[3], 2) != ((SRC_IP6[3] >> 16) & 0xff) ||
+ LSB(ctx->remote_ip6[3], 3) != ((SRC_IP6[3] >> 24) & 0xff))
return SK_DROP;
- if (LSW(ctx->remote_ip6[0], 0) == 0 && LSW(ctx->remote_ip6[0], 1) == 0 &&
- LSW(ctx->remote_ip6[1], 0) == 0 && LSW(ctx->remote_ip6[1], 1) == 0 &&
- LSW(ctx->remote_ip6[2], 0) == 0 && LSW(ctx->remote_ip6[2], 1) == 0 &&
- LSW(ctx->remote_ip6[3], 0) == 0 && LSW(ctx->remote_ip6[3], 1) == 0)
+ if (LSW(ctx->remote_ip6[0], 0) != ((SRC_IP6[0] >> 0) & 0xffff) ||
+ LSW(ctx->remote_ip6[0], 1) != ((SRC_IP6[0] >> 16) & 0xffff) ||
+ LSW(ctx->remote_ip6[1], 0) != ((SRC_IP6[1] >> 0) & 0xffff) ||
+ LSW(ctx->remote_ip6[1], 1) != ((SRC_IP6[1] >> 16) & 0xffff) ||
+ LSW(ctx->remote_ip6[2], 0) != ((SRC_IP6[2] >> 0) & 0xffff) ||
+ LSW(ctx->remote_ip6[2], 1) != ((SRC_IP6[2] >> 16) & 0xffff) ||
+ LSW(ctx->remote_ip6[3], 0) != ((SRC_IP6[3] >> 0) & 0xffff) ||
+ LSW(ctx->remote_ip6[3], 1) != ((SRC_IP6[3] >> 16) & 0xffff))
return SK_DROP;
/* Expect DST_IP6 in local_ip6 */
if (LSB(ctx->local_ip6[0], 0) != ((DST_IP6[0] >> 0) & 0xff) ||
diff --git a/tools/testing/selftests/bpf/progs/test_snprintf.c b/tools/testing/selftests/bpf/progs/test_snprintf.c
new file mode 100644
index 000000000000..951a0301c553
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_snprintf.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Google LLC. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char num_out[64] = {};
+long num_ret = 0;
+
+char ip_out[64] = {};
+long ip_ret = 0;
+
+char sym_out[64] = {};
+long sym_ret = 0;
+
+char addr_out[64] = {};
+long addr_ret = 0;
+
+char str_out[64] = {};
+long str_ret = 0;
+
+char over_out[6] = {};
+long over_ret = 0;
+
+char pad_out[10] = {};
+long pad_ret = 0;
+
+char noarg_out[64] = {};
+long noarg_ret = 0;
+
+long nobuf_ret = 0;
+
+extern const void schedule __ksym;
+
+SEC("raw_tp/sys_enter")
+int handler(const void *ctx)
+{
+ /* Convenient values to pretty-print */
+ const __u8 ex_ipv4[] = {127, 0, 0, 1};
+ const __u8 ex_ipv6[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+ static const char str1[] = "str1";
+ static const char longstr[] = "longstr";
+
+ /* Integer types */
+ num_ret = BPF_SNPRINTF(num_out, sizeof(num_out),
+ "%d %u %x %li %llu %lX",
+ -8, 9, 150, -424242, 1337, 0xDABBAD00);
+ /* IP addresses */
+ ip_ret = BPF_SNPRINTF(ip_out, sizeof(ip_out), "%pi4 %pI6",
+ &ex_ipv4, &ex_ipv6);
+ /* Symbol lookup formatting */
+ sym_ret = BPF_SNPRINTF(sym_out, sizeof(sym_out), "%ps %pS %pB",
+ &schedule, &schedule, &schedule);
+ /* Kernel pointers */
+ addr_ret = BPF_SNPRINTF(addr_out, sizeof(addr_out), "%pK %px %p",
+ 0, 0xFFFF00000ADD4E55, 0xFFFF00000ADD4E55);
+ /* Strings embedding */
+ str_ret = BPF_SNPRINTF(str_out, sizeof(str_out), "%s %+05s",
+ str1, longstr);
+ /* Overflow */
+ over_ret = BPF_SNPRINTF(over_out, sizeof(over_out), "%%overflow");
+ /* Padding of fixed width numbers */
+ pad_ret = BPF_SNPRINTF(pad_out, sizeof(pad_out), "%5d %0900000X", 4, 4);
+ /* No args */
+ noarg_ret = BPF_SNPRINTF(noarg_out, sizeof(noarg_out), "simple case");
+ /* No buffer */
+ nobuf_ret = BPF_SNPRINTF(NULL, 0, "only interested in length %d", 60);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_snprintf_single.c b/tools/testing/selftests/bpf/progs/test_snprintf_single.c
new file mode 100644
index 000000000000..402adaf344f9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_snprintf_single.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Google LLC. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+/* The format string is filled from the userspace such that loading fails */
+static const char fmt[10];
+
+SEC("raw_tp/sys_enter")
+int handler(const void *ctx)
+{
+ unsigned long long arg = 42;
+
+ bpf_snprintf(NULL, 0, fmt, &arg, sizeof(arg));
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
index a3a366c57ce1..a39eba9f5201 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
@@ -29,15 +29,16 @@ struct {
} verdict_map SEC(".maps");
static volatile bool test_sockmap; /* toggled by user-space */
+static volatile bool test_ingress; /* toggled by user-space */
SEC("sk_skb/stream_parser")
-int prog_skb_parser(struct __sk_buff *skb)
+int prog_stream_parser(struct __sk_buff *skb)
{
return skb->len;
}
SEC("sk_skb/stream_verdict")
-int prog_skb_verdict(struct __sk_buff *skb)
+int prog_stream_verdict(struct __sk_buff *skb)
{
unsigned int *count;
__u32 zero = 0;
@@ -55,6 +56,27 @@ int prog_skb_verdict(struct __sk_buff *skb)
return verdict;
}
+SEC("sk_skb/skb_verdict")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ unsigned int *count;
+ __u32 zero = 0;
+ int verdict;
+
+ if (test_sockmap)
+ verdict = bpf_sk_redirect_map(skb, &sock_map, zero,
+ test_ingress ? BPF_F_INGRESS : 0);
+ else
+ verdict = bpf_sk_redirect_hash(skb, &sock_hash, &zero,
+ test_ingress ? BPF_F_INGRESS : 0);
+
+ count = bpf_map_lookup_elem(&verdict_map, &verdict);
+ if (count)
+ (*count)++;
+
+ return verdict;
+}
+
SEC("sk_msg")
int prog_msg_verdict(struct sk_msg_md *msg)
{
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c b/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c
new file mode 100644
index 000000000000..2d31f66e4f23
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __type(value, __u64);
+} sock_map SEC(".maps");
+
+SEC("sk_skb/skb_verdict")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ return SK_DROP;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_static_linked1.c b/tools/testing/selftests/bpf/progs/test_static_linked1.c
new file mode 100644
index 000000000000..ea1a6c4c7172
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_static_linked1.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+/* 8-byte aligned .bss */
+static volatile long static_var1;
+static volatile int static_var11;
+int var1 = 0;
+/* 4-byte aligned .rodata */
+const volatile int rovar1;
+
+/* same "subprog" name in both files */
+static __noinline int subprog(int x)
+{
+ /* but different formula */
+ return x * 2;
+}
+
+SEC("raw_tp/sys_enter")
+int handler1(const void *ctx)
+{
+ var1 = subprog(rovar1) + static_var1 + static_var11;
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
+int VERSION SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_static_linked2.c b/tools/testing/selftests/bpf/progs/test_static_linked2.c
new file mode 100644
index 000000000000..54d8d1ab577c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_static_linked2.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+/* 4-byte aligned .bss */
+static volatile int static_var2;
+static volatile int static_var22;
+int var2 = 0;
+/* 8-byte aligned .rodata */
+const volatile long rovar2;
+
+/* same "subprog" name in both files */
+static __noinline int subprog(int x)
+{
+ /* but different formula */
+ return x * 3;
+}
+
+SEC("raw_tp/sys_enter")
+int handler2(const void *ctx)
+{
+ var2 = subprog(rovar2) + static_var2 + static_var22;
+
+ return 0;
+}
+
+/* different name and/or type of the variable doesn't matter */
+char _license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index 37bce7a7c394..84cd63259554 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -24,14 +24,29 @@ static const int cfg_port = 8000;
static const int cfg_udp_src = 20000;
+#define L2_PAD_SZ (sizeof(struct vxlanhdr) + ETH_HLEN)
+
#define UDP_PORT 5555
#define MPLS_OVER_UDP_PORT 6635
#define ETH_OVER_UDP_PORT 7777
+#define VXLAN_UDP_PORT 8472
+
+#define EXTPROTO_VXLAN 0x1
+
+#define VXLAN_N_VID (1u << 24)
+#define VXLAN_VNI_MASK bpf_htonl((VXLAN_N_VID - 1) << 8)
+#define VXLAN_FLAGS 0x8
+#define VXLAN_VNI 1
/* MPLS label 1000 with S bit (last label) set and ttl of 255. */
static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
MPLS_LS_S_MASK | 0xff);
+struct vxlanhdr {
+ __be32 vx_flags;
+ __be32 vx_vni;
+} __attribute__((packed));
+
struct gre_hdr {
__be16 flags;
__be16 protocol;
@@ -45,13 +60,13 @@ union l4hdr {
struct v4hdr {
struct iphdr ip;
union l4hdr l4hdr;
- __u8 pad[16]; /* enough space for L2 header */
+ __u8 pad[L2_PAD_SZ]; /* space for L2 header / vxlan header ... */
} __attribute__((packed));
struct v6hdr {
struct ipv6hdr ip;
union l4hdr l4hdr;
- __u8 pad[16]; /* enough space for L2 header */
+ __u8 pad[L2_PAD_SZ]; /* space for L2 header / vxlan header ... */
} __attribute__((packed));
static __always_inline void set_ipv4_csum(struct iphdr *iph)
@@ -69,14 +84,15 @@ static __always_inline void set_ipv4_csum(struct iphdr *iph)
iph->check = ~((csum & 0xffff) + (csum >> 16));
}
-static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
- __u16 l2_proto)
+static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
+ __u16 l2_proto, __u16 ext_proto)
{
__u16 udp_dst = UDP_PORT;
struct iphdr iph_inner;
struct v4hdr h_outer;
struct tcphdr tcph;
int olen, l2_len;
+ __u8 *l2_hdr = NULL;
int tcp_off;
__u64 flags;
@@ -141,7 +157,11 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
break;
case ETH_P_TEB:
l2_len = ETH_HLEN;
- udp_dst = ETH_OVER_UDP_PORT;
+ if (ext_proto & EXTPROTO_VXLAN) {
+ udp_dst = VXLAN_UDP_PORT;
+ l2_len += sizeof(struct vxlanhdr);
+ } else
+ udp_dst = ETH_OVER_UDP_PORT;
break;
}
flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
@@ -171,14 +191,26 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
}
/* add L2 encap (if specified) */
+ l2_hdr = (__u8 *)&h_outer + olen;
switch (l2_proto) {
case ETH_P_MPLS_UC:
- *((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label;
+ *(__u32 *)l2_hdr = mpls_label;
break;
case ETH_P_TEB:
- if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen,
- ETH_HLEN))
+ flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
+
+ if (ext_proto & EXTPROTO_VXLAN) {
+ struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
+
+ vxlan_hdr->vx_flags = VXLAN_FLAGS;
+ vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);
+
+ l2_hdr += sizeof(struct vxlanhdr);
+ }
+
+ if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
return TC_ACT_SHOT;
+
break;
}
olen += l2_len;
@@ -214,14 +246,21 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
return TC_ACT_OK;
}
-static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
+static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
__u16 l2_proto)
{
+ return __encap_ipv4(skb, encap_proto, l2_proto, 0);
+}
+
+static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
+ __u16 l2_proto, __u16 ext_proto)
+{
__u16 udp_dst = UDP_PORT;
struct ipv6hdr iph_inner;
struct v6hdr h_outer;
struct tcphdr tcph;
int olen, l2_len;
+ __u8 *l2_hdr = NULL;
__u16 tot_len;
__u64 flags;
@@ -249,7 +288,11 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
break;
case ETH_P_TEB:
l2_len = ETH_HLEN;
- udp_dst = ETH_OVER_UDP_PORT;
+ if (ext_proto & EXTPROTO_VXLAN) {
+ udp_dst = VXLAN_UDP_PORT;
+ l2_len += sizeof(struct vxlanhdr);
+ } else
+ udp_dst = ETH_OVER_UDP_PORT;
break;
}
flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
@@ -267,7 +310,7 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
tot_len = bpf_ntohs(iph_inner.payload_len) + sizeof(iph_inner) +
- sizeof(h_outer.l4hdr.udp);
+ sizeof(h_outer.l4hdr.udp) + l2_len;
h_outer.l4hdr.udp.check = 0;
h_outer.l4hdr.udp.len = bpf_htons(tot_len);
break;
@@ -278,13 +321,24 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
}
/* add L2 encap (if specified) */
+ l2_hdr = (__u8 *)&h_outer + olen;
switch (l2_proto) {
case ETH_P_MPLS_UC:
- *((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label;
+ *(__u32 *)l2_hdr = mpls_label;
break;
case ETH_P_TEB:
- if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen,
- ETH_HLEN))
+ flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
+
+ if (ext_proto & EXTPROTO_VXLAN) {
+ struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
+
+ vxlan_hdr->vx_flags = VXLAN_FLAGS;
+ vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);
+
+ l2_hdr += sizeof(struct vxlanhdr);
+ }
+
+ if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
return TC_ACT_SHOT;
break;
}
@@ -309,6 +363,12 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
return TC_ACT_OK;
}
+static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
+ __u16 l2_proto)
+{
+ return __encap_ipv6(skb, encap_proto, l2_proto, 0);
+}
+
SEC("encap_ipip_none")
int __encap_ipip_none(struct __sk_buff *skb)
{
@@ -372,6 +432,17 @@ int __encap_udp_eth(struct __sk_buff *skb)
return TC_ACT_OK;
}
+SEC("encap_vxlan_eth")
+int __encap_vxlan_eth(struct __sk_buff *skb)
+{
+ if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
+ return __encap_ipv4(skb, IPPROTO_UDP,
+ ETH_P_TEB,
+ EXTPROTO_VXLAN);
+ else
+ return TC_ACT_OK;
+}
+
SEC("encap_sit_none")
int __encap_sit_none(struct __sk_buff *skb)
{
@@ -444,6 +515,17 @@ int __encap_ip6udp_eth(struct __sk_buff *skb)
return TC_ACT_OK;
}
+SEC("encap_ip6vxlan_eth")
+int __encap_ip6vxlan_eth(struct __sk_buff *skb)
+{
+ if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
+ return __encap_ipv6(skb, IPPROTO_UDP,
+ ETH_P_TEB,
+ EXTPROTO_VXLAN);
+ else
+ return TC_ACT_OK;
+}
+
static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
{
char buf[sizeof(struct v6hdr)];
@@ -479,6 +561,9 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
case ETH_OVER_UDP_PORT:
olen += ETH_HLEN;
break;
+ case VXLAN_UDP_PORT:
+ olen += ETH_HLEN + sizeof(struct vxlanhdr);
+ break;
}
break;
default:
diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
index 9afe947cfae9..e7b673117436 100644
--- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
@@ -396,7 +396,7 @@ int _ip6vxlan_get_tunnel(struct __sk_buff *skb)
SEC("geneve_set_tunnel")
int _geneve_set_tunnel(struct __sk_buff *skb)
{
- int ret, ret2;
+ int ret;
struct bpf_tunnel_key key;
struct geneve_opt gopt;
@@ -508,10 +508,8 @@ int _ip6geneve_get_tunnel(struct __sk_buff *skb)
}
ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt));
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
+ if (ret < 0)
+ gopt.opt_class = 0;
bpf_trace_printk(fmt, sizeof(fmt),
key.tunnel_id, key.remote_ipv4, gopt.opt_class);
diff --git a/tools/testing/selftests/bpf/test_bpftool_build.sh b/tools/testing/selftests/bpf/test_bpftool_build.sh
index 2db3c60e1e61..ac349a5cea7e 100755
--- a/tools/testing/selftests/bpf/test_bpftool_build.sh
+++ b/tools/testing/selftests/bpf/test_bpftool_build.sh
@@ -85,23 +85,6 @@ make_with_tmpdir() {
echo
}
-make_doc_and_clean() {
- echo -e "\$PWD: $PWD"
- echo -e "command: make -s $* doc >/dev/null"
- RST2MAN_OPTS="--exit-status=1" make $J -s $* doc
- if [ $? -ne 0 ] ; then
- ERROR=1
- printf "FAILURE: Errors or warnings when building documentation\n"
- fi
- (
- if [ $# -ge 1 ] ; then
- cd ${@: -1}
- fi
- make -s doc-clean
- )
- echo
-}
-
echo "Trying to build bpftool"
echo -e "... through kbuild\n"
@@ -162,7 +145,3 @@ make_and_clean
make_with_tmpdir OUTPUT
make_with_tmpdir O
-
-echo -e "Checking documentation build\n"
-# From tools/bpf/bpftool
-make_doc_and_clean
diff --git a/tools/testing/selftests/bpf/test_btf.h b/tools/testing/selftests/bpf/test_btf.h
index 2023725f1962..e2394eea4b7f 100644
--- a/tools/testing/selftests/bpf/test_btf.h
+++ b/tools/testing/selftests/bpf/test_btf.h
@@ -66,4 +66,7 @@
#define BTF_FUNC_ENC(name, func_proto) \
BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), func_proto)
+#define BTF_TYPE_FLOAT_ENC(name, sz) \
+ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 0), sz)
+
#endif /* _TEST_BTF_H */
diff --git a/tools/testing/selftests/bpf/test_doc_build.sh b/tools/testing/selftests/bpf/test_doc_build.sh
new file mode 100755
index 000000000000..7eb940a7b2eb
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_doc_build.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+
+# Assume script is located under tools/testing/selftests/bpf/. We want to start
+# build attempts from the top of kernel repository.
+SCRIPT_REL_PATH=$(realpath --relative-to=$PWD $0)
+SCRIPT_REL_DIR=$(dirname $SCRIPT_REL_PATH)
+KDIR_ROOT_DIR=$(realpath $PWD/$SCRIPT_REL_DIR/../../../../)
+cd $KDIR_ROOT_DIR
+
+for tgt in docs docs-clean; do
+ make -s -C $PWD/$SCRIPT_REL_DIR $tgt;
+done
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index f7c2fd89d01a..dda52cb649dc 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -130,6 +130,20 @@ extern int test__join_cgroup(const char *path);
#define CHECK_ATTR(condition, tag, format...) \
_CHECK(condition, tag, tattr.duration, format)
+#define ASSERT_TRUE(actual, name) ({ \
+ static int duration = 0; \
+ bool ___ok = (actual); \
+ CHECK(!___ok, (name), "unexpected %s: got FALSE\n", (name)); \
+ ___ok; \
+})
+
+#define ASSERT_FALSE(actual, name) ({ \
+ static int duration = 0; \
+ bool ___ok = !(actual); \
+ CHECK(!___ok, (name), "unexpected %s: got TRUE\n", (name)); \
+ ___ok; \
+})
+
#define ASSERT_EQ(actual, expected, name) ({ \
static int duration = 0; \
typeof(actual) ___act = (actual); \
@@ -152,6 +166,50 @@ extern int test__join_cgroup(const char *path);
___ok; \
})
+#define ASSERT_LT(actual, expected, name) ({ \
+ static int duration = 0; \
+ typeof(actual) ___act = (actual); \
+ typeof(expected) ___exp = (expected); \
+ bool ___ok = ___act < ___exp; \
+ CHECK(!___ok, (name), \
+ "unexpected %s: actual %lld >= expected %lld\n", \
+ (name), (long long)(___act), (long long)(___exp)); \
+ ___ok; \
+})
+
+#define ASSERT_LE(actual, expected, name) ({ \
+ static int duration = 0; \
+ typeof(actual) ___act = (actual); \
+ typeof(expected) ___exp = (expected); \
+ bool ___ok = ___act <= ___exp; \
+ CHECK(!___ok, (name), \
+ "unexpected %s: actual %lld > expected %lld\n", \
+ (name), (long long)(___act), (long long)(___exp)); \
+ ___ok; \
+})
+
+#define ASSERT_GT(actual, expected, name) ({ \
+ static int duration = 0; \
+ typeof(actual) ___act = (actual); \
+ typeof(expected) ___exp = (expected); \
+ bool ___ok = ___act > ___exp; \
+ CHECK(!___ok, (name), \
+ "unexpected %s: actual %lld <= expected %lld\n", \
+ (name), (long long)(___act), (long long)(___exp)); \
+ ___ok; \
+})
+
+#define ASSERT_GE(actual, expected, name) ({ \
+ static int duration = 0; \
+ typeof(actual) ___act = (actual); \
+ typeof(expected) ___exp = (expected); \
+ bool ___ok = ___act >= ___exp; \
+ CHECK(!___ok, (name), \
+ "unexpected %s: actual %lld < expected %lld\n", \
+ (name), (long long)(___act), (long long)(___exp)); \
+ ___ok; \
+})
+
#define ASSERT_STREQ(actual, expected, name) ({ \
static int duration = 0; \
const char *___act = actual; \
@@ -167,7 +225,8 @@ extern int test__join_cgroup(const char *path);
static int duration = 0; \
long long ___res = (res); \
bool ___ok = ___res == 0; \
- CHECK(!___ok, (name), "unexpected error: %lld\n", ___res); \
+ CHECK(!___ok, (name), "unexpected error: %lld (errno %d)\n", \
+ ___res, errno); \
___ok; \
})
@@ -199,7 +258,7 @@ extern int test__join_cgroup(const char *path);
#define ASSERT_ERR_PTR(ptr, name) ({ \
static int duration = 0; \
const void *___res = (ptr); \
- bool ___ok = IS_ERR(___res) \
+ bool ___ok = IS_ERR(___res); \
CHECK(!___ok, (name), "unexpected pointer: %p\n", ___res); \
___ok; \
})
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 427ca00a3217..eefd445b96fc 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -732,7 +732,7 @@ static int sendmsg_test(struct sockmap_options *opt)
* socket is not a valid test. So in this case lets not
* enable kTLS but still run the test.
*/
- if (!txmsg_redir || (txmsg_redir && txmsg_ingress)) {
+ if (!txmsg_redir || txmsg_ingress) {
err = sockmap_init_ktls(opt->verbose, rx_fd);
if (err)
return err;
diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh
index 7c76b841b17b..c9dde9b9d987 100755
--- a/tools/testing/selftests/bpf/test_tc_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh
@@ -44,8 +44,8 @@ setup() {
# clamp route to reserve room for tunnel headers
ip -netns "${ns1}" -4 route flush table main
ip -netns "${ns1}" -6 route flush table main
- ip -netns "${ns1}" -4 route add "${ns2_v4}" mtu 1458 dev veth1
- ip -netns "${ns1}" -6 route add "${ns2_v6}" mtu 1438 dev veth1
+ ip -netns "${ns1}" -4 route add "${ns2_v4}" mtu 1450 dev veth1
+ ip -netns "${ns1}" -6 route add "${ns2_v6}" mtu 1430 dev veth1
sleep 1
@@ -105,6 +105,12 @@ if [[ "$#" -eq "0" ]]; then
echo "sit"
$0 ipv6 sit none 100
+ echo "ip4 vxlan"
+ $0 ipv4 vxlan eth 2000
+
+ echo "ip6 vxlan"
+ $0 ipv6 ip6vxlan eth 2000
+
for mac in none mpls eth ; do
echo "ip gre $mac"
$0 ipv4 gre $mac 100
@@ -214,6 +220,9 @@ if [[ "$tuntype" =~ "udp" ]]; then
targs="encap fou encap-sport auto encap-dport $dport"
elif [[ "$tuntype" =~ "gre" && "$mac" == "eth" ]]; then
ttype=$gretaptype
+elif [[ "$tuntype" =~ "vxlan" && "$mac" == "eth" ]]; then
+ ttype="vxlan"
+ targs="id 1 dstport 8472 udp6zerocsumrx"
else
ttype=$tuntype
targs=""
@@ -242,7 +251,7 @@ if [[ "$tuntype" == "ip6udp" && "$mac" == "mpls" ]]; then
elif [[ "$tuntype" =~ "udp" && "$mac" == "eth" ]]; then
# No support for TEB fou tunnel; expect failure.
expect_tun_fail=1
-elif [[ "$tuntype" =~ "gre" && "$mac" == "eth" ]]; then
+elif [[ "$tuntype" =~ (gre|vxlan) && "$mac" == "eth" ]]; then
# Share ethernet address between tunnel/veth2 so L2 decap works.
ethaddr=$(ip netns exec "${ns2}" ip link show veth2 | \
awk '/ether/ { print $2 }')
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 58b5a349d3ba..1512092e1e68 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -105,7 +105,7 @@ struct bpf_test {
enum bpf_prog_type prog_type;
uint8_t flags;
void (*fill_helper)(struct bpf_test *self);
- uint8_t runs;
+ int runs;
#define bpf_testdata_struct_t \
struct { \
uint32_t retval, retval_unpriv; \
@@ -1165,7 +1165,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
run_errs = 0;
run_successes = 0;
- if (!alignment_prevented_execution && fd_prog >= 0) {
+ if (!alignment_prevented_execution && fd_prog >= 0 && test->runs >= 0) {
uint32_t expected_val;
int i;
diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
index 88a7483eaae4..46633a3bfb0b 100755
--- a/tools/testing/selftests/bpf/test_xsk.sh
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -71,13 +71,21 @@
#
# Run (full output without color-coding):
# sudo ./test_xsk.sh
+#
+# Run with verbose output:
+# sudo ./test_xsk.sh -v
+#
+# Run and dump packet contents:
+# sudo ./test_xsk.sh -D
. xsk_prereqs.sh
-while getopts c flag
+while getopts "cvD" flag
do
case "${flag}" in
c) colorconsole=1;;
+ v) verbose=1;;
+ D) dump_pkts=1;;
esac
done
@@ -95,17 +103,22 @@ NS1=af_xdp${VETH1_POSTFIX}
MTU=1500
setup_vethPairs() {
- echo "setting up ${VETH0}: namespace: ${NS0}"
+ if [[ $verbose -eq 1 ]]; then
+ echo "setting up ${VETH0}: namespace: ${NS0}"
+ fi
ip netns add ${NS1}
- ip link add ${VETH0} type veth peer name ${VETH1}
+ ip link add ${VETH0} numtxqueues 4 numrxqueues 4 type veth peer name ${VETH1} numtxqueues 4 numrxqueues 4
if [ -f /proc/net/if_inet6 ]; then
echo 1 > /proc/sys/net/ipv6/conf/${VETH0}/disable_ipv6
fi
- echo "setting up ${VETH1}: namespace: ${NS1}"
+ if [[ $verbose -eq 1 ]]; then
+ echo "setting up ${VETH1}: namespace: ${NS1}"
+ fi
ip link set ${VETH1} netns ${NS1}
ip netns exec ${NS1} ip link set ${VETH1} mtu ${MTU}
ip link set ${VETH0} mtu ${MTU}
ip netns exec ${NS1} ip link set ${VETH1} up
+ ip netns exec ${NS1} ip link set dev lo up
ip link set ${VETH0} up
}
@@ -125,121 +138,24 @@ echo "${VETH0}:${VETH1},${NS1}" > ${SPECFILE}
validate_veth_spec_file
-echo "Spec file created: ${SPECFILE}"
-
-test_status $retval "${TEST_NAME}"
-
-## START TESTS
-
-statusList=()
-
-### TEST 1
-TEST_NAME="XSK KSELFTEST FRAMEWORK"
-
-echo "Switching interfaces [${VETH0}, ${VETH1}] to XDP Generic mode"
-vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
-
-retval=$?
-if [ $retval -eq 0 ]; then
- echo "Switching interfaces [${VETH0}, ${VETH1}] to XDP Native mode"
- vethXDPnative ${VETH0} ${VETH1} ${NS1}
+if [[ $verbose -eq 1 ]]; then
+ echo "Spec file created: ${SPECFILE}"
+ VERBOSE_ARG="-v"
fi
-retval=$?
-test_status $retval "${TEST_NAME}"
-statusList+=($retval)
-
-### TEST 2
-TEST_NAME="SKB NOPOLL"
-
-vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
-
-params=("-S")
-execxdpxceiver params
-
-retval=$?
-test_status $retval "${TEST_NAME}"
-statusList+=($retval)
-
-### TEST 3
-TEST_NAME="SKB POLL"
-
-vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
-
-params=("-S" "-p")
-execxdpxceiver params
-
-retval=$?
-test_status $retval "${TEST_NAME}"
-statusList+=($retval)
-
-### TEST 4
-TEST_NAME="DRV NOPOLL"
-
-vethXDPnative ${VETH0} ${VETH1} ${NS1}
-
-params=("-N")
-execxdpxceiver params
-
-retval=$?
-test_status $retval "${TEST_NAME}"
-statusList+=($retval)
-
-### TEST 5
-TEST_NAME="DRV POLL"
-
-vethXDPnative ${VETH0} ${VETH1} ${NS1}
-
-params=("-N" "-p")
-execxdpxceiver params
-
-retval=$?
-test_status $retval "${TEST_NAME}"
-statusList+=($retval)
-
-### TEST 6
-TEST_NAME="SKB SOCKET TEARDOWN"
-
-vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
-
-params=("-S" "-T")
-execxdpxceiver params
-
-retval=$?
-test_status $retval "${TEST_NAME}"
-statusList+=($retval)
-
-### TEST 7
-TEST_NAME="DRV SOCKET TEARDOWN"
-
-vethXDPnative ${VETH0} ${VETH1} ${NS1}
-
-params=("-N" "-T")
-execxdpxceiver params
+if [[ $dump_pkts -eq 1 ]]; then
+ DUMP_PKTS_ARG="-D"
+fi
-retval=$?
test_status $retval "${TEST_NAME}"
-statusList+=($retval)
-### TEST 8
-TEST_NAME="SKB BIDIRECTIONAL SOCKETS"
-
-vethXDPgeneric ${VETH0} ${VETH1} ${NS1}
-
-params=("-S" "-B")
-execxdpxceiver params
-
-retval=$?
-test_status $retval "${TEST_NAME}"
-statusList+=($retval)
+## START TESTS
-### TEST 9
-TEST_NAME="DRV BIDIRECTIONAL SOCKETS"
+statusList=()
-vethXDPnative ${VETH0} ${VETH1} ${NS1}
+TEST_NAME="XSK KSELFTESTS"
-params=("-N" "-B")
-execxdpxceiver params
+execxdpxceiver
retval=$?
test_status $retval "${TEST_NAME}"
diff --git a/tools/testing/selftests/bpf/verifier/array_access.c b/tools/testing/selftests/bpf/verifier/array_access.c
index 1b138cd2b187..1b1c798e9248 100644
--- a/tools/testing/selftests/bpf/verifier/array_access.c
+++ b/tools/testing/selftests/bpf/verifier/array_access.c
@@ -186,7 +186,7 @@
},
.fixup_map_hash_48b = { 3 },
.errstr_unpriv = "R0 leaks addr",
- .errstr = "invalid access to map value, value_size=48 off=44 size=8",
+ .errstr = "R0 unbounded memory access",
.result_unpriv = REJECT,
.result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c
index 57ed67b86074..8a1caf46ffbc 100644
--- a/tools/testing/selftests/bpf/verifier/bounds.c
+++ b/tools/testing/selftests/bpf/verifier/bounds.c
@@ -261,8 +261,6 @@
},
.fixup_map_hash_8b = { 3 },
/* not actually fully unbounded, but the bound is very high */
- .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root",
- .result_unpriv = REJECT,
.errstr = "value -4294967168 makes map_value pointer be out of bounds",
.result = REJECT,
},
@@ -298,9 +296,6 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_8b = { 3 },
- /* not actually fully unbounded, but the bound is very high */
- .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root",
- .result_unpriv = REJECT,
.errstr = "value -4294967168 makes map_value pointer be out of bounds",
.result = REJECT,
},
diff --git a/tools/testing/selftests/bpf/verifier/bounds_deduction.c b/tools/testing/selftests/bpf/verifier/bounds_deduction.c
index 1fd07a4f27ac..91869aea6d64 100644
--- a/tools/testing/selftests/bpf/verifier/bounds_deduction.c
+++ b/tools/testing/selftests/bpf/verifier/bounds_deduction.c
@@ -6,8 +6,9 @@
BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
BPF_EXIT_INSN(),
},
- .result = REJECT,
+ .errstr_unpriv = "R1 has pointer with unsupported alu operation",
.errstr = "R0 tried to subtract pointer from scalar",
+ .result = REJECT,
},
{
"check deducing bounds from const, 2",
@@ -20,6 +21,8 @@
BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R1 has pointer with unsupported alu operation",
+ .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 1,
},
@@ -31,20 +34,24 @@
BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
BPF_EXIT_INSN(),
},
- .result = REJECT,
+ .errstr_unpriv = "R1 has pointer with unsupported alu operation",
.errstr = "R0 tried to subtract pointer from scalar",
+ .result = REJECT,
},
{
"check deducing bounds from const, 4",
.insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 1),
BPF_EXIT_INSN(),
BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1),
BPF_EXIT_INSN(),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_6, BPF_REG_0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R6 has pointer with unsupported alu operation",
+ .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -55,8 +62,9 @@
BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
BPF_EXIT_INSN(),
},
- .result = REJECT,
+ .errstr_unpriv = "R1 has pointer with unsupported alu operation",
.errstr = "R0 tried to subtract pointer from scalar",
+ .result = REJECT,
},
{
"check deducing bounds from const, 6",
@@ -67,8 +75,9 @@
BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
BPF_EXIT_INSN(),
},
- .result = REJECT,
+ .errstr_unpriv = "R1 has pointer with unsupported alu operation",
.errstr = "R0 tried to subtract pointer from scalar",
+ .result = REJECT,
},
{
"check deducing bounds from const, 7",
@@ -80,8 +89,9 @@
offsetof(struct __sk_buff, mark)),
BPF_EXIT_INSN(),
},
- .result = REJECT,
+ .errstr_unpriv = "R1 has pointer with unsupported alu operation",
.errstr = "dereference of modified ctx ptr",
+ .result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
@@ -94,8 +104,9 @@
offsetof(struct __sk_buff, mark)),
BPF_EXIT_INSN(),
},
- .result = REJECT,
+ .errstr_unpriv = "R1 has pointer with unsupported alu operation",
.errstr = "dereference of modified ctx ptr",
+ .result = REJECT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
@@ -106,8 +117,9 @@
BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
BPF_EXIT_INSN(),
},
- .result = REJECT,
+ .errstr_unpriv = "R1 has pointer with unsupported alu operation",
.errstr = "R0 tried to subtract pointer from scalar",
+ .result = REJECT,
},
{
"check deducing bounds from const, 10",
@@ -119,6 +131,6 @@
BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
BPF_EXIT_INSN(),
},
- .result = REJECT,
.errstr = "math between ctx pointer and register with unbounded min value is not allowed",
+ .result = REJECT,
},
diff --git a/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c b/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c
index 9baca7a75c42..c2aa6f26738b 100644
--- a/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c
+++ b/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c
@@ -19,7 +19,6 @@
},
.fixup_map_hash_8b = { 3 },
.errstr = "unbounded min value",
- .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds",
.result = REJECT,
},
{
@@ -43,7 +42,6 @@
},
.fixup_map_hash_8b = { 3 },
.errstr = "unbounded min value",
- .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds",
.result = REJECT,
},
{
@@ -69,7 +67,6 @@
},
.fixup_map_hash_8b = { 3 },
.errstr = "unbounded min value",
- .errstr_unpriv = "R8 has unknown scalar with mixed signed bounds",
.result = REJECT,
},
{
@@ -94,7 +91,6 @@
},
.fixup_map_hash_8b = { 3 },
.errstr = "unbounded min value",
- .errstr_unpriv = "R8 has unknown scalar with mixed signed bounds",
.result = REJECT,
},
{
@@ -141,7 +137,6 @@
},
.fixup_map_hash_8b = { 3 },
.errstr = "unbounded min value",
- .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds",
.result = REJECT,
},
{
@@ -210,7 +205,6 @@
},
.fixup_map_hash_8b = { 3 },
.errstr = "unbounded min value",
- .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds",
.result = REJECT,
},
{
@@ -260,7 +254,6 @@
},
.fixup_map_hash_8b = { 3 },
.errstr = "unbounded min value",
- .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds",
.result = REJECT,
},
{
@@ -287,7 +280,6 @@
},
.fixup_map_hash_8b = { 3 },
.errstr = "unbounded min value",
- .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds",
.result = REJECT,
},
{
@@ -313,7 +305,6 @@
},
.fixup_map_hash_8b = { 3 },
.errstr = "unbounded min value",
- .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds",
.result = REJECT,
},
{
@@ -342,7 +333,6 @@
},
.fixup_map_hash_8b = { 3 },
.errstr = "unbounded min value",
- .errstr_unpriv = "R7 has unknown scalar with mixed signed bounds",
.result = REJECT,
},
{
@@ -372,7 +362,6 @@
},
.fixup_map_hash_8b = { 4 },
.errstr = "unbounded min value",
- .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds",
.result = REJECT,
},
{
@@ -400,7 +389,5 @@
},
.fixup_map_hash_8b = { 3 },
.errstr = "unbounded min value",
- .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds",
.result = REJECT,
- .result_unpriv = REJECT,
},
diff --git a/tools/testing/selftests/bpf/verifier/bpf_get_stack.c b/tools/testing/selftests/bpf/verifier/bpf_get_stack.c
index 69b048cf46d9..3e024c891178 100644
--- a/tools/testing/selftests/bpf/verifier/bpf_get_stack.c
+++ b/tools/testing/selftests/bpf/verifier/bpf_get_stack.c
@@ -42,3 +42,46 @@
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
},
+{
+ "bpf_get_task_stack return R0 range is refined",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_6, 0), // ctx->meta->seq
+ BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_1, 8), // ctx->task
+ BPF_LD_MAP_FD(BPF_REG_1, 0), // fixup_map_array_48b
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_9, BPF_REG_0), // keep buf for seq_write
+ BPF_MOV64_IMM(BPF_REG_3, 48),
+ BPF_MOV64_IMM(BPF_REG_4, 0),
+ BPF_EMIT_CALL(BPF_FUNC_get_task_stack),
+ BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_9),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+ BPF_EMIT_CALL(BPF_FUNC_seq_write),
+
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACING,
+ .expected_attach_type = BPF_TRACE_ITER,
+ .kfunc = "task",
+ .runs = -1, // Don't run, just load
+ .fixup_map_array_48b = { 3 },
+},
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index eb888c8479c3..336a749673d1 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -19,7 +19,7 @@
BPF_MOV64_IMM(BPF_REG_0, 2),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 1,
@@ -136,7 +136,7 @@
{
"calls: wrong src reg",
.insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 3, 0, 0),
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
@@ -397,7 +397,7 @@
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.fixup_map_hash_48b = { 3 },
.result_unpriv = REJECT,
.result = ACCEPT,
@@ -1977,7 +1977,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
},
@@ -2003,7 +2003,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.errstr = "!read_ok",
.result = REJECT,
},
@@ -2028,7 +2028,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.errstr = "!read_ok",
.result = REJECT,
},
diff --git a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
index fb13ca2d5606..d78627be060f 100644
--- a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
+++ b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
@@ -239,6 +239,7 @@
.result = ACCEPT,
.prog_type = BPF_PROG_TYPE_SK_LOOKUP,
.expected_attach_type = BPF_SK_LOOKUP,
+ .runs = -1,
},
/* invalid 8-byte reads from a 4-byte fields in bpf_sk_lookup */
{
diff --git a/tools/testing/selftests/bpf/verifier/dead_code.c b/tools/testing/selftests/bpf/verifier/dead_code.c
index 5cf361d8eb1c..17fe33a75034 100644
--- a/tools/testing/selftests/bpf/verifier/dead_code.c
+++ b/tools/testing/selftests/bpf/verifier/dead_code.c
@@ -85,7 +85,7 @@
BPF_MOV64_IMM(BPF_REG_0, 12),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 7,
@@ -103,7 +103,7 @@
BPF_MOV64_IMM(BPF_REG_0, 12),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 7,
@@ -121,7 +121,7 @@
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -5),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 7,
@@ -137,7 +137,7 @@
BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
@@ -152,7 +152,7 @@
BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
diff --git a/tools/testing/selftests/bpf/verifier/map_ptr.c b/tools/testing/selftests/bpf/verifier/map_ptr.c
index b117bdd3806d..1f82021429bf 100644
--- a/tools/testing/selftests/bpf/verifier/map_ptr.c
+++ b/tools/testing/selftests/bpf/verifier/map_ptr.c
@@ -75,6 +75,8 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_16b = { 4 },
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R1 has pointer with unsupported alu operation",
.result = ACCEPT,
},
{
@@ -91,5 +93,7 @@
BPF_EXIT_INSN(),
},
.fixup_map_hash_16b = { 4 },
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R0 has pointer with unsupported alu operation",
.result = ACCEPT,
},
diff --git a/tools/testing/selftests/bpf/verifier/unpriv.c b/tools/testing/selftests/bpf/verifier/unpriv.c
index b018ad71e0a8..bd436df5cc32 100644
--- a/tools/testing/selftests/bpf/verifier/unpriv.c
+++ b/tools/testing/selftests/bpf/verifier/unpriv.c
@@ -497,7 +497,7 @@
.result = ACCEPT,
},
{
- "unpriv: adding of fp",
+ "unpriv: adding of fp, reg",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_MOV64_IMM(BPF_REG_1, 0),
@@ -510,6 +510,19 @@
.result = ACCEPT,
},
{
+ "unpriv: adding of fp, imm",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8),
+ BPF_EXIT_INSN(),
+ },
+ .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+},
+{
"unpriv: cmp of stack pointer",
.insns = {
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
index ed4e76b24649..e5913fd3b903 100644
--- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
+++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
@@ -21,8 +21,6 @@
.fixup_map_hash_16b = { 5 },
.fixup_map_array_48b = { 8 },
.result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R1 tried to add from different maps",
.retval = 1,
},
{
@@ -122,7 +120,7 @@
.fixup_map_array_48b = { 1 },
.result = ACCEPT,
.result_unpriv = REJECT,
- .errstr_unpriv = "R2 tried to add from different pointers or scalars",
+ .errstr_unpriv = "R2 tried to add from different maps, paths or scalars",
.retval = 0,
},
{
@@ -169,7 +167,7 @@
.fixup_map_array_48b = { 1 },
.result = ACCEPT,
.result_unpriv = REJECT,
- .errstr_unpriv = "R2 tried to add from different maps or paths",
+ .errstr_unpriv = "R2 tried to add from different maps, paths or scalars",
.retval = 0,
},
{
@@ -517,6 +515,27 @@
.retval = 0xabcdef12,
},
{
+ "map access: value_ptr += N, value_ptr -= N known scalar",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+ BPF_MOV32_IMM(BPF_REG_1, 0x12345678),
+ BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2),
+ BPF_MOV64_IMM(BPF_REG_1, 2),
+ BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 3 },
+ .result = ACCEPT,
+ .retval = 0x12345678,
+},
+{
"map access: unknown scalar += value_ptr, 1",
.insns = {
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh
index 26ae8d0b6ce3..8889b3f55236 100755
--- a/tools/testing/selftests/bpf/vmtest.sh
+++ b/tools/testing/selftests/bpf/vmtest.sh
@@ -17,19 +17,22 @@ KCONFIG_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vm
KCONFIG_API_URL="https://api.github.com/repos/libbpf/libbpf/contents/travis-ci/vmtest/configs/latest.config"
INDEX_URL="https://raw.githubusercontent.com/libbpf/libbpf/master/travis-ci/vmtest/configs/INDEX"
NUM_COMPILE_JOBS="$(nproc)"
+LOG_FILE_BASE="$(date +"bpf_selftests.%Y-%m-%d_%H-%M-%S")"
+LOG_FILE="${LOG_FILE_BASE}.log"
+EXIT_STATUS_FILE="${LOG_FILE_BASE}.exit_status"
usage()
{
cat <<EOF
-Usage: $0 [-i] [-d <output_dir>] -- [<command>]
+Usage: $0 [-i] [-s] [-d <output_dir>] -- [<command>]
<command> is the command you would normally run when you are in
tools/testing/selftests/bpf. e.g:
$0 -- ./test_progs -t test_lsm
-If no command is specified, "${DEFAULT_COMMAND}" will be run by
-default.
+If no command is specified and a debug shell (-s) is not requested,
+"${DEFAULT_COMMAND}" will be run by default.
If you build your kernel using KBUILD_OUTPUT= or O= options, these
can be passed as environment variables to the script:
@@ -46,6 +49,9 @@ Options:
-d) Update the output directory (default: ${OUTPUT_DIR})
-j) Number of jobs for compilation, similar to -j in make
(default: ${NUM_COMPILE_JOBS})
+ -s) Instead of powering off the VM, start an interactive
+ shell. If <command> is specified, the shell runs after
+ the command finishes executing
EOF
}
@@ -146,7 +152,7 @@ update_init_script()
local init_script_dir="${OUTPUT_DIR}/${MOUNT_DIR}/etc/rcS.d"
local init_script="${init_script_dir}/S50-startup"
local command="$1"
- local log_file="$2"
+ local exit_command="$2"
mount_image
@@ -160,17 +166,26 @@ EOF
fi
- sudo bash -c "cat >${init_script}" <<EOF
-#!/bin/bash
+ sudo bash -c "echo '#!/bin/bash' > ${init_script}"
+
+ if [[ "${command}" != "" ]]; then
+ sudo bash -c "cat >>${init_script}" <<EOF
+# Have a default value in the exit status file
+# incase the VM is forcefully stopped.
+echo "130" > "/root/${EXIT_STATUS_FILE}"
{
cd /root/bpf
echo ${command}
stdbuf -oL -eL ${command}
-} 2>&1 | tee /root/${log_file}
-poweroff -f
+ echo "\$?" > "/root/${EXIT_STATUS_FILE}"
+} 2>&1 | tee "/root/${LOG_FILE}"
+# Ensure that the logs are written to disk
+sync
EOF
+ fi
+ sudo bash -c "echo ${exit_command} >> ${init_script}"
sudo chmod a+x "${init_script}"
unmount_image
}
@@ -221,10 +236,12 @@ EOF
copy_logs()
{
local mount_dir="${OUTPUT_DIR}/${MOUNT_DIR}"
- local log_file="${mount_dir}/root/$1"
+ local log_file="${mount_dir}/root/${LOG_FILE}"
+ local exit_status_file="${mount_dir}/root/${EXIT_STATUS_FILE}"
mount_image
sudo cp ${log_file} "${OUTPUT_DIR}"
+ sudo cp ${exit_status_file} "${OUTPUT_DIR}"
sudo rm -f ${log_file}
unmount_image
}
@@ -263,14 +280,15 @@ main()
{
local script_dir="$(cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)"
local kernel_checkout=$(realpath "${script_dir}"/../../../../)
- local log_file="$(date +"bpf_selftests.%Y-%m-%d_%H-%M-%S.log")"
# By default the script searches for the kernel in the checkout directory but
# it also obeys environment variables O= and KBUILD_OUTPUT=
local kernel_bzimage="${kernel_checkout}/${X86_BZIMAGE}"
local command="${DEFAULT_COMMAND}"
local update_image="no"
+ local exit_command="poweroff -f"
+ local debug_shell="no"
- while getopts 'hkid:j:' opt; do
+ while getopts 'hskid:j:' opt; do
case ${opt} in
i)
update_image="yes"
@@ -281,6 +299,11 @@ main()
j)
NUM_COMPILE_JOBS="$OPTARG"
;;
+ s)
+ command=""
+ debug_shell="yes"
+ exit_command="bash"
+ ;;
h)
usage
exit 0
@@ -299,7 +322,7 @@ main()
done
shift $((OPTIND -1))
- if [[ $# -eq 0 ]]; then
+ if [[ $# -eq 0 && "${debug_shell}" == "no" ]]; then
echo "No command specified, will run ${DEFAULT_COMMAND} in the vm"
else
command="$@"
@@ -347,19 +370,25 @@ main()
fi
update_selftests "${kernel_checkout}" "${make_command}"
- update_init_script "${command}" "${log_file}"
+ update_init_script "${command}" "${exit_command}"
run_vm "${kernel_bzimage}"
- copy_logs "${log_file}"
- echo "Logs saved in ${OUTPUT_DIR}/${log_file}"
+ if [[ "${command}" != "" ]]; then
+ copy_logs
+ echo "Logs saved in ${OUTPUT_DIR}/${LOG_FILE}"
+ fi
}
catch()
{
local exit_code=$1
+ local exit_status_file="${OUTPUT_DIR}/${EXIT_STATUS_FILE}"
# This is just a cleanup and the directory may
# have already been unmounted. So, don't let this
# clobber the error code we intend to return.
unmount_image || true
+ if [[ -f "${exit_status_file}" ]]; then
+ exit_code="$(cat ${exit_status_file})"
+ fi
exit ${exit_code}
}
diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index f4a96d5ff524..1135fb980814 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -18,12 +18,7 @@
* These selftests test AF_XDP SKB and Native/DRV modes using veth
* Virtual Ethernet interfaces.
*
- * The following tests are run:
- *
- * 1. AF_XDP SKB mode
- * Generic mode XDP is driver independent, used when the driver does
- * not have support for XDP. Works on any netdevice using sockets and
- * generic XDP path. XDP hook from netif_receive_skb().
+ * For each mode, the following tests are run:
* a. nopoll - soft-irq processing
* b. poll - using poll() syscall
* c. Socket Teardown
@@ -33,19 +28,25 @@
* Configure sockets as bi-directional tx/rx sockets, sets up fill and
* completion rings on each socket, tx/rx in both directions. Only nopoll
* mode is used
+ * e. Statistics
+ * Trigger some error conditions and ensure that the appropriate statistics
+ * are incremented. Within this test, the following statistics are tested:
+ * i. rx dropped
+ * Increase the UMEM frame headroom to a value which results in
+ * insufficient space in the rx buffer for both the packet and the headroom.
+ * ii. tx invalid
+ * Set the 'len' field of tx descriptors to an invalid value (umem frame
+ * size + 1).
+ * iii. rx ring full
+ * Reduce the size of the RX ring to a fraction of the fill ring size.
+ * iv. fill queue empty
+ * Do not populate the fill queue and then try to receive pkts.
+ * f. bpf_link resource persistence
+ * Configure sockets at indexes 0 and 1, run a traffic on queue ids 0,
+ * then remove xsk sockets from queue 0 on both veth interfaces and
+ * finally run a traffic on queues ids 1
*
- * 2. AF_XDP DRV/Native mode
- * Works on any netdevice with XDP_REDIRECT support, driver dependent. Processes
- * packets before SKB allocation. Provides better performance than SKB. Driver
- * hook available just after DMA of buffer descriptor.
- * a. nopoll
- * b. poll
- * c. Socket Teardown
- * d. Bi-directional sockets
- * - Only copy mode is supported because veth does not currently support
- * zero-copy mode
- *
- * Total tests: 8
+ * Total tests: 12
*
* Flow:
* -----
@@ -58,7 +59,7 @@
* - Rx thread verifies if all 10k packets were received and delivered in-order,
* and have the right content
*
- * Enable/disable debug mode:
+ * Enable/disable packet dump mode:
* --------------------------
* To enable L2 - L4 headers and payload dump of each packet on STDOUT, add
* parameter -D to params array in test_xsk.sh, i.e. params=("-S" "-D")
@@ -96,35 +97,34 @@ typedef __u16 __sum16;
#include "xdpxceiver.h"
#include "../kselftest.h"
+static const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62";
+static const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61";
+static const char *IP1 = "192.168.100.162";
+static const char *IP2 = "192.168.100.161";
+static const u16 UDP_PORT1 = 2020;
+static const u16 UDP_PORT2 = 2121;
+
static void __exit_with_error(int error, const char *file, const char *func, int line)
{
- ksft_test_result_fail
- ("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error, strerror(error));
- ksft_exit_xfail();
+ if (configured_mode == TEST_MODE_UNCONFIGURED) {
+ ksft_exit_fail_msg
+ ("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error, strerror(error));
+ } else {
+ ksft_test_result_fail
+ ("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error, strerror(error));
+ ksft_exit_xfail();
+ }
}
#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
#define print_ksft_result(void)\
- (ksft_test_result_pass("PASS: %s %s %s%s\n", uut ? "DRV" : "SKB", opt_poll ? "POLL" :\
- "NOPOLL", opt_teardown ? "Socket Teardown" : "",\
- opt_bidi ? "Bi-directional Sockets" : ""))
-
-static void pthread_init_mutex(void)
-{
- pthread_mutex_init(&sync_mutex, NULL);
- pthread_mutex_init(&sync_mutex_tx, NULL);
- pthread_cond_init(&signal_rx_condition, NULL);
- pthread_cond_init(&signal_tx_condition, NULL);
-}
-
-static void pthread_destroy_mutex(void)
-{
- pthread_mutex_destroy(&sync_mutex);
- pthread_mutex_destroy(&sync_mutex_tx);
- pthread_cond_destroy(&signal_rx_condition);
- pthread_cond_destroy(&signal_tx_condition);
-}
+ (ksft_test_result_pass("PASS: %s %s %s%s%s%s\n", configured_mode ? "DRV" : "SKB",\
+ test_type == TEST_TYPE_POLL ? "POLL" : "NOPOLL",\
+ test_type == TEST_TYPE_TEARDOWN ? "Socket Teardown" : "",\
+ test_type == TEST_TYPE_BIDI ? "Bi-directional Sockets" : "",\
+ test_type == TEST_TYPE_STATS ? "Stats" : "",\
+ test_type == TEST_TYPE_BPF_RES ? "BPF RES" : ""))
static void *memset32_htonl(void *dest, u32 val, u32 size)
{
@@ -143,24 +143,11 @@ static void *memset32_htonl(void *dest, u32 val, u32 size)
}
/*
- * This function code has been taken from
- * Linux kernel lib/checksum.c
- */
-static inline unsigned short from32to16(unsigned int x)
-{
- /* add up 16-bit and 16-bit for 16+c bit */
- x = (x & 0xffff) + (x >> 16);
- /* add up carry.. */
- x = (x & 0xffff) + (x >> 16);
- return x;
-}
-
-/*
* Fold a partial checksum
* This function code has been taken from
* Linux kernel include/asm-generic/checksum.h
*/
-static inline __u16 csum_fold(__u32 csum)
+static __u16 csum_fold(__u32 csum)
{
u32 sum = (__force u32)csum;
@@ -173,7 +160,7 @@ static inline __u16 csum_fold(__u32 csum)
* This function code has been taken from
* Linux kernel lib/checksum.c
*/
-static inline u32 from64to32(u64 x)
+static u32 from64to32(u64 x)
{
/* add up 32-bit and 32-bit for 32+c bit */
x = (x & 0xffffffff) + (x >> 32);
@@ -182,13 +169,11 @@ static inline u32 from64to32(u64 x)
return (u32)x;
}
-__u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum);
-
/*
* This function code has been taken from
* Linux kernel lib/checksum.c
*/
-__u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum)
+static __u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum)
{
unsigned long long s = (__force u32)sum;
@@ -206,13 +191,12 @@ __u32 csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u3
* This function has been taken from
* Linux kernel include/asm-generic/checksum.h
*/
-static inline __u16
-csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum)
+static __u16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, __u8 proto, __u32 sum)
{
return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
}
-static inline u16 udp_csum(u32 saddr, u32 daddr, u32 len, u8 proto, u16 *udp_pkt)
+static u16 udp_csum(u32 saddr, u32 daddr, u32 len, u8 proto, u16 *udp_pkt)
{
u32 csum = 0;
u32 cnt = 0;
@@ -267,26 +251,37 @@ static void gen_eth_frame(struct xsk_umem_info *umem, u64 addr)
memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data, PKT_SIZE);
}
-static void xsk_configure_umem(struct ifobject *data, void *buffer, u64 size)
+static void xsk_configure_umem(struct ifobject *data, void *buffer, int idx)
{
+ struct xsk_umem_config cfg = {
+ .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+ .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
+ .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
+ .frame_headroom = frame_headroom,
+ .flags = XSK_UMEM__DEFAULT_FLAGS
+ };
+ int size = num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE;
+ struct xsk_umem_info *umem;
int ret;
- data->umem = calloc(1, sizeof(struct xsk_umem_info));
- if (!data->umem)
+ umem = calloc(1, sizeof(struct xsk_umem_info));
+ if (!umem)
exit_with_error(errno);
- ret = xsk_umem__create(&data->umem->umem, buffer, size,
- &data->umem->fq, &data->umem->cq, NULL);
+ ret = xsk_umem__create(&umem->umem, buffer, size,
+ &umem->fq, &umem->cq, &cfg);
if (ret)
exit_with_error(ret);
- data->umem->buffer = buffer;
+ umem->buffer = buffer;
+
+ data->umem_arr[idx] = umem;
}
static void xsk_populate_fill_ring(struct xsk_umem_info *umem)
{
int ret, i;
- u32 idx;
+ u32 idx = 0;
ret = xsk_ring_prod__reserve(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS, &idx);
if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS)
@@ -296,51 +291,48 @@ static void xsk_populate_fill_ring(struct xsk_umem_info *umem)
xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS);
}
-static int xsk_configure_socket(struct ifobject *ifobject)
+static int xsk_configure_socket(struct ifobject *ifobject, int idx)
{
struct xsk_socket_config cfg;
+ struct xsk_socket_info *xsk;
struct xsk_ring_cons *rxr;
struct xsk_ring_prod *txr;
int ret;
- ifobject->xsk = calloc(1, sizeof(struct xsk_socket_info));
- if (!ifobject->xsk)
+ xsk = calloc(1, sizeof(struct xsk_socket_info));
+ if (!xsk)
exit_with_error(errno);
- ifobject->xsk->umem = ifobject->umem;
- cfg.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+ xsk->umem = ifobject->umem;
+ cfg.rx_size = rxqsize;
cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
cfg.libbpf_flags = 0;
- cfg.xdp_flags = opt_xdp_flags;
- cfg.bind_flags = opt_xdp_bind_flags;
+ cfg.xdp_flags = xdp_flags;
+ cfg.bind_flags = xdp_bind_flags;
- if (!opt_bidi) {
- rxr = (ifobject->fv.vector == rx) ? &ifobject->xsk->rx : NULL;
- txr = (ifobject->fv.vector == tx) ? &ifobject->xsk->tx : NULL;
+ if (test_type != TEST_TYPE_BIDI) {
+ rxr = (ifobject->fv.vector == rx) ? &xsk->rx : NULL;
+ txr = (ifobject->fv.vector == tx) ? &xsk->tx : NULL;
} else {
- rxr = &ifobject->xsk->rx;
- txr = &ifobject->xsk->tx;
+ rxr = &xsk->rx;
+ txr = &xsk->tx;
}
- ret = xsk_socket__create(&ifobject->xsk->xsk, ifobject->ifname,
- opt_queue, ifobject->umem->umem, rxr, txr, &cfg);
-
+ ret = xsk_socket__create(&xsk->xsk, ifobject->ifname, idx,
+ ifobject->umem->umem, rxr, txr, &cfg);
if (ret)
return 1;
+ ifobject->xsk_arr[idx] = xsk;
+
return 0;
}
static struct option long_options[] = {
{"interface", required_argument, 0, 'i'},
{"queue", optional_argument, 0, 'q'},
- {"poll", no_argument, 0, 'p'},
- {"xdp-skb", no_argument, 0, 'S'},
- {"xdp-native", no_argument, 0, 'N'},
- {"copy", no_argument, 0, 'c'},
- {"tear-down", no_argument, 0, 'T'},
- {"bidi", optional_argument, 0, 'B'},
- {"debug", optional_argument, 0, 'D'},
+ {"dump-pkts", optional_argument, 0, 'D'},
+ {"verbose", no_argument, 0, 'v'},
{"tx-pkt-count", optional_argument, 0, 'C'},
{0, 0, 0, 0}
};
@@ -352,23 +344,21 @@ static void usage(const char *prog)
" Options:\n"
" -i, --interface Use interface\n"
" -q, --queue=n Use queue n (default 0)\n"
- " -p, --poll Use poll syscall\n"
- " -S, --xdp-skb=n Use XDP SKB mode\n"
- " -N, --xdp-native=n Enforce XDP DRV (native) mode\n"
- " -c, --copy Force copy mode\n"
- " -T, --tear-down Tear down sockets by repeatedly recreating them\n"
- " -B, --bidi Bi-directional sockets test\n"
- " -D, --debug Debug mode - dump packets L2 - L5\n"
+ " -D, --dump-pkts Dump packets L2 - L5\n"
+ " -v, --verbose Verbose output\n"
" -C, --tx-pkt-count=n Number of packets to send\n";
ksft_print_msg(str, prog);
}
-static bool switch_namespace(int idx)
+static int switch_namespace(const char *nsname)
{
char fqns[26] = "/var/run/netns/";
int nsfd;
- strncat(fqns, ifdict[idx]->nsname, sizeof(fqns) - strlen(fqns) - 1);
+ if (!nsname || strlen(nsname) == 0)
+ return -1;
+
+ strncat(fqns, nsname, sizeof(fqns) - strlen(fqns) - 1);
nsfd = open(fqns, O_RDONLY);
if (nsfd == -1)
@@ -377,26 +367,9 @@ static bool switch_namespace(int idx)
if (setns(nsfd, 0) == -1)
exit_with_error(errno);
- return true;
-}
+ print_verbose("NS switched: %s\n", nsname);
-static void *nsswitchthread(void *args)
-{
- struct targs *targs = args;
-
- targs->retptr = false;
-
- if (switch_namespace(targs->idx)) {
- ifdict[targs->idx]->ifindex = if_nametoindex(ifdict[targs->idx]->ifname);
- if (!ifdict[targs->idx]->ifindex) {
- ksft_test_result_fail("ERROR: [%s] interface \"%s\" does not exist\n",
- __func__, ifdict[targs->idx]->ifname);
- } else {
- ksft_print_msg("Interface found: %s\n", ifdict[targs->idx]->ifname);
- targs->retptr = true;
- }
- }
- pthread_exit(NULL);
+ return nsfd;
}
static int validate_interfaces(void)
@@ -408,33 +381,6 @@ static int validate_interfaces(void)
ret = false;
ksft_test_result_fail("ERROR: interfaces: -i <int>,<ns> -i <int>,<ns>.");
}
- if (strcmp(ifdict[i]->nsname, "")) {
- struct targs *targs;
-
- targs = malloc(sizeof(*targs));
- if (!targs)
- exit_with_error(errno);
-
- targs->idx = i;
- if (pthread_create(&ns_thread, NULL, nsswitchthread, targs))
- exit_with_error(errno);
-
- pthread_join(ns_thread, NULL);
-
- if (targs->retptr)
- ksft_print_msg("NS switched: %s\n", ifdict[i]->nsname);
-
- free(targs);
- } else {
- ifdict[i]->ifindex = if_nametoindex(ifdict[i]->ifname);
- if (!ifdict[i]->ifindex) {
- ksft_test_result_fail
- ("ERROR: interface \"%s\" does not exist\n", ifdict[i]->ifname);
- ret = false;
- } else {
- ksft_print_msg("Interface found: %s\n", ifdict[i]->ifname);
- }
- }
}
return ret;
}
@@ -446,7 +392,7 @@ static void parse_command_line(int argc, char **argv)
opterr = 0;
for (;;) {
- c = getopt_long(argc, argv, "i:q:pSNcTBDC:", long_options, &option_index);
+ c = getopt_long(argc, argv, "i:DC:v", long_options, &option_index);
if (c == -1)
break;
@@ -466,43 +412,26 @@ static void parse_command_line(int argc, char **argv)
MAX_INTERFACES_NAMESPACE_CHARS);
interface_index++;
break;
- case 'q':
- opt_queue = atoi(optarg);
- break;
- case 'p':
- opt_poll = 1;
- break;
- case 'S':
- opt_xdp_flags |= XDP_FLAGS_SKB_MODE;
- opt_xdp_bind_flags |= XDP_COPY;
- uut = ORDER_CONTENT_VALIDATE_XDP_SKB;
- break;
- case 'N':
- opt_xdp_flags |= XDP_FLAGS_DRV_MODE;
- opt_xdp_bind_flags |= XDP_COPY;
- uut = ORDER_CONTENT_VALIDATE_XDP_DRV;
- break;
- case 'c':
- opt_xdp_bind_flags |= XDP_COPY;
- break;
- case 'T':
- opt_teardown = 1;
- break;
- case 'B':
- opt_bidi = 1;
- break;
case 'D':
debug_pkt_dump = 1;
break;
case 'C':
opt_pkt_count = atoi(optarg);
break;
+ case 'v':
+ opt_verbose = 1;
+ break;
default:
usage(basename(argv[0]));
ksft_exit_xfail();
}
}
+ if (!opt_pkt_count) {
+ print_verbose("No tx-pkt-count specified, using default %u\n", DEFAULT_PKT_CNT);
+ opt_pkt_count = DEFAULT_PKT_CNT;
+ }
+
if (!validate_interfaces()) {
usage(basename(argv[0]));
ksft_exit_xfail();
@@ -519,7 +448,7 @@ static void kick_tx(struct xsk_socket_info *xsk)
exit_with_error(errno);
}
-static inline void complete_tx_only(struct xsk_socket_info *xsk, int batch_size)
+static void complete_tx_only(struct xsk_socket_info *xsk, int batch_size)
{
unsigned int rcvd;
u32 idx;
@@ -527,7 +456,7 @@ static inline void complete_tx_only(struct xsk_socket_info *xsk, int batch_size)
if (!xsk->outstanding_tx)
return;
- if (!NEED_WAKEUP || xsk_ring_prod__needs_wakeup(&xsk->tx))
+ if (xsk_ring_prod__needs_wakeup(&xsk->tx))
kick_tx(xsk);
rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx);
@@ -597,8 +526,10 @@ static void rx_pkt(struct xsk_socket_info *xsk, struct pollfd *fds)
static void tx_only(struct xsk_socket_info *xsk, u32 *frameptr, int batch_size)
{
- u32 idx;
+ u32 idx = 0;
unsigned int i;
+ bool tx_invalid_test = stat_test_type == STAT_TEST_TX_INVALID;
+ u32 len = tx_invalid_test ? XSK_UMEM__DEFAULT_FRAME_SIZE + 1 : PKT_SIZE;
while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) < batch_size)
complete_tx_only(xsk, batch_size);
@@ -607,17 +538,21 @@ static void tx_only(struct xsk_socket_info *xsk, u32 *frameptr, int batch_size)
struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i);
tx_desc->addr = (*frameptr + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT;
- tx_desc->len = PKT_SIZE;
+ tx_desc->len = len;
}
xsk_ring_prod__submit(&xsk->tx, batch_size);
- xsk->outstanding_tx += batch_size;
+ if (!tx_invalid_test) {
+ xsk->outstanding_tx += batch_size;
+ } else if (xsk_ring_prod__needs_wakeup(&xsk->tx)) {
+ kick_tx(xsk);
+ }
*frameptr += batch_size;
*frameptr %= num_frames;
complete_tx_only(xsk, batch_size);
}
-static inline int get_batch_size(int pkt_cnt)
+static int get_batch_size(int pkt_cnt)
{
if (!opt_pkt_count)
return BATCH_SIZE;
@@ -654,7 +589,7 @@ static void tx_only_all(struct ifobject *ifobject)
while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) {
int batch_size = get_batch_size(pkt_cnt);
- if (opt_poll) {
+ if (test_type == TEST_TYPE_POLL) {
ret = poll(fds, 1, POLL_TMOUT);
if (ret <= 0)
continue;
@@ -673,48 +608,43 @@ static void tx_only_all(struct ifobject *ifobject)
static void worker_pkt_dump(void)
{
- struct in_addr ipaddr;
+ struct ethhdr *ethhdr;
+ struct iphdr *iphdr;
+ struct udphdr *udphdr;
+ char s[128];
+ int payload;
+ void *ptr;
fprintf(stdout, "---------------------------------------\n");
for (int iter = 0; iter < num_frames - 1; iter++) {
+ ptr = pkt_buf[iter]->payload;
+ ethhdr = ptr;
+ iphdr = ptr + sizeof(*ethhdr);
+ udphdr = ptr + sizeof(*ethhdr) + sizeof(*iphdr);
+
/*extract L2 frame */
fprintf(stdout, "DEBUG>> L2: dst mac: ");
for (int i = 0; i < ETH_ALEN; i++)
- fprintf(stdout, "%02X", ((struct ethhdr *)
- pkt_buf[iter]->payload)->h_dest[i]);
+ fprintf(stdout, "%02X", ethhdr->h_dest[i]);
fprintf(stdout, "\nDEBUG>> L2: src mac: ");
for (int i = 0; i < ETH_ALEN; i++)
- fprintf(stdout, "%02X", ((struct ethhdr *)
- pkt_buf[iter]->payload)->h_source[i]);
+ fprintf(stdout, "%02X", ethhdr->h_source[i]);
/*extract L3 frame */
- fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n",
- ((struct iphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr)))->ihl);
-
- ipaddr.s_addr =
- ((struct iphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr)))->saddr;
- fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n", inet_ntoa(ipaddr));
-
- ipaddr.s_addr =
- ((struct iphdr *)(pkt_buf[iter]->payload + sizeof(struct ethhdr)))->daddr;
- fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n", inet_ntoa(ipaddr));
-
+ fprintf(stdout, "\nDEBUG>> L3: ip_hdr->ihl: %02X\n", iphdr->ihl);
+ fprintf(stdout, "DEBUG>> L3: ip_hdr->saddr: %s\n",
+ inet_ntop(AF_INET, &iphdr->saddr, s, sizeof(s)));
+ fprintf(stdout, "DEBUG>> L3: ip_hdr->daddr: %s\n",
+ inet_ntop(AF_INET, &iphdr->daddr, s, sizeof(s)));
/*extract L4 frame */
- fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n",
- ntohs(((struct udphdr *)(pkt_buf[iter]->payload +
- sizeof(struct ethhdr) +
- sizeof(struct iphdr)))->source));
-
- fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n",
- ntohs(((struct udphdr *)(pkt_buf[iter]->payload +
- sizeof(struct ethhdr) +
- sizeof(struct iphdr)))->dest));
+ fprintf(stdout, "DEBUG>> L4: udp_hdr->src: %d\n", ntohs(udphdr->source));
+ fprintf(stdout, "DEBUG>> L4: udp_hdr->dst: %d\n", ntohs(udphdr->dest));
/*extract L5 frame */
- int payload = *((uint32_t *)(pkt_buf[iter]->payload + PKT_HDR_SIZE));
+ payload = *((uint32_t *)(ptr + PKT_HDR_SIZE));
if (payload == EOT) {
- ksft_print_msg("End-of-transmission frame received\n");
+ print_verbose("End-of-transmission frame received\n");
fprintf(stdout, "---------------------------------------\n");
break;
}
@@ -723,6 +653,48 @@ static void worker_pkt_dump(void)
}
}
+static void worker_stats_validate(struct ifobject *ifobject)
+{
+ struct xdp_statistics stats;
+ socklen_t optlen;
+ int err;
+ struct xsk_socket *xsk = stat_test_type == STAT_TEST_TX_INVALID ?
+ ifdict[!ifobject->ifdict_index]->xsk->xsk :
+ ifobject->xsk->xsk;
+ int fd = xsk_socket__fd(xsk);
+ unsigned long xsk_stat = 0, expected_stat = opt_pkt_count;
+
+ sigvar = 0;
+
+ optlen = sizeof(stats);
+ err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
+ if (err)
+ return;
+
+ if (optlen == sizeof(struct xdp_statistics)) {
+ switch (stat_test_type) {
+ case STAT_TEST_RX_DROPPED:
+ xsk_stat = stats.rx_dropped;
+ break;
+ case STAT_TEST_TX_INVALID:
+ xsk_stat = stats.tx_invalid_descs;
+ break;
+ case STAT_TEST_RX_FULL:
+ xsk_stat = stats.rx_ring_full;
+ expected_stat -= RX_FULL_RXQSIZE;
+ break;
+ case STAT_TEST_RX_FILL_EMPTY:
+ xsk_stat = stats.rx_fill_ring_empty_descs;
+ break;
+ default:
+ break;
+ }
+
+ if (xsk_stat == expected_stat)
+ sigvar = 1;
+ }
+}
+
static void worker_pkt_validate(void)
{
u32 payloadseqnum = -2;
@@ -746,7 +718,7 @@ static void worker_pkt_validate(void)
}
if (payloadseqnum == EOT) {
- ksft_print_msg("End-of-transmission frame received: PASS\n");
+ print_verbose("End-of-transmission frame received: PASS\n");
sigvar = 1;
break;
}
@@ -773,37 +745,69 @@ static void worker_pkt_validate(void)
}
}
-static void thread_common_ops(struct ifobject *ifobject, void *bufs, pthread_mutex_t *mutexptr,
- atomic_int *spinningptr)
+static void thread_common_ops(struct ifobject *ifobject, void *bufs)
{
+ int umem_sz = num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE;
int ctr = 0;
int ret;
- xsk_configure_umem(ifobject, bufs, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE);
- ret = xsk_configure_socket(ifobject);
+ ifobject->ns_fd = switch_namespace(ifobject->nsname);
+
+ if (test_type == TEST_TYPE_BPF_RES)
+ umem_sz *= 2;
+
+ bufs = mmap(NULL, umem_sz,
+ PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (bufs == MAP_FAILED)
+ exit_with_error(errno);
+
+ xsk_configure_umem(ifobject, bufs, 0);
+ ifobject->umem = ifobject->umem_arr[0];
+ ret = xsk_configure_socket(ifobject, 0);
/* Retry Create Socket if it fails as xsk_socket__create()
* is asynchronous
- *
- * Essential to lock Mutex here to prevent Tx thread from
- * entering before Rx and causing a deadlock
*/
- pthread_mutex_lock(mutexptr);
while (ret && ctr < SOCK_RECONF_CTR) {
- atomic_store(spinningptr, 1);
- xsk_configure_umem(ifobject, bufs, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE);
- ret = xsk_configure_socket(ifobject);
+ xsk_configure_umem(ifobject, bufs, 0);
+ ifobject->umem = ifobject->umem_arr[0];
+ ret = xsk_configure_socket(ifobject, 0);
usleep(USLEEP_MAX);
ctr++;
}
- atomic_store(spinningptr, 0);
- pthread_mutex_unlock(mutexptr);
if (ctr >= SOCK_RECONF_CTR)
exit_with_error(ret);
+
+ ifobject->umem = ifobject->umem_arr[0];
+ ifobject->xsk = ifobject->xsk_arr[0];
+
+ if (test_type == TEST_TYPE_BPF_RES) {
+ xsk_configure_umem(ifobject, (u8 *)bufs + (umem_sz / 2), 1);
+ ifobject->umem = ifobject->umem_arr[1];
+ ret = xsk_configure_socket(ifobject, 1);
+ }
+
+ ifobject->umem = ifobject->umem_arr[0];
+ ifobject->xsk = ifobject->xsk_arr[0];
+ print_verbose("Interface [%s] vector [%s]\n",
+ ifobject->ifname, ifobject->fv.vector == tx ? "Tx" : "Rx");
+}
+
+static bool testapp_is_test_two_stepped(void)
+{
+ return (test_type != TEST_TYPE_BIDI && test_type != TEST_TYPE_BPF_RES) || second_step;
+}
+
+static void testapp_cleanup_xsk_res(struct ifobject *ifobj)
+{
+ if (testapp_is_test_two_stepped()) {
+ xsk_socket__delete(ifobj->xsk->xsk);
+ (void)xsk_umem__delete(ifobj->umem->umem);
+ }
}
-static void *worker_testapp_validate(void *arg)
+static void *worker_testapp_validate_tx(void *arg)
{
struct udphdr *udp_hdr =
(struct udphdr *)(pkt_data + sizeof(struct ethhdr) + sizeof(struct iphdr));
@@ -813,149 +817,97 @@ static void *worker_testapp_validate(void *arg)
struct generic_data data;
void *bufs = NULL;
- pthread_attr_setstacksize(&attr, THREAD_STACK);
-
- if (!bidi_pass) {
- bufs = mmap(NULL, num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE,
- PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (bufs == MAP_FAILED)
- exit_with_error(errno);
-
- if (strcmp(ifobject->nsname, ""))
- switch_namespace(ifobject->ifdict_index);
+ if (!second_step)
+ thread_common_ops(ifobject, bufs);
+
+ for (int i = 0; i < num_frames; i++) {
+ /*send EOT frame */
+ if (i == (num_frames - 1))
+ data.seqnum = -1;
+ else
+ data.seqnum = i;
+ gen_udp_hdr(&data, ifobject, udp_hdr);
+ gen_ip_hdr(ifobject, ip_hdr);
+ gen_udp_csum(udp_hdr, ip_hdr);
+ gen_eth_hdr(ifobject, eth_hdr);
+ gen_eth_frame(ifobject->umem, i * XSK_UMEM__DEFAULT_FRAME_SIZE);
}
- if (ifobject->fv.vector == tx) {
- int spinningrxctr = 0;
-
- if (!bidi_pass)
- thread_common_ops(ifobject, bufs, &sync_mutex_tx, &spinning_tx);
-
- while (atomic_load(&spinning_rx) && spinningrxctr < SOCK_RECONF_CTR) {
- spinningrxctr++;
- usleep(USLEEP_MAX);
- }
+ print_verbose("Sending %d packets on interface %s\n",
+ (opt_pkt_count - 1), ifobject->ifname);
+ tx_only_all(ifobject);
- ksft_print_msg("Interface [%s] vector [Tx]\n", ifobject->ifname);
- for (int i = 0; i < num_frames; i++) {
- /*send EOT frame */
- if (i == (num_frames - 1))
- data.seqnum = -1;
- else
- data.seqnum = i;
- gen_udp_hdr(&data, ifobject, udp_hdr);
- gen_ip_hdr(ifobject, ip_hdr);
- gen_udp_csum(udp_hdr, ip_hdr);
- gen_eth_hdr(ifobject, eth_hdr);
- gen_eth_frame(ifobject->umem, i * XSK_UMEM__DEFAULT_FRAME_SIZE);
- }
+ testapp_cleanup_xsk_res(ifobject);
+ pthread_exit(NULL);
+}
- ksft_print_msg("Sending %d packets on interface %s\n",
- (opt_pkt_count - 1), ifobject->ifname);
- tx_only_all(ifobject);
- } else if (ifobject->fv.vector == rx) {
- struct pollfd fds[MAX_SOCKS] = { };
- int ret;
+static void *worker_testapp_validate_rx(void *arg)
+{
+ struct ifobject *ifobject = (struct ifobject *)arg;
+ struct pollfd fds[MAX_SOCKS] = { };
+ void *bufs = NULL;
- if (!bidi_pass)
- thread_common_ops(ifobject, bufs, &sync_mutex_tx, &spinning_rx);
+ if (!second_step)
+ thread_common_ops(ifobject, bufs);
- ksft_print_msg("Interface [%s] vector [Rx]\n", ifobject->ifname);
+ if (stat_test_type != STAT_TEST_RX_FILL_EMPTY)
xsk_populate_fill_ring(ifobject->umem);
- TAILQ_INIT(&head);
- if (debug_pkt_dump) {
- pkt_buf = calloc(num_frames, sizeof(*pkt_buf));
- if (!pkt_buf)
- exit_with_error(errno);
- }
+ TAILQ_INIT(&head);
+ if (debug_pkt_dump) {
+ pkt_buf = calloc(num_frames, sizeof(*pkt_buf));
+ if (!pkt_buf)
+ exit_with_error(errno);
+ }
- fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk);
- fds[0].events = POLLIN;
+ fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk);
+ fds[0].events = POLLIN;
- pthread_mutex_lock(&sync_mutex);
- pthread_cond_signal(&signal_rx_condition);
- pthread_mutex_unlock(&sync_mutex);
+ pthread_barrier_wait(&barr);
- while (1) {
- if (opt_poll) {
- ret = poll(fds, 1, POLL_TMOUT);
- if (ret <= 0)
- continue;
- }
+ while (1) {
+ if (test_type != TEST_TYPE_STATS) {
rx_pkt(ifobject->xsk, fds);
worker_pkt_validate();
-
- if (sigvar)
- break;
+ } else {
+ worker_stats_validate(ifobject);
}
+ if (sigvar)
+ break;
+ }
- ksft_print_msg("Received %d packets on interface %s\n",
- pkt_counter, ifobject->ifname);
+ print_verbose("Received %d packets on interface %s\n",
+ pkt_counter, ifobject->ifname);
- if (opt_teardown)
- ksft_print_msg("Destroying socket\n");
- }
+ if (test_type == TEST_TYPE_TEARDOWN)
+ print_verbose("Destroying socket\n");
- if (!opt_bidi || bidi_pass) {
- xsk_socket__delete(ifobject->xsk->xsk);
- (void)xsk_umem__delete(ifobject->umem->umem);
- }
+ testapp_cleanup_xsk_res(ifobject);
pthread_exit(NULL);
}
static void testapp_validate(void)
{
- struct timespec max_wait = { 0, 0 };
+ bool bidi = test_type == TEST_TYPE_BIDI;
+ bool bpf = test_type == TEST_TYPE_BPF_RES;
- pthread_attr_init(&attr);
- pthread_attr_setstacksize(&attr, THREAD_STACK);
-
- if (opt_bidi && bidi_pass) {
- pthread_init_mutex();
- if (!switching_notify) {
- ksft_print_msg("Switching Tx/Rx vectors\n");
- switching_notify++;
- }
- }
-
- pthread_mutex_lock(&sync_mutex);
+ if (pthread_barrier_init(&barr, NULL, 2))
+ exit_with_error(errno);
/*Spawn RX thread */
- if (!opt_bidi || !bidi_pass) {
- if (pthread_create(&t0, &attr, worker_testapp_validate, ifdict[1]))
- exit_with_error(errno);
- } else if (opt_bidi && bidi_pass) {
- /*switch Tx/Rx vectors */
- ifdict[0]->fv.vector = rx;
- if (pthread_create(&t0, &attr, worker_testapp_validate, ifdict[0]))
- exit_with_error(errno);
- }
+ pthread_create(&t0, NULL, ifdict_rx->func_ptr, ifdict_rx);
- if (clock_gettime(CLOCK_REALTIME, &max_wait))
+ pthread_barrier_wait(&barr);
+ if (pthread_barrier_destroy(&barr))
exit_with_error(errno);
- max_wait.tv_sec += TMOUT_SEC;
-
- if (pthread_cond_timedwait(&signal_rx_condition, &sync_mutex, &max_wait) == ETIMEDOUT)
- exit_with_error(errno);
-
- pthread_mutex_unlock(&sync_mutex);
/*Spawn TX thread */
- if (!opt_bidi || !bidi_pass) {
- if (pthread_create(&t1, &attr, worker_testapp_validate, ifdict[0]))
- exit_with_error(errno);
- } else if (opt_bidi && bidi_pass) {
- /*switch Tx/Rx vectors */
- ifdict[1]->fv.vector = tx;
- if (pthread_create(&t1, &attr, worker_testapp_validate, ifdict[1]))
- exit_with_error(errno);
- }
+ pthread_create(&t1, NULL, ifdict_tx->func_ptr, ifdict_tx);
pthread_join(t1, NULL);
pthread_join(t0, NULL);
- if (debug_pkt_dump) {
+ if (debug_pkt_dump && test_type != TEST_TYPE_STATS) {
worker_pkt_dump();
for (int iter = 0; iter < num_frames - 1; iter++) {
free(pkt_buf[iter]->payload);
@@ -964,73 +916,217 @@ static void testapp_validate(void)
free(pkt_buf);
}
- if (!opt_teardown && !opt_bidi)
+ if (!(test_type == TEST_TYPE_TEARDOWN) && !bidi && !bpf && !(test_type == TEST_TYPE_STATS))
print_ksft_result();
}
-static void testapp_sockets(void)
+static void testapp_teardown(void)
+{
+ int i;
+
+ for (i = 0; i < MAX_TEARDOWN_ITER; i++) {
+ pkt_counter = 0;
+ prev_pkt = -1;
+ sigvar = 0;
+ print_verbose("Creating socket\n");
+ testapp_validate();
+ }
+
+ print_ksft_result();
+}
+
+static void swap_vectors(struct ifobject *ifobj1, struct ifobject *ifobj2)
{
- for (int i = 0; i < (opt_teardown ? MAX_TEARDOWN_ITER : MAX_BIDI_ITER); i++) {
+ void *(*tmp_func_ptr)(void *) = ifobj1->func_ptr;
+ enum fvector tmp_vector = ifobj1->fv.vector;
+
+ ifobj1->func_ptr = ifobj2->func_ptr;
+ ifobj1->fv.vector = ifobj2->fv.vector;
+
+ ifobj2->func_ptr = tmp_func_ptr;
+ ifobj2->fv.vector = tmp_vector;
+
+ ifdict_tx = ifobj1;
+ ifdict_rx = ifobj2;
+}
+
+static void testapp_bidi(void)
+{
+ for (int i = 0; i < MAX_BIDI_ITER; i++) {
pkt_counter = 0;
prev_pkt = -1;
sigvar = 0;
- ksft_print_msg("Creating socket\n");
+ print_verbose("Creating socket\n");
testapp_validate();
- opt_bidi ? bidi_pass++ : bidi_pass;
+ if (!second_step) {
+ print_verbose("Switching Tx/Rx vectors\n");
+ swap_vectors(ifdict[1], ifdict[0]);
+ }
+ second_step = true;
}
+ swap_vectors(ifdict[0], ifdict[1]);
+
print_ksft_result();
}
-static void init_iface_config(struct ifaceconfigobj *ifaceconfig)
+static void swap_xsk_res(void)
{
- /*Init interface0 */
- ifdict[0]->fv.vector = tx;
- memcpy(ifdict[0]->dst_mac, ifaceconfig->dst_mac, ETH_ALEN);
- memcpy(ifdict[0]->src_mac, ifaceconfig->src_mac, ETH_ALEN);
- ifdict[0]->dst_ip = ifaceconfig->dst_ip.s_addr;
- ifdict[0]->src_ip = ifaceconfig->src_ip.s_addr;
- ifdict[0]->dst_port = ifaceconfig->dst_port;
- ifdict[0]->src_port = ifaceconfig->src_port;
-
- /*Init interface1 */
- ifdict[1]->fv.vector = rx;
- memcpy(ifdict[1]->dst_mac, ifaceconfig->src_mac, ETH_ALEN);
- memcpy(ifdict[1]->src_mac, ifaceconfig->dst_mac, ETH_ALEN);
- ifdict[1]->dst_ip = ifaceconfig->src_ip.s_addr;
- ifdict[1]->src_ip = ifaceconfig->dst_ip.s_addr;
- ifdict[1]->dst_port = ifaceconfig->src_port;
- ifdict[1]->src_port = ifaceconfig->dst_port;
+ xsk_socket__delete(ifdict_tx->xsk->xsk);
+ xsk_umem__delete(ifdict_tx->umem->umem);
+ xsk_socket__delete(ifdict_rx->xsk->xsk);
+ xsk_umem__delete(ifdict_rx->umem->umem);
+ ifdict_tx->umem = ifdict_tx->umem_arr[1];
+ ifdict_tx->xsk = ifdict_tx->xsk_arr[1];
+ ifdict_rx->umem = ifdict_rx->umem_arr[1];
+ ifdict_rx->xsk = ifdict_rx->xsk_arr[1];
+}
+
+static void testapp_bpf_res(void)
+{
+ int i;
+
+ for (i = 0; i < MAX_BPF_ITER; i++) {
+ pkt_counter = 0;
+ prev_pkt = -1;
+ sigvar = 0;
+ print_verbose("Creating socket\n");
+ testapp_validate();
+ if (!second_step)
+ swap_xsk_res();
+ second_step = true;
+ }
+
+ print_ksft_result();
+}
+
+static void testapp_stats(void)
+{
+ for (int i = 0; i < STAT_TEST_TYPE_MAX; i++) {
+ stat_test_type = i;
+
+ /* reset defaults */
+ rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+ frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
+
+ switch (stat_test_type) {
+ case STAT_TEST_RX_DROPPED:
+ frame_headroom = XSK_UMEM__DEFAULT_FRAME_SIZE -
+ XDP_PACKET_HEADROOM - 1;
+ break;
+ case STAT_TEST_RX_FULL:
+ rxqsize = RX_FULL_RXQSIZE;
+ break;
+ default:
+ break;
+ }
+ testapp_validate();
+ }
+
+ print_ksft_result();
+}
+
+static void init_iface(struct ifobject *ifobj, const char *dst_mac,
+ const char *src_mac, const char *dst_ip,
+ const char *src_ip, const u16 dst_port,
+ const u16 src_port, enum fvector vector)
+{
+ struct in_addr ip;
+
+ memcpy(ifobj->dst_mac, dst_mac, ETH_ALEN);
+ memcpy(ifobj->src_mac, src_mac, ETH_ALEN);
+
+ inet_aton(dst_ip, &ip);
+ ifobj->dst_ip = ip.s_addr;
+
+ inet_aton(src_ip, &ip);
+ ifobj->src_ip = ip.s_addr;
+
+ ifobj->dst_port = dst_port;
+ ifobj->src_port = src_port;
+
+ if (vector == tx) {
+ ifobj->fv.vector = tx;
+ ifobj->func_ptr = worker_testapp_validate_tx;
+ ifdict_tx = ifobj;
+ } else {
+ ifobj->fv.vector = rx;
+ ifobj->func_ptr = worker_testapp_validate_rx;
+ ifdict_rx = ifobj;
+ }
+}
+
+static void run_pkt_test(int mode, int type)
+{
+ test_type = type;
+
+ /* reset defaults after potential previous test */
+ xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+ pkt_counter = 0;
+ second_step = 0;
+ prev_pkt = -1;
+ sigvar = 0;
+ stat_test_type = -1;
+ rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+ frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
+
+ configured_mode = mode;
+
+ switch (mode) {
+ case (TEST_MODE_SKB):
+ xdp_flags |= XDP_FLAGS_SKB_MODE;
+ break;
+ case (TEST_MODE_DRV):
+ xdp_flags |= XDP_FLAGS_DRV_MODE;
+ break;
+ default:
+ break;
+ }
+
+ switch (test_type) {
+ case TEST_TYPE_STATS:
+ testapp_stats();
+ break;
+ case TEST_TYPE_TEARDOWN:
+ testapp_teardown();
+ break;
+ case TEST_TYPE_BIDI:
+ testapp_bidi();
+ break;
+ case TEST_TYPE_BPF_RES:
+ testapp_bpf_res();
+ break;
+ default:
+ testapp_validate();
+ break;
+ }
}
int main(int argc, char **argv)
{
struct rlimit _rlim = { RLIM_INFINITY, RLIM_INFINITY };
+ bool failure = false;
+ int i, j;
if (setrlimit(RLIMIT_MEMLOCK, &_rlim))
exit_with_error(errno);
- const char *MAC1 = "\x00\x0A\x56\x9E\xEE\x62";
- const char *MAC2 = "\x00\x0A\x56\x9E\xEE\x61";
- const char *IP1 = "192.168.100.162";
- const char *IP2 = "192.168.100.161";
- u16 UDP_DST_PORT = 2020;
- u16 UDP_SRC_PORT = 2121;
-
- ifaceconfig = malloc(sizeof(struct ifaceconfigobj));
- memcpy(ifaceconfig->dst_mac, MAC1, ETH_ALEN);
- memcpy(ifaceconfig->src_mac, MAC2, ETH_ALEN);
- inet_aton(IP1, &ifaceconfig->dst_ip);
- inet_aton(IP2, &ifaceconfig->src_ip);
- ifaceconfig->dst_port = UDP_DST_PORT;
- ifaceconfig->src_port = UDP_SRC_PORT;
-
for (int i = 0; i < MAX_INTERFACES; i++) {
ifdict[i] = malloc(sizeof(struct ifobject));
if (!ifdict[i])
exit_with_error(errno);
ifdict[i]->ifdict_index = i;
+ ifdict[i]->xsk_arr = calloc(2, sizeof(struct xsk_socket_info *));
+ if (!ifdict[i]->xsk_arr) {
+ failure = true;
+ goto cleanup;
+ }
+ ifdict[i]->umem_arr = calloc(2, sizeof(struct xsk_umem_info *));
+ if (!ifdict[i]->umem_arr) {
+ failure = true;
+ goto cleanup;
+ }
}
setlocale(LC_ALL, "");
@@ -1039,25 +1135,27 @@ int main(int argc, char **argv)
num_frames = ++opt_pkt_count;
- init_iface_config(ifaceconfig);
-
- pthread_init_mutex();
+ init_iface(ifdict[0], MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2, tx);
+ init_iface(ifdict[1], MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1, rx);
- ksft_set_plan(1);
+ ksft_set_plan(TEST_MODE_MAX * TEST_TYPE_MAX);
- if (!opt_teardown && !opt_bidi) {
- testapp_validate();
- } else if (opt_teardown && opt_bidi) {
- ksft_test_result_fail("ERROR: parameters -T and -B cannot be used together\n");
- ksft_exit_xfail();
- } else {
- testapp_sockets();
+ for (i = 0; i < TEST_MODE_MAX; i++) {
+ for (j = 0; j < TEST_TYPE_MAX; j++)
+ run_pkt_test(i, j);
}
- for (int i = 0; i < MAX_INTERFACES; i++)
+cleanup:
+ for (int i = 0; i < MAX_INTERFACES; i++) {
+ if (ifdict[i]->ns_fd != -1)
+ close(ifdict[i]->ns_fd);
+ free(ifdict[i]->xsk_arr);
+ free(ifdict[i]->umem_arr);
free(ifdict[i]);
+ }
- pthread_destroy_mutex();
+ if (failure)
+ exit_with_error(errno);
ksft_exit_pass();
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 0e9f9b7e61c2..6c428b276ab6 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -23,6 +23,7 @@
#define MAX_SOCKS 1
#define MAX_TEARDOWN_ITER 10
#define MAX_BIDI_ITER 2
+#define MAX_BPF_ITER 2
#define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \
sizeof(struct udphdr))
#define MIN_PKT_SIZE 64
@@ -33,41 +34,63 @@
#define IP_PKT_TOS 0x9
#define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr))
#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr))
-#define TMOUT_SEC (3)
#define EOT (-1)
#define USLEEP_MAX 200000
-#define THREAD_STACK 60000000
#define SOCK_RECONF_CTR 10
#define BATCH_SIZE 64
#define POLL_TMOUT 1000
-#define NEED_WAKEUP true
+#define DEFAULT_PKT_CNT 10000
+#define RX_FULL_RXQSIZE 32
+
+#define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0)
typedef __u32 u32;
typedef __u16 u16;
typedef __u8 u8;
-enum TESTS {
- ORDER_CONTENT_VALIDATE_XDP_SKB = 0,
- ORDER_CONTENT_VALIDATE_XDP_DRV = 1,
+enum TEST_MODES {
+ TEST_MODE_UNCONFIGURED = -1,
+ TEST_MODE_SKB,
+ TEST_MODE_DRV,
+ TEST_MODE_MAX
+};
+
+enum TEST_TYPES {
+ TEST_TYPE_NOPOLL,
+ TEST_TYPE_POLL,
+ TEST_TYPE_TEARDOWN,
+ TEST_TYPE_BIDI,
+ TEST_TYPE_STATS,
+ TEST_TYPE_BPF_RES,
+ TEST_TYPE_MAX
+};
+
+enum STAT_TEST_TYPES {
+ STAT_TEST_RX_DROPPED,
+ STAT_TEST_TX_INVALID,
+ STAT_TEST_RX_FULL,
+ STAT_TEST_RX_FILL_EMPTY,
+ STAT_TEST_TYPE_MAX
};
-u8 uut;
-u8 debug_pkt_dump;
-u32 num_frames;
-u8 switching_notify;
-u8 bidi_pass;
+static int configured_mode = TEST_MODE_UNCONFIGURED;
+static u8 debug_pkt_dump;
+static u32 num_frames;
+static bool second_step;
+static int test_type;
-static u32 opt_xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
-static int opt_queue;
static int opt_pkt_count;
-static int opt_poll;
-static int opt_teardown;
-static int opt_bidi;
-static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP;
+static u8 opt_verbose;
+
+static u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+static u32 xdp_bind_flags = XDP_USE_NEED_WAKEUP | XDP_COPY;
static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE];
static u32 pkt_counter;
-static u32 prev_pkt = -1;
+static long prev_pkt = -1;
static int sigvar;
+static int stat_test_type;
+static u32 rxqsize;
+static u32 frame_headroom;
struct xsk_umem_info {
struct xsk_ring_prod fq;
@@ -99,47 +122,32 @@ struct generic_data {
u32 seqnum;
};
-struct ifaceconfigobj {
- u8 dst_mac[ETH_ALEN];
- u8 src_mac[ETH_ALEN];
- struct in_addr dst_ip;
- struct in_addr src_ip;
- u16 src_port;
- u16 dst_port;
-} *ifaceconfig;
-
struct ifobject {
- int ifindex;
- int ifdict_index;
char ifname[MAX_INTERFACE_NAME_CHARS];
char nsname[MAX_INTERFACES_NAMESPACE_CHARS];
- struct flow_vector fv;
struct xsk_socket_info *xsk;
+ struct xsk_socket_info **xsk_arr;
+ struct xsk_umem_info **umem_arr;
struct xsk_umem_info *umem;
- u8 dst_mac[ETH_ALEN];
- u8 src_mac[ETH_ALEN];
+ void *(*func_ptr)(void *arg);
+ struct flow_vector fv;
+ int ns_fd;
+ int ifdict_index;
u32 dst_ip;
u32 src_ip;
u16 src_port;
u16 dst_port;
+ u8 dst_mac[ETH_ALEN];
+ u8 src_mac[ETH_ALEN];
};
static struct ifobject *ifdict[MAX_INTERFACES];
+static struct ifobject *ifdict_rx;
+static struct ifobject *ifdict_tx;
/*threads*/
-atomic_int spinning_tx;
-atomic_int spinning_rx;
-pthread_mutex_t sync_mutex;
-pthread_mutex_t sync_mutex_tx;
-pthread_cond_t signal_rx_condition;
-pthread_cond_t signal_tx_condition;
-pthread_t t0, t1, ns_thread;
-pthread_attr_t attr;
-
-struct targs {
- bool retptr;
- int idx;
-};
+pthread_barrier_t barr;
+pthread_t t0, t1;
TAILQ_HEAD(head_s, pkt) head = TAILQ_HEAD_INITIALIZER(head);
struct head_s *head_p;
diff --git a/tools/testing/selftests/bpf/xsk_prereqs.sh b/tools/testing/selftests/bpf/xsk_prereqs.sh
index 9d54c4645127..dac1c5f78752 100755
--- a/tools/testing/selftests/bpf/xsk_prereqs.sh
+++ b/tools/testing/selftests/bpf/xsk_prereqs.sh
@@ -82,24 +82,21 @@ clear_configs()
{
if [ $(ip netns show | grep $3 &>/dev/null; echo $?;) == 0 ]; then
[ $(ip netns exec $3 ip link show $2 &>/dev/null; echo $?;) == 0 ] &&
- { echo "removing link $1:$2"; ip netns exec $3 ip link del $2; }
- echo "removing ns $3"
+ { ip netns exec $3 ip link del $2; }
ip netns del $3
fi
#Once we delete a veth pair node, the entire veth pair is removed,
#this is just to be cautious just incase the NS does not exist then
#veth node inside NS won't get removed so we explicitly remove it
[ $(ip link show $1 &>/dev/null; echo $?;) == 0 ] &&
- { echo "removing link $1"; ip link del $1; }
+ { ip link del $1; }
if [ -f ${SPECFILE} ]; then
- echo "removing spec file:" ${SPECFILE}
rm -f ${SPECFILE}
fi
}
cleanup_exit()
{
- echo "cleaning up..."
clear_configs $1 $2 $3
}
@@ -108,28 +105,7 @@ validate_ip_utility()
[ ! $(type -P ip) ] && { echo "'ip' not found. Skipping tests."; test_exit $ksft_skip 1; }
}
-vethXDPgeneric()
-{
- ip link set dev $1 xdpdrv off
- ip netns exec $3 ip link set dev $2 xdpdrv off
-}
-
-vethXDPnative()
-{
- ip link set dev $1 xdpgeneric off
- ip netns exec $3 ip link set dev $2 xdpgeneric off
-}
-
execxdpxceiver()
{
- local -a 'paramkeys=("${!'"$1"'[@]}")' copy
- paramkeysstr=${paramkeys[*]}
-
- for index in $paramkeysstr;
- do
- current=$1"[$index]"
- copy[$index]=${!current}
- done
-
- ./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} ${copy[*]} -C ${NUMPKTS}
+ ./${XSKOBJ} -i ${VETH0} -i ${VETH1},${NS1} -C ${NUMPKTS} ${VERBOSE_ARG} ${DUMP_PKTS_ARG}
}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
index 1fedfc9da434..42d44e27802c 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
@@ -446,6 +446,35 @@ __invalid_nexthop_test()
log_test "Unresolved neigh: nexthop does not exist: $desc"
}
+__invalid_nexthop_bucket_test()
+{
+ local desc=$1; shift
+ local dip=$1; shift
+ local via_add=$1; shift
+ local trap_name="unresolved_neigh"
+
+ RET=0
+
+ # Check that route to nexthop that does not exist triggers
+ # unresolved_neigh
+ ip nexthop add id 1 via $via_add dev $rp2
+ ip nexthop add id 10 group 1 type resilient buckets 32
+ ip route add $dip nhid 10
+
+ t0_packets=$(devlink_trap_rx_packets_get $trap_name)
+ ping_do $h1 $dip
+ t1_packets=$(devlink_trap_rx_packets_get $trap_name)
+
+ if [[ $t0_packets -eq $t1_packets ]]; then
+ check_err 1 "Trap counter did not increase"
+ fi
+
+ ip route del $dip nhid 10
+ ip nexthop del id 10
+ ip nexthop del id 1
+ log_test "Unresolved neigh: nexthop bucket does not exist: $desc"
+}
+
unresolved_neigh_test()
{
__host_miss_test "IPv4" 198.51.100.1
@@ -453,6 +482,8 @@ unresolved_neigh_test()
__invalid_nexthop_test "IPv4" 198.51.100.1 198.51.100.3 24 198.51.100.4
__invalid_nexthop_test "IPv6" 2001:db8:2::1 2001:db8:2::3 64 \
2001:db8:2::4
+ __invalid_nexthop_bucket_test "IPv4" 198.51.100.1 198.51.100.4
+ __invalid_nexthop_bucket_test "IPv6" 2001:db8:2::1 2001:db8:2::4
}
vrf_without_routes_create()
diff --git a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh
index 6f3a70df63bc..e00435753008 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh
@@ -120,12 +120,13 @@ __mirror_gre_test()
sleep 5
for ((i = 0; i < count; ++i)); do
+ local sip=$(mirror_gre_ipv6_addr 1 $i)::1
local dip=$(mirror_gre_ipv6_addr 1 $i)::2
local htun=h3-gt6-$i
local message
icmp6_capture_install $htun
- mirror_test v$h1 "" $dip $htun 100 10
+ mirror_test v$h1 $sip $dip $htun 100 10
icmp6_capture_uninstall $htun
done
}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh
index f813ffefc07e..65f43a7ce9c9 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh
@@ -55,10 +55,6 @@ port_test()
| jq '.[][][] | select(.name=="physical_ports") |.["occ"]')
[[ $occ -eq $max_ports ]]
- if [[ $should_fail -eq 0 ]]; then
- check_err $? "Mismatch ports number: Expected $max_ports, got $occ."
- else
- check_err_fail $should_fail $? "Reached more ports than expected"
- fi
+ check_err_fail $should_fail $? "Attempt to create $max_ports ports (actual result $occ)"
}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
index ed346da5d3cb..a217f9f6775b 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
@@ -33,6 +33,7 @@ ALL_TESTS="
nexthop_obj_invalid_test
nexthop_obj_offload_test
nexthop_obj_group_offload_test
+ nexthop_obj_bucket_offload_test
nexthop_obj_blackhole_offload_test
nexthop_obj_route_offload_test
devlink_reload_test
@@ -739,11 +740,28 @@ nexthop_obj_invalid_test()
ip nexthop add id 1 dev $swp1
ip nexthop add id 2 dev $swp1
+ ip nexthop add id 3 via 192.0.2.3 dev $swp1
ip nexthop add id 10 group 1/2
check_fail $? "managed to configure a nexthop group with device-only nexthops when should not"
+ ip nexthop add id 10 group 3 type resilient buckets 7
+ check_fail $? "managed to configure a too small resilient nexthop group when should not"
+
+ ip nexthop add id 10 group 3 type resilient buckets 129
+ check_fail $? "managed to configure a resilient nexthop group with invalid number of buckets when should not"
+
+ ip nexthop add id 10 group 1/2 type resilient buckets 32
+ check_fail $? "managed to configure a resilient nexthop group with device-only nexthops when should not"
+
+ ip nexthop add id 10 group 3 type resilient buckets 32
+ check_err $? "failed to configure a valid resilient nexthop group"
+ ip nexthop replace id 3 dev $swp1
+ check_fail $? "managed to populate a nexthop bucket with a device-only nexthop when should not"
+
log_test "nexthop objects - invalid configurations"
+ ip nexthop del id 10
+ ip nexthop del id 3
ip nexthop del id 2
ip nexthop del id 1
@@ -858,6 +876,70 @@ nexthop_obj_group_offload_test()
simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64
}
+nexthop_obj_bucket_offload_test()
+{
+ # Test offload indication of nexthop buckets
+ RET=0
+
+ simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64
+ simple_if_init $swp2
+ setup_wait
+
+ ip nexthop add id 1 via 192.0.2.2 dev $swp1
+ ip nexthop add id 2 via 2001:db8:1::2 dev $swp1
+ ip nexthop add id 10 group 1/2 type resilient buckets 32 idle_timer 0
+ ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \
+ dev $swp1
+ ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud reachable \
+ dev $swp1
+ ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud reachable \
+ dev $swp1
+
+ busywait "$TIMEOUT" wait_for_offload \
+ ip nexthop bucket show nhid 1
+ check_err $? "IPv4 nexthop buckets not marked as offloaded when should"
+ busywait "$TIMEOUT" wait_for_offload \
+ ip nexthop bucket show nhid 2
+ check_err $? "IPv6 nexthop buckets not marked as offloaded when should"
+
+ # Invalidate nexthop id 1
+ ip neigh replace 192.0.2.2 nud failed dev $swp1
+ busywait "$TIMEOUT" wait_for_trap \
+ ip nexthop bucket show nhid 1
+ check_err $? "IPv4 nexthop buckets not marked with trap when should"
+
+ # Invalidate nexthop id 2
+ ip neigh replace 2001:db8:1::2 nud failed dev $swp1
+ busywait "$TIMEOUT" wait_for_trap \
+ ip nexthop bucket show nhid 2
+ check_err $? "IPv6 nexthop buckets not marked with trap when should"
+
+ # Revalidate nexthop id 1 by changing its configuration
+ ip nexthop replace id 1 via 192.0.2.3 dev $swp1
+ busywait "$TIMEOUT" wait_for_offload \
+ ip nexthop bucket show nhid 1
+ check_err $? "nexthop bucket not marked as offloaded after revalidating nexthop"
+
+ # Revalidate nexthop id 2 by changing its neighbour
+ ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud reachable \
+ dev $swp1
+ busywait "$TIMEOUT" wait_for_offload \
+ ip nexthop bucket show nhid 2
+ check_err $? "nexthop bucket not marked as offloaded after revalidating neighbour"
+
+ log_test "nexthop bucket offload indication"
+
+ ip neigh del 2001:db8:1::2 dev $swp1
+ ip neigh del 192.0.2.3 dev $swp1
+ ip neigh del 192.0.2.2 dev $swp1
+ ip nexthop del id 10
+ ip nexthop del id 2
+ ip nexthop del id 1
+
+ simple_if_fini $swp2
+ simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64
+}
+
nexthop_obj_blackhole_offload_test()
{
# Test offload indication of blackhole nexthop objects
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
index b0cb1aaffdda..33ddd01689be 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
@@ -507,8 +507,8 @@ do_red_test()
check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
local diff=$((limit - backlog))
pct=$((100 * diff / limit))
- ((0 <= pct && pct <= 5))
- check_err $? "backlog $backlog / $limit expected <= 5% distance"
+ ((0 <= pct && pct <= 10))
+ check_err $? "backlog $backlog / $limit expected <= 10% distance"
log_test "TC $((vlan - 10)): RED backlog > limit"
stop_traffic
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
index 3f007c5f8361..f3ef3274f9b3 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
@@ -67,6 +67,13 @@ red_test()
{
install_qdisc
+ # Make sure that we get the non-zero value if there is any.
+ local cur=$(busywait 1100 until_counter_is "> 0" \
+ qdisc_stats_get $swp3 10: .backlog)
+ (( cur == 0 ))
+ check_err $? "backlog of $cur observed on non-busy qdisc"
+ log_test "$QDISC backlog properly cleaned"
+
do_red_test 10 $BACKLOG1
do_red_test 11 $BACKLOG2
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh
deleted file mode 100755
index 0231205a7147..000000000000
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/q_in_vni_veto.sh
+++ /dev/null
@@ -1,77 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-lib_dir=$(dirname $0)/../../../../net/forwarding
-
-VXPORT=4789
-
-ALL_TESTS="
- create_dot1d_and_dot1ad_vxlans
-"
-NUM_NETIFS=2
-source $lib_dir/lib.sh
-
-setup_prepare()
-{
- swp1=${NETIFS[p1]}
- swp2=${NETIFS[p2]}
-
- ip link set dev $swp1 up
- ip link set dev $swp2 up
-}
-
-cleanup()
-{
- pre_cleanup
-
- ip link set dev $swp2 down
- ip link set dev $swp1 down
-}
-
-create_dot1d_and_dot1ad_vxlans()
-{
- RET=0
-
- ip link add dev br0 type bridge vlan_filtering 1 vlan_protocol 802.1ad \
- vlan_default_pvid 0 mcast_snooping 0
- ip link set dev br0 up
-
- ip link add name vx100 type vxlan id 1000 local 192.0.2.17 dstport \
- "$VXPORT" nolearning noudpcsum tos inherit ttl 100
- ip link set dev vx100 up
-
- ip link set dev $swp1 master br0
- ip link set dev vx100 master br0
- bridge vlan add vid 100 dev vx100 pvid untagged
-
- ip link add dev br1 type bridge vlan_filtering 0 mcast_snooping 0
- ip link set dev br1 up
-
- ip link add name vx200 type vxlan id 2000 local 192.0.2.17 dstport \
- "$VXPORT" nolearning noudpcsum tos inherit ttl 100
- ip link set dev vx200 up
-
- ip link set dev $swp2 master br1
- ip link set dev vx200 master br1 2>/dev/null
- check_fail $? "802.1d and 802.1ad VxLANs at the same time not rejected"
-
- ip link set dev vx200 master br1 2>&1 >/dev/null \
- | grep -q mlxsw_spectrum
- check_err $? "802.1d and 802.1ad VxLANs at the same time rejected without extack"
-
- log_test "create 802.1d and 802.1ad VxLANs"
-
- ip link del dev vx200
- ip link del dev br1
- ip link del dev vx100
- ip link del dev br0
-}
-
-trap cleanup EXIT
-
-setup_prepare
-setup_wait
-
-tests_run
-
-exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
index 4a1c9328555f..50654f8a8c37 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
@@ -30,6 +30,7 @@ trap cleanup EXIT
ALL_TESTS="router tc_flower mirror_gre tc_police port"
for current_test in ${TESTS:-$ALL_TESTS}; do
+ RET_FIN=0
source ${current_test}_scale.sh
num_netifs_var=${current_test^^}_NUM_NETIFS
@@ -48,8 +49,9 @@ for current_test in ${TESTS:-$ALL_TESTS}; do
else
log_test "'$current_test' overflow $target"
fi
+ RET_FIN=$(( RET_FIN || RET ))
done
done
current_test=""
-exit "$RET"
+exit "$RET_FIN"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
index 087a884f66cd..685dfb3478b3 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
@@ -24,6 +24,7 @@ trap cleanup EXIT
ALL_TESTS="router tc_flower mirror_gre tc_police port"
for current_test in ${TESTS:-$ALL_TESTS}; do
+ RET_FIN=0
source ${current_test}_scale.sh
num_netifs_var=${current_test^^}_NUM_NETIFS
@@ -50,8 +51,9 @@ for current_test in ${TESTS:-$ALL_TESTS}; do
log_test "'$current_test' [$profile] overflow $target"
fi
done
+ RET_FIN=$(( RET_FIN || RET ))
done
done
current_test=""
-exit "$RET"
+exit "$RET_FIN"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh
index cc0f07e72cf2..aa74be9f47c8 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh
@@ -98,11 +98,7 @@ __tc_flower_test()
jq -r '[ .[] | select(.kind == "flower") |
.options | .in_hw ]' | jq .[] | wc -l)
[[ $((offload_count - 1)) -eq $count ]]
- if [[ $should_fail -eq 0 ]]; then
- check_err $? "Offload mismatch"
- else
- check_err_fail $should_fail $? "Offload more than expacted"
- fi
+ check_err_fail $should_fail $? "Attempt to offload $count rules (actual result $((offload_count - 1)))"
}
tc_flower_test()
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
index 553cb9fad508..5ec3beb637c8 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
@@ -11,6 +11,7 @@ ALL_TESTS="
matchall_mirror_behind_flower_ingress_test
matchall_sample_behind_flower_ingress_test
matchall_mirror_behind_flower_egress_test
+ matchall_proto_match_test
police_limits_test
multi_police_test
"
@@ -18,6 +19,7 @@ NUM_NETIFS=2
source $lib_dir/tc_common.sh
source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
switch_create()
{
@@ -166,7 +168,8 @@ matchall_sample_egress_test()
RET=0
# It is forbidden in mlxsw driver to have matchall with sample action
- # bound on egress
+ # bound on egress. Spectrum-1 specific restriction
+ [[ "$DEVLINK_VIDDID" != "15b3:cb84" ]] && return
tc qdisc add dev $swp1 clsact
@@ -289,6 +292,22 @@ matchall_mirror_behind_flower_egress_test()
matchall_behind_flower_egress_test "mirror" "mirred egress mirror dev $swp2"
}
+matchall_proto_match_test()
+{
+ RET=0
+
+ tc qdisc add dev $swp1 clsact
+
+ tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+ matchall skip_sw \
+ action sample group 1 rate 100
+ check_fail $? "Incorrect success to add matchall rule with protocol match"
+
+ tc qdisc del dev $swp1 clsact
+
+ log_test "matchall protocol match"
+}
+
police_limits_test()
{
RET=0
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh
new file mode 100755
index 000000000000..093bed088ad0
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh
@@ -0,0 +1,657 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test that packets are sampled when tc-sample is used and that reported
+# metadata is correct. Two sets of hosts (with and without LAG) are used, since
+# metadata extraction in mlxsw is a bit different when LAG is involved.
+#
+# +---------------------------------+ +---------------------------------+
+# | H1 (vrf) | | H3 (vrf) |
+# | + $h1 | | + $h3_lag |
+# | | 192.0.2.1/28 | | | 192.0.2.17/28 |
+# | | | | | |
+# | | default via 192.0.2.2 | | | default via 192.0.2.18 |
+# +----|----------------------------+ +----|----------------------------+
+# | |
+# +----|-----------------------------------------|----------------------------+
+# | | 192.0.2.2/28 | 192.0.2.18/28 |
+# | + $rp1 + $rp3_lag |
+# | |
+# | + $rp2 + $rp4_lag |
+# | | 198.51.100.2/28 | 198.51.100.18/28 |
+# +----|-----------------------------------------|----------------------------+
+# | |
+# +----|----------------------------+ +----|----------------------------+
+# | | default via 198.51.100.2 | | | default via 198.51.100.18 |
+# | | | | | |
+# | | 198.51.100.1/28 | | | 198.51.100.17/28 |
+# | + $h2 | | + $h4_lag |
+# | H2 (vrf) | | H4 (vrf) |
+# +---------------------------------+ +---------------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ tc_sample_rate_test
+ tc_sample_max_rate_test
+ tc_sample_conflict_test
+ tc_sample_group_conflict_test
+ tc_sample_md_iif_test
+ tc_sample_md_lag_iif_test
+ tc_sample_md_oif_test
+ tc_sample_md_lag_oif_test
+ tc_sample_md_out_tc_test
+ tc_sample_md_out_tc_occ_test
+ tc_sample_md_latency_test
+ tc_sample_acl_group_conflict_test
+ tc_sample_acl_rate_test
+ tc_sample_acl_max_rate_test
+"
+NUM_NETIFS=8
+CAPTURE_FILE=$(mktemp)
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+# Available at https://github.com/Mellanox/libpsample
+require_command psample
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28
+
+ ip -4 route add default vrf v$h1 nexthop via 192.0.2.2
+}
+
+h1_destroy()
+{
+ ip -4 route del default vrf v$h1 nexthop via 192.0.2.2
+
+ simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()
+{
+ simple_if_init $h2 198.51.100.1/28
+
+ ip -4 route add default vrf v$h2 nexthop via 198.51.100.2
+}
+
+h2_destroy()
+{
+ ip -4 route del default vrf v$h2 nexthop via 198.51.100.2
+
+ simple_if_fini $h2 198.51.100.1/28
+}
+
+h3_create()
+{
+ ip link set dev $h3 down
+ ip link add name ${h3}_bond type bond mode 802.3ad
+ ip link set dev $h3 master ${h3}_bond
+
+ simple_if_init ${h3}_bond 192.0.2.17/28
+
+ ip -4 route add default vrf v${h3}_bond nexthop via 192.0.2.18
+}
+
+h3_destroy()
+{
+ ip -4 route del default vrf v${h3}_bond nexthop via 192.0.2.18
+
+ simple_if_fini ${h3}_bond 192.0.2.17/28
+
+ ip link set dev $h3 nomaster
+ ip link del dev ${h3}_bond
+}
+
+h4_create()
+{
+ ip link set dev $h4 down
+ ip link add name ${h4}_bond type bond mode 802.3ad
+ ip link set dev $h4 master ${h4}_bond
+
+ simple_if_init ${h4}_bond 198.51.100.17/28
+
+ ip -4 route add default vrf v${h4}_bond nexthop via 198.51.100.18
+}
+
+h4_destroy()
+{
+ ip -4 route del default vrf v${h4}_bond nexthop via 198.51.100.18
+
+ simple_if_fini ${h4}_bond 198.51.100.17/28
+
+ ip link set dev $h4 nomaster
+ ip link del dev ${h4}_bond
+}
+
+router_create()
+{
+ ip link set dev $rp1 up
+ __addr_add_del $rp1 add 192.0.2.2/28
+ tc qdisc add dev $rp1 clsact
+
+ ip link set dev $rp2 up
+ __addr_add_del $rp2 add 198.51.100.2/28
+ tc qdisc add dev $rp2 clsact
+
+ ip link add name ${rp3}_bond type bond mode 802.3ad
+ ip link set dev $rp3 master ${rp3}_bond
+ __addr_add_del ${rp3}_bond add 192.0.2.18/28
+ tc qdisc add dev $rp3 clsact
+ ip link set dev ${rp3}_bond up
+
+ ip link add name ${rp4}_bond type bond mode 802.3ad
+ ip link set dev $rp4 master ${rp4}_bond
+ __addr_add_del ${rp4}_bond add 198.51.100.18/28
+ tc qdisc add dev $rp4 clsact
+ ip link set dev ${rp4}_bond up
+}
+
+router_destroy()
+{
+ ip link set dev ${rp4}_bond down
+ tc qdisc del dev $rp4 clsact
+ __addr_add_del ${rp4}_bond del 198.51.100.18/28
+ ip link set dev $rp4 nomaster
+ ip link del dev ${rp4}_bond
+
+ ip link set dev ${rp3}_bond down
+ tc qdisc del dev $rp3 clsact
+ __addr_add_del ${rp3}_bond del 192.0.2.18/28
+ ip link set dev $rp3 nomaster
+ ip link del dev ${rp3}_bond
+
+ tc qdisc del dev $rp2 clsact
+ __addr_add_del $rp2 del 198.51.100.2/28
+ ip link set dev $rp2 down
+
+ tc qdisc del dev $rp1 clsact
+ __addr_add_del $rp1 del 192.0.2.2/28
+ ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp1=${NETIFS[p2]}
+ rp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+ h3=${NETIFS[p5]}
+ rp3=${NETIFS[p6]}
+ h4=${NETIFS[p7]}
+ rp4=${NETIFS[p8]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ h3_create
+ h4_create
+ router_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ rm -f $CAPTURE_FILE
+
+ router_destroy
+ h4_destroy
+ h3_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+psample_capture_start()
+{
+ rm -f $CAPTURE_FILE
+
+ psample &> $CAPTURE_FILE &
+
+ sleep 1
+}
+
+psample_capture_stop()
+{
+ { kill %% && wait %%; } 2>/dev/null
+}
+
+__tc_sample_rate_test()
+{
+ local desc=$1; shift
+ local dip=$1; shift
+ local pkts pct
+
+ RET=0
+
+ tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate 32 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ psample_capture_start
+
+ ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+ -B $dip -t udp dp=52768,sp=42768 -q
+
+ psample_capture_stop
+
+ pkts=$(grep -e "group 1 " $CAPTURE_FILE | wc -l)
+ pct=$((100 * (pkts - 100) / 100))
+ (( -25 <= pct && pct <= 25))
+ check_err $? "Expected 100 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%"
+
+ log_test "tc sample rate ($desc)"
+
+ tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_rate_test()
+{
+ __tc_sample_rate_test "forward" 198.51.100.1
+ __tc_sample_rate_test "local receive" 192.0.2.2
+}
+
+tc_sample_max_rate_test()
+{
+ RET=0
+
+ tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate $((35 * 10 ** 8)) group 1
+ check_err $? "Failed to configure sampling rule with max rate"
+
+ tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+
+ tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate $((35 * 10 ** 8 + 1)) \
+ group 1 &> /dev/null
+ check_fail $? "Managed to configure sampling rate above maximum"
+
+ log_test "tc sample maximum rate"
+}
+
+tc_sample_conflict_test()
+{
+ RET=0
+
+ # Test that two sampling rules cannot be configured on the same port,
+ # even when they share the same parameters.
+
+ tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate 1024 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ tc filter add dev $rp1 ingress protocol all pref 2 handle 102 matchall \
+ skip_sw action sample rate 1024 group 1 &> /dev/null
+ check_fail $? "Managed to configure second sampling rule"
+
+ # Delete the first rule and make sure the second rule can now be
+ # configured.
+
+ tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+
+ tc filter add dev $rp1 ingress protocol all pref 2 handle 102 matchall \
+ skip_sw action sample rate 1024 group 1
+ check_err $? "Failed to configure sampling rule after deletion"
+
+ log_test "tc sample conflict test"
+
+ tc filter del dev $rp1 ingress protocol all pref 2 handle 102 matchall
+}
+
+tc_sample_group_conflict_test()
+{
+ RET=0
+
+ # Test that two sampling rules cannot be configured on the same port
+ # with different groups.
+
+ tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate 1024 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ tc filter add dev $rp1 ingress protocol all pref 2 handle 102 matchall \
+ skip_sw action sample rate 1024 group 2 &> /dev/null
+ check_fail $? "Managed to configure sampling rule with conflicting group"
+
+ log_test "tc sample group conflict test"
+
+ tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_iif_test()
+{
+ local rp1_ifindex
+
+ RET=0
+
+ tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate 5 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ psample_capture_start
+
+ ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+ -B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+ psample_capture_stop
+
+ rp1_ifindex=$(ip -j -p link show dev $rp1 | jq '.[]["ifindex"]')
+ grep -q -e "in-ifindex $rp1_ifindex " $CAPTURE_FILE
+ check_err $? "Sampled packets do not have expected in-ifindex"
+
+ log_test "tc sample iif"
+
+ tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_lag_iif_test()
+{
+ local rp3_ifindex
+
+ RET=0
+
+ tc filter add dev $rp3 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate 5 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ psample_capture_start
+
+ ip vrf exec v${h3}_bond $MZ ${h3}_bond -c 3200 -d 1msec -p 64 \
+ -A 192.0.2.17 -B 198.51.100.17 -t udp dp=52768,sp=42768 -q
+
+ psample_capture_stop
+
+ rp3_ifindex=$(ip -j -p link show dev $rp3 | jq '.[]["ifindex"]')
+ grep -q -e "in-ifindex $rp3_ifindex " $CAPTURE_FILE
+ check_err $? "Sampled packets do not have expected in-ifindex"
+
+ log_test "tc sample lag iif"
+
+ tc filter del dev $rp3 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_oif_test()
+{
+ local rp2_ifindex
+
+ RET=0
+
+ tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate 5 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ psample_capture_start
+
+ ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+ -B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+ psample_capture_stop
+
+ rp2_ifindex=$(ip -j -p link show dev $rp2 | jq '.[]["ifindex"]')
+ grep -q -e "out-ifindex $rp2_ifindex " $CAPTURE_FILE
+ check_err $? "Sampled packets do not have expected out-ifindex"
+
+ log_test "tc sample oif"
+
+ tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_lag_oif_test()
+{
+ local rp4_ifindex
+
+ RET=0
+
+ tc filter add dev $rp3 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate 5 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ psample_capture_start
+
+ ip vrf exec v${h3}_bond $MZ ${h3}_bond -c 3200 -d 1msec -p 64 \
+ -A 192.0.2.17 -B 198.51.100.17 -t udp dp=52768,sp=42768 -q
+
+ psample_capture_stop
+
+ rp4_ifindex=$(ip -j -p link show dev $rp4 | jq '.[]["ifindex"]')
+ grep -q -e "out-ifindex $rp4_ifindex " $CAPTURE_FILE
+ check_err $? "Sampled packets do not have expected out-ifindex"
+
+ log_test "tc sample lag oif"
+
+ tc filter del dev $rp3 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_out_tc_test()
+{
+ RET=0
+
+ # Output traffic class is not supported on Spectrum-1.
+ [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return
+
+ tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate 5 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ # By default, all the packets should go to the same traffic class (0).
+
+ psample_capture_start
+
+ ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+ -B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+ psample_capture_stop
+
+ grep -q -e "out-tc 0 " $CAPTURE_FILE
+ check_err $? "Sampled packets do not have expected out-tc (0)"
+
+ # Map all priorities to highest traffic class (7) and check reported
+ # out-tc.
+ tc qdisc replace dev $rp2 root handle 1: \
+ prio bands 3 priomap 0 0 0 0 0 0 0 0
+
+ psample_capture_start
+
+ ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+ -B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+ psample_capture_stop
+
+ grep -q -e "out-tc 7 " $CAPTURE_FILE
+ check_err $? "Sampled packets do not have expected out-tc (7)"
+
+ log_test "tc sample out-tc"
+
+ tc qdisc del dev $rp2 root handle 1:
+ tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_out_tc_occ_test()
+{
+ local backlog pct occ
+
+ RET=0
+
+ # Output traffic class occupancy is not supported on Spectrum-1.
+ [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return
+
+ tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate 1024 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ # Configure a shaper on egress to create congestion.
+ tc qdisc replace dev $rp2 root handle 1: \
+ tbf rate 1Mbit burst 256k limit 1M
+
+ psample_capture_start
+
+ ip vrf exec v$h1 $MZ $h1 -c 0 -d 1usec -p 1400 -A 192.0.2.1 \
+ -B 198.51.100.1 -t udp dp=52768,sp=42768 -q &
+
+ # Allow congestion to reach steady state.
+ sleep 10
+
+ backlog=$(tc -j -p -s qdisc show dev $rp2 | jq '.[0]["backlog"]')
+
+ # Kill mausezahn.
+ { kill %% && wait %%; } 2>/dev/null
+
+ psample_capture_stop
+
+ # Record last congestion sample.
+ occ=$(grep -e "out-tc-occ " $CAPTURE_FILE | tail -n 1 | \
+ cut -d ' ' -f 16)
+
+ pct=$((100 * (occ - backlog) / backlog))
+ (( -1 <= pct && pct <= 1))
+ check_err $? "Recorded a congestion of $backlog bytes, but sampled congestion is $occ bytes, which is $pct% off. Required accuracy is +-5%"
+
+ log_test "tc sample out-tc-occ"
+
+ tc qdisc del dev $rp2 root handle 1:
+ tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_latency_test()
+{
+ RET=0
+
+ # Egress sampling not supported on Spectrum-1.
+ [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return
+
+ tc filter add dev $rp2 egress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate 5 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ psample_capture_start
+
+ ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+ -B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+ psample_capture_stop
+
+ grep -q -e "latency " $CAPTURE_FILE
+ check_err $? "Sampled packets do not have latency attribute"
+
+ log_test "tc sample latency"
+
+ tc filter del dev $rp2 egress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_acl_group_conflict_test()
+{
+ RET=0
+
+ # Test that two flower sampling rules cannot be configured on the same
+ # port with different groups.
+
+ # Policy-based sampling is not supported on Spectrum-1.
+ [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return
+
+ tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
+ skip_sw action sample rate 1024 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ tc filter add dev $rp1 ingress protocol ip pref 2 handle 102 flower \
+ skip_sw action sample rate 1024 group 1
+ check_err $? "Failed to configure sampling rule with same group"
+
+ tc filter add dev $rp1 ingress protocol ip pref 3 handle 103 flower \
+ skip_sw action sample rate 1024 group 2 &> /dev/null
+ check_fail $? "Managed to configure sampling rule with conflicting group"
+
+ log_test "tc sample (w/ flower) group conflict test"
+
+ tc filter del dev $rp1 ingress protocol ip pref 2 handle 102 flower
+ tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower
+}
+
+__tc_sample_acl_rate_test()
+{
+ local bind=$1; shift
+ local port=$1; shift
+ local pkts pct
+
+ RET=0
+
+ # Policy-based sampling is not supported on Spectrum-1.
+ [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return
+
+ tc filter add dev $port $bind protocol ip pref 1 handle 101 flower \
+ skip_sw dst_ip 198.51.100.1 action sample rate 32 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ psample_capture_start
+
+ ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+ -B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+ psample_capture_stop
+
+ pkts=$(grep -e "group 1 " $CAPTURE_FILE | wc -l)
+ pct=$((100 * (pkts - 100) / 100))
+ (( -25 <= pct && pct <= 25))
+ check_err $? "Expected 100 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%"
+
+ # Setup a filter that should not match any packet and make sure packets
+ # are not sampled.
+ tc filter del dev $port $bind protocol ip pref 1 handle 101 flower
+
+ tc filter add dev $port $bind protocol ip pref 1 handle 101 flower \
+ skip_sw dst_ip 198.51.100.10 action sample rate 32 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ psample_capture_start
+
+ ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+ -B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+ psample_capture_stop
+
+ grep -q -e "group 1 " $CAPTURE_FILE
+ check_fail $? "Sampled packets when should not"
+
+ log_test "tc sample (w/ flower) rate ($bind)"
+
+ tc filter del dev $port $bind protocol ip pref 1 handle 101 flower
+}
+
+tc_sample_acl_rate_test()
+{
+ __tc_sample_acl_rate_test ingress $rp1
+ __tc_sample_acl_rate_test egress $rp2
+}
+
+tc_sample_acl_max_rate_test()
+{
+ RET=0
+
+ # Policy-based sampling is not supported on Spectrum-1.
+ [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return
+
+ tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
+ skip_sw action sample rate $((2 ** 24 - 1)) group 1
+ check_err $? "Failed to configure sampling rule with max rate"
+
+ tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower
+
+ tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
+ skip_sw action sample rate $((2 ** 24)) \
+ group 1 &> /dev/null
+ check_fail $? "Managed to configure sampling rate above maximum"
+
+ log_test "tc sample (w/ flower) maximum rate"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh
index 9f64d5c7107b..7ca1f030d209 100644
--- a/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh
@@ -24,8 +24,11 @@ function check {
local code=$1
local str=$2
local exp_str=$3
+ local exp_fail=$4
- if [ $code -ne 0 ]; then
+ [ -z "$exp_fail" ] && cop="-ne" || cop="-eq"
+
+ if [ $code $cop 0 ]; then
((num_errors++))
return
fi
diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh
new file mode 100755
index 000000000000..0c56746e9ce0
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh
@@ -0,0 +1,110 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+source ethtool-common.sh
+
+NSIM_NETDEV=$(make_netdev)
+[ a$ETHTOOL == a ] && ETHTOOL=ethtool
+
+set -o pipefail
+
+# netdevsim starts out with None/None
+s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+check $? "$s" "Configured FEC encodings: None
+Active FEC encoding: None"
+
+# Test Auto
+$ETHTOOL --set-fec $NSIM_NETDEV encoding auto
+check $?
+s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+check $? "$s" "Configured FEC encodings: Auto
+Active FEC encoding: Off"
+
+# Test case in-sensitivity
+for o in off Off OFF; do
+ $ETHTOOL --set-fec $NSIM_NETDEV encoding $o
+ check $?
+ s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+ check $? "$s" "Configured FEC encodings: Off
+Active FEC encoding: Off"
+done
+
+for o in BaseR baser BAser; do
+ $ETHTOOL --set-fec $NSIM_NETDEV encoding $o
+ check $?
+ s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+ check $? "$s" "Configured FEC encodings: BaseR
+Active FEC encoding: BaseR"
+done
+
+for o in llrs rs; do
+ $ETHTOOL --set-fec $NSIM_NETDEV encoding $o
+ check $?
+ s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+ check $? "$s" "Configured FEC encodings: ${o^^}
+Active FEC encoding: ${o^^}"
+done
+
+# Test mutliple bits
+$ETHTOOL --set-fec $NSIM_NETDEV encoding rs llrs
+check $?
+s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+check $? "$s" "Configured FEC encodings: RS LLRS
+Active FEC encoding: LLRS"
+
+$ETHTOOL --set-fec $NSIM_NETDEV encoding rs off auto
+check $?
+s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+check $? "$s" "Configured FEC encodings: Auto Off RS
+Active FEC encoding: RS"
+
+# Make sure other link modes are rejected
+$ETHTOOL --set-fec $NSIM_NETDEV encoding FIBRE 2>/dev/null
+check $? '' '' 1
+
+$ETHTOOL --set-fec $NSIM_NETDEV encoding bla-bla-bla 2>/dev/null
+check $? '' '' 1
+
+# Try JSON
+$ETHTOOL --json --show-fec $NSIM_NETDEV | jq empty >>/dev/null 2>&1
+if [ $? -eq 0 ]; then
+ $ETHTOOL --set-fec $NSIM_NETDEV encoding auto
+ check $?
+
+ s=$($ETHTOOL --json --show-fec $NSIM_NETDEV | jq '.[].config[]')
+ check $? "$s" '"Auto"'
+ s=$($ETHTOOL --json --show-fec $NSIM_NETDEV | jq '.[].active[]')
+ check $? "$s" '"Off"'
+
+ $ETHTOOL --set-fec $NSIM_NETDEV encoding auto RS
+ check $?
+
+ s=$($ETHTOOL --json --show-fec $NSIM_NETDEV | jq '.[].config[]')
+ check $? "$s" '"Auto"
+"RS"'
+ s=$($ETHTOOL --json --show-fec $NSIM_NETDEV | jq '.[].active[]')
+ check $? "$s" '"RS"'
+fi
+
+# Test error injection
+echo 11 > $NSIM_DEV_DFS/ethtool/get_err
+
+$ETHTOOL --show-fec $NSIM_NETDEV >>/dev/null 2>&1
+check $? '' '' 1
+
+echo 0 > $NSIM_DEV_DFS/ethtool/get_err
+echo 11 > $NSIM_DEV_DFS/ethtool/set_err
+
+$ETHTOOL --show-fec $NSIM_NETDEV >>/dev/null 2>&1
+check $?
+
+$ETHTOOL --set-fec $NSIM_NETDEV encoding RS 2>/dev/null
+check $? '' '' 1
+
+if [ $num_errors -eq 0 ]; then
+ echo "PASSED all $((num_passes)) checks"
+ exit 0
+else
+ echo "FAILED $num_errors/$((num_errors+num_passes)) checks"
+ exit 1
+fi
diff --git a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
index be0c1b5ee6b8..ba75c81cda91 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
@@ -11,14 +11,33 @@ ALL_TESTS="
nexthop_single_add_err_test
nexthop_group_add_test
nexthop_group_add_err_test
+ nexthop_res_group_add_test
+ nexthop_res_group_add_err_test
nexthop_group_replace_test
nexthop_group_replace_err_test
+ nexthop_res_group_replace_test
+ nexthop_res_group_replace_err_test
+ nexthop_res_group_idle_timer_test
+ nexthop_res_group_idle_timer_del_test
+ nexthop_res_group_increase_idle_timer_test
+ nexthop_res_group_decrease_idle_timer_test
+ nexthop_res_group_unbalanced_timer_test
+ nexthop_res_group_unbalanced_timer_del_test
+ nexthop_res_group_no_unbalanced_timer_test
+ nexthop_res_group_short_unbalanced_timer_test
+ nexthop_res_group_increase_unbalanced_timer_test
+ nexthop_res_group_decrease_unbalanced_timer_test
+ nexthop_res_group_force_migrate_busy_test
nexthop_single_replace_test
nexthop_single_replace_err_test
nexthop_single_in_group_replace_test
nexthop_single_in_group_replace_err_test
+ nexthop_single_in_res_group_replace_test
+ nexthop_single_in_res_group_replace_err_test
nexthop_single_in_group_delete_test
nexthop_single_in_group_delete_err_test
+ nexthop_single_in_res_group_delete_test
+ nexthop_single_in_res_group_delete_err_test
nexthop_replay_test
nexthop_replay_err_test
"
@@ -27,6 +46,7 @@ DEV_ADDR=1337
DEV=netdevsim${DEV_ADDR}
DEVLINK_DEV=netdevsim/${DEV}
SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/
+DEBUGFS_NET_DIR=/sys/kernel/debug/netdevsim/$DEV/
NUM_NETIFS=0
source $lib_dir/lib.sh
source $lib_dir/devlink_lib.sh
@@ -44,6 +64,28 @@ nexthop_check()
return 0
}
+nexthop_bucket_nhid_count_check()
+{
+ local group_id=$1; shift
+ local expected
+ local count
+ local nhid
+ local ret
+
+ while (($# > 0)); do
+ nhid=$1; shift
+ expected=$1; shift
+
+ count=$($IP nexthop bucket show id $group_id nhid $nhid |
+ grep "trap" | wc -l)
+ if ((expected != count)); then
+ return 1
+ fi
+ done
+
+ return 0
+}
+
nexthop_resource_check()
{
local expected_occ=$1; shift
@@ -159,6 +201,71 @@ nexthop_group_add_err_test()
nexthop_resource_set 9999
}
+nexthop_res_group_add_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ $IP nexthop add id 10 group 1/2 type resilient buckets 4
+ nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+ check_err $? "Unexpected nexthop group entry"
+
+ nexthop_bucket_nhid_count_check 10 1 2
+ check_err $? "Wrong nexthop buckets count"
+ nexthop_bucket_nhid_count_check 10 2 2
+ check_err $? "Wrong nexthop buckets count"
+
+ nexthop_resource_check 6
+ check_err $? "Wrong nexthop occupancy"
+
+ $IP nexthop del id 10
+ nexthop_resource_check 2
+ check_err $? "Wrong nexthop occupancy after delete"
+
+ $IP nexthop add id 10 group 1,3/2,2 type resilient buckets 5
+ nexthop_check "id 10" "id 10 group 1,3/2,2 type resilient buckets 5 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+ check_err $? "Unexpected weighted nexthop group entry"
+
+ nexthop_bucket_nhid_count_check 10 1 3
+ check_err $? "Wrong nexthop buckets count"
+ nexthop_bucket_nhid_count_check 10 2 2
+ check_err $? "Wrong nexthop buckets count"
+
+ nexthop_resource_check 7
+ check_err $? "Wrong weighted nexthop occupancy"
+
+ $IP nexthop del id 10
+ nexthop_resource_check 2
+ check_err $? "Wrong nexthop occupancy after delete"
+
+ log_test "Resilient nexthop group add and delete"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_add_err_test()
+{
+ RET=0
+
+ nexthop_resource_set 2
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ $IP nexthop add id 10 group 1/2 type resilient buckets 4 &> /dev/null
+ check_fail $? "Nexthop group addition succeeded when should fail"
+
+ nexthop_resource_check 2
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Resilient nexthop group add failure"
+
+ $IP nexthop flush &> /dev/null
+ nexthop_resource_set 9999
+}
+
nexthop_group_replace_test()
{
RET=0
@@ -206,6 +313,411 @@ nexthop_group_replace_err_test()
nexthop_resource_set 9999
}
+nexthop_res_group_replace_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 3 via 192.0.2.4 dev dummy1
+ $IP nexthop add id 10 group 1/2 type resilient buckets 6
+
+ $IP nexthop replace id 10 group 1/2/3 type resilient
+ nexthop_check "id 10" "id 10 group 1/2/3 type resilient buckets 6 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+ check_err $? "Unexpected nexthop group entry"
+
+ nexthop_bucket_nhid_count_check 10 1 2
+ check_err $? "Wrong nexthop buckets count"
+ nexthop_bucket_nhid_count_check 10 2 2
+ check_err $? "Wrong nexthop buckets count"
+ nexthop_bucket_nhid_count_check 10 3 2
+ check_err $? "Wrong nexthop buckets count"
+
+ nexthop_resource_check 9
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Resilient nexthop group replace"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_replace_err_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 3 via 192.0.2.4 dev dummy1
+ $IP nexthop add id 10 group 1/2 type resilient buckets 6
+
+ ip netns exec testns1 \
+ echo 1 > $DEBUGFS_NET_DIR/fib/fail_res_nexthop_group_replace
+ $IP nexthop replace id 10 group 1/2/3 type resilient &> /dev/null
+ check_fail $? "Nexthop group replacement succeeded when should fail"
+
+ nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 6 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+ check_err $? "Unexpected nexthop group entry after failure"
+
+ nexthop_bucket_nhid_count_check 10 1 3
+ check_err $? "Wrong nexthop buckets count"
+ nexthop_bucket_nhid_count_check 10 2 3
+ check_err $? "Wrong nexthop buckets count"
+
+ nexthop_resource_check 9
+ check_err $? "Wrong nexthop occupancy after failure"
+
+ log_test "Resilient nexthop group replace failure"
+
+ $IP nexthop flush &> /dev/null
+ ip netns exec testns1 \
+ echo 0 > $DEBUGFS_NET_DIR/fib/fail_res_nexthop_group_replace
+}
+
+nexthop_res_mark_buckets_busy()
+{
+ local group_id=$1; shift
+ local nhid=$1; shift
+ local count=$1; shift
+ local index
+
+ for index in $($IP -j nexthop bucket show id $group_id nhid $nhid |
+ jq '.[].bucket.index' | head -n ${count:--0})
+ do
+ echo $group_id $index \
+ > $DEBUGFS_NET_DIR/fib/nexthop_bucket_activity
+ done
+}
+
+nexthop_res_num_nhid_buckets()
+{
+ local group_id=$1; shift
+ local nhid=$1; shift
+
+ $IP -j nexthop bucket show id $group_id nhid $nhid | jq length
+}
+
+nexthop_res_group_idle_timer_test()
+{
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1/2 type resilient buckets 8 idle_timer 4
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1/2,3 type resilient
+
+ nexthop_bucket_nhid_count_check 10 1 4 2 4
+ check_err $? "Group expected to be unbalanced"
+
+ sleep 6
+
+ nexthop_bucket_nhid_count_check 10 1 2 2 6
+ check_err $? "Group expected to be balanced"
+
+ log_test "Bucket migration after idle timer"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_idle_timer_del_test()
+{
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 3 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1,50/2,50/3,1 \
+ type resilient buckets 8 idle_timer 6
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1,50/2,150/3,1 type resilient
+
+ nexthop_bucket_nhid_count_check 10 1 4 2 4 3 0
+ check_err $? "Group expected to be unbalanced"
+
+ sleep 4
+
+ # Deletion prompts group replacement. Check that the bucket timers
+ # are kept.
+ $IP nexthop delete id 3
+
+ nexthop_bucket_nhid_count_check 10 1 4 2 4
+ check_err $? "Group expected to still be unbalanced"
+
+ sleep 4
+
+ nexthop_bucket_nhid_count_check 10 1 2 2 6
+ check_err $? "Group expected to be balanced"
+
+ log_test "Bucket migration after idle timer (with delete)"
+
+ $IP nexthop flush &> /dev/null
+}
+
+__nexthop_res_group_increase_timer_test()
+{
+ local timer=$1; shift
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1/2 type resilient buckets 8 $timer 4
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1/2,3 type resilient
+
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_fail $? "Group expected to be unbalanced"
+
+ sleep 2
+ $IP nexthop replace id 10 group 1/2,3 type resilient $timer 8
+ sleep 4
+
+ # 6 seconds, past the original timer.
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_fail $? "Group still expected to be unbalanced"
+
+ sleep 4
+
+ # 10 seconds, past the new timer.
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_err $? "Group expected to be balanced"
+
+ log_test "Bucket migration after $timer increase"
+
+ $IP nexthop flush &> /dev/null
+}
+
+__nexthop_res_group_decrease_timer_test()
+{
+ local timer=$1; shift
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1/2 type resilient buckets 8 $timer 8
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1/2,3 type resilient
+
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_fail $? "Group expected to be unbalanced"
+
+ sleep 2
+ $IP nexthop replace id 10 group 1/2,3 type resilient $timer 4
+ sleep 4
+
+ # 6 seconds, past the new timer, before the old timer.
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_err $? "Group expected to be balanced"
+
+ log_test "Bucket migration after $timer decrease"
+
+ $IP nexthop flush &> /dev/null
+}
+
+__nexthop_res_group_increase_timer_del_test()
+{
+ local timer=$1; shift
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 3 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1,100/2,100/3,1 \
+ type resilient buckets 8 $timer 4
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1,100/2,300/3,1 type resilient
+
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_fail $? "Group expected to be unbalanced"
+
+ sleep 2
+ $IP nexthop replace id 10 group 1/2,3 type resilient $timer 8
+ sleep 4
+
+ # 6 seconds, past the original timer.
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_fail $? "Group still expected to be unbalanced"
+
+ sleep 4
+
+ # 10 seconds, past the new timer.
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_err $? "Group expected to be balanced"
+
+ log_test "Bucket migration after $timer increase"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_increase_idle_timer_test()
+{
+ __nexthop_res_group_increase_timer_test idle_timer
+}
+
+nexthop_res_group_decrease_idle_timer_test()
+{
+ __nexthop_res_group_decrease_timer_test idle_timer
+}
+
+nexthop_res_group_unbalanced_timer_test()
+{
+ local i
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1/2 type resilient \
+ buckets 8 idle_timer 6 unbalanced_timer 10
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1/2,3 type resilient
+
+ for i in 1 2; do
+ sleep 4
+ nexthop_bucket_nhid_count_check 10 1 4 2 4
+ check_err $? "$i: Group expected to be unbalanced"
+ nexthop_res_mark_buckets_busy 10 1
+ done
+
+ # 3 x sleep 4 > unbalanced timer 10
+ sleep 4
+ nexthop_bucket_nhid_count_check 10 1 2 2 6
+ check_err $? "Group expected to be balanced"
+
+ log_test "Bucket migration after unbalanced timer"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_unbalanced_timer_del_test()
+{
+ local i
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 3 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1,50/2,50/3,1 type resilient \
+ buckets 8 idle_timer 6 unbalanced_timer 10
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1,50/2,150/3,1 type resilient
+
+ # Check that NH delete does not reset unbalanced time.
+ sleep 4
+ $IP nexthop delete id 3
+ nexthop_bucket_nhid_count_check 10 1 4 2 4
+ check_err $? "1: Group expected to be unbalanced"
+ nexthop_res_mark_buckets_busy 10 1
+
+ sleep 4
+ nexthop_bucket_nhid_count_check 10 1 4 2 4
+ check_err $? "2: Group expected to be unbalanced"
+ nexthop_res_mark_buckets_busy 10 1
+
+ # 3 x sleep 4 > unbalanced timer 10
+ sleep 4
+ nexthop_bucket_nhid_count_check 10 1 2 2 6
+ check_err $? "Group expected to be balanced"
+
+ log_test "Bucket migration after unbalanced timer (with delete)"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_no_unbalanced_timer_test()
+{
+ local i
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1/2 type resilient buckets 8
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1/2,3 type resilient
+
+ for i in $(seq 3); do
+ sleep 60
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_fail $? "$i: Group expected to be unbalanced"
+ nexthop_res_mark_buckets_busy 10 1
+ done
+
+ log_test "Buckets never force-migrated without unbalanced timer"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_short_unbalanced_timer_test()
+{
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1/2 type resilient \
+ buckets 8 idle_timer 120 unbalanced_timer 4
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1/2,3 type resilient
+
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_fail $? "Group expected to be unbalanced"
+
+ sleep 5
+
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_err $? "Group expected to be balanced"
+
+ log_test "Bucket migration after unbalanced < idle timer"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_increase_unbalanced_timer_test()
+{
+ __nexthop_res_group_increase_timer_test unbalanced_timer
+}
+
+nexthop_res_group_decrease_unbalanced_timer_test()
+{
+ __nexthop_res_group_decrease_timer_test unbalanced_timer
+}
+
+nexthop_res_group_force_migrate_busy_test()
+{
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1/2 type resilient \
+ buckets 8 idle_timer 120
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1/2,3 type resilient
+
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_fail $? "Group expected to be unbalanced"
+
+ $IP nexthop replace id 10 group 2 type resilient
+ nexthop_bucket_nhid_count_check 10 2 8
+ check_err $? "All buckets expected to have migrated"
+
+ log_test "Busy buckets force-migrated when NH removed"
+
+ $IP nexthop flush &> /dev/null
+}
+
nexthop_single_replace_test()
{
RET=0
@@ -299,6 +811,63 @@ nexthop_single_in_group_replace_err_test()
nexthop_resource_set 9999
}
+nexthop_single_in_res_group_replace_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 10 group 1/2 type resilient buckets 4
+
+ $IP nexthop replace id 1 via 192.0.2.4 dev dummy1
+ check_err $? "Failed to replace nexthop when should not"
+
+ nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+ check_err $? "Unexpected nexthop group entry"
+
+ nexthop_bucket_nhid_count_check 10 1 2 2 2
+ check_err $? "Wrong nexthop buckets count"
+
+ nexthop_resource_check 6
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Single nexthop replace while in resilient group"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_single_in_res_group_replace_err_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 10 group 1/2 type resilient buckets 4
+
+ ip netns exec testns1 \
+ echo 1 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace
+ $IP nexthop replace id 1 via 192.0.2.4 dev dummy1 &> /dev/null
+ check_fail $? "Nexthop replacement succeeded when should fail"
+
+ nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap"
+ check_err $? "Unexpected nexthop entry after failure"
+
+ nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+ check_err $? "Unexpected nexthop group entry after failure"
+
+ nexthop_bucket_nhid_count_check 10 1 2 2 2
+ check_err $? "Wrong nexthop buckets count"
+
+ nexthop_resource_check 6
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Single nexthop replace while in resilient group failure"
+
+ $IP nexthop flush &> /dev/null
+ ip netns exec testns1 \
+ echo 0 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace
+}
+
nexthop_single_in_group_delete_test()
{
RET=0
@@ -346,6 +915,57 @@ nexthop_single_in_group_delete_err_test()
nexthop_resource_set 9999
}
+nexthop_single_in_res_group_delete_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 10 group 1/2 type resilient buckets 4
+
+ $IP nexthop del id 1
+ nexthop_check "id 10" "id 10 group 2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+ check_err $? "Unexpected nexthop group entry"
+
+ nexthop_bucket_nhid_count_check 10 2 4
+ check_err $? "Wrong nexthop buckets count"
+
+ nexthop_resource_check 5
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Single nexthop delete while in resilient group"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_single_in_res_group_delete_err_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 3 via 192.0.2.4 dev dummy1
+ $IP nexthop add id 10 group 1/2/3 type resilient buckets 6
+
+ ip netns exec testns1 \
+ echo 1 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace
+ $IP nexthop del id 1
+
+ # We failed to replace the two nexthop buckets that were originally
+ # assigned to nhid 1.
+ nexthop_bucket_nhid_count_check 10 2 2 3 2
+ check_err $? "Wrong nexthop buckets count"
+
+ nexthop_resource_check 8
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Single nexthop delete while in resilient group failure"
+
+ $IP nexthop flush &> /dev/null
+ ip netns exec testns1 \
+ echo 0 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace
+}
+
nexthop_replay_test()
{
RET=0
diff --git a/tools/testing/selftests/drivers/net/netdevsim/psample.sh b/tools/testing/selftests/drivers/net/netdevsim/psample.sh
new file mode 100755
index 000000000000..ee10b1a8933c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/psample.sh
@@ -0,0 +1,181 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test is for checking the psample module. It makes use of netdevsim
+# which periodically generates "sampled" packets.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ psample_enable_test
+ psample_group_num_test
+ psample_md_test
+"
+NETDEVSIM_PATH=/sys/bus/netdevsim/
+DEV_ADDR=1337
+DEV=netdevsim${DEV_ADDR}
+DEVLINK_DEV=netdevsim/${DEV}
+SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/
+PSAMPLE_DIR=/sys/kernel/debug/netdevsim/$DEV/psample/
+CAPTURE_FILE=$(mktemp)
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+# Available at https://github.com/Mellanox/libpsample
+require_command psample
+
+psample_capture()
+{
+ rm -f $CAPTURE_FILE
+
+ timeout 2 ip netns exec testns1 psample &> $CAPTURE_FILE
+}
+
+psample_enable_test()
+{
+ RET=0
+
+ echo 1 > $PSAMPLE_DIR/enable
+ check_err $? "Failed to enable sampling when should not"
+
+ echo 1 > $PSAMPLE_DIR/enable 2>/dev/null
+ check_fail $? "Sampling enablement succeeded when should fail"
+
+ psample_capture
+ if [ $(cat $CAPTURE_FILE | wc -l) -eq 0 ]; then
+ check_err 1 "Failed to capture sampled packets"
+ fi
+
+ echo 0 > $PSAMPLE_DIR/enable
+ check_err $? "Failed to disable sampling when should not"
+
+ echo 0 > $PSAMPLE_DIR/enable 2>/dev/null
+ check_fail $? "Sampling disablement succeeded when should fail"
+
+ psample_capture
+ if [ $(cat $CAPTURE_FILE | wc -l) -ne 0 ]; then
+ check_err 1 "Captured sampled packets when should not"
+ fi
+
+ log_test "psample enable / disable"
+}
+
+psample_group_num_test()
+{
+ RET=0
+
+ echo 1234 > $PSAMPLE_DIR/group_num
+ echo 1 > $PSAMPLE_DIR/enable
+
+ psample_capture
+ grep -q -e "group 1234" $CAPTURE_FILE
+ check_err $? "Sampled packets reported with wrong group number"
+
+ # New group number should only be used after disable / enable.
+ echo 4321 > $PSAMPLE_DIR/group_num
+
+ psample_capture
+ grep -q -e "group 4321" $CAPTURE_FILE
+ check_fail $? "Group number changed while sampling is active"
+
+ echo 0 > $PSAMPLE_DIR/enable && echo 1 > $PSAMPLE_DIR/enable
+
+ psample_capture
+ grep -q -e "group 4321" $CAPTURE_FILE
+ check_err $? "Group number did not change after restarting sampling"
+
+ log_test "psample group number"
+
+ echo 0 > $PSAMPLE_DIR/enable
+}
+
+psample_md_test()
+{
+ RET=0
+
+ echo 1 > $PSAMPLE_DIR/enable
+
+ echo 1234 > $PSAMPLE_DIR/in_ifindex
+ echo 4321 > $PSAMPLE_DIR/out_ifindex
+ psample_capture
+
+ grep -q -e "in-ifindex 1234" $CAPTURE_FILE
+ check_err $? "Sampled packets reported with wrong in-ifindex"
+
+ grep -q -e "out-ifindex 4321" $CAPTURE_FILE
+ check_err $? "Sampled packets reported with wrong out-ifindex"
+
+ echo 5 > $PSAMPLE_DIR/out_tc
+ psample_capture
+
+ grep -q -e "out-tc 5" $CAPTURE_FILE
+ check_err $? "Sampled packets reported with wrong out-tc"
+
+ echo $((2**16 - 1)) > $PSAMPLE_DIR/out_tc
+ psample_capture
+
+ grep -q -e "out-tc " $CAPTURE_FILE
+ check_fail $? "Sampled packets reported with out-tc when should not"
+
+ echo 1 > $PSAMPLE_DIR/out_tc
+ echo 10000 > $PSAMPLE_DIR/out_tc_occ_max
+ psample_capture
+
+ grep -q -e "out-tc-occ " $CAPTURE_FILE
+ check_err $? "Sampled packets not reported with out-tc-occ when should"
+
+ echo 0 > $PSAMPLE_DIR/out_tc_occ_max
+ psample_capture
+
+ grep -q -e "out-tc-occ " $CAPTURE_FILE
+ check_fail $? "Sampled packets reported with out-tc-occ when should not"
+
+ echo 10000 > $PSAMPLE_DIR/latency_max
+ psample_capture
+
+ grep -q -e "latency " $CAPTURE_FILE
+ check_err $? "Sampled packets not reported with latency when should"
+
+ echo 0 > $PSAMPLE_DIR/latency_max
+ psample_capture
+
+ grep -q -e "latency " $CAPTURE_FILE
+ check_fail $? "Sampled packets reported with latency when should not"
+
+ log_test "psample metadata"
+
+ echo 0 > $PSAMPLE_DIR/enable
+}
+
+setup_prepare()
+{
+ modprobe netdevsim &> /dev/null
+
+ echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device
+ while [ ! -d $SYSFS_NET_DIR ] ; do :; done
+
+ set -e
+
+ ip netns add testns1
+ devlink dev reload $DEVLINK_DEV netns testns1
+
+ set +e
+}
+
+cleanup()
+{
+ pre_cleanup
+ rm -f $CAPTURE_FILE
+ ip netns del testns1
+ echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device
+ modprobe -r netdevsim &> /dev/null
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/firmware/fw_namespace.c b/tools/testing/selftests/firmware/fw_namespace.c
index 5ebc1aec7923..0e393cb5f42d 100644
--- a/tools/testing/selftests/firmware/fw_namespace.c
+++ b/tools/testing/selftests/firmware/fw_namespace.c
@@ -95,7 +95,7 @@ static bool test_fw_in_ns(const char *fw_name, const char *sys_path, bool block_
}
if (block_fw_in_parent_ns)
umount("/lib/firmware");
- return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false;
+ return WEXITSTATUS(status) == EXIT_SUCCESS;
}
if (unshare(CLONE_NEWNS) != 0) {
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 32b87cc77c8e..7bd7e776c266 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -8,10 +8,13 @@
/x86_64/debug_regs
/x86_64/evmcs_test
/x86_64/get_cpuid_test
+/x86_64/get_msr_index_features
/x86_64/kvm_pv_test
+/x86_64/hyperv_clock
/x86_64/hyperv_cpuid
/x86_64/mmio_warning_test
/x86_64/platform_info_test
+/x86_64/set_boot_cpu_id
/x86_64/set_sregs_test
/x86_64/smm_test
/x86_64/state_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index a6d61f451f88..cb95b5bace7b 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
-include ../../../../scripts/Kbuild.include
+include ../../../build/Build.include
all:
@@ -39,12 +39,15 @@ LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c
LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c
TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test
+TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features
TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
TEST_GEN_PROGS_x86_64 += x86_64/get_cpuid_test
+TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
+TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id
TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
TEST_GEN_PROGS_x86_64 += x86_64/smm_test
TEST_GEN_PROGS_x86_64 += x86_64/state_test
diff --git a/tools/testing/selftests/kvm/hardware_disable_test.c b/tools/testing/selftests/kvm/hardware_disable_test.c
index 2f2eeb8a1d86..5aadf84c91c0 100644
--- a/tools/testing/selftests/kvm/hardware_disable_test.c
+++ b/tools/testing/selftests/kvm/hardware_disable_test.c
@@ -108,7 +108,7 @@ static void run_test(uint32_t run)
kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
vm_create_irqchip(vm);
- fprintf(stderr, "%s: [%d] start vcpus\n", __func__, run);
+ pr_debug("%s: [%d] start vcpus\n", __func__, run);
for (i = 0; i < VCPU_NUM; ++i) {
vm_vcpu_add_default(vm, i, guest_code);
payloads[i].vm = vm;
@@ -124,7 +124,7 @@ static void run_test(uint32_t run)
check_set_affinity(throw_away, &cpu_set);
}
}
- fprintf(stderr, "%s: [%d] all threads launched\n", __func__, run);
+ pr_debug("%s: [%d] all threads launched\n", __func__, run);
sem_post(sem);
for (i = 0; i < VCPU_NUM; ++i)
check_join(threads[i], &b);
@@ -147,16 +147,16 @@ int main(int argc, char **argv)
if (pid == 0)
run_test(i); /* This function always exits */
- fprintf(stderr, "%s: [%d] waiting semaphore\n", __func__, i);
+ pr_debug("%s: [%d] waiting semaphore\n", __func__, i);
sem_wait(sem);
r = (rand() % DELAY_US_MAX) + 1;
- fprintf(stderr, "%s: [%d] waiting %dus\n", __func__, i, r);
+ pr_debug("%s: [%d] waiting %dus\n", __func__, i, r);
usleep(r);
r = waitpid(pid, &s, WNOHANG);
TEST_ASSERT(r != pid,
"%s: [%d] child exited unexpectedly status: [%d]",
__func__, i, s);
- fprintf(stderr, "%s: [%d] killing child\n", __func__, i);
+ pr_debug("%s: [%d] killing child\n", __func__, i);
kill(pid, SIGKILL);
}
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 2d7eb6989e83..0f4258eaa629 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -16,6 +16,7 @@
#include "sparsebit.h"
+#define KVM_DEV_PATH "/dev/kvm"
#define KVM_MAX_VCPUS 512
/*
@@ -133,6 +134,7 @@ void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
void *arg);
void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
+int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg);
void kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
int _kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index e5fbf16f725b..b8849a1aca79 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -1697,11 +1697,16 @@ void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
{
int ret;
- ret = ioctl(vm->fd, cmd, arg);
+ ret = _vm_ioctl(vm, cmd, arg);
TEST_ASSERT(ret == 0, "vm ioctl %lu failed, rc: %i errno: %i (%s)",
cmd, ret, errno, strerror(errno));
}
+int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
+{
+ return ioctl(vm->fd, cmd, arg);
+}
+
/*
* KVM system ioctl
*
diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
index 34465dc562d8..91ce1b5d480b 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h
+++ b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
@@ -10,8 +10,6 @@
#include "sparsebit.h"
-#define KVM_DEV_PATH "/dev/kvm"
-
struct userspace_mem_region {
struct kvm_userspace_memory_region region;
struct sparsebit *unused_phy_pages;
diff --git a/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c b/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c
new file mode 100644
index 000000000000..cb953df4d7d0
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that KVM_GET_MSR_INDEX_LIST and
+ * KVM_GET_MSR_FEATURE_INDEX_LIST work as intended
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+static int kvm_num_index_msrs(int kvm_fd, int nmsrs)
+{
+ struct kvm_msr_list *list;
+ int r;
+
+ list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
+ list->nmsrs = nmsrs;
+ r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
+ TEST_ASSERT(r == -1 && errno == E2BIG,
+ "Unexpected result from KVM_GET_MSR_INDEX_LIST probe, r: %i",
+ r);
+
+ r = list->nmsrs;
+ free(list);
+ return r;
+}
+
+static void test_get_msr_index(void)
+{
+ int old_res, res, kvm_fd, r;
+ struct kvm_msr_list *list;
+
+ kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
+ if (kvm_fd < 0)
+ exit(KSFT_SKIP);
+
+ old_res = kvm_num_index_msrs(kvm_fd, 0);
+ TEST_ASSERT(old_res != 0, "Expecting nmsrs to be > 0");
+
+ if (old_res != 1) {
+ res = kvm_num_index_msrs(kvm_fd, 1);
+ TEST_ASSERT(res > 1, "Expecting nmsrs to be > 1");
+ TEST_ASSERT(res == old_res, "Expecting nmsrs to be identical");
+ }
+
+ list = malloc(sizeof(*list) + old_res * sizeof(list->indices[0]));
+ list->nmsrs = old_res;
+ r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
+
+ TEST_ASSERT(r == 0,
+ "Unexpected result from KVM_GET_MSR_FEATURE_INDEX_LIST, r: %i",
+ r);
+ TEST_ASSERT(list->nmsrs == old_res, "Expecting nmsrs to be identical");
+ free(list);
+
+ close(kvm_fd);
+}
+
+static int kvm_num_feature_msrs(int kvm_fd, int nmsrs)
+{
+ struct kvm_msr_list *list;
+ int r;
+
+ list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
+ list->nmsrs = nmsrs;
+ r = ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list);
+ TEST_ASSERT(r == -1 && errno == E2BIG,
+ "Unexpected result from KVM_GET_MSR_FEATURE_INDEX_LIST probe, r: %i",
+ r);
+
+ r = list->nmsrs;
+ free(list);
+ return r;
+}
+
+struct kvm_msr_list *kvm_get_msr_feature_list(int kvm_fd, int nmsrs)
+{
+ struct kvm_msr_list *list;
+ int r;
+
+ list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
+ list->nmsrs = nmsrs;
+ r = ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list);
+
+ TEST_ASSERT(r == 0,
+ "Unexpected result from KVM_GET_MSR_FEATURE_INDEX_LIST, r: %i",
+ r);
+
+ return list;
+}
+
+static void test_get_msr_feature(void)
+{
+ int res, old_res, i, kvm_fd;
+ struct kvm_msr_list *feature_list;
+
+ kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
+ if (kvm_fd < 0)
+ exit(KSFT_SKIP);
+
+ old_res = kvm_num_feature_msrs(kvm_fd, 0);
+ TEST_ASSERT(old_res != 0, "Expecting nmsrs to be > 0");
+
+ if (old_res != 1) {
+ res = kvm_num_feature_msrs(kvm_fd, 1);
+ TEST_ASSERT(res > 1, "Expecting nmsrs to be > 1");
+ TEST_ASSERT(res == old_res, "Expecting nmsrs to be identical");
+ }
+
+ feature_list = kvm_get_msr_feature_list(kvm_fd, old_res);
+ TEST_ASSERT(old_res == feature_list->nmsrs,
+ "Unmatching number of msr indexes");
+
+ for (i = 0; i < feature_list->nmsrs; i++)
+ kvm_get_feature_msr(feature_list->indices[i]);
+
+ free(feature_list);
+ close(kvm_fd);
+}
+
+int main(int argc, char *argv[])
+{
+ if (kvm_check_cap(KVM_CAP_GET_MSR_FEATURES))
+ test_get_msr_feature();
+
+ test_get_msr_index();
+}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
new file mode 100644
index 000000000000..7f1d2765572c
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
@@ -0,0 +1,269 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ * Tests for Hyper-V clocksources
+ */
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+struct ms_hyperv_tsc_page {
+ volatile u32 tsc_sequence;
+ u32 reserved1;
+ volatile u64 tsc_scale;
+ volatile s64 tsc_offset;
+} __packed;
+
+#define HV_X64_MSR_GUEST_OS_ID 0x40000000
+#define HV_X64_MSR_TIME_REF_COUNT 0x40000020
+#define HV_X64_MSR_REFERENCE_TSC 0x40000021
+#define HV_X64_MSR_TSC_FREQUENCY 0x40000022
+#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106
+#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107
+
+/* Simplified mul_u64_u64_shr() */
+static inline u64 mul_u64_u64_shr64(u64 a, u64 b)
+{
+ union {
+ u64 ll;
+ struct {
+ u32 low, high;
+ } l;
+ } rm, rn, rh, a0, b0;
+ u64 c;
+
+ a0.ll = a;
+ b0.ll = b;
+
+ rm.ll = (u64)a0.l.low * b0.l.high;
+ rn.ll = (u64)a0.l.high * b0.l.low;
+ rh.ll = (u64)a0.l.high * b0.l.high;
+
+ rh.l.low = c = rm.l.high + rn.l.high + rh.l.low;
+ rh.l.high = (c >> 32) + rh.l.high;
+
+ return rh.ll;
+}
+
+static inline void nop_loop(void)
+{
+ int i;
+
+ for (i = 0; i < 1000000; i++)
+ asm volatile("nop");
+}
+
+static inline void check_tsc_msr_rdtsc(void)
+{
+ u64 tsc_freq, r1, r2, t1, t2;
+ s64 delta_ns;
+
+ tsc_freq = rdmsr(HV_X64_MSR_TSC_FREQUENCY);
+ GUEST_ASSERT(tsc_freq > 0);
+
+ /* First, check MSR-based clocksource */
+ r1 = rdtsc();
+ t1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+ nop_loop();
+ r2 = rdtsc();
+ t2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+
+ GUEST_ASSERT(r2 > r1 && t2 > t1);
+
+ /* HV_X64_MSR_TIME_REF_COUNT is in 100ns */
+ delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq);
+ if (delta_ns < 0)
+ delta_ns = -delta_ns;
+
+ /* 1% tolerance */
+ GUEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100);
+}
+
+static inline u64 get_tscpage_ts(struct ms_hyperv_tsc_page *tsc_page)
+{
+ return mul_u64_u64_shr64(rdtsc(), tsc_page->tsc_scale) + tsc_page->tsc_offset;
+}
+
+static inline void check_tsc_msr_tsc_page(struct ms_hyperv_tsc_page *tsc_page)
+{
+ u64 r1, r2, t1, t2;
+
+ /* Compare TSC page clocksource with HV_X64_MSR_TIME_REF_COUNT */
+ t1 = get_tscpage_ts(tsc_page);
+ r1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+
+ /* 10 ms tolerance */
+ GUEST_ASSERT(r1 >= t1 && r1 - t1 < 100000);
+ nop_loop();
+
+ t2 = get_tscpage_ts(tsc_page);
+ r2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+ GUEST_ASSERT(r2 >= t1 && r2 - t2 < 100000);
+}
+
+static void guest_main(struct ms_hyperv_tsc_page *tsc_page, vm_paddr_t tsc_page_gpa)
+{
+ u64 tsc_scale, tsc_offset;
+
+ /* Set Guest OS id to enable Hyper-V emulation */
+ GUEST_SYNC(1);
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, (u64)0x8100 << 48);
+ GUEST_SYNC(2);
+
+ check_tsc_msr_rdtsc();
+
+ GUEST_SYNC(3);
+
+ /* Set up TSC page is disabled state, check that it's clean */
+ wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa);
+ GUEST_ASSERT(tsc_page->tsc_sequence == 0);
+ GUEST_ASSERT(tsc_page->tsc_scale == 0);
+ GUEST_ASSERT(tsc_page->tsc_offset == 0);
+
+ GUEST_SYNC(4);
+
+ /* Set up TSC page is enabled state */
+ wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa | 0x1);
+ GUEST_ASSERT(tsc_page->tsc_sequence != 0);
+
+ GUEST_SYNC(5);
+
+ check_tsc_msr_tsc_page(tsc_page);
+
+ GUEST_SYNC(6);
+
+ tsc_offset = tsc_page->tsc_offset;
+ /* Call KVM_SET_CLOCK from userspace, check that TSC page was updated */
+
+ GUEST_SYNC(7);
+ /* Sanity check TSC page timestamp, it should be close to 0 */
+ GUEST_ASSERT(get_tscpage_ts(tsc_page) < 100000);
+
+ GUEST_ASSERT(tsc_page->tsc_offset != tsc_offset);
+
+ nop_loop();
+
+ /*
+ * Enable Re-enlightenment and check that TSC page stays constant across
+ * KVM_SET_CLOCK.
+ */
+ wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0x1 << 16 | 0xff);
+ wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0x1);
+ tsc_offset = tsc_page->tsc_offset;
+ tsc_scale = tsc_page->tsc_scale;
+ GUEST_SYNC(8);
+ GUEST_ASSERT(tsc_page->tsc_offset == tsc_offset);
+ GUEST_ASSERT(tsc_page->tsc_scale == tsc_scale);
+
+ GUEST_SYNC(9);
+
+ check_tsc_msr_tsc_page(tsc_page);
+
+ /*
+ * Disable re-enlightenment and TSC page, check that KVM doesn't update
+ * it anymore.
+ */
+ wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0);
+ wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0);
+ wrmsr(HV_X64_MSR_REFERENCE_TSC, 0);
+ memset(tsc_page, 0, sizeof(*tsc_page));
+
+ GUEST_SYNC(10);
+ GUEST_ASSERT(tsc_page->tsc_sequence == 0);
+ GUEST_ASSERT(tsc_page->tsc_offset == 0);
+ GUEST_ASSERT(tsc_page->tsc_scale == 0);
+
+ GUEST_DONE();
+}
+
+#define VCPU_ID 0
+
+static void host_check_tsc_msr_rdtsc(struct kvm_vm *vm)
+{
+ u64 tsc_freq, r1, r2, t1, t2;
+ s64 delta_ns;
+
+ tsc_freq = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TSC_FREQUENCY);
+ TEST_ASSERT(tsc_freq > 0, "TSC frequency must be nonzero");
+
+ /* First, check MSR-based clocksource */
+ r1 = rdtsc();
+ t1 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT);
+ nop_loop();
+ r2 = rdtsc();
+ t2 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT);
+
+ TEST_ASSERT(t2 > t1, "Time reference MSR is not monotonic (%ld <= %ld)", t1, t2);
+
+ /* HV_X64_MSR_TIME_REF_COUNT is in 100ns */
+ delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq);
+ if (delta_ns < 0)
+ delta_ns = -delta_ns;
+
+ /* 1% tolerance */
+ TEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100,
+ "Elapsed time does not match (MSR=%ld, TSC=%ld)",
+ (t2 - t1) * 100, (r2 - r1) * 1000000000 / tsc_freq);
+}
+
+int main(void)
+{
+ struct kvm_vm *vm;
+ struct kvm_run *run;
+ struct ucall uc;
+ vm_vaddr_t tsc_page_gva;
+ int stage;
+
+ vm = vm_create_default(VCPU_ID, 0, guest_main);
+ run = vcpu_state(vm, VCPU_ID);
+
+ vcpu_set_hv_cpuid(vm, VCPU_ID);
+
+ tsc_page_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ memset(addr_gpa2hva(vm, tsc_page_gva), 0x0, getpagesize());
+ TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0,
+ "TSC page has to be page aligned\n");
+ vcpu_args_set(vm, VCPU_ID, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva));
+
+ host_check_tsc_msr_rdtsc(vm);
+
+ for (stage = 1;; stage++) {
+ _vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Stage %d: unexpected exit reason: %u (%s),\n",
+ stage, run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_ABORT:
+ TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
+ __FILE__, uc.args[1]);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ /* Keep in sync with guest_main() */
+ TEST_ASSERT(stage == 11, "Testing ended prematurely, stage %d\n",
+ stage);
+ goto out;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+ uc.args[1] == stage,
+ "Stage %d: Unexpected register values vmexit, got %lx",
+ stage, (ulong)uc.args[1]);
+
+ /* Reset kvmclock triggering TSC page update */
+ if (stage == 7 || stage == 8 || stage == 10) {
+ struct kvm_clock_data clock = {0};
+
+ vm_ioctl(vm, KVM_SET_CLOCK, &clock);
+ }
+ }
+
+out:
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
new file mode 100644
index 000000000000..12c558fc8074
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that KVM_SET_BOOT_CPU_ID works as intended
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#define _GNU_SOURCE /* for program_invocation_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#define N_VCPU 2
+#define VCPU_ID0 0
+#define VCPU_ID1 1
+
+static uint32_t get_bsp_flag(void)
+{
+ return rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_BSP;
+}
+
+static void guest_bsp_vcpu(void *arg)
+{
+ GUEST_SYNC(1);
+
+ GUEST_ASSERT(get_bsp_flag() != 0);
+
+ GUEST_DONE();
+}
+
+static void guest_not_bsp_vcpu(void *arg)
+{
+ GUEST_SYNC(1);
+
+ GUEST_ASSERT(get_bsp_flag() == 0);
+
+ GUEST_DONE();
+}
+
+static void test_set_boot_busy(struct kvm_vm *vm)
+{
+ int res;
+
+ res = _vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID0);
+ TEST_ASSERT(res == -1 && errno == EBUSY,
+ "KVM_SET_BOOT_CPU_ID set while running vm");
+}
+
+static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid)
+{
+ struct ucall uc;
+ int stage;
+
+ for (stage = 0; stage < 2; stage++) {
+
+ vcpu_run(vm, vcpuid);
+
+ switch (get_ucall(vm, vcpuid, &uc)) {
+ case UCALL_SYNC:
+ TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+ uc.args[1] == stage + 1,
+ "Stage %d: Unexpected register values vmexit, got %lx",
+ stage + 1, (ulong)uc.args[1]);
+ test_set_boot_busy(vm);
+ break;
+ case UCALL_DONE:
+ TEST_ASSERT(stage == 1,
+ "Expected GUEST_DONE in stage 2, got stage %d",
+ stage);
+ break;
+ case UCALL_ABORT:
+ TEST_ASSERT(false, "%s at %s:%ld\n\tvalues: %#lx, %#lx",
+ (const char *)uc.args[0], __FILE__,
+ uc.args[1], uc.args[2], uc.args[3]);
+ default:
+ TEST_ASSERT(false, "Unexpected exit: %s",
+ exit_reason_str(vcpu_state(vm, vcpuid)->exit_reason));
+ }
+ }
+}
+
+static struct kvm_vm *create_vm(void)
+{
+ struct kvm_vm *vm;
+ uint64_t vcpu_pages = (DEFAULT_STACK_PGS) * 2;
+ uint64_t extra_pg_pages = vcpu_pages / PTES_PER_MIN_PAGE * N_VCPU;
+ uint64_t pages = DEFAULT_GUEST_PHY_PAGES + vcpu_pages + extra_pg_pages;
+
+ pages = vm_adjust_num_guest_pages(VM_MODE_DEFAULT, pages);
+ vm = vm_create(VM_MODE_DEFAULT, pages, O_RDWR);
+
+ kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+ vm_create_irqchip(vm);
+
+ return vm;
+}
+
+static void add_x86_vcpu(struct kvm_vm *vm, uint32_t vcpuid, bool bsp_code)
+{
+ if (bsp_code)
+ vm_vcpu_add_default(vm, vcpuid, guest_bsp_vcpu);
+ else
+ vm_vcpu_add_default(vm, vcpuid, guest_not_bsp_vcpu);
+
+ vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid());
+}
+
+static void run_vm_bsp(uint32_t bsp_vcpu)
+{
+ struct kvm_vm *vm;
+ bool is_bsp_vcpu1 = bsp_vcpu == VCPU_ID1;
+
+ vm = create_vm();
+
+ if (is_bsp_vcpu1)
+ vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID1);
+
+ add_x86_vcpu(vm, VCPU_ID0, !is_bsp_vcpu1);
+ add_x86_vcpu(vm, VCPU_ID1, is_bsp_vcpu1);
+
+ run_vcpu(vm, VCPU_ID0);
+ run_vcpu(vm, VCPU_ID1);
+
+ kvm_vm_free(vm);
+}
+
+static void check_set_bsp_busy(void)
+{
+ struct kvm_vm *vm;
+ int res;
+
+ vm = create_vm();
+
+ add_x86_vcpu(vm, VCPU_ID0, true);
+ add_x86_vcpu(vm, VCPU_ID1, false);
+
+ res = _vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID1);
+ TEST_ASSERT(res == -1 && errno == EBUSY, "KVM_SET_BOOT_CPU_ID set after adding vcpu");
+
+ run_vcpu(vm, VCPU_ID0);
+ run_vcpu(vm, VCPU_ID1);
+
+ res = _vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID1);
+ TEST_ASSERT(res == -1 && errno == EBUSY, "KVM_SET_BOOT_CPU_ID set to a terminated vcpu");
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ if (!kvm_check_cap(KVM_CAP_SET_BOOT_CPU_ID)) {
+ print_skip("set_boot_cpu_id not available");
+ return 0;
+ }
+
+ run_vm_bsp(VCPU_ID0);
+ run_vm_bsp(VCPU_ID1);
+ run_vm_bsp(VCPU_ID0);
+
+ check_set_bsp_busy();
+}
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index a5ce26d548e4..0af84ad48aa7 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -1,6 +1,10 @@
# This mimics the top-level Makefile. We do it explicitly here so that this
# Makefile can operate with or without the kbuild infrastructure.
+ifneq ($(LLVM),)
+CC := clang
+else
CC := $(CROSS_COMPILE)gcc
+endif
ifeq (0,$(MAKELEVEL))
ifeq ($(OUTPUT),)
@@ -74,7 +78,8 @@ ifdef building_out_of_srctree
rsync -aq $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(OUTPUT); \
fi
@if [ "X$(TEST_PROGS)" != "X" ]; then \
- $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(OUTPUT)/$(TEST_PROGS)) ; \
+ $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) \
+ $(addprefix $(OUTPUT)/,$(TEST_PROGS))) ; \
else \
$(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS)); \
fi
diff --git a/tools/testing/selftests/lkdtm/.gitignore b/tools/testing/selftests/lkdtm/.gitignore
index f26212605b6b..d4b0be857deb 100644
--- a/tools/testing/selftests/lkdtm/.gitignore
+++ b/tools/testing/selftests/lkdtm/.gitignore
@@ -1,2 +1,3 @@
*.sh
!run.sh
+!stack-entropy.sh
diff --git a/tools/testing/selftests/lkdtm/Makefile b/tools/testing/selftests/lkdtm/Makefile
index 1bcc9ee990eb..c71109ceeb2d 100644
--- a/tools/testing/selftests/lkdtm/Makefile
+++ b/tools/testing/selftests/lkdtm/Makefile
@@ -5,6 +5,7 @@ include ../lib.mk
# NOTE: $(OUTPUT) won't get default value if used before lib.mk
TEST_FILES := tests.txt
+TEST_PROGS := stack-entropy.sh
TEST_GEN_PROGS = $(patsubst %,$(OUTPUT)/%.sh,$(shell awk '{print $$1}' tests.txt | sed -e 's/\#//'))
all: $(TEST_GEN_PROGS)
diff --git a/tools/testing/selftests/lkdtm/stack-entropy.sh b/tools/testing/selftests/lkdtm/stack-entropy.sh
new file mode 100755
index 000000000000..b1b8a5097cbb
--- /dev/null
+++ b/tools/testing/selftests/lkdtm/stack-entropy.sh
@@ -0,0 +1,36 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Measure kernel stack entropy by sampling via LKDTM's REPORT_STACK test.
+set -e
+samples="${1:-1000}"
+
+# Capture dmesg continuously since it may fill up depending on sample size.
+log=$(mktemp -t stack-entropy-XXXXXX)
+dmesg --follow >"$log" & pid=$!
+report=-1
+for i in $(seq 1 $samples); do
+ echo "REPORT_STACK" >/sys/kernel/debug/provoke-crash/DIRECT
+ if [ -t 1 ]; then
+ percent=$(( 100 * $i / $samples ))
+ if [ "$percent" -ne "$report" ]; then
+ /bin/echo -en "$percent%\r"
+ report="$percent"
+ fi
+ fi
+done
+kill "$pid"
+
+# Count unique offsets since last run.
+seen=$(tac "$log" | grep -m1 -B"$samples"0 'Starting stack offset' | \
+ grep 'Stack offset' | awk '{print $NF}' | sort | uniq -c | wc -l)
+bits=$(echo "obase=2; $seen" | bc | wc -L)
+echo "Bits of stack entropy: $bits"
+rm -f "$log"
+
+# We would expect any functional stack randomization to be at least 5 bits.
+if [ "$bits" -lt 5 ]; then
+ exit 1
+else
+ exit 0
+fi
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 25f198bec0b2..3915bb7bfc39 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -23,6 +23,8 @@ TEST_PROGS += drop_monitor_tests.sh
TEST_PROGS += vrf_route_leaking.sh
TEST_PROGS += bareudp.sh
TEST_PROGS += unicast_extensions.sh
+TEST_PROGS += udpgro_fwd.sh
+TEST_PROGS += veth.sh
TEST_PROGS_EXTENDED := in_netns.sh
TEST_GEN_FILES = socket nettest
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
@@ -37,6 +39,8 @@ TEST_GEN_FILES += ipsec
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
+TEST_FILES := settings
+
KSFT_KHDR_INSTALL := 1
include ../lib.mk
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index d98fb85e201c..49774a8a7736 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -19,10 +19,39 @@ ret=0
ksft_skip=4
# all tests in this script. Can be overridden with -t option
-IPV4_TESTS="ipv4_fcnal ipv4_grp_fcnal ipv4_withv6_fcnal ipv4_fcnal_runtime ipv4_large_grp ipv4_compat_mode ipv4_fdb_grp_fcnal ipv4_torture"
-IPV6_TESTS="ipv6_fcnal ipv6_grp_fcnal ipv6_fcnal_runtime ipv6_large_grp ipv6_compat_mode ipv6_fdb_grp_fcnal ipv6_torture"
-
-ALL_TESTS="basic ${IPV4_TESTS} ${IPV6_TESTS}"
+IPV4_TESTS="
+ ipv4_fcnal
+ ipv4_grp_fcnal
+ ipv4_res_grp_fcnal
+ ipv4_withv6_fcnal
+ ipv4_fcnal_runtime
+ ipv4_large_grp
+ ipv4_large_res_grp
+ ipv4_compat_mode
+ ipv4_fdb_grp_fcnal
+ ipv4_torture
+ ipv4_res_torture
+"
+
+IPV6_TESTS="
+ ipv6_fcnal
+ ipv6_grp_fcnal
+ ipv6_res_grp_fcnal
+ ipv6_fcnal_runtime
+ ipv6_large_grp
+ ipv6_large_res_grp
+ ipv6_compat_mode
+ ipv6_fdb_grp_fcnal
+ ipv6_torture
+ ipv6_res_torture
+"
+
+ALL_TESTS="
+ basic
+ basic_res
+ ${IPV4_TESTS}
+ ${IPV6_TESTS}
+"
TESTS="${ALL_TESTS}"
VERBOSE=0
PAUSE_ON_FAIL=no
@@ -232,6 +261,19 @@ check_nexthop()
check_output "${out}" "${expected}"
}
+check_nexthop_bucket()
+{
+ local nharg="$1"
+ local expected="$2"
+ local out
+
+ # remove the idle time since we cannot match it
+ out=$($IP nexthop bucket ${nharg} \
+ | sed s/idle_time\ [0-9.]*\ // 2>/dev/null)
+
+ check_output "${out}" "${expected}"
+}
+
check_route()
{
local pfx="$1"
@@ -308,6 +350,25 @@ check_large_grp()
log_test $? 0 "Dump large (x$ecmp) ecmp groups"
}
+check_large_res_grp()
+{
+ local ipv=$1
+ local buckets=$2
+ local ipstr=""
+
+ if [ $ipv -eq 4 ]; then
+ ipstr="172.16.1.2"
+ else
+ ipstr="2001:db8:91::2"
+ fi
+
+ # create a resilient group with $buckets buckets and dump them
+ run_cmd "$IP nexthop add id 100 via $ipstr dev veth1"
+ run_cmd "$IP nexthop add id 1000 group 100 type resilient buckets $buckets"
+ run_cmd "$IP nexthop bucket list"
+ log_test $? 0 "Dump large (x$buckets) nexthop buckets"
+}
+
start_ip_monitor()
{
local mtype=$1
@@ -344,6 +405,15 @@ check_nexthop_fdb_support()
fi
}
+check_nexthop_res_support()
+{
+ $IP nexthop help 2>&1 | grep -q resilient
+ if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 too old, missing resilient nexthop group support"
+ return $ksft_skip
+ fi
+}
+
ipv6_fdb_grp_fcnal()
{
local rc
@@ -666,6 +736,70 @@ ipv6_grp_fcnal()
log_test $? 2 "Nexthop group can not have a blackhole and another nexthop"
}
+ipv6_res_grp_fcnal()
+{
+ local rc
+
+ echo
+ echo "IPv6 resilient groups functional"
+ echo "--------------------------------"
+
+ check_nexthop_res_support
+ if [ $? -eq $ksft_skip ]; then
+ return $ksft_skip
+ fi
+
+ #
+ # migration of nexthop buckets - equal weights
+ #
+ run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1"
+ run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+ run_cmd "$IP nexthop add id 102 group 62/63 type resilient buckets 2 idle_timer 0"
+
+ run_cmd "$IP nexthop del id 63"
+ check_nexthop "id 102" \
+ "id 102 group 62 type resilient buckets 2 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+ log_test $? 0 "Nexthop group updated when entry is deleted"
+ check_nexthop_bucket "list id 102" \
+ "id 102 index 0 nhid 62 id 102 index 1 nhid 62"
+ log_test $? 0 "Nexthop buckets updated when entry is deleted"
+
+ run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+ run_cmd "$IP nexthop replace id 102 group 62/63 type resilient buckets 2 idle_timer 0"
+ check_nexthop "id 102" \
+ "id 102 group 62/63 type resilient buckets 2 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+ log_test $? 0 "Nexthop group updated after replace"
+ check_nexthop_bucket "list id 102" \
+ "id 102 index 0 nhid 63 id 102 index 1 nhid 62"
+ log_test $? 0 "Nexthop buckets updated after replace"
+
+ $IP nexthop flush >/dev/null 2>&1
+
+ #
+ # migration of nexthop buckets - unequal weights
+ #
+ run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1"
+ run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+ run_cmd "$IP nexthop add id 102 group 62,3/63,1 type resilient buckets 4 idle_timer 0"
+
+ run_cmd "$IP nexthop del id 63"
+ check_nexthop "id 102" \
+ "id 102 group 62,3 type resilient buckets 4 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+ log_test $? 0 "Nexthop group updated when entry is deleted - nECMP"
+ check_nexthop_bucket "list id 102" \
+ "id 102 index 0 nhid 62 id 102 index 1 nhid 62 id 102 index 2 nhid 62 id 102 index 3 nhid 62"
+ log_test $? 0 "Nexthop buckets updated when entry is deleted - nECMP"
+
+ run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+ run_cmd "$IP nexthop replace id 102 group 62,3/63,1 type resilient buckets 4 idle_timer 0"
+ check_nexthop "id 102" \
+ "id 102 group 62,3/63 type resilient buckets 4 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+ log_test $? 0 "Nexthop group updated after replace - nECMP"
+ check_nexthop_bucket "list id 102" \
+ "id 102 index 0 nhid 63 id 102 index 1 nhid 62 id 102 index 2 nhid 62 id 102 index 3 nhid 62"
+ log_test $? 0 "Nexthop buckets updated after replace - nECMP"
+}
+
ipv6_fcnal_runtime()
{
local rc
@@ -824,6 +958,22 @@ ipv6_large_grp()
$IP nexthop flush >/dev/null 2>&1
}
+ipv6_large_res_grp()
+{
+ echo
+ echo "IPv6 large resilient group (128k buckets)"
+ echo "-----------------------------------------"
+
+ check_nexthop_res_support
+ if [ $? -eq $ksft_skip ]; then
+ return $ksft_skip
+ fi
+
+ check_large_res_grp 6 $((128 * 1024))
+
+ $IP nexthop flush >/dev/null 2>&1
+}
+
ipv6_del_add_loop1()
{
while :; do
@@ -874,11 +1024,67 @@ ipv6_torture()
sleep 300
kill -9 $pid1 $pid2 $pid3 $pid4 $pid5
+ wait $pid1 $pid2 $pid3 $pid4 $pid5 2>/dev/null
# if we did not crash, success
log_test 0 0 "IPv6 torture test"
}
+ipv6_res_grp_replace_loop()
+{
+ while :; do
+ $IP nexthop replace id 102 group 100/101 type resilient
+ done >/dev/null 2>&1
+}
+
+ipv6_res_torture()
+{
+ local pid1
+ local pid2
+ local pid3
+ local pid4
+ local pid5
+
+ echo
+ echo "IPv6 runtime resilient nexthop group torture"
+ echo "--------------------------------------------"
+
+ check_nexthop_res_support
+ if [ $? -eq $ksft_skip ]; then
+ return $ksft_skip
+ fi
+
+ if [ ! -x "$(command -v mausezahn)" ]; then
+ echo "SKIP: Could not run test; need mausezahn tool"
+ return
+ fi
+
+ run_cmd "$IP nexthop add id 100 via 2001:db8:91::2 dev veth1"
+ run_cmd "$IP nexthop add id 101 via 2001:db8:92::2 dev veth3"
+ run_cmd "$IP nexthop add id 102 group 100/101 type resilient buckets 512 idle_timer 0"
+ run_cmd "$IP route add 2001:db8:101::1 nhid 102"
+ run_cmd "$IP route add 2001:db8:101::2 nhid 102"
+
+ ipv6_del_add_loop1 &
+ pid1=$!
+ ipv6_res_grp_replace_loop &
+ pid2=$!
+ ip netns exec me ping -f 2001:db8:101::1 >/dev/null 2>&1 &
+ pid3=$!
+ ip netns exec me ping -f 2001:db8:101::2 >/dev/null 2>&1 &
+ pid4=$!
+ ip netns exec me mausezahn -6 veth1 \
+ -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 \
+ -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
+ pid5=$!
+
+ sleep 300
+ kill -9 $pid1 $pid2 $pid3 $pid4 $pid5
+ wait $pid1 $pid2 $pid3 $pid4 $pid5 2>/dev/null
+
+ # if we did not crash, success
+ log_test 0 0 "IPv6 resilient nexthop group torture test"
+}
ipv4_fcnal()
{
@@ -1038,6 +1244,70 @@ ipv4_grp_fcnal()
log_test $? 2 "Nexthop group can not have a blackhole and another nexthop"
}
+ipv4_res_grp_fcnal()
+{
+ local rc
+
+ echo
+ echo "IPv4 resilient groups functional"
+ echo "--------------------------------"
+
+ check_nexthop_res_support
+ if [ $? -eq $ksft_skip ]; then
+ return $ksft_skip
+ fi
+
+ #
+ # migration of nexthop buckets - equal weights
+ #
+ run_cmd "$IP nexthop add id 12 via 172.16.1.2 dev veth1"
+ run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1"
+ run_cmd "$IP nexthop add id 102 group 12/13 type resilient buckets 2 idle_timer 0"
+
+ run_cmd "$IP nexthop del id 13"
+ check_nexthop "id 102" \
+ "id 102 group 12 type resilient buckets 2 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+ log_test $? 0 "Nexthop group updated when entry is deleted"
+ check_nexthop_bucket "list id 102" \
+ "id 102 index 0 nhid 12 id 102 index 1 nhid 12"
+ log_test $? 0 "Nexthop buckets updated when entry is deleted"
+
+ run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1"
+ run_cmd "$IP nexthop replace id 102 group 12/13 type resilient buckets 2 idle_timer 0"
+ check_nexthop "id 102" \
+ "id 102 group 12/13 type resilient buckets 2 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+ log_test $? 0 "Nexthop group updated after replace"
+ check_nexthop_bucket "list id 102" \
+ "id 102 index 0 nhid 13 id 102 index 1 nhid 12"
+ log_test $? 0 "Nexthop buckets updated after replace"
+
+ $IP nexthop flush >/dev/null 2>&1
+
+ #
+ # migration of nexthop buckets - unequal weights
+ #
+ run_cmd "$IP nexthop add id 12 via 172.16.1.2 dev veth1"
+ run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1"
+ run_cmd "$IP nexthop add id 102 group 12,3/13,1 type resilient buckets 4 idle_timer 0"
+
+ run_cmd "$IP nexthop del id 13"
+ check_nexthop "id 102" \
+ "id 102 group 12,3 type resilient buckets 4 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+ log_test $? 0 "Nexthop group updated when entry is deleted - nECMP"
+ check_nexthop_bucket "list id 102" \
+ "id 102 index 0 nhid 12 id 102 index 1 nhid 12 id 102 index 2 nhid 12 id 102 index 3 nhid 12"
+ log_test $? 0 "Nexthop buckets updated when entry is deleted - nECMP"
+
+ run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1"
+ run_cmd "$IP nexthop replace id 102 group 12,3/13,1 type resilient buckets 4 idle_timer 0"
+ check_nexthop "id 102" \
+ "id 102 group 12,3/13 type resilient buckets 4 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+ log_test $? 0 "Nexthop group updated after replace - nECMP"
+ check_nexthop_bucket "list id 102" \
+ "id 102 index 0 nhid 13 id 102 index 1 nhid 12 id 102 index 2 nhid 12 id 102 index 3 nhid 12"
+ log_test $? 0 "Nexthop buckets updated after replace - nECMP"
+}
+
ipv4_withv6_fcnal()
{
local lladdr
@@ -1259,6 +1529,22 @@ ipv4_large_grp()
$IP nexthop flush >/dev/null 2>&1
}
+ipv4_large_res_grp()
+{
+ echo
+ echo "IPv4 large resilient group (128k buckets)"
+ echo "-----------------------------------------"
+
+ check_nexthop_res_support
+ if [ $? -eq $ksft_skip ]; then
+ return $ksft_skip
+ fi
+
+ check_large_res_grp 4 $((128 * 1024))
+
+ $IP nexthop flush >/dev/null 2>&1
+}
+
sysctl_nexthop_compat_mode_check()
{
local sysctlname="net.ipv4.nexthop_compat_mode"
@@ -1476,11 +1762,68 @@ ipv4_torture()
sleep 300
kill -9 $pid1 $pid2 $pid3 $pid4 $pid5
+ wait $pid1 $pid2 $pid3 $pid4 $pid5 2>/dev/null
# if we did not crash, success
log_test 0 0 "IPv4 torture test"
}
+ipv4_res_grp_replace_loop()
+{
+ while :; do
+ $IP nexthop replace id 102 group 100/101 type resilient
+ done >/dev/null 2>&1
+}
+
+ipv4_res_torture()
+{
+ local pid1
+ local pid2
+ local pid3
+ local pid4
+ local pid5
+
+ echo
+ echo "IPv4 runtime resilient nexthop group torture"
+ echo "--------------------------------------------"
+
+ check_nexthop_res_support
+ if [ $? -eq $ksft_skip ]; then
+ return $ksft_skip
+ fi
+
+ if [ ! -x "$(command -v mausezahn)" ]; then
+ echo "SKIP: Could not run test; need mausezahn tool"
+ return
+ fi
+
+ run_cmd "$IP nexthop add id 100 via 172.16.1.2 dev veth1"
+ run_cmd "$IP nexthop add id 101 via 172.16.2.2 dev veth3"
+ run_cmd "$IP nexthop add id 102 group 100/101 type resilient buckets 512 idle_timer 0"
+ run_cmd "$IP route add 172.16.101.1 nhid 102"
+ run_cmd "$IP route add 172.16.101.2 nhid 102"
+
+ ipv4_del_add_loop1 &
+ pid1=$!
+ ipv4_res_grp_replace_loop &
+ pid2=$!
+ ip netns exec me ping -f 172.16.101.1 >/dev/null 2>&1 &
+ pid3=$!
+ ip netns exec me ping -f 172.16.101.2 >/dev/null 2>&1 &
+ pid4=$!
+ ip netns exec me mausezahn veth1 \
+ -B 172.16.101.2 -A 172.16.1.1 -c 0 \
+ -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
+ pid5=$!
+
+ sleep 300
+ kill -9 $pid1 $pid2 $pid3 $pid4 $pid5
+ wait $pid1 $pid2 $pid3 $pid4 $pid5 2>/dev/null
+
+ # if we did not crash, success
+ log_test 0 0 "IPv4 resilient nexthop group torture test"
+}
+
basic()
{
echo
@@ -1590,6 +1933,219 @@ basic()
log_test $? 2 "Nexthop group and blackhole"
$IP nexthop flush >/dev/null 2>&1
+
+ # Test to ensure that flushing with a multi-part nexthop dump works as
+ # expected.
+ local batch_file=$(mktemp)
+
+ for i in $(seq 1 $((64 * 1024))); do
+ echo "nexthop add id $i blackhole" >> $batch_file
+ done
+
+ $IP -b $batch_file
+ $IP nexthop flush >/dev/null 2>&1
+ [[ $($IP nexthop | wc -l) -eq 0 ]]
+ log_test $? 0 "Large scale nexthop flushing"
+
+ rm $batch_file
+}
+
+check_nexthop_buckets_balance()
+{
+ local nharg=$1; shift
+ local ret
+
+ while (($# > 0)); do
+ local selector=$1; shift
+ local condition=$1; shift
+ local count
+
+ count=$($IP -j nexthop bucket ${nharg} ${selector} | jq length)
+ (( $count $condition ))
+ ret=$?
+ if ((ret != 0)); then
+ return $ret
+ fi
+ done
+
+ return 0
+}
+
+basic_res()
+{
+ echo
+ echo "Basic resilient nexthop group functional tests"
+ echo "----------------------------------------------"
+
+ check_nexthop_res_support
+ if [ $? -eq $ksft_skip ]; then
+ return $ksft_skip
+ fi
+
+ run_cmd "$IP nexthop add id 1 dev veth1"
+
+ #
+ # resilient nexthop group addition
+ #
+
+ run_cmd "$IP nexthop add id 101 group 1 type resilient buckets 8"
+ log_test $? 0 "Add a nexthop group with default parameters"
+
+ run_cmd "$IP nexthop get id 101"
+ check_nexthop "id 101" \
+ "id 101 group 1 type resilient buckets 8 idle_timer 120 unbalanced_timer 0 unbalanced_time 0"
+ log_test $? 0 "Get a nexthop group with default parameters"
+
+ run_cmd "$IP nexthop add id 102 group 1 type resilient
+ buckets 4 idle_timer 100 unbalanced_timer 5"
+ run_cmd "$IP nexthop get id 102"
+ check_nexthop "id 102" \
+ "id 102 group 1 type resilient buckets 4 idle_timer 100 unbalanced_timer 5 unbalanced_time 0"
+ log_test $? 0 "Get a nexthop group with non-default parameters"
+
+ run_cmd "$IP nexthop add id 103 group 1 type resilient buckets 0"
+ log_test $? 2 "Add a nexthop group with 0 buckets"
+
+ #
+ # resilient nexthop group replacement
+ #
+
+ run_cmd "$IP nexthop replace id 101 group 1 type resilient
+ buckets 8 idle_timer 240 unbalanced_timer 80"
+ log_test $? 0 "Replace nexthop group parameters"
+ check_nexthop "id 101" \
+ "id 101 group 1 type resilient buckets 8 idle_timer 240 unbalanced_timer 80 unbalanced_time 0"
+ log_test $? 0 "Get a nexthop group after replacing parameters"
+
+ run_cmd "$IP nexthop replace id 101 group 1 type resilient idle_timer 512"
+ log_test $? 0 "Replace idle timer"
+ check_nexthop "id 101" \
+ "id 101 group 1 type resilient buckets 8 idle_timer 512 unbalanced_timer 80 unbalanced_time 0"
+ log_test $? 0 "Get a nexthop group after replacing idle timer"
+
+ run_cmd "$IP nexthop replace id 101 group 1 type resilient unbalanced_timer 256"
+ log_test $? 0 "Replace unbalanced timer"
+ check_nexthop "id 101" \
+ "id 101 group 1 type resilient buckets 8 idle_timer 512 unbalanced_timer 256 unbalanced_time 0"
+ log_test $? 0 "Get a nexthop group after replacing unbalanced timer"
+
+ run_cmd "$IP nexthop replace id 101 group 1 type resilient"
+ log_test $? 0 "Replace with no parameters"
+ check_nexthop "id 101" \
+ "id 101 group 1 type resilient buckets 8 idle_timer 512 unbalanced_timer 256 unbalanced_time 0"
+ log_test $? 0 "Get a nexthop group after replacing no parameters"
+
+ run_cmd "$IP nexthop replace id 101 group 1"
+ log_test $? 2 "Replace nexthop group type - implicit"
+
+ run_cmd "$IP nexthop replace id 101 group 1 type mpath"
+ log_test $? 2 "Replace nexthop group type - explicit"
+
+ run_cmd "$IP nexthop replace id 101 group 1 type resilient buckets 1024"
+ log_test $? 2 "Replace number of nexthop buckets"
+
+ check_nexthop "id 101" \
+ "id 101 group 1 type resilient buckets 8 idle_timer 512 unbalanced_timer 256 unbalanced_time 0"
+ log_test $? 0 "Get a nexthop group after replacing with invalid parameters"
+
+ #
+ # resilient nexthop buckets dump
+ #
+
+ $IP nexthop flush >/dev/null 2>&1
+ run_cmd "$IP nexthop add id 1 dev veth1"
+ run_cmd "$IP nexthop add id 2 dev veth3"
+ run_cmd "$IP nexthop add id 101 group 1/2 type resilient buckets 4"
+ run_cmd "$IP nexthop add id 201 group 1/2"
+
+ check_nexthop_bucket "" \
+ "id 101 index 0 nhid 2 id 101 index 1 nhid 2 id 101 index 2 nhid 1 id 101 index 3 nhid 1"
+ log_test $? 0 "Dump all nexthop buckets"
+
+ check_nexthop_bucket "list id 101" \
+ "id 101 index 0 nhid 2 id 101 index 1 nhid 2 id 101 index 2 nhid 1 id 101 index 3 nhid 1"
+ log_test $? 0 "Dump all nexthop buckets in a group"
+
+ (( $($IP -j nexthop bucket list id 101 |
+ jq '[.[] | select(.bucket.idle_time > 0 and
+ .bucket.idle_time < 2)] | length') == 4 ))
+ log_test $? 0 "All nexthop buckets report a positive near-zero idle time"
+
+ check_nexthop_bucket "list dev veth1" \
+ "id 101 index 2 nhid 1 id 101 index 3 nhid 1"
+ log_test $? 0 "Dump all nexthop buckets with a specific nexthop device"
+
+ check_nexthop_bucket "list nhid 2" \
+ "id 101 index 0 nhid 2 id 101 index 1 nhid 2"
+ log_test $? 0 "Dump all nexthop buckets with a specific nexthop identifier"
+
+ run_cmd "$IP nexthop bucket list id 111"
+ log_test $? 2 "Dump all nexthop buckets in a non-existent group"
+
+ run_cmd "$IP nexthop bucket list id 201"
+ log_test $? 2 "Dump all nexthop buckets in a non-resilient group"
+
+ run_cmd "$IP nexthop bucket list dev bla"
+ log_test $? 255 "Dump all nexthop buckets using a non-existent device"
+
+ run_cmd "$IP nexthop bucket list groups"
+ log_test $? 255 "Dump all nexthop buckets with invalid 'groups' keyword"
+
+ run_cmd "$IP nexthop bucket list fdb"
+ log_test $? 255 "Dump all nexthop buckets with invalid 'fdb' keyword"
+
+ #
+ # resilient nexthop buckets get requests
+ #
+
+ check_nexthop_bucket "get id 101 index 0" "id 101 index 0 nhid 2"
+ log_test $? 0 "Get a valid nexthop bucket"
+
+ run_cmd "$IP nexthop bucket get id 101 index 999"
+ log_test $? 2 "Get a nexthop bucket with valid group, but invalid index"
+
+ run_cmd "$IP nexthop bucket get id 201 index 0"
+ log_test $? 2 "Get a nexthop bucket from a non-resilient group"
+
+ run_cmd "$IP nexthop bucket get id 999 index 0"
+ log_test $? 2 "Get a nexthop bucket from a non-existent group"
+
+ #
+ # tests for bucket migration
+ #
+
+ $IP nexthop flush >/dev/null 2>&1
+
+ run_cmd "$IP nexthop add id 1 dev veth1"
+ run_cmd "$IP nexthop add id 2 dev veth3"
+ run_cmd "$IP nexthop add id 101
+ group 1/2 type resilient buckets 10
+ idle_timer 1 unbalanced_timer 20"
+
+ check_nexthop_buckets_balance "list id 101" \
+ "nhid 1" "== 5" \
+ "nhid 2" "== 5"
+ log_test $? 0 "Initial bucket allocation"
+
+ run_cmd "$IP nexthop replace id 101
+ group 1,2/2,3 type resilient"
+ check_nexthop_buckets_balance "list id 101" \
+ "nhid 1" "== 4" \
+ "nhid 2" "== 6"
+ log_test $? 0 "Bucket allocation after replace"
+
+ # Check that increase in idle timer does not make buckets appear busy.
+ run_cmd "$IP nexthop replace id 101
+ group 1,2/2,3 type resilient
+ idle_timer 10"
+ run_cmd "$IP nexthop replace id 101
+ group 1/2 type resilient"
+ check_nexthop_buckets_balance "list id 101" \
+ "nhid 1" "== 5" \
+ "nhid 2" "== 5"
+ log_test $? 0 "Buckets migrated after idle timer change"
+
+ $IP nexthop flush >/dev/null 2>&1
}
################################################################################
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 2b5707738609..76d9487fb03c 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -9,7 +9,7 @@ ret=0
ksft_skip=4
# all tests in this script. Can be overridden with -t option
-TESTS="unregister down carrier nexthop suppress ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr"
+TESTS="unregister down carrier nexthop suppress ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr ipv4_mangle ipv6_mangle"
VERBOSE=0
PAUSE_ON_FAIL=no
@@ -1653,6 +1653,154 @@ ipv4_route_v6_gw_test()
route_cleanup
}
+socat_check()
+{
+ if [ ! -x "$(command -v socat)" ]; then
+ echo "socat command not found. Skipping test"
+ return 1
+ fi
+
+ return 0
+}
+
+iptables_check()
+{
+ iptables -t mangle -L OUTPUT &> /dev/null
+ if [ $? -ne 0 ]; then
+ echo "iptables configuration not supported. Skipping test"
+ return 1
+ fi
+
+ return 0
+}
+
+ip6tables_check()
+{
+ ip6tables -t mangle -L OUTPUT &> /dev/null
+ if [ $? -ne 0 ]; then
+ echo "ip6tables configuration not supported. Skipping test"
+ return 1
+ fi
+
+ return 0
+}
+
+ipv4_mangle_test()
+{
+ local rc
+
+ echo
+ echo "IPv4 mangling tests"
+
+ socat_check || return 1
+ iptables_check || return 1
+
+ route_setup
+ sleep 2
+
+ local tmp_file=$(mktemp)
+ ip netns exec ns2 socat UDP4-LISTEN:54321,fork $tmp_file &
+
+ # Add a FIB rule and a route that will direct our connection to the
+ # listening server.
+ $IP rule add pref 100 ipproto udp sport 12345 dport 54321 table 123
+ $IP route add table 123 172.16.101.0/24 dev veth1
+
+ # Add an unreachable route to the main table that will block our
+ # connection in case the FIB rule is not hit.
+ $IP route add unreachable 172.16.101.2/32
+
+ run_cmd "echo a | $NS_EXEC socat STDIN UDP4:172.16.101.2:54321,sourceport=12345"
+ log_test $? 0 " Connection with correct parameters"
+
+ run_cmd "echo a | $NS_EXEC socat STDIN UDP4:172.16.101.2:54321,sourceport=11111"
+ log_test $? 1 " Connection with incorrect parameters"
+
+ # Add a mangling rule and make sure connection is still successful.
+ $NS_EXEC iptables -t mangle -A OUTPUT -j MARK --set-mark 1
+
+ run_cmd "echo a | $NS_EXEC socat STDIN UDP4:172.16.101.2:54321,sourceport=12345"
+ log_test $? 0 " Connection with correct parameters - mangling"
+
+ # Delete the mangling rule and make sure connection is still
+ # successful.
+ $NS_EXEC iptables -t mangle -D OUTPUT -j MARK --set-mark 1
+
+ run_cmd "echo a | $NS_EXEC socat STDIN UDP4:172.16.101.2:54321,sourceport=12345"
+ log_test $? 0 " Connection with correct parameters - no mangling"
+
+ # Verify connections were indeed successful on server side.
+ [[ $(cat $tmp_file | wc -l) -eq 3 ]]
+ log_test $? 0 " Connection check - server side"
+
+ $IP route del unreachable 172.16.101.2/32
+ $IP route del table 123 172.16.101.0/24 dev veth1
+ $IP rule del pref 100
+
+ { kill %% && wait %%; } 2>/dev/null
+ rm $tmp_file
+
+ route_cleanup
+}
+
+ipv6_mangle_test()
+{
+ local rc
+
+ echo
+ echo "IPv6 mangling tests"
+
+ socat_check || return 1
+ ip6tables_check || return 1
+
+ route_setup
+ sleep 2
+
+ local tmp_file=$(mktemp)
+ ip netns exec ns2 socat UDP6-LISTEN:54321,fork $tmp_file &
+
+ # Add a FIB rule and a route that will direct our connection to the
+ # listening server.
+ $IP -6 rule add pref 100 ipproto udp sport 12345 dport 54321 table 123
+ $IP -6 route add table 123 2001:db8:101::/64 dev veth1
+
+ # Add an unreachable route to the main table that will block our
+ # connection in case the FIB rule is not hit.
+ $IP -6 route add unreachable 2001:db8:101::2/128
+
+ run_cmd "echo a | $NS_EXEC socat STDIN UDP6:[2001:db8:101::2]:54321,sourceport=12345"
+ log_test $? 0 " Connection with correct parameters"
+
+ run_cmd "echo a | $NS_EXEC socat STDIN UDP6:[2001:db8:101::2]:54321,sourceport=11111"
+ log_test $? 1 " Connection with incorrect parameters"
+
+ # Add a mangling rule and make sure connection is still successful.
+ $NS_EXEC ip6tables -t mangle -A OUTPUT -j MARK --set-mark 1
+
+ run_cmd "echo a | $NS_EXEC socat STDIN UDP6:[2001:db8:101::2]:54321,sourceport=12345"
+ log_test $? 0 " Connection with correct parameters - mangling"
+
+ # Delete the mangling rule and make sure connection is still
+ # successful.
+ $NS_EXEC ip6tables -t mangle -D OUTPUT -j MARK --set-mark 1
+
+ run_cmd "echo a | $NS_EXEC socat STDIN UDP6:[2001:db8:101::2]:54321,sourceport=12345"
+ log_test $? 0 " Connection with correct parameters - no mangling"
+
+ # Verify connections were indeed successful on server side.
+ [[ $(cat $tmp_file | wc -l) -eq 3 ]]
+ log_test $? 0 " Connection check - server side"
+
+ $IP -6 route del unreachable 2001:db8:101::2/128
+ $IP -6 route del table 123 2001:db8:101::/64 dev veth1
+ $IP -6 rule del pref 100
+
+ { kill %% && wait %%; } 2>/dev/null
+ rm $tmp_file
+
+ route_cleanup
+}
+
################################################################################
# usage
@@ -1725,6 +1873,8 @@ do
ipv6_route_metrics) ipv6_route_metrics_test;;
ipv4_route_metrics) ipv4_route_metrics_test;;
ipv4_route_v6_gw) ipv4_route_v6_gw_test;;
+ ipv4_mangle) ipv4_mangle_test;;
+ ipv6_mangle) ipv6_mangle_test;;
help) echo "Test names: $TESTS"; exit 0;;
esac
diff --git a/tools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh b/tools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh
new file mode 100755
index 000000000000..5148d97a5df8
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh
@@ -0,0 +1,366 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +--------------------+ +----------------------+
+# | H1 (vrf) | | H2 (vrf) |
+# | + h1.10 | | + h2.20 |
+# | | 192.0.2.1/28 | | | 192.0.2.2/28 |
+# | | | | | |
+# | + $h1 | | + $h2 |
+# | | | | | |
+# +----|---------------+ +--|-------------------+
+# | |
+# +----|--------------------------------------------------|--------------------+
+# | SW | | |
+# | +--|-------------------------------+ +----------------|------------------+ |
+# | | + $swp1 BR1 (802.1ad) | | BR2 (802.1d) + $swp2 | |
+# | | vid 100 pvid untagged | | | | |
+# | | | | + $swp2.20 | |
+# | | | | | |
+# | | + vx100 (vxlan) | | + vx200 (vxlan) | |
+# | | local 192.0.2.17 | | local 192.0.2.17 | |
+# | | remote 192.0.2.34 | | remote 192.0.2.50 | |
+# | | id 1000 dstport $VXPORT | | id 2000 dstport $VXPORT | |
+# | | vid 100 pvid untagged | | | |
+# | +--------------------------------- + +-----------------------------------+ |
+# | |
+# | 192.0.2.32/28 via 192.0.2.18 |
+# | 192.0.2.48/28 via 192.0.2.18 |
+# | |
+# | + $rp1 |
+# | | 192.0.2.17/28 |
+# +----|-----------------------------------------------------------------------+
+# |
+# +----|--------------------------------------------------------+
+# | | VRP2 (vrf) |
+# | + $rp2 |
+# | 192.0.2.18/28 |
+# | | (maybe) HW
+# =============================================================================
+# | | (likely) SW
+# | + v1 (veth) + v3 (veth) |
+# | | 192.0.2.33/28 | 192.0.2.49/28 |
+# +----|---------------------------------------|----------------+
+# | |
+# +----|------------------------------+ +----|------------------------------+
+# | + v2 (veth) NS1 (netns) | | + v4 (veth) NS2 (netns) |
+# | 192.0.2.34/28 | | 192.0.2.50/28 |
+# | | | |
+# | 192.0.2.16/28 via 192.0.2.33 | | 192.0.2.16/28 via 192.0.2.49 |
+# | 192.0.2.50/32 via 192.0.2.33 | | 192.0.2.34/32 via 192.0.2.49 |
+# | | | |
+# | +-------------------------------+ | | +-------------------------------+ |
+# | | BR3 (802.1ad) | | | | BR3 (802.1d) | |
+# | | + vx100 (vxlan) | | | | + vx200 (vxlan) | |
+# | | local 192.0.2.34 | | | | local 192.0.2.50 | |
+# | | remote 192.0.2.17 | | | | remote 192.0.2.17 | |
+# | | remote 192.0.2.50 | | | | remote 192.0.2.34 | |
+# | | id 1000 dstport $VXPORT | | | | id 2000 dstport $VXPORT | |
+# | | vid 100 pvid untagged | | | | | |
+# | | | | | | + w1.20 | |
+# | | | | | | | | |
+# | | + w1 (veth) | | | | + w1 (veth) | |
+# | | | vid 100 pvid untagged | | | | | | |
+# | +--|----------------------------+ | | +--|----------------------------+ |
+# | | | | | |
+# | +--|----------------------------+ | | +--|----------------------------+ |
+# | | | VW2 (vrf) | | | | | VW2 (vrf) | |
+# | | + w2 (veth) | | | | + w2 (veth) | |
+# | | | | | | | | | |
+# | | | | | | | | | |
+# | | + w2.10 | | | | + w2.20 | |
+# | | 192.0.2.3/28 | | | | 192.0.2.4/28 | |
+# | +-------------------------------+ | | +-------------------------------+ |
+# +-----------------------------------+ +-----------------------------------+
+
+: ${VXPORT:=4789}
+export VXPORT
+
+: ${ALL_TESTS:="
+ ping_ipv4
+ "}
+
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1
+ tc qdisc add dev $h1 clsact
+ vlan_create $h1 10 v$h1 192.0.2.1/28
+}
+
+h1_destroy()
+{
+ vlan_destroy $h1 10
+ tc qdisc del dev $h1 clsact
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+ tc qdisc add dev $h2 clsact
+ vlan_create $h2 20 v$h2 192.0.2.2/28
+}
+
+h2_destroy()
+{
+ vlan_destroy $h2 20
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2
+}
+
+rp1_set_addr()
+{
+ ip address add dev $rp1 192.0.2.17/28
+
+ ip route add 192.0.2.32/28 nexthop via 192.0.2.18
+ ip route add 192.0.2.48/28 nexthop via 192.0.2.18
+}
+
+rp1_unset_addr()
+{
+ ip route del 192.0.2.48/28 nexthop via 192.0.2.18
+ ip route del 192.0.2.32/28 nexthop via 192.0.2.18
+
+ ip address del dev $rp1 192.0.2.17/28
+}
+
+switch_create()
+{
+ #### BR1 ####
+ ip link add name br1 type bridge vlan_filtering 1 \
+ vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+ # Make sure the bridge uses the MAC address of the local port and not
+ # that of the VxLAN's device.
+ ip link set dev br1 address $(mac_get $swp1)
+ ip link set dev br1 up
+
+ #### BR2 ####
+ ip link add name br2 type bridge vlan_filtering 0 mcast_snooping 0
+ # Make sure the bridge uses the MAC address of the local port and not
+ # that of the VxLAN's device.
+ ip link set dev br2 address $(mac_get $swp2)
+ ip link set dev br2 up
+
+ ip link set dev $rp1 up
+ rp1_set_addr
+
+ #### VX100 ####
+ ip link add name vx100 type vxlan id 1000 local 192.0.2.17 \
+ dstport "$VXPORT" nolearning noudpcsum tos inherit ttl 100
+ ip link set dev vx100 up
+
+ ip link set dev vx100 master br1
+ bridge vlan add vid 100 dev vx100 pvid untagged
+
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+ bridge vlan add vid 100 dev $swp1 pvid untagged
+
+ #### VX200 ####
+ ip link add name vx200 type vxlan id 2000 local 192.0.2.17 \
+ dstport "$VXPORT" nolearning noudpcsum tos inherit ttl 100
+ ip link set dev vx200 up
+
+ ip link set dev vx200 master br2
+
+ ip link set dev $swp2 up
+ ip link add name $swp2.20 link $swp2 type vlan id 20
+ ip link set dev $swp2.20 master br2
+ ip link set dev $swp2.20 up
+
+ bridge fdb append dev vx100 00:00:00:00:00:00 dst 192.0.2.34 self
+ bridge fdb append dev vx200 00:00:00:00:00:00 dst 192.0.2.50 self
+}
+
+switch_destroy()
+{
+ bridge fdb del dev vx200 00:00:00:00:00:00 dst 192.0.2.50 self
+ bridge fdb del dev vx100 00:00:00:00:00:00 dst 192.0.2.34 self
+
+ ip link set dev vx200 nomaster
+ ip link set dev vx200 down
+ ip link del dev vx200
+
+ ip link del dev $swp2.20
+ ip link set dev $swp2 down
+ ip link set dev $swp2 nomaster
+
+ bridge vlan del vid 100 dev $swp1
+ ip link set dev $swp1 down
+ ip link set dev $swp1 nomaster
+
+ ip link set dev vx100 nomaster
+ ip link set dev vx100 down
+ ip link del dev vx100
+
+ rp1_unset_addr
+ ip link set dev $rp1 down
+
+ ip link set dev br2 down
+ ip link del dev br2
+
+ ip link set dev br1 down
+ ip link del dev br1
+}
+
+vrp2_create()
+{
+ simple_if_init $rp2 192.0.2.18/28
+ __simple_if_init v1 v$rp2 192.0.2.33/28
+ __simple_if_init v3 v$rp2 192.0.2.49/28
+ tc qdisc add dev v1 clsact
+}
+
+vrp2_destroy()
+{
+ tc qdisc del dev v1 clsact
+ __simple_if_fini v3 192.0.2.49/28
+ __simple_if_fini v1 192.0.2.33/28
+ simple_if_fini $rp2 192.0.2.18/28
+}
+
+ns_init_common()
+{
+ local in_if=$1; shift
+ local in_addr=$1; shift
+ local other_in_addr=$1; shift
+ local vxlan_name=$1; shift
+ local vxlan_id=$1; shift
+ local vlan_id=$1; shift
+ local host_addr=$1; shift
+ local nh_addr=$1; shift
+
+ ip link set dev $in_if up
+ ip address add dev $in_if $in_addr/28
+ tc qdisc add dev $in_if clsact
+
+ ip link add name br3 type bridge vlan_filtering 0
+ ip link set dev br3 up
+
+ ip link add name w1 type veth peer name w2
+
+ ip link set dev w1 master br3
+ ip link set dev w1 up
+
+ ip link add name $vxlan_name type vxlan id $vxlan_id local $in_addr \
+ dstport "$VXPORT"
+ ip link set dev $vxlan_name up
+ bridge fdb append dev $vxlan_name 00:00:00:00:00:00 dst 192.0.2.17 self
+ bridge fdb append dev $vxlan_name 00:00:00:00:00:00 dst $other_in_addr self
+
+ ip link set dev $vxlan_name master br3
+ tc qdisc add dev $vxlan_name clsact
+
+ simple_if_init w2
+ vlan_create w2 $vlan_id vw2 $host_addr/28
+
+ ip route add 192.0.2.16/28 nexthop via $nh_addr
+ ip route add $other_in_addr/32 nexthop via $nh_addr
+}
+export -f ns_init_common
+
+ns1_create()
+{
+ ip netns add ns1
+ ip link set dev v2 netns ns1
+ in_ns ns1 \
+ ns_init_common v2 192.0.2.34 192.0.2.50 vx100 1000 10 192.0.2.3 \
+ 192.0.2.33
+
+ in_ns ns1 bridge vlan add vid 100 dev vx100 pvid untagged
+}
+
+ns1_destroy()
+{
+ ip netns exec ns1 ip link set dev v2 netns 1
+ ip netns del ns1
+}
+
+ns2_create()
+{
+ ip netns add ns2
+ ip link set dev v4 netns ns2
+ in_ns ns2 \
+ ns_init_common v4 192.0.2.50 192.0.2.34 vx200 2000 20 192.0.2.4 \
+ 192.0.2.49
+
+ in_ns ns2 ip link add name w1.20 link w1 type vlan id 20
+ in_ns ns2 ip link set dev w1.20 master br3
+ in_ns ns2 ip link set dev w1.20 up
+}
+
+ns2_destroy()
+{
+ ip netns exec ns2 ip link set dev v4 netns 1
+ ip netns del ns2
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ rp1=${NETIFS[p5]}
+ rp2=${NETIFS[p6]}
+
+ vrf_prepare
+ forwarding_enable
+
+ h1_create
+ h2_create
+ switch_create
+
+ ip link add name v1 type veth peer name v2
+ ip link add name v3 type veth peer name v4
+ vrp2_create
+ ns1_create
+ ns2_create
+
+ r1_mac=$(in_ns ns1 mac_get w2)
+ r2_mac=$(in_ns ns2 mac_get w2)
+ h2_mac=$(mac_get $h2)
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ns2_destroy
+ ns1_destroy
+ vrp2_destroy
+ ip link del dev v3
+ ip link del dev v1
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ forwarding_restore
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.3 ": local->remote 1 through VxLAN with an 802.1ad bridge"
+ ping_test $h2 192.0.2.4 ": local->remote 2 through VxLAN with an 802.1d bridge"
+}
+
+test_all()
+{
+ echo "Running tests with UDP port $VXPORT"
+ tests_run
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+test_all
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/fib_offload_lib.sh b/tools/testing/selftests/net/forwarding/fib_offload_lib.sh
index 66496659bea7..e134a5f529c9 100644
--- a/tools/testing/selftests/net/forwarding/fib_offload_lib.sh
+++ b/tools/testing/selftests/net/forwarding/fib_offload_lib.sh
@@ -224,7 +224,7 @@ fib_ipv4_plen_test()
ip -n $ns link set dev dummy1 up
# Add two routes with the same key and different prefix length and
- # make sure both are in hardware. It can be verfied that both are
+ # make sure both are in hardware. It can be verified that both are
# sharing the same leaf by checking the /proc/net/fib_trie
ip -n $ns route add 192.0.2.0/24 dev dummy1
ip -n $ns route add 192.0.2.0/25 dev dummy1
diff --git a/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh b/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh
new file mode 100755
index 000000000000..088b65e64d66
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh
@@ -0,0 +1,361 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test traffic distribution when a wECMP route forwards traffic to two GRE
+# tunnels.
+#
+# +-------------------------+
+# | H1 |
+# | $h1 + |
+# | 192.0.2.1/28 | |
+# | 2001:db8:1::1/64 | |
+# +-------------------|-----+
+# |
+# +-------------------|------------------------+
+# | SW1 | |
+# | $ol1 + |
+# | 192.0.2.2/28 |
+# | 2001:db8:1::2/64 |
+# | |
+# | + g1a (gre) + g1b (gre) |
+# | loc=192.0.2.65 loc=192.0.2.81 |
+# | rem=192.0.2.66 --. rem=192.0.2.82 --. |
+# | tos=inherit | tos=inherit | |
+# | .------------------' | |
+# | | .------------------' |
+# | v v |
+# | + $ul1.111 (vlan) + $ul1.222 (vlan) |
+# | | 192.0.2.129/28 | 192.0.2.145/28 |
+# | \ / |
+# | \________________/ |
+# | | |
+# | + $ul1 |
+# +------------|-------------------------------+
+# |
+# +------------|-------------------------------+
+# | SW2 + $ul2 |
+# | _______|________ |
+# | / \ |
+# | / \ |
+# | + $ul2.111 (vlan) + $ul2.222 (vlan) |
+# | ^ 192.0.2.130/28 ^ 192.0.2.146/28 |
+# | | | |
+# | | '------------------. |
+# | '------------------. | |
+# | + g2a (gre) | + g2b (gre) | |
+# | loc=192.0.2.66 | loc=192.0.2.82 | |
+# | rem=192.0.2.65 --' rem=192.0.2.81 --' |
+# | tos=inherit tos=inherit |
+# | |
+# | $ol2 + |
+# | 192.0.2.17/28 | |
+# | 2001:db8:2::1/64 | |
+# +-------------------|------------------------+
+# |
+# +-------------------|-----+
+# | H2 | |
+# | $h2 + |
+# | 192.0.2.18/28 |
+# | 2001:db8:2::2/64 |
+# +-------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ multipath_ipv4
+ multipath_ipv6
+ multipath_ipv6_l4
+"
+
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+ ip route add vrf v$h1 192.0.2.16/28 via 192.0.2.2
+ ip route add vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+ ip route del vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2
+ ip route del vrf v$h1 192.0.2.16/28 via 192.0.2.2
+ simple_if_fini $h1 192.0.2.1/28
+}
+
+sw1_create()
+{
+ simple_if_init $ol1 192.0.2.2/28 2001:db8:1::2/64
+ __simple_if_init $ul1 v$ol1
+ vlan_create $ul1 111 v$ol1 192.0.2.129/28
+ vlan_create $ul1 222 v$ol1 192.0.2.145/28
+
+ tunnel_create g1a gre 192.0.2.65 192.0.2.66 tos inherit dev v$ol1
+ __simple_if_init g1a v$ol1 192.0.2.65/32
+ ip route add vrf v$ol1 192.0.2.66/32 via 192.0.2.130
+
+ tunnel_create g1b gre 192.0.2.81 192.0.2.82 tos inherit dev v$ol1
+ __simple_if_init g1b v$ol1 192.0.2.81/32
+ ip route add vrf v$ol1 192.0.2.82/32 via 192.0.2.146
+
+ ip -6 nexthop add id 101 dev g1a
+ ip -6 nexthop add id 102 dev g1b
+ ip nexthop add id 103 group 101/102 type resilient buckets 512 \
+ idle_timer 0
+
+ ip route add vrf v$ol1 192.0.2.16/28 nhid 103
+ ip route add vrf v$ol1 2001:db8:2::/64 nhid 103
+}
+
+sw1_destroy()
+{
+ ip route del vrf v$ol1 2001:db8:2::/64
+ ip route del vrf v$ol1 192.0.2.16/28
+
+ ip nexthop del id 103
+ ip -6 nexthop del id 102
+ ip -6 nexthop del id 101
+
+ ip route del vrf v$ol1 192.0.2.82/32 via 192.0.2.146
+ __simple_if_fini g1b 192.0.2.81/32
+ tunnel_destroy g1b
+
+ ip route del vrf v$ol1 192.0.2.66/32 via 192.0.2.130
+ __simple_if_fini g1a 192.0.2.65/32
+ tunnel_destroy g1a
+
+ vlan_destroy $ul1 222
+ vlan_destroy $ul1 111
+ __simple_if_fini $ul1
+ simple_if_fini $ol1 192.0.2.2/28 2001:db8:1::2/64
+}
+
+sw2_create()
+{
+ simple_if_init $ol2 192.0.2.17/28 2001:db8:2::1/64
+ __simple_if_init $ul2 v$ol2
+ vlan_create $ul2 111 v$ol2 192.0.2.130/28
+ vlan_create $ul2 222 v$ol2 192.0.2.146/28
+
+ tunnel_create g2a gre 192.0.2.66 192.0.2.65 tos inherit dev v$ol2
+ __simple_if_init g2a v$ol2 192.0.2.66/32
+ ip route add vrf v$ol2 192.0.2.65/32 via 192.0.2.129
+
+ tunnel_create g2b gre 192.0.2.82 192.0.2.81 tos inherit dev v$ol2
+ __simple_if_init g2b v$ol2 192.0.2.82/32
+ ip route add vrf v$ol2 192.0.2.81/32 via 192.0.2.145
+
+ ip -6 nexthop add id 201 dev g2a
+ ip -6 nexthop add id 202 dev g2b
+ ip nexthop add id 203 group 201/202 type resilient buckets 512 \
+ idle_timer 0
+
+ ip route add vrf v$ol2 192.0.2.0/28 nhid 203
+ ip route add vrf v$ol2 2001:db8:1::/64 nhid 203
+
+ tc qdisc add dev $ul2 clsact
+ tc filter add dev $ul2 ingress pref 111 prot 802.1Q \
+ flower vlan_id 111 action pass
+ tc filter add dev $ul2 ingress pref 222 prot 802.1Q \
+ flower vlan_id 222 action pass
+}
+
+sw2_destroy()
+{
+ tc qdisc del dev $ul2 clsact
+
+ ip route del vrf v$ol2 2001:db8:1::/64
+ ip route del vrf v$ol2 192.0.2.0/28
+
+ ip nexthop del id 203
+ ip -6 nexthop del id 202
+ ip -6 nexthop del id 201
+
+ ip route del vrf v$ol2 192.0.2.81/32 via 192.0.2.145
+ __simple_if_fini g2b 192.0.2.82/32
+ tunnel_destroy g2b
+
+ ip route del vrf v$ol2 192.0.2.65/32 via 192.0.2.129
+ __simple_if_fini g2a 192.0.2.66/32
+ tunnel_destroy g2a
+
+ vlan_destroy $ul2 222
+ vlan_destroy $ul2 111
+ __simple_if_fini $ul2
+ simple_if_fini $ol2 192.0.2.17/28 2001:db8:2::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.18/28 2001:db8:2::2/64
+ ip route add vrf v$h2 192.0.2.0/28 via 192.0.2.17
+ ip route add vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+ ip route del vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1
+ ip route del vrf v$h2 192.0.2.0/28 via 192.0.2.17
+ simple_if_fini $h2 192.0.2.18/28 2001:db8:2::2/64
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ ol1=${NETIFS[p2]}
+
+ ul1=${NETIFS[p3]}
+ ul2=${NETIFS[p4]}
+
+ ol2=${NETIFS[p5]}
+ h2=${NETIFS[p6]}
+
+ vrf_prepare
+ h1_create
+ sw1_create
+ sw2_create
+ h2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ h2_destroy
+ sw2_destroy
+ sw1_destroy
+ h1_destroy
+ vrf_cleanup
+}
+
+multipath4_test()
+{
+ local what=$1; shift
+ local weight1=$1; shift
+ local weight2=$1; shift
+
+ sysctl_set net.ipv4.fib_multipath_hash_policy 1
+ ip nexthop replace id 103 group 101,$weight1/102,$weight2 \
+ type resilient
+
+ local t0_111=$(tc_rule_stats_get $ul2 111 ingress)
+ local t0_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+ ip vrf exec v$h1 \
+ $MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \
+ -d 1msec -t udp "sp=1024,dp=0-32768"
+
+ local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
+ local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+ local d111=$((t1_111 - t0_111))
+ local d222=$((t1_222 - t0_222))
+ multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+ ip nexthop replace id 103 group 101/102 type resilient
+ sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+multipath6_test()
+{
+ local what=$1; shift
+ local weight1=$1; shift
+ local weight2=$1; shift
+
+ sysctl_set net.ipv6.fib_multipath_hash_policy 0
+ ip nexthop replace id 103 group 101,$weight1/102,$weight2 \
+ type resilient
+
+ local t0_111=$(tc_rule_stats_get $ul2 111 ingress)
+ local t0_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+ # Generate 16384 echo requests, each with a random flow label.
+ for ((i=0; i < 16384; ++i)); do
+ ip vrf exec v$h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null
+ done
+
+ local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
+ local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+ local d111=$((t1_111 - t0_111))
+ local d222=$((t1_222 - t0_222))
+ multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+ ip nexthop replace id 103 group 101/102 type resilient
+ sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+multipath6_l4_test()
+{
+ local what=$1; shift
+ local weight1=$1; shift
+ local weight2=$1; shift
+
+ sysctl_set net.ipv6.fib_multipath_hash_policy 1
+ ip nexthop replace id 103 group 101,$weight1/102,$weight2 \
+ type resilient
+
+ local t0_111=$(tc_rule_stats_get $ul2 111 ingress)
+ local t0_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+ ip vrf exec v$h1 \
+ $MZ $h1 -6 -q -p 64 -A 2001:db8:1::1 -B 2001:db8:2::2 \
+ -d 1msec -t udp "sp=1024,dp=0-32768"
+
+ local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
+ local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+ local d111=$((t1_111 - t0_111))
+ local d222=$((t1_222 - t0_222))
+ multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+ ip nexthop replace id 103 group 101/102 type resilient
+ sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.18
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::2
+}
+
+multipath_ipv4()
+{
+ log_info "Running IPv4 multipath tests"
+ multipath4_test "ECMP" 1 1
+ multipath4_test "Weighted MP 2:1" 2 1
+ multipath4_test "Weighted MP 11:45" 11 45
+}
+
+multipath_ipv6()
+{
+ log_info "Running IPv6 multipath tests"
+ multipath6_test "ECMP" 1 1
+ multipath6_test "Weighted MP 2:1" 2 1
+ multipath6_test "Weighted MP 11:45" 11 45
+}
+
+multipath_ipv6_l4()
+{
+ log_info "Running IPv6 L4 hash multipath tests"
+ multipath6_l4_test "ECMP" 1 1
+ multipath6_l4_test "Weighted MP 2:1" 2 1
+ multipath6_l4_test "Weighted MP 11:45" 11 45
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index be71012b8fc5..42e28c983d41 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -353,6 +353,11 @@ wait_for_offload()
"$@" | grep -q offload
}
+wait_for_trap()
+{
+ "$@" | grep -q trap
+}
+
until_counter_is()
{
local expr=$1; shift
@@ -767,6 +772,15 @@ rate()
echo $((8 * (t1 - t0) / interval))
}
+packets_rate()
+{
+ local t0=$1; shift
+ local t1=$1; shift
+ local interval=$1; shift
+
+ echo $(((t1 - t0) / interval))
+}
+
mac_get()
{
local if_name=$1
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
index c02291e9841e..880e3ab9d088 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
@@ -271,7 +271,7 @@ test_span_gre_fdb_roaming()
while ((RET == 0)); do
bridge fdb del dev $swp3 $h3mac vlan 555 master 2>/dev/null
- bridge fdb add dev $swp2 $h3mac vlan 555 master
+ bridge fdb add dev $swp2 $h3mac vlan 555 master static
sleep 1
fail_test_span_gre_dir $tundev ingress
diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh
index 13db1cb50e57..6406cd76a19d 100644
--- a/tools/testing/selftests/net/forwarding/mirror_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh
@@ -20,6 +20,13 @@ mirror_uninstall()
tc filter del dev $swp1 $direction pref 1000
}
+is_ipv6()
+{
+ local addr=$1; shift
+
+ [[ -z ${addr//[0-9a-fA-F:]/} ]]
+}
+
mirror_test()
{
local vrf_name=$1; shift
@@ -29,9 +36,17 @@ mirror_test()
local pref=$1; shift
local expect=$1; shift
+ if is_ipv6 $dip; then
+ local proto=-6
+ local type="icmp6 type=128" # Echo request.
+ else
+ local proto=
+ local type="icmp echoreq"
+ fi
+
local t0=$(tc_rule_stats_get $dev $pref)
- $MZ $vrf_name ${sip:+-A $sip} -B $dip -a own -b bc -q \
- -c 10 -d 100msec -t icmp type=8
+ $MZ $proto $vrf_name ${sip:+-A $sip} -B $dip -a own -b bc -q \
+ -c 10 -d 100msec -t $type
sleep 0.5
local t1=$(tc_rule_stats_get $dev $pref)
local delta=$((t1 - t0))
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
new file mode 100755
index 000000000000..4898dd4118f1
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
@@ -0,0 +1,400 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ multipath_test
+"
+NUM_NETIFS=8
+source lib.sh
+
+h1_create()
+{
+ vrf_create "vrf-h1"
+ ip link set dev $h1 master vrf-h1
+
+ ip link set dev vrf-h1 up
+ ip link set dev $h1 up
+
+ ip address add 192.0.2.2/24 dev $h1
+ ip address add 2001:db8:1::2/64 dev $h1
+
+ ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+ ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+ ip route del 2001:db8:2::/64 vrf vrf-h1
+ ip route del 198.51.100.0/24 vrf vrf-h1
+
+ ip address del 2001:db8:1::2/64 dev $h1
+ ip address del 192.0.2.2/24 dev $h1
+
+ ip link set dev $h1 down
+ vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+ vrf_create "vrf-h2"
+ ip link set dev $h2 master vrf-h2
+
+ ip link set dev vrf-h2 up
+ ip link set dev $h2 up
+
+ ip address add 198.51.100.2/24 dev $h2
+ ip address add 2001:db8:2::2/64 dev $h2
+
+ ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+ ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+ ip route del 2001:db8:1::/64 vrf vrf-h2
+ ip route del 192.0.2.0/24 vrf vrf-h2
+
+ ip address del 2001:db8:2::2/64 dev $h2
+ ip address del 198.51.100.2/24 dev $h2
+
+ ip link set dev $h2 down
+ vrf_destroy "vrf-h2"
+}
+
+router1_create()
+{
+ vrf_create "vrf-r1"
+ ip link set dev $rp11 master vrf-r1
+ ip link set dev $rp12 master vrf-r1
+ ip link set dev $rp13 master vrf-r1
+
+ ip link set dev vrf-r1 up
+ ip link set dev $rp11 up
+ ip link set dev $rp12 up
+ ip link set dev $rp13 up
+
+ ip address add 192.0.2.1/24 dev $rp11
+ ip address add 2001:db8:1::1/64 dev $rp11
+
+ ip address add 169.254.2.12/24 dev $rp12
+ ip address add fe80:2::12/64 dev $rp12
+
+ ip address add 169.254.3.13/24 dev $rp13
+ ip address add fe80:3::13/64 dev $rp13
+}
+
+router1_destroy()
+{
+ ip route del 2001:db8:2::/64 vrf vrf-r1
+ ip route del 198.51.100.0/24 vrf vrf-r1
+
+ ip address del fe80:3::13/64 dev $rp13
+ ip address del 169.254.3.13/24 dev $rp13
+
+ ip address del fe80:2::12/64 dev $rp12
+ ip address del 169.254.2.12/24 dev $rp12
+
+ ip address del 2001:db8:1::1/64 dev $rp11
+ ip address del 192.0.2.1/24 dev $rp11
+
+ ip nexthop del id 103
+ ip nexthop del id 101
+ ip nexthop del id 102
+ ip nexthop del id 106
+ ip nexthop del id 104
+ ip nexthop del id 105
+
+ ip link set dev $rp13 down
+ ip link set dev $rp12 down
+ ip link set dev $rp11 down
+
+ vrf_destroy "vrf-r1"
+}
+
+router2_create()
+{
+ vrf_create "vrf-r2"
+ ip link set dev $rp21 master vrf-r2
+ ip link set dev $rp22 master vrf-r2
+ ip link set dev $rp23 master vrf-r2
+
+ ip link set dev vrf-r2 up
+ ip link set dev $rp21 up
+ ip link set dev $rp22 up
+ ip link set dev $rp23 up
+
+ ip address add 198.51.100.1/24 dev $rp21
+ ip address add 2001:db8:2::1/64 dev $rp21
+
+ ip address add 169.254.2.22/24 dev $rp22
+ ip address add fe80:2::22/64 dev $rp22
+
+ ip address add 169.254.3.23/24 dev $rp23
+ ip address add fe80:3::23/64 dev $rp23
+}
+
+router2_destroy()
+{
+ ip route del 2001:db8:1::/64 vrf vrf-r2
+ ip route del 192.0.2.0/24 vrf vrf-r2
+
+ ip address del fe80:3::23/64 dev $rp23
+ ip address del 169.254.3.23/24 dev $rp23
+
+ ip address del fe80:2::22/64 dev $rp22
+ ip address del 169.254.2.22/24 dev $rp22
+
+ ip address del 2001:db8:2::1/64 dev $rp21
+ ip address del 198.51.100.1/24 dev $rp21
+
+ ip nexthop del id 201
+ ip nexthop del id 202
+ ip nexthop del id 204
+ ip nexthop del id 205
+
+ ip link set dev $rp23 down
+ ip link set dev $rp22 down
+ ip link set dev $rp21 down
+
+ vrf_destroy "vrf-r2"
+}
+
+routing_nh_obj()
+{
+ ip nexthop add id 101 via 169.254.2.22 dev $rp12
+ ip nexthop add id 102 via 169.254.3.23 dev $rp13
+ ip nexthop add id 103 group 101/102 type resilient buckets 512 \
+ idle_timer 0
+ ip route add 198.51.100.0/24 vrf vrf-r1 nhid 103
+
+ ip nexthop add id 104 via fe80:2::22 dev $rp12
+ ip nexthop add id 105 via fe80:3::23 dev $rp13
+ ip nexthop add id 106 group 104/105 type resilient buckets 512 \
+ idle_timer 0
+ ip route add 2001:db8:2::/64 vrf vrf-r1 nhid 106
+
+ ip nexthop add id 201 via 169.254.2.12 dev $rp22
+ ip nexthop add id 202 via 169.254.3.13 dev $rp23
+ ip nexthop add id 203 group 201/202 type resilient buckets 512 \
+ idle_timer 0
+ ip route add 192.0.2.0/24 vrf vrf-r2 nhid 203
+
+ ip nexthop add id 204 via fe80:2::12 dev $rp22
+ ip nexthop add id 205 via fe80:3::13 dev $rp23
+ ip nexthop add id 206 group 204/205 type resilient buckets 512 \
+ idle_timer 0
+ ip route add 2001:db8:1::/64 vrf vrf-r2 nhid 206
+}
+
+multipath4_test()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+ local packets_rp12 packets_rp13
+
+ # Transmit multiple flows from h1 to h2 and make sure they are
+ # distributed between both multipath links (rp12 and rp13)
+ # according to the provided weights.
+ sysctl_set net.ipv4.fib_multipath_hash_policy 1
+
+ t0_rp12=$(link_stats_tx_packets_get $rp12)
+ t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+ ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
+ -d 1msec -t udp "sp=1024,dp=0-32768"
+
+ t1_rp12=$(link_stats_tx_packets_get $rp12)
+ t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+ let "packets_rp12 = $t1_rp12 - $t0_rp12"
+ let "packets_rp13 = $t1_rp13 - $t0_rp13"
+ multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+ # Restore settings.
+ sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+multipath6_l4_test()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+ local packets_rp12 packets_rp13
+
+ # Transmit multiple flows from h1 to h2 and make sure they are
+ # distributed between both multipath links (rp12 and rp13)
+ # according to the provided weights.
+ sysctl_set net.ipv6.fib_multipath_hash_policy 1
+
+ t0_rp12=$(link_stats_tx_packets_get $rp12)
+ t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+ $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
+ -d 1msec -t udp "sp=1024,dp=0-32768"
+
+ t1_rp12=$(link_stats_tx_packets_get $rp12)
+ t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+ let "packets_rp12 = $t1_rp12 - $t0_rp12"
+ let "packets_rp13 = $t1_rp13 - $t0_rp13"
+ multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+ sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+multipath_test()
+{
+ # Without an idle timer, weight replacement should happen immediately.
+ log_info "Running multipath tests without an idle timer"
+ ip nexthop replace id 103 group 101/102 type resilient idle_timer 0
+ ip nexthop replace id 106 group 104/105 type resilient idle_timer 0
+
+ log_info "Running IPv4 multipath tests"
+ ip nexthop replace id 103 group 101,1/102,1 type resilient
+ multipath4_test "ECMP" 1 1
+ ip nexthop replace id 103 group 101,2/102,1 type resilient
+ multipath4_test "Weighted MP 2:1" 2 1
+ ip nexthop replace id 103 group 101,11/102,45 type resilient
+ multipath4_test "Weighted MP 11:45" 11 45
+
+ ip nexthop replace id 103 group 101,1/102,1 type resilient
+
+ log_info "Running IPv6 L4 hash multipath tests"
+ ip nexthop replace id 106 group 104,1/105,1 type resilient
+ multipath6_l4_test "ECMP" 1 1
+ ip nexthop replace id 106 group 104,2/105,1 type resilient
+ multipath6_l4_test "Weighted MP 2:1" 2 1
+ ip nexthop replace id 106 group 104,11/105,45 type resilient
+ multipath6_l4_test "Weighted MP 11:45" 11 45
+
+ ip nexthop replace id 106 group 104,1/105,1 type resilient
+
+ # With an idle timer, weight replacement should not happen, so the
+ # expected ratio should always be the initial one (1:1).
+ log_info "Running multipath tests with an idle timer of 120 seconds"
+ ip nexthop replace id 103 group 101/102 type resilient idle_timer 120
+ ip nexthop replace id 106 group 104/105 type resilient idle_timer 120
+
+ log_info "Running IPv4 multipath tests"
+ ip nexthop replace id 103 group 101,1/102,1 type resilient
+ multipath4_test "ECMP" 1 1
+ ip nexthop replace id 103 group 101,2/102,1 type resilient
+ multipath4_test "Weighted MP 2:1" 1 1
+ ip nexthop replace id 103 group 101,11/102,45 type resilient
+ multipath4_test "Weighted MP 11:45" 1 1
+
+ ip nexthop replace id 103 group 101,1/102,1 type resilient
+
+ log_info "Running IPv6 L4 hash multipath tests"
+ ip nexthop replace id 106 group 104,1/105,1 type resilient
+ multipath6_l4_test "ECMP" 1 1
+ ip nexthop replace id 106 group 104,2/105,1 type resilient
+ multipath6_l4_test "Weighted MP 2:1" 1 1
+ ip nexthop replace id 106 group 104,11/105,45 type resilient
+ multipath6_l4_test "Weighted MP 11:45" 1 1
+
+ ip nexthop replace id 106 group 104,1/105,1 type resilient
+
+ # With a short idle timer and enough idle time, weight replacement
+ # should happen.
+ log_info "Running multipath tests with an idle timer of 5 seconds"
+ ip nexthop replace id 103 group 101/102 type resilient idle_timer 5
+ ip nexthop replace id 106 group 104/105 type resilient idle_timer 5
+
+ log_info "Running IPv4 multipath tests"
+ sleep 10
+ ip nexthop replace id 103 group 101,1/102,1 type resilient
+ multipath4_test "ECMP" 1 1
+ sleep 10
+ ip nexthop replace id 103 group 101,2/102,1 type resilient
+ multipath4_test "Weighted MP 2:1" 2 1
+ sleep 10
+ ip nexthop replace id 103 group 101,11/102,45 type resilient
+ multipath4_test "Weighted MP 11:45" 11 45
+
+ ip nexthop replace id 103 group 101,1/102,1 type resilient
+
+ log_info "Running IPv6 L4 hash multipath tests"
+ sleep 10
+ ip nexthop replace id 106 group 104,1/105,1 type resilient
+ multipath6_l4_test "ECMP" 1 1
+ sleep 10
+ ip nexthop replace id 106 group 104,2/105,1 type resilient
+ multipath6_l4_test "Weighted MP 2:1" 2 1
+ sleep 10
+ ip nexthop replace id 106 group 104,11/105,45 type resilient
+ multipath6_l4_test "Weighted MP 11:45" 11 45
+
+ ip nexthop replace id 106 group 104,1/105,1 type resilient
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp11=${NETIFS[p2]}
+
+ rp12=${NETIFS[p3]}
+ rp22=${NETIFS[p4]}
+
+ rp13=${NETIFS[p5]}
+ rp23=${NETIFS[p6]}
+
+ rp21=${NETIFS[p7]}
+ h2=${NETIFS[p8]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router1_create
+ router2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router2_destroy
+ router1_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 198.51.100.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::2
+}
+
+ip nexthop ls >/dev/null 2>&1
+if [ $? -ne 0 ]; then
+ echo "Nexthop objects not supported; skipping tests"
+ exit 0
+fi
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+routing_nh_obj
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_police.sh b/tools/testing/selftests/net/forwarding/tc_police.sh
index 160f9cccdfb7..4f9f17cb45d6 100755
--- a/tools/testing/selftests/net/forwarding/tc_police.sh
+++ b/tools/testing/selftests/net/forwarding/tc_police.sh
@@ -35,6 +35,8 @@ ALL_TESTS="
police_shared_test
police_rx_mirror_test
police_tx_mirror_test
+ police_pps_rx_test
+ police_pps_tx_test
"
NUM_NETIFS=6
source tc_common.sh
@@ -290,6 +292,60 @@ police_tx_mirror_test()
police_mirror_common_test $rp2 egress "police tx and mirror"
}
+police_pps_common_test()
+{
+ local test_name=$1; shift
+
+ RET=0
+
+ # Rule to measure bandwidth on ingress of $h2
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+ action drop
+
+ mausezahn $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \
+ -t udp sp=12345,dp=54321 -p 1000 -c 0 -q &
+
+ local t0=$(tc_rule_stats_get $h2 1 ingress .packets)
+ sleep 10
+ local t1=$(tc_rule_stats_get $h2 1 ingress .packets)
+
+ local er=$((2000))
+ local nr=$(packets_rate $t0 $t1 10)
+ local nr_pct=$((100 * (nr - er) / er))
+ ((-10 <= nr_pct && nr_pct <= 10))
+ check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-10%."
+
+ log_test "$test_name"
+
+ { kill %% && wait %%; } 2>/dev/null
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+}
+
+police_pps_rx_test()
+{
+ # Rule to police traffic destined to $h2 on ingress of $rp1
+ tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
+ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+ action police pkts_rate 2000 pkts_burst 400 conform-exceed drop/ok
+
+ police_pps_common_test "police pps on rx"
+
+ tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower
+}
+
+police_pps_tx_test()
+{
+ # Rule to police traffic destined to $h2 on egress of $rp2
+ tc filter add dev $rp2 egress protocol ip pref 1 handle 101 flower \
+ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+ action police pkts_rate 2000 pkts_burst 400 conform-exceed drop/ok
+
+ police_pps_common_test "police pps on tx"
+
+ tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower
+}
+
setup_prepare()
{
h1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
index ce6bea9675c0..eb307ca37bfa 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
@@ -657,10 +657,21 @@ test_ecn_decap()
{
# In accordance with INET_ECN_decapsulate()
__test_ecn_decap 00 00 0x00
+ __test_ecn_decap 00 01 0x00
+ __test_ecn_decap 00 02 0x00
+ # 00 03 is tested in test_ecn_decap_error()
+ __test_ecn_decap 01 00 0x01
__test_ecn_decap 01 01 0x01
- __test_ecn_decap 02 01 0x02
+ __test_ecn_decap 01 02 0x01
__test_ecn_decap 01 03 0x03
+ __test_ecn_decap 02 00 0x02
+ __test_ecn_decap 02 01 0x01
+ __test_ecn_decap 02 02 0x02
__test_ecn_decap 02 03 0x03
+ __test_ecn_decap 03 00 0x03
+ __test_ecn_decap 03 01 0x03
+ __test_ecn_decap 03 02 0x03
+ __test_ecn_decap 03 03 0x03
test_ecn_decap_error
}
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index 00bb158b4a5d..f1464f09b080 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -6,7 +6,7 @@ KSFT_KHDR_INSTALL := 1
CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include
TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \
- simult_flows.sh
+ simult_flows.sh mptcp_sockopt.sh
TEST_GEN_FILES = mptcp_connect pm_nl_ctl
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
index 39edce4f541c..2674ba20d524 100755
--- a/tools/testing/selftests/net/mptcp/diag.sh
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -5,8 +5,9 @@ rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
ns="ns1-$rndh"
ksft_skip=4
test_cnt=1
+timeout_poll=100
+timeout_test=$((timeout_poll * 2 + 1))
ret=0
-pids=()
flush_pids()
{
@@ -14,18 +15,14 @@ flush_pids()
# give it some time
sleep 1.1
- for pid in ${pids[@]}; do
- [ -d /proc/$pid ] && kill -SIGUSR1 $pid >/dev/null 2>&1
- done
- pids=()
+ ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGUSR1 &>/dev/null
}
cleanup()
{
+ ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGKILL &>/dev/null
+
ip netns del $ns
- for pid in ${pids[@]}; do
- [ -d /proc/$pid ] && kill -9 $pid >/dev/null 2>&1
- done
}
ip -Version > /dev/null 2>&1
@@ -79,39 +76,57 @@ trap cleanup EXIT
ip netns add $ns
ip -n $ns link set dev lo up
-echo "a" | ip netns exec $ns ./mptcp_connect -p 10000 -l 0.0.0.0 -t 100 >/dev/null &
+echo "a" | \
+ timeout ${timeout_test} \
+ ip netns exec $ns \
+ ./mptcp_connect -p 10000 -l -t ${timeout_poll} \
+ 0.0.0.0 >/dev/null &
sleep 0.1
-pids[0]=$!
chk_msk_nr 0 "no msk on netns creation"
-echo "b" | ip netns exec $ns ./mptcp_connect -p 10000 127.0.0.1 -j -t 100 >/dev/null &
+echo "b" | \
+ timeout ${timeout_test} \
+ ip netns exec $ns \
+ ./mptcp_connect -p 10000 -j -t ${timeout_poll} \
+ 127.0.0.1 >/dev/null &
sleep 0.1
-pids[1]=$!
chk_msk_nr 2 "after MPC handshake "
chk_msk_remote_key_nr 2 "....chk remote_key"
chk_msk_fallback_nr 0 "....chk no fallback"
flush_pids
-echo "a" | ip netns exec $ns ./mptcp_connect -p 10001 -s TCP -l 0.0.0.0 -t 100 >/dev/null &
-pids[0]=$!
+echo "a" | \
+ timeout ${timeout_test} \
+ ip netns exec $ns \
+ ./mptcp_connect -p 10001 -l -s TCP -t ${timeout_poll} \
+ 0.0.0.0 >/dev/null &
sleep 0.1
-echo "b" | ip netns exec $ns ./mptcp_connect -p 10001 127.0.0.1 -j -t 100 >/dev/null &
-pids[1]=$!
+echo "b" | \
+ timeout ${timeout_test} \
+ ip netns exec $ns \
+ ./mptcp_connect -p 10001 -j -t ${timeout_poll} \
+ 127.0.0.1 >/dev/null &
sleep 0.1
chk_msk_fallback_nr 1 "check fallback"
flush_pids
NR_CLIENTS=100
for I in `seq 1 $NR_CLIENTS`; do
- echo "a" | ip netns exec $ns ./mptcp_connect -p $((I+10001)) -l 0.0.0.0 -t 100 -w 10 >/dev/null &
- pids[$((I*2))]=$!
+ echo "a" | \
+ timeout ${timeout_test} \
+ ip netns exec $ns \
+ ./mptcp_connect -p $((I+10001)) -l -w 10 \
+ -t ${timeout_poll} 0.0.0.0 >/dev/null &
done
sleep 0.1
for I in `seq 1 $NR_CLIENTS`; do
- echo "b" | ip netns exec $ns ./mptcp_connect -p $((I+10001)) 127.0.0.1 -t 100 -w 10 >/dev/null &
- pids[$((I*2 + 1))]=$!
+ echo "b" | \
+ timeout ${timeout_test} \
+ ip netns exec $ns \
+ ./mptcp_connect -p $((I+10001)) -w 10 \
+ -t ${timeout_poll} 127.0.0.1 >/dev/null &
done
sleep 1.5
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index 77bb62feb872..d88e1fdfb147 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -45,7 +45,14 @@ enum cfg_mode {
CFG_MODE_SENDFILE,
};
+enum cfg_peek {
+ CFG_NONE_PEEK,
+ CFG_WITH_PEEK,
+ CFG_AFTER_PEEK,
+};
+
static enum cfg_mode cfg_mode = CFG_MODE_POLL;
+static enum cfg_peek cfg_peek = CFG_NONE_PEEK;
static const char *cfg_host;
static const char *cfg_port = "12000";
static int cfg_sock_proto = IPPROTO_MPTCP;
@@ -55,7 +62,9 @@ static int cfg_sndbuf;
static int cfg_rcvbuf;
static bool cfg_join;
static bool cfg_remove;
+static unsigned int cfg_do_w;
static int cfg_wait;
+static uint32_t cfg_mark;
static void die_usage(void)
{
@@ -68,8 +77,11 @@ static void die_usage(void)
fprintf(stderr, "\t-p num -- use port num\n");
fprintf(stderr, "\t-s [MPTCP|TCP] -- use mptcp(default) or tcp sockets\n");
fprintf(stderr, "\t-m [poll|mmap|sendfile] -- use poll(default)/mmap+write/sendfile\n");
+ fprintf(stderr, "\t-M mark -- set socket packet mark\n");
fprintf(stderr, "\t-u -- check mptcp ulp\n");
fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n");
+ fprintf(stderr,
+ "\t-P [saveWithPeek|saveAfterPeek] -- save data with/after MSG_PEEK form tcp socket\n");
exit(1);
}
@@ -139,6 +151,17 @@ static void set_sndbuf(int fd, unsigned int size)
}
}
+static void set_mark(int fd, uint32_t mark)
+{
+ int err;
+
+ err = setsockopt(fd, SOL_SOCKET, SO_MARK, &mark, sizeof(mark));
+ if (err) {
+ perror("set SO_MARK");
+ exit(1);
+ }
+}
+
static int sock_listen_mptcp(const char * const listenaddr,
const char * const port)
{
@@ -247,6 +270,9 @@ static int sock_connect_mptcp(const char * const remoteaddr,
continue;
}
+ if (cfg_mark)
+ set_mark(sock, cfg_mark);
+
if (connect(sock, a->ai_addr, a->ai_addrlen) == 0)
break; /* success */
@@ -272,8 +298,8 @@ static size_t do_rnd_write(const int fd, char *buf, const size_t len)
if (cfg_join && first && do_w > 100)
do_w = 100;
- if (cfg_remove && do_w > 50)
- do_w = 50;
+ if (cfg_remove && do_w > cfg_do_w)
+ do_w = cfg_do_w;
bw = write(fd, buf, do_w);
if (bw < 0)
@@ -314,6 +340,8 @@ static size_t do_write(const int fd, char *buf, const size_t len)
static ssize_t do_rnd_read(const int fd, char *buf, const size_t len)
{
+ int ret = 0;
+ char tmp[16384];
size_t cap = rand();
cap &= 0xffff;
@@ -323,7 +351,17 @@ static ssize_t do_rnd_read(const int fd, char *buf, const size_t len)
else if (cap > len)
cap = len;
- return read(fd, buf, cap);
+ if (cfg_peek == CFG_WITH_PEEK) {
+ ret = recv(fd, buf, cap, MSG_PEEK);
+ ret = (ret < 0) ? ret : read(fd, tmp, ret);
+ } else if (cfg_peek == CFG_AFTER_PEEK) {
+ ret = recv(fd, buf, cap, MSG_PEEK);
+ ret = (ret < 0) ? ret : read(fd, buf, cap);
+ } else {
+ ret = read(fd, buf, cap);
+ }
+
+ return ret;
}
static void set_nonblock(int fd)
@@ -802,6 +840,26 @@ int parse_mode(const char *mode)
return 0;
}
+int parse_peek(const char *mode)
+{
+ if (!strcasecmp(mode, "saveWithPeek"))
+ return CFG_WITH_PEEK;
+ if (!strcasecmp(mode, "saveAfterPeek"))
+ return CFG_AFTER_PEEK;
+
+ fprintf(stderr, "Unknown: %s\n", mode);
+ fprintf(stderr, "Supported MSG_PEEK mode are:\n");
+ fprintf(stderr,
+ "\t\t\"saveWithPeek\" - recv data with flags 'MSG_PEEK' and save the peek data into file\n");
+ fprintf(stderr,
+ "\t\t\"saveAfterPeek\" - read and save data into file after recv with flags 'MSG_PEEK'\n");
+
+ die_usage();
+
+ /* silence compiler warning */
+ return 0;
+}
+
static int parse_int(const char *size)
{
unsigned long s;
@@ -829,7 +887,7 @@ static void parse_opts(int argc, char **argv)
{
int c;
- while ((c = getopt(argc, argv, "6jrlp:s:hut:m:S:R:w:")) != -1) {
+ while ((c = getopt(argc, argv, "6jr:lp:s:hut:m:S:R:w:M:P:")) != -1) {
switch (c) {
case 'j':
cfg_join = true;
@@ -840,6 +898,9 @@ static void parse_opts(int argc, char **argv)
cfg_remove = true;
cfg_mode = CFG_MODE_POLL;
cfg_wait = 400000;
+ cfg_do_w = atoi(optarg);
+ if (cfg_do_w <= 0)
+ cfg_do_w = 50;
break;
case 'l':
listen_mode = true;
@@ -876,6 +937,12 @@ static void parse_opts(int argc, char **argv)
case 'w':
cfg_wait = atoi(optarg)*1000000;
break;
+ case 'M':
+ cfg_mark = strtol(optarg, NULL, 0);
+ break;
+ case 'P':
+ cfg_peek = parse_peek(optarg);
+ break;
}
}
@@ -907,6 +974,8 @@ int main(int argc, char *argv[])
set_rcvbuf(fd, cfg_rcvbuf);
if (cfg_sndbuf)
set_sndbuf(fd, cfg_sndbuf);
+ if (cfg_mark)
+ set_mark(fd, cfg_mark);
return main_loop_s(fd);
}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 10a030b53b23..9236609731b1 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -11,7 +11,8 @@ cin=""
cout=""
ksft_skip=4
capture=false
-timeout=30
+timeout_poll=30
+timeout_test=$((timeout_poll * 2 + 1))
ipv6=true
ethtool_random_on=true
tc_delay="$((RANDOM%50))"
@@ -273,7 +274,7 @@ check_mptcp_disabled()
ip netns exec ${disabled_ns} sysctl -q net.mptcp.enabled=0
local err=0
- LANG=C ip netns exec ${disabled_ns} ./mptcp_connect -t $timeout -p 10000 -s MPTCP 127.0.0.1 < "$cin" 2>&1 | \
+ LANG=C ip netns exec ${disabled_ns} ./mptcp_connect -p 10000 -s MPTCP 127.0.0.1 < "$cin" 2>&1 | \
grep -q "^socket: Protocol not available$" && err=1
ip netns delete ${disabled_ns}
@@ -374,7 +375,7 @@ do_transfer()
local srv_proto="$4"
local connect_addr="$5"
local local_addr="$6"
- local extra_args=""
+ local extra_args="$7"
local port
port=$((10000+$TEST_COUNT))
@@ -393,9 +394,9 @@ do_transfer()
fi
if [ -n "$extra_args" ] && $options_log; then
- options_log=false
echo "INFO: extra options: $extra_args"
fi
+ options_log=false
:> "$cout"
:> "$sout"
@@ -425,19 +426,32 @@ do_transfer()
sleep 1
fi
+ NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
+ nstat -n
+ if [ ${listener_ns} != ${connector_ns} ]; then
+ NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
+ nstat -n
+ fi
+
local stat_synrx_last_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
local stat_ackrx_last_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
local stat_cookietx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent")
local stat_cookierx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv")
- ip netns exec ${listener_ns} ./mptcp_connect -t $timeout -l -p $port -s ${srv_proto} $extra_args $local_addr < "$sin" > "$sout" &
+ timeout ${timeout_test} \
+ ip netns exec ${listener_ns} \
+ ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
+ $extra_args $local_addr < "$sin" > "$sout" &
local spid=$!
wait_local_port_listen "${listener_ns}" "${port}"
local start
start=$(date +%s%3N)
- ip netns exec ${connector_ns} ./mptcp_connect -t $timeout -p $port -s ${cl_proto} $extra_args $connect_addr < "$cin" > "$cout" &
+ timeout ${timeout_test} \
+ ip netns exec ${connector_ns} \
+ ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
+ $extra_args $connect_addr < "$cin" > "$cout" &
local cpid=$!
wait $cpid
@@ -575,6 +589,7 @@ run_tests_lo()
local connector_ns="$2"
local connect_addr="$3"
local loopback="$4"
+ local extra_args="$5"
local lret=0
# skip if test programs are running inside same netns for subsequent runs.
@@ -594,7 +609,8 @@ run_tests_lo()
local_addr="0.0.0.0"
fi
- do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} ${local_addr}
+ do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP \
+ ${connect_addr} ${local_addr} "${extra_args}"
lret=$?
if [ $lret -ne 0 ]; then
ret=$lret
@@ -608,14 +624,16 @@ run_tests_lo()
fi
fi
- do_transfer ${listener_ns} ${connector_ns} MPTCP TCP ${connect_addr} ${local_addr}
+ do_transfer ${listener_ns} ${connector_ns} MPTCP TCP \
+ ${connect_addr} ${local_addr} "${extra_args}"
lret=$?
if [ $lret -ne 0 ]; then
ret=$lret
return 1
fi
- do_transfer ${listener_ns} ${connector_ns} TCP MPTCP ${connect_addr} ${local_addr}
+ do_transfer ${listener_ns} ${connector_ns} TCP MPTCP \
+ ${connect_addr} ${local_addr} "${extra_args}"
lret=$?
if [ $lret -ne 0 ]; then
ret=$lret
@@ -623,7 +641,8 @@ run_tests_lo()
fi
if [ $do_tcp -gt 1 ] ;then
- do_transfer ${listener_ns} ${connector_ns} TCP TCP ${connect_addr} ${local_addr}
+ do_transfer ${listener_ns} ${connector_ns} TCP TCP \
+ ${connect_addr} ${local_addr} "${extra_args}"
lret=$?
if [ $lret -ne 0 ]; then
ret=$lret
@@ -639,6 +658,15 @@ run_tests()
run_tests_lo $1 $2 $3 0
}
+run_tests_peekmode()
+{
+ local peekmode="$1"
+
+ echo "INFO: with peek mode: ${peekmode}"
+ run_tests_lo "$ns1" "$ns1" 10.0.1.1 1 "-P ${peekmode}"
+ run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-P ${peekmode}"
+}
+
make_file "$cin" "client"
make_file "$sin" "server"
@@ -718,6 +746,9 @@ for sender in $ns1 $ns2 $ns3 $ns4;do
run_tests "$ns4" $sender dead:beef:3::1
done
+run_tests_peekmode "saveWithPeek"
+run_tests_peekmode "saveAfterPeek"
+
time_end=$(date +%s)
time_run=$((time_end-time_start))
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 964db9ed544f..fd99485cf2a4 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -8,9 +8,11 @@ cin=""
cinsent=""
cout=""
ksft_skip=4
-timeout=30
+timeout_poll=30
+timeout_test=$((timeout_poll * 2 + 1))
mptcp_connect=""
capture=0
+do_all_tests=1
TEST_COUNT=0
@@ -76,6 +78,7 @@ cleanup_partial()
for netns in "$ns1" "$ns2"; do
ip netns del $netns
+ rm -f /tmp/$netns.{nstat,out}
done
}
@@ -121,12 +124,6 @@ reset_with_add_addr_timeout()
-j DROP
}
-for arg in "$@"; do
- if [ "$arg" = "-c" ]; then
- capture=1
- fi
-done
-
ip -Version > /dev/null 2>&1
if [ $? -ne 0 ];then
echo "SKIP: Could not run test without ip tool"
@@ -237,10 +234,17 @@ do_transfer()
sleep 1
fi
+ NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
+ nstat -n
+ NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
+ nstat -n
+
if [ $speed = "fast" ]; then
mptcp_connect="./mptcp_connect -j"
- else
- mptcp_connect="./mptcp_connect -r"
+ elif [ $speed = "slow" ]; then
+ mptcp_connect="./mptcp_connect -r 50"
+ elif [ $speed = "least" ]; then
+ mptcp_connect="./mptcp_connect -r 10"
fi
local local_addr
@@ -250,17 +254,26 @@ do_transfer()
local_addr="0.0.0.0"
fi
- ip netns exec ${listener_ns} $mptcp_connect -t $timeout -l -p $port \
- -s ${srv_proto} ${local_addr} < "$sin" > "$sout" &
+ timeout ${timeout_test} \
+ ip netns exec ${listener_ns} \
+ $mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
+ ${local_addr} < "$sin" > "$sout" &
spid=$!
sleep 1
if [ "$test_link_fail" -eq 0 ];then
- ip netns exec ${connector_ns} $mptcp_connect -t $timeout -p $port -s ${cl_proto} $connect_addr < "$cin" > "$cout" &
+ timeout ${timeout_test} \
+ ip netns exec ${connector_ns} \
+ $mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
+ $connect_addr < "$cin" > "$cout" &
else
- ( cat "$cin" ; sleep 2; link_failure $listener_ns ; cat "$cin" ) | tee "$cinsent" | \
- ip netns exec ${connector_ns} $mptcp_connect -t $timeout -p $port -s ${cl_proto} $connect_addr > "$cout" &
+ ( cat "$cin" ; sleep 2; link_failure $listener_ns ; cat "$cin" ) | \
+ tee "$cinsent" | \
+ timeout ${timeout_test} \
+ ip netns exec ${connector_ns} \
+ $mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
+ $connect_addr > "$cout" &
fi
cpid=$!
@@ -284,17 +297,25 @@ do_transfer()
let rm_nr_ns1=-addr_nr_ns1
if [ $rm_nr_ns1 -lt 8 ]; then
counter=1
- sleep 1
-
- while [ $counter -le $rm_nr_ns1 ]
- do
- ip netns exec ${listener_ns} ./pm_nl_ctl del $counter
+ dump=(`ip netns exec ${listener_ns} ./pm_nl_ctl dump`)
+ if [ ${#dump[@]} -gt 0 ]; then
+ id=${dump[1]}
sleep 1
- let counter+=1
- done
- else
+
+ while [ $counter -le $rm_nr_ns1 ]
+ do
+ ip netns exec ${listener_ns} ./pm_nl_ctl del $id
+ sleep 1
+ let counter+=1
+ let id+=1
+ done
+ fi
+ elif [ $rm_nr_ns1 -eq 8 ]; then
sleep 1
ip netns exec ${listener_ns} ./pm_nl_ctl flush
+ elif [ $rm_nr_ns1 -eq 9 ]; then
+ sleep 1
+ ip netns exec ${listener_ns} ./pm_nl_ctl del 0 ${connect_addr}
fi
fi
@@ -318,17 +339,31 @@ do_transfer()
let rm_nr_ns2=-addr_nr_ns2
if [ $rm_nr_ns2 -lt 8 ]; then
counter=1
- sleep 1
-
- while [ $counter -le $rm_nr_ns2 ]
- do
- ip netns exec ${connector_ns} ./pm_nl_ctl del $counter
+ dump=(`ip netns exec ${connector_ns} ./pm_nl_ctl dump`)
+ if [ ${#dump[@]} -gt 0 ]; then
+ id=${dump[1]}
sleep 1
- let counter+=1
- done
- else
+
+ while [ $counter -le $rm_nr_ns2 ]
+ do
+ ip netns exec ${connector_ns} ./pm_nl_ctl del $id
+ sleep 1
+ let counter+=1
+ let id+=1
+ done
+ fi
+ elif [ $rm_nr_ns2 -eq 8 ]; then
sleep 1
ip netns exec ${connector_ns} ./pm_nl_ctl flush
+ elif [ $rm_nr_ns2 -eq 9 ]; then
+ local addr
+ if is_v6 "${connect_addr}"; then
+ addr="dead:beef:1::2"
+ else
+ addr="10.0.1.2"
+ fi
+ sleep 1
+ ip netns exec ${connector_ns} ./pm_nl_ctl del 0 $addr
fi
fi
@@ -354,12 +389,19 @@ do_transfer()
kill $cappid
fi
+ NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
+ nstat | grep Tcp > /tmp/${listener_ns}.out
+ NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
+ nstat | grep Tcp > /tmp/${connector_ns}.out
+
if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
echo " client exit code $retc, server $rets" 1>&2
echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
- ip netns exec ${listener_ns} ss -nita 1>&2 -o "sport = :$port"
+ ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port"
+ cat /tmp/${listener_ns}.out
echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2
- ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port"
+ ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port"
+ cat /tmp/${connector_ns}.out
cat "$capout"
ret=1
@@ -610,11 +652,22 @@ chk_rm_nr()
{
local rm_addr_nr=$1
local rm_subflow_nr=$2
+ local invert=${3:-""}
local count
local dump_stats
+ local addr_ns
+ local subflow_ns
+
+ if [ -z $invert ]; then
+ addr_ns=$ns1
+ subflow_ns=$ns2
+ elif [ $invert = "invert" ]; then
+ addr_ns=$ns2
+ subflow_ns=$ns1
+ fi
printf "%-39s %s" " " "rm "
- count=`ip netns exec $ns1 nstat -as | grep MPTcpExtRmAddr | awk '{print $2}'`
+ count=`ip netns exec $addr_ns nstat -as | grep MPTcpExtRmAddr | awk '{print $2}'`
[ -z "$count" ] && count=0
if [ "$count" != "$rm_addr_nr" ]; then
echo "[fail] got $count RM_ADDR[s] expected $rm_addr_nr"
@@ -625,7 +678,7 @@ chk_rm_nr()
fi
echo -n " - sf "
- count=`ip netns exec $ns2 nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}'`
+ count=`ip netns exec $subflow_ns nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}'`
[ -z "$count" ] && count=0
if [ "$count" != "$rm_subflow_nr" ]; then
echo "[fail] got $count RM_SUBFLOW[s] expected $rm_subflow_nr"
@@ -724,6 +777,14 @@ subflows_tests()
ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "multiple subflows, limited by server" 2 2 1
+
+ # single subflow, dev
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow dev ns2eth3
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr "single subflow, dev" 1 1 1
}
signal_address_tests()
@@ -767,6 +828,28 @@ signal_address_tests()
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr "multiple subflows and signal" 3 3 3
chk_add_nr 1 1
+
+ # signal addresses
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 3 3
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl limits 3 3
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr "signal addresses" 3 3 3
+ chk_add_nr 3 3
+
+ # signal invalid addresses
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 3 3
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.12.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.14.1 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl limits 3 3
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr "signal invalid addresses" 1 1 1
+ chk_add_nr 3 3
}
link_failure_tests()
@@ -802,6 +885,26 @@ add_addr_timeout_tests()
run_tests $ns1 $ns2 dead:beef:1::1 0 0 0 slow
chk_join_nr "signal address, ADD_ADDR6 timeout" 1 1 1
chk_add_nr 4 0
+
+ # signal addresses timeout
+ reset_with_add_addr_timeout
+ ip netns exec $ns1 ./pm_nl_ctl limits 2 2
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl limits 2 2
+ run_tests $ns1 $ns2 10.0.1.1 0 0 0 least
+ chk_join_nr "signal addresses, ADD_ADDR timeout" 2 2 2
+ chk_add_nr 8 0
+
+ # signal invalid addresses timeout
+ reset_with_add_addr_timeout
+ ip netns exec $ns1 ./pm_nl_ctl limits 2 2
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.12.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl limits 2 2
+ run_tests $ns1 $ns2 10.0.1.1 0 0 0 least
+ chk_join_nr "invalid address, ADD_ADDR timeout" 1 1 1
+ chk_add_nr 8 0
}
remove_tests()
@@ -833,7 +936,7 @@ remove_tests()
run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow
chk_join_nr "remove single address" 1 1 1
chk_add_nr 1 1
- chk_rm_nr 0 0
+ chk_rm_nr 1 1 invert
# subflow and signal, remove
reset
@@ -858,6 +961,30 @@ remove_tests()
chk_add_nr 1 1
chk_rm_nr 2 2
+ # addresses remove
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 3 3
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal id 250
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl limits 3 3
+ run_tests $ns1 $ns2 10.0.1.1 0 -3 0 slow
+ chk_join_nr "remove addresses" 3 3 3
+ chk_add_nr 3 3
+ chk_rm_nr 3 3 invert
+
+ # invalid addresses remove
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 3 3
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.12.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.14.1 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl limits 3 3
+ run_tests $ns1 $ns2 10.0.1.1 0 -3 0 slow
+ chk_join_nr "remove invalid addresses" 1 1 1
+ chk_add_nr 3 3
+ chk_rm_nr 3 1 invert
+
# subflows and signal, flush
reset
ip netns exec $ns1 ./pm_nl_ctl limits 0 3
@@ -869,6 +996,60 @@ remove_tests()
chk_join_nr "flush subflows and signal" 3 3 3
chk_add_nr 1 1
chk_rm_nr 2 2
+
+ # subflows flush
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 3 3
+ ip netns exec $ns2 ./pm_nl_ctl limits 3 3
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow id 150
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow
+ chk_join_nr "flush subflows" 3 3 3
+ chk_rm_nr 3 3
+
+ # addresses flush
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 3 3
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal id 250
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.4.1 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl limits 3 3
+ run_tests $ns1 $ns2 10.0.1.1 0 -8 -8 slow
+ chk_join_nr "flush addresses" 3 3 3
+ chk_add_nr 3 3
+ chk_rm_nr 3 3 invert
+
+ # invalid addresses flush
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 3 3
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.12.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.3.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.14.1 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl limits 3 3
+ run_tests $ns1 $ns2 10.0.1.1 0 -8 0 slow
+ chk_join_nr "flush invalid addresses" 1 1 1
+ chk_add_nr 3 3
+ chk_rm_nr 3 1 invert
+
+ # remove id 0 subflow
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1 0 0 -9 slow
+ chk_join_nr "remove id 0 subflow" 1 1 1
+ chk_rm_nr 1 1
+
+ # remove id 0 address
+ reset
+ ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+ run_tests $ns1 $ns2 10.0.1.1 0 -9 0 slow
+ chk_join_nr "remove id 0 address" 1 1 1
+ chk_add_nr 1 1
+ chk_rm_nr 1 1 invert
}
add_tests()
@@ -945,7 +1126,7 @@ ipv6_tests()
run_tests $ns1 $ns2 dead:beef:1::1 0 -1 0 slow
chk_join_nr "remove single address IPv6" 1 1 1
chk_add_nr 1 1
- chk_rm_nr 0 0
+ chk_rm_nr 1 1 invert
# subflow and signal IPv6, remove
reset
@@ -1088,7 +1269,7 @@ add_addr_ports_tests()
run_tests $ns1 $ns2 10.0.1.1 0 -1 0 slow
chk_join_nr "remove single address with port" 1 1 1
chk_add_nr 1 1 1
- chk_rm_nr 0 0
+ chk_rm_nr 1 1 invert
# subflow and signal with port, remove
reset
@@ -1221,7 +1402,8 @@ usage()
echo " -4 v4mapped_tests"
echo " -b backup_tests"
echo " -p add_addr_ports_tests"
- echo " -c syncookies_tests"
+ echo " -k syncookies_tests"
+ echo " -c capture pcap files"
echo " -h help"
}
@@ -1235,12 +1417,24 @@ make_file "$cin" "client" 1
make_file "$sin" "server" 1
trap cleanup EXIT
-if [ -z $1 ]; then
+for arg in "$@"; do
+ # check for "capture" arg before launching tests
+ if [[ "${arg}" =~ ^"-"[0-9a-zA-Z]*"c"[0-9a-zA-Z]*$ ]]; then
+ capture=1
+ fi
+
+ # exception for the capture option, the rest means: a part of the tests
+ if [ "${arg}" != "-c" ]; then
+ do_all_tests=0
+ fi
+done
+
+if [ $do_all_tests -eq 1 ]; then
all_tests
exit $ret
fi
-while getopts 'fsltra64bpch' opt; do
+while getopts 'fsltra64bpkch' opt; do
case $opt in
f)
subflows_tests
@@ -1272,9 +1466,11 @@ while getopts 'fsltra64bpch' opt; do
p)
add_addr_ports_tests
;;
- c)
+ k)
syncookies_tests
;;
+ c)
+ ;;
h | *)
usage
;;
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
new file mode 100755
index 000000000000..2fa13946ac04
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
@@ -0,0 +1,276 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ret=0
+sin=""
+sout=""
+cin=""
+cout=""
+ksft_skip=4
+timeout_poll=30
+timeout_test=$((timeout_poll * 2 + 1))
+mptcp_connect=""
+do_all_tests=1
+
+add_mark_rules()
+{
+ local ns=$1
+ local m=$2
+
+ for t in iptables ip6tables; do
+ # just to debug: check we have multiple subflows connection requests
+ ip netns exec $ns $t -A OUTPUT -p tcp --syn -m mark --mark $m -j ACCEPT
+
+ # RST packets might be handled by a internal dummy socket
+ ip netns exec $ns $t -A OUTPUT -p tcp --tcp-flags RST RST -m mark --mark 0 -j ACCEPT
+
+ ip netns exec $ns $t -A OUTPUT -p tcp -m mark --mark $m -j ACCEPT
+ ip netns exec $ns $t -A OUTPUT -p tcp -m mark --mark 0 -j DROP
+ done
+}
+
+init()
+{
+ rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
+
+ ns1="ns1-$rndh"
+ ns2="ns2-$rndh"
+
+ for netns in "$ns1" "$ns2";do
+ ip netns add $netns || exit $ksft_skip
+ ip -net $netns link set lo up
+ ip netns exec $netns sysctl -q net.mptcp.enabled=1
+ ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0
+ ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0
+ done
+
+ for i in `seq 1 4`; do
+ ip link add ns1eth$i netns "$ns1" type veth peer name ns2eth$i netns "$ns2"
+ ip -net "$ns1" addr add 10.0.$i.1/24 dev ns1eth$i
+ ip -net "$ns1" addr add dead:beef:$i::1/64 dev ns1eth$i nodad
+ ip -net "$ns1" link set ns1eth$i up
+
+ ip -net "$ns2" addr add 10.0.$i.2/24 dev ns2eth$i
+ ip -net "$ns2" addr add dead:beef:$i::2/64 dev ns2eth$i nodad
+ ip -net "$ns2" link set ns2eth$i up
+
+ # let $ns2 reach any $ns1 address from any interface
+ ip -net "$ns2" route add default via 10.0.$i.1 dev ns2eth$i metric 10$i
+
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.$i.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add dead:beef:$i::1 flags signal
+
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.$i.2 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl add dead:beef:$i::2 flags signal
+ done
+
+ ip netns exec $ns1 ./pm_nl_ctl limits 8 8
+ ip netns exec $ns2 ./pm_nl_ctl limits 8 8
+
+ add_mark_rules $ns1 1
+ add_mark_rules $ns2 2
+}
+
+cleanup()
+{
+ for netns in "$ns1" "$ns2"; do
+ ip netns del $netns
+ done
+ rm -f "$cin" "$cout"
+ rm -f "$sin" "$sout"
+}
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+iptables -V > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run all tests without iptables tool"
+ exit $ksft_skip
+fi
+
+ip6tables -V > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run all tests without ip6tables tool"
+ exit $ksft_skip
+fi
+
+check_mark()
+{
+ local ns=$1
+ local af=$2
+
+ tables=iptables
+
+ if [ $af -eq 6 ];then
+ tables=ip6tables
+ fi
+
+ counters=$(ip netns exec $ns $tables -v -L OUTPUT | grep DROP)
+ values=${counters%DROP*}
+
+ for v in $values; do
+ if [ $v -ne 0 ]; then
+ echo "FAIL: got $tables $values in ns $ns , not 0 - not all expected packets marked" 1>&2
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+print_file_err()
+{
+ ls -l "$1" 1>&2
+ echo "Trailing bytes are: "
+ tail -c 27 "$1"
+}
+
+check_transfer()
+{
+ in=$1
+ out=$2
+ what=$3
+
+ cmp "$in" "$out" > /dev/null 2>&1
+ if [ $? -ne 0 ] ;then
+ echo "[ FAIL ] $what does not match (in, out):"
+ print_file_err "$in"
+ print_file_err "$out"
+ ret=1
+
+ return 1
+ fi
+
+ return 0
+}
+
+# $1: IP address
+is_v6()
+{
+ [ -z "${1##*:*}" ]
+}
+
+do_transfer()
+{
+ listener_ns="$1"
+ connector_ns="$2"
+ cl_proto="$3"
+ srv_proto="$4"
+ connect_addr="$5"
+
+ port=12001
+
+ :> "$cout"
+ :> "$sout"
+
+ mptcp_connect="./mptcp_connect -r 20"
+
+ local local_addr
+ if is_v6 "${connect_addr}"; then
+ local_addr="::"
+ else
+ local_addr="0.0.0.0"
+ fi
+
+ timeout ${timeout_test} \
+ ip netns exec ${listener_ns} \
+ $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} \
+ ${local_addr} < "$sin" > "$sout" &
+ spid=$!
+
+ sleep 1
+
+ timeout ${timeout_test} \
+ ip netns exec ${connector_ns} \
+ $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} \
+ $connect_addr < "$cin" > "$cout" &
+
+ cpid=$!
+
+ wait $cpid
+ retc=$?
+ wait $spid
+ rets=$?
+
+ if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
+ echo " client exit code $retc, server $rets" 1>&2
+ echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
+ ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port"
+
+ echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2
+ ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port"
+
+ ret=1
+ return 1
+ fi
+
+ if [ $local_addr = "::" ];then
+ check_mark $listener_ns 6
+ check_mark $connector_ns 6
+ else
+ check_mark $listener_ns 4
+ check_mark $connector_ns 4
+ fi
+
+ check_transfer $cin $sout "file received by server"
+
+ rets=$?
+
+ if [ $retc -eq 0 ] && [ $rets -eq 0 ];then
+ return 0
+ fi
+
+ return 1
+}
+
+make_file()
+{
+ name=$1
+ who=$2
+ size=$3
+
+ dd if=/dev/urandom of="$name" bs=1024 count=$size 2> /dev/null
+ echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name"
+
+ echo "Created $name (size $size KB) containing data sent by $who"
+}
+
+run_tests()
+{
+ listener_ns="$1"
+ connector_ns="$2"
+ connect_addr="$3"
+ lret=0
+
+ do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr}
+
+ lret=$?
+
+ if [ $lret -ne 0 ]; then
+ ret=$lret
+ return
+ fi
+}
+
+sin=$(mktemp)
+sout=$(mktemp)
+cin=$(mktemp)
+cout=$(mktemp)
+init
+make_file "$cin" "client" 1
+make_file "$sin" "server" 1
+trap cleanup EXIT
+
+run_tests $ns1 $ns2 10.0.1.1
+run_tests $ns1 $ns2 dead:beef:1::1
+
+
+if [ $ret -eq 0 ];then
+ echo "PASS: all packets had packet mark set"
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh
index a617e293734c..3c741abe034e 100755
--- a/tools/testing/selftests/net/mptcp/pm_netlink.sh
+++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh
@@ -100,12 +100,12 @@ done
check "ip netns exec $ns1 ./pm_nl_ctl get 9" "id 9 flags signal 10.0.1.9" "hard addr limit"
check "ip netns exec $ns1 ./pm_nl_ctl get 10" "" "above hard addr limit"
-for i in `seq 9 256`; do
+ip netns exec $ns1 ./pm_nl_ctl del 9
+for i in `seq 10 255`; do
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.9 id $i
ip netns exec $ns1 ./pm_nl_ctl del $i
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.9 id $((i+1))
done
check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags 10.0.1.1
-id 2 flags 10.0.0.9
id 3 flags signal,backup 10.0.1.3
id 4 flags signal 10.0.1.4
id 5 flags signal 10.0.1.5
diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index 7b4167f3f9a2..115decfdc1ef 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -26,7 +26,7 @@ static void syntax(char *argv[])
{
fprintf(stderr, "%s add|get|set|del|flush|dump|accept [<args>]\n", argv[0]);
fprintf(stderr, "\tadd [flags signal|subflow|backup] [id <nr>] [dev <name>] <ip>\n");
- fprintf(stderr, "\tdel <id>\n");
+ fprintf(stderr, "\tdel <id> [<ip>]\n");
fprintf(stderr, "\tget <id>\n");
fprintf(stderr, "\tset <ip> [flags backup|nobackup]\n");
fprintf(stderr, "\tflush\n");
@@ -301,6 +301,7 @@ int del_addr(int fd, int pm_family, int argc, char *argv[])
1024];
struct rtattr *rta, *nest;
struct nlmsghdr *nh;
+ u_int16_t family;
int nest_start;
u_int8_t id;
int off = 0;
@@ -310,11 +311,14 @@ int del_addr(int fd, int pm_family, int argc, char *argv[])
off = init_genl_req(data, pm_family, MPTCP_PM_CMD_DEL_ADDR,
MPTCP_PM_VER);
- /* the only argument is the address id */
- if (argc != 3)
+ /* the only argument is the address id (nonzero) */
+ if (argc != 3 && argc != 4)
syntax(argv);
id = atoi(argv[2]);
+ /* zero id with the IP address */
+ if (!id && argc != 4)
+ syntax(argv);
nest_start = off;
nest = (void *)(data + off);
@@ -328,6 +332,30 @@ int del_addr(int fd, int pm_family, int argc, char *argv[])
rta->rta_len = RTA_LENGTH(1);
memcpy(RTA_DATA(rta), &id, 1);
off += NLMSG_ALIGN(rta->rta_len);
+
+ if (!id) {
+ /* addr data */
+ rta = (void *)(data + off);
+ if (inet_pton(AF_INET, argv[3], RTA_DATA(rta))) {
+ family = AF_INET;
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4;
+ rta->rta_len = RTA_LENGTH(4);
+ } else if (inet_pton(AF_INET6, argv[3], RTA_DATA(rta))) {
+ family = AF_INET6;
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6;
+ rta->rta_len = RTA_LENGTH(16);
+ } else {
+ error(1, errno, "can't parse ip %s", argv[3]);
+ }
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ /* family */
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY;
+ rta->rta_len = RTA_LENGTH(2);
+ memcpy(RTA_DATA(rta), &family, 2);
+ off += NLMSG_ALIGN(rta->rta_len);
+ }
nest->rta_len = off - nest_start;
do_nl_req(fd, nh, off, 0);
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index f039ee57eb3c..3aeef3bcb101 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -7,7 +7,8 @@ ns2="ns2-$rndh"
ns3="ns3-$rndh"
capture=false
ksft_skip=4
-timeout=30
+timeout_poll=30
+timeout_test=$((timeout_poll * 2 + 1))
test_cnt=1
ret=0
bail=0
@@ -157,14 +158,20 @@ do_transfer()
sleep 1
fi
- ip netns exec ${ns3} ./mptcp_connect -jt $timeout -l -p $port 0.0.0.0 < "$sin" > "$sout" &
+ timeout ${timeout_test} \
+ ip netns exec ${ns3} \
+ ./mptcp_connect -jt ${timeout_poll} -l -p $port \
+ 0.0.0.0 < "$sin" > "$sout" &
local spid=$!
wait_local_port_listen "${ns3}" "${port}"
local start
start=$(date +%s%3N)
- ip netns exec ${ns1} ./mptcp_connect -jt $timeout -p $port 10.0.3.3 < "$cin" > "$cout" &
+ timeout ${timeout_test} \
+ ip netns exec ${ns1} \
+ ./mptcp_connect -jt ${timeout_poll} -p $port \
+ 10.0.3.3 < "$cin" > "$cout" &
local cpid=$!
wait $cpid
diff --git a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c
index 7b01b7c2ec10..066efd30e294 100644
--- a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c
+++ b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c
@@ -30,25 +30,25 @@ struct reuse_opts {
};
struct reuse_opts unreusable_opts[12] = {
- {0, 0, 0, 0},
- {0, 0, 0, 1},
- {0, 0, 1, 0},
- {0, 0, 1, 1},
- {0, 1, 0, 0},
- {0, 1, 0, 1},
- {0, 1, 1, 0},
- {0, 1, 1, 1},
- {1, 0, 0, 0},
- {1, 0, 0, 1},
- {1, 0, 1, 0},
- {1, 0, 1, 1},
+ {{0, 0}, {0, 0}},
+ {{0, 0}, {0, 1}},
+ {{0, 0}, {1, 0}},
+ {{0, 0}, {1, 1}},
+ {{0, 1}, {0, 0}},
+ {{0, 1}, {0, 1}},
+ {{0, 1}, {1, 0}},
+ {{0, 1}, {1, 1}},
+ {{1, 0}, {0, 0}},
+ {{1, 0}, {0, 1}},
+ {{1, 0}, {1, 0}},
+ {{1, 0}, {1, 1}},
};
struct reuse_opts reusable_opts[4] = {
- {1, 1, 0, 0},
- {1, 1, 0, 1},
- {1, 1, 1, 0},
- {1, 1, 1, 1},
+ {{1, 1}, {0, 0}},
+ {{1, 1}, {0, 1}},
+ {{1, 1}, {1, 0}},
+ {{1, 1}, {1, 1}},
};
int bind_port(struct __test_metadata *_metadata, int reuseaddr, int reuseport)
diff --git a/tools/testing/selftests/net/settings b/tools/testing/selftests/net/settings
new file mode 100644
index 000000000000..694d70710ff0
--- /dev/null
+++ b/tools/testing/selftests/net/settings
@@ -0,0 +1 @@
+timeout=300
diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c
index b4cca382d125..59067f64b775 100644
--- a/tools/testing/selftests/net/so_txtime.c
+++ b/tools/testing/selftests/net/so_txtime.c
@@ -2,9 +2,12 @@
/*
* Test the SO_TXTIME API
*
- * Takes two streams of { payload, delivery time }[], one input and one output.
- * Sends the input stream and verifies arrival matches the output stream.
- * The two streams can differ due to out-of-order delivery and drops.
+ * Takes a stream of { payload, delivery time }[], to be sent across two
+ * processes. Start this program on two separate network namespaces or
+ * connected hosts, one instance in transmit mode and the other in receive
+ * mode using the '-r' option. Receiver will compare arrival timestamps to
+ * the expected stream. Sender will read transmit timestamps from the error
+ * queue. The streams can differ due to out-of-order delivery and drops.
*/
#define _GNU_SOURCE
@@ -28,14 +31,17 @@
#include <sys/types.h>
#include <time.h>
#include <unistd.h>
+#include <poll.h>
static int cfg_clockid = CLOCK_TAI;
-static bool cfg_do_ipv4;
-static bool cfg_do_ipv6;
static uint16_t cfg_port = 8000;
static int cfg_variance_us = 4000;
+static uint64_t cfg_start_time_ns;
+static int cfg_mark;
+static bool cfg_rx;
static uint64_t glob_tstart;
+static uint64_t tdeliver_max;
/* encode one timed transmission (of a 1B payload) */
struct timed_send {
@@ -44,18 +50,21 @@ struct timed_send {
};
#define MAX_NUM_PKT 8
-static struct timed_send cfg_in[MAX_NUM_PKT];
-static struct timed_send cfg_out[MAX_NUM_PKT];
+static struct timed_send cfg_buf[MAX_NUM_PKT];
static int cfg_num_pkt;
static int cfg_errq_level;
static int cfg_errq_type;
-static uint64_t gettime_ns(void)
+static struct sockaddr_storage cfg_dst_addr;
+static struct sockaddr_storage cfg_src_addr;
+static socklen_t cfg_alen;
+
+static uint64_t gettime_ns(clockid_t clock)
{
struct timespec ts;
- if (clock_gettime(cfg_clockid, &ts))
+ if (clock_gettime(clock, &ts))
error(1, errno, "gettime");
return ts.tv_sec * (1000ULL * 1000 * 1000) + ts.tv_nsec;
@@ -75,6 +84,8 @@ static void do_send_one(int fdt, struct timed_send *ts)
msg.msg_iov = &iov;
msg.msg_iovlen = 1;
+ msg.msg_name = (struct sockaddr *)&cfg_dst_addr;
+ msg.msg_namelen = cfg_alen;
if (ts->delay_us >= 0) {
memset(control, 0, sizeof(control));
@@ -82,6 +93,8 @@ static void do_send_one(int fdt, struct timed_send *ts)
msg.msg_controllen = sizeof(control);
tdeliver = glob_tstart + ts->delay_us * 1000;
+ tdeliver_max = tdeliver_max > tdeliver ?
+ tdeliver_max : tdeliver;
cm = CMSG_FIRSTHDR(&msg);
cm->cmsg_level = SOL_SOCKET;
@@ -98,7 +111,7 @@ static void do_send_one(int fdt, struct timed_send *ts)
}
-static bool do_recv_one(int fdr, struct timed_send *ts)
+static void do_recv_one(int fdr, struct timed_send *ts)
{
int64_t tstop, texpect;
char rbuf[2];
@@ -106,13 +119,13 @@ static bool do_recv_one(int fdr, struct timed_send *ts)
ret = recv(fdr, rbuf, sizeof(rbuf), 0);
if (ret == -1 && errno == EAGAIN)
- return true;
+ error(1, EAGAIN, "recv: timeout");
if (ret == -1)
error(1, errno, "read");
if (ret != 1)
error(1, 0, "read: %dB", ret);
- tstop = (gettime_ns() - glob_tstart) / 1000;
+ tstop = (gettime_ns(cfg_clockid) - glob_tstart) / 1000;
texpect = ts->delay_us >= 0 ? ts->delay_us : 0;
fprintf(stderr, "payload:%c delay:%lld expected:%lld (us)\n",
@@ -123,8 +136,6 @@ static bool do_recv_one(int fdr, struct timed_send *ts)
if (llabs(tstop - texpect) > cfg_variance_us)
error(1, 0, "exceeds variance (%d us)", cfg_variance_us);
-
- return false;
}
static void do_recv_verify_empty(int fdr)
@@ -137,18 +148,18 @@ static void do_recv_verify_empty(int fdr)
error(1, 0, "recv: not empty as expected (%d, %d)", ret, errno);
}
-static void do_recv_errqueue_timeout(int fdt)
+static int do_recv_errqueue_timeout(int fdt)
{
char control[CMSG_SPACE(sizeof(struct sock_extended_err)) +
CMSG_SPACE(sizeof(struct sockaddr_in6))] = {0};
char data[sizeof(struct ethhdr) + sizeof(struct ipv6hdr) +
sizeof(struct udphdr) + 1];
struct sock_extended_err *err;
+ int ret, num_tstamp = 0;
struct msghdr msg = {0};
struct iovec iov = {0};
struct cmsghdr *cm;
int64_t tstamp = 0;
- int ret;
iov.iov_base = data;
iov.iov_len = sizeof(data);
@@ -206,9 +217,47 @@ static void do_recv_errqueue_timeout(int fdt)
msg.msg_flags = 0;
msg.msg_controllen = sizeof(control);
+ num_tstamp++;
}
- error(1, 0, "recv: timeout");
+ return num_tstamp;
+}
+
+static void recv_errqueue_msgs(int fdt)
+{
+ struct pollfd pfd = { .fd = fdt, .events = POLLERR };
+ const int timeout_ms = 10;
+ int ret, num_tstamp = 0;
+
+ do {
+ ret = poll(&pfd, 1, timeout_ms);
+ if (ret == -1)
+ error(1, errno, "poll");
+
+ if (ret && (pfd.revents & POLLERR))
+ num_tstamp += do_recv_errqueue_timeout(fdt);
+
+ if (num_tstamp == cfg_num_pkt)
+ break;
+
+ } while (gettime_ns(cfg_clockid) < tdeliver_max);
+}
+
+static void start_time_wait(void)
+{
+ uint64_t now;
+ int err;
+
+ if (!cfg_start_time_ns)
+ return;
+
+ now = gettime_ns(CLOCK_REALTIME);
+ if (cfg_start_time_ns < now)
+ return;
+
+ err = usleep((cfg_start_time_ns - now) / 1000);
+ if (err)
+ error(1, errno, "usleep");
}
static void setsockopt_txtime(int fd)
@@ -245,6 +294,10 @@ static int setup_tx(struct sockaddr *addr, socklen_t alen)
setsockopt_txtime(fd);
+ if (cfg_mark &&
+ setsockopt(fd, SOL_SOCKET, SO_MARK, &cfg_mark, sizeof(cfg_mark)))
+ error(1, errno, "setsockopt mark");
+
return fd;
}
@@ -266,31 +319,70 @@ static int setup_rx(struct sockaddr *addr, socklen_t alen)
return fd;
}
-static void do_test(struct sockaddr *addr, socklen_t alen)
+static void do_test_tx(struct sockaddr *addr, socklen_t alen)
{
- int fdt, fdr, i;
+ int fdt, i;
fprintf(stderr, "\nSO_TXTIME ipv%c clock %s\n",
addr->sa_family == PF_INET ? '4' : '6',
cfg_clockid == CLOCK_TAI ? "tai" : "monotonic");
fdt = setup_tx(addr, alen);
- fdr = setup_rx(addr, alen);
- glob_tstart = gettime_ns();
+ start_time_wait();
+ glob_tstart = gettime_ns(cfg_clockid);
for (i = 0; i < cfg_num_pkt; i++)
- do_send_one(fdt, &cfg_in[i]);
+ do_send_one(fdt, &cfg_buf[i]);
+
+ recv_errqueue_msgs(fdt);
+
+ if (close(fdt))
+ error(1, errno, "close t");
+}
+
+static void do_test_rx(struct sockaddr *addr, socklen_t alen)
+{
+ int fdr, i;
+
+ fdr = setup_rx(addr, alen);
+
+ start_time_wait();
+ glob_tstart = gettime_ns(cfg_clockid);
+
for (i = 0; i < cfg_num_pkt; i++)
- if (do_recv_one(fdr, &cfg_out[i]))
- do_recv_errqueue_timeout(fdt);
+ do_recv_one(fdr, &cfg_buf[i]);
do_recv_verify_empty(fdr);
if (close(fdr))
error(1, errno, "close r");
- if (close(fdt))
- error(1, errno, "close t");
+}
+
+static void setup_sockaddr(int domain, const char *str_addr,
+ struct sockaddr_storage *sockaddr)
+{
+ struct sockaddr_in6 *addr6 = (void *) sockaddr;
+ struct sockaddr_in *addr4 = (void *) sockaddr;
+
+ switch (domain) {
+ case PF_INET:
+ memset(addr4, 0, sizeof(*addr4));
+ addr4->sin_family = AF_INET;
+ addr4->sin_port = htons(cfg_port);
+ if (str_addr &&
+ inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
+ error(1, 0, "ipv4 parse error: %s", str_addr);
+ break;
+ case PF_INET6:
+ memset(addr6, 0, sizeof(*addr6));
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = htons(cfg_port);
+ if (str_addr &&
+ inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
+ error(1, 0, "ipv6 parse error: %s", str_addr);
+ break;
+ }
}
static int parse_io(const char *optarg, struct timed_send *array)
@@ -323,17 +415,46 @@ static int parse_io(const char *optarg, struct timed_send *array)
return aoff / 2;
}
+static void usage(const char *progname)
+{
+ fprintf(stderr, "\nUsage: %s [options] <payload>\n"
+ "Options:\n"
+ " -4 only IPv4\n"
+ " -6 only IPv6\n"
+ " -c <clock> monotonic (default) or tai\n"
+ " -D <addr> destination IP address (server)\n"
+ " -S <addr> source IP address (client)\n"
+ " -r run rx mode\n"
+ " -t <nsec> start time (UTC nanoseconds)\n"
+ " -m <mark> socket mark\n"
+ "\n",
+ progname);
+ exit(1);
+}
+
static void parse_opts(int argc, char **argv)
{
- int c, ilen, olen;
+ char *daddr = NULL, *saddr = NULL;
+ int domain = PF_UNSPEC;
+ int c;
- while ((c = getopt(argc, argv, "46c:")) != -1) {
+ while ((c = getopt(argc, argv, "46c:S:D:rt:m:")) != -1) {
switch (c) {
case '4':
- cfg_do_ipv4 = true;
+ if (domain != PF_UNSPEC)
+ error(1, 0, "Pass one of -4 or -6");
+ domain = PF_INET;
+ cfg_alen = sizeof(struct sockaddr_in);
+ cfg_errq_level = SOL_IP;
+ cfg_errq_type = IP_RECVERR;
break;
case '6':
- cfg_do_ipv6 = true;
+ if (domain != PF_UNSPEC)
+ error(1, 0, "Pass one of -4 or -6");
+ domain = PF_INET6;
+ cfg_alen = sizeof(struct sockaddr_in6);
+ cfg_errq_level = SOL_IPV6;
+ cfg_errq_type = IPV6_RECVERR;
break;
case 'c':
if (!strcmp(optarg, "tai"))
@@ -344,50 +465,50 @@ static void parse_opts(int argc, char **argv)
else
error(1, 0, "unknown clock id %s", optarg);
break;
+ case 'S':
+ saddr = optarg;
+ break;
+ case 'D':
+ daddr = optarg;
+ break;
+ case 'r':
+ cfg_rx = true;
+ break;
+ case 't':
+ cfg_start_time_ns = strtol(optarg, NULL, 0);
+ break;
+ case 'm':
+ cfg_mark = strtol(optarg, NULL, 0);
+ break;
default:
- error(1, 0, "parse error at %d", optind);
+ usage(argv[0]);
}
}
- if (argc - optind != 2)
- error(1, 0, "Usage: %s [-46] -c <clock> <in> <out>", argv[0]);
+ if (argc - optind != 1)
+ usage(argv[0]);
+
+ if (domain == PF_UNSPEC)
+ error(1, 0, "Pass one of -4 or -6");
+ if (!daddr)
+ error(1, 0, "-D <server addr> required\n");
+ if (!cfg_rx && !saddr)
+ error(1, 0, "-S <client addr> required\n");
- ilen = parse_io(argv[optind], cfg_in);
- olen = parse_io(argv[optind + 1], cfg_out);
- if (ilen != olen)
- error(1, 0, "i/o streams len mismatch (%d, %d)\n", ilen, olen);
- cfg_num_pkt = ilen;
+ setup_sockaddr(domain, daddr, &cfg_dst_addr);
+ setup_sockaddr(domain, saddr, &cfg_src_addr);
+
+ cfg_num_pkt = parse_io(argv[optind], cfg_buf);
}
int main(int argc, char **argv)
{
parse_opts(argc, argv);
- if (cfg_do_ipv6) {
- struct sockaddr_in6 addr6 = {0};
-
- addr6.sin6_family = AF_INET6;
- addr6.sin6_port = htons(cfg_port);
- addr6.sin6_addr = in6addr_loopback;
-
- cfg_errq_level = SOL_IPV6;
- cfg_errq_type = IPV6_RECVERR;
-
- do_test((void *)&addr6, sizeof(addr6));
- }
-
- if (cfg_do_ipv4) {
- struct sockaddr_in addr4 = {0};
-
- addr4.sin_family = AF_INET;
- addr4.sin_port = htons(cfg_port);
- addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
-
- cfg_errq_level = SOL_IP;
- cfg_errq_type = IP_RECVERR;
-
- do_test((void *)&addr4, sizeof(addr4));
- }
+ if (cfg_rx)
+ do_test_rx((void *)&cfg_dst_addr, cfg_alen);
+ else
+ do_test_tx((void *)&cfg_src_addr, cfg_alen);
return 0;
}
diff --git a/tools/testing/selftests/net/so_txtime.sh b/tools/testing/selftests/net/so_txtime.sh
index 3f7800eaecb1..3f06f4d286a9 100755
--- a/tools/testing/selftests/net/so_txtime.sh
+++ b/tools/testing/selftests/net/so_txtime.sh
@@ -3,32 +3,85 @@
#
# Regression tests for the SO_TXTIME interface
-# Run in network namespace
-if [[ $# -eq 0 ]]; then
- if ! ./in_netns.sh $0 __subprocess; then
- # test is time sensitive, can be flaky
- echo "test failed: retry once"
- ./in_netns.sh $0 __subprocess
+set -e
+
+readonly DEV="veth0"
+readonly BIN="./so_txtime"
+
+readonly RAND="$(mktemp -u XXXXXX)"
+readonly NSPREFIX="ns-${RAND}"
+readonly NS1="${NSPREFIX}1"
+readonly NS2="${NSPREFIX}2"
+
+readonly SADDR4='192.168.1.1'
+readonly DADDR4='192.168.1.2'
+readonly SADDR6='fd::1'
+readonly DADDR6='fd::2'
+
+cleanup() {
+ ip netns del "${NS2}"
+ ip netns del "${NS1}"
+}
+
+trap cleanup EXIT
+
+# Create virtual ethernet pair between network namespaces
+ip netns add "${NS1}"
+ip netns add "${NS2}"
+
+ip link add "${DEV}" netns "${NS1}" type veth \
+ peer name "${DEV}" netns "${NS2}"
+
+# Bring the devices up
+ip -netns "${NS1}" link set "${DEV}" up
+ip -netns "${NS2}" link set "${DEV}" up
+
+# Set fixed MAC addresses on the devices
+ip -netns "${NS1}" link set dev "${DEV}" address 02:02:02:02:02:02
+ip -netns "${NS2}" link set dev "${DEV}" address 06:06:06:06:06:06
+
+# Add fixed IP addresses to the devices
+ip -netns "${NS1}" addr add 192.168.1.1/24 dev "${DEV}"
+ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}"
+ip -netns "${NS1}" addr add fd::1/64 dev "${DEV}" nodad
+ip -netns "${NS2}" addr add fd::2/64 dev "${DEV}" nodad
+
+do_test() {
+ local readonly IP="$1"
+ local readonly CLOCK="$2"
+ local readonly TXARGS="$3"
+ local readonly RXARGS="$4"
+
+ if [[ "${IP}" == "4" ]]; then
+ local readonly SADDR="${SADDR4}"
+ local readonly DADDR="${DADDR4}"
+ elif [[ "${IP}" == "6" ]]; then
+ local readonly SADDR="${SADDR6}"
+ local readonly DADDR="${DADDR6}"
+ else
+ echo "Invalid IP version ${IP}"
+ exit 1
fi
- exit $?
-fi
+ local readonly START="$(date +%s%N --date="+ 0.1 seconds")"
+ ip netns exec "${NS2}" "${BIN}" -"${IP}" -c "${CLOCK}" -t "${START}" -S "${SADDR}" -D "${DADDR}" "${RXARGS}" -r &
+ ip netns exec "${NS1}" "${BIN}" -"${IP}" -c "${CLOCK}" -t "${START}" -S "${SADDR}" -D "${DADDR}" "${TXARGS}"
+ wait "$!"
+}
-set -e
+ip netns exec "${NS1}" tc qdisc add dev "${DEV}" root fq
+do_test 4 mono a,-1 a,-1
+do_test 6 mono a,0 a,0
+do_test 6 mono a,10 a,10
+do_test 4 mono a,10,b,20 a,10,b,20
+do_test 6 mono a,20,b,10 b,20,a,20
-tc qdisc add dev lo root fq
-./so_txtime -4 -6 -c mono a,-1 a,-1
-./so_txtime -4 -6 -c mono a,0 a,0
-./so_txtime -4 -6 -c mono a,10 a,10
-./so_txtime -4 -6 -c mono a,10,b,20 a,10,b,20
-./so_txtime -4 -6 -c mono a,20,b,10 b,20,a,20
-
-if tc qdisc replace dev lo root etf clockid CLOCK_TAI delta 400000; then
- ! ./so_txtime -4 -6 -c tai a,-1 a,-1
- ! ./so_txtime -4 -6 -c tai a,0 a,0
- ./so_txtime -4 -6 -c tai a,10 a,10
- ./so_txtime -4 -6 -c tai a,10,b,20 a,10,b,20
- ./so_txtime -4 -6 -c tai a,20,b,10 b,10,a,20
+if ip netns exec "${NS1}" tc qdisc replace dev "${DEV}" root etf clockid CLOCK_TAI delta 400000; then
+ ! do_test 4 tai a,-1 a,-1
+ ! do_test 6 tai a,0 a,0
+ do_test 6 tai a,10 a,10
+ do_test 4 tai a,10,b,20 a,10,b,20
+ do_test 6 tai a,20,b,10 b,10,a,20
else
echo "tc ($(tc -V)) does not support qdisc etf. skipping"
fi
diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh
new file mode 100755
index 000000000000..a8fa64136282
--- /dev/null
+++ b/tools/testing/selftests/net/udpgro_fwd.sh
@@ -0,0 +1,251 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+readonly BASE="ns-$(mktemp -u XXXXXX)"
+readonly SRC=2
+readonly DST=1
+readonly DST_NAT=100
+readonly NS_SRC=$BASE$SRC
+readonly NS_DST=$BASE$DST
+
+# "baremetal" network used for raw UDP traffic
+readonly BM_NET_V4=192.168.1.
+readonly BM_NET_V6=2001:db8::
+
+# "overlay" network used for UDP over UDP tunnel traffic
+readonly OL_NET_V4=172.16.1.
+readonly OL_NET_V6=2001:db8:1::
+readonly NPROCS=`nproc`
+
+cleanup() {
+ local ns
+ local -r jobs="$(jobs -p)"
+ [ -n "${jobs}" ] && kill -1 ${jobs} 2>/dev/null
+
+ for ns in $NS_SRC $NS_DST; do
+ ip netns del $ns 2>/dev/null
+ done
+}
+
+trap cleanup EXIT
+
+create_ns() {
+ local net
+ local ns
+
+ for ns in $NS_SRC $NS_DST; do
+ ip netns add $ns
+ ip -n $ns link set dev lo up
+ done
+
+ ip link add name veth$SRC type veth peer name veth$DST
+
+ for ns in $SRC $DST; do
+ ip link set dev veth$ns netns $BASE$ns
+ ip -n $BASE$ns link set dev veth$ns up
+ ip -n $BASE$ns addr add dev veth$ns $BM_NET_V4$ns/24
+ ip -n $BASE$ns addr add dev veth$ns $BM_NET_V6$ns/64 nodad
+ done
+ ip -n $NS_DST link set veth$DST xdp object ../bpf/xdp_dummy.o section xdp_dummy 2>/dev/null
+}
+
+create_vxlan_endpoint() {
+ local -r netns=$1
+ local -r bm_dev=$2
+ local -r bm_rem_addr=$3
+ local -r vxlan_dev=$4
+ local -r vxlan_id=$5
+ local -r vxlan_port=4789
+
+ ip -n $netns link set dev $bm_dev up
+ ip -n $netns link add dev $vxlan_dev type vxlan id $vxlan_id \
+ dstport $vxlan_port remote $bm_rem_addr
+ ip -n $netns link set dev $vxlan_dev up
+}
+
+create_vxlan_pair() {
+ local ns
+
+ create_ns
+
+ for ns in $SRC $DST; do
+ # note that 3 - $SRC == $DST and 3 - $DST == $SRC
+ create_vxlan_endpoint $BASE$ns veth$ns $BM_NET_V4$((3 - $ns)) vxlan$ns 4
+ ip -n $BASE$ns addr add dev vxlan$ns $OL_NET_V4$ns/24
+ done
+ for ns in $SRC $DST; do
+ create_vxlan_endpoint $BASE$ns veth$ns $BM_NET_V6$((3 - $ns)) vxlan6$ns 6
+ ip -n $BASE$ns addr add dev vxlan6$ns $OL_NET_V6$ns/24 nodad
+ done
+}
+
+is_ipv6() {
+ if [[ $1 =~ .*:.* ]]; then
+ return 0
+ fi
+ return 1
+}
+
+run_test() {
+ local -r msg=$1
+ local -r dst=$2
+ local -r pkts=$3
+ local -r vxpkts=$4
+ local bind=$5
+ local rx_args=""
+ local rx_family="-4"
+ local family=-4
+ local filter=IpInReceives
+ local ipt=iptables
+
+ printf "%-40s" "$msg"
+
+ if is_ipv6 $dst; then
+ # rx program does not support '-6' and implies ipv6 usage by default
+ rx_family=""
+ family=-6
+ filter=Ip6InReceives
+ ipt=ip6tables
+ fi
+
+ rx_args="$rx_family"
+ [ -n "$bind" ] && rx_args="$rx_args -b $bind"
+
+ # send a single GSO packet, segmented in 10 UDP frames.
+ # Always expect 10 UDP frames on RX side as rx socket does
+ # not enable GRO
+ ip netns exec $NS_DST $ipt -A INPUT -p udp --dport 4789
+ ip netns exec $NS_DST $ipt -A INPUT -p udp --dport 8000
+ ip netns exec $NS_DST ./udpgso_bench_rx -C 1000 -R 10 -n 10 -l 1300 $rx_args &
+ local spid=$!
+ sleep 0.1
+ ip netns exec $NS_SRC ./udpgso_bench_tx $family -M 1 -s 13000 -S 1300 -D $dst
+ local retc=$?
+ wait $spid
+ local rets=$?
+ if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
+ echo " fail client exit code $retc, server $rets"
+ ret=1
+ return
+ fi
+
+ local rcv=`ip netns exec $NS_DST $ipt"-save" -c | grep 'dport 8000' | \
+ sed -e 's/\[//' -e 's/:.*//'`
+ if [ $rcv != $pkts ]; then
+ echo " fail - received $rvs packets, expected $pkts"
+ ret=1
+ return
+ fi
+
+ local vxrcv=`ip netns exec $NS_DST $ipt"-save" -c | grep 'dport 4789' | \
+ sed -e 's/\[//' -e 's/:.*//'`
+
+ # upper net can generate a little noise, allow some tolerance
+ if [ $vxrcv -lt $vxpkts -o $vxrcv -gt $((vxpkts + 3)) ]; then
+ echo " fail - received $vxrcv vxlan packets, expected $vxpkts"
+ ret=1
+ return
+ fi
+ echo " ok"
+}
+
+run_bench() {
+ local -r msg=$1
+ local -r dst=$2
+ local family=-4
+
+ printf "%-40s" "$msg"
+ if [ $NPROCS -lt 2 ]; then
+ echo " skip - needed 2 CPUs found $NPROCS"
+ return
+ fi
+
+ is_ipv6 $dst && family=-6
+
+ # bind the sender and the receiver to different CPUs to try
+ # get reproducible results
+ ip netns exec $NS_DST bash -c "echo 2 > /sys/class/net/veth$DST/queues/rx-0/rps_cpus"
+ ip netns exec $NS_DST taskset 0x2 ./udpgso_bench_rx -C 1000 -R 10 &
+ local spid=$!
+ sleep 0.1
+ ip netns exec $NS_SRC taskset 0x1 ./udpgso_bench_tx $family -l 3 -S 1300 -D $dst
+ local retc=$?
+ wait $spid
+ local rets=$?
+ if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
+ echo " fail client exit code $retc, server $rets"
+ ret=1
+ return
+ fi
+}
+
+for family in 4 6; do
+ BM_NET=$BM_NET_V4
+ OL_NET=$OL_NET_V4
+ IPT=iptables
+ SUFFIX=24
+ VXDEV=vxlan
+
+ if [ $family = 6 ]; then
+ BM_NET=$BM_NET_V6
+ OL_NET=$OL_NET_V6
+ SUFFIX="64 nodad"
+ VXDEV=vxlan6
+ IPT=ip6tables
+ fi
+
+ echo "IPv$family"
+
+ create_ns
+ run_test "No GRO" $BM_NET$DST 10 0
+ cleanup
+
+ create_ns
+ ip netns exec $NS_DST ethtool -K veth$DST rx-gro-list on
+ run_test "GRO frag list" $BM_NET$DST 1 0
+ cleanup
+
+ # UDP GRO fwd skips aggregation when find an udp socket with the GRO option
+ # if there is an UDP tunnel in the running system, such lookup happen
+ # take place.
+ # use NAT to circumvent GRO FWD check
+ create_ns
+ ip -n $NS_DST addr add dev veth$DST $BM_NET$DST_NAT/$SUFFIX
+ ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
+ ip netns exec $NS_DST $IPT -t nat -I PREROUTING -d $BM_NET$DST_NAT \
+ -j DNAT --to-destination $BM_NET$DST
+ run_test "GRO fwd" $BM_NET$DST_NAT 1 0 $BM_NET$DST
+ cleanup
+
+ create_ns
+ run_bench "UDP fwd perf" $BM_NET$DST
+ ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
+ run_bench "UDP GRO fwd perf" $BM_NET$DST
+ cleanup
+
+ create_vxlan_pair
+ ip netns exec $NS_DST ethtool -K veth$DST rx-gro-list on
+ run_test "GRO frag list over UDP tunnel" $OL_NET$DST 1 1
+ cleanup
+
+ # use NAT to circumvent GRO FWD check
+ create_vxlan_pair
+ ip -n $NS_DST addr add dev $VXDEV$DST $OL_NET$DST_NAT/$SUFFIX
+ ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
+ ip netns exec $NS_DST $IPT -t nat -I PREROUTING -d $OL_NET$DST_NAT \
+ -j DNAT --to-destination $OL_NET$DST
+
+ # load arp cache before running the test to reduce the amount of
+ # stray traffic on top of the UDP tunnel
+ ip netns exec $NS_SRC ping -q -c 1 $OL_NET$DST_NAT >/dev/null
+ run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 1 1 $OL_NET$DST
+ cleanup
+
+ create_vxlan_pair
+ run_bench "UDP tunnel fwd perf" $OL_NET$DST
+ ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
+ run_bench "UDP tunnel GRO fwd perf" $OL_NET$DST
+ cleanup
+done
+
+exit $ret
diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh
new file mode 100755
index 000000000000..2fedc0781ce8
--- /dev/null
+++ b/tools/testing/selftests/net/veth.sh
@@ -0,0 +1,177 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+readonly STATS="$(mktemp -p /tmp ns-XXXXXX)"
+readonly BASE=`basename $STATS`
+readonly SRC=2
+readonly DST=1
+readonly DST_NAT=100
+readonly NS_SRC=$BASE$SRC
+readonly NS_DST=$BASE$DST
+
+# "baremetal" network used for raw UDP traffic
+readonly BM_NET_V4=192.168.1.
+readonly BM_NET_V6=2001:db8::
+
+readonly NPROCS=`nproc`
+ret=0
+
+cleanup() {
+ local ns
+ local -r jobs="$(jobs -p)"
+ [ -n "${jobs}" ] && kill -1 ${jobs} 2>/dev/null
+ rm -f $STATS
+
+ for ns in $NS_SRC $NS_DST; do
+ ip netns del $ns 2>/dev/null
+ done
+}
+
+trap cleanup EXIT
+
+create_ns() {
+ local ns
+
+ for ns in $NS_SRC $NS_DST; do
+ ip netns add $ns
+ ip -n $ns link set dev lo up
+ done
+
+ ip link add name veth$SRC type veth peer name veth$DST
+
+ for ns in $SRC $DST; do
+ ip link set dev veth$ns netns $BASE$ns up
+ ip -n $BASE$ns addr add dev veth$ns $BM_NET_V4$ns/24
+ ip -n $BASE$ns addr add dev veth$ns $BM_NET_V6$ns/64 nodad
+ done
+ echo "#kernel" > $BASE
+ chmod go-rw $BASE
+}
+
+__chk_flag() {
+ local msg="$1"
+ local target=$2
+ local expected=$3
+ local flagname=$4
+
+ local flag=`ip netns exec $BASE$target ethtool -k veth$target |\
+ grep $flagname | awk '{print $2}'`
+
+ printf "%-60s" "$msg"
+ if [ "$flag" = "$expected" ]; then
+ echo " ok "
+ else
+ echo " fail - expected $expected found $flag"
+ ret=1
+ fi
+}
+
+chk_gro_flag() {
+ __chk_flag "$1" $2 $3 generic-receive-offload
+}
+
+chk_tso_flag() {
+ __chk_flag "$1" $2 $3 tcp-segmentation-offload
+}
+
+chk_gro() {
+ local msg="$1"
+ local expected=$2
+
+ ip netns exec $BASE$SRC ping -qc 1 $BM_NET_V4$DST >/dev/null
+ NSTAT_HISTORY=$STATS ip netns exec $NS_DST nstat -n
+
+ printf "%-60s" "$msg"
+ ip netns exec $BASE$DST ./udpgso_bench_rx -C 1000 -R 10 &
+ local spid=$!
+ sleep 0.1
+
+ ip netns exec $NS_SRC ./udpgso_bench_tx -4 -s 13000 -S 1300 -M 1 -D $BM_NET_V4$DST
+ local retc=$?
+ wait $spid
+ local rets=$?
+ if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
+ echo " fail client exit code $retc, server $rets"
+ ret=1
+ return
+ fi
+
+ local pkts=`NSTAT_HISTORY=$STATS ip netns exec $NS_DST nstat IpInReceives | \
+ awk '{print $2}' | tail -n 1`
+ if [ "$pkts" = "$expected" ]; then
+ echo " ok "
+ else
+ echo " fail - got $pkts packets, expected $expected "
+ ret=1
+ fi
+}
+
+if [ ! -f ../bpf/xdp_dummy.o ]; then
+ echo "Missing xdp_dummy helper. Build bpf selftest first"
+ exit -1
+fi
+
+create_ns
+chk_gro_flag "default - gro flag" $SRC off
+chk_gro_flag " - peer gro flag" $DST off
+chk_tso_flag " - tso flag" $SRC on
+chk_tso_flag " - peer tso flag" $DST on
+chk_gro " - aggregation" 1
+ip netns exec $NS_SRC ethtool -K veth$SRC tx-udp-segmentation off
+chk_gro " - aggregation with TSO off" 10
+cleanup
+
+create_ns
+ip netns exec $NS_DST ethtool -K veth$DST gro on
+chk_gro_flag "with gro on - gro flag" $DST on
+chk_gro_flag " - peer gro flag" $SRC off
+chk_tso_flag " - tso flag" $SRC on
+chk_tso_flag " - peer tso flag" $DST on
+ip netns exec $NS_SRC ethtool -K veth$SRC tx-udp-segmentation off
+ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
+chk_gro " - aggregation with TSO off" 1
+cleanup
+
+create_ns
+ip -n $NS_DST link set dev veth$DST down
+ip netns exec $NS_DST ethtool -K veth$DST gro on
+chk_gro_flag "with gro enabled on link down - gro flag" $DST on
+chk_gro_flag " - peer gro flag" $SRC off
+chk_tso_flag " - tso flag" $SRC on
+chk_tso_flag " - peer tso flag" $DST on
+ip -n $NS_DST link set dev veth$DST up
+ip netns exec $NS_SRC ethtool -K veth$SRC tx-udp-segmentation off
+ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
+chk_gro " - aggregation with TSO off" 1
+cleanup
+
+create_ns
+ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o section xdp_dummy 2>/dev/null
+chk_gro_flag "with xdp attached - gro flag" $DST on
+chk_gro_flag " - peer gro flag" $SRC off
+chk_tso_flag " - tso flag" $SRC off
+chk_tso_flag " - peer tso flag" $DST on
+ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
+chk_gro " - aggregation" 1
+
+
+ip -n $NS_DST link set dev veth$DST down
+ip -n $NS_SRC link set dev veth$SRC down
+chk_gro_flag " - after dev off, flag" $DST on
+chk_gro_flag " - peer flag" $SRC off
+
+ip netns exec $NS_DST ethtool -K veth$DST gro on
+ip -n $NS_DST link set dev veth$DST xdp off
+chk_gro_flag " - after gro on xdp off, gro flag" $DST on
+chk_gro_flag " - peer gro flag" $SRC off
+chk_tso_flag " - tso flag" $SRC on
+chk_tso_flag " - peer tso flag" $DST on
+ip -n $NS_DST link set dev veth$DST up
+ip -n $NS_SRC link set dev veth$SRC up
+chk_gro " - aggregation" 1
+
+ip netns exec $NS_DST ethtool -K veth$DST gro off
+ip netns exec $NS_SRC ethtool -K veth$SRC tx-udp-segmentation off
+chk_gro "aggregation again with default and TSO off" 10
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh
index 431296c0f91c..427d94816f2d 100755
--- a/tools/testing/selftests/netfilter/nft_flowtable.sh
+++ b/tools/testing/selftests/netfilter/nft_flowtable.sh
@@ -371,6 +371,88 @@ else
ip netns exec nsr1 nft list ruleset
fi
+# Another test:
+# Add bridge interface br0 to Router1, with NAT enabled.
+ip -net nsr1 link add name br0 type bridge
+ip -net nsr1 addr flush dev veth0
+ip -net nsr1 link set up dev veth0
+ip -net nsr1 link set veth0 master br0
+ip -net nsr1 addr add 10.0.1.1/24 dev br0
+ip -net nsr1 addr add dead:1::1/64 dev br0
+ip -net nsr1 link set up dev br0
+
+ip netns exec nsr1 sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null
+
+# br0 with NAT enabled.
+ip netns exec nsr1 nft -f - <<EOF
+flush table ip nat
+table ip nat {
+ chain prerouting {
+ type nat hook prerouting priority 0; policy accept;
+ meta iif "br0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
+ }
+
+ chain postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ meta oifname "veth1" counter masquerade
+ }
+}
+EOF
+
+if test_tcp_forwarding_nat ns1 ns2; then
+ echo "PASS: flow offloaded for ns1/ns2 with bridge NAT"
+else
+ echo "FAIL: flow offload for ns1/ns2 with bridge NAT" 1>&2
+ ip netns exec nsr1 nft list ruleset
+ ret=1
+fi
+
+# Another test:
+# Add bridge interface br0 to Router1, with NAT and VLAN.
+ip -net nsr1 link set veth0 nomaster
+ip -net nsr1 link set down dev veth0
+ip -net nsr1 link add link veth0 name veth0.10 type vlan id 10
+ip -net nsr1 link set up dev veth0
+ip -net nsr1 link set up dev veth0.10
+ip -net nsr1 link set veth0.10 master br0
+
+ip -net ns1 addr flush dev eth0
+ip -net ns1 link add link eth0 name eth0.10 type vlan id 10
+ip -net ns1 link set eth0 up
+ip -net ns1 link set eth0.10 up
+ip -net ns1 addr add 10.0.1.99/24 dev eth0.10
+ip -net ns1 route add default via 10.0.1.1
+ip -net ns1 addr add dead:1::99/64 dev eth0.10
+
+if test_tcp_forwarding_nat ns1 ns2; then
+ echo "PASS: flow offloaded for ns1/ns2 with bridge NAT and VLAN"
+else
+ echo "FAIL: flow offload for ns1/ns2 with bridge NAT and VLAN" 1>&2
+ ip netns exec nsr1 nft list ruleset
+ ret=1
+fi
+
+# restore test topology (remove bridge and VLAN)
+ip -net nsr1 link set veth0 nomaster
+ip -net nsr1 link set veth0 down
+ip -net nsr1 link set veth0.10 down
+ip -net nsr1 link delete veth0.10 type vlan
+ip -net nsr1 link delete br0 type bridge
+ip -net ns1 addr flush dev eth0.10
+ip -net ns1 link set eth0.10 down
+ip -net ns1 link set eth0 down
+ip -net ns1 link delete eth0.10 type vlan
+
+# restore address in ns1 and nsr1
+ip -net ns1 link set eth0 up
+ip -net ns1 addr add 10.0.1.99/24 dev eth0
+ip -net ns1 route add default via 10.0.1.1
+ip -net ns1 addr add dead:1::99/64 dev eth0
+ip -net ns1 route add default via dead:1::1
+ip -net nsr1 addr add 10.0.1.1/24 dev veth0
+ip -net nsr1 addr add dead:1::1/64 dev veth0
+ip -net nsr1 link set up dev veth0
+
KEY_SHA="0x"$(ps -xaf | sha1sum | cut -d " " -f 1)
KEY_AES="0x"$(ps -xaf | md5sum | cut -d " " -f 1)
SPI1=$RANDOM
diff --git a/tools/testing/selftests/perf_events/.gitignore b/tools/testing/selftests/perf_events/.gitignore
new file mode 100644
index 000000000000..790c47001e77
--- /dev/null
+++ b/tools/testing/selftests/perf_events/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+sigtrap_threads
+remove_on_exec
diff --git a/tools/testing/selftests/perf_events/Makefile b/tools/testing/selftests/perf_events/Makefile
new file mode 100644
index 000000000000..fcafa5f0d34c
--- /dev/null
+++ b/tools/testing/selftests/perf_events/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -Wl,-no-as-needed -Wall -I../../../../usr/include
+LDFLAGS += -lpthread
+
+TEST_GEN_PROGS := sigtrap_threads remove_on_exec
+include ../lib.mk
diff --git a/tools/testing/selftests/perf_events/config b/tools/testing/selftests/perf_events/config
new file mode 100644
index 000000000000..ba58ff2203e4
--- /dev/null
+++ b/tools/testing/selftests/perf_events/config
@@ -0,0 +1 @@
+CONFIG_PERF_EVENTS=y
diff --git a/tools/testing/selftests/perf_events/remove_on_exec.c b/tools/testing/selftests/perf_events/remove_on_exec.c
new file mode 100644
index 000000000000..5814611a1dc7
--- /dev/null
+++ b/tools/testing/selftests/perf_events/remove_on_exec.c
@@ -0,0 +1,260 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for remove_on_exec.
+ *
+ * Copyright (C) 2021, Google LLC.
+ */
+
+#define _GNU_SOURCE
+
+/* We need the latest siginfo from the kernel repo. */
+#include <sys/types.h>
+#include <asm/siginfo.h>
+#define __have_siginfo_t 1
+#define __have_sigval_t 1
+#define __have_sigevent_t 1
+#define __siginfo_t_defined
+#define __sigval_t_defined
+#define __sigevent_t_defined
+#define _BITS_SIGINFO_CONSTS_H 1
+#define _BITS_SIGEVENT_CONSTS_H 1
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <linux/perf_event.h>
+#include <pthread.h>
+#include <signal.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "../kselftest_harness.h"
+
+static volatile int signal_count;
+
+static struct perf_event_attr make_event_attr(void)
+{
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .size = sizeof(attr),
+ .config = PERF_COUNT_HW_INSTRUCTIONS,
+ .sample_period = 1000,
+ .exclude_kernel = 1,
+ .exclude_hv = 1,
+ .disabled = 1,
+ .inherit = 1,
+ /*
+ * Children normally retain their inherited event on exec; with
+ * remove_on_exec, we'll remove their event, but the parent and
+ * any other non-exec'd children will keep their events.
+ */
+ .remove_on_exec = 1,
+ .sigtrap = 1,
+ };
+ return attr;
+}
+
+static void sigtrap_handler(int signum, siginfo_t *info, void *ucontext)
+{
+ if (info->si_code != TRAP_PERF) {
+ fprintf(stderr, "%s: unexpected si_code %d\n", __func__, info->si_code);
+ return;
+ }
+
+ signal_count++;
+}
+
+FIXTURE(remove_on_exec)
+{
+ struct sigaction oldact;
+ int fd;
+};
+
+FIXTURE_SETUP(remove_on_exec)
+{
+ struct perf_event_attr attr = make_event_attr();
+ struct sigaction action = {};
+
+ signal_count = 0;
+
+ /* Initialize sigtrap handler. */
+ action.sa_flags = SA_SIGINFO | SA_NODEFER;
+ action.sa_sigaction = sigtrap_handler;
+ sigemptyset(&action.sa_mask);
+ ASSERT_EQ(sigaction(SIGTRAP, &action, &self->oldact), 0);
+
+ /* Initialize perf event. */
+ self->fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC);
+ ASSERT_NE(self->fd, -1);
+}
+
+FIXTURE_TEARDOWN(remove_on_exec)
+{
+ close(self->fd);
+ sigaction(SIGTRAP, &self->oldact, NULL);
+}
+
+/* Verify event propagates to fork'd child. */
+TEST_F(remove_on_exec, fork_only)
+{
+ int status;
+ pid_t pid = fork();
+
+ if (pid == 0) {
+ ASSERT_EQ(signal_count, 0);
+ ASSERT_EQ(ioctl(self->fd, PERF_EVENT_IOC_ENABLE, 0), 0);
+ while (!signal_count);
+ _exit(42);
+ }
+
+ while (!signal_count); /* Child enables event. */
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
+ EXPECT_EQ(WEXITSTATUS(status), 42);
+}
+
+/*
+ * Verify that event does _not_ propagate to fork+exec'd child; event enabled
+ * after fork+exec.
+ */
+TEST_F(remove_on_exec, fork_exec_then_enable)
+{
+ pid_t pid_exec, pid_only_fork;
+ int pipefd[2];
+ int tmp;
+
+ /*
+ * Non-exec child, to ensure exec does not affect inherited events of
+ * other children.
+ */
+ pid_only_fork = fork();
+ if (pid_only_fork == 0) {
+ /* Block until parent enables event. */
+ while (!signal_count);
+ _exit(42);
+ }
+
+ ASSERT_NE(pipe(pipefd), -1);
+ pid_exec = fork();
+ if (pid_exec == 0) {
+ ASSERT_NE(dup2(pipefd[1], STDOUT_FILENO), -1);
+ close(pipefd[0]);
+ execl("/proc/self/exe", "exec_child", NULL);
+ _exit((perror("exec failed"), 1));
+ }
+ close(pipefd[1]);
+
+ ASSERT_EQ(waitpid(pid_exec, &tmp, WNOHANG), 0); /* Child is running. */
+ /* Wait for exec'd child to start spinning. */
+ EXPECT_EQ(read(pipefd[0], &tmp, sizeof(int)), sizeof(int));
+ EXPECT_EQ(tmp, 42);
+ close(pipefd[0]);
+ /* Now we can enable the event, knowing the child is doing work. */
+ EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_ENABLE, 0), 0);
+ /* If the event propagated to the exec'd child, it will exit normally... */
+ usleep(100000); /* ... give time for event to trigger (in case of bug). */
+ EXPECT_EQ(waitpid(pid_exec, &tmp, WNOHANG), 0); /* Should still be running. */
+ EXPECT_EQ(kill(pid_exec, SIGKILL), 0);
+
+ /* Verify removal from child did not affect this task's event. */
+ tmp = signal_count;
+ while (signal_count == tmp); /* Should not hang! */
+ /* Nor should it have affected the first child. */
+ EXPECT_EQ(waitpid(pid_only_fork, &tmp, 0), pid_only_fork);
+ EXPECT_EQ(WEXITSTATUS(tmp), 42);
+}
+
+/*
+ * Verify that event does _not_ propagate to fork+exec'd child; event enabled
+ * before fork+exec.
+ */
+TEST_F(remove_on_exec, enable_then_fork_exec)
+{
+ pid_t pid_exec;
+ int tmp;
+
+ EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_ENABLE, 0), 0);
+
+ pid_exec = fork();
+ if (pid_exec == 0) {
+ execl("/proc/self/exe", "exec_child", NULL);
+ _exit((perror("exec failed"), 1));
+ }
+
+ /*
+ * The child may exit abnormally at any time if the event propagated and
+ * a SIGTRAP is sent before the handler was set up.
+ */
+ usleep(100000); /* ... give time for event to trigger (in case of bug). */
+ EXPECT_EQ(waitpid(pid_exec, &tmp, WNOHANG), 0); /* Should still be running. */
+ EXPECT_EQ(kill(pid_exec, SIGKILL), 0);
+
+ /* Verify removal from child did not affect this task's event. */
+ tmp = signal_count;
+ while (signal_count == tmp); /* Should not hang! */
+}
+
+TEST_F(remove_on_exec, exec_stress)
+{
+ pid_t pids[30];
+ int i, tmp;
+
+ for (i = 0; i < sizeof(pids) / sizeof(pids[0]); i++) {
+ pids[i] = fork();
+ if (pids[i] == 0) {
+ execl("/proc/self/exe", "exec_child", NULL);
+ _exit((perror("exec failed"), 1));
+ }
+
+ /* Some forked with event disabled, rest with enabled. */
+ if (i > 10)
+ EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_ENABLE, 0), 0);
+ }
+
+ usleep(100000); /* ... give time for event to trigger (in case of bug). */
+
+ for (i = 0; i < sizeof(pids) / sizeof(pids[0]); i++) {
+ /* All children should still be running. */
+ EXPECT_EQ(waitpid(pids[i], &tmp, WNOHANG), 0);
+ EXPECT_EQ(kill(pids[i], SIGKILL), 0);
+ }
+
+ /* Verify event is still alive. */
+ tmp = signal_count;
+ while (signal_count == tmp);
+}
+
+/* For exec'd child. */
+static void exec_child(void)
+{
+ struct sigaction action = {};
+ const int val = 42;
+
+ /* Set up sigtrap handler in case we erroneously receive a trap. */
+ action.sa_flags = SA_SIGINFO | SA_NODEFER;
+ action.sa_sigaction = sigtrap_handler;
+ sigemptyset(&action.sa_mask);
+ if (sigaction(SIGTRAP, &action, NULL))
+ _exit((perror("sigaction failed"), 1));
+
+ /* Signal parent that we're starting to spin. */
+ if (write(STDOUT_FILENO, &val, sizeof(int)) == -1)
+ _exit((perror("write failed"), 1));
+
+ /* Should hang here until killed. */
+ while (!signal_count);
+}
+
+#define main test_main
+TEST_HARNESS_MAIN
+#undef main
+int main(int argc, char *argv[])
+{
+ if (!strcmp(argv[0], "exec_child")) {
+ exec_child();
+ return 1;
+ }
+
+ return test_main(argc, argv);
+}
diff --git a/tools/testing/selftests/perf_events/settings b/tools/testing/selftests/perf_events/settings
new file mode 100644
index 000000000000..6091b45d226b
--- /dev/null
+++ b/tools/testing/selftests/perf_events/settings
@@ -0,0 +1 @@
+timeout=120
diff --git a/tools/testing/selftests/perf_events/sigtrap_threads.c b/tools/testing/selftests/perf_events/sigtrap_threads.c
new file mode 100644
index 000000000000..78ddf5e11625
--- /dev/null
+++ b/tools/testing/selftests/perf_events/sigtrap_threads.c
@@ -0,0 +1,210 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for perf events with SIGTRAP across all threads.
+ *
+ * Copyright (C) 2021, Google LLC.
+ */
+
+#define _GNU_SOURCE
+
+/* We need the latest siginfo from the kernel repo. */
+#include <sys/types.h>
+#include <asm/siginfo.h>
+#define __have_siginfo_t 1
+#define __have_sigval_t 1
+#define __have_sigevent_t 1
+#define __siginfo_t_defined
+#define __sigval_t_defined
+#define __sigevent_t_defined
+#define _BITS_SIGINFO_CONSTS_H 1
+#define _BITS_SIGEVENT_CONSTS_H 1
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <linux/hw_breakpoint.h>
+#include <linux/perf_event.h>
+#include <pthread.h>
+#include <signal.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "../kselftest_harness.h"
+
+#define NUM_THREADS 5
+
+/* Data shared between test body, threads, and signal handler. */
+static struct {
+ int tids_want_signal; /* Which threads still want a signal. */
+ int signal_count; /* Sanity check number of signals received. */
+ volatile int iterate_on; /* Variable to set breakpoint on. */
+ siginfo_t first_siginfo; /* First observed siginfo_t. */
+} ctx;
+
+/* Unique value to check si_perf is correctly set from perf_event_attr::sig_data. */
+#define TEST_SIG_DATA(addr) (~(unsigned long)(addr))
+
+static struct perf_event_attr make_event_attr(bool enabled, volatile void *addr)
+{
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_BREAKPOINT,
+ .size = sizeof(attr),
+ .sample_period = 1,
+ .disabled = !enabled,
+ .bp_addr = (unsigned long)addr,
+ .bp_type = HW_BREAKPOINT_RW,
+ .bp_len = HW_BREAKPOINT_LEN_1,
+ .inherit = 1, /* Children inherit events ... */
+ .inherit_thread = 1, /* ... but only cloned with CLONE_THREAD. */
+ .remove_on_exec = 1, /* Required by sigtrap. */
+ .sigtrap = 1, /* Request synchronous SIGTRAP on event. */
+ .sig_data = TEST_SIG_DATA(addr),
+ };
+ return attr;
+}
+
+static void sigtrap_handler(int signum, siginfo_t *info, void *ucontext)
+{
+ if (info->si_code != TRAP_PERF) {
+ fprintf(stderr, "%s: unexpected si_code %d\n", __func__, info->si_code);
+ return;
+ }
+
+ /*
+ * The data in siginfo_t we're interested in should all be the same
+ * across threads.
+ */
+ if (!__atomic_fetch_add(&ctx.signal_count, 1, __ATOMIC_RELAXED))
+ ctx.first_siginfo = *info;
+ __atomic_fetch_sub(&ctx.tids_want_signal, syscall(__NR_gettid), __ATOMIC_RELAXED);
+}
+
+static void *test_thread(void *arg)
+{
+ pthread_barrier_t *barrier = (pthread_barrier_t *)arg;
+ pid_t tid = syscall(__NR_gettid);
+ int iter;
+ int i;
+
+ pthread_barrier_wait(barrier);
+
+ __atomic_fetch_add(&ctx.tids_want_signal, tid, __ATOMIC_RELAXED);
+ iter = ctx.iterate_on; /* read */
+ for (i = 0; i < iter - 1; i++) {
+ __atomic_fetch_add(&ctx.tids_want_signal, tid, __ATOMIC_RELAXED);
+ ctx.iterate_on = iter; /* idempotent write */
+ }
+
+ return NULL;
+}
+
+FIXTURE(sigtrap_threads)
+{
+ struct sigaction oldact;
+ pthread_t threads[NUM_THREADS];
+ pthread_barrier_t barrier;
+ int fd;
+};
+
+FIXTURE_SETUP(sigtrap_threads)
+{
+ struct perf_event_attr attr = make_event_attr(false, &ctx.iterate_on);
+ struct sigaction action = {};
+ int i;
+
+ memset(&ctx, 0, sizeof(ctx));
+
+ /* Initialize sigtrap handler. */
+ action.sa_flags = SA_SIGINFO | SA_NODEFER;
+ action.sa_sigaction = sigtrap_handler;
+ sigemptyset(&action.sa_mask);
+ ASSERT_EQ(sigaction(SIGTRAP, &action, &self->oldact), 0);
+
+ /* Initialize perf event. */
+ self->fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC);
+ ASSERT_NE(self->fd, -1);
+
+ /* Spawn threads inheriting perf event. */
+ pthread_barrier_init(&self->barrier, NULL, NUM_THREADS + 1);
+ for (i = 0; i < NUM_THREADS; i++)
+ ASSERT_EQ(pthread_create(&self->threads[i], NULL, test_thread, &self->barrier), 0);
+}
+
+FIXTURE_TEARDOWN(sigtrap_threads)
+{
+ pthread_barrier_destroy(&self->barrier);
+ close(self->fd);
+ sigaction(SIGTRAP, &self->oldact, NULL);
+}
+
+static void run_test_threads(struct __test_metadata *_metadata,
+ FIXTURE_DATA(sigtrap_threads) *self)
+{
+ int i;
+
+ pthread_barrier_wait(&self->barrier);
+ for (i = 0; i < NUM_THREADS; i++)
+ ASSERT_EQ(pthread_join(self->threads[i], NULL), 0);
+}
+
+TEST_F(sigtrap_threads, remain_disabled)
+{
+ run_test_threads(_metadata, self);
+ EXPECT_EQ(ctx.signal_count, 0);
+ EXPECT_NE(ctx.tids_want_signal, 0);
+}
+
+TEST_F(sigtrap_threads, enable_event)
+{
+ EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_ENABLE, 0), 0);
+ run_test_threads(_metadata, self);
+
+ EXPECT_EQ(ctx.signal_count, NUM_THREADS);
+ EXPECT_EQ(ctx.tids_want_signal, 0);
+ EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on);
+ EXPECT_EQ(ctx.first_siginfo.si_errno, PERF_TYPE_BREAKPOINT);
+ EXPECT_EQ(ctx.first_siginfo.si_perf, TEST_SIG_DATA(&ctx.iterate_on));
+
+ /* Check enabled for parent. */
+ ctx.iterate_on = 0;
+ EXPECT_EQ(ctx.signal_count, NUM_THREADS + 1);
+}
+
+/* Test that modification propagates to all inherited events. */
+TEST_F(sigtrap_threads, modify_and_enable_event)
+{
+ struct perf_event_attr new_attr = make_event_attr(true, &ctx.iterate_on);
+
+ EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_MODIFY_ATTRIBUTES, &new_attr), 0);
+ run_test_threads(_metadata, self);
+
+ EXPECT_EQ(ctx.signal_count, NUM_THREADS);
+ EXPECT_EQ(ctx.tids_want_signal, 0);
+ EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on);
+ EXPECT_EQ(ctx.first_siginfo.si_errno, PERF_TYPE_BREAKPOINT);
+ EXPECT_EQ(ctx.first_siginfo.si_perf, TEST_SIG_DATA(&ctx.iterate_on));
+
+ /* Check enabled for parent. */
+ ctx.iterate_on = 0;
+ EXPECT_EQ(ctx.signal_count, NUM_THREADS + 1);
+}
+
+/* Stress test event + signal handling. */
+TEST_F(sigtrap_threads, signal_stress)
+{
+ ctx.iterate_on = 3000;
+
+ EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_ENABLE, 0), 0);
+ run_test_threads(_metadata, self);
+ EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_DISABLE, 0), 0);
+
+ EXPECT_EQ(ctx.signal_count, NUM_THREADS * ctx.iterate_on);
+ EXPECT_EQ(ctx.tids_want_signal, 0);
+ EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on);
+ EXPECT_EQ(ctx.first_siginfo.si_errno, PERF_TYPE_BREAKPOINT);
+ EXPECT_EQ(ctx.first_siginfo.si_perf, TEST_SIG_DATA(&ctx.iterate_on));
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
index af3df79d8163..c5ecb4634094 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-include ../../../../../../scripts/Kbuild.include
+include ../../../../../build/Build.include
noarg:
$(MAKE) -C ../../
diff --git a/tools/testing/selftests/rcutorture/bin/cpus2use.sh b/tools/testing/selftests/rcutorture/bin/cpus2use.sh
index 1dbfb62567d2..6bb993001680 100755
--- a/tools/testing/selftests/rcutorture/bin/cpus2use.sh
+++ b/tools/testing/selftests/rcutorture/bin/cpus2use.sh
@@ -21,7 +21,6 @@ then
awk -v ncpus=$ncpus '{ print ncpus * ($7 + $NF) / 100 }'`
else
# No mpstat command, so use all available CPUs.
- echo The mpstat command is not available, so greedily using all CPUs.
idlecpus=$ncpus
fi
awk -v ncpus=$ncpus -v idlecpus=$idlecpus < /dev/null '
diff --git a/tools/testing/selftests/rcutorture/bin/jitter.sh b/tools/testing/selftests/rcutorture/bin/jitter.sh
index 188b864bc4bf..15d937ba96ca 100755
--- a/tools/testing/selftests/rcutorture/bin/jitter.sh
+++ b/tools/testing/selftests/rcutorture/bin/jitter.sh
@@ -5,10 +5,11 @@
# of this script is to inflict random OS jitter on a concurrently running
# test.
#
-# Usage: jitter.sh me duration [ sleepmax [ spinmax ] ]
+# Usage: jitter.sh me jittering-path duration [ sleepmax [ spinmax ] ]
#
# me: Random-number-generator seed salt.
# duration: Time to run in seconds.
+# jittering-path: Path to file whose removal will stop this script.
# sleepmax: Maximum microseconds to sleep, defaults to one second.
# spinmax: Maximum microseconds to spin, defaults to one millisecond.
#
@@ -17,9 +18,10 @@
# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
me=$(($1 * 1000))
-duration=$2
-sleepmax=${3-1000000}
-spinmax=${4-1000}
+jittering=$2
+duration=$3
+sleepmax=${4-1000000}
+spinmax=${5-1000}
n=1
@@ -47,7 +49,7 @@ do
fi
# Check for stop request.
- if test -f "$TORTURE_STOPFILE"
+ if ! test -f "$jittering"
then
exit 1;
fi
@@ -67,10 +69,10 @@ do
srand(n + me + systime());
ncpus = split(cpus, ca);
curcpu = ca[int(rand() * ncpus + 1)];
- mask = lshift(1, curcpu);
- if (mask + 0 <= 0)
- mask = 1;
- printf("%#x\n", mask);
+ z = "";
+ for (i = 1; 4 * i <= curcpu; i++)
+ z = z "0";
+ print "0x" 2 ^ (curcpu % 4) z;
}' < /dev/null`
n=$(($n+1))
if ! taskset -p $cpumask $$ > /dev/null 2>&1
diff --git a/tools/testing/selftests/rcutorture/bin/jitterstart.sh b/tools/testing/selftests/rcutorture/bin/jitterstart.sh
new file mode 100644
index 000000000000..3d710ad291c3
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/jitterstart.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Start up the specified number of jitter.sh scripts in the background.
+#
+# Usage: . jitterstart.sh n jittering-dir duration [ sleepmax [ spinmax ] ]
+#
+# n: Number of jitter.sh scripts to start up.
+# jittering-dir: Directory in which to put "jittering" file.
+# duration: Time to run in seconds.
+# sleepmax: Maximum microseconds to sleep, defaults to one second.
+# spinmax: Maximum microseconds to spin, defaults to one millisecond.
+#
+# Copyright (C) 2021 Facebook, Inc.
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+jitter_n=$1
+if test -z "$jitter_n"
+then
+ echo jitterstart.sh: Missing count of jitter.sh scripts to start.
+ exit 33
+fi
+jittering_dir=$2
+if test -z "$jittering_dir"
+then
+ echo jitterstart.sh: Missing directory in which to place jittering file.
+ exit 34
+fi
+shift
+shift
+
+touch ${jittering_dir}/jittering
+for ((jitter_i = 1; jitter_i <= $jitter_n; jitter_i++))
+do
+ jitter.sh $jitter_i "${jittering_dir}/jittering" "$@" &
+done
diff --git a/tools/testing/selftests/rcutorture/bin/jitterstop.sh b/tools/testing/selftests/rcutorture/bin/jitterstop.sh
new file mode 100644
index 000000000000..576a4cf4b79a
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/jitterstop.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Remove the "jittering" file, signaling the jitter.sh scripts to stop,
+# then wait for them to terminate.
+#
+# Usage: . jitterstop.sh jittering-dir
+#
+# jittering-dir: Directory containing "jittering" file.
+#
+# Copyright (C) 2021 Facebook, Inc.
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+jittering_dir=$1
+if test -z "$jittering_dir"
+then
+ echo jitterstop.sh: Missing directory in which to place jittering file.
+ exit 34
+fi
+
+rm -f ${jittering_dir}/jittering
+wait
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-again.sh b/tools/testing/selftests/rcutorture/bin/kvm-again.sh
new file mode 100755
index 000000000000..46e47a00a7db
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-again.sh
@@ -0,0 +1,199 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Rerun a series of tests under KVM.
+#
+# Usage: kvm-again.sh /path/to/old/run [ options ]
+#
+# Copyright (C) 2021 Facebook, Inc.
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+scriptname=$0
+args="$*"
+
+T=${TMPDIR-/tmp}/kvm-again.sh.$$
+trap 'rm -rf $T' 0
+mkdir $T
+
+if ! test -d tools/testing/selftests/rcutorture/bin
+then
+ echo $scriptname must be run from top-level directory of kernel source tree.
+ exit 1
+fi
+
+oldrun=$1
+shift
+if ! test -d "$oldrun"
+then
+ echo "Usage: $scriptname /path/to/old/run [ options ]"
+ exit 1
+fi
+if ! cp "$oldrun/batches" $T/batches.oldrun
+then
+ # Later on, can reconstitute this from console.log files.
+ echo Prior run batches file does not exist: $oldrun/batches
+ exit 1
+fi
+
+if test -f "$oldrun/torture_suite"
+then
+ torture_suite="`cat $oldrun/torture_suite`"
+elif test -f "$oldrun/TORTURE_SUITE"
+then
+ torture_suite="`cat $oldrun/TORTURE_SUITE`"
+else
+ echo "Prior run torture_suite file does not exist: $oldrun/{torture_suite,TORTURE_SUITE}"
+ exit 1
+fi
+
+KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
+PATH=${KVM}/bin:$PATH; export PATH
+. functions.sh
+
+dryrun=
+dur=
+default_link="cp -R"
+rundir="`pwd`/tools/testing/selftests/rcutorture/res/`date +%Y.%m.%d-%H.%M.%S-again`"
+
+startdate="`date`"
+starttime="`get_starttime`"
+
+usage () {
+ echo "Usage: $scriptname $oldrun [ arguments ]:"
+ echo " --dryrun"
+ echo " --duration minutes | <seconds>s | <hours>h | <days>d"
+ echo " --link hard|soft|copy"
+ echo " --remote"
+ echo " --rundir /new/res/path"
+ exit 1
+}
+
+while test $# -gt 0
+do
+ case "$1" in
+ --dryrun)
+ dryrun=1
+ ;;
+ --duration)
+ checkarg --duration "(minutes)" $# "$2" '^[0-9][0-9]*\(s\|m\|h\|d\|\)$' '^error'
+ mult=60
+ if echo "$2" | grep -q 's$'
+ then
+ mult=1
+ elif echo "$2" | grep -q 'h$'
+ then
+ mult=3600
+ elif echo "$2" | grep -q 'd$'
+ then
+ mult=86400
+ fi
+ ts=`echo $2 | sed -e 's/[smhd]$//'`
+ dur=$(($ts*mult))
+ shift
+ ;;
+ --link)
+ checkarg --link "hard|soft|copy" "$#" "$2" 'hard\|soft\|copy' '^--'
+ case "$2" in
+ copy)
+ arg_link="cp -R"
+ ;;
+ hard)
+ arg_link="cp -Rl"
+ ;;
+ soft)
+ arg_link="cp -Rs"
+ ;;
+ esac
+ shift
+ ;;
+ --remote)
+ arg_remote=1
+ default_link="cp -as"
+ ;;
+ --rundir)
+ checkarg --rundir "(absolute pathname)" "$#" "$2" '^/' '^error'
+ rundir=$2
+ if test -e "$rundir"
+ then
+ echo "--rundir $2: Already exists."
+ usage
+ fi
+ shift
+ ;;
+ *)
+ echo Unknown argument $1
+ usage
+ ;;
+ esac
+ shift
+done
+if test -z "$arg_link"
+then
+ arg_link="$default_link"
+fi
+
+echo ---- Re-run results directory: $rundir
+
+# Copy old run directory tree over and adjust.
+mkdir -p "`dirname "$rundir"`"
+if ! $arg_link "$oldrun" "$rundir"
+then
+ echo "Cannot copy from $oldrun to $rundir."
+ usage
+fi
+rm -f "$rundir"/*/{console.log,console.log.diags,qemu_pid,qemu-retval,Warnings,kvm-test-1-run.sh.out,kvm-test-1-run-qemu.sh.out,vmlinux} "$rundir"/log
+echo $oldrun > "$rundir/re-run"
+if ! test -d "$rundir/../../bin"
+then
+ $arg_link "$oldrun/../../bin" "$rundir/../.."
+fi
+for i in $rundir/*/qemu-cmd
+do
+ cp "$i" $T
+ qemu_cmd_dir="`dirname "$i"`"
+ kernel_dir="`echo $qemu_cmd_dir | sed -e 's/\.[0-9]\+$//'`"
+ jitter_dir="`dirname "$kernel_dir"`"
+ kvm-transform.sh "$kernel_dir/bzImage" "$qemu_cmd_dir/console.log" "$jitter_dir" $dur < $T/qemu-cmd > $i
+ if test -n "$arg_remote"
+ then
+ echo "# TORTURE_KCONFIG_GDB_ARG=''" >> $i
+ fi
+done
+
+# Extract settings from the last qemu-cmd file transformed above.
+grep '^#' $i | sed -e 's/^# //' > $T/qemu-cmd-settings
+. $T/qemu-cmd-settings
+
+grep -v '^#' $T/batches.oldrun | awk '
+BEGIN {
+ oldbatch = 1;
+}
+
+{
+ if (oldbatch != $1) {
+ print "kvm-test-1-run-batch.sh" curbatch;
+ curbatch = "";
+ oldbatch = $1;
+ }
+ curbatch = curbatch " " $2;
+}
+
+END {
+ print "kvm-test-1-run-batch.sh" curbatch
+}' > $T/runbatches.sh
+
+if test -n "$dryrun"
+then
+ echo ---- Dryrun complete, directory: $rundir | tee -a "$rundir/log"
+else
+ ( cd "$rundir"; sh $T/runbatches.sh )
+ kcsan-collapse.sh "$rundir" | tee -a "$rundir/log"
+ echo | tee -a "$rundir/log"
+ echo ---- Results directory: $rundir | tee -a "$rundir/log"
+ kvm-recheck.sh "$rundir" > $T/kvm-recheck.sh.out 2>&1
+ ret=$?
+ cat $T/kvm-recheck.sh.out | tee -a "$rundir/log"
+ echo " --- Done at `date` (`get_starttime_duration $starttime`) exitcode $ret" | tee -a "$rundir/log"
+ exit $ret
+fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
index 47cf4db10896..e01b31b87044 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -30,7 +30,7 @@ do
resdir=`echo $i | sed -e 's,/$,,' -e 's,/[^/]*$,,'`
head -1 $resdir/log
fi
- TORTURE_SUITE="`cat $i/../TORTURE_SUITE`"
+ TORTURE_SUITE="`cat $i/../torture_suite`"
configfile=`echo $i | sed -e 's,^.*/,,'`
rm -f $i/console.log.*.diags
kvm-recheck-${TORTURE_SUITE}.sh $i
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh
new file mode 100755
index 000000000000..7ea0809e229e
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Carry out a kvm-based run for the specified batch of scenarios, which
+# might have been built by --build-only kvm.sh run.
+#
+# Usage: kvm-test-1-run-batch.sh SCENARIO [ SCENARIO ... ]
+#
+# Each SCENARIO is the name of a directory in the current directory
+# containing a ready-to-run qemu-cmd file.
+#
+# Copyright (C) 2021 Facebook, Inc.
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+T=${TMPDIR-/tmp}/kvm-test-1-run-batch.sh.$$
+trap 'rm -rf $T' 0
+mkdir $T
+
+echo ---- Running batch $*
+# Check arguments
+runfiles=
+for i in "$@"
+do
+ if ! echo $i | grep -q '^[^/.a-z]\+\(\.[0-9]\+\)\?$'
+ then
+ echo Bad scenario name: \"$i\" 1>&2
+ exit 1
+ fi
+ if ! test -d "$i"
+ then
+ echo Scenario name not a directory: \"$i\" 1>&2
+ exit 2
+ fi
+ if ! test -f "$i/qemu-cmd"
+ then
+ echo Scenario lacks a command file: \"$i/qemu-cmd\" 1>&2
+ exit 3
+ fi
+ rm -f $i/build.*
+ touch $i/build.run
+ runfiles="$runfiles $i/build.run"
+done
+
+# Extract settings from the qemu-cmd file.
+grep '^#' $1/qemu-cmd | sed -e 's/^# //' > $T/qemu-cmd-settings
+. $T/qemu-cmd-settings
+
+# Start up jitter, start each scenario, wait, end jitter.
+echo ---- System running test: `uname -a`
+echo ---- Starting kernels. `date` | tee -a log
+$TORTURE_JITTER_START
+for i in "$@"
+do
+ echo ---- System running test: `uname -a` > $i/kvm-test-1-run-qemu.sh.out
+ echo > $i/kvm-test-1-run-qemu.sh.out
+ kvm-test-1-run-qemu.sh $i >> $i/kvm-test-1-run-qemu.sh.out 2>&1 &
+done
+for i in $runfiles
+do
+ while ls $i > /dev/null 2>&1
+ do
+ :
+ done
+done
+echo ---- All kernel runs complete. `date` | tee -a log
+$TORTURE_JITTER_STOP
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh
new file mode 100755
index 000000000000..5b1aa2a4f3f6
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh
@@ -0,0 +1,176 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Carry out a kvm-based run for the specified qemu-cmd file, which might
+# have been generated by --build-only kvm.sh run.
+#
+# Usage: kvm-test-1-run-qemu.sh qemu-cmd-dir
+#
+# qemu-cmd-dir provides the directory containing qemu-cmd file.
+# This is assumed to be of the form prefix/ds/scenario, where
+# "ds" is the top-level date-stamped directory and "scenario"
+# is the scenario name. Any required adjustments to this file
+# must have been made by the caller. The shell-command comments
+# at the end of the qemu-cmd file are not optional.
+#
+# Copyright (C) 2021 Facebook, Inc.
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+T=${TMPDIR-/tmp}/kvm-test-1-run-qemu.sh.$$
+trap 'rm -rf $T' 0
+mkdir $T
+
+resdir="$1"
+if ! test -d "$resdir"
+then
+ echo $0: Nonexistent directory: $resdir
+ exit 1
+fi
+if ! test -f "$resdir/qemu-cmd"
+then
+ echo $0: Nonexistent qemu-cmd file: $resdir/qemu-cmd
+ exit 1
+fi
+
+echo ' ---' `date`: Starting kernel, PID $$
+
+# Obtain settings from the qemu-cmd file.
+grep '^#' $resdir/qemu-cmd | sed -e 's/^# //' > $T/qemu-cmd-settings
+. $T/qemu-cmd-settings
+
+# Decorate qemu-cmd with redirection, backgrounding, and PID capture
+sed -e 's/$/ 2>\&1 \&/' < $resdir/qemu-cmd > $T/qemu-cmd
+echo 'echo $! > $resdir/qemu_pid' >> $T/qemu-cmd
+
+# In case qemu refuses to run...
+echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log
+
+# Attempt to run qemu
+kstarttime=`gawk 'BEGIN { print systime() }' < /dev/null`
+( . $T/qemu-cmd; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) &
+commandcompleted=0
+if test -z "$TORTURE_KCONFIG_GDB_ARG"
+then
+ sleep 10 # Give qemu's pid a chance to reach the file
+ if test -s "$resdir/qemu_pid"
+ then
+ qemu_pid=`cat "$resdir/qemu_pid"`
+ echo Monitoring qemu job at pid $qemu_pid
+ else
+ qemu_pid=""
+ echo Monitoring qemu job at yet-as-unknown pid
+ fi
+fi
+if test -n "$TORTURE_KCONFIG_GDB_ARG"
+then
+ base_resdir=`echo $resdir | sed -e 's/\.[0-9]\+$//'`
+ if ! test -f $base_resdir/vmlinux
+ then
+ base_resdir="`cat re-run`/$resdir"
+ if ! test -f $base_resdir/vmlinux
+ then
+ base_resdir=/path/to
+ fi
+ fi
+ echo Waiting for you to attach a debug session, for example: > /dev/tty
+ echo " gdb $base_resdir/vmlinux" > /dev/tty
+ echo 'After symbols load and the "(gdb)" prompt appears:' > /dev/tty
+ echo " target remote :1234" > /dev/tty
+ echo " continue" > /dev/tty
+ kstarttime=`gawk 'BEGIN { print systime() }' < /dev/null`
+fi
+while :
+do
+ if test -z "$qemu_pid" -a -s "$resdir/qemu_pid"
+ then
+ qemu_pid=`cat "$resdir/qemu_pid"`
+ fi
+ kruntime=`gawk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null`
+ if test -z "$qemu_pid" || kill -0 "$qemu_pid" > /dev/null 2>&1
+ then
+ if test -n "$TORTURE_KCONFIG_GDB_ARG"
+ then
+ :
+ elif test $kruntime -ge $seconds || test -f "$resdir/../STOP.1"
+ then
+ break;
+ fi
+ sleep 1
+ else
+ commandcompleted=1
+ if test $kruntime -lt $seconds
+ then
+ echo Completed in $kruntime vs. $seconds >> $resdir/Warnings 2>&1
+ grep "^(qemu) qemu:" $resdir/kvm-test-1-run.sh.out >> $resdir/Warnings 2>&1
+ killpid="`sed -n "s/^(qemu) qemu: terminating on signal [0-9]* from pid \([0-9]*\).*$/\1/p" $resdir/Warnings`"
+ if test -n "$killpid"
+ then
+ echo "ps -fp $killpid" >> $resdir/Warnings 2>&1
+ ps -fp $killpid >> $resdir/Warnings 2>&1
+ fi
+ else
+ echo ' ---' `date`: "Kernel done"
+ fi
+ break
+ fi
+done
+if test -z "$qemu_pid" -a -s "$resdir/qemu_pid"
+then
+ qemu_pid=`cat "$resdir/qemu_pid"`
+fi
+if test $commandcompleted -eq 0 -a -n "$qemu_pid"
+then
+ if ! test -f "$resdir/../STOP.1"
+ then
+ echo Grace period for qemu job at pid $qemu_pid
+ fi
+ oldline="`tail $resdir/console.log`"
+ while :
+ do
+ if test -f "$resdir/../STOP.1"
+ then
+ echo "PID $qemu_pid killed due to run STOP.1 request" >> $resdir/Warnings 2>&1
+ kill -KILL $qemu_pid
+ break
+ fi
+ kruntime=`gawk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null`
+ if kill -0 $qemu_pid > /dev/null 2>&1
+ then
+ :
+ else
+ break
+ fi
+ must_continue=no
+ newline="`tail $resdir/console.log`"
+ if test "$newline" != "$oldline" && echo $newline | grep -q ' [0-9]\+us : '
+ then
+ must_continue=yes
+ fi
+ last_ts="`tail $resdir/console.log | grep '^\[ *[0-9]\+\.[0-9]\+]' | tail -1 | sed -e 's/^\[ *//' -e 's/\..*$//'`"
+ if test -z "$last_ts"
+ then
+ last_ts=0
+ fi
+ if test "$newline" != "$oldline" -a "$last_ts" -lt $((seconds + $TORTURE_SHUTDOWN_GRACE))
+ then
+ must_continue=yes
+ fi
+ if test $must_continue = no -a $kruntime -ge $((seconds + $TORTURE_SHUTDOWN_GRACE))
+ then
+ echo "!!! PID $qemu_pid hung at $kruntime vs. $seconds seconds" >> $resdir/Warnings 2>&1
+ kill -KILL $qemu_pid
+ break
+ fi
+ oldline=$newline
+ sleep 10
+ done
+elif test -z "$qemu_pid"
+then
+ echo Unknown PID, cannot kill qemu command
+fi
+
+# Tell the script that this run is done.
+rm -f $resdir/build.run
+
+parse-console.sh $resdir/console.log $title
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index 536d103ef166..420ed5ce9d32 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -7,15 +7,15 @@
# Execute this in the source tree. Do not run it as a background task
# because qemu does not seem to like that much.
#
-# Usage: kvm-test-1-run.sh config builddir resdir seconds qemu-args boot_args
+# Usage: kvm-test-1-run.sh config resdir seconds qemu-args boot_args_in
#
# qemu-args defaults to "-enable-kvm -nographic", along with arguments
# specifying the number of CPUs and other options
# generated from the underlying CPU architecture.
-# boot_args defaults to value returned by the per_version_boot_params
+# boot_args_in defaults to value returned by the per_version_boot_params
# shell function.
#
-# Anything you specify for either qemu-args or boot_args is appended to
+# Anything you specify for either qemu-args or boot_args_in is appended to
# the default values. The "-smp" value is deduced from the contents of
# the config fragment.
#
@@ -35,14 +35,13 @@ mkdir $T
config_template=${1}
config_dir=`echo $config_template | sed -e 's,/[^/]*$,,'`
title=`echo $config_template | sed -e 's/^.*\///'`
-builddir=${2}
-resdir=${3}
+resdir=${2}
if test -z "$resdir" -o ! -d "$resdir" -o ! -w "$resdir"
then
echo "kvm-test-1-run.sh :$resdir: Not a writable directory, cannot store results into it"
exit 1
fi
-echo ' ---' `date`: Starting build
+echo ' ---' `date`: Starting build, PID $$
echo ' ---' Kconfig fragment at: $config_template >> $resdir/log
touch $resdir/ConfigFragment.input
@@ -73,7 +72,7 @@ config_override_param "--kconfig argument" KcList "$TORTURE_KCONFIG_ARG"
cp $T/KcList $resdir/ConfigFragment
base_resdir=`echo $resdir | sed -e 's/\.[0-9]\+$//'`
-if test "$base_resdir" != "$resdir" -a -f $base_resdir/bzImage -a -f $base_resdir/vmlinux
+if test "$base_resdir" != "$resdir" && test -f $base_resdir/bzImage && test -f $base_resdir/vmlinux
then
# Rerunning previous test, so use that test's kernel.
QEMU="`identify_qemu $base_resdir/vmlinux`"
@@ -83,6 +82,17 @@ then
ln -s $base_resdir/.config $resdir # for kvm-recheck.sh
# Arch-independent indicator
touch $resdir/builtkernel
+elif test "$base_resdir" != "$resdir"
+then
+ # Rerunning previous test for which build failed
+ ln -s $base_resdir/Make*.out $resdir # for kvm-recheck.sh
+ ln -s $base_resdir/.config $resdir # for kvm-recheck.sh
+ echo Initial build failed, not running KVM, see $resdir.
+ if test -f $resdir/build.wait
+ then
+ mv $resdir/build.wait $resdir/build.ready
+ fi
+ exit 1
elif kvm-build.sh $T/KcList $resdir
then
# Had to build a kernel for this test.
@@ -107,23 +117,23 @@ else
# Build failed.
cp .config $resdir || :
echo Build failed, not running KVM, see $resdir.
- if test -f $builddir.wait
+ if test -f $resdir/build.wait
then
- mv $builddir.wait $builddir.ready
+ mv $resdir/build.wait $resdir/build.ready
fi
exit 1
fi
-if test -f $builddir.wait
+if test -f $resdir/build.wait
then
- mv $builddir.wait $builddir.ready
+ mv $resdir/build.wait $resdir/build.ready
fi
-while test -f $builddir.ready
+while test -f $resdir/build.ready
do
sleep 1
done
-seconds=$4
-qemu_args=$5
-boot_args=$6
+seconds=$3
+qemu_args=$4
+boot_args_in=$5
if test -z "$TORTURE_BUILDONLY"
then
@@ -133,7 +143,7 @@ fi
# Generate -smp qemu argument.
qemu_args="-enable-kvm -nographic $qemu_args"
cpu_count=`configNR_CPUS.sh $resdir/ConfigFragment`
-cpu_count=`configfrag_boot_cpus "$boot_args" "$config_template" "$cpu_count"`
+cpu_count=`configfrag_boot_cpus "$boot_args_in" "$config_template" "$cpu_count"`
if test "$cpu_count" -gt "$TORTURE_ALLOTED_CPUS"
then
echo CPU count limited from $cpu_count to $TORTURE_ALLOTED_CPUS | tee -a $resdir/Warnings
@@ -149,16 +159,52 @@ qemu_args="$qemu_args `identify_qemu_args "$QEMU" "$resdir/console.log"`"
qemu_append="`identify_qemu_append "$QEMU"`"
# Pull in Kconfig-fragment boot parameters
-boot_args="`configfrag_boot_params "$boot_args" "$config_template"`"
+boot_args="`configfrag_boot_params "$boot_args_in" "$config_template"`"
# Generate kernel-version-specific boot parameters
boot_args="`per_version_boot_params "$boot_args" $resdir/.config $seconds`"
if test -n "$TORTURE_BOOT_GDB_ARG"
then
boot_args="$boot_args $TORTURE_BOOT_GDB_ARG"
fi
+
+# Give bare-metal advice
+modprobe_args="`echo $boot_args | tr -s ' ' '\012' | grep "^$TORTURE_MOD\." | sed -e "s/$TORTURE_MOD\.//g"`"
+kboot_args="`echo $boot_args | tr -s ' ' '\012' | grep -v "^$TORTURE_MOD\."`"
+testid_txt="`dirname $resdir`/testid.txt"
+touch $resdir/bare-metal
+echo To run this scenario on bare metal: >> $resdir/bare-metal
+echo >> $resdir/bare-metal
+echo " 1." Set your bare-metal build tree to the state shown in this file: >> $resdir/bare-metal
+echo " " $testid_txt >> $resdir/bare-metal
+echo " 2." Update your bare-metal build tree"'"s .config based on this file: >> $resdir/bare-metal
+echo " " $resdir/ConfigFragment >> $resdir/bare-metal
+echo " 3." Make the bare-metal kernel"'"s build system aware of your .config updates: >> $resdir/bare-metal
+echo " " $ 'yes "" | make oldconfig' >> $resdir/bare-metal
+echo " 4." Build your bare-metal kernel. >> $resdir/bare-metal
+echo " 5." Boot your bare-metal kernel with the following parameters: >> $resdir/bare-metal
+echo " " $kboot_args >> $resdir/bare-metal
+echo " 6." Start the test with the following command: >> $resdir/bare-metal
+echo " " $ modprobe $TORTURE_MOD $modprobe_args >> $resdir/bare-metal
+echo " 7." After some time, end the test with the following command: >> $resdir/bare-metal
+echo " " $ rmmod $TORTURE_MOD >> $resdir/bare-metal
+echo " 8." Copy your bare-metal kernel"'"s .config file, overwriting this file: >> $resdir/bare-metal
+echo " " $resdir/.config >> $resdir/bare-metal
+echo " 9." Copy the console output from just before the modprobe to just after >> $resdir/bare-metal
+echo " " the rmmod into this file: >> $resdir/bare-metal
+echo " " $resdir/console.log >> $resdir/bare-metal
+echo "10." Check for runtime errors using the following command: >> $resdir/bare-metal
+echo " " $ tools/testing/selftests/rcutorture/bin/kvm-recheck.sh `dirname $resdir` >> $resdir/bare-metal
+echo >> $resdir/bare-metal
+echo Some of the above steps may be skipped if you build your bare-metal >> $resdir/bare-metal
+echo kernel here: `head -n 1 $testid_txt | sed -e 's/^Build directory: //'` >> $resdir/bare-metal
+
echo $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append \"$qemu_append $boot_args\" $TORTURE_QEMU_GDB_ARG > $resdir/qemu-cmd
echo "# TORTURE_SHUTDOWN_GRACE=$TORTURE_SHUTDOWN_GRACE" >> $resdir/qemu-cmd
echo "# seconds=$seconds" >> $resdir/qemu-cmd
+echo "# TORTURE_KCONFIG_GDB_ARG=\"$TORTURE_KCONFIG_GDB_ARG\"" >> $resdir/qemu-cmd
+echo "# TORTURE_JITTER_START=\"$TORTURE_JITTER_START\"" >> $resdir/qemu-cmd
+echo "# TORTURE_JITTER_STOP=\"$TORTURE_JITTER_STOP\"" >> $resdir/qemu-cmd
+echo "# TORTURE_TRUST_MAKE=\"$TORTURE_TRUST_MAKE\"; export TORTURE_TRUST_MAKE" >> $resdir/qemu-cmd
if test -n "$TORTURE_BUILDONLY"
then
@@ -167,140 +213,4 @@ then
exit 0
fi
-# Decorate qemu-cmd with redirection, backgrounding, and PID capture
-sed -e 's/$/ 2>\&1 \&/' < $resdir/qemu-cmd > $T/qemu-cmd
-echo 'echo $! > $resdir/qemu_pid' >> $T/qemu-cmd
-
-# In case qemu refuses to run...
-echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log
-
-# Attempt to run qemu
-kstarttime=`gawk 'BEGIN { print systime() }' < /dev/null`
-( . $T/qemu-cmd; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) &
-commandcompleted=0
-if test -z "$TORTURE_KCONFIG_GDB_ARG"
-then
- sleep 10 # Give qemu's pid a chance to reach the file
- if test -s "$resdir/qemu_pid"
- then
- qemu_pid=`cat "$resdir/qemu_pid"`
- echo Monitoring qemu job at pid $qemu_pid
- else
- qemu_pid=""
- echo Monitoring qemu job at yet-as-unknown pid
- fi
-fi
-if test -n "$TORTURE_KCONFIG_GDB_ARG"
-then
- echo Waiting for you to attach a debug session, for example: > /dev/tty
- echo " gdb $base_resdir/vmlinux" > /dev/tty
- echo 'After symbols load and the "(gdb)" prompt appears:' > /dev/tty
- echo " target remote :1234" > /dev/tty
- echo " continue" > /dev/tty
- kstarttime=`gawk 'BEGIN { print systime() }' < /dev/null`
-fi
-while :
-do
- if test -z "$qemu_pid" -a -s "$resdir/qemu_pid"
- then
- qemu_pid=`cat "$resdir/qemu_pid"`
- fi
- kruntime=`gawk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null`
- if test -z "$qemu_pid" || kill -0 "$qemu_pid" > /dev/null 2>&1
- then
- if test -n "$TORTURE_KCONFIG_GDB_ARG"
- then
- :
- elif test $kruntime -ge $seconds || test -f "$resdir/../STOP.1"
- then
- break;
- fi
- sleep 1
- else
- commandcompleted=1
- if test $kruntime -lt $seconds
- then
- echo Completed in $kruntime vs. $seconds >> $resdir/Warnings 2>&1
- grep "^(qemu) qemu:" $resdir/kvm-test-1-run.sh.out >> $resdir/Warnings 2>&1
- killpid="`sed -n "s/^(qemu) qemu: terminating on signal [0-9]* from pid \([0-9]*\).*$/\1/p" $resdir/Warnings`"
- if test -n "$killpid"
- then
- echo "ps -fp $killpid" >> $resdir/Warnings 2>&1
- ps -fp $killpid >> $resdir/Warnings 2>&1
- fi
- # Reduce probability of PID reuse by allowing a one-minute buffer
- if test $((kruntime + 60)) -lt $seconds && test -s "$resdir/../jitter_pids"
- then
- awk < "$resdir/../jitter_pids" '
- NF > 0 {
- pidlist = pidlist " " $1;
- n++;
- }
- END {
- if (n > 0) {
- print "kill " pidlist;
- }
- }' | sh
- fi
- else
- echo ' ---' `date`: "Kernel done"
- fi
- break
- fi
-done
-if test -z "$qemu_pid" -a -s "$resdir/qemu_pid"
-then
- qemu_pid=`cat "$resdir/qemu_pid"`
-fi
-if test $commandcompleted -eq 0 -a -n "$qemu_pid"
-then
- if ! test -f "$resdir/../STOP.1"
- then
- echo Grace period for qemu job at pid $qemu_pid
- fi
- oldline="`tail $resdir/console.log`"
- while :
- do
- if test -f "$resdir/../STOP.1"
- then
- echo "PID $qemu_pid killed due to run STOP.1 request" >> $resdir/Warnings 2>&1
- kill -KILL $qemu_pid
- break
- fi
- kruntime=`gawk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null`
- if kill -0 $qemu_pid > /dev/null 2>&1
- then
- :
- else
- break
- fi
- must_continue=no
- newline="`tail $resdir/console.log`"
- if test "$newline" != "$oldline" && echo $newline | grep -q ' [0-9]\+us : '
- then
- must_continue=yes
- fi
- last_ts="`tail $resdir/console.log | grep '^\[ *[0-9]\+\.[0-9]\+]' | tail -1 | sed -e 's/^\[ *//' -e 's/\..*$//'`"
- if test -z "$last_ts"
- then
- last_ts=0
- fi
- if test "$newline" != "$oldline" -a "$last_ts" -lt $((seconds + $TORTURE_SHUTDOWN_GRACE))
- then
- must_continue=yes
- fi
- if test $must_continue = no -a $kruntime -ge $((seconds + $TORTURE_SHUTDOWN_GRACE))
- then
- echo "!!! PID $qemu_pid hung at $kruntime vs. $seconds seconds" >> $resdir/Warnings 2>&1
- kill -KILL $qemu_pid
- break
- fi
- oldline=$newline
- sleep 10
- done
-elif test -z "$qemu_pid"
-then
- echo Unknown PID, cannot kill qemu command
-fi
-
-parse-console.sh $resdir/console.log $title
+kvm-test-1-run-qemu.sh $resdir
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-transform.sh b/tools/testing/selftests/rcutorture/bin/kvm-transform.sh
index c45a953ef393..d40b4e60a50c 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-transform.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-transform.sh
@@ -3,7 +3,7 @@
#
# Transform a qemu-cmd file to allow reuse.
#
-# Usage: kvm-transform.sh bzImage console.log < qemu-cmd-in > qemu-cmd-out
+# Usage: kvm-transform.sh bzImage console.log jitter_dir [ seconds ] < qemu-cmd-in > qemu-cmd-out
#
# bzImage: Kernel and initrd from the same prior kvm.sh run.
# console.log: File into which to place console output.
@@ -29,20 +29,62 @@ then
echo "Need console log file name."
exit 1
fi
+jitter_dir="$3"
+if test -z "$jitter_dir" || ! test -d "$jitter_dir"
+then
+ echo "Need valid jitter directory: '$jitter_dir'"
+ exit 1
+fi
+seconds="$4"
+if test -n "$seconds" && echo $seconds | grep -q '[^0-9]'
+then
+ echo "Invalid duration, should be numeric in seconds: '$seconds'"
+ exit 1
+fi
+
+awk -v image="$image" -v consolelog="$consolelog" -v jitter_dir="$jitter_dir" \
+ -v seconds="$seconds" '
+/^# seconds=/ {
+ if (seconds == "")
+ print $0;
+ else
+ print "# seconds=" seconds;
+ next;
+}
+
+/^# TORTURE_JITTER_START=/ {
+ print "# TORTURE_JITTER_START=\". jitterstart.sh " $4 " " jitter_dir " " $6 " " $7;
+ next;
+}
+
+/^# TORTURE_JITTER_STOP=/ {
+ print "# TORTURE_JITTER_STOP=\". jitterstop.sh " " " jitter_dir " " $5;
+ next;
+}
+
+/^#/ {
+ print $0;
+ next;
+}
-awk -v image="$image" -v consolelog="$consolelog" '
{
line = "";
for (i = 1; i <= NF; i++) {
- if (line == "")
+ if ("" seconds != "" && $i ~ /\.shutdown_secs=[0-9]*$/) {
+ sub(/[0-9]*$/, seconds, $i);
+ if (line == "")
+ line = $i;
+ else
+ line = line " " $i;
+ } else if (line == "") {
line = $i;
- else
+ } else {
line = line " " $i;
+ }
if ($i == "-serial") {
i++;
line = line " file:" consolelog;
- }
- if ($i == "-kernel") {
+ } else if ($i == "-kernel") {
i++;
line = line " " image;
}
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index 8d3c99b35e06..6bf00a003d3d 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -29,17 +29,21 @@ PATH=${KVM}/bin:$PATH; export PATH
TORTURE_ALLOTED_CPUS="`identify_qemu_vcpus`"
TORTURE_DEFCONFIG=defconfig
TORTURE_BOOT_IMAGE=""
+TORTURE_BUILDONLY=
TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD
TORTURE_KCONFIG_ARG=""
TORTURE_KCONFIG_GDB_ARG=""
TORTURE_BOOT_GDB_ARG=""
TORTURE_QEMU_GDB_ARG=""
+TORTURE_JITTER_START=""
+TORTURE_JITTER_STOP=""
TORTURE_KCONFIG_KASAN_ARG=""
TORTURE_KCONFIG_KCSAN_ARG=""
TORTURE_KMAKE_ARG=""
TORTURE_QEMU_MEM=512
TORTURE_SHUTDOWN_GRACE=180
TORTURE_SUITE=rcu
+TORTURE_MOD=rcutorture
TORTURE_TRUST_MAKE=""
resdir=""
configs=""
@@ -100,7 +104,7 @@ do
TORTURE_BUILDONLY=1
;;
--configs|--config)
- checkarg --configs "(list of config files)" "$#" "$2" '^[^/]\+$' '^--'
+ checkarg --configs "(list of config files)" "$#" "$2" '^[^/.a-z]\+$' '^--'
configs="$configs $2"
shift
;;
@@ -116,7 +120,7 @@ do
shift
;;
--datestamp)
- checkarg --datestamp "(relative pathname)" "$#" "$2" '^[a-zA-Z0-9._-/]*$' '^--'
+ checkarg --datestamp "(relative pathname)" "$#" "$2" '^[a-zA-Z0-9._/-]*$' '^--'
ds=$2
shift
;;
@@ -215,6 +219,7 @@ do
--torture)
checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuscale\|refscale\|scf\)$' '^--'
TORTURE_SUITE=$2
+ TORTURE_MOD="`echo $TORTURE_SUITE | sed -e 's/^\(lock\|rcu\|scf\)$/\1torture/'`"
shift
if test "$TORTURE_SUITE" = rcuscale || test "$TORTURE_SUITE" = refscale
then
@@ -381,6 +386,7 @@ TORTURE_QEMU_GDB_ARG="$TORTURE_QEMU_GDB_ARG"; export TORTURE_QEMU_GDB_ARG
TORTURE_KCONFIG_KASAN_ARG="$TORTURE_KCONFIG_KASAN_ARG"; export TORTURE_KCONFIG_KASAN_ARG
TORTURE_KCONFIG_KCSAN_ARG="$TORTURE_KCONFIG_KCSAN_ARG"; export TORTURE_KCONFIG_KCSAN_ARG
TORTURE_KMAKE_ARG="$TORTURE_KMAKE_ARG"; export TORTURE_KMAKE_ARG
+TORTURE_MOD="$TORTURE_MOD"; export TORTURE_MOD
TORTURE_QEMU_CMD="$TORTURE_QEMU_CMD"; export TORTURE_QEMU_CMD
TORTURE_QEMU_INTERACTIVE="$TORTURE_QEMU_INTERACTIVE"; export TORTURE_QEMU_INTERACTIVE
TORTURE_QEMU_MAC="$TORTURE_QEMU_MAC"; export TORTURE_QEMU_MAC
@@ -399,12 +405,17 @@ echo Results directory: $resdir/$ds
echo $scriptname $args
touch $resdir/$ds/log
echo $scriptname $args >> $resdir/$ds/log
-echo ${TORTURE_SUITE} > $resdir/$ds/TORTURE_SUITE
-pwd > $resdir/$ds/testid.txt
+echo ${TORTURE_SUITE} > $resdir/$ds/torture_suite
+echo Build directory: `pwd` > $resdir/$ds/testid.txt
if test -d .git
then
+ echo Current commit: `git rev-parse HEAD` >> $resdir/$ds/testid.txt
+ echo >> $resdir/$ds/testid.txt
+ echo ' ---' Output of "'"git status"'": >> $resdir/$ds/testid.txt
git status >> $resdir/$ds/testid.txt
- git rev-parse HEAD >> $resdir/$ds/testid.txt
+ echo >> $resdir/$ds/testid.txt
+ echo >> $resdir/$ds/testid.txt
+ echo ' ---' Output of "'"git diff HEAD"'": >> $resdir/$ds/testid.txt
git diff HEAD >> $resdir/$ds/testid.txt
fi
___EOF___
@@ -434,8 +445,17 @@ function dump(first, pastlast, batchnum)
print "echo ----Start batch " batchnum ": `date` | tee -a " rd "log";
print "needqemurun="
jn=1
+ njitter = 0;
+ split(jitter, ja);
+ if (ja[1] == -1 && ncpus == 0)
+ njitter = 1;
+ else if (ja[1] == -1)
+ njitter = ncpus;
+ else
+ njitter = ja[1];
+ print "TORTURE_JITTER_START=\". jitterstart.sh " njitter " " rd " " dur " " ja[2] " " ja[3] "\"; export TORTURE_JITTER_START";
+ print "TORTURE_JITTER_STOP=\". jitterstop.sh " rd " \"; export TORTURE_JITTER_STOP"
for (j = first; j < pastlast; j++) {
- builddir=KVM "/b" j - first + 1
cpusr[jn] = cpus[j];
if (cfrep[cf[j]] == "") {
cfr[jn] = cf[j];
@@ -444,15 +464,15 @@ function dump(first, pastlast, batchnum)
cfrep[cf[j]]++;
cfr[jn] = cf[j] "." cfrep[cf[j]];
}
+ builddir=rd cfr[jn] "/build";
if (cpusr[jn] > ncpus && ncpus != 0)
ovf = "-ovf";
else
ovf = "";
print "echo ", cfr[jn], cpusr[jn] ovf ": Starting build. `date` | tee -a " rd "log";
- print "rm -f " builddir ".*";
- print "touch " builddir ".wait";
print "mkdir " rd cfr[jn] " || :";
- print "kvm-test-1-run.sh " CONFIGDIR cf[j], builddir, rd cfr[jn], dur " \"" TORTURE_QEMU_ARG "\" \"" TORTURE_BOOTARGS "\" > " rd cfr[jn] "/kvm-test-1-run.sh.out 2>&1 &"
+ print "touch " builddir ".wait";
+ print "kvm-test-1-run.sh " CONFIGDIR cf[j], rd cfr[jn], dur " \"" TORTURE_QEMU_ARG "\" \"" TORTURE_BOOTARGS "\" > " rd cfr[jn] "/kvm-test-1-run.sh.out 2>&1 &"
print "echo ", cfr[jn], cpusr[jn] ovf ": Waiting for build to complete. `date` | tee -a " rd "log";
print "while test -f " builddir ".wait"
print "do"
@@ -461,23 +481,21 @@ function dump(first, pastlast, batchnum)
print "echo ", cfr[jn], cpusr[jn] ovf ": Build complete. `date` | tee -a " rd "log";
jn++;
}
+ print "runfiles="
for (j = 1; j < jn; j++) {
- builddir=KVM "/b" j
- print "rm -f " builddir ".ready"
+ builddir=rd cfr[j] "/build";
+ if (TORTURE_BUILDONLY)
+ print "rm -f " builddir ".ready"
+ else
+ print "mv " builddir ".ready " builddir ".run"
+ print "runfiles=\"$runfiles " builddir ".run\""
+ fi
print "if test -f \"" rd cfr[j] "/builtkernel\""
print "then"
print "\techo ----", cfr[j], cpusr[j] ovf ": Kernel present. `date` | tee -a " rd "log";
print "\tneedqemurun=1"
print "fi"
}
- njitter = 0;
- split(jitter, ja);
- if (ja[1] == -1 && ncpus == 0)
- njitter = 1;
- else if (ja[1] == -1)
- njitter = ncpus;
- else
- njitter = ja[1];
if (TORTURE_BUILDONLY && njitter != 0) {
njitter = 0;
print "echo Build-only run, so suppressing jitter | tee -a " rd "log"
@@ -488,19 +506,18 @@ function dump(first, pastlast, batchnum)
print "if test -n \"$needqemurun\""
print "then"
print "\techo ---- Starting kernels. `date` | tee -a " rd "log";
- print "\techo > " rd "jitter_pids"
- for (j = 0; j < njitter; j++) {
- print "\tjitter.sh " j " " dur " " ja[2] " " ja[3] "&"
- print "\techo $! >> " rd "jitter_pids"
- }
- print "\twait"
+ print "\t$TORTURE_JITTER_START";
+ print "\twhile ls $runfiles > /dev/null 2>&1"
+ print "\tdo"
+ print "\t\t:"
+ print "\tdone"
+ print "\t$TORTURE_JITTER_STOP";
print "\techo ---- All kernel runs complete. `date` | tee -a " rd "log";
print "else"
print "\twait"
print "\techo ---- No kernel runs. `date` | tee -a " rd "log";
print "fi"
for (j = 1; j < jn; j++) {
- builddir=KVM "/b" j
print "echo ----", cfr[j], cpusr[j] ovf ": Build/run results: | tee -a " rd "log";
print "cat " rd cfr[j] "/kvm-test-1-run.sh.out | tee -a " rd "log";
}
@@ -548,6 +565,18 @@ echo 'ret=$?' >> $T/script
echo "cat $T/kvm-recheck.sh.out | tee -a $resdir/$ds/log" >> $T/script
echo 'exit $ret' >> $T/script
+# Extract the tests and their batches from the script.
+egrep 'Start batch|Starting build\.' $T/script | grep -v ">>" |
+ sed -e 's/:.*$//' -e 's/^echo //' -e 's/-ovf//' |
+ awk '
+ /^----Start/ {
+ batchno = $3;
+ next;
+ }
+ {
+ print batchno, $1, $2
+ }' > $T/batches
+
if test "$dryrun" = script
then
cat $T/script
@@ -566,21 +595,14 @@ then
exit 0
elif test "$dryrun" = batches
then
- # Extract the tests and their batches from the script.
- egrep 'Start batch|Starting build\.' $T/script | grep -v ">>" |
- sed -e 's/:.*$//' -e 's/^echo //' -e 's/-ovf//' |
- awk '
- /^----Start/ {
- batchno = $3;
- next;
- }
- {
- print batchno, $1, $2
- }'
+ cat $T/batches
+ exit 0
else
- # Not a dryrun, so run the script.
+ # Not a dryrun. Record the batches and the number of CPUs, then run the script.
bash $T/script
ret=$?
+ cp $T/batches $resdir/$ds/batches
+ echo '#' cpus=$cpus >> $resdir/$ds/batches
echo " --- Done at `date` (`get_starttime_duration $starttime`) exitcode $ret" | tee -a $resdir/$ds/log
exit $ret
fi
diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index ad7525b7ac29..56e2e1a42569 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -374,7 +374,7 @@ done
if test "$do_kvfree" = "yes"
then
torture_bootargs="rcuscale.kfree_rcu_test=1 rcuscale.kfree_nthreads=16 rcuscale.holdoff=20 rcuscale.kfree_loops=10000 torture.disable_onoff_at_boot"
- torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 10 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --trust-make
+ torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 10 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 1G --trust-make
fi
echo " --- " $scriptname $args
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
index f2b20db9e296..98b6175e5aa0 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
+++ b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
@@ -7,8 +7,8 @@ TREE07
TREE09
SRCU-N
SRCU-P
-SRCU-t
-SRCU-u
+SRCU-T
+SRCU-U
TINY01
TINY02
TASKS01
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-T
index d6557c38dfe4..d6557c38dfe4 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-T
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-T.boot
index 238bfe3bd0cc..238bfe3bd0cc 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-T.boot
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-U
index 6bc24e99862f..6bc24e99862f 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-U
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-U.boot
index ce48c7b82673..ce48c7b82673 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-U.boot
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
index 1c218944b1e9..64f864f1f361 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
@@ -4,3 +4,4 @@ rcutree.gp_init_delay=3
rcutree.gp_cleanup_delay=3
rcutree.kthread_prio=2
threadirqs
+tree.use_softirq=0
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot
index 5adc6756792a..a8d94caf7d2f 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot
@@ -1 +1 @@
-rcutree.rcu_fanout_leaf=4 nohz_full=1-7
+rcutree.rcu_fanout_leaf=4 nohz_full=1-N
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot
index 22478fd3a865..94d38445d393 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot
@@ -1,3 +1,3 @@
rcupdate.rcu_self_test=1
rcutree.rcu_fanout_exact=1
-rcu_nocbs=0-7
+rcu_nocbs=all
diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh
index 0333e9b18522..ffbe15109f0d 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh
@@ -12,5 +12,5 @@
# Adds per-version torture-module parameters to kernels supporting them.
per_version_boot_params () {
echo $1 rcuscale.shutdown=1 \
- rcuscale.verbose=1
+ rcuscale.verbose=0
}
diff --git a/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh
index 321e82641287..f81fa2c541a6 100644
--- a/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh
+++ b/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh
@@ -12,5 +12,5 @@
# Adds per-version torture-module parameters to kernels supporting them.
per_version_boot_params () {
echo $1 refscale.shutdown=1 \
- refscale.verbose=1
+ refscale.verbose=0
}
diff --git a/tools/testing/selftests/resctrl/.gitignore b/tools/testing/selftests/resctrl/.gitignore
new file mode 100644
index 000000000000..ab68442b6bc8
--- /dev/null
+++ b/tools/testing/selftests/resctrl/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+resctrl_tests
diff --git a/tools/testing/selftests/resctrl/Makefile b/tools/testing/selftests/resctrl/Makefile
index d585cc1948cc..6bcee2ec91a9 100644
--- a/tools/testing/selftests/resctrl/Makefile
+++ b/tools/testing/selftests/resctrl/Makefile
@@ -1,5 +1,5 @@
CC = $(CROSS_COMPILE)gcc
-CFLAGS = -g -Wall
+CFLAGS = -g -Wall -O2 -D_FORTIFY_SOURCE=2
SRCS=$(wildcard *.c)
OBJS=$(SRCS:.c=.o)
diff --git a/tools/testing/selftests/resctrl/README b/tools/testing/selftests/resctrl/README
index 6e5a0ffa18e8..4b36b25b6ac0 100644
--- a/tools/testing/selftests/resctrl/README
+++ b/tools/testing/selftests/resctrl/README
@@ -46,8 +46,8 @@ ARGUMENTS
Parameter '-h' shows usage information.
usage: resctrl_tests [-h] [-b "benchmark_cmd [options]"] [-t test list] [-n no_of_bits]
- -b benchmark_cmd [options]: run specified benchmark for MBM, MBA and CQM default benchmark is builtin fill_buf
- -t test list: run tests specified in the test list, e.g. -t mbm, mba, cqm, cat
+ -b benchmark_cmd [options]: run specified benchmark for MBM, MBA and CMT default benchmark is builtin fill_buf
+ -t test list: run tests specified in the test list, e.g. -t mbm, mba, cmt, cat
-n no_of_bits: run cache tests using specified no of bits in cache bit mask
-p cpu_no: specify CPU number to run the test. 1 is default
-h: help
diff --git a/tools/testing/selftests/resctrl/cache.c b/tools/testing/selftests/resctrl/cache.c
index 38dbf4962e33..68ff856d36f0 100644
--- a/tools/testing/selftests/resctrl/cache.c
+++ b/tools/testing/selftests/resctrl/cache.c
@@ -111,7 +111,7 @@ static int get_llc_perf(unsigned long *llc_perf_miss)
/*
* Get LLC Occupancy as reported by RESCTRL FS
- * For CQM,
+ * For CMT,
* 1. If con_mon grp and mon grp given, then read from mon grp in
* con_mon grp
* 2. If only con_mon grp given, then read from con_mon grp
@@ -182,7 +182,7 @@ int measure_cache_vals(struct resctrl_val_param *param, int bm_pid)
/*
* Measure cache miss from perf.
*/
- if (!strcmp(param->resctrl_val, "cat")) {
+ if (!strncmp(param->resctrl_val, CAT_STR, sizeof(CAT_STR))) {
ret = get_llc_perf(&llc_perf_miss);
if (ret < 0)
return ret;
@@ -192,7 +192,7 @@ int measure_cache_vals(struct resctrl_val_param *param, int bm_pid)
/*
* Measure llc occupancy from resctrl.
*/
- if (!strcmp(param->resctrl_val, "cqm")) {
+ if (!strncmp(param->resctrl_val, CMT_STR, sizeof(CMT_STR))) {
ret = get_llc_occu_resctrl(&llc_occu_resc);
if (ret < 0)
return ret;
@@ -234,7 +234,7 @@ int cat_val(struct resctrl_val_param *param)
if (ret)
return ret;
- if ((strcmp(resctrl_val, "cat") == 0)) {
+ if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR))) {
ret = initialize_llc_perf();
if (ret)
return ret;
@@ -242,7 +242,7 @@ int cat_val(struct resctrl_val_param *param)
/* Test runs until the callback setup() tells the test to stop. */
while (1) {
- if (strcmp(resctrl_val, "cat") == 0) {
+ if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR))) {
ret = param->setup(1, param);
if (ret) {
ret = 0;
@@ -270,3 +270,45 @@ int cat_val(struct resctrl_val_param *param)
return ret;
}
+
+/*
+ * show_cache_info: show cache test result information
+ * @sum_llc_val: sum of LLC cache result data
+ * @no_of_bits: number of bits
+ * @cache_span: cache span in bytes for CMT or in lines for CAT
+ * @max_diff: max difference
+ * @max_diff_percent: max difference percentage
+ * @num_of_runs: number of runs
+ * @platform: show test information on this platform
+ * @cmt: CMT test or CAT test
+ *
+ * Return: 0 on success. non-zero on failure.
+ */
+int show_cache_info(unsigned long sum_llc_val, int no_of_bits,
+ unsigned long cache_span, unsigned long max_diff,
+ unsigned long max_diff_percent, unsigned long num_of_runs,
+ bool platform, bool cmt)
+{
+ unsigned long avg_llc_val = 0;
+ float diff_percent;
+ long avg_diff = 0;
+ int ret;
+
+ avg_llc_val = sum_llc_val / (num_of_runs - 1);
+ avg_diff = (long)abs(cache_span - avg_llc_val);
+ diff_percent = ((float)cache_span - avg_llc_val) / cache_span * 100;
+
+ ret = platform && abs((int)diff_percent) > max_diff_percent &&
+ (cmt ? (abs(avg_diff) > max_diff) : true);
+
+ ksft_print_msg("%s Check cache miss rate within %d%%\n",
+ ret ? "Fail:" : "Pass:", max_diff_percent);
+
+ ksft_print_msg("Percent diff=%d\n", abs((int)diff_percent));
+ ksft_print_msg("Number of bits: %d\n", no_of_bits);
+ ksft_print_msg("Average LLC val: %lu\n", avg_llc_val);
+ ksft_print_msg("Cache span (%s): %lu\n", cmt ? "bytes" : "lines",
+ cache_span);
+
+ return ret;
+}
diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c
index 5da43767b973..cd4f68388e0f 100644
--- a/tools/testing/selftests/resctrl/cat_test.c
+++ b/tools/testing/selftests/resctrl/cat_test.c
@@ -17,10 +17,10 @@
#define MAX_DIFF_PERCENT 4
#define MAX_DIFF 1000000
-int count_of_bits;
-char cbm_mask[256];
-unsigned long long_mask;
-unsigned long cache_size;
+static int count_of_bits;
+static char cbm_mask[256];
+static unsigned long long_mask;
+static unsigned long cache_size;
/*
* Change schemata. Write schemata to specified
@@ -52,27 +52,6 @@ static int cat_setup(int num, ...)
return ret;
}
-static void show_cache_info(unsigned long sum_llc_perf_miss, int no_of_bits,
- unsigned long span)
-{
- unsigned long allocated_cache_lines = span / 64;
- unsigned long avg_llc_perf_miss = 0;
- float diff_percent;
-
- avg_llc_perf_miss = sum_llc_perf_miss / (NUM_OF_RUNS - 1);
- diff_percent = ((float)allocated_cache_lines - avg_llc_perf_miss) /
- allocated_cache_lines * 100;
-
- printf("%sok CAT: cache miss rate within %d%%\n",
- !is_amd && abs((int)diff_percent) > MAX_DIFF_PERCENT ?
- "not " : "", MAX_DIFF_PERCENT);
- tests_run++;
- printf("# Percent diff=%d\n", abs((int)diff_percent));
- printf("# Number of bits: %d\n", no_of_bits);
- printf("# Avg_llc_perf_miss: %lu\n", avg_llc_perf_miss);
- printf("# Allocated cache lines: %lu\n", allocated_cache_lines);
-}
-
static int check_results(struct resctrl_val_param *param)
{
char *token_array[8], temp[512];
@@ -80,7 +59,7 @@ static int check_results(struct resctrl_val_param *param)
int runs = 0, no_of_bits = 0;
FILE *fp;
- printf("# Checking for pass/fail\n");
+ ksft_print_msg("Checking for pass/fail\n");
fp = fopen(param->filename, "r");
if (!fp) {
perror("# Cannot open file");
@@ -108,9 +87,9 @@ static int check_results(struct resctrl_val_param *param)
fclose(fp);
no_of_bits = count_bits(param->mask);
- show_cache_info(sum_llc_perf_miss, no_of_bits, param->span);
-
- return 0;
+ return show_cache_info(sum_llc_perf_miss, no_of_bits, param->span / 64,
+ MAX_DIFF, MAX_DIFF_PERCENT, NUM_OF_RUNS,
+ !is_amd, false);
}
void cat_test_cleanup(void)
@@ -132,11 +111,8 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type)
if (ret)
return ret;
- if (!validate_resctrl_feature_request("cat"))
- return -1;
-
/* Get default cbm mask for L3/L2 cache */
- ret = get_cbm_mask(cache_type);
+ ret = get_cbm_mask(cache_type, cbm_mask);
if (ret)
return ret;
@@ -146,15 +122,18 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type)
ret = get_cache_size(cpu_no, cache_type, &cache_size);
if (ret)
return ret;
- printf("cache size :%lu\n", cache_size);
+ ksft_print_msg("Cache size :%lu\n", cache_size);
/* Get max number of bits from default-cabm mask */
count_of_bits = count_bits(long_mask);
- if (n < 1 || n > count_of_bits - 1) {
- printf("Invalid input value for no_of_bits n!\n");
- printf("Please Enter value in range 1 to %d\n",
- count_of_bits - 1);
+ if (!n)
+ n = count_of_bits / 2;
+
+ if (n > count_of_bits - 1) {
+ ksft_print_msg("Invalid input value for no_of_bits n!\n");
+ ksft_print_msg("Please enter value in range 1 to %d\n",
+ count_of_bits - 1);
return -1;
}
@@ -164,7 +143,7 @@ int cat_perf_miss_val(int cpu_no, int n, char *cache_type)
return -1;
struct resctrl_val_param param = {
- .resctrl_val = "cat",
+ .resctrl_val = CAT_STR,
.cpu_no = cpu_no,
.mum_resctrlfs = 0,
.setup = cat_setup,
diff --git a/tools/testing/selftests/resctrl/cqm_test.c b/tools/testing/selftests/resctrl/cmt_test.c
index c8756152bd61..8968e36db99d 100644
--- a/tools/testing/selftests/resctrl/cqm_test.c
+++ b/tools/testing/selftests/resctrl/cmt_test.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Cache Monitoring Technology (CQM) test
+ * Cache Monitoring Technology (CMT) test
*
* Copyright (C) 2018 Intel Corporation
*
@@ -11,17 +11,17 @@
#include "resctrl.h"
#include <unistd.h>
-#define RESULT_FILE_NAME "result_cqm"
+#define RESULT_FILE_NAME "result_cmt"
#define NUM_OF_RUNS 5
#define MAX_DIFF 2000000
#define MAX_DIFF_PERCENT 15
-int count_of_bits;
-char cbm_mask[256];
-unsigned long long_mask;
-unsigned long cache_size;
+static int count_of_bits;
+static char cbm_mask[256];
+static unsigned long long_mask;
+static unsigned long cache_size;
-static int cqm_setup(int num, ...)
+static int cmt_setup(int num, ...)
{
struct resctrl_val_param *p;
va_list param;
@@ -39,38 +39,6 @@ static int cqm_setup(int num, ...)
return 0;
}
-static void show_cache_info(unsigned long sum_llc_occu_resc, int no_of_bits,
- unsigned long span)
-{
- unsigned long avg_llc_occu_resc = 0;
- float diff_percent;
- long avg_diff = 0;
- bool res;
-
- avg_llc_occu_resc = sum_llc_occu_resc / (NUM_OF_RUNS - 1);
- avg_diff = (long)abs(span - avg_llc_occu_resc);
-
- diff_percent = (((float)span - avg_llc_occu_resc) / span) * 100;
-
- if ((abs((int)diff_percent) <= MAX_DIFF_PERCENT) ||
- (abs(avg_diff) <= MAX_DIFF))
- res = true;
- else
- res = false;
-
- printf("%sok CQM: diff within %d, %d\%%\n", res ? "" : "not",
- MAX_DIFF, (int)MAX_DIFF_PERCENT);
-
- printf("# diff: %ld\n", avg_diff);
- printf("# percent diff=%d\n", abs((int)diff_percent));
- printf("# Results are displayed in (Bytes)\n");
- printf("# Number of bits: %d\n", no_of_bits);
- printf("# Avg_llc_occu_resc: %lu\n", avg_llc_occu_resc);
- printf("# llc_occu_exp (span): %lu\n", span);
-
- tests_run++;
-}
-
static int check_results(struct resctrl_val_param *param, int no_of_bits)
{
char *token_array[8], temp[512];
@@ -78,7 +46,7 @@ static int check_results(struct resctrl_val_param *param, int no_of_bits)
int runs = 0;
FILE *fp;
- printf("# checking for pass/fail\n");
+ ksft_print_msg("Checking for pass/fail\n");
fp = fopen(param->filename, "r");
if (!fp) {
perror("# Error in opening file\n");
@@ -86,7 +54,7 @@ static int check_results(struct resctrl_val_param *param, int no_of_bits)
return errno;
}
- while (fgets(temp, 1024, fp)) {
+ while (fgets(temp, sizeof(temp), fp)) {
char *token = strtok(temp, ":\t");
int fields = 0;
@@ -101,17 +69,18 @@ static int check_results(struct resctrl_val_param *param, int no_of_bits)
runs++;
}
fclose(fp);
- show_cache_info(sum_llc_occu_resc, no_of_bits, param->span);
- return 0;
+ return show_cache_info(sum_llc_occu_resc, no_of_bits, param->span,
+ MAX_DIFF, MAX_DIFF_PERCENT, NUM_OF_RUNS,
+ true, true);
}
-void cqm_test_cleanup(void)
+void cmt_test_cleanup(void)
{
remove(RESULT_FILE_NAME);
}
-int cqm_resctrl_val(int cpu_no, int n, char **benchmark_cmd)
+int cmt_resctrl_val(int cpu_no, int n, char **benchmark_cmd)
{
int ret, mum_resctrlfs;
@@ -122,10 +91,10 @@ int cqm_resctrl_val(int cpu_no, int n, char **benchmark_cmd)
if (ret)
return ret;
- if (!validate_resctrl_feature_request("cqm"))
+ if (!validate_resctrl_feature_request(CMT_STR))
return -1;
- ret = get_cbm_mask("L3");
+ ret = get_cbm_mask("L3", cbm_mask);
if (ret)
return ret;
@@ -134,18 +103,18 @@ int cqm_resctrl_val(int cpu_no, int n, char **benchmark_cmd)
ret = get_cache_size(cpu_no, "L3", &cache_size);
if (ret)
return ret;
- printf("cache size :%lu\n", cache_size);
+ ksft_print_msg("Cache size :%lu\n", cache_size);
count_of_bits = count_bits(long_mask);
if (n < 1 || n > count_of_bits) {
- printf("Invalid input value for numbr_of_bits n!\n");
- printf("Please Enter value in range 1 to %d\n", count_of_bits);
+ ksft_print_msg("Invalid input value for numbr_of_bits n!\n");
+ ksft_print_msg("Please enter value in range 1 to %d\n", count_of_bits);
return -1;
}
struct resctrl_val_param param = {
- .resctrl_val = "cqm",
+ .resctrl_val = CMT_STR,
.ctrlgrp = "c1",
.mongrp = "m1",
.cpu_no = cpu_no,
@@ -154,7 +123,7 @@ int cqm_resctrl_val(int cpu_no, int n, char **benchmark_cmd)
.mask = ~(long_mask << n) & long_mask,
.span = cache_size * n / count_of_bits,
.num_of_runs = 0,
- .setup = cqm_setup,
+ .setup = cmt_setup,
};
if (strcmp(benchmark_cmd[0], "fill_buf") == 0)
@@ -170,7 +139,7 @@ int cqm_resctrl_val(int cpu_no, int n, char **benchmark_cmd)
if (ret)
return ret;
- cqm_test_cleanup();
+ cmt_test_cleanup();
return 0;
}
diff --git a/tools/testing/selftests/resctrl/config b/tools/testing/selftests/resctrl/config
new file mode 100644
index 000000000000..8d9f2deb56ed
--- /dev/null
+++ b/tools/testing/selftests/resctrl/config
@@ -0,0 +1,2 @@
+CONFIG_X86_CPU_RESCTRL=y
+CONFIG_PROC_CPU_RESCTRL=y
diff --git a/tools/testing/selftests/resctrl/fill_buf.c b/tools/testing/selftests/resctrl/fill_buf.c
index 79c611c99a3d..51e5cf22632f 100644
--- a/tools/testing/selftests/resctrl/fill_buf.c
+++ b/tools/testing/selftests/resctrl/fill_buf.c
@@ -115,7 +115,7 @@ static int fill_cache_read(unsigned char *start_ptr, unsigned char *end_ptr,
while (1) {
ret = fill_one_span_read(start_ptr, end_ptr);
- if (!strcmp(resctrl_val, "cat"))
+ if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR)))
break;
}
@@ -134,7 +134,7 @@ static int fill_cache_write(unsigned char *start_ptr, unsigned char *end_ptr,
{
while (1) {
fill_one_span_write(start_ptr, end_ptr);
- if (!strcmp(resctrl_val, "cat"))
+ if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR)))
break;
}
diff --git a/tools/testing/selftests/resctrl/mba_test.c b/tools/testing/selftests/resctrl/mba_test.c
index 7bf8eaa6204b..1a1bdb6180cf 100644
--- a/tools/testing/selftests/resctrl/mba_test.c
+++ b/tools/testing/selftests/resctrl/mba_test.c
@@ -12,7 +12,7 @@
#define RESULT_FILE_NAME "result_mba"
#define NUM_OF_RUNS 5
-#define MAX_DIFF 300
+#define MAX_DIFF_PERCENT 5
#define ALLOCATION_MAX 100
#define ALLOCATION_MIN 10
#define ALLOCATION_STEP 10
@@ -56,13 +56,14 @@ static void show_mba_info(unsigned long *bw_imc, unsigned long *bw_resc)
int allocation, runs;
bool failed = false;
- printf("# Results are displayed in (MB)\n");
+ ksft_print_msg("Results are displayed in (MB)\n");
/* Memory bandwidth from 100% down to 10% */
for (allocation = 0; allocation < ALLOCATION_MAX / ALLOCATION_STEP;
allocation++) {
unsigned long avg_bw_imc, avg_bw_resc;
unsigned long sum_bw_imc = 0, sum_bw_resc = 0;
- unsigned long avg_diff;
+ int avg_diff_per;
+ float avg_diff;
/*
* The first run is discarded due to inaccurate value from
@@ -76,23 +77,26 @@ static void show_mba_info(unsigned long *bw_imc, unsigned long *bw_resc)
avg_bw_imc = sum_bw_imc / (NUM_OF_RUNS - 1);
avg_bw_resc = sum_bw_resc / (NUM_OF_RUNS - 1);
- avg_diff = labs((long)(avg_bw_resc - avg_bw_imc));
-
- printf("%sok MBA schemata percentage %u smaller than %d %%\n",
- avg_diff > MAX_DIFF ? "not " : "",
- ALLOCATION_MAX - ALLOCATION_STEP * allocation,
- MAX_DIFF);
- tests_run++;
- printf("# avg_diff: %lu\n", avg_diff);
- printf("# avg_bw_imc: %lu\n", avg_bw_imc);
- printf("# avg_bw_resc: %lu\n", avg_bw_resc);
- if (avg_diff > MAX_DIFF)
+ avg_diff = (float)labs(avg_bw_resc - avg_bw_imc) / avg_bw_imc;
+ avg_diff_per = (int)(avg_diff * 100);
+
+ ksft_print_msg("%s Check MBA diff within %d%% for schemata %u\n",
+ avg_diff_per > MAX_DIFF_PERCENT ?
+ "Fail:" : "Pass:",
+ MAX_DIFF_PERCENT,
+ ALLOCATION_MAX - ALLOCATION_STEP * allocation);
+
+ ksft_print_msg("avg_diff_per: %d%%\n", avg_diff_per);
+ ksft_print_msg("avg_bw_imc: %lu\n", avg_bw_imc);
+ ksft_print_msg("avg_bw_resc: %lu\n", avg_bw_resc);
+ if (avg_diff_per > MAX_DIFF_PERCENT)
failed = true;
}
- printf("%sok schemata change using MBA%s\n", failed ? "not " : "",
- failed ? " # at least one test failed" : "");
- tests_run++;
+ ksft_print_msg("%s Check schemata change using MBA\n",
+ failed ? "Fail:" : "Pass:");
+ if (failed)
+ ksft_print_msg("At least one test failed\n");
}
static int check_results(void)
@@ -141,7 +145,7 @@ void mba_test_cleanup(void)
int mba_schemata_change(int cpu_no, char *bw_report, char **benchmark_cmd)
{
struct resctrl_val_param param = {
- .resctrl_val = "mba",
+ .resctrl_val = MBA_STR,
.ctrlgrp = "c1",
.mongrp = "m1",
.cpu_no = cpu_no,
@@ -154,9 +158,6 @@ int mba_schemata_change(int cpu_no, char *bw_report, char **benchmark_cmd)
remove(RESULT_FILE_NAME);
- if (!validate_resctrl_feature_request("mba"))
- return -1;
-
ret = resctrl_val(benchmark_cmd, &param);
if (ret)
return ret;
diff --git a/tools/testing/selftests/resctrl/mbm_test.c b/tools/testing/selftests/resctrl/mbm_test.c
index 4700f7453f81..8392e5c55ed0 100644
--- a/tools/testing/selftests/resctrl/mbm_test.c
+++ b/tools/testing/selftests/resctrl/mbm_test.c
@@ -11,16 +11,16 @@
#include "resctrl.h"
#define RESULT_FILE_NAME "result_mbm"
-#define MAX_DIFF 300
+#define MAX_DIFF_PERCENT 5
#define NUM_OF_RUNS 5
-static void
+static int
show_bw_info(unsigned long *bw_imc, unsigned long *bw_resc, int span)
{
unsigned long avg_bw_imc = 0, avg_bw_resc = 0;
unsigned long sum_bw_imc = 0, sum_bw_resc = 0;
- long avg_diff = 0;
- int runs;
+ int runs, ret, avg_diff_per;
+ float avg_diff = 0;
/*
* Discard the first value which is inaccurate due to monitoring setup
@@ -33,15 +33,18 @@ show_bw_info(unsigned long *bw_imc, unsigned long *bw_resc, int span)
avg_bw_imc = sum_bw_imc / 4;
avg_bw_resc = sum_bw_resc / 4;
- avg_diff = avg_bw_resc - avg_bw_imc;
-
- printf("%sok MBM: diff within %d%%\n",
- labs(avg_diff) > MAX_DIFF ? "not " : "", MAX_DIFF);
- tests_run++;
- printf("# avg_diff: %lu\n", labs(avg_diff));
- printf("# Span (MB): %d\n", span);
- printf("# avg_bw_imc: %lu\n", avg_bw_imc);
- printf("# avg_bw_resc: %lu\n", avg_bw_resc);
+ avg_diff = (float)labs(avg_bw_resc - avg_bw_imc) / avg_bw_imc;
+ avg_diff_per = (int)(avg_diff * 100);
+
+ ret = avg_diff_per > MAX_DIFF_PERCENT;
+ ksft_print_msg("%s Check MBM diff within %d%%\n",
+ ret ? "Fail:" : "Pass:", MAX_DIFF_PERCENT);
+ ksft_print_msg("avg_diff_per: %d%%\n", avg_diff_per);
+ ksft_print_msg("Span (MB): %d\n", span);
+ ksft_print_msg("avg_bw_imc: %lu\n", avg_bw_imc);
+ ksft_print_msg("avg_bw_resc: %lu\n", avg_bw_resc);
+
+ return ret;
}
static int check_results(int span)
@@ -49,10 +52,10 @@ static int check_results(int span)
unsigned long bw_imc[NUM_OF_RUNS], bw_resc[NUM_OF_RUNS];
char temp[1024], *token_array[8];
char output[] = RESULT_FILE_NAME;
- int runs;
+ int runs, ret;
FILE *fp;
- printf("# Checking for pass/fail\n");
+ ksft_print_msg("Checking for pass/fail\n");
fp = fopen(output, "r");
if (!fp) {
@@ -76,11 +79,11 @@ static int check_results(int span)
runs++;
}
- show_bw_info(bw_imc, bw_resc, span);
+ ret = show_bw_info(bw_imc, bw_resc, span);
fclose(fp);
- return 0;
+ return ret;
}
static int mbm_setup(int num, ...)
@@ -114,7 +117,7 @@ void mbm_test_cleanup(void)
int mbm_bw_change(int span, int cpu_no, char *bw_report, char **benchmark_cmd)
{
struct resctrl_val_param param = {
- .resctrl_val = "mbm",
+ .resctrl_val = MBM_STR,
.ctrlgrp = "c1",
.mongrp = "m1",
.span = span,
@@ -128,9 +131,6 @@ int mbm_bw_change(int span, int cpu_no, char *bw_report, char **benchmark_cmd)
remove(RESULT_FILE_NAME);
- if (!validate_resctrl_feature_request("mbm"))
- return -1;
-
ret = resctrl_val(benchmark_cmd, &param);
if (ret)
return ret;
diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h
index 39bf59c6b9c5..1ad10c47e31d 100644
--- a/tools/testing/selftests/resctrl/resctrl.h
+++ b/tools/testing/selftests/resctrl/resctrl.h
@@ -23,11 +23,16 @@
#include <sys/eventfd.h>
#include <asm/unistd.h>
#include <linux/perf_event.h>
+#include "../kselftest.h"
#define MB (1024 * 1024)
#define RESCTRL_PATH "/sys/fs/resctrl"
#define PHYS_ID_PATH "/sys/devices/system/cpu/cpu"
#define CBM_MASK_PATH "/sys/fs/resctrl/info"
+#define L3_PATH "/sys/fs/resctrl/info/L3"
+#define MB_PATH "/sys/fs/resctrl/info/MB"
+#define L3_MON_PATH "/sys/fs/resctrl/info/L3_MON"
+#define L3_MON_FEATURES_PATH "/sys/fs/resctrl/info/L3_MON/mon_features"
#define PARENT_EXIT(err_msg) \
do { \
@@ -62,11 +67,15 @@ struct resctrl_val_param {
int (*setup)(int num, ...);
};
-pid_t bm_pid, ppid;
-int tests_run;
+#define MBM_STR "mbm"
+#define MBA_STR "mba"
+#define CMT_STR "cmt"
+#define CAT_STR "cat"
-char llc_occup_path[1024];
-bool is_amd;
+extern pid_t bm_pid, ppid;
+
+extern char llc_occup_path[1024];
+extern bool is_amd;
bool check_resctrlfs_support(void);
int filter_dmesg(void);
@@ -74,7 +83,7 @@ int remount_resctrlfs(bool mum_resctrlfs);
int get_resource_id(int cpu_no, int *resource_id);
int umount_resctrlfs(void);
int validate_bw_report_request(char *bw_report);
-bool validate_resctrl_feature_request(char *resctrl_val);
+bool validate_resctrl_feature_request(const char *resctrl_val);
char *fgrep(FILE *inf, const char *str);
int taskset_benchmark(pid_t bm_pid, int cpu_no);
void run_benchmark(int signum, siginfo_t *info, void *ucontext);
@@ -92,16 +101,20 @@ void tests_cleanup(void);
void mbm_test_cleanup(void);
int mba_schemata_change(int cpu_no, char *bw_report, char **benchmark_cmd);
void mba_test_cleanup(void);
-int get_cbm_mask(char *cache_type);
+int get_cbm_mask(char *cache_type, char *cbm_mask);
int get_cache_size(int cpu_no, char *cache_type, unsigned long *cache_size);
void ctrlc_handler(int signum, siginfo_t *info, void *ptr);
int cat_val(struct resctrl_val_param *param);
void cat_test_cleanup(void);
int cat_perf_miss_val(int cpu_no, int no_of_bits, char *cache_type);
-int cqm_resctrl_val(int cpu_no, int n, char **benchmark_cmd);
+int cmt_resctrl_val(int cpu_no, int n, char **benchmark_cmd);
unsigned int count_bits(unsigned long n);
-void cqm_test_cleanup(void);
+void cmt_test_cleanup(void);
int get_core_sibling(int cpu_no);
int measure_cache_vals(struct resctrl_val_param *param, int bm_pid);
+int show_cache_info(unsigned long sum_llc_val, int no_of_bits,
+ unsigned long cache_span, unsigned long max_diff,
+ unsigned long max_diff_percent, unsigned long num_of_runs,
+ bool platform, bool cmt);
#endif /* RESCTRL_H */
diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c
index 425cc85ac883..f51b5fc066a3 100644
--- a/tools/testing/selftests/resctrl/resctrl_tests.c
+++ b/tools/testing/selftests/resctrl/resctrl_tests.c
@@ -37,10 +37,10 @@ void detect_amd(void)
static void cmd_help(void)
{
printf("usage: resctrl_tests [-h] [-b \"benchmark_cmd [options]\"] [-t test list] [-n no_of_bits]\n");
- printf("\t-b benchmark_cmd [options]: run specified benchmark for MBM, MBA and CQM");
- printf("\t default benchmark is builtin fill_buf\n");
+ printf("\t-b benchmark_cmd [options]: run specified benchmark for MBM, MBA and CMT\n");
+ printf("\t default benchmark is builtin fill_buf\n");
printf("\t-t test list: run tests specified in the test list, ");
- printf("e.g. -t mbm, mba, cqm, cat\n");
+ printf("e.g. -t mbm, mba, cmt, cat\n");
printf("\t-n no_of_bits: run cache tests using specified no of bits in cache bit mask\n");
printf("\t-p cpu_no: specify CPU number to run the test. 1 is default\n");
printf("\t-h: help\n");
@@ -50,17 +50,88 @@ void tests_cleanup(void)
{
mbm_test_cleanup();
mba_test_cleanup();
- cqm_test_cleanup();
+ cmt_test_cleanup();
+ cat_test_cleanup();
+}
+
+static void run_mbm_test(bool has_ben, char **benchmark_cmd, int span,
+ int cpu_no, char *bw_report)
+{
+ int res;
+
+ ksft_print_msg("Starting MBM BW change ...\n");
+
+ if (!validate_resctrl_feature_request(MBM_STR)) {
+ ksft_test_result_skip("Hardware does not support MBM or MBM is disabled\n");
+ return;
+ }
+
+ if (!has_ben)
+ sprintf(benchmark_cmd[5], "%s", MBA_STR);
+ res = mbm_bw_change(span, cpu_no, bw_report, benchmark_cmd);
+ ksft_test_result(!res, "MBM: bw change\n");
+ mbm_test_cleanup();
+}
+
+static void run_mba_test(bool has_ben, char **benchmark_cmd, int span,
+ int cpu_no, char *bw_report)
+{
+ int res;
+
+ ksft_print_msg("Starting MBA Schemata change ...\n");
+
+ if (!validate_resctrl_feature_request(MBA_STR)) {
+ ksft_test_result_skip("Hardware does not support MBA or MBA is disabled\n");
+ return;
+ }
+
+ if (!has_ben)
+ sprintf(benchmark_cmd[1], "%d", span);
+ res = mba_schemata_change(cpu_no, bw_report, benchmark_cmd);
+ ksft_test_result(!res, "MBA: schemata change\n");
+ mba_test_cleanup();
+}
+
+static void run_cmt_test(bool has_ben, char **benchmark_cmd, int cpu_no)
+{
+ int res;
+
+ ksft_print_msg("Starting CMT test ...\n");
+ if (!validate_resctrl_feature_request(CMT_STR)) {
+ ksft_test_result_skip("Hardware does not support CMT or CMT is disabled\n");
+ return;
+ }
+
+ if (!has_ben)
+ sprintf(benchmark_cmd[5], "%s", CMT_STR);
+ res = cmt_resctrl_val(cpu_no, 5, benchmark_cmd);
+ ksft_test_result(!res, "CMT: test\n");
+ cmt_test_cleanup();
+}
+
+static void run_cat_test(int cpu_no, int no_of_bits)
+{
+ int res;
+
+ ksft_print_msg("Starting CAT test ...\n");
+
+ if (!validate_resctrl_feature_request(CAT_STR)) {
+ ksft_test_result_skip("Hardware does not support CAT or CAT is disabled\n");
+ return;
+ }
+
+ res = cat_perf_miss_val(cpu_no, no_of_bits, "L3");
+ ksft_test_result(!res, "CAT: test\n");
cat_test_cleanup();
}
int main(int argc, char **argv)
{
- bool has_ben = false, mbm_test = true, mba_test = true, cqm_test = true;
- int res, c, cpu_no = 1, span = 250, argc_new = argc, i, no_of_bits = 5;
+ bool has_ben = false, mbm_test = true, mba_test = true, cmt_test = true;
+ int c, cpu_no = 1, span = 250, argc_new = argc, i, no_of_bits = 0;
char *benchmark_cmd[BENCHMARK_ARGS], bw_report[64], bm_type[64];
char benchmark_cmd_area[BENCHMARK_ARGS][BENCHMARK_ARG_SIZE];
- int ben_ind, ben_count;
+ int ben_ind, ben_count, tests = 0;
bool cat_test = true;
for (i = 0; i < argc; i++) {
@@ -73,7 +144,7 @@ int main(int argc, char **argv)
}
}
- while ((c = getopt(argc_new, argv, "ht:b:")) != -1) {
+ while ((c = getopt(argc_new, argv, "ht:b:n:p:")) != -1) {
char *token;
switch (c) {
@@ -82,17 +153,21 @@ int main(int argc, char **argv)
mbm_test = false;
mba_test = false;
- cqm_test = false;
+ cmt_test = false;
cat_test = false;
while (token) {
- if (!strcmp(token, "mbm")) {
+ if (!strncmp(token, MBM_STR, sizeof(MBM_STR))) {
mbm_test = true;
- } else if (!strcmp(token, "mba")) {
+ tests++;
+ } else if (!strncmp(token, MBA_STR, sizeof(MBA_STR))) {
mba_test = true;
- } else if (!strcmp(token, "cqm")) {
- cqm_test = true;
- } else if (!strcmp(token, "cat")) {
+ tests++;
+ } else if (!strncmp(token, CMT_STR, sizeof(CMT_STR))) {
+ cmt_test = true;
+ tests++;
+ } else if (!strncmp(token, CAT_STR, sizeof(CAT_STR))) {
cat_test = true;
+ tests++;
} else {
printf("invalid argument\n");
@@ -106,6 +181,10 @@ int main(int argc, char **argv)
break;
case 'n':
no_of_bits = atoi(optarg);
+ if (no_of_bits <= 0) {
+ printf("Bail out! invalid argument for no_of_bits\n");
+ return -1;
+ }
break;
case 'h':
cmd_help();
@@ -118,7 +197,7 @@ int main(int argc, char **argv)
}
}
- printf("TAP version 13\n");
+ ksft_print_header();
/*
* Typically we need root privileges, because:
@@ -126,7 +205,7 @@ int main(int argc, char **argv)
* 2. We execute perf commands
*/
if (geteuid() != 0)
- printf("# WARNING: not running as root, tests may fail.\n");
+ return ksft_exit_fail_msg("Not running as root, abort testing.\n");
/* Detect AMD vendor */
detect_amd();
@@ -155,48 +234,26 @@ int main(int argc, char **argv)
sprintf(bw_report, "reads");
sprintf(bm_type, "fill_buf");
- check_resctrlfs_support();
+ if (!check_resctrlfs_support())
+ return ksft_exit_fail_msg("resctrl FS does not exist\n");
+
filter_dmesg();
- if (!is_amd && mbm_test) {
- printf("# Starting MBM BW change ...\n");
- if (!has_ben)
- sprintf(benchmark_cmd[5], "%s", "mba");
- res = mbm_bw_change(span, cpu_no, bw_report, benchmark_cmd);
- printf("%sok MBM: bw change\n", res ? "not " : "");
- mbm_test_cleanup();
- tests_run++;
- }
+ ksft_set_plan(tests ? : 4);
- if (!is_amd && mba_test) {
- printf("# Starting MBA Schemata change ...\n");
- if (!has_ben)
- sprintf(benchmark_cmd[1], "%d", span);
- res = mba_schemata_change(cpu_no, bw_report, benchmark_cmd);
- printf("%sok MBA: schemata change\n", res ? "not " : "");
- mba_test_cleanup();
- tests_run++;
- }
+ if (!is_amd && mbm_test)
+ run_mbm_test(has_ben, benchmark_cmd, span, cpu_no, bw_report);
- if (cqm_test) {
- printf("# Starting CQM test ...\n");
- if (!has_ben)
- sprintf(benchmark_cmd[5], "%s", "cqm");
- res = cqm_resctrl_val(cpu_no, no_of_bits, benchmark_cmd);
- printf("%sok CQM: test\n", res ? "not " : "");
- cqm_test_cleanup();
- tests_run++;
- }
+ if (!is_amd && mba_test)
+ run_mba_test(has_ben, benchmark_cmd, span, cpu_no, bw_report);
- if (cat_test) {
- printf("# Starting CAT test ...\n");
- res = cat_perf_miss_val(cpu_no, no_of_bits, "L3");
- printf("%sok CAT: test\n", res ? "not " : "");
- tests_run++;
- cat_test_cleanup();
- }
+ if (cmt_test)
+ run_cmt_test(has_ben, benchmark_cmd, cpu_no);
+
+ if (cat_test)
+ run_cat_test(cpu_no, no_of_bits);
- printf("1..%d\n", tests_run);
+ umount_resctrlfs();
- return 0;
+ return ksft_exit_pass();
}
diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c
index 520fea3606d1..95224345c78e 100644
--- a/tools/testing/selftests/resctrl/resctrl_val.c
+++ b/tools/testing/selftests/resctrl/resctrl_val.c
@@ -221,8 +221,8 @@ static int read_from_imc_dir(char *imc_dir, int count)
*/
static int num_of_imcs(void)
{
+ char imc_dir[512], *temp;
unsigned int count = 0;
- char imc_dir[512];
struct dirent *ep;
int ret;
DIR *dp;
@@ -230,7 +230,25 @@ static int num_of_imcs(void)
dp = opendir(DYN_PMU_PATH);
if (dp) {
while ((ep = readdir(dp))) {
- if (strstr(ep->d_name, UNCORE_IMC)) {
+ temp = strstr(ep->d_name, UNCORE_IMC);
+ if (!temp)
+ continue;
+
+ /*
+ * imc counters are named as "uncore_imc_<n>", hence
+ * increment the pointer to point to <n>. Note that
+ * sizeof(UNCORE_IMC) would count for null character as
+ * well and hence the last underscore character in
+ * uncore_imc'_' need not be counted.
+ */
+ temp = temp + sizeof(UNCORE_IMC);
+
+ /*
+ * Some directories under "DYN_PMU_PATH" could have
+ * names like "uncore_imc_free_running", hence, check if
+ * first character is a numerical digit or not.
+ */
+ if (temp[0] >= '0' && temp[0] <= '9') {
sprintf(imc_dir, "%s/%s/", DYN_PMU_PATH,
ep->d_name);
ret = read_from_imc_dir(imc_dir, count);
@@ -282,9 +300,9 @@ static int initialize_mem_bw_imc(void)
* Memory B/W utilized by a process on a socket can be calculated using
* iMC counters. Perf events are used to read these counters.
*
- * Return: >= 0 on success. < 0 on failure.
+ * Return: = 0 on success. < 0 on failure.
*/
-static float get_mem_bw_imc(int cpu_no, char *bw_report)
+static int get_mem_bw_imc(int cpu_no, char *bw_report, float *bw_imc)
{
float reads, writes, of_mul_read, of_mul_write;
int imc, j, ret;
@@ -355,13 +373,18 @@ static float get_mem_bw_imc(int cpu_no, char *bw_report)
close(imc_counters_config[imc][WRITE].fd);
}
- if (strcmp(bw_report, "reads") == 0)
- return reads;
+ if (strcmp(bw_report, "reads") == 0) {
+ *bw_imc = reads;
+ return 0;
+ }
- if (strcmp(bw_report, "writes") == 0)
- return writes;
+ if (strcmp(bw_report, "writes") == 0) {
+ *bw_imc = writes;
+ return 0;
+ }
- return (reads + writes);
+ *bw_imc = reads + writes;
+ return 0;
}
void set_mbm_path(const char *ctrlgrp, const char *mongrp, int resource_id)
@@ -397,10 +420,10 @@ static void initialize_mem_bw_resctrl(const char *ctrlgrp, const char *mongrp,
return;
}
- if (strcmp(resctrl_val, "mbm") == 0)
+ if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)))
set_mbm_path(ctrlgrp, mongrp, resource_id);
- if ((strcmp(resctrl_val, "mba") == 0)) {
+ if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) {
if (ctrlgrp)
sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH,
RESCTRL_PATH, ctrlgrp, resource_id);
@@ -420,9 +443,8 @@ static void initialize_mem_bw_resctrl(const char *ctrlgrp, const char *mongrp,
* 1. If con_mon grp is given, then read from it
* 2. If con_mon grp is not given, then read from root con_mon grp
*/
-static unsigned long get_mem_bw_resctrl(void)
+static int get_mem_bw_resctrl(unsigned long *mbm_total)
{
- unsigned long mbm_total = 0;
FILE *fp;
fp = fopen(mbm_total_path, "r");
@@ -431,7 +453,7 @@ static unsigned long get_mem_bw_resctrl(void)
return -1;
}
- if (fscanf(fp, "%lu", &mbm_total) <= 0) {
+ if (fscanf(fp, "%lu", mbm_total) <= 0) {
perror("Could not get mbm local bytes");
fclose(fp);
@@ -439,7 +461,7 @@ static unsigned long get_mem_bw_resctrl(void)
}
fclose(fp);
- return mbm_total;
+ return 0;
}
pid_t bm_pid, ppid;
@@ -449,7 +471,7 @@ void ctrlc_handler(int signum, siginfo_t *info, void *ptr)
kill(bm_pid, SIGKILL);
umount_resctrlfs();
tests_cleanup();
- printf("Ending\n\n");
+ ksft_print_msg("Ending\n\n");
exit(EXIT_SUCCESS);
}
@@ -492,7 +514,7 @@ static int print_results_bw(char *filename, int bm_pid, float bw_imc,
return 0;
}
-static void set_cqm_path(const char *ctrlgrp, const char *mongrp, char sock_num)
+static void set_cmt_path(const char *ctrlgrp, const char *mongrp, char sock_num)
{
if (strlen(ctrlgrp) && strlen(mongrp))
sprintf(llc_occup_path, CON_MON_LCC_OCCUP_PATH, RESCTRL_PATH,
@@ -512,7 +534,7 @@ static void set_cqm_path(const char *ctrlgrp, const char *mongrp, char sock_num)
* @ctrlgrp: Name of the control monitor group (con_mon grp)
* @mongrp: Name of the monitor group (mon grp)
* @cpu_no: CPU number that the benchmark PID is binded to
- * @resctrl_val: Resctrl feature (Eg: cat, cqm.. etc)
+ * @resctrl_val: Resctrl feature (Eg: cat, cmt.. etc)
*/
static void initialize_llc_occu_resctrl(const char *ctrlgrp, const char *mongrp,
int cpu_no, char *resctrl_val)
@@ -524,14 +546,15 @@ static void initialize_llc_occu_resctrl(const char *ctrlgrp, const char *mongrp,
return;
}
- if (strcmp(resctrl_val, "cqm") == 0)
- set_cqm_path(ctrlgrp, mongrp, resource_id);
+ if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)))
+ set_cmt_path(ctrlgrp, mongrp, resource_id);
}
static int
measure_vals(struct resctrl_val_param *param, unsigned long *bw_resc_start)
{
- unsigned long bw_imc, bw_resc, bw_resc_end;
+ unsigned long bw_resc, bw_resc_end;
+ float bw_imc;
int ret;
/*
@@ -541,13 +564,13 @@ measure_vals(struct resctrl_val_param *param, unsigned long *bw_resc_start)
* Compare the two values to validate resctrl value.
* It takes 1sec to measure the data.
*/
- bw_imc = get_mem_bw_imc(param->cpu_no, param->bw_report);
- if (bw_imc <= 0)
- return bw_imc;
+ ret = get_mem_bw_imc(param->cpu_no, param->bw_report, &bw_imc);
+ if (ret < 0)
+ return ret;
- bw_resc_end = get_mem_bw_resctrl();
- if (bw_resc_end <= 0)
- return bw_resc_end;
+ ret = get_mem_bw_resctrl(&bw_resc_end);
+ if (ret < 0)
+ return ret;
bw_resc = (bw_resc_end - *bw_resc_start) / MB;
ret = print_results_bw(param->filename, bm_pid, bw_imc, bw_resc);
@@ -579,8 +602,8 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param)
if (strcmp(param->filename, "") == 0)
sprintf(param->filename, "stdio");
- if ((strcmp(resctrl_val, "mba")) == 0 ||
- (strcmp(resctrl_val, "mbm")) == 0) {
+ if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR)) ||
+ !strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) {
ret = validate_bw_report_request(param->bw_report);
if (ret)
return ret;
@@ -645,7 +668,7 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param)
PARENT_EXIT("Child is done");
}
- printf("# benchmark PID: %d\n", bm_pid);
+ ksft_print_msg("Benchmark PID: %d\n", bm_pid);
/*
* Register CTRL-C handler for parent, as it has to kill benchmark
@@ -674,15 +697,15 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param)
if (ret)
goto out;
- if ((strcmp(resctrl_val, "mbm") == 0) ||
- (strcmp(resctrl_val, "mba") == 0)) {
+ if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) ||
+ !strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) {
ret = initialize_mem_bw_imc();
if (ret)
goto out;
initialize_mem_bw_resctrl(param->ctrlgrp, param->mongrp,
param->cpu_no, resctrl_val);
- } else if (strcmp(resctrl_val, "cqm") == 0)
+ } else if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)))
initialize_llc_occu_resctrl(param->ctrlgrp, param->mongrp,
param->cpu_no, resctrl_val);
@@ -710,8 +733,8 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param)
/* Test runs until the callback setup() tells the test to stop. */
while (1) {
- if ((strcmp(resctrl_val, "mbm") == 0) ||
- (strcmp(resctrl_val, "mba") == 0)) {
+ if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) ||
+ !strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) {
ret = param->setup(1, param);
if (ret) {
ret = 0;
@@ -721,7 +744,7 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param)
ret = measure_vals(param, &bw_resc_start);
if (ret)
break;
- } else if (strcmp(resctrl_val, "cqm") == 0) {
+ } else if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) {
ret = param->setup(1, param);
if (ret) {
ret = 0;
diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c
index 19c0ec4045a4..5f5a166ade60 100644
--- a/tools/testing/selftests/resctrl/resctrlfs.c
+++ b/tools/testing/selftests/resctrl/resctrlfs.c
@@ -10,8 +10,6 @@
*/
#include "resctrl.h"
-int tests_run;
-
static int find_resctrl_mount(char *buffer)
{
FILE *mounts;
@@ -49,8 +47,6 @@ static int find_resctrl_mount(char *buffer)
return -ENOENT;
}
-char cbm_mask[256];
-
/*
* remount_resctrlfs - Remount resctrl FS at /sys/fs/resctrl
* @mum_resctrlfs: Should the resctrl FS be remounted?
@@ -70,28 +66,25 @@ int remount_resctrlfs(bool mum_resctrlfs)
if (ret)
strcpy(mountpoint, RESCTRL_PATH);
- if (!ret && mum_resctrlfs && umount(mountpoint)) {
- printf("not ok unmounting \"%s\"\n", mountpoint);
- perror("# umount");
- tests_run++;
- }
+ if (!ret && mum_resctrlfs && umount(mountpoint))
+ ksft_print_msg("Fail: unmounting \"%s\"\n", mountpoint);
if (!ret && !mum_resctrlfs)
return 0;
+ ksft_print_msg("Mounting resctrl to \"%s\"\n", RESCTRL_PATH);
ret = mount("resctrl", RESCTRL_PATH, "resctrl", 0, NULL);
- printf("%sok mounting resctrl to \"%s\"\n", ret ? "not " : "",
- RESCTRL_PATH);
if (ret)
perror("# mount");
- tests_run++;
-
return ret;
}
int umount_resctrlfs(void)
{
+ if (find_resctrl_mount(NULL))
+ return 0;
+
if (umount(RESCTRL_PATH)) {
perror("# Unable to umount resctrl");
@@ -205,16 +198,18 @@ int get_cache_size(int cpu_no, char *cache_type, unsigned long *cache_size)
/*
* get_cbm_mask - Get cbm mask for given cache
* @cache_type: Cache level L2/L3
- *
- * Mask is stored in cbm_mask which is global variable.
+ * @cbm_mask: cbm_mask returned as a string
*
* Return: = 0 on success, < 0 on failure.
*/
-int get_cbm_mask(char *cache_type)
+int get_cbm_mask(char *cache_type, char *cbm_mask)
{
char cbm_mask_path[1024];
FILE *fp;
+ if (!cbm_mask)
+ return -1;
+
sprintf(cbm_mask_path, "%s/%s/cbm_mask", CBM_MASK_PATH, cache_type);
fp = fopen(cbm_mask_path, "r");
@@ -268,7 +263,7 @@ int get_core_sibling(int cpu_no)
while (token) {
sibling_cpu_no = atoi(token);
/* Skipping core 0 as we don't want to run test on core 0 */
- if (sibling_cpu_no != 0)
+ if (sibling_cpu_no != 0 && sibling_cpu_no != cpu_no)
break;
token = strtok(NULL, "-,");
}
@@ -334,7 +329,7 @@ void run_benchmark(int signum, siginfo_t *info, void *ucontext)
operation = atoi(benchmark_cmd[4]);
sprintf(resctrl_val, "%s", benchmark_cmd[5]);
- if (strcmp(resctrl_val, "cqm") != 0)
+ if (strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)))
buffer_span = span * MB;
else
buffer_span = span;
@@ -458,9 +453,9 @@ int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp,
if (ret)
goto out;
- /* Create mon grp and write pid into it for "mbm" and "cqm" test */
- if ((strcmp(resctrl_val, "cqm") == 0) ||
- (strcmp(resctrl_val, "mbm") == 0)) {
+ /* Create mon grp and write pid into it for "mbm" and "cmt" test */
+ if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)) ||
+ !strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) {
if (strlen(mongrp)) {
sprintf(monitorgroup_p, "%s/mon_groups", controlgroup);
sprintf(monitorgroup, "%s/%s", monitorgroup_p, mongrp);
@@ -477,13 +472,10 @@ int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp,
}
out:
- printf("%sok writing benchmark parameters to resctrl FS\n",
- ret ? "not " : "");
+ ksft_print_msg("Writing benchmark parameters to resctrl FS\n");
if (ret)
perror("# writing to resctrlfs");
- tests_run++;
-
return ret;
}
@@ -505,13 +497,13 @@ int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, char *resctrl_val)
int resource_id, ret = 0;
FILE *fp;
- if ((strcmp(resctrl_val, "mba") != 0) &&
- (strcmp(resctrl_val, "cat") != 0) &&
- (strcmp(resctrl_val, "cqm") != 0))
+ if (strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR)) &&
+ strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR)) &&
+ strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)))
return -ENOENT;
if (!schemata) {
- printf("# Skipping empty schemata update\n");
+ ksft_print_msg("Skipping empty schemata update\n");
return -1;
}
@@ -528,9 +520,10 @@ int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, char *resctrl_val)
else
sprintf(controlgroup, "%s/schemata", RESCTRL_PATH);
- if (!strcmp(resctrl_val, "cat") || !strcmp(resctrl_val, "cqm"))
+ if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR)) ||
+ !strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)))
sprintf(schema, "%s%d%c%s", "L3:", resource_id, '=', schemata);
- if (strcmp(resctrl_val, "mba") == 0)
+ if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR)))
sprintf(schema, "%s%d%c%s", "MB:", resource_id, '=', schemata);
fp = fopen(controlgroup, "w");
@@ -551,10 +544,9 @@ int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, char *resctrl_val)
fclose(fp);
out:
- printf("%sok Write schema \"%s\" to resctrl FS%s%s\n",
- ret ? "not " : "", schema, ret ? " # " : "",
- ret ? reason : "");
- tests_run++;
+ ksft_print_msg("Write schema \"%s\" to resctrl FS%s%s\n",
+ schema, ret ? " # " : "",
+ ret ? reason : "");
return ret;
}
@@ -578,18 +570,20 @@ bool check_resctrlfs_support(void)
fclose(inf);
- printf("%sok kernel supports resctrl filesystem\n", ret ? "" : "not ");
- tests_run++;
+ ksft_print_msg("%s Check kernel supports resctrl filesystem\n",
+ ret ? "Pass:" : "Fail:");
+
+ if (!ret)
+ return ret;
dp = opendir(RESCTRL_PATH);
- printf("%sok resctrl mountpoint \"%s\" exists\n",
- dp ? "" : "not ", RESCTRL_PATH);
+ ksft_print_msg("%s Check resctrl mountpoint \"%s\" exists\n",
+ dp ? "Pass:" : "Fail:", RESCTRL_PATH);
if (dp)
closedir(dp);
- tests_run++;
- printf("# resctrl filesystem %s mounted\n",
- find_resctrl_mount(NULL) ? "not" : "is");
+ ksft_print_msg("resctrl filesystem %s mounted\n",
+ find_resctrl_mount(NULL) ? "not" : "is");
return ret;
}
@@ -615,26 +609,56 @@ char *fgrep(FILE *inf, const char *str)
* validate_resctrl_feature_request - Check if requested feature is valid.
* @resctrl_val: Requested feature
*
- * Return: 0 on success, non-zero on failure
+ * Return: True if the feature is supported, else false
*/
-bool validate_resctrl_feature_request(char *resctrl_val)
+bool validate_resctrl_feature_request(const char *resctrl_val)
{
- FILE *inf = fopen("/proc/cpuinfo", "r");
+ struct stat statbuf;
bool found = false;
char *res;
+ FILE *inf;
- if (!inf)
+ if (!resctrl_val)
return false;
- res = fgrep(inf, "flags");
-
- if (res) {
- char *s = strchr(res, ':');
+ if (remount_resctrlfs(false))
+ return false;
- found = s && !strstr(s, resctrl_val);
- free(res);
+ if (!strncmp(resctrl_val, CAT_STR, sizeof(CAT_STR))) {
+ if (!stat(L3_PATH, &statbuf))
+ return true;
+ } else if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) {
+ if (!stat(MB_PATH, &statbuf))
+ return true;
+ } else if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) ||
+ !strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) {
+ if (!stat(L3_MON_PATH, &statbuf)) {
+ inf = fopen(L3_MON_FEATURES_PATH, "r");
+ if (!inf)
+ return false;
+
+ if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) {
+ res = fgrep(inf, "llc_occupancy");
+ if (res) {
+ found = true;
+ free(res);
+ }
+ }
+
+ if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) {
+ res = fgrep(inf, "mbm_total_bytes");
+ if (res) {
+ free(res);
+ res = fgrep(inf, "mbm_local_bytes");
+ if (res) {
+ found = true;
+ free(res);
+ }
+ }
+ }
+ fclose(inf);
+ }
}
- fclose(inf);
return found;
}
@@ -671,9 +695,9 @@ int filter_dmesg(void)
while (fgets(line, 1024, fp)) {
if (strstr(line, "intel_rdt:"))
- printf("# dmesg: %s", line);
+ ksft_print_msg("dmesg: %s", line);
if (strstr(line, "resctrl:"))
- printf("# dmesg: %s", line);
+ ksft_print_msg("dmesg: %s", line);
}
fclose(fp);
waitpid(pid, NULL, 0);
diff --git a/tools/testing/selftests/sgx/defines.h b/tools/testing/selftests/sgx/defines.h
index 592c1ccf4576..0bd73428d2f3 100644
--- a/tools/testing/selftests/sgx/defines.h
+++ b/tools/testing/selftests/sgx/defines.h
@@ -14,7 +14,7 @@
#define __aligned(x) __attribute__((__aligned__(x)))
#define __packed __attribute__((packed))
-#include "../../../../arch/x86/kernel/cpu/sgx/arch.h"
+#include "../../../../arch/x86/include/asm/sgx.h"
#include "../../../../arch/x86/include/asm/enclu.h"
#include "../../../../arch/x86/include/uapi/asm/sgx.h"
diff --git a/tools/testing/selftests/sgx/load.c b/tools/testing/selftests/sgx/load.c
index 9d43b75aaa55..f441ac34b4d4 100644
--- a/tools/testing/selftests/sgx/load.c
+++ b/tools/testing/selftests/sgx/load.c
@@ -45,19 +45,19 @@ static bool encl_map_bin(const char *path, struct encl *encl)
fd = open(path, O_RDONLY);
if (fd == -1) {
- perror("open()");
+ perror("enclave executable open()");
return false;
}
ret = stat(path, &sb);
if (ret) {
- perror("stat()");
+ perror("enclave executable stat()");
goto err;
}
bin = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
if (bin == MAP_FAILED) {
- perror("mmap()");
+ perror("enclave executable mmap()");
goto err;
}
@@ -90,8 +90,7 @@ static bool encl_ioc_create(struct encl *encl)
ioc.src = (unsigned long)secs;
rc = ioctl(encl->fd, SGX_IOC_ENCLAVE_CREATE, &ioc);
if (rc) {
- fprintf(stderr, "SGX_IOC_ENCLAVE_CREATE failed: errno=%d\n",
- errno);
+ perror("SGX_IOC_ENCLAVE_CREATE failed");
munmap((void *)secs->base, encl->encl_size);
return false;
}
@@ -116,31 +115,72 @@ static bool encl_ioc_add_pages(struct encl *encl, struct encl_segment *seg)
rc = ioctl(encl->fd, SGX_IOC_ENCLAVE_ADD_PAGES, &ioc);
if (rc < 0) {
- fprintf(stderr, "SGX_IOC_ENCLAVE_ADD_PAGES failed: errno=%d.\n",
- errno);
+ perror("SGX_IOC_ENCLAVE_ADD_PAGES failed");
return false;
}
return true;
}
+
+
bool encl_load(const char *path, struct encl *encl)
{
+ const char device_path[] = "/dev/sgx_enclave";
Elf64_Phdr *phdr_tbl;
off_t src_offset;
Elf64_Ehdr *ehdr;
+ struct stat sb;
+ void *ptr;
int i, j;
int ret;
+ int fd = -1;
memset(encl, 0, sizeof(*encl));
- ret = open("/dev/sgx_enclave", O_RDWR);
- if (ret < 0) {
- fprintf(stderr, "Unable to open /dev/sgx_enclave\n");
+ fd = open(device_path, O_RDWR);
+ if (fd < 0) {
+ perror("Unable to open /dev/sgx_enclave");
+ goto err;
+ }
+
+ ret = stat(device_path, &sb);
+ if (ret) {
+ perror("device file stat()");
+ goto err;
+ }
+
+ /*
+ * This just checks if the /dev file has these permission
+ * bits set. It does not check that the current user is
+ * the owner or in the owning group.
+ */
+ if (!(sb.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH))) {
+ fprintf(stderr, "no execute permissions on device file %s\n", device_path);
+ goto err;
+ }
+
+ ptr = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED, fd, 0);
+ if (ptr == (void *)-1) {
+ perror("mmap for read");
+ goto err;
+ }
+ munmap(ptr, PAGE_SIZE);
+
+#define ERR_MSG \
+"mmap() succeeded for PROT_READ, but failed for PROT_EXEC.\n" \
+" Check that current user has execute permissions on %s and \n" \
+" that /dev does not have noexec set: mount | grep \"/dev .*noexec\"\n" \
+" If so, remount it executable: mount -o remount,exec /dev\n\n"
+
+ ptr = mmap(NULL, PAGE_SIZE, PROT_EXEC, MAP_SHARED, fd, 0);
+ if (ptr == (void *)-1) {
+ fprintf(stderr, ERR_MSG, device_path);
goto err;
}
+ munmap(ptr, PAGE_SIZE);
- encl->fd = ret;
+ encl->fd = fd;
if (!encl_map_bin(path, encl))
goto err;
@@ -217,6 +257,8 @@ bool encl_load(const char *path, struct encl *encl)
return true;
err:
+ if (fd != -1)
+ close(fd);
encl_delete(encl);
return false;
}
@@ -229,7 +271,7 @@ static bool encl_map_area(struct encl *encl)
area = mmap(NULL, encl_size * 2, PROT_NONE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (area == MAP_FAILED) {
- perror("mmap");
+ perror("reservation mmap()");
return false;
}
@@ -268,8 +310,7 @@ bool encl_build(struct encl *encl)
ioc.sigstruct = (uint64_t)&encl->sigstruct;
ret = ioctl(encl->fd, SGX_IOC_ENCLAVE_INIT, &ioc);
if (ret) {
- fprintf(stderr, "SGX_IOC_ENCLAVE_INIT failed: errno=%d\n",
- errno);
+ perror("SGX_IOC_ENCLAVE_INIT failed");
return false;
}
diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c
index 724cec700926..d304a4044eb9 100644
--- a/tools/testing/selftests/sgx/main.c
+++ b/tools/testing/selftests/sgx/main.c
@@ -15,6 +15,7 @@
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
+#include <sys/auxv.h>
#include "defines.h"
#include "main.h"
#include "../kselftest.h"
@@ -28,24 +29,6 @@ struct vdso_symtab {
Elf64_Word *elf_hashtab;
};
-static void *vdso_get_base_addr(char *envp[])
-{
- Elf64_auxv_t *auxv;
- int i;
-
- for (i = 0; envp[i]; i++)
- ;
-
- auxv = (Elf64_auxv_t *)&envp[i + 1];
-
- for (i = 0; auxv[i].a_type != AT_NULL; i++) {
- if (auxv[i].a_type == AT_SYSINFO_EHDR)
- return (void *)auxv[i].a_un.a_val;
- }
-
- return NULL;
-}
-
static Elf64_Dyn *vdso_get_dyntab(void *addr)
{
Elf64_Ehdr *ehdr = addr;
@@ -162,7 +145,7 @@ static int user_handler(long rdi, long rsi, long rdx, long ursp, long r8, long r
return 0;
}
-int main(int argc, char *argv[], char *envp[])
+int main(int argc, char *argv[])
{
struct sgx_enclave_run run;
struct vdso_symtab symtab;
@@ -195,7 +178,7 @@ int main(int argc, char *argv[], char *envp[])
addr = mmap((void *)encl.encl_base + seg->offset, seg->size,
seg->prot, MAP_SHARED | MAP_FIXED, encl.fd, 0);
if (addr == MAP_FAILED) {
- fprintf(stderr, "mmap() failed, errno=%d.\n", errno);
+ perror("mmap() segment failed");
exit(KSFT_FAIL);
}
}
@@ -203,7 +186,8 @@ int main(int argc, char *argv[], char *envp[])
memset(&run, 0, sizeof(run));
run.tcs = encl.encl_base;
- addr = vdso_get_base_addr(envp);
+ /* Get vDSO base address */
+ addr = (void *)getauxval(AT_SYSINFO_EHDR);
if (!addr)
goto err;
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
index b8268da5adaa..8e45792703ed 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
@@ -764,5 +764,53 @@
"teardown": [
"$TC actions flush action police"
]
+ },
+ {
+ "id": "cdd7",
+ "name": "Add valid police action with packets per second rate limit",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police pkts_rate 1000 pkts_burst 200 index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 0bit burst 0b mtu 4096Mb pkts_rate 1000 pkts_burst 200",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "f5bc",
+ "name": "Add invalid police action with both bps and pps",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 1kbit burst 10k pkts_rate 1000 pkts_burst 200 index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x1 ",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json b/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json
index 8e8c1ae12260..e0c5f060ccb9 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json
@@ -24,6 +24,30 @@
]
},
{
+ "id": "4297",
+ "name": "Add simple action with change command",
+ "category": [
+ "actions",
+ "simple"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action simple",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions change action simple sdata \"Not changed\" index 60",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action simple",
+ "matchPattern": "action order [0-9]*: Simple <Not changed>.*index 60 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action simple"
+ ]
+ },
+ {
"id": "6d4c",
"name": "Add simple action with duplicate index",
"category": [
@@ -151,5 +175,64 @@
"teardown": [
"$TC actions flush action simple"
]
+ },
+ {
+ "id": "8d07",
+ "name": "Verify cleanup of failed actions batch add",
+ "category": [
+ "actions",
+ "simple"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action simple",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action simple sdata \"2\" index 2",
+ [
+ "$TC actions add action simple sdata \"1\" index 1 action simple sdata \"2\" index 2",
+ 255
+ ],
+ "$TC actions flush action simple"
+ ],
+ "cmdUnderTest": "$TC actions add action simple sdata \"2\" index 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action simple",
+ "matchPattern": "action order [0-9]*: Simple <2>.*index 2 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action simple"
+ ]
+ },
+ {
+ "id": "a68a",
+ "name": "Verify cleanup of failed actions batch change",
+ "category": [
+ "actions",
+ "simple"
+ ],
+ "setup": [
+ [
+ "$TC actions flush action simple",
+ 0,
+ 1,
+ 255
+ ],
+ [
+ "$TC actions change action simple sdata \"1\" index 1 action simple sdata \"2\" goto chain 42 index 2",
+ 255
+ ],
+ "$TC actions flush action simple"
+ ],
+ "cmdUnderTest": "$TC actions add action simple sdata \"1\" index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action simple",
+ "matchPattern": "action order [0-9]*: Simple <1>.*index 1 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action simple"
+ ]
}
]
diff --git a/tools/testing/selftests/timers/clocksource-switch.c b/tools/testing/selftests/timers/clocksource-switch.c
index bfc974b4572d..ef8eb3604595 100644
--- a/tools/testing/selftests/timers/clocksource-switch.c
+++ b/tools/testing/selftests/timers/clocksource-switch.c
@@ -3,7 +3,7 @@
* (C) Copyright IBM 2012
* Licensed under the GPLv2
*
- * NOTE: This is a meta-test which quickly changes the clocksourc and
+ * NOTE: This is a meta-test which quickly changes the clocksource and
* then uses other tests to detect problems. Thus this test requires
* that the inconsistency-check and nanosleep tests be present in the
* same directory it is run from.
@@ -134,7 +134,7 @@ int main(int argv, char **argc)
return -1;
}
- /* Check everything is sane before we start switching asyncrhonously */
+ /* Check everything is sane before we start switching asynchronously */
for (i = 0; i < count; i++) {
printf("Validating clocksource %s\n", clocksource_list[i]);
if (change_clocksource(clocksource_list[i])) {
diff --git a/tools/testing/selftests/timers/leap-a-day.c b/tools/testing/selftests/timers/leap-a-day.c
index 19e46ed5dfb5..23eb398c8140 100644
--- a/tools/testing/selftests/timers/leap-a-day.c
+++ b/tools/testing/selftests/timers/leap-a-day.c
@@ -5,7 +5,7 @@
* Licensed under the GPLv2
*
* This test signals the kernel to insert a leap second
- * every day at midnight GMT. This allows for stessing the
+ * every day at midnight GMT. This allows for stressing the
* kernel's leap-second behavior, as well as how well applications
* handle the leap-second discontinuity.
*
diff --git a/tools/testing/selftests/timers/leapcrash.c b/tools/testing/selftests/timers/leapcrash.c
index dc80728ed191..f70802c5dd0d 100644
--- a/tools/testing/selftests/timers/leapcrash.c
+++ b/tools/testing/selftests/timers/leapcrash.c
@@ -4,10 +4,10 @@
* (C) Copyright 2013, 2015 Linaro Limited
* Licensed under the GPL
*
- * This test demonstrates leapsecond deadlock that is possibe
+ * This test demonstrates leapsecond deadlock that is possible
* on kernels from 2.6.26 to 3.3.
*
- * WARNING: THIS WILL LIKELY HARDHANG SYSTEMS AND MAY LOSE DATA
+ * WARNING: THIS WILL LIKELY HARD HANG SYSTEMS AND MAY LOSE DATA
* RUN AT YOUR OWN RISK!
* To build:
* $ gcc leapcrash.c -o leapcrash -lrt
diff --git a/tools/testing/selftests/timers/threadtest.c b/tools/testing/selftests/timers/threadtest.c
index cf3e48919874..80aed4bf06fb 100644
--- a/tools/testing/selftests/timers/threadtest.c
+++ b/tools/testing/selftests/timers/threadtest.c
@@ -76,7 +76,7 @@ void checklist(struct timespec *list, int size)
/* The shared thread shares a global list
* that each thread fills while holding the lock.
- * This stresses clock syncronization across cpus.
+ * This stresses clock synchronization across cpus.
*/
void *shared_thread(void *arg)
{
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index d42115e4284d..8b0cd421ebd3 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -101,7 +101,7 @@ endef
ifeq ($(CAN_BUILD_I386),1)
$(BINARIES_32): CFLAGS += -m32
$(BINARIES_32): LDLIBS += -lrt -ldl -lm
-$(BINARIES_32): %_32: %.c
+$(BINARIES_32): $(OUTPUT)/%_32: %.c
$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
$(foreach t,$(TARGETS),$(eval $(call gen-target-rule-32,$(t))))
endif
@@ -109,7 +109,7 @@ endif
ifeq ($(CAN_BUILD_X86_64),1)
$(BINARIES_64): CFLAGS += -m64
$(BINARIES_64): LDLIBS += -lrt -ldl
-$(BINARIES_64): %_64: %.c
+$(BINARIES_64): $(OUTPUT)/%_64: %.c
$(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
$(foreach t,$(TARGETS),$(eval $(call gen-target-rule-64,$(t))))
endif
diff --git a/tools/testing/selftests/x86/thunks_32.S b/tools/testing/selftests/x86/thunks_32.S
index a71d92da8f46..f3f56e681e9f 100644
--- a/tools/testing/selftests/x86/thunks_32.S
+++ b/tools/testing/selftests/x86/thunks_32.S
@@ -45,3 +45,5 @@ call64_from_32:
ret
.size call64_from_32, .-call64_from_32
+
+.section .note.GNU-stack,"",%progbits
diff --git a/tools/thermal/tmon/Makefile b/tools/thermal/tmon/Makefile
index 59e417ec3e13..9db867df7679 100644
--- a/tools/thermal/tmon/Makefile
+++ b/tools/thermal/tmon/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
# We need this for the "cc-option" macro.
-include ../../../scripts/Kbuild.include
+include ../../build/Build.include
VERSION = 1.0
diff --git a/tools/usb/usbip/doc/usbip.8 b/tools/usb/usbip/doc/usbip.8
index a15d20063b98..1f26e4a00638 100644
--- a/tools/usb/usbip/doc/usbip.8
+++ b/tools/usb/usbip/doc/usbip.8
@@ -50,9 +50,16 @@ Attach a remote USB device.
.PP
.HP
+\fBattach\fR \-\-remote=<\fIhost\fR> \-\-device=<\fIdev_id\fR>
+.IP
+Attach a remote USB gadget.
+Only used when the remote usbipd is in device mode.
+.PP
+
+.HP
\fBdetach\fR \-\-port=<\fIport\fR>
.IP
-Detach an imported USB device.
+Detach an imported USB device/gadget.
.PP
.HP
@@ -74,11 +81,25 @@ List USB devices exported by a remote host.
.PP
.HP
+\fBlist\fR \-\-device
+.IP
+List USB gadgets of local usbip-vudc.
+Only used when the local usbipd is in device mode.
+Note that this can not list usbip-vudc USB gadgets of the remote device mode usbipd.
+.PP
+
+.HP
\fBlist\fR \-\-local
.IP
List local USB devices.
.PP
+.HP
+\fBport\fR
+.IP
+List imported devices/gadgets.
+.PP
+
.SH EXAMPLES
@@ -90,8 +111,27 @@ List local USB devices.
client:# usbip attach --remote=server --busid=1-2
- Connect the remote USB device.
+ client:# usbip port
+ - List imported devices/gadgets.
+
client:# usbip detach --port=0
- Detach the usb device.
+The following example shows the usage of device mode
+
+ server:# usbip list --device
+ - List gadgets exported by local usbipd server.
+
+ client:# modprobe vhci-hcd
+
+ client:# usbip attach --remote=server --device=usbip-vudc.0
+ - Connect the remote USB gadget.
+
+ client:# usbip port
+ - List imported devices/gadgets.
+
+ client:# usbip detach --port=0
+ - Detach the usb gadget.
+
.SH "SEE ALSO"
\fBusbipd\fP\fB(8)\fB\fP
diff --git a/tools/usb/usbip/doc/usbipd.8 b/tools/usb/usbip/doc/usbipd.8
index fb62a756893b..d974394f86a1 100644
--- a/tools/usb/usbip/doc/usbipd.8
+++ b/tools/usb/usbip/doc/usbipd.8
@@ -30,6 +30,12 @@ Bind to IPv6. Default is both.
.PP
.HP
+\fB\-e\fR, \fB\-\-device\fR
+.IP
+Run in device mode. Rather than drive an attached device, create a virtual UDC to bind gadgets to.
+.PP
+
+.HP
\fB\-D\fR, \fB\-\-daemon\fR
.IP
Run as a daemon process.
@@ -86,6 +92,26 @@ USB/IP client can connect and use exported devices.
- A usb device 1-2 is now exportable to other hosts!
- Use 'usbip unbind --busid=1-2' when you want to shutdown exporting and use the device locally.
+The following example shows the usage of device mode
+
+ server:# modprobe usbip-vudc
+ - Use /sys/class/udc/ interface.
+ - usbip-host is independent of this module.
+
+ server:# usbipd -e -D
+ - Start usbip daemon in device mode.
+
+ server:# modprobe g_mass_storage file=/tmp/tmp.img
+ - Bind a gadget to usbip-vudc.
+ - in this example, a mass storage gadget is bound.
+
+ server:# usbip list --device
+ - List gadgets exported by local usbipd server.
+
+ server:# modprobe -r g_mass_storage
+ - Unbind a gadget from usbip-vudc.
+ - in this example, the previous mass storage gadget is unbound.
+
.SH "SEE ALSO"
\fBusbip\fP\fB(8)\fB\fP
diff --git a/tools/usb/usbip/libsrc/list.h b/tools/usb/usbip/libsrc/list.h
index a941671e4900..9cca2425587b 100644
--- a/tools/usb/usbip/libsrc/list.h
+++ b/tools/usb/usbip/libsrc/list.h
@@ -77,17 +77,17 @@ static inline void __list_del(struct list_head * prev, struct list_head * next)
#define LIST_POISON1 ((void *) 0x00100100 + POISON_POINTER_DELTA)
#define LIST_POISON2 ((void *) 0x00200200 + POISON_POINTER_DELTA)
+static inline void __list_del_entry(struct list_head *entry)
+{
+ __list_del(entry->prev, entry->next);
+}
+
/**
* list_del - deletes entry from list.
* @entry: the element to delete from the list.
* Note: list_empty() on entry does not return true after this, the entry is
* in an undefined state.
*/
-static inline void __list_del_entry(struct list_head *entry)
-{
- __list_del(entry->prev, entry->next);
-}
-
static inline void list_del(struct list_head *entry)
{
__list_del(entry->prev, entry->next);
diff --git a/tools/usb/usbip/src/usbip_list.c b/tools/usb/usbip/src/usbip_list.c
index 8625b0f514ee..3d810bcca02f 100644
--- a/tools/usb/usbip/src/usbip_list.c
+++ b/tools/usb/usbip/src/usbip_list.c
@@ -33,7 +33,8 @@ static const char usbip_list_usage_string[] =
"usbip list [-p|--parsable] <args>\n"
" -p, --parsable Parsable list format\n"
" -r, --remote=<host> List the exportable USB devices on <host>\n"
- " -l, --local List the local USB devices\n";
+ " -l, --local List the local USB devices\n"
+ " -d, --device List the local USB gadgets bound to usbip-vudc\n";
void usbip_list_usage(void)
{