Diffstat (limited to 'tools')
-rw-r--r--  tools/Makefile | 15
-rw-r--r--  tools/gpio/Makefile | 12
-rw-r--r--  tools/gpio/gpio-utils.c | 11
-rw-r--r--  tools/gpio/gpio-utils.h | 27
-rw-r--r--  tools/gpio/lsgpio.c | 195
-rw-r--r--  tools/hv/Makefile | 2
-rw-r--r--  tools/include/asm-generic/bitops/__fls.h | 2
-rw-r--r--  tools/include/asm-generic/bitops/fls.h | 2
-rw-r--r--  tools/include/asm-generic/bitops/fls64.h | 2
-rw-r--r--  tools/include/linux/hashtable.h | 152
-rw-r--r--  tools/include/linux/stringify.h | 12
-rw-r--r--  tools/lib/api/Makefile | 2
-rwxr-xr-x  tools/lib/lockdep/run_tests.sh | 12
-rw-r--r--  tools/lib/subcmd/Makefile | 8
-rw-r--r--  tools/lib/traceevent/event-parse.c | 4
-rw-r--r--  tools/net/bpf_dbg.c | 10
-rw-r--r--  tools/net/bpf_exp.l | 84
-rw-r--r--  tools/net/bpf_exp.y | 146
-rw-r--r--  tools/objtool/.gitignore | 2
-rw-r--r--  tools/objtool/Build | 13
-rw-r--r--  tools/objtool/Documentation/stack-validation.txt | 362
-rw-r--r--  tools/objtool/Makefile | 63
-rw-r--r--  tools/objtool/arch.h | 44
-rw-r--r--  tools/objtool/arch/x86/Build | 12
-rw-r--r--  tools/objtool/arch/x86/decode.c | 172
-rw-r--r--  tools/objtool/arch/x86/insn/gen-insn-attr-x86.awk | 387
-rw-r--r--  tools/objtool/arch/x86/insn/inat.c | 97
-rw-r--r--  tools/objtool/arch/x86/insn/inat.h | 221
-rw-r--r--  tools/objtool/arch/x86/insn/inat_types.h | 29
-rw-r--r--  tools/objtool/arch/x86/insn/insn.c | 594
-rw-r--r--  tools/objtool/arch/x86/insn/insn.h | 201
-rw-r--r--  tools/objtool/arch/x86/insn/x86-opcode-map.txt | 984
-rw-r--r--  tools/objtool/builtin-check.c | 1206
-rw-r--r--  tools/objtool/builtin.h | 22
-rw-r--r--  tools/objtool/elf.c | 412
-rw-r--r--  tools/objtool/elf.h | 85
-rw-r--r--  tools/objtool/objtool.c | 136
-rw-r--r--  tools/objtool/special.c | 193
-rw-r--r--  tools/objtool/special.h | 42
-rw-r--r--  tools/objtool/warn.h | 60
-rw-r--r--  tools/perf/Documentation/Makefile | 2
-rw-r--r--  tools/perf/Documentation/perf-list.txt | 6
-rw-r--r--  tools/perf/MANIFEST | 1
-rw-r--r--  tools/perf/Makefile.perf | 2
-rw-r--r--  tools/perf/arch/powerpc/util/header.c | 6
-rw-r--r--  tools/perf/bench/bench.h | 22
-rw-r--r--  tools/perf/bench/mem-memcpy-arch.h | 2
-rw-r--r--  tools/perf/bench/mem-memset-arch.h | 2
-rw-r--r--  tools/perf/bench/numa.c | 2
-rw-r--r--  tools/perf/builtin-annotate.c | 2
-rw-r--r--  tools/perf/builtin-diff.c | 2
-rw-r--r--  tools/perf/builtin-help.c | 69
-rw-r--r--  tools/perf/builtin-inject.c | 8
-rw-r--r--  tools/perf/builtin-kmem.c | 49
-rw-r--r--  tools/perf/builtin-mem.c | 2
-rw-r--r--  tools/perf/builtin-report.c | 3
-rw-r--r--  tools/perf/builtin-script.c | 46
-rw-r--r--  tools/perf/builtin-timechart.c | 2
-rw-r--r--  tools/perf/builtin-top.c | 8
-rw-r--r--  tools/perf/builtin-trace.c | 9
-rw-r--r--  tools/perf/builtin.h | 64
-rw-r--r--  tools/perf/config/Makefile | 2
-rw-r--r--  tools/perf/tests/code-reading.c | 5
-rw-r--r--  tools/perf/tests/dwarf-unwind.c | 4
-rw-r--r--  tools/perf/tests/hists_common.c | 6
-rw-r--r--  tools/perf/tests/hists_cumulate.c | 9
-rw-r--r--  tools/perf/tests/hists_filter.c | 9
-rw-r--r--  tools/perf/tests/hists_link.c | 20
-rw-r--r--  tools/perf/tests/hists_output.c | 9
-rwxr-xr-x  tools/perf/tests/perf-targz-src-pkg | 2
-rw-r--r--  tools/perf/ui/browsers/hists.c | 2
-rw-r--r--  tools/perf/ui/gtk/hists.c | 2
-rw-r--r--  tools/perf/util/Build | 1
-rw-r--r--  tools/perf/util/abspath.c | 37
-rw-r--r--  tools/perf/util/annotate.h | 2
-rw-r--r--  tools/perf/util/auxtrace.h | 2
-rw-r--r--  tools/perf/util/build-id.c | 3
-rw-r--r--  tools/perf/util/cache.h | 24
-rw-r--r--  tools/perf/util/callchain.h | 4
-rw-r--r--  tools/perf/util/cgroup.h | 4
-rw-r--r--  tools/perf/util/cloexec.h | 2
-rw-r--r--  tools/perf/util/data-convert-bt.c | 2
-rw-r--r--  tools/perf/util/db-export.c | 2
-rw-r--r--  tools/perf/util/dso.h | 3
-rw-r--r--  tools/perf/util/dwarf-aux.c | 10
-rw-r--r--  tools/perf/util/dwarf-aux.h | 72
-rw-r--r--  tools/perf/util/event.c | 46
-rw-r--r--  tools/perf/util/event.h | 13
-rw-r--r--  tools/perf/util/evsel.c | 1
-rw-r--r--  tools/perf/util/genelf.h | 32
-rw-r--r--  tools/perf/util/header.c | 5
-rw-r--r--  tools/perf/util/header.h | 2
-rw-r--r--  tools/perf/util/hist.c | 2
-rw-r--r--  tools/perf/util/hist.h | 3
-rw-r--r--  tools/perf/util/intel-bts.c | 3
-rw-r--r--  tools/perf/util/intel-pt-decoder/insn.c | 6
-rw-r--r--  tools/perf/util/intel-pt.c | 5
-rw-r--r--  tools/perf/util/jit.h | 12
-rw-r--r--  tools/perf/util/jitdump.c | 2
-rw-r--r--  tools/perf/util/llvm-utils.c | 24
-rw-r--r--  tools/perf/util/llvm-utils.h | 7
-rw-r--r--  tools/perf/util/machine.c | 14
-rw-r--r--  tools/perf/util/machine.h | 2
-rw-r--r--  tools/perf/util/parse-events.h | 21
-rw-r--r--  tools/perf/util/path.c | 30
-rw-r--r--  tools/perf/util/probe-event.c | 2
-rw-r--r--  tools/perf/util/probe-event.h | 57
-rw-r--r--  tools/perf/util/probe-finder.c | 8
-rw-r--r--  tools/perf/util/probe-finder.h | 24
-rw-r--r--  tools/perf/util/quote.h | 2
-rw-r--r--  tools/perf/util/session.c | 5
-rw-r--r--  tools/perf/util/sort.c | 2
-rw-r--r--  tools/perf/util/stat-shadow.c | 18
-rw-r--r--  tools/perf/util/strbuf.c | 9
-rw-r--r--  tools/perf/util/strbuf.h | 21
-rw-r--r--  tools/perf/util/svghelper.h | 51
-rw-r--r--  tools/perf/util/symbol-elf.c | 12
-rw-r--r--  tools/perf/util/symbol.h | 4
-rw-r--r--  tools/perf/util/usage.c | 8
-rw-r--r--  tools/perf/util/util.h | 27
-rw-r--r--  tools/perf/util/wrapper.c | 12
-rw-r--r--  tools/power/x86/turbostat/turbostat.8 | 32
-rw-r--r--  tools/power/x86/turbostat/turbostat.c | 986
-rw-r--r--  tools/scripts/utilities.mak (renamed from tools/perf/config/utilities.mak) | 0
-rw-r--r--  tools/testing/nvdimm/test/nfit.c | 285
-rw-r--r--  tools/testing/radix-tree/.gitignore | 2
-rw-r--r--  tools/testing/radix-tree/Makefile | 19
-rw-r--r--  tools/testing/radix-tree/find_next_bit.c | 57
-rw-r--r--  tools/testing/radix-tree/linux.c | 60
-rw-r--r--  tools/testing/radix-tree/linux/bitops.h | 150
-rw-r--r--  tools/testing/radix-tree/linux/bitops/__ffs.h | 43
-rw-r--r--  tools/testing/radix-tree/linux/bitops/ffs.h | 41
-rw-r--r--  tools/testing/radix-tree/linux/bitops/ffz.h | 12
-rw-r--r--  tools/testing/radix-tree/linux/bitops/find.h | 13
-rw-r--r--  tools/testing/radix-tree/linux/bitops/fls.h | 41
-rw-r--r--  tools/testing/radix-tree/linux/bitops/fls64.h | 14
-rw-r--r--  tools/testing/radix-tree/linux/bitops/hweight.h | 11
-rw-r--r--  tools/testing/radix-tree/linux/bitops/le.h | 53
-rw-r--r--  tools/testing/radix-tree/linux/bitops/non-atomic.h | 111
-rw-r--r--  tools/testing/radix-tree/linux/bug.h | 1
-rw-r--r--  tools/testing/radix-tree/linux/cpu.h | 34
-rw-r--r--  tools/testing/radix-tree/linux/export.h | 2
-rw-r--r--  tools/testing/radix-tree/linux/gfp.h | 10
-rw-r--r--  tools/testing/radix-tree/linux/kernel.h | 35
-rw-r--r--  tools/testing/radix-tree/linux/kmemleak.h | 1
-rw-r--r--  tools/testing/radix-tree/linux/mempool.h | 16
-rw-r--r--  tools/testing/radix-tree/linux/notifier.h | 8
-rw-r--r--  tools/testing/radix-tree/linux/percpu.h | 7
-rw-r--r--  tools/testing/radix-tree/linux/preempt.h | 4
-rw-r--r--  tools/testing/radix-tree/linux/radix-tree.h | 1
-rw-r--r--  tools/testing/radix-tree/linux/rcupdate.h | 9
-rw-r--r--  tools/testing/radix-tree/linux/slab.h | 28
-rw-r--r--  tools/testing/radix-tree/linux/types.h | 28
-rw-r--r--  tools/testing/radix-tree/main.c | 272
-rw-r--r--  tools/testing/radix-tree/rcupdate.c | 86
-rw-r--r--  tools/testing/radix-tree/regression.h | 8
-rw-r--r--  tools/testing/radix-tree/regression1.c | 220
-rw-r--r--  tools/testing/radix-tree/regression2.c | 126
-rw-r--r--  tools/testing/radix-tree/regression3.c | 117
-rw-r--r--  tools/testing/radix-tree/tag_check.c | 332
-rw-r--r--  tools/testing/radix-tree/test.c | 219
-rw-r--r--  tools/testing/radix-tree/test.h | 40
-rw-r--r--  tools/testing/selftests/breakpoints/.gitignore | 1
-rw-r--r--  tools/testing/selftests/breakpoints/Makefile | 4
-rw-r--r--  tools/testing/selftests/breakpoints/step_after_suspend_test.c | 218
-rw-r--r--  tools/testing/selftests/cpu-hotplug/config | 2
-rw-r--r--  tools/testing/selftests/firmware/config | 1
-rw-r--r--  tools/testing/selftests/ftrace/config | 1
-rw-r--r--  tools/testing/selftests/ipc/.gitignore | 1
-rw-r--r--  tools/testing/selftests/ipc/config | 2
-rw-r--r--  tools/testing/selftests/lib/Makefile | 2
-rwxr-xr-x  tools/testing/selftests/lib/bitmap.sh | 10
-rw-r--r--  tools/testing/selftests/media_tests/.gitignore | 1
-rw-r--r--  tools/testing/selftests/media_tests/Makefile | 7
-rw-r--r--  tools/testing/selftests/media_tests/media_device_test.c | 95
-rw-r--r--  tools/testing/selftests/memory-hotplug/config | 4
-rw-r--r--  tools/testing/selftests/mount/config | 2
-rw-r--r--  tools/testing/selftests/net/.gitignore | 2
-rw-r--r--  tools/testing/selftests/net/Makefile | 2
-rw-r--r--  tools/testing/selftests/net/config | 3
-rw-r--r--  tools/testing/selftests/net/reuseport_bpf.c | 117
-rw-r--r--  tools/testing/selftests/net/reuseport_bpf_cpu.c | 258
-rw-r--r--  tools/testing/selftests/net/reuseport_dualstack.c | 208
-rw-r--r--  tools/testing/selftests/powerpc/Makefile | 5
-rw-r--r--  tools/testing/selftests/powerpc/basic_asm.h | 70
-rw-r--r--  tools/testing/selftests/powerpc/math/.gitignore | 6
-rw-r--r--  tools/testing/selftests/powerpc/math/Makefile | 19
-rw-r--r--  tools/testing/selftests/powerpc/math/fpu_asm.S | 198
-rw-r--r--  tools/testing/selftests/powerpc/math/fpu_preempt.c | 113
-rw-r--r--  tools/testing/selftests/powerpc/math/fpu_signal.c | 135
-rw-r--r--  tools/testing/selftests/powerpc/math/fpu_syscall.c | 90
-rw-r--r--  tools/testing/selftests/powerpc/math/vmx_asm.S | 235
-rw-r--r--  tools/testing/selftests/powerpc/math/vmx_preempt.c | 112
-rw-r--r--  tools/testing/selftests/powerpc/math/vmx_signal.c | 156
-rw-r--r--  tools/testing/selftests/powerpc/math/vmx_syscall.c | 91
-rw-r--r--  tools/testing/selftests/powerpc/tm/tm-signal-msr-resv.c | 2
-rw-r--r--  tools/testing/selftests/pstore/config | 4
-rw-r--r--  tools/testing/selftests/seccomp/config | 2
-rw-r--r--  tools/testing/selftests/seccomp/seccomp_bpf.c | 22
-rw-r--r--  tools/testing/selftests/static_keys/config | 1
-rw-r--r--  tools/testing/selftests/timers/alarmtimer-suspend.c | 2
-rw-r--r--  tools/testing/selftests/user/config | 1
-rw-r--r--  tools/testing/selftests/vm/config | 1
-rw-r--r--  tools/testing/selftests/x86/Makefile | 2
-rw-r--r--  tools/testing/selftests/x86/iopl.c | 135
-rw-r--r--  tools/testing/selftests/zram/config | 2
-rw-r--r--  tools/virtio/linux/dma-mapping.h | 17
-rw-r--r--  tools/vm/page-types.c | 133
-rw-r--r--  tools/vm/slabinfo.c | 2
209 files changed, 12029 insertions, 1076 deletions
diff --git a/tools/Makefile b/tools/Makefile
index 6339f6ac3ccb..60c7e6c8ff17 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -13,6 +13,7 @@ help:
@echo ' cpupower - a tool for all things x86 CPU power'
@echo ' firewire - the userspace part of nosy, an IEEE-1394 traffic sniffer'
@echo ' freefall - laptop accelerometer program for disk protection'
+ @echo ' gpio - GPIO tools'
@echo ' hv - tools used when in Hyper-V clients'
@echo ' iio - IIO tools'
@echo ' lguest - a minimal 32-bit x86 hypervisor'
@@ -20,6 +21,7 @@ help:
@echo ' perf - Linux performance measurement and analysis tool'
@echo ' selftests - various kernel selftests'
@echo ' spi - spi tools'
+ @echo ' objtool - an ELF object analysis tool'
@echo ' tmon - thermal monitoring and tuning tool'
@echo ' turbostat - Intel CPU idle stats and freq reporting tool'
@echo ' usb - USB testing tools'
@@ -53,7 +55,7 @@ acpi: FORCE
cpupower: FORCE
$(call descend,power/$@)
-cgroup firewire hv guest spi usb virtio vm net iio: FORCE
+cgroup firewire hv guest spi usb virtio vm net iio gpio objtool: FORCE
$(call descend,$@)
liblockdep: FORCE
@@ -85,7 +87,7 @@ freefall: FORCE
all: acpi cgroup cpupower hv firewire lguest \
perf selftests turbostat usb \
virtio vm net x86_energy_perf_policy \
- tmon freefall
+ tmon freefall objtool
acpi_install:
$(call descend,power/$(@:_install=),install)
@@ -93,7 +95,7 @@ acpi_install:
cpupower_install:
$(call descend,power/$(@:_install=),install)
-cgroup_install firewire_install hv_install lguest_install perf_install usb_install virtio_install vm_install net_install:
+cgroup_install firewire_install hv_install lguest_install perf_install usb_install virtio_install vm_install net_install objtool_install:
$(call descend,$(@:_install=),install)
selftests_install:
@@ -111,7 +113,7 @@ freefall_install:
install: acpi_install cgroup_install cpupower_install hv_install firewire_install lguest_install \
perf_install selftests_install turbostat_install usb_install \
virtio_install vm_install net_install x86_energy_perf_policy_install \
- tmon_install freefall_install
+ tmon_install freefall_install objtool_install
acpi_clean:
$(call descend,power/acpi,clean)
@@ -119,7 +121,7 @@ acpi_clean:
cpupower_clean:
$(call descend,power/cpupower,clean)
-cgroup_clean hv_clean firewire_clean lguest_clean spi_clean usb_clean virtio_clean vm_clean net_clean iio_clean:
+cgroup_clean hv_clean firewire_clean lguest_clean spi_clean usb_clean virtio_clean vm_clean net_clean iio_clean gpio_clean objtool_clean:
$(call descend,$(@:_clean=),clean)
liblockdep_clean:
@@ -155,6 +157,7 @@ build_clean:
clean: acpi_clean cgroup_clean cpupower_clean hv_clean firewire_clean lguest_clean \
perf_clean selftests_clean turbostat_clean spi_clean usb_clean virtio_clean \
vm_clean net_clean iio_clean x86_energy_perf_policy_clean tmon_clean \
- freefall_clean build_clean libbpf_clean libsubcmd_clean liblockdep_clean
+ freefall_clean build_clean libbpf_clean libsubcmd_clean liblockdep_clean \
+ gpio_clean objtool_clean
.PHONY: FORCE
diff --git a/tools/gpio/Makefile b/tools/gpio/Makefile
new file mode 100644
index 000000000000..4d198d5c4203
--- /dev/null
+++ b/tools/gpio/Makefile
@@ -0,0 +1,12 @@
+CC = $(CROSS_COMPILE)gcc
+CFLAGS += -Wall -g -D_GNU_SOURCE
+
+all: lsgpio
+
+lsgpio: lsgpio.o gpio-utils.o
+
+%.o: %.c gpio-utils.h
+
+.PHONY: clean
+clean:
+ rm -f *.o lsgpio
diff --git a/tools/gpio/gpio-utils.c b/tools/gpio/gpio-utils.c
new file mode 100644
index 000000000000..8208718f2c99
--- /dev/null
+++ b/tools/gpio/gpio-utils.c
@@ -0,0 +1,11 @@
+/*
+ * GPIO tools - helpers library for the GPIO tools
+ *
+ * Copyright (C) 2015 Linus Walleij
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+#include "gpio-utils.h"
diff --git a/tools/gpio/gpio-utils.h b/tools/gpio/gpio-utils.h
new file mode 100644
index 000000000000..5f57133b8c04
--- /dev/null
+++ b/tools/gpio/gpio-utils.h
@@ -0,0 +1,27 @@
+/*
+ * GPIO tools - utility helpers library for the GPIO tools
+ *
+ * Copyright (C) 2015 Linus Walleij
+ *
+ * Portions copied from iio_utils and lsiio:
+ * Copyright (c) 2010 Manuel Stahl <manuel.stahl@iis.fraunhofer.de>
+ * Copyright (c) 2008 Jonathan Cameron
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+#ifndef _GPIO_UTILS_H_
+#define _GPIO_UTILS_H_
+
+#include <string.h>
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+static inline int check_prefix(const char *str, const char *prefix)
+{
+ return strlen(str) > strlen(prefix) &&
+ strncmp(str, prefix, strlen(prefix)) == 0;
+}
+
+#endif /* _GPIO_UTILS_H_ */
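A quick, hedged illustration of the helper's semantics (the test values are invented and this snippet is not part of the patch): check_prefix() only returns true when the string is strictly longer than the prefix and begins with it, which is what lets lsgpio below match "gpiochip0" while skipping unrelated /dev entries.

    #include <assert.h>
    #include "gpio-utils.h"

    int main(void)
    {
        /* strictly longer than the prefix and matching -> true */
        assert(check_prefix("gpiochip0", "gpiochip"));
        /* equal length is rejected by the strlen() comparison */
        assert(!check_prefix("gpiochip", "gpiochip"));
        /* different prefix -> false */
        assert(!check_prefix("watchdog0", "gpiochip"));
        return 0;
    }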
diff --git a/tools/gpio/lsgpio.c b/tools/gpio/lsgpio.c
new file mode 100644
index 000000000000..1124da375942
--- /dev/null
+++ b/tools/gpio/lsgpio.c
@@ -0,0 +1,195 @@
+/*
+ * lsgpio - example on how to list the GPIO lines on a system
+ *
+ * Copyright (C) 2015 Linus Walleij
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * Usage:
+ * lsgpio <-n device-name>
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <dirent.h>
+#include <errno.h>
+#include <string.h>
+#include <poll.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <sys/ioctl.h>
+#include <linux/gpio.h>
+
+#include "gpio-utils.h"
+
+struct gpio_flag {
+ char *name;
+ unsigned long mask;
+};
+
+struct gpio_flag flagnames[] = {
+ {
+ .name = "kernel",
+ .mask = GPIOLINE_FLAG_KERNEL,
+ },
+ {
+ .name = "output",
+ .mask = GPIOLINE_FLAG_IS_OUT,
+ },
+ {
+ .name = "active-low",
+ .mask = GPIOLINE_FLAG_ACTIVE_LOW,
+ },
+ {
+ .name = "open-drain",
+ .mask = GPIOLINE_FLAG_OPEN_DRAIN,
+ },
+ {
+ .name = "open-source",
+ .mask = GPIOLINE_FLAG_OPEN_SOURCE,
+ },
+};
+
+void print_flags(unsigned long flags)
+{
+ int i;
+ int printed = 0;
+
+ for (i = 0; i < ARRAY_SIZE(flagnames); i++) {
+ if (flags & flagnames[i].mask) {
+ if (printed)
+ fprintf(stdout, " ");
+ fprintf(stdout, "%s", flagnames[i].name);
+ printed++;
+ }
+ }
+}
+
+int list_device(const char *device_name)
+{
+ struct gpiochip_info cinfo;
+ char *chrdev_name;
+ int fd;
+ int ret;
+ int i;
+
+ ret = asprintf(&chrdev_name, "/dev/%s", device_name);
+ if (ret < 0)
+ return -ENOMEM;
+
+ fd = open(chrdev_name, 0);
+ if (fd == -1) {
+ ret = -errno;
+ fprintf(stderr, "Failed to open %s\n", chrdev_name);
+ goto exit_close_error;
+ }
+
+ /* Inspect this GPIO chip */
+ ret = ioctl(fd, GPIO_GET_CHIPINFO_IOCTL, &cinfo);
+ if (ret == -1) {
+ ret = -errno;
+ perror("Failed to issue CHIPINFO IOCTL\n");
+ goto exit_close_error;
+ }
+ fprintf(stdout, "GPIO chip: %s, \"%s\", %u GPIO lines\n",
+ cinfo.name, cinfo.label, cinfo.lines);
+
+ /* Loop over the lines and print info */
+ for (i = 0; i < cinfo.lines; i++) {
+ struct gpioline_info linfo;
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.line_offset = i;
+
+ ret = ioctl(fd, GPIO_GET_LINEINFO_IOCTL, &linfo);
+ if (ret == -1) {
+ ret = -errno;
+ perror("Failed to issue LINEINFO IOCTL\n");
+ goto exit_close_error;
+ }
+ fprintf(stdout, "\tline %2d:", linfo.line_offset);
+ if (linfo.name[0])
+ fprintf(stdout, " \"%s\"", linfo.name);
+ else
+ fprintf(stdout, " unnamed");
+ if (linfo.consumer[0])
+ fprintf(stdout, " \"%s\"", linfo.consumer);
+ else
+ fprintf(stdout, " unused");
+ if (linfo.flags) {
+ fprintf(stdout, " [");
+ print_flags(linfo.flags);
+ fprintf(stdout, "]");
+ }
+ fprintf(stdout, "\n");
+
+ }
+
+exit_close_error:
+ if (close(fd) == -1)
+ perror("Failed to close GPIO character device file");
+ free(chrdev_name);
+ return ret;
+}
+
+void print_usage(void)
+{
+ fprintf(stderr, "Usage: lsgpio [options]...\n"
+ "List GPIO chips, lines and states\n"
+ " -n <name> List GPIOs on a named device\n"
+ " -? This helptext\n"
+ );
+}
+
+int main(int argc, char **argv)
+{
+ const char *device_name = NULL;
+ int ret;
+ int c;
+
+ while ((c = getopt(argc, argv, "n:")) != -1) {
+ switch (c) {
+ case 'n':
+ device_name = optarg;
+ break;
+ case '?':
+ print_usage();
+ return -1;
+ }
+ }
+
+ if (device_name)
+ ret = list_device(device_name);
+ else {
+ const struct dirent *ent;
+ DIR *dp;
+
+ /* List all GPIO devices one at a time */
+ dp = opendir("/dev");
+ if (!dp) {
+ ret = -errno;
+ goto error_out;
+ }
+
+ ret = -ENOENT;
+ while (ent = readdir(dp), ent) {
+ if (check_prefix(ent->d_name, "gpiochip")) {
+ ret = list_device(ent->d_name);
+ if (ret)
+ break;
+ }
+ }
+
+ ret = 0;
+ if (closedir(dp) == -1) {
+ perror("scanning devices: Failed to close directory");
+ ret = -errno;
+ }
+ }
+error_out:
+ return ret;
+}
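As a reading aid for the ioctl sequence above, here is a condensed, hedged sketch that performs only the chip-info query from list_device(); the device path is an assumed example and most error handling is trimmed.

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <linux/gpio.h>

    int main(void)
    {
        struct gpiochip_info cinfo;
        /* assumed example node; lsgpio discovers these via readdir() */
        int fd = open("/dev/gpiochip0", 0);

        if (fd < 0) {
            perror("open");
            return 1;
        }
        if (ioctl(fd, GPIO_GET_CHIPINFO_IOCTL, &cinfo) < 0) {
            perror("GPIO_GET_CHIPINFO_IOCTL");
            close(fd);
            return 1;
        }
        printf("%s, \"%s\", %u GPIO lines\n",
               cinfo.name, cinfo.label, cinfo.lines);
        close(fd);
        return 0;
    }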
diff --git a/tools/hv/Makefile b/tools/hv/Makefile
index a8ab79556926..a8c4644022a6 100644
--- a/tools/hv/Makefile
+++ b/tools/hv/Makefile
@@ -5,6 +5,8 @@ PTHREAD_LIBS = -lpthread
WARNINGS = -Wall -Wextra
CFLAGS = $(WARNINGS) -g $(PTHREAD_LIBS) $(shell getconf LFS_CFLAGS)
+CFLAGS += -D__EXPORTED_HEADERS__ -I../../include/uapi -I../../include
+
all: hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon
%: %.c
$(CC) $(CFLAGS) -o $@ $^
diff --git a/tools/include/asm-generic/bitops/__fls.h b/tools/include/asm-generic/bitops/__fls.h
index 2218b9add4c1..494c9c615d1c 100644
--- a/tools/include/asm-generic/bitops/__fls.h
+++ b/tools/include/asm-generic/bitops/__fls.h
@@ -1 +1 @@
-#include <../../../../include/asm-generic/bitops/__fls.h>
+#include "../../../../include/asm-generic/bitops/__fls.h"
diff --git a/tools/include/asm-generic/bitops/fls.h b/tools/include/asm-generic/bitops/fls.h
index dbf711a28f71..0e4995fa0248 100644
--- a/tools/include/asm-generic/bitops/fls.h
+++ b/tools/include/asm-generic/bitops/fls.h
@@ -1 +1 @@
-#include <../../../../include/asm-generic/bitops/fls.h>
+#include "../../../../include/asm-generic/bitops/fls.h"
diff --git a/tools/include/asm-generic/bitops/fls64.h b/tools/include/asm-generic/bitops/fls64.h
index 980b1f63c047..35bee0071e78 100644
--- a/tools/include/asm-generic/bitops/fls64.h
+++ b/tools/include/asm-generic/bitops/fls64.h
@@ -1 +1 @@
-#include <../../../../include/asm-generic/bitops/fls64.h>
+#include "../../../../include/asm-generic/bitops/fls64.h"
diff --git a/tools/include/linux/hashtable.h b/tools/include/linux/hashtable.h
new file mode 100644
index 000000000000..c65cc0aa2659
--- /dev/null
+++ b/tools/include/linux/hashtable.h
@@ -0,0 +1,152 @@
+/*
+ * Statically sized hash table implementation
+ * (C) 2012 Sasha Levin <levinsasha928@gmail.com>
+ */
+
+#ifndef _LINUX_HASHTABLE_H
+#define _LINUX_HASHTABLE_H
+
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+#include <linux/hash.h>
+#include <linux/log2.h>
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+#define DEFINE_HASHTABLE(name, bits) \
+ struct hlist_head name[1 << (bits)] = \
+ { [0 ... ((1 << (bits)) - 1)] = HLIST_HEAD_INIT }
+
+#define DECLARE_HASHTABLE(name, bits) \
+ struct hlist_head name[1 << (bits)]
+
+#define HASH_SIZE(name) (ARRAY_SIZE(name))
+#define HASH_BITS(name) ilog2(HASH_SIZE(name))
+
+/* Use hash_32 when possible to allow for fast 32bit hashing in 64bit kernels. */
+#define hash_min(val, bits) \
+ (sizeof(val) <= 4 ? hash_32(val, bits) : hash_long(val, bits))
+
+static inline void __hash_init(struct hlist_head *ht, unsigned int sz)
+{
+ unsigned int i;
+
+ for (i = 0; i < sz; i++)
+ INIT_HLIST_HEAD(&ht[i]);
+}
+
+/**
+ * hash_init - initialize a hash table
+ * @hashtable: hashtable to be initialized
+ *
+ * Calculates the size of the hashtable from the given parameter, otherwise
+ * same as hash_init_size.
+ *
+ * This has to be a macro since HASH_BITS() will not work on pointers since
+ * it calculates the size during preprocessing.
+ */
+#define hash_init(hashtable) __hash_init(hashtable, HASH_SIZE(hashtable))
+
+/**
+ * hash_add - add an object to a hashtable
+ * @hashtable: hashtable to add to
+ * @node: the &struct hlist_node of the object to be added
+ * @key: the key of the object to be added
+ */
+#define hash_add(hashtable, node, key) \
+ hlist_add_head(node, &hashtable[hash_min(key, HASH_BITS(hashtable))])
+
+/**
+ * hash_hashed - check whether an object is in any hashtable
+ * @node: the &struct hlist_node of the object to be checked
+ */
+static inline bool hash_hashed(struct hlist_node *node)
+{
+ return !hlist_unhashed(node);
+}
+
+static inline bool __hash_empty(struct hlist_head *ht, unsigned int sz)
+{
+ unsigned int i;
+
+ for (i = 0; i < sz; i++)
+ if (!hlist_empty(&ht[i]))
+ return false;
+
+ return true;
+}
+
+/**
+ * hash_empty - check whether a hashtable is empty
+ * @hashtable: hashtable to check
+ *
+ * This has to be a macro since HASH_BITS() will not work on pointers since
+ * it calculates the size during preprocessing.
+ */
+#define hash_empty(hashtable) __hash_empty(hashtable, HASH_SIZE(hashtable))
+
+/**
+ * hash_del - remove an object from a hashtable
+ * @node: &struct hlist_node of the object to remove
+ */
+static inline void hash_del(struct hlist_node *node)
+{
+ hlist_del_init(node);
+}
+
+/**
+ * hash_for_each - iterate over a hashtable
+ * @name: hashtable to iterate
+ * @bkt: integer to use as bucket loop cursor
+ * @obj: the type * to use as a loop cursor for each entry
+ * @member: the name of the hlist_node within the struct
+ */
+#define hash_for_each(name, bkt, obj, member) \
+ for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\
+ (bkt)++)\
+ hlist_for_each_entry(obj, &name[bkt], member)
+
+/**
+ * hash_for_each_safe - iterate over a hashtable safe against removal of
+ * hash entry
+ * @name: hashtable to iterate
+ * @bkt: integer to use as bucket loop cursor
+ * @tmp: a &struct used for temporary storage
+ * @obj: the type * to use as a loop cursor for each entry
+ * @member: the name of the hlist_node within the struct
+ */
+#define hash_for_each_safe(name, bkt, tmp, obj, member) \
+ for ((bkt) = 0, obj = NULL; obj == NULL && (bkt) < HASH_SIZE(name);\
+ (bkt)++)\
+ hlist_for_each_entry_safe(obj, tmp, &name[bkt], member)
+
+/**
+ * hash_for_each_possible - iterate over all possible objects hashing to the
+ * same bucket
+ * @name: hashtable to iterate
+ * @obj: the type * to use as a loop cursor for each entry
+ * @member: the name of the hlist_node within the struct
+ * @key: the key of the objects to iterate over
+ */
+#define hash_for_each_possible(name, obj, member, key) \
+ hlist_for_each_entry(obj, &name[hash_min(key, HASH_BITS(name))], member)
+
+/**
+ * hash_for_each_possible_safe - iterate over all possible objects hashing to the
+ * same bucket safe against removals
+ * @name: hashtable to iterate
+ * @obj: the type * to use as a loop cursor for each entry
+ * @tmp: a &struct used for temporary storage
+ * @member: the name of the hlist_node within the struct
+ * @key: the key of the objects to iterate over
+ */
+#define hash_for_each_possible_safe(name, obj, tmp, member, key) \
+ hlist_for_each_entry_safe(obj, tmp,\
+ &name[hash_min(key, HASH_BITS(name))], member)
+
+
+#endif
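Since this header is imported wholesale for objtool's use, a short hedged usage sketch may help readers new to the kernel hashtable API; the struct, key value, and table size below are invented for illustration, and the snippet assumes the tools/ copies of linux/list.h and linux/hash.h that this header includes are on the include path.

    #include <stdio.h>
    #include <linux/hashtable.h>

    struct item {
        int key;
        struct hlist_node node;    /* linkage used by the hash helpers */
    };

    static DEFINE_HASHTABLE(table, 4);    /* 2^4 = 16 buckets */

    int main(void)
    {
        struct item a = { .key = 42 }, *cur;

        hash_add(table, &a.node, a.key);

        /* only walks the one bucket that key 42 hashes to */
        hash_for_each_possible(table, cur, node, 42)
            if (cur->key == 42)
                printf("found %d\n", cur->key);

        hash_del(&a.node);
        return 0;
    }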
diff --git a/tools/include/linux/stringify.h b/tools/include/linux/stringify.h
new file mode 100644
index 000000000000..841cec8ed525
--- /dev/null
+++ b/tools/include/linux/stringify.h
@@ -0,0 +1,12 @@
+#ifndef __LINUX_STRINGIFY_H
+#define __LINUX_STRINGIFY_H
+
+/* Indirect stringification. Doing two levels allows the parameter to be a
+ * macro itself. For example, compile with -DFOO=bar, __stringify(FOO)
+ * converts to "bar".
+ */
+
+#define __stringify_1(x...) #x
+#define __stringify(x...) __stringify_1(x)
+
+#endif /* !__LINUX_STRINGIFY_H */
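A tiny illustration of the two-level expansion described in the comment above; the FOO macro here stands in for a -DFOO=bar compiler flag and is only an example.

    #include <stdio.h>
    #include <linux/stringify.h>

    #define FOO bar

    int main(void)
    {
        /* one level stringifies the name, two levels expand it first */
        printf("%s vs %s\n", __stringify_1(FOO), __stringify(FOO));
        /* prints: FOO vs bar */
        return 0;
    }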
diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile
index bbc82c614bee..316f308a63ea 100644
--- a/tools/lib/api/Makefile
+++ b/tools/lib/api/Makefile
@@ -1,5 +1,5 @@
include ../../scripts/Makefile.include
-include ../../perf/config/utilities.mak # QUIET_CLEAN
+include ../../scripts/utilities.mak # QUIET_CLEAN
ifeq ($(srctree),)
srctree := $(patsubst %/,%,$(dir $(shell pwd)))
diff --git a/tools/lib/lockdep/run_tests.sh b/tools/lib/lockdep/run_tests.sh
index 5334ad9d39b7..1069d96248c1 100755
--- a/tools/lib/lockdep/run_tests.sh
+++ b/tools/lib/lockdep/run_tests.sh
@@ -3,7 +3,7 @@
make &> /dev/null
for i in `ls tests/*.c`; do
- testname=$(basename -s .c "$i")
+ testname=$(basename "$i" .c)
gcc -o tests/$testname -pthread -lpthread $i liblockdep.a -Iinclude -D__USE_LIBLOCKDEP &> /dev/null
echo -ne "$testname... "
if [ $(timeout 1 ./tests/$testname | wc -l) -gt 0 ]; then
@@ -11,11 +11,13 @@ for i in `ls tests/*.c`; do
else
echo "FAILED!"
fi
- rm tests/$testname
+ if [ -f "tests/$testname" ]; then
+ rm tests/$testname
+ fi
done
for i in `ls tests/*.c`; do
- testname=$(basename -s .c "$i")
+ testname=$(basename "$i" .c)
gcc -o tests/$testname -pthread -lpthread -Iinclude $i &> /dev/null
echo -ne "(PRELOAD) $testname... "
if [ $(timeout 1 ./lockdep ./tests/$testname | wc -l) -gt 0 ]; then
@@ -23,5 +25,7 @@ for i in `ls tests/*.c`; do
else
echo "FAILED!"
fi
- rm tests/$testname
+ if [ -f "tests/$testname" ]; then
+ rm tests/$testname
+ fi
done
diff --git a/tools/lib/subcmd/Makefile b/tools/lib/subcmd/Makefile
index 629cf8c14e68..a8103700c172 100644
--- a/tools/lib/subcmd/Makefile
+++ b/tools/lib/subcmd/Makefile
@@ -1,5 +1,5 @@
include ../../scripts/Makefile.include
-include ../../perf/config/utilities.mak # QUIET_CLEAN
+include ../../scripts/utilities.mak # QUIET_CLEAN
ifeq ($(srctree),)
srctree := $(patsubst %/,%,$(dir $(shell pwd)))
@@ -8,8 +8,10 @@ srctree := $(patsubst %/,%,$(dir $(srctree)))
#$(info Determined 'srctree' to be $(srctree))
endif
-CC = $(CROSS_COMPILE)gcc
-AR = $(CROSS_COMPILE)ar
+CC ?= $(CROSS_COMPILE)gcc
+LD ?= $(CROSS_COMPILE)ld
+AR ?= $(CROSS_COMPILE)ar
+
RM = rm -f
MAKEFLAGS += --no-print-directory
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index 190cc886ab91..a8b6357d1ffe 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -5427,10 +5427,8 @@ void pevent_print_event_time(struct pevent *pevent, struct trace_seq *s,
}
if (pevent->latency_format) {
- trace_seq_printf(s, " %3d", record->cpu);
pevent_data_lat_fmt(pevent, s, record);
- } else
- trace_seq_printf(s, " [%03d]", record->cpu);
+ }
if (use_usec_format) {
if (pevent->flags & PEVENT_NSEC_OUTPUT) {
diff --git a/tools/net/bpf_dbg.c b/tools/net/bpf_dbg.c
index 9a287bec695a..4f254bcc4423 100644
--- a/tools/net/bpf_dbg.c
+++ b/tools/net/bpf_dbg.c
@@ -129,16 +129,16 @@ struct bpf_regs {
};
static struct sock_filter bpf_image[BPF_MAXINSNS + 1];
-static unsigned int bpf_prog_len = 0;
+static unsigned int bpf_prog_len;
static int bpf_breakpoints[64];
static struct bpf_regs bpf_regs[BPF_MAXINSNS + 1];
static struct bpf_regs bpf_curr;
-static unsigned int bpf_regs_len = 0;
+static unsigned int bpf_regs_len;
static int pcap_fd = -1;
-static unsigned int pcap_packet = 0;
-static size_t pcap_map_size = 0;
+static unsigned int pcap_packet;
+static size_t pcap_map_size;
static char *pcap_ptr_va_start, *pcap_ptr_va_curr;
static const char * const op_table[] = {
@@ -1172,7 +1172,7 @@ static int cmd_breakpoint(char *subcmd)
static int cmd_run(char *num)
{
- static uint32_t pass = 0, fail = 0;
+ static uint32_t pass, fail;
bool has_limit = true;
int pkts = 0, i = 0;
diff --git a/tools/net/bpf_exp.l b/tools/net/bpf_exp.l
index 7cc72a336645..bd83149e7be0 100644
--- a/tools/net/bpf_exp.l
+++ b/tools/net/bpf_exp.l
@@ -23,6 +23,9 @@
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
+#include <string.h>
+
+#include <linux/filter.h>
#include "bpf_exp.yacc.h"
@@ -79,22 +82,71 @@ extern void yyerror(const char *str);
"txa" { return OP_TXA; }
"#"?("len") { return K_PKT_LEN; }
-"#"?("proto") { return K_PROTO; }
-"#"?("type") { return K_TYPE; }
-"#"?("poff") { return K_POFF; }
-"#"?("ifidx") { return K_IFIDX; }
-"#"?("nla") { return K_NLATTR; }
-"#"?("nlan") { return K_NLATTR_NEST; }
-"#"?("mark") { return K_MARK; }
-"#"?("queue") { return K_QUEUE; }
-"#"?("hatype") { return K_HATYPE; }
-"#"?("rxhash") { return K_RXHASH; }
-"#"?("cpu") { return K_CPU; }
-"#"?("vlan_tci") { return K_VLAN_TCI; }
-"#"?("vlan_pr") { return K_VLAN_AVAIL; }
-"#"?("vlan_avail") { return K_VLAN_AVAIL; }
-"#"?("vlan_tpid") { return K_VLAN_TPID; }
-"#"?("rand") { return K_RAND; }
+
+"#"?("proto") {
+ yylval.number = SKF_AD_PROTOCOL;
+ return extension;
+ }
+"#"?("type") {
+ yylval.number = SKF_AD_PKTTYPE;
+ return extension;
+ }
+"#"?("poff") {
+ yylval.number = SKF_AD_PAY_OFFSET;
+ return extension;
+ }
+"#"?("ifidx") {
+ yylval.number = SKF_AD_IFINDEX;
+ return extension;
+ }
+"#"?("nla") {
+ yylval.number = SKF_AD_NLATTR;
+ return extension;
+ }
+"#"?("nlan") {
+ yylval.number = SKF_AD_NLATTR_NEST;
+ return extension;
+ }
+"#"?("mark") {
+ yylval.number = SKF_AD_MARK;
+ return extension;
+ }
+"#"?("queue") {
+ yylval.number = SKF_AD_QUEUE;
+ return extension;
+ }
+"#"?("hatype") {
+ yylval.number = SKF_AD_HATYPE;
+ return extension;
+ }
+"#"?("rxhash") {
+ yylval.number = SKF_AD_RXHASH;
+ return extension;
+ }
+"#"?("cpu") {
+ yylval.number = SKF_AD_CPU;
+ return extension;
+ }
+"#"?("vlan_tci") {
+ yylval.number = SKF_AD_VLAN_TAG;
+ return extension;
+ }
+"#"?("vlan_pr") {
+ yylval.number = SKF_AD_VLAN_TAG_PRESENT;
+ return extension;
+ }
+"#"?("vlan_avail") {
+ yylval.number = SKF_AD_VLAN_TAG_PRESENT;
+ return extension;
+ }
+"#"?("vlan_tpid") {
+ yylval.number = SKF_AD_VLAN_TPID;
+ return extension;
+ }
+"#"?("rand") {
+ yylval.number = SKF_AD_RANDOM;
+ return extension;
+ }
":" { return ':'; }
"," { return ','; }
diff --git a/tools/net/bpf_exp.y b/tools/net/bpf_exp.y
index e24eea1b0db5..56ba1de50784 100644
--- a/tools/net/bpf_exp.y
+++ b/tools/net/bpf_exp.y
@@ -35,6 +35,7 @@
enum jmp_type { JTL, JFL, JKL };
extern FILE *yyin;
+extern int yylineno;
extern int yylex(void);
extern void yyerror(const char *str);
@@ -55,14 +56,14 @@ static void bpf_set_jmp_label(char *label, enum jmp_type type);
%token OP_RET OP_TAX OP_TXA OP_LDXB OP_MOD OP_NEG OP_JNEQ OP_JLT OP_JLE OP_LDI
%token OP_LDXI
-%token K_PKT_LEN K_PROTO K_TYPE K_NLATTR K_NLATTR_NEST K_MARK K_QUEUE K_HATYPE
-%token K_RXHASH K_CPU K_IFIDX K_VLAN_TCI K_VLAN_AVAIL K_VLAN_TPID K_POFF K_RAND
+%token K_PKT_LEN
%token ':' ',' '[' ']' '(' ')' 'x' 'a' '+' 'M' '*' '&' '#' '%'
-%token number label
+%token extension number label
%type <label> label
+%type <number> extension
%type <number> number
%%
@@ -125,51 +126,9 @@ ldb
bpf_set_curr_instr(BPF_LD | BPF_B | BPF_IND, 0, 0, $6); }
| OP_LDB '[' number ']' {
bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, $3); }
- | OP_LDB K_PROTO {
+ | OP_LDB extension {
bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_PROTOCOL); }
- | OP_LDB K_TYPE {
- bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_PKTTYPE); }
- | OP_LDB K_IFIDX {
- bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_IFINDEX); }
- | OP_LDB K_NLATTR {
- bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_NLATTR); }
- | OP_LDB K_NLATTR_NEST {
- bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_NLATTR_NEST); }
- | OP_LDB K_MARK {
- bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_MARK); }
- | OP_LDB K_QUEUE {
- bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_QUEUE); }
- | OP_LDB K_HATYPE {
- bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_HATYPE); }
- | OP_LDB K_RXHASH {
- bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_RXHASH); }
- | OP_LDB K_CPU {
- bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_CPU); }
- | OP_LDB K_VLAN_TCI {
- bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_VLAN_TAG); }
- | OP_LDB K_VLAN_AVAIL {
- bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT); }
- | OP_LDB K_POFF {
- bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_PAY_OFFSET); }
- | OP_LDB K_RAND {
- bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_RANDOM); }
- | OP_LDB K_VLAN_TPID {
- bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_VLAN_TPID); }
+ SKF_AD_OFF + $2); }
;
ldh
@@ -179,51 +138,9 @@ ldh
bpf_set_curr_instr(BPF_LD | BPF_H | BPF_IND, 0, 0, $6); }
| OP_LDH '[' number ']' {
bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, $3); }
- | OP_LDH K_PROTO {
- bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_PROTOCOL); }
- | OP_LDH K_TYPE {
- bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_PKTTYPE); }
- | OP_LDH K_IFIDX {
- bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_IFINDEX); }
- | OP_LDH K_NLATTR {
- bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_NLATTR); }
- | OP_LDH K_NLATTR_NEST {
- bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_NLATTR_NEST); }
- | OP_LDH K_MARK {
- bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_MARK); }
- | OP_LDH K_QUEUE {
- bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_QUEUE); }
- | OP_LDH K_HATYPE {
+ | OP_LDH extension {
bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_HATYPE); }
- | OP_LDH K_RXHASH {
- bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_RXHASH); }
- | OP_LDH K_CPU {
- bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_CPU); }
- | OP_LDH K_VLAN_TCI {
- bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_VLAN_TAG); }
- | OP_LDH K_VLAN_AVAIL {
- bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT); }
- | OP_LDH K_POFF {
- bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_PAY_OFFSET); }
- | OP_LDH K_RAND {
- bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_RANDOM); }
- | OP_LDH K_VLAN_TPID {
- bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_VLAN_TPID); }
+ SKF_AD_OFF + $2); }
;
ldi
@@ -238,51 +155,9 @@ ld
bpf_set_curr_instr(BPF_LD | BPF_IMM, 0, 0, $3); }
| OP_LD K_PKT_LEN {
bpf_set_curr_instr(BPF_LD | BPF_W | BPF_LEN, 0, 0, 0); }
- | OP_LD K_PROTO {
- bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_PROTOCOL); }
- | OP_LD K_TYPE {
- bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_PKTTYPE); }
- | OP_LD K_IFIDX {
- bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_IFINDEX); }
- | OP_LD K_NLATTR {
- bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_NLATTR); }
- | OP_LD K_NLATTR_NEST {
- bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_NLATTR_NEST); }
- | OP_LD K_MARK {
- bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_MARK); }
- | OP_LD K_QUEUE {
- bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_QUEUE); }
- | OP_LD K_HATYPE {
- bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_HATYPE); }
- | OP_LD K_RXHASH {
- bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_RXHASH); }
- | OP_LD K_CPU {
- bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_CPU); }
- | OP_LD K_VLAN_TCI {
- bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_VLAN_TAG); }
- | OP_LD K_VLAN_AVAIL {
- bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT); }
- | OP_LD K_POFF {
- bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_PAY_OFFSET); }
- | OP_LD K_RAND {
- bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_RANDOM); }
- | OP_LD K_VLAN_TPID {
+ | OP_LD extension {
bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
- SKF_AD_OFF + SKF_AD_VLAN_TPID); }
+ SKF_AD_OFF + $2); }
| OP_LD 'M' '[' number ']' {
bpf_set_curr_instr(BPF_LD | BPF_MEM, 0, 0, $4); }
| OP_LD '[' 'x' '+' number ']' {
@@ -776,5 +651,6 @@ void bpf_asm_compile(FILE *fp, bool cstyle)
void yyerror(const char *str)
{
+ fprintf(stderr, "error: %s at line %d\n", str, yylineno);
exit(1);
}
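To make the refactoring above concrete: each ancillary-load keyword now carries its SKF_AD_* offset in yylval.number, so a single grammar rule per load width can emit the instruction. An assembler line such as "ld #proto" should therefore still assemble to the same classic BPF instruction as before; expressed in C with the BPF_STMT helper from the UAPI linux/filter.h, that instruction looks roughly like the following (illustrative sketch, not part of the patch).

    #include <stdio.h>
    #include <linux/filter.h>

    int main(void)
    {
        /* what "ld #proto" assembles to: width BPF_W, ancillary offset
         * SKF_AD_OFF + SKF_AD_PROTOCOL supplied via the extension token
         */
        struct sock_filter insn =
            BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
                     SKF_AD_OFF + SKF_AD_PROTOCOL);

        printf("code=0x%02x k=%d\n", insn.code, (int)insn.k);
        return 0;
    }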
diff --git a/tools/objtool/.gitignore b/tools/objtool/.gitignore
new file mode 100644
index 000000000000..a0b3128bb31f
--- /dev/null
+++ b/tools/objtool/.gitignore
@@ -0,0 +1,2 @@
+arch/x86/insn/inat-tables.c
+objtool
diff --git a/tools/objtool/Build b/tools/objtool/Build
new file mode 100644
index 000000000000..0e89258a3541
--- /dev/null
+++ b/tools/objtool/Build
@@ -0,0 +1,13 @@
+objtool-y += arch/$(ARCH)/
+objtool-y += builtin-check.o
+objtool-y += elf.o
+objtool-y += special.o
+objtool-y += objtool.o
+
+objtool-y += libstring.o
+
+CFLAGS += -I$(srctree)/tools/lib
+
+$(OUTPUT)libstring.o: ../lib/string.c FORCE
+ $(call rule_mkdir)
+ $(call if_changed_dep,cc_o_c)
diff --git a/tools/objtool/Documentation/stack-validation.txt b/tools/objtool/Documentation/stack-validation.txt
new file mode 100644
index 000000000000..55a60d331f47
--- /dev/null
+++ b/tools/objtool/Documentation/stack-validation.txt
@@ -0,0 +1,362 @@
+Compile-time stack metadata validation
+======================================
+
+
+Overview
+--------
+
+The kernel CONFIG_STACK_VALIDATION option enables a host tool named
+objtool which runs at compile time. It has a "check" subcommand which
+analyzes every .o file and ensures the validity of its stack metadata.
+It enforces a set of rules on asm code and C inline assembly code so
+that stack traces can be reliable.
+
+Currently it only checks frame pointer usage, but there are plans to add
+CFI validation for C files and CFI generation for asm files.
+
+For each function, it recursively follows all possible code paths and
+validates the correct frame pointer state at each instruction.
+
+It also follows code paths involving special sections, like
+.altinstructions, __jump_table, and __ex_table, which can add
+alternative execution paths to a given instruction (or set of
+instructions). Similarly, it knows how to follow switch statements, for
+which gcc sometimes uses jump tables.
+
+
+Why do we need stack metadata validation?
+-----------------------------------------
+
+Here are some of the benefits of validating stack metadata:
+
+a) More reliable stack traces for frame pointer enabled kernels
+
+ Frame pointers are used for debugging purposes. They allow runtime
+ code and debug tools to be able to walk the stack to determine the
+ chain of function call sites that led to the currently executing
+ code.
+
+ For some architectures, frame pointers are enabled by
+ CONFIG_FRAME_POINTER. For some other architectures they may be
+ required by the ABI (sometimes referred to as "backchain pointers").
+
+ For C code, gcc automatically generates instructions for setting up
+ frame pointers when the -fno-omit-frame-pointer option is used.
+
+ But for asm code, the frame setup instructions have to be written by
+ hand, which most people don't do. So the end result is that
+ CONFIG_FRAME_POINTER is honored for C code but not for most asm code.
+
+ For stack traces based on frame pointers to be reliable, all
+ functions which call other functions must first create a stack frame
+ and update the frame pointer. If a first function doesn't properly
+ create a stack frame before calling a second function, the *caller*
+ of the first function will be skipped on the stack trace.
+
+ For example, consider the following example backtrace with frame
+ pointers enabled:
+
+ [<ffffffff81812584>] dump_stack+0x4b/0x63
+ [<ffffffff812d6dc2>] cmdline_proc_show+0x12/0x30
+ [<ffffffff8127f568>] seq_read+0x108/0x3e0
+ [<ffffffff812cce62>] proc_reg_read+0x42/0x70
+ [<ffffffff81256197>] __vfs_read+0x37/0x100
+ [<ffffffff81256b16>] vfs_read+0x86/0x130
+ [<ffffffff81257898>] SyS_read+0x58/0xd0
+ [<ffffffff8181c1f2>] entry_SYSCALL_64_fastpath+0x12/0x76
+
+ It correctly shows that the caller of cmdline_proc_show() is
+ seq_read().
+
+ If we remove the frame pointer logic from cmdline_proc_show() by
+ replacing the frame pointer related instructions with nops, here's
+ what it looks like instead:
+
+ [<ffffffff81812584>] dump_stack+0x4b/0x63
+ [<ffffffff812d6dc2>] cmdline_proc_show+0x12/0x30
+ [<ffffffff812cce62>] proc_reg_read+0x42/0x70
+ [<ffffffff81256197>] __vfs_read+0x37/0x100
+ [<ffffffff81256b16>] vfs_read+0x86/0x130
+ [<ffffffff81257898>] SyS_read+0x58/0xd0
+ [<ffffffff8181c1f2>] entry_SYSCALL_64_fastpath+0x12/0x76
+
+ Notice that cmdline_proc_show()'s caller, seq_read(), has been
+ skipped. Instead the stack trace seems to show that
+ cmdline_proc_show() was called by proc_reg_read().
+
+ The benefit of objtool here is that because it ensures that *all*
+ functions honor CONFIG_FRAME_POINTER, no functions will ever[*] be
+ skipped on a stack trace.
+
+ [*] unless an interrupt or exception has occurred at the very
+ beginning of a function before the stack frame has been created,
+ or at the very end of the function after the stack frame has been
+ destroyed. This is an inherent limitation of frame pointers.
+
+b) 100% reliable stack traces for DWARF enabled kernels
+
+ (NOTE: This is not yet implemented)
+
+ As an alternative to frame pointers, DWARF Call Frame Information
+ (CFI) metadata can be used to walk the stack. Unlike frame pointers,
+ CFI metadata is out of band. So it doesn't affect runtime
+ performance and it can be reliable even when interrupts or exceptions
+ are involved.
+
+ For C code, gcc automatically generates DWARF CFI metadata. But for
+ asm code, generating CFI is a tedious manual approach which requires
+ manually placed .cfi assembler macros to be scattered throughout the
+ code. It's clumsy and very easy to get wrong, and it makes the real
+ code harder to read.
+
+ Objtool will improve this situation in several ways. For code
+ which already has CFI annotations, it will validate them. For code
+ which doesn't have CFI annotations, it will generate them. So an
+ architecture can opt to strip out all the manual .cfi annotations
+ from their asm code and have objtool generate them instead.
+
+ We might also add a runtime stack validation debug option where we
+ periodically walk the stack from schedule() and/or an NMI to ensure
+ that the stack metadata is sane and that we reach the bottom of the
+ stack.
+
+ So the benefit of objtool here will be that external tooling should
+ always show perfect stack traces. And the same will be true for
+ kernel warning/oops traces if the architecture has a runtime DWARF
+ unwinder.
+
+c) Higher live patching compatibility rate
+
+ (NOTE: This is not yet implemented)
+
+ Currently with CONFIG_LIVEPATCH there's a basic live patching
+ framework which is safe for roughly 85-90% of "security" fixes. But
+ patches can't have complex features like function dependency or
+ prototype changes, or data structure changes.
+
+ There's a strong need to support patches which have the more complex
+ features so that the patch compatibility rate for security fixes can
+ eventually approach something resembling 100%. To achieve that, a
+ "consistency model" is needed, which allows tasks to be safely
+ transitioned from an unpatched state to a patched state.
+
+ One of the key requirements of the currently proposed livepatch
+ consistency model [*] is that it needs to walk the stack of each
+ sleeping task to determine if it can be transitioned to the patched
+ state. If objtool can ensure that stack traces are reliable, this
+ consistency model can be used and the live patching compatibility
+ rate can be improved significantly.
+
+ [*] https://lkml.kernel.org/r/cover.1423499826.git.jpoimboe@redhat.com
+
+
+Rules
+-----
+
+To achieve the validation, objtool enforces the following rules:
+
+1. Each callable function must be annotated as such with the ELF
+ function type. In asm code, this is typically done using the
+ ENTRY/ENDPROC macros. If objtool finds a return instruction
+ outside of a function, it flags an error since that usually indicates
+ callable code which should be annotated accordingly.
+
+ This rule is needed so that objtool can properly identify each
+ callable function in order to analyze its stack metadata.
+
+2. Conversely, each section of code which is *not* callable should *not*
+ be annotated as an ELF function. The ENDPROC macro shouldn't be used
+ in this case.
+
+ This rule is needed so that objtool can ignore non-callable code.
+ Such code doesn't have to follow any of the other rules.
+
+3. Each callable function which calls another function must have the
+ correct frame pointer logic, if required by CONFIG_FRAME_POINTER or
+ the architecture's back chain rules. This can be done in asm code
+ with the FRAME_BEGIN/FRAME_END macros.
+
+ This rule ensures that frame pointer based stack traces will work as
+ designed. If function A doesn't create a stack frame before calling
+ function B, the _caller_ of function A will be skipped on the stack
+ trace.
+
+4. Dynamic jumps and jumps to undefined symbols are only allowed if:
+
+ a) the jump is part of a switch statement; or
+
+ b) the jump matches sibling call semantics and the frame pointer has
+ the same value it had on function entry.
+
+ This rule is needed so that objtool can reliably analyze all of a
+ function's code paths. If a function jumps to code in another file,
+ and it's not a sibling call, objtool has no way to follow the jump
+ because it only analyzes a single file at a time.
+
+5. A callable function may not execute kernel entry/exit instructions.
+ The only code which needs such instructions is kernel entry code,
+ which shouldn't be in callable functions anyway.
+
+ This rule is just a sanity check to ensure that callable functions
+ return normally.
+
+
+Errors in .S files
+------------------
+
+If you're getting an error in a compiled .S file which you don't
+understand, first make sure that the affected code follows the above
+rules.
+
+Here are some examples of common warnings reported by objtool, what
+they mean, and suggestions for how to fix them.
+
+
+1. asm_file.o: warning: objtool: func()+0x128: call without frame pointer save/setup
+
+ The func() function made a function call without first saving and/or
+ updating the frame pointer.
+
+ If func() is indeed a callable function, add proper frame pointer
+ logic using the FRAME_BEGIN and FRAME_END macros. Otherwise, remove
+ its ELF function annotation by changing ENDPROC to END.
+
+ If you're getting this error in a .c file, see the "Errors in .c
+ files" section.
+
+
+2. asm_file.o: warning: objtool: .text+0x53: return instruction outside of a callable function
+
+ A return instruction was detected, but objtool couldn't find a way
+ for a callable function to reach the instruction.
+
+ If the return instruction is inside (or reachable from) a callable
+ function, the function needs to be annotated with the ENTRY/ENDPROC
+ macros.
+
+ If you _really_ need a return instruction outside of a function, and
+ are 100% sure that it won't affect stack traces, you can tell
+ objtool to ignore it. See the "Adding exceptions" section below.
+
+
+3. asm_file.o: warning: objtool: func()+0x9: function has unreachable instruction
+
+ The instruction lives inside of a callable function, but there's no
+ possible control flow path from the beginning of the function to the
+ instruction.
+
+ If the instruction is actually needed, and it's actually in a
+ callable function, ensure that its function is properly annotated
+ with ENTRY/ENDPROC.
+
+ If it's not actually in a callable function (e.g. kernel entry code),
+ change ENDPROC to END.
+
+
+4. asm_file.o: warning: objtool: func(): can't find starting instruction
+ or
+ asm_file.o: warning: objtool: func()+0x11dd: can't decode instruction
+
+ Did you put data in a text section? If so, that can confuse
+ objtool's instruction decoder. Move the data to a more appropriate
+ section like .data or .rodata.
+
+
+5. asm_file.o: warning: objtool: func()+0x6: kernel entry/exit from callable instruction
+
+ This is a kernel entry/exit instruction like sysenter or sysret.
+ Such instructions aren't allowed in a callable function, and are most
+ likely part of the kernel entry code.
+
+ If the instruction isn't actually in a callable function, change
+ ENDPROC to END.
+
+
+6. asm_file.o: warning: objtool: func()+0x26: sibling call from callable instruction with changed frame pointer
+
+ This is a dynamic jump or a jump to an undefined symbol. Objtool
+ assumed it's a sibling call and detected that the frame pointer
+ wasn't first restored to its original state.
+
+ If it's not really a sibling call, you may need to move the
+ destination code to the local file.
+
+ If the instruction is not actually in a callable function (e.g.
+ kernel entry code), change ENDPROC to END.
+
+
+7. asm_file: warning: objtool: func()+0x5c: frame pointer state mismatch
+
+ The instruction's frame pointer state is inconsistent, depending on
+ which execution path was taken to reach the instruction.
+
+ Make sure the function pushes and sets up the frame pointer (for
+ x86_64, this means rbp) at the beginning of the function and pops it
+ at the end of the function. Also make sure that no other code in the
+ function touches the frame pointer.
+
+
+Errors in .c files
+------------------
+
+1. c_file.o: warning: objtool: funcA() falls through to next function funcB()
+
+ This means that funcA() doesn't end with a return instruction or an
+ unconditional jump, and that objtool has determined that the function
+ can fall through into the next function. There could be different
+ reasons for this:
+
+ 1) funcA()'s last instruction is a call to a "noreturn" function like
+ panic(). In this case the noreturn function needs to be added to
+ objtool's hard-coded global_noreturns array. Feel free to bug the
+ objtool maintainer, or you can submit a patch.
+
+ 2) funcA() uses the unreachable() annotation in a section of code
+ that is actually reachable.
+
+ 3) If funcA() calls an inline function, the object code for funcA()
+ might be corrupt due to a gcc bug. For more details, see:
+ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70646
+
+2. If you're getting any other objtool error in a compiled .c file, it
+ may be because the file uses an asm() statement which has a "call"
+ instruction. An asm() statement with a call instruction must declare
+ the use of the stack pointer in its output operand. For example, on
+ x86_64:
+
+ register void *__sp asm("rsp");
+ asm volatile("call func" : "+r" (__sp));
+
+ Otherwise the stack frame may not get created before the call.
+
+3. Another possible cause for errors in C code is if the Makefile removes
+ -fno-omit-frame-pointer or adds -fomit-frame-pointer to the gcc options.
+
+Also see the section on .S file errors above for more information about
+what the individual error messages mean.
+
+If the error doesn't seem to make sense, it could be a bug in objtool.
+Feel free to ask the objtool maintainer for help.
+
+
+Adding exceptions
+-----------------
+
+If you _really_ need objtool to ignore something, and are 100% sure
+that it won't affect kernel stack traces, you can tell objtool to
+ignore it:
+
+- To skip validation of a function, use the STACK_FRAME_NON_STANDARD
+ macro.
+
+- To skip validation of a file, add
+
+ OBJECT_FILES_NON_STANDARD_filename.o := n
+
+ to the Makefile.
+
+- To skip validation of a directory, add
+
+ OBJECT_FILES_NON_STANDARD := y
+
+ to the Makefile.
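As a hedged illustration of the function-level exception mentioned above (the function name is invented, and the include path is assumed from where the STACK_FRAME_NON_STANDARD macro lives in the kernel tree):

    #include <linux/frame.h>

    /* hypothetical function whose stack usage objtool cannot model */
    static void my_unusual_func(void)
    {
        /* ... deliberately non-standard stack manipulation ... */
    }
    /* ask objtool's check pass to skip stack validation of this function */
    STACK_FRAME_NON_STANDARD(my_unusual_func);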
diff --git a/tools/objtool/Makefile b/tools/objtool/Makefile
new file mode 100644
index 000000000000..6765c7e949f3
--- /dev/null
+++ b/tools/objtool/Makefile
@@ -0,0 +1,63 @@
+include ../scripts/Makefile.include
+
+ifndef ($(ARCH))
+ARCH ?= $(shell uname -m)
+ifeq ($(ARCH),x86_64)
+ARCH := x86
+endif
+endif
+
+# always use the host compiler
+CC = gcc
+LD = ld
+AR = ar
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(shell pwd)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+endif
+
+SUBCMD_SRCDIR = $(srctree)/tools/lib/subcmd/
+LIBSUBCMD_OUTPUT = $(if $(OUTPUT),$(OUTPUT),$(PWD)/)
+LIBSUBCMD = $(LIBSUBCMD_OUTPUT)libsubcmd.a
+
+OBJTOOL := $(OUTPUT)objtool
+OBJTOOL_IN := $(OBJTOOL)-in.o
+
+all: $(OBJTOOL)
+
+INCLUDES := -I$(srctree)/tools/include
+CFLAGS += -Wall -Werror $(EXTRA_WARNINGS) -fomit-frame-pointer -O2 -g $(INCLUDES)
+LDFLAGS += -lelf $(LIBSUBCMD)
+
+AWK = awk
+export srctree OUTPUT CFLAGS ARCH AWK
+include $(srctree)/tools/build/Makefile.include
+
+$(OBJTOOL_IN): fixdep FORCE
+ @$(MAKE) $(build)=objtool
+
+$(OBJTOOL): $(LIBSUBCMD) $(OBJTOOL_IN)
+ @(test -d ../../kernel -a -d ../../tools -a -d ../objtool && (( \
+ diff -I'^#include' arch/x86/insn/insn.c ../../arch/x86/lib/insn.c >/dev/null && \
+ diff -I'^#include' arch/x86/insn/inat.c ../../arch/x86/lib/inat.c >/dev/null && \
+ diff arch/x86/insn/x86-opcode-map.txt ../../arch/x86/lib/x86-opcode-map.txt >/dev/null && \
+ diff arch/x86/insn/gen-insn-attr-x86.awk ../../arch/x86/tools/gen-insn-attr-x86.awk >/dev/null && \
+ diff -I'^#include' arch/x86/insn/insn.h ../../arch/x86/include/asm/insn.h >/dev/null && \
+ diff -I'^#include' arch/x86/insn/inat.h ../../arch/x86/include/asm/inat.h >/dev/null && \
+ diff -I'^#include' arch/x86/insn/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) \
+ || echo "Warning: objtool: x86 instruction decoder differs from kernel" >&2 )) || true
+ $(QUIET_LINK)$(CC) $(OBJTOOL_IN) $(LDFLAGS) -o $@
+
+
+$(LIBSUBCMD): fixdep FORCE
+ $(Q)$(MAKE) -C $(SUBCMD_SRCDIR) OUTPUT=$(LIBSUBCMD_OUTPUT)
+
+clean:
+ $(call QUIET_CLEAN, objtool) $(RM) $(OBJTOOL)
+ $(Q)find $(OUTPUT) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
+ $(Q)$(RM) $(OUTPUT)arch/x86/insn/inat-tables.c $(OUTPUT)fixdep
+
+FORCE:
+
+.PHONY: clean FORCE
diff --git a/tools/objtool/arch.h b/tools/objtool/arch.h
new file mode 100644
index 000000000000..f7350fcedc70
--- /dev/null
+++ b/tools/objtool/arch.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _ARCH_H
+#define _ARCH_H
+
+#include <stdbool.h>
+#include "elf.h"
+
+#define INSN_FP_SAVE 1
+#define INSN_FP_SETUP 2
+#define INSN_FP_RESTORE 3
+#define INSN_JUMP_CONDITIONAL 4
+#define INSN_JUMP_UNCONDITIONAL 5
+#define INSN_JUMP_DYNAMIC 6
+#define INSN_CALL 7
+#define INSN_CALL_DYNAMIC 8
+#define INSN_RETURN 9
+#define INSN_CONTEXT_SWITCH 10
+#define INSN_BUG 11
+#define INSN_NOP 12
+#define INSN_OTHER 13
+#define INSN_LAST INSN_OTHER
+
+int arch_decode_instruction(struct elf *elf, struct section *sec,
+ unsigned long offset, unsigned int maxlen,
+ unsigned int *len, unsigned char *type,
+ unsigned long *displacement);
+
+#endif /* _ARCH_H */
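
The header above is the whole arch interface: one decode hook plus the INSN_* classification it reports. For orientation, here is a hypothetical caller sketch (not part of the patch; the real consumer lives elsewhere in this series). Passing the section length in explicitly is an assumption of the sketch, so it does not rely on the layout of struct elf or struct section from elf.h.

/* Hypothetical caller of the arch decode hook -- illustration only. */
#include <stdio.h>

#include "elf.h"
#include "arch.h"

static int classify_section(struct elf *elf, struct section *sec,
			    unsigned long sec_len)
{
	unsigned long offset = 0;
	unsigned int len;
	unsigned char type;
	unsigned long imm;

	while (offset < sec_len) {
		if (arch_decode_instruction(elf, sec, offset, sec_len - offset,
					    &len, &type, &imm))
			return -1;
		if (type == INSN_CALL)
			printf("call at offset 0x%lx, rel32 %+ld\n",
			       offset, (long)imm);
		offset += len;
	}
	return 0;
}
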
diff --git a/tools/objtool/arch/x86/Build b/tools/objtool/arch/x86/Build
new file mode 100644
index 000000000000..debbdb0b5c43
--- /dev/null
+++ b/tools/objtool/arch/x86/Build
@@ -0,0 +1,12 @@
+objtool-y += decode.o
+
+inat_tables_script = arch/x86/insn/gen-insn-attr-x86.awk
+inat_tables_maps = arch/x86/insn/x86-opcode-map.txt
+
+$(OUTPUT)arch/x86/insn/inat-tables.c: $(inat_tables_script) $(inat_tables_maps)
+ $(call rule_mkdir)
+ $(Q)$(call echo-cmd,gen)$(AWK) -f $(inat_tables_script) $(inat_tables_maps) > $@
+
+$(OUTPUT)arch/x86/decode.o: $(OUTPUT)arch/x86/insn/inat-tables.c
+
+CFLAGS_decode.o += -I$(OUTPUT)arch/x86/insn
diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c
new file mode 100644
index 000000000000..c0c0b265e88e
--- /dev/null
+++ b/tools/objtool/arch/x86/decode.c
@@ -0,0 +1,172 @@
+/*
+ * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#define unlikely(cond) (cond)
+#include "insn/insn.h"
+#include "insn/inat.c"
+#include "insn/insn.c"
+
+#include "../../elf.h"
+#include "../../arch.h"
+#include "../../warn.h"
+
+static int is_x86_64(struct elf *elf)
+{
+ switch (elf->ehdr.e_machine) {
+ case EM_X86_64:
+ return 1;
+ case EM_386:
+ return 0;
+ default:
+ WARN("unexpected ELF machine type %d", elf->ehdr.e_machine);
+ return -1;
+ }
+}
+
+int arch_decode_instruction(struct elf *elf, struct section *sec,
+ unsigned long offset, unsigned int maxlen,
+ unsigned int *len, unsigned char *type,
+ unsigned long *immediate)
+{
+ struct insn insn;
+ int x86_64;
+ unsigned char op1, op2, ext;
+
+ x86_64 = is_x86_64(elf);
+ if (x86_64 == -1)
+ return -1;
+
+ insn_init(&insn, (void *)(sec->data + offset), maxlen, x86_64);
+ insn_get_length(&insn);
+ insn_get_opcode(&insn);
+ insn_get_modrm(&insn);
+ insn_get_immediate(&insn);
+
+ if (!insn_complete(&insn)) {
+ WARN_FUNC("can't decode instruction", sec, offset);
+ return -1;
+ }
+
+ *len = insn.length;
+ *type = INSN_OTHER;
+
+ if (insn.vex_prefix.nbytes)
+ return 0;
+
+ op1 = insn.opcode.bytes[0];
+ op2 = insn.opcode.bytes[1];
+
+ switch (op1) {
+ case 0x55:
+ if (!insn.rex_prefix.nbytes)
+ /* push rbp */
+ *type = INSN_FP_SAVE;
+ break;
+
+ case 0x5d:
+ if (!insn.rex_prefix.nbytes)
+ /* pop rbp */
+ *type = INSN_FP_RESTORE;
+ break;
+
+ case 0x70 ... 0x7f:
+ *type = INSN_JUMP_CONDITIONAL;
+ break;
+
+ case 0x89:
+ if (insn.rex_prefix.nbytes == 1 &&
+ insn.rex_prefix.bytes[0] == 0x48 &&
+ insn.modrm.nbytes && insn.modrm.bytes[0] == 0xe5)
+ /* mov rsp, rbp */
+ *type = INSN_FP_SETUP;
+ break;
+
+ case 0x90:
+ *type = INSN_NOP;
+ break;
+
+ case 0x0f:
+ if (op2 >= 0x80 && op2 <= 0x8f)
+ *type = INSN_JUMP_CONDITIONAL;
+ else if (op2 == 0x05 || op2 == 0x07 || op2 == 0x34 ||
+ op2 == 0x35)
+ /* syscall, sysret, sysenter, sysexit */
+ *type = INSN_CONTEXT_SWITCH;
+ else if (op2 == 0x0b || op2 == 0xb9)
+ /* ud2 */
+ *type = INSN_BUG;
+ else if (op2 == 0x0d || op2 == 0x1f)
+ /* nopl/nopw */
+ *type = INSN_NOP;
+ else if (op2 == 0x01 && insn.modrm.nbytes &&
+ (insn.modrm.bytes[0] == 0xc2 ||
+ insn.modrm.bytes[0] == 0xd8))
+ /* vmlaunch, vmrun */
+ *type = INSN_CONTEXT_SWITCH;
+
+ break;
+
+ case 0xc9: /* leave */
+ *type = INSN_FP_RESTORE;
+ break;
+
+ case 0xe3: /* jecxz/jrcxz */
+ *type = INSN_JUMP_CONDITIONAL;
+ break;
+
+ case 0xe9:
+ case 0xeb:
+ *type = INSN_JUMP_UNCONDITIONAL;
+ break;
+
+ case 0xc2:
+ case 0xc3:
+ *type = INSN_RETURN;
+ break;
+
+ case 0xcf: /* iret */
+ case 0xca: /* retf */
+ case 0xcb: /* retf */
+ *type = INSN_CONTEXT_SWITCH;
+ break;
+
+ case 0xe8:
+ *type = INSN_CALL;
+ break;
+
+ case 0xff:
+ ext = X86_MODRM_REG(insn.modrm.bytes[0]);
+ if (ext == 2 || ext == 3)
+ *type = INSN_CALL_DYNAMIC;
+ else if (ext == 4)
+ *type = INSN_JUMP_DYNAMIC;
+ else if (ext == 5) /* jmpf */
+ *type = INSN_CONTEXT_SWITCH;
+
+ break;
+
+ default:
+ break;
+ }
+
+ *immediate = insn.immediate.nbytes ? insn.immediate.value : 0;
+
+ return 0;
+}
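
As a concrete illustration of the switch above, this is what the decoder reports for the plain frame-pointer prologue and epilogue bytes (a sketch in table form; the INSN_* constants come from arch.h and the byte patterns are the ones matched by the cases above):

/* Byte pattern          -> type reported by arch_decode_instruction() */
#include "arch.h"

static const struct insn_example {
	const char *bytes;
	int type;
} frame_pointer_example[] = {
	{ "55",             INSN_FP_SAVE },    /* push %rbp             */
	{ "48 89 e5",       INSN_FP_SETUP },   /* mov %rsp,%rbp (REX.W) */
	{ "e8 xx xx xx xx", INSN_CALL },       /* call rel32            */
	{ "c9",             INSN_FP_RESTORE }, /* leave                 */
	{ "c3",             INSN_RETURN },     /* ret                   */
};
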
diff --git a/tools/objtool/arch/x86/insn/gen-insn-attr-x86.awk b/tools/objtool/arch/x86/insn/gen-insn-attr-x86.awk
new file mode 100644
index 000000000000..093a892026f9
--- /dev/null
+++ b/tools/objtool/arch/x86/insn/gen-insn-attr-x86.awk
@@ -0,0 +1,387 @@
+#!/bin/awk -f
+# gen-insn-attr-x86.awk: Instruction attribute table generator
+# Written by Masami Hiramatsu <mhiramat@redhat.com>
+#
+# Usage: awk -f gen-insn-attr-x86.awk x86-opcode-map.txt > inat-tables.c
+
+# Awk implementation sanity check
+function check_awk_implement() {
+ if (sprintf("%x", 0) != "0")
+ return "Your awk has a printf-format problem."
+ return ""
+}
+
+# Clear working vars
+function clear_vars() {
+ delete table
+ delete lptable2
+ delete lptable1
+ delete lptable3
+ eid = -1 # escape id
+ gid = -1 # group id
+ aid = -1 # AVX id
+ tname = ""
+}
+
+BEGIN {
+ # Implementation error checking
+ awkchecked = check_awk_implement()
+ if (awkchecked != "") {
+ print "Error: " awkchecked > "/dev/stderr"
+ print "Please try to use gawk." > "/dev/stderr"
+ exit 1
+ }
+
+ # Setup generating tables
+ print "/* x86 opcode map generated from x86-opcode-map.txt */"
+ print "/* Do not change this code. */\n"
+ ggid = 1
+ geid = 1
+ gaid = 0
+ delete etable
+ delete gtable
+ delete atable
+
+ opnd_expr = "^[A-Za-z/]"
+ ext_expr = "^\\("
+ sep_expr = "^\\|$"
+ group_expr = "^Grp[0-9A-Za-z]+"
+
+ imm_expr = "^[IJAOL][a-z]"
+ imm_flag["Ib"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
+ imm_flag["Jb"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
+ imm_flag["Iw"] = "INAT_MAKE_IMM(INAT_IMM_WORD)"
+ imm_flag["Id"] = "INAT_MAKE_IMM(INAT_IMM_DWORD)"
+ imm_flag["Iq"] = "INAT_MAKE_IMM(INAT_IMM_QWORD)"
+ imm_flag["Ap"] = "INAT_MAKE_IMM(INAT_IMM_PTR)"
+ imm_flag["Iz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
+ imm_flag["Jz"] = "INAT_MAKE_IMM(INAT_IMM_VWORD32)"
+ imm_flag["Iv"] = "INAT_MAKE_IMM(INAT_IMM_VWORD)"
+ imm_flag["Ob"] = "INAT_MOFFSET"
+ imm_flag["Ov"] = "INAT_MOFFSET"
+ imm_flag["Lx"] = "INAT_MAKE_IMM(INAT_IMM_BYTE)"
+
+ modrm_expr = "^([CDEGMNPQRSUVW/][a-z]+|NTA|T[012])"
+ force64_expr = "\\([df]64\\)"
+ rex_expr = "^REX(\\.[XRWB]+)*"
+ fpu_expr = "^ESC" # TODO
+
+ lprefix1_expr = "\\((66|!F3)\\)"
+ lprefix2_expr = "\\(F3\\)"
+ lprefix3_expr = "\\((F2|!F3|66\\&F2)\\)"
+ lprefix_expr = "\\((66|F2|F3)\\)"
+ max_lprefix = 4
+
+ # All opcodes starting with lower-case 'v' or with (v1) superscript
+ # accept the VEX prefix
+ vexok_opcode_expr = "^v.*"
+ vexok_expr = "\\(v1\\)"
+ # All opcodes with (v) superscript support *only* the VEX prefix
+ vexonly_expr = "\\(v\\)"
+
+ prefix_expr = "\\(Prefix\\)"
+ prefix_num["Operand-Size"] = "INAT_PFX_OPNDSZ"
+ prefix_num["REPNE"] = "INAT_PFX_REPNE"
+ prefix_num["REP/REPE"] = "INAT_PFX_REPE"
+ prefix_num["XACQUIRE"] = "INAT_PFX_REPNE"
+ prefix_num["XRELEASE"] = "INAT_PFX_REPE"
+ prefix_num["LOCK"] = "INAT_PFX_LOCK"
+ prefix_num["SEG=CS"] = "INAT_PFX_CS"
+ prefix_num["SEG=DS"] = "INAT_PFX_DS"
+ prefix_num["SEG=ES"] = "INAT_PFX_ES"
+ prefix_num["SEG=FS"] = "INAT_PFX_FS"
+ prefix_num["SEG=GS"] = "INAT_PFX_GS"
+ prefix_num["SEG=SS"] = "INAT_PFX_SS"
+ prefix_num["Address-Size"] = "INAT_PFX_ADDRSZ"
+ prefix_num["VEX+1byte"] = "INAT_PFX_VEX2"
+ prefix_num["VEX+2byte"] = "INAT_PFX_VEX3"
+
+ clear_vars()
+}
+
+function semantic_error(msg) {
+ print "Semantic error at " NR ": " msg > "/dev/stderr"
+ exit 1
+}
+
+function debug(msg) {
+ print "DEBUG: " msg
+}
+
+function array_size(arr, i,c) {
+ c = 0
+ for (i in arr)
+ c++
+ return c
+}
+
+/^Table:/ {
+ print "/* " $0 " */"
+ if (tname != "")
+ semantic_error("Hit Table: before EndTable:.");
+}
+
+/^Referrer:/ {
+ if (NF != 1) {
+ # escape opcode table
+ ref = ""
+ for (i = 2; i <= NF; i++)
+ ref = ref $i
+ eid = escape[ref]
+ tname = sprintf("inat_escape_table_%d", eid)
+ }
+}
+
+/^AVXcode:/ {
+ if (NF != 1) {
+ # AVX/escape opcode table
+ aid = $2
+ if (gaid <= aid)
+ gaid = aid + 1
+ if (tname == "") # AVX only opcode table
+ tname = sprintf("inat_avx_table_%d", $2)
+ }
+ if (aid == -1 && eid == -1) # primary opcode table
+ tname = "inat_primary_table"
+}
+
+/^GrpTable:/ {
+ print "/* " $0 " */"
+ if (!($2 in group))
+ semantic_error("No group: " $2 )
+ gid = group[$2]
+ tname = "inat_group_table_" gid
+}
+
+function print_table(tbl,name,fmt,n)
+{
+ print "const insn_attr_t " name " = {"
+ for (i = 0; i < n; i++) {
+ id = sprintf(fmt, i)
+ if (tbl[id])
+ print " [" id "] = " tbl[id] ","
+ }
+ print "};"
+}
+
+/^EndTable/ {
+ if (gid != -1) {
+ # print group tables
+ if (array_size(table) != 0) {
+ print_table(table, tname "[INAT_GROUP_TABLE_SIZE]",
+ "0x%x", 8)
+ gtable[gid,0] = tname
+ }
+ if (array_size(lptable1) != 0) {
+ print_table(lptable1, tname "_1[INAT_GROUP_TABLE_SIZE]",
+ "0x%x", 8)
+ gtable[gid,1] = tname "_1"
+ }
+ if (array_size(lptable2) != 0) {
+ print_table(lptable2, tname "_2[INAT_GROUP_TABLE_SIZE]",
+ "0x%x", 8)
+ gtable[gid,2] = tname "_2"
+ }
+ if (array_size(lptable3) != 0) {
+ print_table(lptable3, tname "_3[INAT_GROUP_TABLE_SIZE]",
+ "0x%x", 8)
+ gtable[gid,3] = tname "_3"
+ }
+ } else {
+ # print primary/escaped tables
+ if (array_size(table) != 0) {
+ print_table(table, tname "[INAT_OPCODE_TABLE_SIZE]",
+ "0x%02x", 256)
+ etable[eid,0] = tname
+ if (aid >= 0)
+ atable[aid,0] = tname
+ }
+ if (array_size(lptable1) != 0) {
+ print_table(lptable1,tname "_1[INAT_OPCODE_TABLE_SIZE]",
+ "0x%02x", 256)
+ etable[eid,1] = tname "_1"
+ if (aid >= 0)
+ atable[aid,1] = tname "_1"
+ }
+ if (array_size(lptable2) != 0) {
+ print_table(lptable2,tname "_2[INAT_OPCODE_TABLE_SIZE]",
+ "0x%02x", 256)
+ etable[eid,2] = tname "_2"
+ if (aid >= 0)
+ atable[aid,2] = tname "_2"
+ }
+ if (array_size(lptable3) != 0) {
+ print_table(lptable3,tname "_3[INAT_OPCODE_TABLE_SIZE]",
+ "0x%02x", 256)
+ etable[eid,3] = tname "_3"
+ if (aid >= 0)
+ atable[aid,3] = tname "_3"
+ }
+ }
+ print ""
+ clear_vars()
+}
+
+function add_flags(old,new) {
+ if (old && new)
+ return old " | " new
+ else if (old)
+ return old
+ else
+ return new
+}
+
+# convert operands to flags.
+function convert_operands(count,opnd, i,j,imm,mod)
+{
+ imm = null
+ mod = null
+ for (j = 1; j <= count; j++) {
+ i = opnd[j]
+ if (match(i, imm_expr) == 1) {
+ if (!imm_flag[i])
+ semantic_error("Unknown imm opnd: " i)
+ if (imm) {
+ if (i != "Ib")
+ semantic_error("Second IMM error")
+ imm = add_flags(imm, "INAT_SCNDIMM")
+ } else
+ imm = imm_flag[i]
+ } else if (match(i, modrm_expr))
+ mod = "INAT_MODRM"
+ }
+ return add_flags(imm, mod)
+}
+
+/^[0-9a-f]+\:/ {
+ if (NR == 1)
+ next
+ # get index
+ idx = "0x" substr($1, 1, index($1,":") - 1)
+ if (idx in table)
+ semantic_error("Redefine " idx " in " tname)
+
+ # check if escaped opcode
+ if ("escape" == $2) {
+ if ($3 != "#")
+ semantic_error("No escaped name")
+ ref = ""
+ for (i = 4; i <= NF; i++)
+ ref = ref $i
+ if (ref in escape)
+ semantic_error("Redefine escape (" ref ")")
+ escape[ref] = geid
+ geid++
+ table[idx] = "INAT_MAKE_ESCAPE(" escape[ref] ")"
+ next
+ }
+
+ variant = null
+ # converts
+ i = 2
+ while (i <= NF) {
+ opcode = $(i++)
+ delete opnds
+ ext = null
+ flags = null
+ opnd = null
+ # parse one opcode
+ if (match($i, opnd_expr)) {
+ opnd = $i
+ count = split($(i++), opnds, ",")
+ flags = convert_operands(count, opnds)
+ }
+ if (match($i, ext_expr))
+ ext = $(i++)
+ if (match($i, sep_expr))
+ i++
+ else if (i < NF)
+ semantic_error($i " is not a separator")
+
+ # check if group opcode
+ if (match(opcode, group_expr)) {
+ if (!(opcode in group)) {
+ group[opcode] = ggid
+ ggid++
+ }
+ flags = add_flags(flags, "INAT_MAKE_GROUP(" group[opcode] ")")
+ }
+ # check force(or default) 64bit
+ if (match(ext, force64_expr))
+ flags = add_flags(flags, "INAT_FORCE64")
+
+ # check REX prefix
+ if (match(opcode, rex_expr))
+ flags = add_flags(flags, "INAT_MAKE_PREFIX(INAT_PFX_REX)")
+
+ # check coprocessor escape : TODO
+ if (match(opcode, fpu_expr))
+ flags = add_flags(flags, "INAT_MODRM")
+
+ # check VEX codes
+ if (match(ext, vexonly_expr))
+ flags = add_flags(flags, "INAT_VEXOK | INAT_VEXONLY")
+ else if (match(ext, vexok_expr) || match(opcode, vexok_opcode_expr))
+ flags = add_flags(flags, "INAT_VEXOK")
+
+ # check prefixes
+ if (match(ext, prefix_expr)) {
+ if (!prefix_num[opcode])
+ semantic_error("Unknown prefix: " opcode)
+ flags = add_flags(flags, "INAT_MAKE_PREFIX(" prefix_num[opcode] ")")
+ }
+ if (length(flags) == 0)
+ continue
+ # check if last prefix
+ if (match(ext, lprefix1_expr)) {
+ lptable1[idx] = add_flags(lptable1[idx],flags)
+ variant = "INAT_VARIANT"
+ }
+ if (match(ext, lprefix2_expr)) {
+ lptable2[idx] = add_flags(lptable2[idx],flags)
+ variant = "INAT_VARIANT"
+ }
+ if (match(ext, lprefix3_expr)) {
+ lptable3[idx] = add_flags(lptable3[idx],flags)
+ variant = "INAT_VARIANT"
+ }
+ if (!match(ext, lprefix_expr)){
+ table[idx] = add_flags(table[idx],flags)
+ }
+ }
+ if (variant)
+ table[idx] = add_flags(table[idx],variant)
+}
+
+END {
+ if (awkchecked != "")
+ exit 1
+ # print escape opcode map's array
+ print "/* Escape opcode map array */"
+ print "const insn_attr_t * const inat_escape_tables[INAT_ESC_MAX + 1]" \
+ "[INAT_LSTPFX_MAX + 1] = {"
+ for (i = 0; i < geid; i++)
+ for (j = 0; j < max_lprefix; j++)
+ if (etable[i,j])
+ print " ["i"]["j"] = "etable[i,j]","
+ print "};\n"
+ # print group opcode map's array
+ print "/* Group opcode map array */"
+ print "const insn_attr_t * const inat_group_tables[INAT_GRP_MAX + 1]"\
+ "[INAT_LSTPFX_MAX + 1] = {"
+ for (i = 0; i < ggid; i++)
+ for (j = 0; j < max_lprefix; j++)
+ if (gtable[i,j])
+ print " ["i"]["j"] = "gtable[i,j]","
+ print "};\n"
+ # print AVX opcode map's array
+ print "/* AVX opcode map array */"
+ print "const insn_attr_t * const inat_avx_tables[X86_VEX_M_MAX + 1]"\
+ "[INAT_LSTPFX_MAX + 1] = {"
+ for (i = 0; i < gaid; i++)
+ for (j = 0; j < max_lprefix; j++)
+ if (atable[i,j])
+ print " ["i"]["j"] = "atable[i,j]","
+ print "};"
+}
+
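
The script above emits plain C arrays with designated initializers. The sketch below shows the shape such a generated inat-tables.c takes for two one-byte opcodes, purely as an illustration; the actual escape and group ids depend on the order in which the map file defines them, and the real contents are always regenerated from x86-opcode-map.txt.

/* Illustrative shape of generated inat-tables.c (not real output). */
#include "inat.h"

const insn_attr_t inat_primary_table[INAT_OPCODE_TABLE_SIZE] = {
	/* "0f: escape # 2-byte escape" -> reference into the escape tables */
	[0x0f] = INAT_MAKE_ESCAPE(1),
	/* "80: Grp1 Eb,Ib (1A)" -> ModRM + byte immediate + group reference */
	[0x80] = INAT_MAKE_IMM(INAT_IMM_BYTE) | INAT_MODRM | INAT_MAKE_GROUP(1),
};
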
diff --git a/tools/objtool/arch/x86/insn/inat.c b/tools/objtool/arch/x86/insn/inat.c
new file mode 100644
index 000000000000..e4bf28e6f4c7
--- /dev/null
+++ b/tools/objtool/arch/x86/insn/inat.c
@@ -0,0 +1,97 @@
+/*
+ * x86 instruction attribute tables
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+#include "insn.h"
+
+/* Attribute tables are generated from opcode map */
+#include "inat-tables.c"
+
+/* Attribute search APIs */
+insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode)
+{
+ return inat_primary_table[opcode];
+}
+
+int inat_get_last_prefix_id(insn_byte_t last_pfx)
+{
+ insn_attr_t lpfx_attr;
+
+ lpfx_attr = inat_get_opcode_attribute(last_pfx);
+ return inat_last_prefix_id(lpfx_attr);
+}
+
+insn_attr_t inat_get_escape_attribute(insn_byte_t opcode, int lpfx_id,
+ insn_attr_t esc_attr)
+{
+ const insn_attr_t *table;
+ int n;
+
+ n = inat_escape_id(esc_attr);
+
+ table = inat_escape_tables[n][0];
+ if (!table)
+ return 0;
+ if (inat_has_variant(table[opcode]) && lpfx_id) {
+ table = inat_escape_tables[n][lpfx_id];
+ if (!table)
+ return 0;
+ }
+ return table[opcode];
+}
+
+insn_attr_t inat_get_group_attribute(insn_byte_t modrm, int lpfx_id,
+ insn_attr_t grp_attr)
+{
+ const insn_attr_t *table;
+ int n;
+
+ n = inat_group_id(grp_attr);
+
+ table = inat_group_tables[n][0];
+ if (!table)
+ return inat_group_common_attribute(grp_attr);
+ if (inat_has_variant(table[X86_MODRM_REG(modrm)]) && lpfx_id) {
+ table = inat_group_tables[n][lpfx_id];
+ if (!table)
+ return inat_group_common_attribute(grp_attr);
+ }
+ return table[X86_MODRM_REG(modrm)] |
+ inat_group_common_attribute(grp_attr);
+}
+
+insn_attr_t inat_get_avx_attribute(insn_byte_t opcode, insn_byte_t vex_m,
+ insn_byte_t vex_p)
+{
+ const insn_attr_t *table;
+ if (vex_m > X86_VEX_M_MAX || vex_p > INAT_LSTPFX_MAX)
+ return 0;
+ /* At first, this checks the master table */
+ table = inat_avx_tables[vex_m][0];
+ if (!table)
+ return 0;
+ if (!inat_is_group(table[opcode]) && vex_p) {
+ /* If this is not a group, get attribute directly */
+ table = inat_avx_tables[vex_m][vex_p];
+ if (!table)
+ return 0;
+ }
+ return table[opcode];
+}
+
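
The helpers above implement a two-level lookup: the primary table yields either a final attribute or a reference into the escape/group tables, optionally refined by the last legacy prefix. A short sketch of how a two-byte opcode (0x0f xx) would be resolved with them, mirroring what the decoder in insn.c does; it must be linked with inat.c and the generated inat-tables.c:

/* Sketch: resolve the attribute of a two-byte opcode 0x0f <op2>,
 * given the last legacy prefix byte (0 if none). */
#include "inat.h"

static insn_attr_t lookup_two_byte(insn_byte_t op2, insn_byte_t last_pfx)
{
	insn_attr_t esc = inat_get_opcode_attribute(0x0f); /* escape entry */
	int lpfx_id = last_pfx ? inat_get_last_prefix_id(last_pfx) : 0;

	return inat_get_escape_attribute(op2, lpfx_id, esc);
}
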
diff --git a/tools/objtool/arch/x86/insn/inat.h b/tools/objtool/arch/x86/insn/inat.h
new file mode 100644
index 000000000000..611645e903a8
--- /dev/null
+++ b/tools/objtool/arch/x86/insn/inat.h
@@ -0,0 +1,221 @@
+#ifndef _ASM_X86_INAT_H
+#define _ASM_X86_INAT_H
+/*
+ * x86 instruction attributes
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+#include "inat_types.h"
+
+/*
+ * Internal bits. Don't use bitmasks directly, because these bits are
+ * unstable. You should use checking functions.
+ */
+
+#define INAT_OPCODE_TABLE_SIZE 256
+#define INAT_GROUP_TABLE_SIZE 8
+
+/* Legacy last prefixes */
+#define INAT_PFX_OPNDSZ 1 /* 0x66 */ /* LPFX1 */
+#define INAT_PFX_REPE 2 /* 0xF3 */ /* LPFX2 */
+#define INAT_PFX_REPNE 3 /* 0xF2 */ /* LPFX3 */
+/* Other Legacy prefixes */
+#define INAT_PFX_LOCK 4 /* 0xF0 */
+#define INAT_PFX_CS 5 /* 0x2E */
+#define INAT_PFX_DS 6 /* 0x3E */
+#define INAT_PFX_ES 7 /* 0x26 */
+#define INAT_PFX_FS 8 /* 0x64 */
+#define INAT_PFX_GS 9 /* 0x65 */
+#define INAT_PFX_SS 10 /* 0x36 */
+#define INAT_PFX_ADDRSZ 11 /* 0x67 */
+/* x86-64 REX prefix */
+#define INAT_PFX_REX 12 /* 0x4X */
+/* AVX VEX prefixes */
+#define INAT_PFX_VEX2 13 /* 2-byte VEX prefix */
+#define INAT_PFX_VEX3 14 /* 3-byte VEX prefix */
+
+#define INAT_LSTPFX_MAX 3
+#define INAT_LGCPFX_MAX 11
+
+/* Immediate size */
+#define INAT_IMM_BYTE 1
+#define INAT_IMM_WORD 2
+#define INAT_IMM_DWORD 3
+#define INAT_IMM_QWORD 4
+#define INAT_IMM_PTR 5
+#define INAT_IMM_VWORD32 6
+#define INAT_IMM_VWORD 7
+
+/* Legacy prefix */
+#define INAT_PFX_OFFS 0
+#define INAT_PFX_BITS 4
+#define INAT_PFX_MAX ((1 << INAT_PFX_BITS) - 1)
+#define INAT_PFX_MASK (INAT_PFX_MAX << INAT_PFX_OFFS)
+/* Escape opcodes */
+#define INAT_ESC_OFFS (INAT_PFX_OFFS + INAT_PFX_BITS)
+#define INAT_ESC_BITS 2
+#define INAT_ESC_MAX ((1 << INAT_ESC_BITS) - 1)
+#define INAT_ESC_MASK (INAT_ESC_MAX << INAT_ESC_OFFS)
+/* Group opcodes (1-16) */
+#define INAT_GRP_OFFS (INAT_ESC_OFFS + INAT_ESC_BITS)
+#define INAT_GRP_BITS 5
+#define INAT_GRP_MAX ((1 << INAT_GRP_BITS) - 1)
+#define INAT_GRP_MASK (INAT_GRP_MAX << INAT_GRP_OFFS)
+/* Immediates */
+#define INAT_IMM_OFFS (INAT_GRP_OFFS + INAT_GRP_BITS)
+#define INAT_IMM_BITS 3
+#define INAT_IMM_MASK (((1 << INAT_IMM_BITS) - 1) << INAT_IMM_OFFS)
+/* Flags */
+#define INAT_FLAG_OFFS (INAT_IMM_OFFS + INAT_IMM_BITS)
+#define INAT_MODRM (1 << (INAT_FLAG_OFFS))
+#define INAT_FORCE64 (1 << (INAT_FLAG_OFFS + 1))
+#define INAT_SCNDIMM (1 << (INAT_FLAG_OFFS + 2))
+#define INAT_MOFFSET (1 << (INAT_FLAG_OFFS + 3))
+#define INAT_VARIANT (1 << (INAT_FLAG_OFFS + 4))
+#define INAT_VEXOK (1 << (INAT_FLAG_OFFS + 5))
+#define INAT_VEXONLY (1 << (INAT_FLAG_OFFS + 6))
+/* Attribute making macros for attribute tables */
+#define INAT_MAKE_PREFIX(pfx) (pfx << INAT_PFX_OFFS)
+#define INAT_MAKE_ESCAPE(esc) (esc << INAT_ESC_OFFS)
+#define INAT_MAKE_GROUP(grp) ((grp << INAT_GRP_OFFS) | INAT_MODRM)
+#define INAT_MAKE_IMM(imm) (imm << INAT_IMM_OFFS)
+
+/* Attribute search APIs */
+extern insn_attr_t inat_get_opcode_attribute(insn_byte_t opcode);
+extern int inat_get_last_prefix_id(insn_byte_t last_pfx);
+extern insn_attr_t inat_get_escape_attribute(insn_byte_t opcode,
+ int lpfx_id,
+ insn_attr_t esc_attr);
+extern insn_attr_t inat_get_group_attribute(insn_byte_t modrm,
+ int lpfx_id,
+ insn_attr_t esc_attr);
+extern insn_attr_t inat_get_avx_attribute(insn_byte_t opcode,
+ insn_byte_t vex_m,
+ insn_byte_t vex_pp);
+
+/* Attribute checking functions */
+static inline int inat_is_legacy_prefix(insn_attr_t attr)
+{
+ attr &= INAT_PFX_MASK;
+ return attr && attr <= INAT_LGCPFX_MAX;
+}
+
+static inline int inat_is_address_size_prefix(insn_attr_t attr)
+{
+ return (attr & INAT_PFX_MASK) == INAT_PFX_ADDRSZ;
+}
+
+static inline int inat_is_operand_size_prefix(insn_attr_t attr)
+{
+ return (attr & INAT_PFX_MASK) == INAT_PFX_OPNDSZ;
+}
+
+static inline int inat_is_rex_prefix(insn_attr_t attr)
+{
+ return (attr & INAT_PFX_MASK) == INAT_PFX_REX;
+}
+
+static inline int inat_last_prefix_id(insn_attr_t attr)
+{
+ if ((attr & INAT_PFX_MASK) > INAT_LSTPFX_MAX)
+ return 0;
+ else
+ return attr & INAT_PFX_MASK;
+}
+
+static inline int inat_is_vex_prefix(insn_attr_t attr)
+{
+ attr &= INAT_PFX_MASK;
+ return attr == INAT_PFX_VEX2 || attr == INAT_PFX_VEX3;
+}
+
+static inline int inat_is_vex3_prefix(insn_attr_t attr)
+{
+ return (attr & INAT_PFX_MASK) == INAT_PFX_VEX3;
+}
+
+static inline int inat_is_escape(insn_attr_t attr)
+{
+ return attr & INAT_ESC_MASK;
+}
+
+static inline int inat_escape_id(insn_attr_t attr)
+{
+ return (attr & INAT_ESC_MASK) >> INAT_ESC_OFFS;
+}
+
+static inline int inat_is_group(insn_attr_t attr)
+{
+ return attr & INAT_GRP_MASK;
+}
+
+static inline int inat_group_id(insn_attr_t attr)
+{
+ return (attr & INAT_GRP_MASK) >> INAT_GRP_OFFS;
+}
+
+static inline int inat_group_common_attribute(insn_attr_t attr)
+{
+ return attr & ~INAT_GRP_MASK;
+}
+
+static inline int inat_has_immediate(insn_attr_t attr)
+{
+ return attr & INAT_IMM_MASK;
+}
+
+static inline int inat_immediate_size(insn_attr_t attr)
+{
+ return (attr & INAT_IMM_MASK) >> INAT_IMM_OFFS;
+}
+
+static inline int inat_has_modrm(insn_attr_t attr)
+{
+ return attr & INAT_MODRM;
+}
+
+static inline int inat_is_force64(insn_attr_t attr)
+{
+ return attr & INAT_FORCE64;
+}
+
+static inline int inat_has_second_immediate(insn_attr_t attr)
+{
+ return attr & INAT_SCNDIMM;
+}
+
+static inline int inat_has_moffset(insn_attr_t attr)
+{
+ return attr & INAT_MOFFSET;
+}
+
+static inline int inat_has_variant(insn_attr_t attr)
+{
+ return attr & INAT_VARIANT;
+}
+
+static inline int inat_accept_vex(insn_attr_t attr)
+{
+ return attr & INAT_VEXOK;
+}
+
+static inline int inat_must_vex(insn_attr_t attr)
+{
+ return attr & INAT_VEXONLY;
+}
+#endif
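
A worked example of the bit layout defined above (a sketch; group id 1 is illustrative): compose the attribute a group opcode with a byte immediate would carry, then read the pieces back with the checking functions from this header.

#include <assert.h>
#include "inat.h"

int main(void)
{
	insn_attr_t attr = INAT_MAKE_GROUP(1) | INAT_MAKE_IMM(INAT_IMM_BYTE);

	assert(inat_is_group(attr));
	assert(inat_group_id(attr) == 1);
	assert(inat_has_modrm(attr));	/* INAT_MAKE_GROUP() implies ModRM */
	assert(inat_has_immediate(attr));
	assert(inat_immediate_size(attr) == INAT_IMM_BYTE);
	return 0;
}
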
diff --git a/tools/objtool/arch/x86/insn/inat_types.h b/tools/objtool/arch/x86/insn/inat_types.h
new file mode 100644
index 000000000000..cb3c20ce39cf
--- /dev/null
+++ b/tools/objtool/arch/x86/insn/inat_types.h
@@ -0,0 +1,29 @@
+#ifndef _ASM_X86_INAT_TYPES_H
+#define _ASM_X86_INAT_TYPES_H
+/*
+ * x86 instruction attributes
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+
+/* Instruction attributes */
+typedef unsigned int insn_attr_t;
+typedef unsigned char insn_byte_t;
+typedef signed int insn_value_t;
+
+#endif
diff --git a/tools/objtool/arch/x86/insn/insn.c b/tools/objtool/arch/x86/insn/insn.c
new file mode 100644
index 000000000000..9f26eae6c9f0
--- /dev/null
+++ b/tools/objtool/arch/x86/insn/insn.c
@@ -0,0 +1,594 @@
+/*
+ * x86 instruction analysis
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2002, 2004, 2009
+ */
+
+#ifdef __KERNEL__
+#include <linux/string.h>
+#else
+#include <string.h>
+#endif
+#include "inat.h"
+#include "insn.h"
+
+/* Verify next sizeof(t) bytes can be on the same instruction */
+#define validate_next(t, insn, n) \
+ ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr)
+
+#define __get_next(t, insn) \
+ ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; })
+
+#define __peek_nbyte_next(t, insn, n) \
+ ({ t r = *(t*)((insn)->next_byte + n); r; })
+
+#define get_next(t, insn) \
+ ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); })
+
+#define peek_nbyte_next(t, insn, n) \
+ ({ if (unlikely(!validate_next(t, insn, n))) goto err_out; __peek_nbyte_next(t, insn, n); })
+
+#define peek_next(t, insn) peek_nbyte_next(t, insn, 0)
+
+/**
+ * insn_init() - initialize struct insn
+ * @insn: &struct insn to be initialized
+ * @kaddr: address (in kernel memory) of instruction (or copy thereof)
+ * @buf_len: length of the instruction buffer at @kaddr
+ * @x86_64: !0 for 64-bit kernel or 64-bit app
+ */
+void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64)
+{
+ /*
+ * Instructions longer than MAX_INSN_SIZE (15 bytes) are invalid
+ * even if the input buffer is long enough to hold them.
+ */
+ if (buf_len > MAX_INSN_SIZE)
+ buf_len = MAX_INSN_SIZE;
+
+ memset(insn, 0, sizeof(*insn));
+ insn->kaddr = kaddr;
+ insn->end_kaddr = kaddr + buf_len;
+ insn->next_byte = kaddr;
+ insn->x86_64 = x86_64 ? 1 : 0;
+ insn->opnd_bytes = 4;
+ if (x86_64)
+ insn->addr_bytes = 8;
+ else
+ insn->addr_bytes = 4;
+}
+
+/**
+ * insn_get_prefixes - scan x86 instruction prefix bytes
+ * @insn: &struct insn containing instruction
+ *
+ * Populates the @insn->prefixes bitmap, and updates @insn->next_byte
+ * to point to the (first) opcode. No effect if @insn->prefixes.got
+ * is already set.
+ */
+void insn_get_prefixes(struct insn *insn)
+{
+ struct insn_field *prefixes = &insn->prefixes;
+ insn_attr_t attr;
+ insn_byte_t b, lb;
+ int i, nb;
+
+ if (prefixes->got)
+ return;
+
+ nb = 0;
+ lb = 0;
+ b = peek_next(insn_byte_t, insn);
+ attr = inat_get_opcode_attribute(b);
+ while (inat_is_legacy_prefix(attr)) {
+ /* Skip if same prefix */
+ for (i = 0; i < nb; i++)
+ if (prefixes->bytes[i] == b)
+ goto found;
+ if (nb == 4)
+ /* Invalid instruction */
+ break;
+ prefixes->bytes[nb++] = b;
+ if (inat_is_address_size_prefix(attr)) {
+ /* address size switches 2/4 or 4/8 */
+ if (insn->x86_64)
+ insn->addr_bytes ^= 12;
+ else
+ insn->addr_bytes ^= 6;
+ } else if (inat_is_operand_size_prefix(attr)) {
+ /* operand size switches 2/4 */
+ insn->opnd_bytes ^= 6;
+ }
+found:
+ prefixes->nbytes++;
+ insn->next_byte++;
+ lb = b;
+ b = peek_next(insn_byte_t, insn);
+ attr = inat_get_opcode_attribute(b);
+ }
+ /* Set the last prefix */
+ if (lb && lb != insn->prefixes.bytes[3]) {
+ if (unlikely(insn->prefixes.bytes[3])) {
+ /* Swap the last prefix */
+ b = insn->prefixes.bytes[3];
+ for (i = 0; i < nb; i++)
+ if (prefixes->bytes[i] == lb)
+ prefixes->bytes[i] = b;
+ }
+ insn->prefixes.bytes[3] = lb;
+ }
+
+ /* Decode REX prefix */
+ if (insn->x86_64) {
+ b = peek_next(insn_byte_t, insn);
+ attr = inat_get_opcode_attribute(b);
+ if (inat_is_rex_prefix(attr)) {
+ insn->rex_prefix.value = b;
+ insn->rex_prefix.nbytes = 1;
+ insn->next_byte++;
+ if (X86_REX_W(b))
+ /* REX.W overrides opnd_size */
+ insn->opnd_bytes = 8;
+ }
+ }
+ insn->rex_prefix.got = 1;
+
+ /* Decode VEX prefix */
+ b = peek_next(insn_byte_t, insn);
+ attr = inat_get_opcode_attribute(b);
+ if (inat_is_vex_prefix(attr)) {
+ insn_byte_t b2 = peek_nbyte_next(insn_byte_t, insn, 1);
+ if (!insn->x86_64) {
+ /*
+ * In 32-bit mode, if the [7:6] bits (mod bits of
+ * ModRM) on the second byte are not 11b, it is
+ * LDS or LES.
+ */
+ if (X86_MODRM_MOD(b2) != 3)
+ goto vex_end;
+ }
+ insn->vex_prefix.bytes[0] = b;
+ insn->vex_prefix.bytes[1] = b2;
+ if (inat_is_vex3_prefix(attr)) {
+ b2 = peek_nbyte_next(insn_byte_t, insn, 2);
+ insn->vex_prefix.bytes[2] = b2;
+ insn->vex_prefix.nbytes = 3;
+ insn->next_byte += 3;
+ if (insn->x86_64 && X86_VEX_W(b2))
+ /* VEX.W overrides opnd_size */
+ insn->opnd_bytes = 8;
+ } else {
+ /*
+ * For VEX2, fake VEX3-like byte#2.
+ * Makes it easier to decode vex.W, vex.vvvv,
+ * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0.
+ */
+ insn->vex_prefix.bytes[2] = b2 & 0x7f;
+ insn->vex_prefix.nbytes = 2;
+ insn->next_byte += 2;
+ }
+ }
+vex_end:
+ insn->vex_prefix.got = 1;
+
+ prefixes->got = 1;
+
+err_out:
+ return;
+}
+
+/**
+ * insn_get_opcode - collect opcode(s)
+ * @insn: &struct insn containing instruction
+ *
+ * Populates @insn->opcode, updates @insn->next_byte to point past the
+ * opcode byte(s), and sets @insn->attr (except for groups).
+ * If necessary, first collects any preceding (prefix) bytes.
+ * Sets @insn->opcode.value = opcode1. No effect if @insn->opcode.got
+ * is already 1.
+ */
+void insn_get_opcode(struct insn *insn)
+{
+ struct insn_field *opcode = &insn->opcode;
+ insn_byte_t op;
+ int pfx_id;
+ if (opcode->got)
+ return;
+ if (!insn->prefixes.got)
+ insn_get_prefixes(insn);
+
+ /* Get first opcode */
+ op = get_next(insn_byte_t, insn);
+ opcode->bytes[0] = op;
+ opcode->nbytes = 1;
+
+ /* Check if there is VEX prefix or not */
+ if (insn_is_avx(insn)) {
+ insn_byte_t m, p;
+ m = insn_vex_m_bits(insn);
+ p = insn_vex_p_bits(insn);
+ insn->attr = inat_get_avx_attribute(op, m, p);
+ if (!inat_accept_vex(insn->attr) && !inat_is_group(insn->attr))
+ insn->attr = 0; /* This instruction is bad */
+ goto end; /* VEX has only 1 byte for opcode */
+ }
+
+ insn->attr = inat_get_opcode_attribute(op);
+ while (inat_is_escape(insn->attr)) {
+ /* Get escaped opcode */
+ op = get_next(insn_byte_t, insn);
+ opcode->bytes[opcode->nbytes++] = op;
+ pfx_id = insn_last_prefix_id(insn);
+ insn->attr = inat_get_escape_attribute(op, pfx_id, insn->attr);
+ }
+ if (inat_must_vex(insn->attr))
+ insn->attr = 0; /* This instruction is bad */
+end:
+ opcode->got = 1;
+
+err_out:
+ return;
+}
+
+/**
+ * insn_get_modrm - collect ModRM byte, if any
+ * @insn: &struct insn containing instruction
+ *
+ * Populates @insn->modrm and updates @insn->next_byte to point past the
+ * ModRM byte, if any. If necessary, first collects the preceding bytes
+ * (prefixes and opcode(s)). No effect if @insn->modrm.got is already 1.
+ */
+void insn_get_modrm(struct insn *insn)
+{
+ struct insn_field *modrm = &insn->modrm;
+ insn_byte_t pfx_id, mod;
+ if (modrm->got)
+ return;
+ if (!insn->opcode.got)
+ insn_get_opcode(insn);
+
+ if (inat_has_modrm(insn->attr)) {
+ mod = get_next(insn_byte_t, insn);
+ modrm->value = mod;
+ modrm->nbytes = 1;
+ if (inat_is_group(insn->attr)) {
+ pfx_id = insn_last_prefix_id(insn);
+ insn->attr = inat_get_group_attribute(mod, pfx_id,
+ insn->attr);
+ if (insn_is_avx(insn) && !inat_accept_vex(insn->attr))
+ insn->attr = 0; /* This is bad */
+ }
+ }
+
+ if (insn->x86_64 && inat_is_force64(insn->attr))
+ insn->opnd_bytes = 8;
+ modrm->got = 1;
+
+err_out:
+ return;
+}
+
+
+/**
+ * insn_rip_relative() - Does instruction use RIP-relative addressing mode?
+ * @insn: &struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * ModRM byte. No effect if @insn->x86_64 is 0.
+ */
+int insn_rip_relative(struct insn *insn)
+{
+ struct insn_field *modrm = &insn->modrm;
+
+ if (!insn->x86_64)
+ return 0;
+ if (!modrm->got)
+ insn_get_modrm(insn);
+ /*
+ * For rip-relative instructions, the mod field (top 2 bits)
+ * is zero and the r/m field (bottom 3 bits) is 0x5.
+ */
+ return (modrm->nbytes && (modrm->value & 0xc7) == 0x5);
+}
+
+/**
+ * insn_get_sib() - Get the SIB byte of instruction
+ * @insn: &struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * ModRM byte.
+ */
+void insn_get_sib(struct insn *insn)
+{
+ insn_byte_t modrm;
+
+ if (insn->sib.got)
+ return;
+ if (!insn->modrm.got)
+ insn_get_modrm(insn);
+ if (insn->modrm.nbytes) {
+ modrm = (insn_byte_t)insn->modrm.value;
+ if (insn->addr_bytes != 2 &&
+ X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) {
+ insn->sib.value = get_next(insn_byte_t, insn);
+ insn->sib.nbytes = 1;
+ }
+ }
+ insn->sib.got = 1;
+
+err_out:
+ return;
+}
+
+
+/**
+ * insn_get_displacement() - Get the displacement of instruction
+ * @insn: &struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * SIB byte.
+ * Displacement value is sign-extended.
+ */
+void insn_get_displacement(struct insn *insn)
+{
+ insn_byte_t mod, rm, base;
+
+ if (insn->displacement.got)
+ return;
+ if (!insn->sib.got)
+ insn_get_sib(insn);
+ if (insn->modrm.nbytes) {
+ /*
+ * Interpreting the modrm byte:
+ * mod = 00 - no displacement fields (exceptions below)
+ * mod = 01 - 1-byte displacement field
+ * mod = 10 - displacement field is 4 bytes, or 2 bytes if
+ * address size = 2 (0x67 prefix in 32-bit mode)
+ * mod = 11 - no memory operand
+ *
+ * If address size = 2...
+ * mod = 00, r/m = 110 - displacement field is 2 bytes
+ *
+ * If address size != 2...
+ * mod != 11, r/m = 100 - SIB byte exists
+ * mod = 00, SIB base = 101 - displacement field is 4 bytes
+ * mod = 00, r/m = 101 - rip-relative addressing, displacement
+ * field is 4 bytes
+ */
+ mod = X86_MODRM_MOD(insn->modrm.value);
+ rm = X86_MODRM_RM(insn->modrm.value);
+ base = X86_SIB_BASE(insn->sib.value);
+ if (mod == 3)
+ goto out;
+ if (mod == 1) {
+ insn->displacement.value = get_next(signed char, insn);
+ insn->displacement.nbytes = 1;
+ } else if (insn->addr_bytes == 2) {
+ if ((mod == 0 && rm == 6) || mod == 2) {
+ insn->displacement.value =
+ get_next(short, insn);
+ insn->displacement.nbytes = 2;
+ }
+ } else {
+ if ((mod == 0 && rm == 5) || mod == 2 ||
+ (mod == 0 && base == 5)) {
+ insn->displacement.value = get_next(int, insn);
+ insn->displacement.nbytes = 4;
+ }
+ }
+ }
+out:
+ insn->displacement.got = 1;
+
+err_out:
+ return;
+}
+
+/* Decode moffset16/32/64. Return 0 if failed */
+static int __get_moffset(struct insn *insn)
+{
+ switch (insn->addr_bytes) {
+ case 2:
+ insn->moffset1.value = get_next(short, insn);
+ insn->moffset1.nbytes = 2;
+ break;
+ case 4:
+ insn->moffset1.value = get_next(int, insn);
+ insn->moffset1.nbytes = 4;
+ break;
+ case 8:
+ insn->moffset1.value = get_next(int, insn);
+ insn->moffset1.nbytes = 4;
+ insn->moffset2.value = get_next(int, insn);
+ insn->moffset2.nbytes = 4;
+ break;
+ default: /* opnd_bytes must be modified manually */
+ goto err_out;
+ }
+ insn->moffset1.got = insn->moffset2.got = 1;
+
+ return 1;
+
+err_out:
+ return 0;
+}
+
+/* Decode imm v32(Iz). Return 0 if failed */
+static int __get_immv32(struct insn *insn)
+{
+ switch (insn->opnd_bytes) {
+ case 2:
+ insn->immediate.value = get_next(short, insn);
+ insn->immediate.nbytes = 2;
+ break;
+ case 4:
+ case 8:
+ insn->immediate.value = get_next(int, insn);
+ insn->immediate.nbytes = 4;
+ break;
+ default: /* opnd_bytes must be modified manually */
+ goto err_out;
+ }
+
+ return 1;
+
+err_out:
+ return 0;
+}
+
+/* Decode imm v64(Iv/Ov), Return 0 if failed */
+static int __get_immv(struct insn *insn)
+{
+ switch (insn->opnd_bytes) {
+ case 2:
+ insn->immediate1.value = get_next(short, insn);
+ insn->immediate1.nbytes = 2;
+ break;
+ case 4:
+ insn->immediate1.value = get_next(int, insn);
+ insn->immediate1.nbytes = 4;
+ break;
+ case 8:
+ insn->immediate1.value = get_next(int, insn);
+ insn->immediate1.nbytes = 4;
+ insn->immediate2.value = get_next(int, insn);
+ insn->immediate2.nbytes = 4;
+ break;
+ default: /* opnd_bytes must be modified manually */
+ goto err_out;
+ }
+ insn->immediate1.got = insn->immediate2.got = 1;
+
+ return 1;
+err_out:
+ return 0;
+}
+
+/* Decode ptr16:16/32(Ap) */
+static int __get_immptr(struct insn *insn)
+{
+ switch (insn->opnd_bytes) {
+ case 2:
+ insn->immediate1.value = get_next(short, insn);
+ insn->immediate1.nbytes = 2;
+ break;
+ case 4:
+ insn->immediate1.value = get_next(int, insn);
+ insn->immediate1.nbytes = 4;
+ break;
+ case 8:
+ /* ptr16:64 does not exist (no segment) */
+ return 0;
+ default: /* opnd_bytes must be modified manually */
+ goto err_out;
+ }
+ insn->immediate2.value = get_next(unsigned short, insn);
+ insn->immediate2.nbytes = 2;
+ insn->immediate1.got = insn->immediate2.got = 1;
+
+ return 1;
+err_out:
+ return 0;
+}
+
+/**
+ * insn_get_immediate() - Get the immediates of instruction
+ * @insn: &struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * displacement bytes.
+ * Most immediates are sign-extended. The unsigned value can be obtained
+ * by masking it with ((1 << (nbytes * 8)) - 1).
+ */
+void insn_get_immediate(struct insn *insn)
+{
+ if (insn->immediate.got)
+ return;
+ if (!insn->displacement.got)
+ insn_get_displacement(insn);
+
+ if (inat_has_moffset(insn->attr)) {
+ if (!__get_moffset(insn))
+ goto err_out;
+ goto done;
+ }
+
+ if (!inat_has_immediate(insn->attr))
+ /* no immediates */
+ goto done;
+
+ switch (inat_immediate_size(insn->attr)) {
+ case INAT_IMM_BYTE:
+ insn->immediate.value = get_next(signed char, insn);
+ insn->immediate.nbytes = 1;
+ break;
+ case INAT_IMM_WORD:
+ insn->immediate.value = get_next(short, insn);
+ insn->immediate.nbytes = 2;
+ break;
+ case INAT_IMM_DWORD:
+ insn->immediate.value = get_next(int, insn);
+ insn->immediate.nbytes = 4;
+ break;
+ case INAT_IMM_QWORD:
+ insn->immediate1.value = get_next(int, insn);
+ insn->immediate1.nbytes = 4;
+ insn->immediate2.value = get_next(int, insn);
+ insn->immediate2.nbytes = 4;
+ break;
+ case INAT_IMM_PTR:
+ if (!__get_immptr(insn))
+ goto err_out;
+ break;
+ case INAT_IMM_VWORD32:
+ if (!__get_immv32(insn))
+ goto err_out;
+ break;
+ case INAT_IMM_VWORD:
+ if (!__get_immv(insn))
+ goto err_out;
+ break;
+ default:
+ /* Here, insn must have an immediate, but failed */
+ goto err_out;
+ }
+ if (inat_has_second_immediate(insn->attr)) {
+ insn->immediate2.value = get_next(signed char, insn);
+ insn->immediate2.nbytes = 1;
+ }
+done:
+ insn->immediate.got = 1;
+
+err_out:
+ return;
+}
+
+/**
+ * insn_get_length() - Get the length of instruction
+ * @insn: &struct insn containing instruction
+ *
+ * If necessary, first collects the instruction up to and including the
+ * immediates bytes.
+ */
+void insn_get_length(struct insn *insn)
+{
+ if (insn->length)
+ return;
+ if (!insn->immediate.got)
+ insn_get_immediate(insn);
+ insn->length = (unsigned char)((unsigned long)insn->next_byte
+ - (unsigned long)insn->kaddr);
+}
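
A minimal usage sketch of the decoder above. It follows the same trick decode.c uses: provide a trivial unlikely() and pull in inat.c and insn.c directly, so the only external requirement is that the generated inat-tables.c is on the include path.

#include <stdio.h>

#define unlikely(cond) (cond)	/* same shortcut decode.c takes */
#include "insn.h"
#include "inat.c"		/* includes the generated inat-tables.c */
#include "insn.c"

int main(void)
{
	/* mov %rsp,%rbp -- REX.W, opcode 0x89, ModRM 0xe5, nothing else */
	const unsigned char buf[] = { 0x48, 0x89, 0xe5 };
	struct insn insn;

	insn_init(&insn, buf, sizeof(buf), /* x86_64 */ 1);
	insn_get_length(&insn);		/* pulls in all earlier stages */

	if (!insn_complete(&insn))
		return 1;

	printf("length=%u opcode=0x%02x modrm=%u byte(s) imm=%u byte(s)\n",
	       (unsigned int)insn.length,
	       (unsigned int)insn.opcode.bytes[0],
	       (unsigned int)insn.modrm.nbytes,
	       (unsigned int)insn.immediate.nbytes);
	return 0;
}
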
diff --git a/tools/objtool/arch/x86/insn/insn.h b/tools/objtool/arch/x86/insn/insn.h
new file mode 100644
index 000000000000..dd12da0f4593
--- /dev/null
+++ b/tools/objtool/arch/x86/insn/insn.h
@@ -0,0 +1,201 @@
+#ifndef _ASM_X86_INSN_H
+#define _ASM_X86_INSN_H
+/*
+ * x86 instruction analysis
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright (C) IBM Corporation, 2009
+ */
+
+/* insn_attr_t is defined in inat.h */
+#include "inat.h"
+
+struct insn_field {
+ union {
+ insn_value_t value;
+ insn_byte_t bytes[4];
+ };
+ /* !0 if we've run insn_get_xxx() for this field */
+ unsigned char got;
+ unsigned char nbytes;
+};
+
+struct insn {
+ struct insn_field prefixes; /*
+ * Prefixes
+ * prefixes.bytes[3]: last prefix
+ */
+ struct insn_field rex_prefix; /* REX prefix */
+ struct insn_field vex_prefix; /* VEX prefix */
+ struct insn_field opcode; /*
+ * opcode.bytes[0]: opcode1
+ * opcode.bytes[1]: opcode2
+ * opcode.bytes[2]: opcode3
+ */
+ struct insn_field modrm;
+ struct insn_field sib;
+ struct insn_field displacement;
+ union {
+ struct insn_field immediate;
+ struct insn_field moffset1; /* for 64bit MOV */
+ struct insn_field immediate1; /* for 64bit imm or off16/32 */
+ };
+ union {
+ struct insn_field moffset2; /* for 64bit MOV */
+ struct insn_field immediate2; /* for 64bit imm or seg16 */
+ };
+
+ insn_attr_t attr;
+ unsigned char opnd_bytes;
+ unsigned char addr_bytes;
+ unsigned char length;
+ unsigned char x86_64;
+
+ const insn_byte_t *kaddr; /* kernel address of insn to analyze */
+ const insn_byte_t *end_kaddr; /* kernel address of last insn in buffer */
+ const insn_byte_t *next_byte;
+};
+
+#define MAX_INSN_SIZE 15
+
+#define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6)
+#define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3)
+#define X86_MODRM_RM(modrm) ((modrm) & 0x07)
+
+#define X86_SIB_SCALE(sib) (((sib) & 0xc0) >> 6)
+#define X86_SIB_INDEX(sib) (((sib) & 0x38) >> 3)
+#define X86_SIB_BASE(sib) ((sib) & 0x07)
+
+#define X86_REX_W(rex) ((rex) & 8)
+#define X86_REX_R(rex) ((rex) & 4)
+#define X86_REX_X(rex) ((rex) & 2)
+#define X86_REX_B(rex) ((rex) & 1)
+
+/* VEX bit flags */
+#define X86_VEX_W(vex) ((vex) & 0x80) /* VEX3 Byte2 */
+#define X86_VEX_R(vex) ((vex) & 0x80) /* VEX2/3 Byte1 */
+#define X86_VEX_X(vex) ((vex) & 0x40) /* VEX3 Byte1 */
+#define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */
+#define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */
+/* VEX bit fields */
+#define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */
+#define X86_VEX2_M 1 /* VEX2.M always 1 */
+#define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */
+#define X86_VEX_P(vex) ((vex) & 0x03) /* VEX3 Byte2, VEX2 Byte1 */
+#define X86_VEX_M_MAX 0x1f /* VEX3.M Maximum value */
+
+extern void insn_init(struct insn *insn, const void *kaddr, int buf_len, int x86_64);
+extern void insn_get_prefixes(struct insn *insn);
+extern void insn_get_opcode(struct insn *insn);
+extern void insn_get_modrm(struct insn *insn);
+extern void insn_get_sib(struct insn *insn);
+extern void insn_get_displacement(struct insn *insn);
+extern void insn_get_immediate(struct insn *insn);
+extern void insn_get_length(struct insn *insn);
+
+/* Attribute will be determined after getting ModRM (for opcode groups) */
+static inline void insn_get_attribute(struct insn *insn)
+{
+ insn_get_modrm(insn);
+}
+
+/* Instruction uses RIP-relative addressing */
+extern int insn_rip_relative(struct insn *insn);
+
+/* Init insn for kernel text */
+static inline void kernel_insn_init(struct insn *insn,
+ const void *kaddr, int buf_len)
+{
+#ifdef CONFIG_X86_64
+ insn_init(insn, kaddr, buf_len, 1);
+#else /* CONFIG_X86_32 */
+ insn_init(insn, kaddr, buf_len, 0);
+#endif
+}
+
+static inline int insn_is_avx(struct insn *insn)
+{
+ if (!insn->prefixes.got)
+ insn_get_prefixes(insn);
+ return (insn->vex_prefix.value != 0);
+}
+
+/* Ensure this instruction is decoded completely */
+static inline int insn_complete(struct insn *insn)
+{
+ return insn->opcode.got && insn->modrm.got && insn->sib.got &&
+ insn->displacement.got && insn->immediate.got;
+}
+
+static inline insn_byte_t insn_vex_m_bits(struct insn *insn)
+{
+ if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */
+ return X86_VEX2_M;
+ else
+ return X86_VEX3_M(insn->vex_prefix.bytes[1]);
+}
+
+static inline insn_byte_t insn_vex_p_bits(struct insn *insn)
+{
+ if (insn->vex_prefix.nbytes == 2) /* 2 bytes VEX */
+ return X86_VEX_P(insn->vex_prefix.bytes[1]);
+ else
+ return X86_VEX_P(insn->vex_prefix.bytes[2]);
+}
+
+/* Get the last prefix id from last prefix or VEX prefix */
+static inline int insn_last_prefix_id(struct insn *insn)
+{
+ if (insn_is_avx(insn))
+ return insn_vex_p_bits(insn); /* VEX_p is a SIMD prefix id */
+
+ if (insn->prefixes.bytes[3])
+ return inat_get_last_prefix_id(insn->prefixes.bytes[3]);
+
+ return 0;
+}
+
+/* Offset of each field from kaddr */
+static inline int insn_offset_rex_prefix(struct insn *insn)
+{
+ return insn->prefixes.nbytes;
+}
+static inline int insn_offset_vex_prefix(struct insn *insn)
+{
+ return insn_offset_rex_prefix(insn) + insn->rex_prefix.nbytes;
+}
+static inline int insn_offset_opcode(struct insn *insn)
+{
+ return insn_offset_vex_prefix(insn) + insn->vex_prefix.nbytes;
+}
+static inline int insn_offset_modrm(struct insn *insn)
+{
+ return insn_offset_opcode(insn) + insn->opcode.nbytes;
+}
+static inline int insn_offset_sib(struct insn *insn)
+{
+ return insn_offset_modrm(insn) + insn->modrm.nbytes;
+}
+static inline int insn_offset_displacement(struct insn *insn)
+{
+ return insn_offset_sib(insn) + insn->sib.nbytes;
+}
+static inline int insn_offset_immediate(struct insn *insn)
+{
+ return insn_offset_displacement(insn) + insn->displacement.nbytes;
+}
+
+#endif /* _ASM_X86_INSN_H */
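
Continuing the decode sketch from after insn.c: for the 3-byte mov %rsp,%rbp instruction, the offset helpers defined above simply accumulate the nbytes fields, so they report the REX prefix at offset 0, the opcode at 1 and the ModRM byte at 2, with every later field collapsing onto the instruction length.

#include <assert.h>
#include "insn.h"

/* Offsets for the decoded "48 89 e5" example -- a sketch. */
static void check_offsets(struct insn *insn)
{
	assert(insn_offset_rex_prefix(insn) == 0);	/* no legacy prefixes */
	assert(insn_offset_opcode(insn) == 1);		/* after the REX byte */
	assert(insn_offset_modrm(insn) == 2);
	assert(insn_offset_immediate(insn) == 3);	/* == insn->length */
}
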
diff --git a/tools/objtool/arch/x86/insn/x86-opcode-map.txt b/tools/objtool/arch/x86/insn/x86-opcode-map.txt
new file mode 100644
index 000000000000..d388de72eaca
--- /dev/null
+++ b/tools/objtool/arch/x86/insn/x86-opcode-map.txt
@@ -0,0 +1,984 @@
+# x86 Opcode Maps
+#
+# This is (mostly) based on the following documentation.
+# - Intel(R) 64 and IA-32 Architectures Software Developer's Manual Vol.2C
+# (#326018-047US, June 2013)
+#
+#<Opcode maps>
+# Table: table-name
+# Referrer: escaped-name
+# AVXcode: avx-code
+# opcode: mnemonic|GrpXXX [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...]
+# (or)
+# opcode: escape # escaped-name
+# EndTable
+#
+#<group maps>
+# GrpTable: GrpXXX
+# reg: mnemonic [operand1[,operand2...]] [(extra1)[,(extra2)...] [| 2nd-mnemonic ...]
+# EndTable
+#
+# AVX Superscripts
+# (v): this opcode requires VEX prefix.
+# (v1): this opcode only supports 128-bit VEX.
+#
+# Last Prefix Superscripts
+# - (66): the last prefix is 0x66
+# - (F3): the last prefix is 0xF3
+# - (F2): the last prefix is 0xF2
+# - (!F3) : the last prefix is not 0xF3 (including non-last prefix case)
+# - (66&F2): Both 0x66 and 0xF2 prefixes are specified.
+
+Table: one byte opcode
+Referrer:
+AVXcode:
+# 0x00 - 0x0f
+00: ADD Eb,Gb
+01: ADD Ev,Gv
+02: ADD Gb,Eb
+03: ADD Gv,Ev
+04: ADD AL,Ib
+05: ADD rAX,Iz
+06: PUSH ES (i64)
+07: POP ES (i64)
+08: OR Eb,Gb
+09: OR Ev,Gv
+0a: OR Gb,Eb
+0b: OR Gv,Ev
+0c: OR AL,Ib
+0d: OR rAX,Iz
+0e: PUSH CS (i64)
+0f: escape # 2-byte escape
+# 0x10 - 0x1f
+10: ADC Eb,Gb
+11: ADC Ev,Gv
+12: ADC Gb,Eb
+13: ADC Gv,Ev
+14: ADC AL,Ib
+15: ADC rAX,Iz
+16: PUSH SS (i64)
+17: POP SS (i64)
+18: SBB Eb,Gb
+19: SBB Ev,Gv
+1a: SBB Gb,Eb
+1b: SBB Gv,Ev
+1c: SBB AL,Ib
+1d: SBB rAX,Iz
+1e: PUSH DS (i64)
+1f: POP DS (i64)
+# 0x20 - 0x2f
+20: AND Eb,Gb
+21: AND Ev,Gv
+22: AND Gb,Eb
+23: AND Gv,Ev
+24: AND AL,Ib
+25: AND rAx,Iz
+26: SEG=ES (Prefix)
+27: DAA (i64)
+28: SUB Eb,Gb
+29: SUB Ev,Gv
+2a: SUB Gb,Eb
+2b: SUB Gv,Ev
+2c: SUB AL,Ib
+2d: SUB rAX,Iz
+2e: SEG=CS (Prefix)
+2f: DAS (i64)
+# 0x30 - 0x3f
+30: XOR Eb,Gb
+31: XOR Ev,Gv
+32: XOR Gb,Eb
+33: XOR Gv,Ev
+34: XOR AL,Ib
+35: XOR rAX,Iz
+36: SEG=SS (Prefix)
+37: AAA (i64)
+38: CMP Eb,Gb
+39: CMP Ev,Gv
+3a: CMP Gb,Eb
+3b: CMP Gv,Ev
+3c: CMP AL,Ib
+3d: CMP rAX,Iz
+3e: SEG=DS (Prefix)
+3f: AAS (i64)
+# 0x40 - 0x4f
+40: INC eAX (i64) | REX (o64)
+41: INC eCX (i64) | REX.B (o64)
+42: INC eDX (i64) | REX.X (o64)
+43: INC eBX (i64) | REX.XB (o64)
+44: INC eSP (i64) | REX.R (o64)
+45: INC eBP (i64) | REX.RB (o64)
+46: INC eSI (i64) | REX.RX (o64)
+47: INC eDI (i64) | REX.RXB (o64)
+48: DEC eAX (i64) | REX.W (o64)
+49: DEC eCX (i64) | REX.WB (o64)
+4a: DEC eDX (i64) | REX.WX (o64)
+4b: DEC eBX (i64) | REX.WXB (o64)
+4c: DEC eSP (i64) | REX.WR (o64)
+4d: DEC eBP (i64) | REX.WRB (o64)
+4e: DEC eSI (i64) | REX.WRX (o64)
+4f: DEC eDI (i64) | REX.WRXB (o64)
+# 0x50 - 0x5f
+50: PUSH rAX/r8 (d64)
+51: PUSH rCX/r9 (d64)
+52: PUSH rDX/r10 (d64)
+53: PUSH rBX/r11 (d64)
+54: PUSH rSP/r12 (d64)
+55: PUSH rBP/r13 (d64)
+56: PUSH rSI/r14 (d64)
+57: PUSH rDI/r15 (d64)
+58: POP rAX/r8 (d64)
+59: POP rCX/r9 (d64)
+5a: POP rDX/r10 (d64)
+5b: POP rBX/r11 (d64)
+5c: POP rSP/r12 (d64)
+5d: POP rBP/r13 (d64)
+5e: POP rSI/r14 (d64)
+5f: POP rDI/r15 (d64)
+# 0x60 - 0x6f
+60: PUSHA/PUSHAD (i64)
+61: POPA/POPAD (i64)
+62: BOUND Gv,Ma (i64)
+63: ARPL Ew,Gw (i64) | MOVSXD Gv,Ev (o64)
+64: SEG=FS (Prefix)
+65: SEG=GS (Prefix)
+66: Operand-Size (Prefix)
+67: Address-Size (Prefix)
+68: PUSH Iz (d64)
+69: IMUL Gv,Ev,Iz
+6a: PUSH Ib (d64)
+6b: IMUL Gv,Ev,Ib
+6c: INS/INSB Yb,DX
+6d: INS/INSW/INSD Yz,DX
+6e: OUTS/OUTSB DX,Xb
+6f: OUTS/OUTSW/OUTSD DX,Xz
+# 0x70 - 0x7f
+70: JO Jb
+71: JNO Jb
+72: JB/JNAE/JC Jb
+73: JNB/JAE/JNC Jb
+74: JZ/JE Jb
+75: JNZ/JNE Jb
+76: JBE/JNA Jb
+77: JNBE/JA Jb
+78: JS Jb
+79: JNS Jb
+7a: JP/JPE Jb
+7b: JNP/JPO Jb
+7c: JL/JNGE Jb
+7d: JNL/JGE Jb
+7e: JLE/JNG Jb
+7f: JNLE/JG Jb
+# 0x80 - 0x8f
+80: Grp1 Eb,Ib (1A)
+81: Grp1 Ev,Iz (1A)
+82: Grp1 Eb,Ib (1A),(i64)
+83: Grp1 Ev,Ib (1A)
+84: TEST Eb,Gb
+85: TEST Ev,Gv
+86: XCHG Eb,Gb
+87: XCHG Ev,Gv
+88: MOV Eb,Gb
+89: MOV Ev,Gv
+8a: MOV Gb,Eb
+8b: MOV Gv,Ev
+8c: MOV Ev,Sw
+8d: LEA Gv,M
+8e: MOV Sw,Ew
+8f: Grp1A (1A) | POP Ev (d64)
+# 0x90 - 0x9f
+90: NOP | PAUSE (F3) | XCHG r8,rAX
+91: XCHG rCX/r9,rAX
+92: XCHG rDX/r10,rAX
+93: XCHG rBX/r11,rAX
+94: XCHG rSP/r12,rAX
+95: XCHG rBP/r13,rAX
+96: XCHG rSI/r14,rAX
+97: XCHG rDI/r15,rAX
+98: CBW/CWDE/CDQE
+99: CWD/CDQ/CQO
+9a: CALLF Ap (i64)
+9b: FWAIT/WAIT
+9c: PUSHF/D/Q Fv (d64)
+9d: POPF/D/Q Fv (d64)
+9e: SAHF
+9f: LAHF
+# 0xa0 - 0xaf
+a0: MOV AL,Ob
+a1: MOV rAX,Ov
+a2: MOV Ob,AL
+a3: MOV Ov,rAX
+a4: MOVS/B Yb,Xb
+a5: MOVS/W/D/Q Yv,Xv
+a6: CMPS/B Xb,Yb
+a7: CMPS/W/D Xv,Yv
+a8: TEST AL,Ib
+a9: TEST rAX,Iz
+aa: STOS/B Yb,AL
+ab: STOS/W/D/Q Yv,rAX
+ac: LODS/B AL,Xb
+ad: LODS/W/D/Q rAX,Xv
+ae: SCAS/B AL,Yb
+# Note: The May 2011 Intel manual shows Xv for the second parameter of the
+# next instruction but Yv is correct
+af: SCAS/W/D/Q rAX,Yv
+# 0xb0 - 0xbf
+b0: MOV AL/R8L,Ib
+b1: MOV CL/R9L,Ib
+b2: MOV DL/R10L,Ib
+b3: MOV BL/R11L,Ib
+b4: MOV AH/R12L,Ib
+b5: MOV CH/R13L,Ib
+b6: MOV DH/R14L,Ib
+b7: MOV BH/R15L,Ib
+b8: MOV rAX/r8,Iv
+b9: MOV rCX/r9,Iv
+ba: MOV rDX/r10,Iv
+bb: MOV rBX/r11,Iv
+bc: MOV rSP/r12,Iv
+bd: MOV rBP/r13,Iv
+be: MOV rSI/r14,Iv
+bf: MOV rDI/r15,Iv
+# 0xc0 - 0xcf
+c0: Grp2 Eb,Ib (1A)
+c1: Grp2 Ev,Ib (1A)
+c2: RETN Iw (f64)
+c3: RETN
+c4: LES Gz,Mp (i64) | VEX+2byte (Prefix)
+c5: LDS Gz,Mp (i64) | VEX+1byte (Prefix)
+c6: Grp11A Eb,Ib (1A)
+c7: Grp11B Ev,Iz (1A)
+c8: ENTER Iw,Ib
+c9: LEAVE (d64)
+ca: RETF Iw
+cb: RETF
+cc: INT3
+cd: INT Ib
+ce: INTO (i64)
+cf: IRET/D/Q
+# 0xd0 - 0xdf
+d0: Grp2 Eb,1 (1A)
+d1: Grp2 Ev,1 (1A)
+d2: Grp2 Eb,CL (1A)
+d3: Grp2 Ev,CL (1A)
+d4: AAM Ib (i64)
+d5: AAD Ib (i64)
+d6:
+d7: XLAT/XLATB
+d8: ESC
+d9: ESC
+da: ESC
+db: ESC
+dc: ESC
+dd: ESC
+de: ESC
+df: ESC
+# 0xe0 - 0xef
+# Note: "forced64" is Intel CPU behavior: they ignore 0x66 prefix
+# in 64-bit mode. AMD CPUs accept the 0x66 prefix; it causes RIP truncation
+# to 16 bits. In 32-bit mode, 0x66 is accepted by both Intel and AMD.
+e0: LOOPNE/LOOPNZ Jb (f64)
+e1: LOOPE/LOOPZ Jb (f64)
+e2: LOOP Jb (f64)
+e3: JrCXZ Jb (f64)
+e4: IN AL,Ib
+e5: IN eAX,Ib
+e6: OUT Ib,AL
+e7: OUT Ib,eAX
+# With 0x66 prefix in 64-bit mode, for AMD CPUs immediate offset
+# in "near" jumps and calls is 16-bit. For CALL,
+# push of return address is 16-bit wide, RSP is decremented by 2
+# but is not truncated to 16 bits, unlike RIP.
+e8: CALL Jz (f64)
+e9: JMP-near Jz (f64)
+ea: JMP-far Ap (i64)
+eb: JMP-short Jb (f64)
+ec: IN AL,DX
+ed: IN eAX,DX
+ee: OUT DX,AL
+ef: OUT DX,eAX
+# 0xf0 - 0xff
+f0: LOCK (Prefix)
+f1:
+f2: REPNE (Prefix) | XACQUIRE (Prefix)
+f3: REP/REPE (Prefix) | XRELEASE (Prefix)
+f4: HLT
+f5: CMC
+f6: Grp3_1 Eb (1A)
+f7: Grp3_2 Ev (1A)
+f8: CLC
+f9: STC
+fa: CLI
+fb: STI
+fc: CLD
+fd: STD
+fe: Grp4 (1A)
+ff: Grp5 (1A)
+EndTable
+
+Table: 2-byte opcode (0x0f)
+Referrer: 2-byte escape
+AVXcode: 1
+# 0x0f 0x00-0x0f
+00: Grp6 (1A)
+01: Grp7 (1A)
+02: LAR Gv,Ew
+03: LSL Gv,Ew
+04:
+05: SYSCALL (o64)
+06: CLTS
+07: SYSRET (o64)
+08: INVD
+09: WBINVD
+0a:
+0b: UD2 (1B)
+0c:
+# AMD's prefetch group. Intel supports prefetchw(/1) only.
+0d: GrpP
+0e: FEMMS
+# 3DNow! uses the last imm byte as opcode extension.
+0f: 3DNow! Pq,Qq,Ib
+# 0x0f 0x10-0x1f
+# NOTE: According to the Intel SDM opcode map, vmovups and vmovupd have no
+# operands, but they actually do. Also, vmovss and vmovsd only accept 128-bit
+# operands. MOVSS/MOVSD have three forms in the SDM; this map just shows a
+# typical form. Many AVX instructions lack the v1 superscript, according to
+# the Intel AVX Programming Reference A.1
+10: vmovups Vps,Wps | vmovupd Vpd,Wpd (66) | vmovss Vx,Hx,Wss (F3),(v1) | vmovsd Vx,Hx,Wsd (F2),(v1)
+11: vmovups Wps,Vps | vmovupd Wpd,Vpd (66) | vmovss Wss,Hx,Vss (F3),(v1) | vmovsd Wsd,Hx,Vsd (F2),(v1)
+12: vmovlps Vq,Hq,Mq (v1) | vmovhlps Vq,Hq,Uq (v1) | vmovlpd Vq,Hq,Mq (66),(v1) | vmovsldup Vx,Wx (F3) | vmovddup Vx,Wx (F2)
+13: vmovlps Mq,Vq (v1) | vmovlpd Mq,Vq (66),(v1)
+14: vunpcklps Vx,Hx,Wx | vunpcklpd Vx,Hx,Wx (66)
+15: vunpckhps Vx,Hx,Wx | vunpckhpd Vx,Hx,Wx (66)
+16: vmovhps Vdq,Hq,Mq (v1) | vmovlhps Vdq,Hq,Uq (v1) | vmovhpd Vdq,Hq,Mq (66),(v1) | vmovshdup Vx,Wx (F3)
+17: vmovhps Mq,Vq (v1) | vmovhpd Mq,Vq (66),(v1)
+18: Grp16 (1A)
+19:
+# Intel SDM opcode map does not list MPX instructions. For now using Gv for
+# bnd registers and Ev for everything else is OK because the instruction
+# decoder does not use the information except as an indication that there is
+# a ModR/M byte.
+1a: BNDCL Gv,Ev (F3) | BNDCU Gv,Ev (F2) | BNDMOV Gv,Ev (66) | BNDLDX Gv,Ev
+1b: BNDCN Gv,Ev (F2) | BNDMOV Ev,Gv (66) | BNDMK Gv,Ev (F3) | BNDSTX Ev,Gv
+1c:
+1d:
+1e:
+1f: NOP Ev
+# 0x0f 0x20-0x2f
+20: MOV Rd,Cd
+21: MOV Rd,Dd
+22: MOV Cd,Rd
+23: MOV Dd,Rd
+24:
+25:
+26:
+27:
+28: vmovaps Vps,Wps | vmovapd Vpd,Wpd (66)
+29: vmovaps Wps,Vps | vmovapd Wpd,Vpd (66)
+2a: cvtpi2ps Vps,Qpi | cvtpi2pd Vpd,Qpi (66) | vcvtsi2ss Vss,Hss,Ey (F3),(v1) | vcvtsi2sd Vsd,Hsd,Ey (F2),(v1)
+2b: vmovntps Mps,Vps | vmovntpd Mpd,Vpd (66)
+2c: cvttps2pi Ppi,Wps | cvttpd2pi Ppi,Wpd (66) | vcvttss2si Gy,Wss (F3),(v1) | vcvttsd2si Gy,Wsd (F2),(v1)
+2d: cvtps2pi Ppi,Wps | cvtpd2pi Qpi,Wpd (66) | vcvtss2si Gy,Wss (F3),(v1) | vcvtsd2si Gy,Wsd (F2),(v1)
+2e: vucomiss Vss,Wss (v1) | vucomisd Vsd,Wsd (66),(v1)
+2f: vcomiss Vss,Wss (v1) | vcomisd Vsd,Wsd (66),(v1)
+# 0x0f 0x30-0x3f
+30: WRMSR
+31: RDTSC
+32: RDMSR
+33: RDPMC
+34: SYSENTER
+35: SYSEXIT
+36:
+37: GETSEC
+38: escape # 3-byte escape 1
+39:
+3a: escape # 3-byte escape 2
+3b:
+3c:
+3d:
+3e:
+3f:
+# 0x0f 0x40-0x4f
+40: CMOVO Gv,Ev
+41: CMOVNO Gv,Ev
+42: CMOVB/C/NAE Gv,Ev
+43: CMOVAE/NB/NC Gv,Ev
+44: CMOVE/Z Gv,Ev
+45: CMOVNE/NZ Gv,Ev
+46: CMOVBE/NA Gv,Ev
+47: CMOVA/NBE Gv,Ev
+48: CMOVS Gv,Ev
+49: CMOVNS Gv,Ev
+4a: CMOVP/PE Gv,Ev
+4b: CMOVNP/PO Gv,Ev
+4c: CMOVL/NGE Gv,Ev
+4d: CMOVNL/GE Gv,Ev
+4e: CMOVLE/NG Gv,Ev
+4f: CMOVNLE/G Gv,Ev
+# 0x0f 0x50-0x5f
+50: vmovmskps Gy,Ups | vmovmskpd Gy,Upd (66)
+51: vsqrtps Vps,Wps | vsqrtpd Vpd,Wpd (66) | vsqrtss Vss,Hss,Wss (F3),(v1) | vsqrtsd Vsd,Hsd,Wsd (F2),(v1)
+52: vrsqrtps Vps,Wps | vrsqrtss Vss,Hss,Wss (F3),(v1)
+53: vrcpps Vps,Wps | vrcpss Vss,Hss,Wss (F3),(v1)
+54: vandps Vps,Hps,Wps | vandpd Vpd,Hpd,Wpd (66)
+55: vandnps Vps,Hps,Wps | vandnpd Vpd,Hpd,Wpd (66)
+56: vorps Vps,Hps,Wps | vorpd Vpd,Hpd,Wpd (66)
+57: vxorps Vps,Hps,Wps | vxorpd Vpd,Hpd,Wpd (66)
+58: vaddps Vps,Hps,Wps | vaddpd Vpd,Hpd,Wpd (66) | vaddss Vss,Hss,Wss (F3),(v1) | vaddsd Vsd,Hsd,Wsd (F2),(v1)
+59: vmulps Vps,Hps,Wps | vmulpd Vpd,Hpd,Wpd (66) | vmulss Vss,Hss,Wss (F3),(v1) | vmulsd Vsd,Hsd,Wsd (F2),(v1)
+5a: vcvtps2pd Vpd,Wps | vcvtpd2ps Vps,Wpd (66) | vcvtss2sd Vsd,Hx,Wss (F3),(v1) | vcvtsd2ss Vss,Hx,Wsd (F2),(v1)
+5b: vcvtdq2ps Vps,Wdq | vcvtps2dq Vdq,Wps (66) | vcvttps2dq Vdq,Wps (F3)
+5c: vsubps Vps,Hps,Wps | vsubpd Vpd,Hpd,Wpd (66) | vsubss Vss,Hss,Wss (F3),(v1) | vsubsd Vsd,Hsd,Wsd (F2),(v1)
+5d: vminps Vps,Hps,Wps | vminpd Vpd,Hpd,Wpd (66) | vminss Vss,Hss,Wss (F3),(v1) | vminsd Vsd,Hsd,Wsd (F2),(v1)
+5e: vdivps Vps,Hps,Wps | vdivpd Vpd,Hpd,Wpd (66) | vdivss Vss,Hss,Wss (F3),(v1) | vdivsd Vsd,Hsd,Wsd (F2),(v1)
+5f: vmaxps Vps,Hps,Wps | vmaxpd Vpd,Hpd,Wpd (66) | vmaxss Vss,Hss,Wss (F3),(v1) | vmaxsd Vsd,Hsd,Wsd (F2),(v1)
+# 0x0f 0x60-0x6f
+60: punpcklbw Pq,Qd | vpunpcklbw Vx,Hx,Wx (66),(v1)
+61: punpcklwd Pq,Qd | vpunpcklwd Vx,Hx,Wx (66),(v1)
+62: punpckldq Pq,Qd | vpunpckldq Vx,Hx,Wx (66),(v1)
+63: packsswb Pq,Qq | vpacksswb Vx,Hx,Wx (66),(v1)
+64: pcmpgtb Pq,Qq | vpcmpgtb Vx,Hx,Wx (66),(v1)
+65: pcmpgtw Pq,Qq | vpcmpgtw Vx,Hx,Wx (66),(v1)
+66: pcmpgtd Pq,Qq | vpcmpgtd Vx,Hx,Wx (66),(v1)
+67: packuswb Pq,Qq | vpackuswb Vx,Hx,Wx (66),(v1)
+68: punpckhbw Pq,Qd | vpunpckhbw Vx,Hx,Wx (66),(v1)
+69: punpckhwd Pq,Qd | vpunpckhwd Vx,Hx,Wx (66),(v1)
+6a: punpckhdq Pq,Qd | vpunpckhdq Vx,Hx,Wx (66),(v1)
+6b: packssdw Pq,Qd | vpackssdw Vx,Hx,Wx (66),(v1)
+6c: vpunpcklqdq Vx,Hx,Wx (66),(v1)
+6d: vpunpckhqdq Vx,Hx,Wx (66),(v1)
+6e: movd/q Pd,Ey | vmovd/q Vy,Ey (66),(v1)
+6f: movq Pq,Qq | vmovdqa Vx,Wx (66) | vmovdqu Vx,Wx (F3)
+# 0x0f 0x70-0x7f
+70: pshufw Pq,Qq,Ib | vpshufd Vx,Wx,Ib (66),(v1) | vpshufhw Vx,Wx,Ib (F3),(v1) | vpshuflw Vx,Wx,Ib (F2),(v1)
+71: Grp12 (1A)
+72: Grp13 (1A)
+73: Grp14 (1A)
+74: pcmpeqb Pq,Qq | vpcmpeqb Vx,Hx,Wx (66),(v1)
+75: pcmpeqw Pq,Qq | vpcmpeqw Vx,Hx,Wx (66),(v1)
+76: pcmpeqd Pq,Qq | vpcmpeqd Vx,Hx,Wx (66),(v1)
+# Note: Remove (v), because vzeroall and vzeroupper become emms without VEX.
+77: emms | vzeroupper | vzeroall
+78: VMREAD Ey,Gy
+79: VMWRITE Gy,Ey
+7a:
+7b:
+7c: vhaddpd Vpd,Hpd,Wpd (66) | vhaddps Vps,Hps,Wps (F2)
+7d: vhsubpd Vpd,Hpd,Wpd (66) | vhsubps Vps,Hps,Wps (F2)
+7e: movd/q Ey,Pd | vmovd/q Ey,Vy (66),(v1) | vmovq Vq,Wq (F3),(v1)
+7f: movq Qq,Pq | vmovdqa Wx,Vx (66) | vmovdqu Wx,Vx (F3)
+# 0x0f 0x80-0x8f
+# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
+80: JO Jz (f64)
+81: JNO Jz (f64)
+82: JB/JC/JNAE Jz (f64)
+83: JAE/JNB/JNC Jz (f64)
+84: JE/JZ Jz (f64)
+85: JNE/JNZ Jz (f64)
+86: JBE/JNA Jz (f64)
+87: JA/JNBE Jz (f64)
+88: JS Jz (f64)
+89: JNS Jz (f64)
+8a: JP/JPE Jz (f64)
+8b: JNP/JPO Jz (f64)
+8c: JL/JNGE Jz (f64)
+8d: JNL/JGE Jz (f64)
+8e: JLE/JNG Jz (f64)
+8f: JNLE/JG Jz (f64)
+# 0x0f 0x90-0x9f
+90: SETO Eb
+91: SETNO Eb
+92: SETB/C/NAE Eb
+93: SETAE/NB/NC Eb
+94: SETE/Z Eb
+95: SETNE/NZ Eb
+96: SETBE/NA Eb
+97: SETA/NBE Eb
+98: SETS Eb
+99: SETNS Eb
+9a: SETP/PE Eb
+9b: SETNP/PO Eb
+9c: SETL/NGE Eb
+9d: SETNL/GE Eb
+9e: SETLE/NG Eb
+9f: SETNLE/G Eb
+# 0x0f 0xa0-0xaf
+a0: PUSH FS (d64)
+a1: POP FS (d64)
+a2: CPUID
+a3: BT Ev,Gv
+a4: SHLD Ev,Gv,Ib
+a5: SHLD Ev,Gv,CL
+a6: GrpPDLK
+a7: GrpRNG
+a8: PUSH GS (d64)
+a9: POP GS (d64)
+aa: RSM
+ab: BTS Ev,Gv
+ac: SHRD Ev,Gv,Ib
+ad: SHRD Ev,Gv,CL
+ae: Grp15 (1A),(1C)
+af: IMUL Gv,Ev
+# 0x0f 0xb0-0xbf
+b0: CMPXCHG Eb,Gb
+b1: CMPXCHG Ev,Gv
+b2: LSS Gv,Mp
+b3: BTR Ev,Gv
+b4: LFS Gv,Mp
+b5: LGS Gv,Mp
+b6: MOVZX Gv,Eb
+b7: MOVZX Gv,Ew
+b8: JMPE (!F3) | POPCNT Gv,Ev (F3)
+b9: Grp10 (1A)
+ba: Grp8 Ev,Ib (1A)
+bb: BTC Ev,Gv
+bc: BSF Gv,Ev (!F3) | TZCNT Gv,Ev (F3)
+bd: BSR Gv,Ev (!F3) | LZCNT Gv,Ev (F3)
+be: MOVSX Gv,Eb
+bf: MOVSX Gv,Ew
+# 0x0f 0xc0-0xcf
+c0: XADD Eb,Gb
+c1: XADD Ev,Gv
+c2: vcmpps Vps,Hps,Wps,Ib | vcmppd Vpd,Hpd,Wpd,Ib (66) | vcmpss Vss,Hss,Wss,Ib (F3),(v1) | vcmpsd Vsd,Hsd,Wsd,Ib (F2),(v1)
+c3: movnti My,Gy
+c4: pinsrw Pq,Ry/Mw,Ib | vpinsrw Vdq,Hdq,Ry/Mw,Ib (66),(v1)
+c5: pextrw Gd,Nq,Ib | vpextrw Gd,Udq,Ib (66),(v1)
+c6: vshufps Vps,Hps,Wps,Ib | vshufpd Vpd,Hpd,Wpd,Ib (66)
+c7: Grp9 (1A)
+c8: BSWAP RAX/EAX/R8/R8D
+c9: BSWAP RCX/ECX/R9/R9D
+ca: BSWAP RDX/EDX/R10/R10D
+cb: BSWAP RBX/EBX/R11/R11D
+cc: BSWAP RSP/ESP/R12/R12D
+cd: BSWAP RBP/EBP/R13/R13D
+ce: BSWAP RSI/ESI/R14/R14D
+cf: BSWAP RDI/EDI/R15/R15D
+# 0x0f 0xd0-0xdf
+d0: vaddsubpd Vpd,Hpd,Wpd (66) | vaddsubps Vps,Hps,Wps (F2)
+d1: psrlw Pq,Qq | vpsrlw Vx,Hx,Wx (66),(v1)
+d2: psrld Pq,Qq | vpsrld Vx,Hx,Wx (66),(v1)
+d3: psrlq Pq,Qq | vpsrlq Vx,Hx,Wx (66),(v1)
+d4: paddq Pq,Qq | vpaddq Vx,Hx,Wx (66),(v1)
+d5: pmullw Pq,Qq | vpmullw Vx,Hx,Wx (66),(v1)
+d6: vmovq Wq,Vq (66),(v1) | movq2dq Vdq,Nq (F3) | movdq2q Pq,Uq (F2)
+d7: pmovmskb Gd,Nq | vpmovmskb Gd,Ux (66),(v1)
+d8: psubusb Pq,Qq | vpsubusb Vx,Hx,Wx (66),(v1)
+d9: psubusw Pq,Qq | vpsubusw Vx,Hx,Wx (66),(v1)
+da: pminub Pq,Qq | vpminub Vx,Hx,Wx (66),(v1)
+db: pand Pq,Qq | vpand Vx,Hx,Wx (66),(v1)
+dc: paddusb Pq,Qq | vpaddusb Vx,Hx,Wx (66),(v1)
+dd: paddusw Pq,Qq | vpaddusw Vx,Hx,Wx (66),(v1)
+de: pmaxub Pq,Qq | vpmaxub Vx,Hx,Wx (66),(v1)
+df: pandn Pq,Qq | vpandn Vx,Hx,Wx (66),(v1)
+# 0x0f 0xe0-0xef
+e0: pavgb Pq,Qq | vpavgb Vx,Hx,Wx (66),(v1)
+e1: psraw Pq,Qq | vpsraw Vx,Hx,Wx (66),(v1)
+e2: psrad Pq,Qq | vpsrad Vx,Hx,Wx (66),(v1)
+e3: pavgw Pq,Qq | vpavgw Vx,Hx,Wx (66),(v1)
+e4: pmulhuw Pq,Qq | vpmulhuw Vx,Hx,Wx (66),(v1)
+e5: pmulhw Pq,Qq | vpmulhw Vx,Hx,Wx (66),(v1)
+e6: vcvttpd2dq Vx,Wpd (66) | vcvtdq2pd Vx,Wdq (F3) | vcvtpd2dq Vx,Wpd (F2)
+e7: movntq Mq,Pq | vmovntdq Mx,Vx (66)
+e8: psubsb Pq,Qq | vpsubsb Vx,Hx,Wx (66),(v1)
+e9: psubsw Pq,Qq | vpsubsw Vx,Hx,Wx (66),(v1)
+ea: pminsw Pq,Qq | vpminsw Vx,Hx,Wx (66),(v1)
+eb: por Pq,Qq | vpor Vx,Hx,Wx (66),(v1)
+ec: paddsb Pq,Qq | vpaddsb Vx,Hx,Wx (66),(v1)
+ed: paddsw Pq,Qq | vpaddsw Vx,Hx,Wx (66),(v1)
+ee: pmaxsw Pq,Qq | vpmaxsw Vx,Hx,Wx (66),(v1)
+ef: pxor Pq,Qq | vpxor Vx,Hx,Wx (66),(v1)
+# 0x0f 0xf0-0xff
+f0: vlddqu Vx,Mx (F2)
+f1: psllw Pq,Qq | vpsllw Vx,Hx,Wx (66),(v1)
+f2: pslld Pq,Qq | vpslld Vx,Hx,Wx (66),(v1)
+f3: psllq Pq,Qq | vpsllq Vx,Hx,Wx (66),(v1)
+f4: pmuludq Pq,Qq | vpmuludq Vx,Hx,Wx (66),(v1)
+f5: pmaddwd Pq,Qq | vpmaddwd Vx,Hx,Wx (66),(v1)
+f6: psadbw Pq,Qq | vpsadbw Vx,Hx,Wx (66),(v1)
+f7: maskmovq Pq,Nq | vmaskmovdqu Vx,Ux (66),(v1)
+f8: psubb Pq,Qq | vpsubb Vx,Hx,Wx (66),(v1)
+f9: psubw Pq,Qq | vpsubw Vx,Hx,Wx (66),(v1)
+fa: psubd Pq,Qq | vpsubd Vx,Hx,Wx (66),(v1)
+fb: psubq Pq,Qq | vpsubq Vx,Hx,Wx (66),(v1)
+fc: paddb Pq,Qq | vpaddb Vx,Hx,Wx (66),(v1)
+fd: paddw Pq,Qq | vpaddw Vx,Hx,Wx (66),(v1)
+fe: paddd Pq,Qq | vpaddd Vx,Hx,Wx (66),(v1)
+ff:
+EndTable
+
+Table: 3-byte opcode 1 (0x0f 0x38)
+Referrer: 3-byte escape 1
+AVXcode: 2
+# 0x0f 0x38 0x00-0x0f
+00: pshufb Pq,Qq | vpshufb Vx,Hx,Wx (66),(v1)
+01: phaddw Pq,Qq | vphaddw Vx,Hx,Wx (66),(v1)
+02: phaddd Pq,Qq | vphaddd Vx,Hx,Wx (66),(v1)
+03: phaddsw Pq,Qq | vphaddsw Vx,Hx,Wx (66),(v1)
+04: pmaddubsw Pq,Qq | vpmaddubsw Vx,Hx,Wx (66),(v1)
+05: phsubw Pq,Qq | vphsubw Vx,Hx,Wx (66),(v1)
+06: phsubd Pq,Qq | vphsubd Vx,Hx,Wx (66),(v1)
+07: phsubsw Pq,Qq | vphsubsw Vx,Hx,Wx (66),(v1)
+08: psignb Pq,Qq | vpsignb Vx,Hx,Wx (66),(v1)
+09: psignw Pq,Qq | vpsignw Vx,Hx,Wx (66),(v1)
+0a: psignd Pq,Qq | vpsignd Vx,Hx,Wx (66),(v1)
+0b: pmulhrsw Pq,Qq | vpmulhrsw Vx,Hx,Wx (66),(v1)
+0c: vpermilps Vx,Hx,Wx (66),(v)
+0d: vpermilpd Vx,Hx,Wx (66),(v)
+0e: vtestps Vx,Wx (66),(v)
+0f: vtestpd Vx,Wx (66),(v)
+# 0x0f 0x38 0x10-0x1f
+10: pblendvb Vdq,Wdq (66)
+11:
+12:
+13: vcvtph2ps Vx,Wx,Ib (66),(v)
+14: blendvps Vdq,Wdq (66)
+15: blendvpd Vdq,Wdq (66)
+16: vpermps Vqq,Hqq,Wqq (66),(v)
+17: vptest Vx,Wx (66)
+18: vbroadcastss Vx,Wd (66),(v)
+19: vbroadcastsd Vqq,Wq (66),(v)
+1a: vbroadcastf128 Vqq,Mdq (66),(v)
+1b:
+1c: pabsb Pq,Qq | vpabsb Vx,Wx (66),(v1)
+1d: pabsw Pq,Qq | vpabsw Vx,Wx (66),(v1)
+1e: pabsd Pq,Qq | vpabsd Vx,Wx (66),(v1)
+1f:
+# 0x0f 0x38 0x20-0x2f
+20: vpmovsxbw Vx,Ux/Mq (66),(v1)
+21: vpmovsxbd Vx,Ux/Md (66),(v1)
+22: vpmovsxbq Vx,Ux/Mw (66),(v1)
+23: vpmovsxwd Vx,Ux/Mq (66),(v1)
+24: vpmovsxwq Vx,Ux/Md (66),(v1)
+25: vpmovsxdq Vx,Ux/Mq (66),(v1)
+26:
+27:
+28: vpmuldq Vx,Hx,Wx (66),(v1)
+29: vpcmpeqq Vx,Hx,Wx (66),(v1)
+2a: vmovntdqa Vx,Mx (66),(v1)
+2b: vpackusdw Vx,Hx,Wx (66),(v1)
+2c: vmaskmovps Vx,Hx,Mx (66),(v)
+2d: vmaskmovpd Vx,Hx,Mx (66),(v)
+2e: vmaskmovps Mx,Hx,Vx (66),(v)
+2f: vmaskmovpd Mx,Hx,Vx (66),(v)
+# 0x0f 0x38 0x30-0x3f
+30: vpmovzxbw Vx,Ux/Mq (66),(v1)
+31: vpmovzxbd Vx,Ux/Md (66),(v1)
+32: vpmovzxbq Vx,Ux/Mw (66),(v1)
+33: vpmovzxwd Vx,Ux/Mq (66),(v1)
+34: vpmovzxwq Vx,Ux/Md (66),(v1)
+35: vpmovzxdq Vx,Ux/Mq (66),(v1)
+36: vpermd Vqq,Hqq,Wqq (66),(v)
+37: vpcmpgtq Vx,Hx,Wx (66),(v1)
+38: vpminsb Vx,Hx,Wx (66),(v1)
+39: vpminsd Vx,Hx,Wx (66),(v1)
+3a: vpminuw Vx,Hx,Wx (66),(v1)
+3b: vpminud Vx,Hx,Wx (66),(v1)
+3c: vpmaxsb Vx,Hx,Wx (66),(v1)
+3d: vpmaxsd Vx,Hx,Wx (66),(v1)
+3e: vpmaxuw Vx,Hx,Wx (66),(v1)
+3f: vpmaxud Vx,Hx,Wx (66),(v1)
+# 0x0f 0x38 0x40-0x8f
+40: vpmulld Vx,Hx,Wx (66),(v1)
+41: vphminposuw Vdq,Wdq (66),(v1)
+42:
+43:
+44:
+45: vpsrlvd/q Vx,Hx,Wx (66),(v)
+46: vpsravd Vx,Hx,Wx (66),(v)
+47: vpsllvd/q Vx,Hx,Wx (66),(v)
+# Skip 0x48-0x57
+58: vpbroadcastd Vx,Wx (66),(v)
+59: vpbroadcastq Vx,Wx (66),(v)
+5a: vbroadcasti128 Vqq,Mdq (66),(v)
+# Skip 0x5b-0x77
+78: vpbroadcastb Vx,Wx (66),(v)
+79: vpbroadcastw Vx,Wx (66),(v)
+# Skip 0x7a-0x7f
+80: INVEPT Gy,Mdq (66)
+81: INVVPID Gy,Mdq (66)
+82: INVPCID Gy,Mdq (66)
+8c: vpmaskmovd/q Vx,Hx,Mx (66),(v)
+8e: vpmaskmovd/q Mx,Vx,Hx (66),(v)
+# 0x0f 0x38 0x90-0xbf (FMA)
+90: vgatherdd/q Vx,Hx,Wx (66),(v)
+91: vgatherqd/q Vx,Hx,Wx (66),(v)
+92: vgatherdps/d Vx,Hx,Wx (66),(v)
+93: vgatherqps/d Vx,Hx,Wx (66),(v)
+94:
+95:
+96: vfmaddsub132ps/d Vx,Hx,Wx (66),(v)
+97: vfmsubadd132ps/d Vx,Hx,Wx (66),(v)
+98: vfmadd132ps/d Vx,Hx,Wx (66),(v)
+99: vfmadd132ss/d Vx,Hx,Wx (66),(v),(v1)
+9a: vfmsub132ps/d Vx,Hx,Wx (66),(v)
+9b: vfmsub132ss/d Vx,Hx,Wx (66),(v),(v1)
+9c: vfnmadd132ps/d Vx,Hx,Wx (66),(v)
+9d: vfnmadd132ss/d Vx,Hx,Wx (66),(v),(v1)
+9e: vfnmsub132ps/d Vx,Hx,Wx (66),(v)
+9f: vfnmsub132ss/d Vx,Hx,Wx (66),(v),(v1)
+a6: vfmaddsub213ps/d Vx,Hx,Wx (66),(v)
+a7: vfmsubadd213ps/d Vx,Hx,Wx (66),(v)
+a8: vfmadd213ps/d Vx,Hx,Wx (66),(v)
+a9: vfmadd213ss/d Vx,Hx,Wx (66),(v),(v1)
+aa: vfmsub213ps/d Vx,Hx,Wx (66),(v)
+ab: vfmsub213ss/d Vx,Hx,Wx (66),(v),(v1)
+ac: vfnmadd213ps/d Vx,Hx,Wx (66),(v)
+ad: vfnmadd213ss/d Vx,Hx,Wx (66),(v),(v1)
+ae: vfnmsub213ps/d Vx,Hx,Wx (66),(v)
+af: vfnmsub213ss/d Vx,Hx,Wx (66),(v),(v1)
+b6: vfmaddsub231ps/d Vx,Hx,Wx (66),(v)
+b7: vfmsubadd231ps/d Vx,Hx,Wx (66),(v)
+b8: vfmadd231ps/d Vx,Hx,Wx (66),(v)
+b9: vfmadd231ss/d Vx,Hx,Wx (66),(v),(v1)
+ba: vfmsub231ps/d Vx,Hx,Wx (66),(v)
+bb: vfmsub231ss/d Vx,Hx,Wx (66),(v),(v1)
+bc: vfnmadd231ps/d Vx,Hx,Wx (66),(v)
+bd: vfnmadd231ss/d Vx,Hx,Wx (66),(v),(v1)
+be: vfnmsub231ps/d Vx,Hx,Wx (66),(v)
+bf: vfnmsub231ss/d Vx,Hx,Wx (66),(v),(v1)
+# 0x0f 0x38 0xc0-0xff
+c8: sha1nexte Vdq,Wdq
+c9: sha1msg1 Vdq,Wdq
+ca: sha1msg2 Vdq,Wdq
+cb: sha256rnds2 Vdq,Wdq
+cc: sha256msg1 Vdq,Wdq
+cd: sha256msg2 Vdq,Wdq
+db: VAESIMC Vdq,Wdq (66),(v1)
+dc: VAESENC Vdq,Hdq,Wdq (66),(v1)
+dd: VAESENCLAST Vdq,Hdq,Wdq (66),(v1)
+de: VAESDEC Vdq,Hdq,Wdq (66),(v1)
+df: VAESDECLAST Vdq,Hdq,Wdq (66),(v1)
+f0: MOVBE Gy,My | MOVBE Gw,Mw (66) | CRC32 Gd,Eb (F2) | CRC32 Gd,Eb (66&F2)
+f1: MOVBE My,Gy | MOVBE Mw,Gw (66) | CRC32 Gd,Ey (F2) | CRC32 Gd,Ew (66&F2)
+f2: ANDN Gy,By,Ey (v)
+f3: Grp17 (1A)
+f5: BZHI Gy,Ey,By (v) | PEXT Gy,By,Ey (F3),(v) | PDEP Gy,By,Ey (F2),(v)
+f6: ADCX Gy,Ey (66) | ADOX Gy,Ey (F3) | MULX By,Gy,rDX,Ey (F2),(v)
+f7: BEXTR Gy,Ey,By (v) | SHLX Gy,Ey,By (66),(v) | SARX Gy,Ey,By (F3),(v) | SHRX Gy,Ey,By (F2),(v)
+EndTable
+
+Table: 3-byte opcode 2 (0x0f 0x3a)
+Referrer: 3-byte escape 2
+AVXcode: 3
+# 0x0f 0x3a 0x00-0xff
+00: vpermq Vqq,Wqq,Ib (66),(v)
+01: vpermpd Vqq,Wqq,Ib (66),(v)
+02: vpblendd Vx,Hx,Wx,Ib (66),(v)
+03:
+04: vpermilps Vx,Wx,Ib (66),(v)
+05: vpermilpd Vx,Wx,Ib (66),(v)
+06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v)
+07:
+08: vroundps Vx,Wx,Ib (66)
+09: vroundpd Vx,Wx,Ib (66)
+0a: vroundss Vss,Wss,Ib (66),(v1)
+0b: vroundsd Vsd,Wsd,Ib (66),(v1)
+0c: vblendps Vx,Hx,Wx,Ib (66)
+0d: vblendpd Vx,Hx,Wx,Ib (66)
+0e: vpblendw Vx,Hx,Wx,Ib (66),(v1)
+0f: palignr Pq,Qq,Ib | vpalignr Vx,Hx,Wx,Ib (66),(v1)
+14: vpextrb Rd/Mb,Vdq,Ib (66),(v1)
+15: vpextrw Rd/Mw,Vdq,Ib (66),(v1)
+16: vpextrd/q Ey,Vdq,Ib (66),(v1)
+17: vextractps Ed,Vdq,Ib (66),(v1)
+18: vinsertf128 Vqq,Hqq,Wqq,Ib (66),(v)
+19: vextractf128 Wdq,Vqq,Ib (66),(v)
+1d: vcvtps2ph Wx,Vx,Ib (66),(v)
+20: vpinsrb Vdq,Hdq,Ry/Mb,Ib (66),(v1)
+21: vinsertps Vdq,Hdq,Udq/Md,Ib (66),(v1)
+22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1)
+38: vinserti128 Vqq,Hqq,Wqq,Ib (66),(v)
+39: vextracti128 Wdq,Vqq,Ib (66),(v)
+40: vdpps Vx,Hx,Wx,Ib (66)
+41: vdppd Vdq,Hdq,Wdq,Ib (66),(v1)
+42: vmpsadbw Vx,Hx,Wx,Ib (66),(v1)
+44: vpclmulqdq Vdq,Hdq,Wdq,Ib (66),(v1)
+46: vperm2i128 Vqq,Hqq,Wqq,Ib (66),(v)
+4a: vblendvps Vx,Hx,Wx,Lx (66),(v)
+4b: vblendvpd Vx,Hx,Wx,Lx (66),(v)
+4c: vpblendvb Vx,Hx,Wx,Lx (66),(v1)
+60: vpcmpestrm Vdq,Wdq,Ib (66),(v1)
+61: vpcmpestri Vdq,Wdq,Ib (66),(v1)
+62: vpcmpistrm Vdq,Wdq,Ib (66),(v1)
+63: vpcmpistri Vdq,Wdq,Ib (66),(v1)
+cc: sha1rnds4 Vdq,Wdq,Ib
+df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1)
+f0: RORX Gy,Ey,Ib (F2),(v)
+EndTable
+
+GrpTable: Grp1
+0: ADD
+1: OR
+2: ADC
+3: SBB
+4: AND
+5: SUB
+6: XOR
+7: CMP
+EndTable
+
+GrpTable: Grp1A
+0: POP
+EndTable
+
+GrpTable: Grp2
+0: ROL
+1: ROR
+2: RCL
+3: RCR
+4: SHL/SAL
+5: SHR
+6:
+7: SAR
+EndTable
+
+GrpTable: Grp3_1
+0: TEST Eb,Ib
+1:
+2: NOT Eb
+3: NEG Eb
+4: MUL AL,Eb
+5: IMUL AL,Eb
+6: DIV AL,Eb
+7: IDIV AL,Eb
+EndTable
+
+GrpTable: Grp3_2
+0: TEST Ev,Iz
+1:
+2: NOT Ev
+3: NEG Ev
+4: MUL rAX,Ev
+5: IMUL rAX,Ev
+6: DIV rAX,Ev
+7: IDIV rAX,Ev
+EndTable
+
+GrpTable: Grp4
+0: INC Eb
+1: DEC Eb
+EndTable
+
+GrpTable: Grp5
+0: INC Ev
+1: DEC Ev
+# Note: "forced64" is Intel CPU behavior (see comment about CALL insn).
+2: CALLN Ev (f64)
+3: CALLF Ep
+4: JMPN Ev (f64)
+5: JMPF Mp
+6: PUSH Ev (d64)
+7:
+EndTable
+
+GrpTable: Grp6
+0: SLDT Rv/Mw
+1: STR Rv/Mw
+2: LLDT Ew
+3: LTR Ew
+4: VERR Ew
+5: VERW Ew
+EndTable
+
+GrpTable: Grp7
+0: SGDT Ms | VMCALL (001),(11B) | VMLAUNCH (010),(11B) | VMRESUME (011),(11B) | VMXOFF (100),(11B)
+1: SIDT Ms | MONITOR (000),(11B) | MWAIT (001),(11B) | CLAC (010),(11B) | STAC (011),(11B)
+2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101),(11B) | XTEST (110),(11B)
+3: LIDT Ms
+4: SMSW Mw/Rv
+5: rdpkru (110),(11B) | wrpkru (111),(11B)
+6: LMSW Ew
+7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B)
+EndTable
+
+GrpTable: Grp8
+4: BT
+5: BTS
+6: BTR
+7: BTC
+EndTable
+
+GrpTable: Grp9
+1: CMPXCHG8B/16B Mq/Mdq
+3: xrstors
+4: xsavec
+5: xsaves
+6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B)
+7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B)
+EndTable
+
+GrpTable: Grp10
+EndTable
+
+# Grp11A and Grp11B are expressed as Grp11 in Intel SDM
+GrpTable: Grp11A
+0: MOV Eb,Ib
+7: XABORT Ib (000),(11B)
+EndTable
+
+GrpTable: Grp11B
+0: MOV Eb,Iz
+7: XBEGIN Jz (000),(11B)
+EndTable
+
+GrpTable: Grp12
+2: psrlw Nq,Ib (11B) | vpsrlw Hx,Ux,Ib (66),(11B),(v1)
+4: psraw Nq,Ib (11B) | vpsraw Hx,Ux,Ib (66),(11B),(v1)
+6: psllw Nq,Ib (11B) | vpsllw Hx,Ux,Ib (66),(11B),(v1)
+EndTable
+
+GrpTable: Grp13
+2: psrld Nq,Ib (11B) | vpsrld Hx,Ux,Ib (66),(11B),(v1)
+4: psrad Nq,Ib (11B) | vpsrad Hx,Ux,Ib (66),(11B),(v1)
+6: pslld Nq,Ib (11B) | vpslld Hx,Ux,Ib (66),(11B),(v1)
+EndTable
+
+GrpTable: Grp14
+2: psrlq Nq,Ib (11B) | vpsrlq Hx,Ux,Ib (66),(11B),(v1)
+3: vpsrldq Hx,Ux,Ib (66),(11B),(v1)
+6: psllq Nq,Ib (11B) | vpsllq Hx,Ux,Ib (66),(11B),(v1)
+7: vpslldq Hx,Ux,Ib (66),(11B),(v1)
+EndTable
+
+GrpTable: Grp15
+0: fxsave | RDFSBASE Ry (F3),(11B)
+1: fxrstor | RDGSBASE Ry (F3),(11B)
+2: vldmxcsr Md (v1) | WRFSBASE Ry (F3),(11B)
+3: vstmxcsr Md (v1) | WRGSBASE Ry (F3),(11B)
+4: XSAVE
+5: XRSTOR | lfence (11B)
+6: XSAVEOPT | clwb (66) | mfence (11B)
+7: clflush | clflushopt (66) | sfence (11B) | pcommit (66),(11B)
+EndTable
+
+GrpTable: Grp16
+0: prefetch NTA
+1: prefetch T0
+2: prefetch T1
+3: prefetch T2
+EndTable
+
+GrpTable: Grp17
+1: BLSR By,Ey (v)
+2: BLSMSK By,Ey (v)
+3: BLSI By,Ey (v)
+EndTable
+
+# AMD's Prefetch Group
+GrpTable: GrpP
+0: PREFETCH
+1: PREFETCHW
+EndTable
+
+GrpTable: GrpPDLK
+0: MONTMUL
+1: XSHA1
+2: XSHA2
+EndTable
+
+GrpTable: GrpRNG
+0: xstore-rng
+1: xcrypt-ecb
+2: xcrypt-cbc
+4: xcrypt-cfb
+5: xcrypt-ofb
+EndTable
diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c
new file mode 100644
index 000000000000..e8a1e69eb92c
--- /dev/null
+++ b/tools/objtool/builtin-check.c
@@ -0,0 +1,1206 @@
+/*
+ * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * objtool check:
+ *
+ * This command analyzes every .o file and ensures the validity of its stack
+ * trace metadata. It enforces a set of rules on asm code and C inline
+ * assembly code so that stack traces can be reliable.
+ *
+ * For more information, see tools/objtool/Documentation/stack-validation.txt.
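+ *
+ * Example invocation, based on the usage string and options defined below
+ * (foo.o is a placeholder object file):
+ *
+ *   objtool check --no-fp foo.o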
+ */
+
+#include <string.h>
+#include <subcmd/parse-options.h>
+
+#include "builtin.h"
+#include "elf.h"
+#include "special.h"
+#include "arch.h"
+#include "warn.h"
+
+#include <linux/hashtable.h>
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
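+/*
+ * Per-instruction state bits tracked by validate_branch(): frame pointer
+ * save/setup progress and whether an __fentry__ call has been seen.
+ */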
+#define STATE_FP_SAVED 0x1
+#define STATE_FP_SETUP 0x2
+#define STATE_FENTRY 0x4
+
+struct instruction {
+ struct list_head list;
+ struct hlist_node hash;
+ struct section *sec;
+ unsigned long offset;
+ unsigned int len, state;
+ unsigned char type;
+ unsigned long immediate;
+ bool alt_group, visited;
+ struct symbol *call_dest;
+ struct instruction *jump_dest;
+ struct list_head alts;
+ struct symbol *func;
+};
+
+struct alternative {
+ struct list_head list;
+ struct instruction *insn;
+};
+
+struct objtool_file {
+ struct elf *elf;
+ struct list_head insn_list;
+ DECLARE_HASHTABLE(insn_hash, 16);
+ struct section *rodata, *whitelist;
+ bool ignore_unreachables, c_file;
+};
+
+const char *objname;
+static bool nofp;
+
+static struct instruction *find_insn(struct objtool_file *file,
+ struct section *sec, unsigned long offset)
+{
+ struct instruction *insn;
+
+ hash_for_each_possible(file->insn_hash, insn, hash, offset)
+ if (insn->sec == sec && insn->offset == offset)
+ return insn;
+
+ return NULL;
+}
+
+static struct instruction *next_insn_same_sec(struct objtool_file *file,
+ struct instruction *insn)
+{
+ struct instruction *next = list_next_entry(insn, list);
+
+ if (&next->list == &file->insn_list || next->sec != insn->sec)
+ return NULL;
+
+ return next;
+}
+
+#define for_each_insn(file, insn) \
+ list_for_each_entry(insn, &file->insn_list, list)
+
+#define func_for_each_insn(file, func, insn) \
+ for (insn = find_insn(file, func->sec, func->offset); \
+ insn && &insn->list != &file->insn_list && \
+ insn->sec == func->sec && \
+ insn->offset < func->offset + func->len; \
+ insn = list_next_entry(insn, list))
+
+#define sec_for_each_insn_from(file, insn) \
+ for (; insn; insn = next_insn_same_sec(file, insn))
+
+
+/*
+ * Check if the function has been manually whitelisted with the
+ * STACK_FRAME_NON_STANDARD macro, or if it should be automatically whitelisted
+ * due to its use of a context switching instruction.
+ */
+static bool ignore_func(struct objtool_file *file, struct symbol *func)
+{
+ struct rela *rela;
+ struct instruction *insn;
+
+ /* check for STACK_FRAME_NON_STANDARD */
+ if (file->whitelist && file->whitelist->rela)
+ list_for_each_entry(rela, &file->whitelist->rela->rela_list, list)
+ if (rela->sym->sec == func->sec &&
+ rela->addend == func->offset)
+ return true;
+
+ /* check if it has a context switching instruction */
+ func_for_each_insn(file, func, insn)
+ if (insn->type == INSN_CONTEXT_SWITCH)
+ return true;
+
+ return false;
+}
+
+/*
+ * This checks to see if the given function is a "noreturn" function.
+ *
+ * For global functions which are outside the scope of this object file, we
+ * have to keep a manual list of them.
+ *
+ * For local functions, we have to detect them manually by simply looking for
+ * the lack of a return instruction.
+ *
+ * Returns:
+ * -1: error
+ * 0: no dead end
+ * 1: dead end
+ */
+static int __dead_end_function(struct objtool_file *file, struct symbol *func,
+ int recursion)
+{
+ int i;
+ struct instruction *insn;
+ bool empty = true;
+
+ /*
+ * Unfortunately these have to be hard coded because the noreturn
+ * attribute isn't provided in ELF data.
+ */
+ static const char * const global_noreturns[] = {
+ "__stack_chk_fail",
+ "panic",
+ "do_exit",
+ "__module_put_and_exit",
+ "complete_and_exit",
+ "kvm_spurious_fault",
+ "__reiserfs_panic",
+ "lbug_with_loc"
+ };
+
+ if (func->bind == STB_WEAK)
+ return 0;
+
+ if (func->bind == STB_GLOBAL)
+ for (i = 0; i < ARRAY_SIZE(global_noreturns); i++)
+ if (!strcmp(func->name, global_noreturns[i]))
+ return 1;
+
+ if (!func->sec)
+ return 0;
+
+ func_for_each_insn(file, func, insn) {
+ empty = false;
+
+ if (insn->type == INSN_RETURN)
+ return 0;
+ }
+
+ if (empty)
+ return 0;
+
+ /*
+ * A function can have a sibling call instead of a return. In that
+ * case, the function's dead-end status depends on whether the target
+ * of the sibling call returns.
+ */
+ func_for_each_insn(file, func, insn) {
+ if (insn->sec != func->sec ||
+ insn->offset >= func->offset + func->len)
+ break;
+
+ if (insn->type == INSN_JUMP_UNCONDITIONAL) {
+ struct instruction *dest = insn->jump_dest;
+ struct symbol *dest_func;
+
+ if (!dest)
+ /* sibling call to another file */
+ return 0;
+
+ if (dest->sec != func->sec ||
+ dest->offset < func->offset ||
+ dest->offset >= func->offset + func->len) {
+ /* local sibling call */
+ dest_func = find_symbol_by_offset(dest->sec,
+ dest->offset);
+ if (!dest_func)
+ continue;
+
+ if (recursion == 5) {
+ WARN_FUNC("infinite recursion (objtool bug!)",
+ dest->sec, dest->offset);
+ return -1;
+ }
+
+ return __dead_end_function(file, dest_func,
+ recursion + 1);
+ }
+ }
+
+ if (insn->type == INSN_JUMP_DYNAMIC && list_empty(&insn->alts))
+ /* sibling call */
+ return 0;
+ }
+
+ return 1;
+}
+
+static int dead_end_function(struct objtool_file *file, struct symbol *func)
+{
+ return __dead_end_function(file, func, 0);
+}
+
+/*
+ * Call the arch-specific instruction decoder for all the instructions and add
+ * them to the global instruction list.
+ */
+static int decode_instructions(struct objtool_file *file)
+{
+ struct section *sec;
+ struct symbol *func;
+ unsigned long offset;
+ struct instruction *insn;
+ int ret;
+
+ list_for_each_entry(sec, &file->elf->sections, list) {
+
+ if (!(sec->sh.sh_flags & SHF_EXECINSTR))
+ continue;
+
+ for (offset = 0; offset < sec->len; offset += insn->len) {
+ insn = malloc(sizeof(*insn));
+ if (!insn) {
+ WARN("malloc failed");
+ return -1;
+ }
+ memset(insn, 0, sizeof(*insn));
+
+ INIT_LIST_HEAD(&insn->alts);
+ insn->sec = sec;
+ insn->offset = offset;
+
+ ret = arch_decode_instruction(file->elf, sec, offset,
+ sec->len - offset,
+ &insn->len, &insn->type,
+ &insn->immediate);
+ if (ret)
+ return ret;
+
+ if (!insn->type || insn->type > INSN_LAST) {
+ WARN_FUNC("invalid instruction type %d",
+ insn->sec, insn->offset, insn->type);
+ return -1;
+ }
+
+ hash_add(file->insn_hash, &insn->hash, insn->offset);
+ list_add_tail(&insn->list, &file->insn_list);
+ }
+
+ list_for_each_entry(func, &sec->symbol_list, list) {
+ if (func->type != STT_FUNC)
+ continue;
+
+ if (!find_insn(file, sec, func->offset)) {
+ WARN("%s(): can't find starting instruction",
+ func->name);
+ return -1;
+ }
+
+ func_for_each_insn(file, func, insn)
+ if (!insn->func)
+ insn->func = func;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Warnings shouldn't be reported for ignored functions.
+ */
+static void add_ignores(struct objtool_file *file)
+{
+ struct instruction *insn;
+ struct section *sec;
+ struct symbol *func;
+
+ list_for_each_entry(sec, &file->elf->sections, list) {
+ list_for_each_entry(func, &sec->symbol_list, list) {
+ if (func->type != STT_FUNC)
+ continue;
+
+ if (!ignore_func(file, func))
+ continue;
+
+ func_for_each_insn(file, func, insn)
+ insn->visited = true;
+ }
+ }
+}
+
+/*
+ * Find the destination instructions for all jumps.
+ */
+static int add_jump_destinations(struct objtool_file *file)
+{
+ struct instruction *insn;
+ struct rela *rela;
+ struct section *dest_sec;
+ unsigned long dest_off;
+
+ for_each_insn(file, insn) {
+ if (insn->type != INSN_JUMP_CONDITIONAL &&
+ insn->type != INSN_JUMP_UNCONDITIONAL)
+ continue;
+
+ /* skip ignores */
+ if (insn->visited)
+ continue;
+
+ rela = find_rela_by_dest_range(insn->sec, insn->offset,
+ insn->len);
+ if (!rela) {
+ dest_sec = insn->sec;
+ dest_off = insn->offset + insn->len + insn->immediate;
+ } else if (rela->sym->type == STT_SECTION) {
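+ /*
+ * The PC32 addend is relative to the end of the 4-byte immediate,
+ * hence the +4 to recover the target offset within the section.
+ */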
+ dest_sec = rela->sym->sec;
+ dest_off = rela->addend + 4;
+ } else if (rela->sym->sec->idx) {
+ dest_sec = rela->sym->sec;
+ dest_off = rela->sym->sym.st_value + rela->addend + 4;
+ } else {
+ /* sibling call */
+ insn->jump_dest = NULL;
+ continue;
+ }
+
+ insn->jump_dest = find_insn(file, dest_sec, dest_off);
+ if (!insn->jump_dest) {
+
+ /*
+ * This is a special case where an alt instruction
+ * jumps past the end of the section. These are
+ * handled later in handle_group_alt().
+ */
+ if (!strcmp(insn->sec->name, ".altinstr_replacement"))
+ continue;
+
+ WARN_FUNC("can't find jump dest instruction at %s+0x%lx",
+ insn->sec, insn->offset, dest_sec->name,
+ dest_off);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Find the destination instructions for all calls.
+ */
+static int add_call_destinations(struct objtool_file *file)
+{
+ struct instruction *insn;
+ unsigned long dest_off;
+ struct rela *rela;
+
+ for_each_insn(file, insn) {
+ if (insn->type != INSN_CALL)
+ continue;
+
+ rela = find_rela_by_dest_range(insn->sec, insn->offset,
+ insn->len);
+ if (!rela) {
+ dest_off = insn->offset + insn->len + insn->immediate;
+ insn->call_dest = find_symbol_by_offset(insn->sec,
+ dest_off);
+ if (!insn->call_dest) {
+ WARN_FUNC("can't find call dest symbol at offset 0x%lx",
+ insn->sec, insn->offset, dest_off);
+ return -1;
+ }
+ } else if (rela->sym->type == STT_SECTION) {
+ insn->call_dest = find_symbol_by_offset(rela->sym->sec,
+ rela->addend+4);
+ if (!insn->call_dest ||
+ insn->call_dest->type != STT_FUNC) {
+ WARN_FUNC("can't find call dest symbol at %s+0x%x",
+ insn->sec, insn->offset,
+ rela->sym->sec->name,
+ rela->addend + 4);
+ return -1;
+ }
+ } else
+ insn->call_dest = rela->sym;
+ }
+
+ return 0;
+}
+
+/*
+ * The .alternatives section requires some extra special care, over and above
+ * what other special sections require:
+ *
+ * 1. Because alternatives are patched in-place, we need to insert a fake jump
+ * instruction at the end so that validate_branch() skips all the original
+ * replaced instructions when validating the new instruction path.
+ *
+ * 2. An added wrinkle is that the new instruction length might be zero. In
+ * that case the old instructions are replaced with noops. We simulate that
+ * by creating a fake jump as the only new instruction.
+ *
+ * 3. In some cases, the alternative section includes an instruction which
+ * conditionally jumps to the _end_ of the entry. We have to modify these
+ * jumps' destinations to point back to .text rather than the end of the
+ * entry in .altinstr_replacement.
+ *
+ * 4. It has been requested that we don't validate the !POPCNT feature path
+ * which is a "very very small percentage of machines".
+ */
+static int handle_group_alt(struct objtool_file *file,
+ struct special_alt *special_alt,
+ struct instruction *orig_insn,
+ struct instruction **new_insn)
+{
+ struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump;
+ unsigned long dest_off;
+
+ last_orig_insn = NULL;
+ insn = orig_insn;
+ sec_for_each_insn_from(file, insn) {
+ if (insn->offset >= special_alt->orig_off + special_alt->orig_len)
+ break;
+
+ if (special_alt->skip_orig)
+ insn->type = INSN_NOP;
+
+ insn->alt_group = true;
+ last_orig_insn = insn;
+ }
+
+ if (!next_insn_same_sec(file, last_orig_insn)) {
+ WARN("%s: don't know how to handle alternatives at end of section",
+ special_alt->orig_sec->name);
+ return -1;
+ }
+
+ fake_jump = malloc(sizeof(*fake_jump));
+ if (!fake_jump) {
+ WARN("malloc failed");
+ return -1;
+ }
+ memset(fake_jump, 0, sizeof(*fake_jump));
+ INIT_LIST_HEAD(&fake_jump->alts);
+ fake_jump->sec = special_alt->new_sec;
+ fake_jump->offset = -1;
+ fake_jump->type = INSN_JUMP_UNCONDITIONAL;
+ fake_jump->jump_dest = list_next_entry(last_orig_insn, list);
+
+ if (!special_alt->new_len) {
+ *new_insn = fake_jump;
+ return 0;
+ }
+
+ last_new_insn = NULL;
+ insn = *new_insn;
+ sec_for_each_insn_from(file, insn) {
+ if (insn->offset >= special_alt->new_off + special_alt->new_len)
+ break;
+
+ last_new_insn = insn;
+
+ if (insn->type != INSN_JUMP_CONDITIONAL &&
+ insn->type != INSN_JUMP_UNCONDITIONAL)
+ continue;
+
+ if (!insn->immediate)
+ continue;
+
+ dest_off = insn->offset + insn->len + insn->immediate;
+ if (dest_off == special_alt->new_off + special_alt->new_len)
+ insn->jump_dest = fake_jump;
+
+ if (!insn->jump_dest) {
+ WARN_FUNC("can't find alternative jump destination",
+ insn->sec, insn->offset);
+ return -1;
+ }
+ }
+
+ if (!last_new_insn) {
+ WARN_FUNC("can't find last new alternative instruction",
+ special_alt->new_sec, special_alt->new_off);
+ return -1;
+ }
+
+ list_add(&fake_jump->list, &last_new_insn->list);
+
+ return 0;
+}
+
+/*
+ * A jump table entry can either convert a nop to a jump or a jump to a nop.
+ * If the original instruction is a jump, make the alt entry an effective nop
+ * by just skipping the original instruction.
+ */
+static int handle_jump_alt(struct objtool_file *file,
+ struct special_alt *special_alt,
+ struct instruction *orig_insn,
+ struct instruction **new_insn)
+{
+ if (orig_insn->type == INSN_NOP)
+ return 0;
+
+ if (orig_insn->type != INSN_JUMP_UNCONDITIONAL) {
+ WARN_FUNC("unsupported instruction at jump label",
+ orig_insn->sec, orig_insn->offset);
+ return -1;
+ }
+
+ *new_insn = list_next_entry(orig_insn, list);
+ return 0;
+}
+
+/*
+ * Read all the special sections which have alternate instructions which can be
+ * patched in or redirected to at runtime. Each instruction having alternate
+ * instruction(s) has them added to its insn->alts list, which will be
+ * traversed in validate_branch().
+ */
+static int add_special_section_alts(struct objtool_file *file)
+{
+ struct list_head special_alts;
+ struct instruction *orig_insn, *new_insn;
+ struct special_alt *special_alt, *tmp;
+ struct alternative *alt;
+ int ret;
+
+ ret = special_get_alts(file->elf, &special_alts);
+ if (ret)
+ return ret;
+
+ list_for_each_entry_safe(special_alt, tmp, &special_alts, list) {
+ alt = malloc(sizeof(*alt));
+ if (!alt) {
+ WARN("malloc failed");
+ ret = -1;
+ goto out;
+ }
+
+ orig_insn = find_insn(file, special_alt->orig_sec,
+ special_alt->orig_off);
+ if (!orig_insn) {
+ WARN_FUNC("special: can't find orig instruction",
+ special_alt->orig_sec, special_alt->orig_off);
+ ret = -1;
+ goto out;
+ }
+
+ new_insn = NULL;
+ if (!special_alt->group || special_alt->new_len) {
+ new_insn = find_insn(file, special_alt->new_sec,
+ special_alt->new_off);
+ if (!new_insn) {
+ WARN_FUNC("special: can't find new instruction",
+ special_alt->new_sec,
+ special_alt->new_off);
+ ret = -1;
+ goto out;
+ }
+ }
+
+ if (special_alt->group) {
+ ret = handle_group_alt(file, special_alt, orig_insn,
+ &new_insn);
+ if (ret)
+ goto out;
+ } else if (special_alt->jump_or_nop) {
+ ret = handle_jump_alt(file, special_alt, orig_insn,
+ &new_insn);
+ if (ret)
+ goto out;
+ }
+
+ alt->insn = new_insn;
+ list_add_tail(&alt->list, &orig_insn->alts);
+
+ list_del(&special_alt->list);
+ free(special_alt);
+ }
+
+out:
+ return ret;
+}
+
+static int add_switch_table(struct objtool_file *file, struct symbol *func,
+ struct instruction *insn, struct rela *table,
+ struct rela *next_table)
+{
+ struct rela *rela = table;
+ struct instruction *alt_insn;
+ struct alternative *alt;
+
+ list_for_each_entry_from(rela, &file->rodata->rela->rela_list, list) {
+ if (rela == next_table)
+ break;
+
+ if (rela->sym->sec != insn->sec ||
+ rela->addend <= func->offset ||
+ rela->addend >= func->offset + func->len)
+ break;
+
+ alt_insn = find_insn(file, insn->sec, rela->addend);
+ if (!alt_insn) {
+ WARN("%s: can't find instruction at %s+0x%x",
+ file->rodata->rela->name, insn->sec->name,
+ rela->addend);
+ return -1;
+ }
+
+ alt = malloc(sizeof(*alt));
+ if (!alt) {
+ WARN("malloc failed");
+ return -1;
+ }
+
+ alt->insn = alt_insn;
+ list_add_tail(&alt->list, &insn->alts);
+ }
+
+ return 0;
+}
+
+static int add_func_switch_tables(struct objtool_file *file,
+ struct symbol *func)
+{
+ struct instruction *insn, *prev_jump;
+ struct rela *text_rela, *rodata_rela, *prev_rela;
+ int ret;
+
+ prev_jump = NULL;
+
+ func_for_each_insn(file, func, insn) {
+ if (insn->type != INSN_JUMP_DYNAMIC)
+ continue;
+
+ text_rela = find_rela_by_dest_range(insn->sec, insn->offset,
+ insn->len);
+ if (!text_rela || text_rela->sym != file->rodata->sym)
+ continue;
+
+ /* common case: jmpq *[addr](,%rax,8) */
+ rodata_rela = find_rela_by_dest(file->rodata,
+ text_rela->addend);
+
+ /*
+ * rare case: jmpq *[addr](%rip)
+ *
+ * This check is for a rare gcc quirk, currently only seen in
+ * three driver functions in the kernel, only with certain
+ * obscure non-distro configs.
+ *
+ * As part of an optimization, gcc makes a copy of an existing
+ * switch jump table, modifies it, and then hard-codes the jump
+ * (albeit with an indirect jump) to use a single entry in the
+ * table. The rest of the jump table and some of its jump
+ * targets remain as dead code.
+ *
+ * In such a case we can just crudely ignore all unreachable
+ * instruction warnings for the entire object file. Ideally we
+ * would just ignore them for the function, but that would
+ * require redesigning the code quite a bit. And honestly
+ * that's just not worth doing: unreachable instruction
+ * warnings are of questionable value anyway, and this is such
+ * a rare issue.
+ *
+ * kbuild reports:
+ * - https://lkml.kernel.org/r/201603231906.LWcVUpxm%25fengguang.wu@intel.com
+ * - https://lkml.kernel.org/r/201603271114.K9i45biy%25fengguang.wu@intel.com
+ * - https://lkml.kernel.org/r/201603291058.zuJ6ben1%25fengguang.wu@intel.com
+ *
+ * gcc bug:
+ * - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70604
+ */
+ if (!rodata_rela) {
+ rodata_rela = find_rela_by_dest(file->rodata,
+ text_rela->addend + 4);
+ if (rodata_rela)
+ file->ignore_unreachables = true;
+ }
+
+ if (!rodata_rela)
+ continue;
+
+ /*
+ * We found a switch table, but we don't know yet how big it
+ * is. Don't add it until we reach the end of the function or
+ * the beginning of another switch table in the same function.
+ */
+ if (prev_jump) {
+ ret = add_switch_table(file, func, prev_jump, prev_rela,
+ rodata_rela);
+ if (ret)
+ return ret;
+ }
+
+ prev_jump = insn;
+ prev_rela = rodata_rela;
+ }
+
+ if (prev_jump) {
+ ret = add_switch_table(file, func, prev_jump, prev_rela, NULL);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/*
+ * For some switch statements, gcc generates a jump table in the .rodata
+ * section which contains a list of addresses within the function to jump to.
+ * This finds these jump tables and adds them to the insn->alts lists.
+ */
+static int add_switch_table_alts(struct objtool_file *file)
+{
+ struct section *sec;
+ struct symbol *func;
+ int ret;
+
+ if (!file->rodata || !file->rodata->rela)
+ return 0;
+
+ list_for_each_entry(sec, &file->elf->sections, list) {
+ list_for_each_entry(func, &sec->symbol_list, list) {
+ if (func->type != STT_FUNC)
+ continue;
+
+ ret = add_func_switch_tables(file, func);
+ if (ret)
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static int decode_sections(struct objtool_file *file)
+{
+ int ret;
+
+ ret = decode_instructions(file);
+ if (ret)
+ return ret;
+
+ add_ignores(file);
+
+ ret = add_jump_destinations(file);
+ if (ret)
+ return ret;
+
+ ret = add_call_destinations(file);
+ if (ret)
+ return ret;
+
+ ret = add_special_section_alts(file);
+ if (ret)
+ return ret;
+
+ ret = add_switch_table_alts(file);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static bool is_fentry_call(struct instruction *insn)
+{
+ if (insn->type == INSN_CALL &&
+ insn->call_dest->type == STT_NOTYPE &&
+ !strcmp(insn->call_dest->name, "__fentry__"))
+ return true;
+
+ return false;
+}
+
+static bool has_modified_stack_frame(struct instruction *insn)
+{
+ return (insn->state & STATE_FP_SAVED) ||
+ (insn->state & STATE_FP_SETUP);
+}
+
+static bool has_valid_stack_frame(struct instruction *insn)
+{
+ return (insn->state & STATE_FP_SAVED) &&
+ (insn->state & STATE_FP_SETUP);
+}
+
+static unsigned int frame_state(unsigned long state)
+{
+ return (state & (STATE_FP_SAVED | STATE_FP_SETUP));
+}
+
+/*
+ * Follow the branch starting at the given instruction, and recursively follow
+ * any other branches (jumps). Meanwhile, track the frame pointer state at
+ * each instruction and validate all the rules described in
+ * tools/objtool/Documentation/stack-validation.txt.
+ */
+static int validate_branch(struct objtool_file *file,
+ struct instruction *first, unsigned char first_state)
+{
+ struct alternative *alt;
+ struct instruction *insn;
+ struct section *sec;
+ struct symbol *func = NULL;
+ unsigned char state;
+ int ret;
+
+ insn = first;
+ sec = insn->sec;
+ state = first_state;
+
+ if (insn->alt_group && list_empty(&insn->alts)) {
+ WARN_FUNC("don't know how to handle branch to middle of alternative instruction group",
+ sec, insn->offset);
+ return 1;
+ }
+
+ while (1) {
+ if (file->c_file && insn->func) {
+ if (func && func != insn->func) {
+ WARN("%s() falls through to next function %s()",
+ func->name, insn->func->name);
+ return 1;
+ }
+
+ func = insn->func;
+ }
+
+ if (insn->visited) {
+ if (frame_state(insn->state) != frame_state(state)) {
+ WARN_FUNC("frame pointer state mismatch",
+ sec, insn->offset);
+ return 1;
+ }
+
+ return 0;
+ }
+
+ insn->visited = true;
+ insn->state = state;
+
+ list_for_each_entry(alt, &insn->alts, list) {
+ ret = validate_branch(file, alt->insn, state);
+ if (ret)
+ return 1;
+ }
+
+ switch (insn->type) {
+
+ case INSN_FP_SAVE:
+ if (!nofp) {
+ if (state & STATE_FP_SAVED) {
+ WARN_FUNC("duplicate frame pointer save",
+ sec, insn->offset);
+ return 1;
+ }
+ state |= STATE_FP_SAVED;
+ }
+ break;
+
+ case INSN_FP_SETUP:
+ if (!nofp) {
+ if (state & STATE_FP_SETUP) {
+ WARN_FUNC("duplicate frame pointer setup",
+ sec, insn->offset);
+ return 1;
+ }
+ state |= STATE_FP_SETUP;
+ }
+ break;
+
+ case INSN_FP_RESTORE:
+ if (!nofp) {
+ if (has_valid_stack_frame(insn))
+ state &= ~STATE_FP_SETUP;
+
+ state &= ~STATE_FP_SAVED;
+ }
+ break;
+
+ case INSN_RETURN:
+ if (!nofp && has_modified_stack_frame(insn)) {
+ WARN_FUNC("return without frame pointer restore",
+ sec, insn->offset);
+ return 1;
+ }
+ return 0;
+
+ case INSN_CALL:
+ if (is_fentry_call(insn)) {
+ state |= STATE_FENTRY;
+ break;
+ }
+
+ ret = dead_end_function(file, insn->call_dest);
+ if (ret == 1)
+ return 0;
+ if (ret == -1)
+ return 1;
+
+ /* fallthrough */
+ case INSN_CALL_DYNAMIC:
+ if (!nofp && !has_valid_stack_frame(insn)) {
+ WARN_FUNC("call without frame pointer save/setup",
+ sec, insn->offset);
+ return 1;
+ }
+ break;
+
+ case INSN_JUMP_CONDITIONAL:
+ case INSN_JUMP_UNCONDITIONAL:
+ if (insn->jump_dest) {
+ ret = validate_branch(file, insn->jump_dest,
+ state);
+ if (ret)
+ return 1;
+ } else if (has_modified_stack_frame(insn)) {
+ WARN_FUNC("sibling call from callable instruction with changed frame pointer",
+ sec, insn->offset);
+ return 1;
+ } /* else it's a sibling call */
+
+ if (insn->type == INSN_JUMP_UNCONDITIONAL)
+ return 0;
+
+ break;
+
+ case INSN_JUMP_DYNAMIC:
+ if (list_empty(&insn->alts) &&
+ has_modified_stack_frame(insn)) {
+ WARN_FUNC("sibling call from callable instruction with changed frame pointer",
+ sec, insn->offset);
+ return 1;
+ }
+
+ return 0;
+
+ case INSN_BUG:
+ return 0;
+
+ default:
+ break;
+ }
+
+ insn = next_insn_same_sec(file, insn);
+ if (!insn) {
+ WARN("%s: unexpected end of section", sec->name);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static bool is_gcov_insn(struct instruction *insn)
+{
+ struct rela *rela;
+ struct section *sec;
+ struct symbol *sym;
+ unsigned long offset;
+
+ rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len);
+ if (!rela)
+ return false;
+
+ if (rela->sym->type != STT_SECTION)
+ return false;
+
+ sec = rela->sym->sec;
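+ /* recover the %rip-relative target's offset within the referenced section */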
+ offset = rela->addend + insn->offset + insn->len - rela->offset;
+
+ list_for_each_entry(sym, &sec->symbol_list, list) {
+ if (sym->type != STT_OBJECT)
+ continue;
+
+ if (offset >= sym->offset && offset < sym->offset + sym->len)
+ return (!memcmp(sym->name, "__gcov0.", 8));
+ }
+
+ return false;
+}
+
+static bool is_kasan_insn(struct instruction *insn)
+{
+ return (insn->type == INSN_CALL &&
+ !strcmp(insn->call_dest->name, "__asan_handle_no_return"));
+}
+
+static bool is_ubsan_insn(struct instruction *insn)
+{
+ return (insn->type == INSN_CALL &&
+ !strcmp(insn->call_dest->name,
+ "__ubsan_handle_builtin_unreachable"));
+}
+
+static bool ignore_unreachable_insn(struct symbol *func,
+ struct instruction *insn)
+{
+ int i;
+
+ if (insn->type == INSN_NOP)
+ return true;
+
+ if (is_gcov_insn(insn))
+ return true;
+
+ /*
+ * Check if this (or a subsequent) instruction is related to
+ * CONFIG_UBSAN or CONFIG_KASAN.
+ *
+ * End the search at 5 instructions to avoid going into the weeds.
+ */
+ for (i = 0; i < 5; i++) {
+
+ if (is_kasan_insn(insn) || is_ubsan_insn(insn))
+ return true;
+
+ if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->jump_dest) {
+ insn = insn->jump_dest;
+ continue;
+ }
+
+ if (insn->offset + insn->len >= func->offset + func->len)
+ break;
+ insn = list_next_entry(insn, list);
+ }
+
+ return false;
+}
+
+static int validate_functions(struct objtool_file *file)
+{
+ struct section *sec;
+ struct symbol *func;
+ struct instruction *insn;
+ int ret, warnings = 0;
+
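+ /* first pass: follow every code path from each function's entry point */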
+ list_for_each_entry(sec, &file->elf->sections, list) {
+ list_for_each_entry(func, &sec->symbol_list, list) {
+ if (func->type != STT_FUNC)
+ continue;
+
+ insn = find_insn(file, sec, func->offset);
+ if (!insn)
+ continue;
+
+ ret = validate_branch(file, insn, 0);
+ warnings += ret;
+ }
+ }
+
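+ /* second pass: any instruction never visited above is unreachable */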
+ list_for_each_entry(sec, &file->elf->sections, list) {
+ list_for_each_entry(func, &sec->symbol_list, list) {
+ if (func->type != STT_FUNC)
+ continue;
+
+ func_for_each_insn(file, func, insn) {
+ if (insn->visited)
+ continue;
+
+ insn->visited = true;
+
+ if (file->ignore_unreachables || warnings ||
+ ignore_unreachable_insn(func, insn))
+ continue;
+
+ WARN_FUNC("function has unreachable instruction", insn->sec, insn->offset);
+ warnings++;
+ }
+ }
+ }
+
+ return warnings;
+}
+
+static int validate_uncallable_instructions(struct objtool_file *file)
+{
+ struct instruction *insn;
+ int warnings = 0;
+
+ for_each_insn(file, insn) {
+ if (!insn->visited && insn->type == INSN_RETURN) {
+ WARN_FUNC("return instruction outside of a callable function",
+ insn->sec, insn->offset);
+ warnings++;
+ }
+ }
+
+ return warnings;
+}
+
+static void cleanup(struct objtool_file *file)
+{
+ struct instruction *insn, *tmpinsn;
+ struct alternative *alt, *tmpalt;
+
+ list_for_each_entry_safe(insn, tmpinsn, &file->insn_list, list) {
+ list_for_each_entry_safe(alt, tmpalt, &insn->alts, list) {
+ list_del(&alt->list);
+ free(alt);
+ }
+ list_del(&insn->list);
+ hash_del(&insn->hash);
+ free(insn);
+ }
+ elf_close(file->elf);
+}
+
+const char * const check_usage[] = {
+ "objtool check [<options>] file.o",
+ NULL,
+};
+
+int cmd_check(int argc, const char **argv)
+{
+ struct objtool_file file;
+ int ret, warnings = 0;
+
+ const struct option options[] = {
+ OPT_BOOLEAN('f', "no-fp", &nofp, "Skip frame pointer validation"),
+ OPT_END(),
+ };
+
+ argc = parse_options(argc, argv, options, check_usage, 0);
+
+ if (argc != 1)
+ usage_with_options(check_usage, options);
+
+ objname = argv[0];
+
+ file.elf = elf_open(objname);
+ if (!file.elf) {
+ fprintf(stderr, "error reading elf file %s\n", objname);
+ return 1;
+ }
+
+ INIT_LIST_HEAD(&file.insn_list);
+ hash_init(file.insn_hash);
+ file.whitelist = find_section_by_name(file.elf, "__func_stack_frame_non_standard");
+ file.rodata = find_section_by_name(file.elf, ".rodata");
+ file.ignore_unreachables = false;
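+ /* compilers emit a .comment section, so its presence suggests a C object file */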
+ file.c_file = find_section_by_name(file.elf, ".comment");
+
+ ret = decode_sections(&file);
+ if (ret < 0)
+ goto out;
+ warnings += ret;
+
+ ret = validate_functions(&file);
+ if (ret < 0)
+ goto out;
+ warnings += ret;
+
+ ret = validate_uncallable_instructions(&file);
+ if (ret < 0)
+ goto out;
+ warnings += ret;
+
+out:
+ cleanup(&file);
+
+ /* ignore warnings for now until we get all the code cleaned up */
+ if (ret || warnings)
+ return 0;
+ return 0;
+}
diff --git a/tools/objtool/builtin.h b/tools/objtool/builtin.h
new file mode 100644
index 000000000000..34d2ba78a616
--- /dev/null
+++ b/tools/objtool/builtin.h
@@ -0,0 +1,22 @@
+/*
+ * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef _BUILTIN_H
+#define _BUILTIN_H
+
+extern int cmd_check(int argc, const char **argv);
+
+#endif /* _BUILTIN_H */
diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c
new file mode 100644
index 000000000000..e11f6b69cce6
--- /dev/null
+++ b/tools/objtool/elf.c
@@ -0,0 +1,412 @@
+/*
+ * elf.c - ELF access library
+ *
+ * Adapted from kpatch (https://github.com/dynup/kpatch):
+ * Copyright (C) 2013-2015 Josh Poimboeuf <jpoimboe@redhat.com>
+ * Copyright (C) 2014 Seth Jennings <sjenning@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "elf.h"
+#include "warn.h"
+
+struct section *find_section_by_name(struct elf *elf, const char *name)
+{
+ struct section *sec;
+
+ list_for_each_entry(sec, &elf->sections, list)
+ if (!strcmp(sec->name, name))
+ return sec;
+
+ return NULL;
+}
+
+static struct section *find_section_by_index(struct elf *elf,
+ unsigned int idx)
+{
+ struct section *sec;
+
+ list_for_each_entry(sec, &elf->sections, list)
+ if (sec->idx == idx)
+ return sec;
+
+ return NULL;
+}
+
+static struct symbol *find_symbol_by_index(struct elf *elf, unsigned int idx)
+{
+ struct section *sec;
+ struct symbol *sym;
+
+ list_for_each_entry(sec, &elf->sections, list)
+ hash_for_each_possible(sec->symbol_hash, sym, hash, idx)
+ if (sym->idx == idx)
+ return sym;
+
+ return NULL;
+}
+
+struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset)
+{
+ struct symbol *sym;
+
+ list_for_each_entry(sym, &sec->symbol_list, list)
+ if (sym->type != STT_SECTION &&
+ sym->offset == offset)
+ return sym;
+
+ return NULL;
+}
+
+struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset,
+ unsigned int len)
+{
+ struct rela *rela;
+ unsigned long o;
+
+ if (!sec->rela)
+ return NULL;
+
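+ /* relas are hashed by exact offset, so probe each byte the range covers */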
+ for (o = offset; o < offset + len; o++)
+ hash_for_each_possible(sec->rela->rela_hash, rela, hash, o)
+ if (rela->offset == o)
+ return rela;
+
+ return NULL;
+}
+
+struct rela *find_rela_by_dest(struct section *sec, unsigned long offset)
+{
+ return find_rela_by_dest_range(sec, offset, 1);
+}
+
+struct symbol *find_containing_func(struct section *sec, unsigned long offset)
+{
+ struct symbol *func;
+
+ list_for_each_entry(func, &sec->symbol_list, list)
+ if (func->type == STT_FUNC && offset >= func->offset &&
+ offset < func->offset + func->len)
+ return func;
+
+ return NULL;
+}
+
+static int read_sections(struct elf *elf)
+{
+ Elf_Scn *s = NULL;
+ struct section *sec;
+ size_t shstrndx, sections_nr;
+ int i;
+
+ if (elf_getshdrnum(elf->elf, &sections_nr)) {
+ perror("elf_getshdrnum");
+ return -1;
+ }
+
+ if (elf_getshdrstrndx(elf->elf, &shstrndx)) {
+ perror("elf_getshdrstrndx");
+ return -1;
+ }
+
+ for (i = 0; i < sections_nr; i++) {
+ sec = malloc(sizeof(*sec));
+ if (!sec) {
+ perror("malloc");
+ return -1;
+ }
+ memset(sec, 0, sizeof(*sec));
+
+ INIT_LIST_HEAD(&sec->symbol_list);
+ INIT_LIST_HEAD(&sec->rela_list);
+ hash_init(sec->rela_hash);
+ hash_init(sec->symbol_hash);
+
+ list_add_tail(&sec->list, &elf->sections);
+
+ s = elf_getscn(elf->elf, i);
+ if (!s) {
+ perror("elf_getscn");
+ return -1;
+ }
+
+ sec->idx = elf_ndxscn(s);
+
+ if (!gelf_getshdr(s, &sec->sh)) {
+ perror("gelf_getshdr");
+ return -1;
+ }
+
+ sec->name = elf_strptr(elf->elf, shstrndx, sec->sh.sh_name);
+ if (!sec->name) {
+ perror("elf_strptr");
+ return -1;
+ }
+
+ sec->elf_data = elf_getdata(s, NULL);
+ if (!sec->elf_data) {
+ perror("elf_getdata");
+ return -1;
+ }
+
+ if (sec->elf_data->d_off != 0 ||
+ sec->elf_data->d_size != sec->sh.sh_size) {
+ WARN("unexpected data attributes for %s", sec->name);
+ return -1;
+ }
+
+ sec->data = (unsigned long)sec->elf_data->d_buf;
+ sec->len = sec->elf_data->d_size;
+ }
+
+ /* sanity check, one more call to elf_nextscn() should return NULL */
+ if (elf_nextscn(elf->elf, s)) {
+ WARN("section entry mismatch");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int read_symbols(struct elf *elf)
+{
+ struct section *symtab;
+ struct symbol *sym;
+ struct list_head *entry, *tmp;
+ int symbols_nr, i;
+
+ symtab = find_section_by_name(elf, ".symtab");
+ if (!symtab) {
+ WARN("missing symbol table");
+ return -1;
+ }
+
+ symbols_nr = symtab->sh.sh_size / symtab->sh.sh_entsize;
+
+ for (i = 0; i < symbols_nr; i++) {
+ sym = malloc(sizeof(*sym));
+ if (!sym) {
+ perror("malloc");
+ return -1;
+ }
+ memset(sym, 0, sizeof(*sym));
+
+ sym->idx = i;
+
+ if (!gelf_getsym(symtab->elf_data, i, &sym->sym)) {
+ perror("gelf_getsym");
+ goto err;
+ }
+
+ sym->name = elf_strptr(elf->elf, symtab->sh.sh_link,
+ sym->sym.st_name);
+ if (!sym->name) {
+ perror("elf_strptr");
+ goto err;
+ }
+
+ sym->type = GELF_ST_TYPE(sym->sym.st_info);
+ sym->bind = GELF_ST_BIND(sym->sym.st_info);
+
+ if (sym->sym.st_shndx > SHN_UNDEF &&
+ sym->sym.st_shndx < SHN_LORESERVE) {
+ sym->sec = find_section_by_index(elf,
+ sym->sym.st_shndx);
+ if (!sym->sec) {
+ WARN("couldn't find section for symbol %s",
+ sym->name);
+ goto err;
+ }
+ if (sym->type == STT_SECTION) {
+ sym->name = sym->sec->name;
+ sym->sec->sym = sym;
+ }
+ } else
+ sym->sec = find_section_by_index(elf, 0);
+
+ sym->offset = sym->sym.st_value;
+ sym->len = sym->sym.st_size;
+
+ /* sorted insert into a per-section list */
+ entry = &sym->sec->symbol_list;
+ list_for_each_prev(tmp, &sym->sec->symbol_list) {
+ struct symbol *s;
+
+ s = list_entry(tmp, struct symbol, list);
+
+ if (sym->offset > s->offset) {
+ entry = tmp;
+ break;
+ }
+
+ if (sym->offset == s->offset && sym->len >= s->len) {
+ entry = tmp;
+ break;
+ }
+ }
+ list_add(&sym->list, entry);
+ hash_add(sym->sec->symbol_hash, &sym->hash, sym->idx);
+ }
+
+ return 0;
+
+err:
+ free(sym);
+ return -1;
+}
+
+static int read_relas(struct elf *elf)
+{
+ struct section *sec;
+ struct rela *rela;
+ int i;
+ unsigned int symndx;
+
+ list_for_each_entry(sec, &elf->sections, list) {
+ if (sec->sh.sh_type != SHT_RELA)
+ continue;
+
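+ /* skip the ".rela" prefix (5 chars) to find the section these relas apply to */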
+ sec->base = find_section_by_name(elf, sec->name + 5);
+ if (!sec->base) {
+ WARN("can't find base section for rela section %s",
+ sec->name);
+ return -1;
+ }
+
+ sec->base->rela = sec;
+
+ for (i = 0; i < sec->sh.sh_size / sec->sh.sh_entsize; i++) {
+ rela = malloc(sizeof(*rela));
+ if (!rela) {
+ perror("malloc");
+ return -1;
+ }
+ memset(rela, 0, sizeof(*rela));
+
+ if (!gelf_getrela(sec->elf_data, i, &rela->rela)) {
+ perror("gelf_getrela");
+ return -1;
+ }
+
+ rela->type = GELF_R_TYPE(rela->rela.r_info);
+ rela->addend = rela->rela.r_addend;
+ rela->offset = rela->rela.r_offset;
+ symndx = GELF_R_SYM(rela->rela.r_info);
+ rela->sym = find_symbol_by_index(elf, symndx);
+ if (!rela->sym) {
+ WARN("can't find rela entry symbol %d for %s",
+ symndx, sec->name);
+ return -1;
+ }
+
+ list_add_tail(&rela->list, &sec->rela_list);
+ hash_add(sec->rela_hash, &rela->hash, rela->offset);
+
+ }
+ }
+
+ return 0;
+}
+
+struct elf *elf_open(const char *name)
+{
+ struct elf *elf;
+
+ elf_version(EV_CURRENT);
+
+ elf = malloc(sizeof(*elf));
+ if (!elf) {
+ perror("malloc");
+ return NULL;
+ }
+ memset(elf, 0, sizeof(*elf));
+
+ INIT_LIST_HEAD(&elf->sections);
+
+ elf->name = strdup(name);
+ if (!elf->name) {
+ perror("strdup");
+ goto err;
+ }
+
+ elf->fd = open(name, O_RDONLY);
+ if (elf->fd == -1) {
+ perror("open");
+ goto err;
+ }
+
+ elf->elf = elf_begin(elf->fd, ELF_C_READ_MMAP, NULL);
+ if (!elf->elf) {
+ perror("elf_begin");
+ goto err;
+ }
+
+ if (!gelf_getehdr(elf->elf, &elf->ehdr)) {
+ perror("gelf_getehdr");
+ goto err;
+ }
+
+ if (read_sections(elf))
+ goto err;
+
+ if (read_symbols(elf))
+ goto err;
+
+ if (read_relas(elf))
+ goto err;
+
+ return elf;
+
+err:
+ elf_close(elf);
+ return NULL;
+}
+
+void elf_close(struct elf *elf)
+{
+ struct section *sec, *tmpsec;
+ struct symbol *sym, *tmpsym;
+ struct rela *rela, *tmprela;
+
+ list_for_each_entry_safe(sec, tmpsec, &elf->sections, list) {
+ list_for_each_entry_safe(sym, tmpsym, &sec->symbol_list, list) {
+ list_del(&sym->list);
+ hash_del(&sym->hash);
+ free(sym);
+ }
+ list_for_each_entry_safe(rela, tmprela, &sec->rela_list, list) {
+ list_del(&rela->list);
+ hash_del(&rela->hash);
+ free(rela);
+ }
+ list_del(&sec->list);
+ free(sec);
+ }
+ if (elf->name)
+ free(elf->name);
+ if (elf->fd > 0)
+ close(elf->fd);
+ if (elf->elf)
+ elf_end(elf->elf);
+ free(elf);
+}
diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h
new file mode 100644
index 000000000000..7f3e00a2f907
--- /dev/null
+++ b/tools/objtool/elf.h
@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _OBJTOOL_ELF_H
+#define _OBJTOOL_ELF_H
+
+#include <stdio.h>
+#include <gelf.h>
+#include <linux/list.h>
+#include <linux/hashtable.h>
+
+struct section {
+ struct list_head list;
+ GElf_Shdr sh;
+ struct list_head symbol_list;
+ DECLARE_HASHTABLE(symbol_hash, 8);
+ struct list_head rela_list;
+ DECLARE_HASHTABLE(rela_hash, 16);
+ struct section *base, *rela;
+ struct symbol *sym;
+ Elf_Data *elf_data;
+ char *name;
+ int idx;
+ unsigned long data;
+ unsigned int len;
+};
+
+struct symbol {
+ struct list_head list;
+ struct hlist_node hash;
+ GElf_Sym sym;
+ struct section *sec;
+ char *name;
+ unsigned int idx;
+ unsigned char bind, type;
+ unsigned long offset;
+ unsigned int len;
+};
+
+struct rela {
+ struct list_head list;
+ struct hlist_node hash;
+ GElf_Rela rela;
+ struct symbol *sym;
+ unsigned int type;
+ unsigned long offset;
+ int addend;
+};
+
+struct elf {
+ Elf *elf;
+ GElf_Ehdr ehdr;
+ int fd;
+ char *name;
+ struct list_head sections;
+ DECLARE_HASHTABLE(rela_hash, 16);
+};
+
+
+struct elf *elf_open(const char *name);
+struct section *find_section_by_name(struct elf *elf, const char *name);
+struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset);
+struct rela *find_rela_by_dest(struct section *sec, unsigned long offset);
+struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset,
+ unsigned int len);
+struct symbol *find_containing_func(struct section *sec, unsigned long offset);
+void elf_close(struct elf *elf);
+
+
+
+#endif /* _OBJTOOL_ELF_H */
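
The symbol_hash and rela_hash tables exist so the find_*() lookups declared above do not have to walk the full per-section lists; read_symbols() and read_relas() key them on the symbol index and the rela destination offset respectively. A sketch of how an offset-keyed lookup can be written with the helpers from tools/include/linux/hashtable.h (an assumption about the shape of the implementation, which lives in elf.c):

	/* Sketch: bucket walk keyed on the same value passed to hash_add(). */
	struct rela *find_rela_by_dest(struct section *sec, unsigned long offset)
	{
		struct rela *rela;

		hash_for_each_possible(sec->rela_hash, rela, hash, offset)
			if (rela->offset == offset)
				return rela;

		return NULL;
	}
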
diff --git a/tools/objtool/objtool.c b/tools/objtool/objtool.c
new file mode 100644
index 000000000000..46c326db4f46
--- /dev/null
+++ b/tools/objtool/objtool.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * objtool:
+ *
+ * The 'check' subcmd analyzes every .o file and ensures the validity of its
+ * stack trace metadata. It enforces a set of rules on asm code and C inline
+ * assembly code so that stack traces can be reliable.
+ *
+ * For more information, see tools/objtool/Documentation/stack-validation.txt.
+ */
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdlib.h>
+#include <subcmd/exec-cmd.h>
+#include <subcmd/pager.h>
+
+#include "builtin.h"
+
+#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
+
+struct cmd_struct {
+ const char *name;
+ int (*fn)(int, const char **);
+ const char *help;
+};
+
+static const char objtool_usage_string[] =
+ "objtool [OPTIONS] COMMAND [ARGS]";
+
+static struct cmd_struct objtool_cmds[] = {
+ {"check", cmd_check, "Perform stack metadata validation on an object file" },
+};
+
+bool help;
+
+static void cmd_usage(void)
+{
+ unsigned int i, longest = 0;
+
+ printf("\n usage: %s\n\n", objtool_usage_string);
+
+ for (i = 0; i < ARRAY_SIZE(objtool_cmds); i++) {
+ if (longest < strlen(objtool_cmds[i].name))
+ longest = strlen(objtool_cmds[i].name);
+ }
+
+ puts(" Commands:");
+ for (i = 0; i < ARRAY_SIZE(objtool_cmds); i++) {
+ printf(" %-*s ", longest, objtool_cmds[i].name);
+ puts(objtool_cmds[i].help);
+ }
+
+ printf("\n");
+
+ exit(1);
+}
+
+static void handle_options(int *argc, const char ***argv)
+{
+ while (*argc > 0) {
+ const char *cmd = (*argv)[0];
+
+ if (cmd[0] != '-')
+ break;
+
+ if (!strcmp(cmd, "--help") || !strcmp(cmd, "-h")) {
+ help = true;
+ break;
+ } else {
+ fprintf(stderr, "Unknown option: %s\n", cmd);
+ fprintf(stderr, "\n Usage: %s\n",
+ objtool_usage_string);
+ exit(1);
+ }
+
+ (*argv)++;
+ (*argc)--;
+ }
+}
+
+static void handle_internal_command(int argc, const char **argv)
+{
+ const char *cmd = argv[0];
+ unsigned int i, ret;
+
+ for (i = 0; i < ARRAY_SIZE(objtool_cmds); i++) {
+ struct cmd_struct *p = objtool_cmds+i;
+
+ if (strcmp(p->name, cmd))
+ continue;
+
+ ret = p->fn(argc, argv);
+
+ exit(ret);
+ }
+
+ cmd_usage();
+}
+
+int main(int argc, const char **argv)
+{
+ static const char *UNUSED = "OBJTOOL_NOT_IMPLEMENTED";
+
+ /* libsubcmd init */
+ exec_cmd_init("objtool", UNUSED, UNUSED, UNUSED);
+ pager_init(UNUSED);
+
+ argv++;
+ argc--;
+ handle_options(&argc, &argv);
+
+ if (!argc || help)
+ cmd_usage();
+
+ handle_internal_command(argc, argv);
+
+ return 0;
+}
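
objtool dispatches through the objtool_cmds[] table, so the only subcommand at this point is "check" (invoked as "objtool check <file>.o"; see the stack-validation.txt document referenced in the header comment). Extending the tool would just mean another table entry plus a matching declaration in builtin.h; a hypothetical sketch, where "dump" and cmd_dump() are made up for illustration:

	/* Hypothetical: neither "dump" nor cmd_dump() exist in this patch. */
	static int cmd_dump(int argc, const char **argv);

	static struct cmd_struct objtool_cmds[] = {
		{"check", cmd_check, "Perform stack metadata validation on an object file" },
		{"dump",  cmd_dump,  "Dump the parsed ELF metadata" },
	};
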
diff --git a/tools/objtool/special.c b/tools/objtool/special.c
new file mode 100644
index 000000000000..bff8abb3a4aa
--- /dev/null
+++ b/tools/objtool/special.c
@@ -0,0 +1,193 @@
+/*
+ * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * This file reads all the special sections which have alternate instructions
+ * which can be patched in or redirected to at runtime.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "special.h"
+#include "warn.h"
+
+#define EX_ENTRY_SIZE 12
+#define EX_ORIG_OFFSET 0
+#define EX_NEW_OFFSET 4
+
+#define JUMP_ENTRY_SIZE 24
+#define JUMP_ORIG_OFFSET 0
+#define JUMP_NEW_OFFSET 8
+
+#define ALT_ENTRY_SIZE 13
+#define ALT_ORIG_OFFSET 0
+#define ALT_NEW_OFFSET 4
+#define ALT_FEATURE_OFFSET 8
+#define ALT_ORIG_LEN_OFFSET 10
+#define ALT_NEW_LEN_OFFSET 11
+
+#define X86_FEATURE_POPCNT (4*32+23)
+
+struct special_entry {
+ const char *sec;
+ bool group, jump_or_nop;
+ unsigned char size, orig, new;
+ unsigned char orig_len, new_len; /* group only */
+ unsigned char feature; /* ALTERNATIVE macro CPU feature */
+};
+
+struct special_entry entries[] = {
+ {
+ .sec = ".altinstructions",
+ .group = true,
+ .size = ALT_ENTRY_SIZE,
+ .orig = ALT_ORIG_OFFSET,
+ .orig_len = ALT_ORIG_LEN_OFFSET,
+ .new = ALT_NEW_OFFSET,
+ .new_len = ALT_NEW_LEN_OFFSET,
+ .feature = ALT_FEATURE_OFFSET,
+ },
+ {
+ .sec = "__jump_table",
+ .jump_or_nop = true,
+ .size = JUMP_ENTRY_SIZE,
+ .orig = JUMP_ORIG_OFFSET,
+ .new = JUMP_NEW_OFFSET,
+ },
+ {
+ .sec = "__ex_table",
+ .size = EX_ENTRY_SIZE,
+ .orig = EX_ORIG_OFFSET,
+ .new = EX_NEW_OFFSET,
+ },
+ {},
+};
+
+static int get_alt_entry(struct elf *elf, struct special_entry *entry,
+ struct section *sec, int idx,
+ struct special_alt *alt)
+{
+ struct rela *orig_rela, *new_rela;
+ unsigned long offset;
+
+ offset = idx * entry->size;
+
+ alt->group = entry->group;
+ alt->jump_or_nop = entry->jump_or_nop;
+
+ if (alt->group) {
+ alt->orig_len = *(unsigned char *)(sec->data + offset +
+ entry->orig_len);
+ alt->new_len = *(unsigned char *)(sec->data + offset +
+ entry->new_len);
+ }
+
+ if (entry->feature) {
+ unsigned short feature;
+
+ feature = *(unsigned short *)(sec->data + offset +
+ entry->feature);
+
+ /*
+ * It has been requested that we don't validate the !POPCNT
+ * feature path which is a "very very small percentage of
+ * machines".
+ */
+ if (feature == X86_FEATURE_POPCNT)
+ alt->skip_orig = true;
+ }
+
+ orig_rela = find_rela_by_dest(sec, offset + entry->orig);
+ if (!orig_rela) {
+ WARN_FUNC("can't find orig rela", sec, offset + entry->orig);
+ return -1;
+ }
+ if (orig_rela->sym->type != STT_SECTION) {
+ WARN_FUNC("don't know how to handle non-section rela symbol %s",
+ sec, offset + entry->orig, orig_rela->sym->name);
+ return -1;
+ }
+
+ alt->orig_sec = orig_rela->sym->sec;
+ alt->orig_off = orig_rela->addend;
+
+ if (!entry->group || alt->new_len) {
+ new_rela = find_rela_by_dest(sec, offset + entry->new);
+ if (!new_rela) {
+ WARN_FUNC("can't find new rela",
+ sec, offset + entry->new);
+ return -1;
+ }
+
+ alt->new_sec = new_rela->sym->sec;
+ alt->new_off = (unsigned int)new_rela->addend;
+
+ /* _ASM_EXTABLE_EX hack */
+ if (alt->new_off >= 0x7ffffff0)
+ alt->new_off -= 0x7ffffff0;
+ }
+
+ return 0;
+}
+
+/*
+ * Read all the special sections and create a list of special_alt structs which
+ * describe all the alternate instructions which can be patched in or
+ * redirected to at runtime.
+ */
+int special_get_alts(struct elf *elf, struct list_head *alts)
+{
+ struct special_entry *entry;
+ struct section *sec;
+ unsigned int nr_entries;
+ struct special_alt *alt;
+ int idx, ret;
+
+ INIT_LIST_HEAD(alts);
+
+ for (entry = entries; entry->sec; entry++) {
+ sec = find_section_by_name(elf, entry->sec);
+ if (!sec)
+ continue;
+
+ if (sec->len % entry->size != 0) {
+ WARN("%s size not a multiple of %d",
+ sec->name, entry->size);
+ return -1;
+ }
+
+ nr_entries = sec->len / entry->size;
+
+ for (idx = 0; idx < nr_entries; idx++) {
+ alt = malloc(sizeof(*alt));
+ if (!alt) {
+ WARN("malloc failed");
+ return -1;
+ }
+ memset(alt, 0, sizeof(*alt));
+
+ ret = get_alt_entry(elf, entry, sec, idx, alt);
+ if (ret)
+ return ret;
+
+ list_add_tail(&alt->list, alts);
+ }
+ }
+
+ return 0;
+}
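
The ALT_* constants describe the 13-byte entries that the ALTERNATIVE macros emit into .altinstructions; get_alt_entry() reads the two code references through the relas at offsets 0 and 4 and the lengths/feature word straight from the section data. Written out as a struct, the layout these offsets imply looks roughly like the following (a readability sketch only; the authoritative definition is struct alt_instr in arch/x86/include/asm/alternative.h, and the final byte at offset 12 is not read by objtool):

	struct alt_entry_layout {
		int		orig;		/* 0:  ALT_ORIG_OFFSET, read via rela */
		int		new;		/* 4:  ALT_NEW_OFFSET,  read via rela */
		unsigned short	feature;	/* 8:  ALT_FEATURE_OFFSET */
		unsigned char	orig_len;	/* 10: ALT_ORIG_LEN_OFFSET */
		unsigned char	new_len;	/* 11: ALT_NEW_LEN_OFFSET */
		unsigned char	pad;		/* 12: unused by objtool */
	} __attribute__((packed));		/* 13 bytes == ALT_ENTRY_SIZE */
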
diff --git a/tools/objtool/special.h b/tools/objtool/special.h
new file mode 100644
index 000000000000..fad1d092f679
--- /dev/null
+++ b/tools/objtool/special.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _SPECIAL_H
+#define _SPECIAL_H
+
+#include <stdbool.h>
+#include "elf.h"
+
+struct special_alt {
+ struct list_head list;
+
+ bool group;
+ bool skip_orig;
+ bool jump_or_nop;
+
+ struct section *orig_sec;
+ unsigned long orig_off;
+
+ struct section *new_sec;
+ unsigned long new_off;
+
+ unsigned int orig_len, new_len; /* group only */
+};
+
+int special_get_alts(struct elf *elf, struct list_head *alts);
+
+#endif /* _SPECIAL_H */
diff --git a/tools/objtool/warn.h b/tools/objtool/warn.h
new file mode 100644
index 000000000000..ac7e07523e84
--- /dev/null
+++ b/tools/objtool/warn.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2015 Josh Poimboeuf <jpoimboe@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef _WARN_H
+#define _WARN_H
+
+extern const char *objname;
+
+static inline char *offstr(struct section *sec, unsigned long offset)
+{
+ struct symbol *func;
+ char *name, *str;
+ unsigned long name_off;
+
+ func = find_containing_func(sec, offset);
+ if (func) {
+ name = func->name;
+ name_off = offset - func->offset;
+ } else {
+ name = sec->name;
+ name_off = offset;
+ }
+
+ str = malloc(strlen(name) + 20);
+
+ if (func)
+ sprintf(str, "%s()+0x%lx", name, name_off);
+ else
+ sprintf(str, "%s+0x%lx", name, name_off);
+
+ return str;
+}
+
+#define WARN(format, ...) \
+ fprintf(stderr, \
+ "%s: warning: objtool: " format "\n", \
+ objname, ##__VA_ARGS__)
+
+#define WARN_FUNC(format, sec, offset, ...) \
+({ \
+ char *_str = offstr(sec, offset); \
+ WARN("%s: " format, _str, ##__VA_ARGS__); \
+ free(_str); \
+})
+
+#endif /* _WARN_H */
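
Both macros prefix their output with the global objname (the object file currently being checked) so warnings can be attributed when objtool runs across a whole build, and WARN_FUNC() additionally resolves the offset to a function+offset string via find_containing_func(). A usage sketch, where foo.o, my_func and the offset are illustrative only:

	#include <stdlib.h>
	#include <string.h>
	#include "elf.h"
	#include "warn.h"

	const char *objname;	/* normally set to the input file by the check subcommand */

	/*
	 * Sketch: with objname == "foo.o" and the offset falling inside my_func(),
	 * the call below prints roughly:
	 *
	 *   foo.o: warning: objtool: my_func()+0x1c: can't decode instruction
	 */
	static void report_bad_insn(struct section *sec, unsigned long offset)
	{
		WARN_FUNC("can't decode instruction", sec, offset);
	}
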
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile
index 3ba1c0b09908..098cfb9ca8f0 100644
--- a/tools/perf/Documentation/Makefile
+++ b/tools/perf/Documentation/Makefile
@@ -1,5 +1,5 @@
include ../../scripts/Makefile.include
-include ../config/utilities.mak
+include ../../scripts/utilities.mak
MAN1_TXT= \
$(filter-out $(addsuffix .txt, $(ARTICLES) $(SP_ARTICLES)), \
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index 79483f40e991..ec723d0a5bb3 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -40,10 +40,12 @@ address should be. The 'p' modifier can be specified multiple times:
0 - SAMPLE_IP can have arbitrary skid
1 - SAMPLE_IP must have constant skid
2 - SAMPLE_IP requested to have 0 skid
- 3 - SAMPLE_IP must have 0 skid
+ 3 - SAMPLE_IP must have 0 skid, or use randomization to avoid
+ sample shadowing effects.
For Intel systems precise event sampling is implemented with PEBS
-which supports up to precise-level 2.
+which supports up to precise-level 2, and precise-level 3 for
+some special cases.
On AMD systems it is implemented using IBS (up to precise-level 2).
The precise modifier works with event types 0x76 (cpu-cycles, CPU
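
In practice these levels are selected by repeating the 'p' modifier on an event, so cycles:p asks for level 1, cycles:pp for level 2 and cycles:ppp for level 3 where the PMU can provide it.
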
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 2e1fa2357528..8c8c6b9ce915 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -74,6 +74,7 @@ arch/*/include/uapi/asm/unistd*.h
arch/*/include/uapi/asm/perf_regs.h
arch/*/lib/memcpy*.S
arch/*/lib/memset*.S
+arch/*/include/asm/*features.h
include/linux/poison.h
include/linux/hw_breakpoint.h
include/uapi/linux/perf_event.h
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 4a4fad4182f5..000ea210389d 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -3,7 +3,7 @@ include ../scripts/Makefile.include
# The default target of this Makefile is...
all:
-include config/utilities.mak
+include ../scripts/utilities.mak
# Define V to have a more verbose compile.
#
diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c
index 6c1b8a75db09..f8ccee132867 100644
--- a/tools/perf/arch/powerpc/util/header.c
+++ b/tools/perf/arch/powerpc/util/header.c
@@ -3,9 +3,9 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-
-#include "../../util/header.h"
-#include "../../util/util.h"
+#include <linux/stringify.h>
+#include "header.h"
+#include "util.h"
#define mfspr(rn) ({unsigned long rval; \
asm volatile("mfspr %0," __stringify(rn) \
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index a50df86f2b9b..579a592990dd 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -25,19 +25,17 @@
# endif
#endif
-extern int bench_numa(int argc, const char **argv, const char *prefix);
-extern int bench_sched_messaging(int argc, const char **argv, const char *prefix);
-extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);
-extern int bench_mem_memcpy(int argc, const char **argv,
- const char *prefix __maybe_unused);
-extern int bench_mem_memset(int argc, const char **argv, const char *prefix);
-extern int bench_futex_hash(int argc, const char **argv, const char *prefix);
-extern int bench_futex_wake(int argc, const char **argv, const char *prefix);
-extern int bench_futex_wake_parallel(int argc, const char **argv,
- const char *prefix);
-extern int bench_futex_requeue(int argc, const char **argv, const char *prefix);
+int bench_numa(int argc, const char **argv, const char *prefix);
+int bench_sched_messaging(int argc, const char **argv, const char *prefix);
+int bench_sched_pipe(int argc, const char **argv, const char *prefix);
+int bench_mem_memcpy(int argc, const char **argv, const char *prefix);
+int bench_mem_memset(int argc, const char **argv, const char *prefix);
+int bench_futex_hash(int argc, const char **argv, const char *prefix);
+int bench_futex_wake(int argc, const char **argv, const char *prefix);
+int bench_futex_wake_parallel(int argc, const char **argv, const char *prefix);
+int bench_futex_requeue(int argc, const char **argv, const char *prefix);
/* pi futexes */
-extern int bench_futex_lock_pi(int argc, const char **argv, const char *prefix);
+int bench_futex_lock_pi(int argc, const char **argv, const char *prefix);
#define BENCH_FORMAT_DEFAULT_STR "default"
#define BENCH_FORMAT_DEFAULT 0
diff --git a/tools/perf/bench/mem-memcpy-arch.h b/tools/perf/bench/mem-memcpy-arch.h
index 57b4ed871459..5aad2a9408b0 100644
--- a/tools/perf/bench/mem-memcpy-arch.h
+++ b/tools/perf/bench/mem-memcpy-arch.h
@@ -2,7 +2,7 @@
#ifdef HAVE_ARCH_X86_64_SUPPORT
#define MEMCPY_FN(fn, name, desc) \
- extern void *fn(void *, const void *, size_t);
+ void *fn(void *, const void *, size_t);
#include "mem-memcpy-x86-64-asm-def.h"
diff --git a/tools/perf/bench/mem-memset-arch.h b/tools/perf/bench/mem-memset-arch.h
index 633800cb0dcb..0d15786d9ae3 100644
--- a/tools/perf/bench/mem-memset-arch.h
+++ b/tools/perf/bench/mem-memset-arch.h
@@ -2,7 +2,7 @@
#ifdef HAVE_ARCH_X86_64_SUPPORT
#define MEMSET_FN(fn, name, desc) \
- extern void *fn(void *, int, size_t);
+ void *fn(void *, int, size_t);
#include "mem-memset-x86-64-asm-def.h"
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index 5049d6357a46..7500d959d7eb 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -293,7 +293,7 @@ static void bind_to_memnode(int node)
if (node == -1)
return;
- BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask));
+ BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask)*8);
nodemask = 1L << node;
ret = set_mempolicy(MPOL_BIND, &nodemask, sizeof(nodemask)*8);
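
nodemask is used here as a bit mask (nodemask = 1L << node), so its capacity is sizeof(nodemask)*8 bits rather than sizeof(nodemask) bytes; the corrected BUG_ON() now checks against the same bit count that is already passed to set_mempolicy() on the following line.
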
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index cfe366375c4b..814158393656 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -94,7 +94,7 @@ static int process_sample_event(struct perf_tool *tool,
struct addr_location al;
int ret = 0;
- if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
+ if (machine__resolve(machine, &al, sample) < 0) {
pr_warning("problem processing %d event, skipping it.\n",
event->header.type);
return -1;
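
This is the first of a long series of identical conversions in this patch set: perf_event__preprocess_sample(event, machine, &al, sample) becomes machine__resolve(machine, &al, sample), and call sites that used to derive the cpumode from event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK now read sample->cpumode directly, which lets several functions drop their now-unused event arguments. The same pattern recurs below in builtin-diff, builtin-mem, builtin-report, builtin-script, builtin-timechart, builtin-top, builtin-trace, the hists tests, util/build-id.c and util/db-export.c.
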
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index 4d72359fd15a..8053a8ceefda 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -330,7 +330,7 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
struct hists *hists = evsel__hists(evsel);
int ret = -1;
- if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
+ if (machine__resolve(machine, &al, sample) < 0) {
pr_warning("problem processing %d event, skipping it.\n",
event->header.type);
return -1;
diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c
index 49d55e21b1b0..bc1de9b8fd67 100644
--- a/tools/perf/builtin-help.c
+++ b/tools/perf/builtin-help.c
@@ -106,12 +106,14 @@ static void exec_woman_emacs(const char *path, const char *page)
if (!check_emacsclient_version()) {
/* This works only with emacsclient version >= 22. */
- struct strbuf man_page = STRBUF_INIT;
+ char *man_page;
if (!path)
path = "emacsclient";
- strbuf_addf(&man_page, "(woman \"%s\")", page);
- execlp(path, "emacsclient", "-e", man_page.buf, NULL);
+ if (asprintf(&man_page, "(woman \"%s\")", page) > 0) {
+ execlp(path, "emacsclient", "-e", man_page, NULL);
+ free(man_page);
+ }
warning("failed to exec '%s': %s", path,
strerror_r(errno, sbuf, sizeof(sbuf)));
}
@@ -122,7 +124,7 @@ static void exec_man_konqueror(const char *path, const char *page)
const char *display = getenv("DISPLAY");
if (display && *display) {
- struct strbuf man_page = STRBUF_INIT;
+ char *man_page;
const char *filename = "kfmclient";
char sbuf[STRERR_BUFSIZE];
@@ -141,8 +143,10 @@ static void exec_man_konqueror(const char *path, const char *page)
filename = file;
} else
path = "kfmclient";
- strbuf_addf(&man_page, "man:%s(1)", page);
- execlp(path, filename, "newTab", man_page.buf, NULL);
+ if (asprintf(&man_page, "man:%s(1)", page) > 0) {
+ execlp(path, filename, "newTab", man_page, NULL);
+ free(man_page);
+ }
warning("failed to exec '%s': %s", path,
strerror_r(errno, sbuf, sizeof(sbuf)));
}
@@ -161,11 +165,13 @@ static void exec_man_man(const char *path, const char *page)
static void exec_man_cmd(const char *cmd, const char *page)
{
- struct strbuf shell_cmd = STRBUF_INIT;
char sbuf[STRERR_BUFSIZE];
+ char *shell_cmd;
- strbuf_addf(&shell_cmd, "%s %s", cmd, page);
- execl("/bin/sh", "sh", "-c", shell_cmd.buf, NULL);
+ if (asprintf(&shell_cmd, "%s %s", cmd, page) > 0) {
+ execl("/bin/sh", "sh", "-c", shell_cmd, NULL);
+ free(shell_cmd);
+ }
warning("failed to exec '%s': %s", cmd,
strerror_r(errno, sbuf, sizeof(sbuf)));
}
@@ -299,43 +305,33 @@ static int is_perf_command(const char *s)
is_in_cmdlist(&other_cmds, s);
}
-static const char *prepend(const char *prefix, const char *cmd)
-{
- size_t pre_len = strlen(prefix);
- size_t cmd_len = strlen(cmd);
- char *p = malloc(pre_len + cmd_len + 1);
- memcpy(p, prefix, pre_len);
- strcpy(p + pre_len, cmd);
- return p;
-}
-
static const char *cmd_to_page(const char *perf_cmd)
{
+ char *s;
+
if (!perf_cmd)
return "perf";
else if (!prefixcmp(perf_cmd, "perf"))
return perf_cmd;
- else
- return prepend("perf-", perf_cmd);
+
+ return asprintf(&s, "perf-%s", perf_cmd) < 0 ? NULL : s;
}
static void setup_man_path(void)
{
- struct strbuf new_path = STRBUF_INIT;
+ char *new_path;
const char *old_path = getenv("MANPATH");
/* We should always put ':' after our path. If there is no
* old_path, the ':' at the end will let 'man' to try
* system-wide paths after ours to find the manual page. If
* there is old_path, we need ':' as delimiter. */
- strbuf_addstr(&new_path, system_path(PERF_MAN_PATH));
- strbuf_addch(&new_path, ':');
- if (old_path)
- strbuf_addstr(&new_path, old_path);
-
- setenv("MANPATH", new_path.buf, 1);
-
- strbuf_release(&new_path);
+ if (asprintf(&new_path, "%s:%s", system_path(PERF_MAN_PATH), old_path ?: "") > 0) {
+ setenv("MANPATH", new_path, 1);
+ free(new_path);
+ } else {
+ error("Unable to setup man path");
+ }
}
static void exec_viewer(const char *name, const char *page)
@@ -380,7 +376,7 @@ static int show_info_page(const char *perf_cmd)
return -1;
}
-static int get_html_page_path(struct strbuf *page_path, const char *page)
+static int get_html_page_path(char **page_path, const char *page)
{
struct stat st;
const char *html_path = system_path(PERF_HTML_PATH);
@@ -392,10 +388,7 @@ static int get_html_page_path(struct strbuf *page_path, const char *page)
return -1;
}
- strbuf_init(page_path, 0);
- strbuf_addf(page_path, "%s/%s.html", html_path, page);
-
- return 0;
+ return asprintf(page_path, "%s/%s.html", html_path, page);
}
/*
@@ -413,12 +406,12 @@ static void open_html(const char *path)
static int show_html_page(const char *perf_cmd)
{
const char *page = cmd_to_page(perf_cmd);
- struct strbuf page_path; /* it leaks but we exec bellow */
+ char *page_path; /* it leaks but we exec below */
- if (get_html_page_path(&page_path, page) != 0)
+ if (get_html_page_path(&page_path, page) < 0)
return -1;
- open_html(page_path.buf);
+ open_html(page_path);
return 0;
}
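
All of the strbuf-to-asprintf conversions in this file follow the same idiom: asprintf() returns the number of bytes printed, or a negative value on failure, in which case the target pointer is left undefined and must be neither used nor freed; hence the > 0 / < 0 checks wrapped around every call. A minimal sketch of the pattern (a standalone example, not code from the patch; asprintf() needs _GNU_SOURCE):

	#define _GNU_SOURCE
	#include <stdio.h>
	#include <stdlib.h>
	#include <unistd.h>

	/* Sketch of the error-checked asprintf() idiom used above. */
	static void run_shell_cmd(const char *cmd, const char *page)
	{
		char *shell_cmd;

		if (asprintf(&shell_cmd, "%s %s", cmd, page) < 0)
			return;			/* pointer is undefined on failure */

		execl("/bin/sh", "sh", "-c", shell_cmd, NULL);
		free(shell_cmd);		/* only reached if execl() failed */
	}
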
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 7fa68663ed72..d1a2d104f2bc 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -131,8 +131,7 @@ static int copy_bytes(struct perf_inject *inject, int fd, off_t size)
static s64 perf_event__repipe_auxtrace(struct perf_tool *tool,
union perf_event *event,
- struct perf_session *session
- __maybe_unused)
+ struct perf_session *session)
{
struct perf_inject *inject = container_of(tool, struct perf_inject,
tool);
@@ -417,9 +416,6 @@ static int perf_event__inject_buildid(struct perf_tool *tool,
{
struct addr_location al;
struct thread *thread;
- u8 cpumode;
-
- cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
thread = machine__findnew_thread(machine, sample->pid, sample->tid);
if (thread == NULL) {
@@ -428,7 +424,7 @@ static int perf_event__inject_buildid(struct perf_tool *tool,
goto repipe;
}
- thread__find_addr_map(thread, cpumode, MAP__FUNCTION, sample->ip, &al);
+ thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, &al);
if (al.map != NULL) {
if (!al.map->dso->hit) {
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 4d3340cce9a0..c9cb3be47cff 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -602,7 +602,7 @@ static int gfpcmp(const void *a, const void *b)
return fa->flags - fb->flags;
}
-/* see include/trace/events/gfpflags.h */
+/* see include/trace/events/mmflags.h */
static const struct {
const char *original;
const char *compact;
@@ -612,30 +612,39 @@ static const struct {
{ "GFP_HIGHUSER", "HU" },
{ "GFP_USER", "U" },
{ "GFP_TEMPORARY", "TMP" },
+ { "GFP_KERNEL_ACCOUNT", "KAC" },
{ "GFP_KERNEL", "K" },
{ "GFP_NOFS", "NF" },
{ "GFP_ATOMIC", "A" },
{ "GFP_NOIO", "NI" },
- { "GFP_HIGH", "H" },
- { "GFP_WAIT", "W" },
- { "GFP_IO", "I" },
- { "GFP_COLD", "CO" },
- { "GFP_NOWARN", "NWR" },
- { "GFP_REPEAT", "R" },
- { "GFP_NOFAIL", "NF" },
- { "GFP_NORETRY", "NR" },
- { "GFP_COMP", "C" },
- { "GFP_ZERO", "Z" },
- { "GFP_NOMEMALLOC", "NMA" },
- { "GFP_MEMALLOC", "MA" },
- { "GFP_HARDWALL", "HW" },
- { "GFP_THISNODE", "TN" },
- { "GFP_RECLAIMABLE", "RC" },
- { "GFP_MOVABLE", "M" },
- { "GFP_NOTRACK", "NT" },
- { "GFP_NO_KSWAPD", "NK" },
- { "GFP_OTHER_NODE", "ON" },
{ "GFP_NOWAIT", "NW" },
+ { "GFP_DMA", "D" },
+ { "__GFP_HIGHMEM", "HM" },
+ { "GFP_DMA32", "D32" },
+ { "__GFP_HIGH", "H" },
+ { "__GFP_ATOMIC", "_A" },
+ { "__GFP_IO", "I" },
+ { "__GFP_FS", "F" },
+ { "__GFP_COLD", "CO" },
+ { "__GFP_NOWARN", "NWR" },
+ { "__GFP_REPEAT", "R" },
+ { "__GFP_NOFAIL", "NF" },
+ { "__GFP_NORETRY", "NR" },
+ { "__GFP_COMP", "C" },
+ { "__GFP_ZERO", "Z" },
+ { "__GFP_NOMEMALLOC", "NMA" },
+ { "__GFP_MEMALLOC", "MA" },
+ { "__GFP_HARDWALL", "HW" },
+ { "__GFP_THISNODE", "TN" },
+ { "__GFP_RECLAIMABLE", "RC" },
+ { "__GFP_MOVABLE", "M" },
+ { "__GFP_ACCOUNT", "AC" },
+ { "__GFP_NOTRACK", "NT" },
+ { "__GFP_WRITE", "WR" },
+ { "__GFP_RECLAIM", "R" },
+ { "__GFP_DIRECT_RECLAIM", "DR" },
+ { "__GFP_KSWAPD_RECLAIM", "KR" },
+ { "__GFP_OTHER_NODE", "ON" },
};
static size_t max_gfp_len;
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 88aeac9aa1da..85db3be4b3cb 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -131,7 +131,7 @@ dump_raw_samples(struct perf_tool *tool,
struct addr_location al;
const char *fmt;
- if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
+ if (machine__resolve(machine, &al, sample) < 0) {
fprintf(stderr, "problem processing %d event, skipping it.\n",
event->header.type);
return -1;
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 7eea49f9ed46..160ea23b45aa 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -41,6 +41,7 @@
#include <dlfcn.h>
#include <linux/bitmap.h>
+#include <linux/stringify.h>
struct report {
struct perf_tool tool;
@@ -154,7 +155,7 @@ static int process_sample_event(struct perf_tool *tool,
};
int ret = 0;
- if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
+ if (machine__resolve(machine, &al, sample) < 0) {
pr_debug("problem processing %d event, skipping it.\n",
event->header.type);
return -1;
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 57f9a7e7f7d3..3770c3dffe5e 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -405,9 +405,7 @@ out:
return 0;
}
-static void print_sample_iregs(union perf_event *event __maybe_unused,
- struct perf_sample *sample,
- struct thread *thread __maybe_unused,
+static void print_sample_iregs(struct perf_sample *sample,
struct perf_event_attr *attr)
{
struct regs_dump *regs = &sample->intr_regs;
@@ -476,10 +474,7 @@ mispred_str(struct branch_entry *br)
return br->flags.predicted ? 'P' : 'M';
}
-static void print_sample_brstack(union perf_event *event __maybe_unused,
- struct perf_sample *sample,
- struct thread *thread __maybe_unused,
- struct perf_event_attr *attr __maybe_unused)
+static void print_sample_brstack(struct perf_sample *sample)
{
struct branch_stack *br = sample->branch_stack;
u64 i;
@@ -498,14 +493,11 @@ static void print_sample_brstack(union perf_event *event __maybe_unused,
}
}
-static void print_sample_brstacksym(union perf_event *event __maybe_unused,
- struct perf_sample *sample,
- struct thread *thread __maybe_unused,
- struct perf_event_attr *attr __maybe_unused)
+static void print_sample_brstacksym(struct perf_sample *sample,
+ struct thread *thread)
{
struct branch_stack *br = sample->branch_stack;
struct addr_location alf, alt;
- u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
u64 i, from, to;
if (!(br && br->nr))
@@ -518,11 +510,11 @@ static void print_sample_brstacksym(union perf_event *event __maybe_unused,
from = br->entries[i].from;
to = br->entries[i].to;
- thread__find_addr_map(thread, cpumode, MAP__FUNCTION, from, &alf);
+ thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, from, &alf);
if (alf.map)
alf.sym = map__find_symbol(alf.map, alf.addr, NULL);
- thread__find_addr_map(thread, cpumode, MAP__FUNCTION, to, &alt);
+ thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, to, &alt);
if (alt.map)
alt.sym = map__find_symbol(alt.map, alt.addr, NULL);
@@ -538,8 +530,7 @@ static void print_sample_brstacksym(union perf_event *event __maybe_unused,
}
-static void print_sample_addr(union perf_event *event,
- struct perf_sample *sample,
+static void print_sample_addr(struct perf_sample *sample,
struct thread *thread,
struct perf_event_attr *attr)
{
@@ -550,7 +541,7 @@ static void print_sample_addr(union perf_event *event,
if (!sample_addr_correlates_sym(attr))
return;
- perf_event__preprocess_sample_addr(event, sample, thread, &al);
+ thread__resolve(thread, &al, sample);
if (PRINT_FIELD(SYM)) {
printf(" ");
@@ -567,8 +558,7 @@ static void print_sample_addr(union perf_event *event,
}
}
-static void print_sample_bts(union perf_event *event,
- struct perf_sample *sample,
+static void print_sample_bts(struct perf_sample *sample,
struct perf_evsel *evsel,
struct thread *thread,
struct addr_location *al)
@@ -598,7 +588,7 @@ static void print_sample_bts(union perf_event *event,
((evsel->attr.sample_type & PERF_SAMPLE_ADDR) &&
!output[attr->type].user_set)) {
printf(" => ");
- print_sample_addr(event, sample, thread, attr);
+ print_sample_addr(sample, thread, attr);
}
if (print_srcline_last)
@@ -747,7 +737,7 @@ static size_t data_src__printf(u64 data_src)
return printf("%-*s", maxlen, out);
}
-static void process_event(struct perf_script *script, union perf_event *event,
+static void process_event(struct perf_script *script,
struct perf_sample *sample, struct perf_evsel *evsel,
struct addr_location *al)
{
@@ -776,7 +766,7 @@ static void process_event(struct perf_script *script, union perf_event *event,
print_sample_flags(sample->flags);
if (is_bts_event(attr)) {
- print_sample_bts(event, sample, evsel, thread, al);
+ print_sample_bts(sample, evsel, thread, al);
return;
}
@@ -784,7 +774,7 @@ static void process_event(struct perf_script *script, union perf_event *event,
event_format__print(evsel->tp_format, sample->cpu,
sample->raw_data, sample->raw_size);
if (PRINT_FIELD(ADDR))
- print_sample_addr(event, sample, thread, attr);
+ print_sample_addr(sample, thread, attr);
if (PRINT_FIELD(DATA_SRC))
data_src__printf(sample->data_src);
@@ -804,12 +794,12 @@ static void process_event(struct perf_script *script, union perf_event *event,
}
if (PRINT_FIELD(IREGS))
- print_sample_iregs(event, sample, thread, attr);
+ print_sample_iregs(sample, attr);
if (PRINT_FIELD(BRSTACK))
- print_sample_brstack(event, sample, thread, attr);
+ print_sample_brstack(sample);
else if (PRINT_FIELD(BRSTACKSYM))
- print_sample_brstacksym(event, sample, thread, attr);
+ print_sample_brstacksym(sample, thread);
if (perf_evsel__is_bpf_output(evsel) && PRINT_FIELD(BPF_OUTPUT))
print_sample_bpf_output(sample);
@@ -905,7 +895,7 @@ static int process_sample_event(struct perf_tool *tool,
return 0;
}
- if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
+ if (machine__resolve(machine, &al, sample) < 0) {
pr_err("problem processing %d event, skipping it.\n",
event->header.type);
return -1;
@@ -920,7 +910,7 @@ static int process_sample_event(struct perf_tool *tool,
if (scripting_ops)
scripting_ops->process_event(event, sample, evsel, &al);
else
- process_event(scr, event, sample, evsel, &al);
+ process_event(scr, sample, evsel, &al);
out_put:
addr_location__put(&al);
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index bd7a7757176f..40cc9bb3506c 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -489,7 +489,7 @@ static const char *cat_backtrace(union perf_event *event,
if (!chain)
goto exit;
- if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
+ if (machine__resolve(machine, &al, sample) < 0) {
fprintf(stderr, "problem processing %d event, skipping it.\n",
event->header.type);
goto exit;
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 94af190f6843..833214979c4f 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -67,6 +67,7 @@
#include <sys/utsname.h>
#include <sys/mman.h>
+#include <linux/stringify.h>
#include <linux/types.h>
static volatile int done;
@@ -728,7 +729,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
top->exact_samples++;
- if (perf_event__preprocess_sample(event, machine, &al, sample) < 0)
+ if (machine__resolve(machine, &al, sample) < 0)
return;
if (!top->kptr_restrict_warned &&
@@ -809,7 +810,6 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
struct perf_session *session = top->session;
union perf_event *event;
struct machine *machine;
- u8 origin;
int ret;
while ((event = perf_evlist__mmap_read(top->evlist, idx)) != NULL) {
@@ -822,12 +822,10 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
evsel = perf_evlist__id2evsel(session->evlist, sample.id);
assert(evsel != NULL);
- origin = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
-
if (event->header.type == PERF_RECORD_SAMPLE)
++top->samples;
- switch (origin) {
+ switch (sample.cpumode) {
case PERF_RECORD_MISC_USER:
++top->us_samples;
if (top->hide_user_symbols)
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 8dc98c598b1a..93ac724fb635 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2256,11 +2256,10 @@ static void print_location(FILE *f, struct perf_sample *sample,
static int trace__pgfault(struct trace *trace,
struct perf_evsel *evsel,
- union perf_event *event,
+ union perf_event *event __maybe_unused,
struct perf_sample *sample)
{
struct thread *thread;
- u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
struct addr_location al;
char map_type = 'd';
struct thread_trace *ttrace;
@@ -2279,7 +2278,7 @@ static int trace__pgfault(struct trace *trace,
if (trace->summary_only)
goto out;
- thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
+ thread__find_addr_location(thread, sample->cpumode, MAP__FUNCTION,
sample->ip, &al);
trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
@@ -2292,11 +2291,11 @@ static int trace__pgfault(struct trace *trace,
fprintf(trace->output, "] => ");
- thread__find_addr_location(thread, cpumode, MAP__VARIABLE,
+ thread__find_addr_location(thread, sample->cpumode, MAP__VARIABLE,
sample->addr, &al);
if (!al.map) {
- thread__find_addr_location(thread, cpumode,
+ thread__find_addr_location(thread, sample->cpumode,
MAP__FUNCTION, sample->addr, &al);
if (al.map)
diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h
index 3f871b54e261..41c24010ab43 100644
--- a/tools/perf/builtin.h
+++ b/tools/perf/builtin.h
@@ -7,38 +7,38 @@
extern const char perf_usage_string[];
extern const char perf_more_info_string[];
-extern void list_common_cmds_help(void);
-extern const char *help_unknown_cmd(const char *cmd);
-extern void prune_packed_objects(int);
-extern int read_line_with_nul(char *buf, int size, FILE *file);
-extern int check_pager_config(const char *cmd);
+void list_common_cmds_help(void);
+const char *help_unknown_cmd(const char *cmd);
+void prune_packed_objects(int);
+int read_line_with_nul(char *buf, int size, FILE *file);
+int check_pager_config(const char *cmd);
-extern int cmd_annotate(int argc, const char **argv, const char *prefix);
-extern int cmd_bench(int argc, const char **argv, const char *prefix);
-extern int cmd_buildid_cache(int argc, const char **argv, const char *prefix);
-extern int cmd_buildid_list(int argc, const char **argv, const char *prefix);
-extern int cmd_config(int argc, const char **argv, const char *prefix);
-extern int cmd_diff(int argc, const char **argv, const char *prefix);
-extern int cmd_evlist(int argc, const char **argv, const char *prefix);
-extern int cmd_help(int argc, const char **argv, const char *prefix);
-extern int cmd_sched(int argc, const char **argv, const char *prefix);
-extern int cmd_list(int argc, const char **argv, const char *prefix);
-extern int cmd_record(int argc, const char **argv, const char *prefix);
-extern int cmd_report(int argc, const char **argv, const char *prefix);
-extern int cmd_stat(int argc, const char **argv, const char *prefix);
-extern int cmd_timechart(int argc, const char **argv, const char *prefix);
-extern int cmd_top(int argc, const char **argv, const char *prefix);
-extern int cmd_script(int argc, const char **argv, const char *prefix);
-extern int cmd_version(int argc, const char **argv, const char *prefix);
-extern int cmd_probe(int argc, const char **argv, const char *prefix);
-extern int cmd_kmem(int argc, const char **argv, const char *prefix);
-extern int cmd_lock(int argc, const char **argv, const char *prefix);
-extern int cmd_kvm(int argc, const char **argv, const char *prefix);
-extern int cmd_test(int argc, const char **argv, const char *prefix);
-extern int cmd_trace(int argc, const char **argv, const char *prefix);
-extern int cmd_inject(int argc, const char **argv, const char *prefix);
-extern int cmd_mem(int argc, const char **argv, const char *prefix);
-extern int cmd_data(int argc, const char **argv, const char *prefix);
+int cmd_annotate(int argc, const char **argv, const char *prefix);
+int cmd_bench(int argc, const char **argv, const char *prefix);
+int cmd_buildid_cache(int argc, const char **argv, const char *prefix);
+int cmd_buildid_list(int argc, const char **argv, const char *prefix);
+int cmd_config(int argc, const char **argv, const char *prefix);
+int cmd_diff(int argc, const char **argv, const char *prefix);
+int cmd_evlist(int argc, const char **argv, const char *prefix);
+int cmd_help(int argc, const char **argv, const char *prefix);
+int cmd_sched(int argc, const char **argv, const char *prefix);
+int cmd_list(int argc, const char **argv, const char *prefix);
+int cmd_record(int argc, const char **argv, const char *prefix);
+int cmd_report(int argc, const char **argv, const char *prefix);
+int cmd_stat(int argc, const char **argv, const char *prefix);
+int cmd_timechart(int argc, const char **argv, const char *prefix);
+int cmd_top(int argc, const char **argv, const char *prefix);
+int cmd_script(int argc, const char **argv, const char *prefix);
+int cmd_version(int argc, const char **argv, const char *prefix);
+int cmd_probe(int argc, const char **argv, const char *prefix);
+int cmd_kmem(int argc, const char **argv, const char *prefix);
+int cmd_lock(int argc, const char **argv, const char *prefix);
+int cmd_kvm(int argc, const char **argv, const char *prefix);
+int cmd_test(int argc, const char **argv, const char *prefix);
+int cmd_trace(int argc, const char **argv, const char *prefix);
+int cmd_inject(int argc, const char **argv, const char *prefix);
+int cmd_mem(int argc, const char **argv, const char *prefix);
+int cmd_data(int argc, const char **argv, const char *prefix);
-extern int find_scripts(char **scripts_array, char **scripts_path_array);
+int find_scripts(char **scripts_array, char **scripts_path_array);
#endif
diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index eca6a912e8c2..f7d7f5a1cad5 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -109,7 +109,7 @@ ifdef PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
CFLAGS += -DHAVE_ARCH_REGS_QUERY_REGISTER_OFFSET
endif
-include $(src-perf)/config/utilities.mak
+include $(srctree)/tools/scripts/utilities.mak
ifeq ($(call get-executable,$(FLEX)),)
dummy := $(error Error: $(FLEX) is missing on this system, please install it)
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index afc9ad0a0515..abd3f0ec0c0b 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -293,7 +293,6 @@ static int process_sample_event(struct machine *machine,
{
struct perf_sample sample;
struct thread *thread;
- u8 cpumode;
int ret;
if (perf_evlist__parse_sample(evlist, event, &sample)) {
@@ -307,9 +306,7 @@ static int process_sample_event(struct machine *machine,
return -1;
}
- cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
-
- ret = read_object_code(sample.ip, READLEN, cpumode, thread, state);
+ ret = read_object_code(sample.ip, READLEN, sample.cpumode, thread, state);
thread__put(thread);
return ret;
}
diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c
index 1c5c0221cea2..8f6eb853aaf7 100644
--- a/tools/perf/tests/dwarf-unwind.c
+++ b/tools/perf/tests/dwarf-unwind.c
@@ -20,10 +20,10 @@
static int mmap_handler(struct perf_tool *tool __maybe_unused,
union perf_event *event,
- struct perf_sample *sample __maybe_unused,
+ struct perf_sample *sample,
struct machine *machine)
{
- return machine__process_mmap2_event(machine, event, NULL);
+ return machine__process_mmap2_event(machine, event, sample);
}
static int init_live_machine(struct machine *machine)
diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c
index 071a8b5f5232..f55f4bd47932 100644
--- a/tools/perf/tests/hists_common.c
+++ b/tools/perf/tests/hists_common.c
@@ -100,9 +100,11 @@ struct machine *setup_fake_machine(struct machines *machines)
}
for (i = 0; i < ARRAY_SIZE(fake_mmap_info); i++) {
+ struct perf_sample sample = {
+ .cpumode = PERF_RECORD_MISC_USER,
+ };
union perf_event fake_mmap_event = {
.mmap = {
- .header = { .misc = PERF_RECORD_MISC_USER, },
.pid = fake_mmap_info[i].pid,
.tid = fake_mmap_info[i].pid,
.start = fake_mmap_info[i].start,
@@ -114,7 +116,7 @@ struct machine *setup_fake_machine(struct machines *machines)
strcpy(fake_mmap_event.mmap.filename,
fake_mmap_info[i].filename);
- machine__process_mmap_event(machine, &fake_mmap_event, NULL);
+ machine__process_mmap_event(machine, &fake_mmap_event, &sample);
}
for (i = 0; i < ARRAY_SIZE(fake_symbols); i++) {
diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c
index ecf136c385d5..ed5aa9eaeb6c 100644
--- a/tools/perf/tests/hists_cumulate.c
+++ b/tools/perf/tests/hists_cumulate.c
@@ -81,11 +81,6 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
size_t i;
for (i = 0; i < ARRAY_SIZE(fake_samples); i++) {
- const union perf_event event = {
- .header = {
- .misc = PERF_RECORD_MISC_USER,
- },
- };
struct hist_entry_iter iter = {
.evsel = evsel,
.sample = &sample,
@@ -97,13 +92,13 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
else
iter.ops = &hist_iter_normal;
+ sample.cpumode = PERF_RECORD_MISC_USER;
sample.pid = fake_samples[i].pid;
sample.tid = fake_samples[i].pid;
sample.ip = fake_samples[i].ip;
sample.callchain = (struct ip_callchain *)fake_callchains[i];
- if (perf_event__preprocess_sample(&event, machine, &al,
- &sample) < 0)
+ if (machine__resolve(machine, &al, &sample) < 0)
goto out;
if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH,
diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c
index 34b945a55d4d..b825d24f8186 100644
--- a/tools/perf/tests/hists_filter.c
+++ b/tools/perf/tests/hists_filter.c
@@ -58,11 +58,6 @@ static int add_hist_entries(struct perf_evlist *evlist,
*/
evlist__for_each(evlist, evsel) {
for (i = 0; i < ARRAY_SIZE(fake_samples); i++) {
- const union perf_event event = {
- .header = {
- .misc = PERF_RECORD_MISC_USER,
- },
- };
struct hist_entry_iter iter = {
.evsel = evsel,
.sample = &sample,
@@ -76,12 +71,12 @@ static int add_hist_entries(struct perf_evlist *evlist,
hists->dso_filter = NULL;
hists->symbol_filter_str = NULL;
+ sample.cpumode = PERF_RECORD_MISC_USER;
sample.pid = fake_samples[i].pid;
sample.tid = fake_samples[i].pid;
sample.ip = fake_samples[i].ip;
- if (perf_event__preprocess_sample(&event, machine, &al,
- &sample) < 0)
+ if (machine__resolve(machine, &al, &sample) < 0)
goto out;
al.socket = fake_samples[i].socket;
diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c
index 64b257d8d557..358324e47805 100644
--- a/tools/perf/tests/hists_link.c
+++ b/tools/perf/tests/hists_link.c
@@ -76,17 +76,12 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
struct hists *hists = evsel__hists(evsel);
for (k = 0; k < ARRAY_SIZE(fake_common_samples); k++) {
- const union perf_event event = {
- .header = {
- .misc = PERF_RECORD_MISC_USER,
- },
- };
-
+ sample.cpumode = PERF_RECORD_MISC_USER;
sample.pid = fake_common_samples[k].pid;
sample.tid = fake_common_samples[k].pid;
sample.ip = fake_common_samples[k].ip;
- if (perf_event__preprocess_sample(&event, machine, &al,
- &sample) < 0)
+
+ if (machine__resolve(machine, &al, &sample) < 0)
goto out;
he = __hists__add_entry(hists, &al, NULL,
@@ -102,17 +97,10 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
}
for (k = 0; k < ARRAY_SIZE(fake_samples[i]); k++) {
- const union perf_event event = {
- .header = {
- .misc = PERF_RECORD_MISC_USER,
- },
- };
-
sample.pid = fake_samples[i][k].pid;
sample.tid = fake_samples[i][k].pid;
sample.ip = fake_samples[i][k].ip;
- if (perf_event__preprocess_sample(&event, machine, &al,
- &sample) < 0)
+ if (machine__resolve(machine, &al, &sample) < 0)
goto out;
he = __hists__add_entry(hists, &al, NULL,
diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c
index 23cce67c7e48..d3556fbe8c5c 100644
--- a/tools/perf/tests/hists_output.c
+++ b/tools/perf/tests/hists_output.c
@@ -51,11 +51,6 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
size_t i;
for (i = 0; i < ARRAY_SIZE(fake_samples); i++) {
- const union perf_event event = {
- .header = {
- .misc = PERF_RECORD_MISC_USER,
- },
- };
struct hist_entry_iter iter = {
.evsel = evsel,
.sample = &sample,
@@ -63,13 +58,13 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
.hide_unresolved = false,
};
+ sample.cpumode = PERF_RECORD_MISC_USER;
sample.cpu = fake_samples[i].cpu;
sample.pid = fake_samples[i].pid;
sample.tid = fake_samples[i].pid;
sample.ip = fake_samples[i].ip;
- if (perf_event__preprocess_sample(&event, machine, &al,
- &sample) < 0)
+ if (machine__resolve(machine, &al, &sample) < 0)
goto out;
if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH,
diff --git a/tools/perf/tests/perf-targz-src-pkg b/tools/perf/tests/perf-targz-src-pkg
index 238aa3927c71..f2d9c5fe58e0 100755
--- a/tools/perf/tests/perf-targz-src-pkg
+++ b/tools/perf/tests/perf-targz-src-pkg
@@ -15,7 +15,7 @@ TMP_DEST=$(mktemp -d)
tar xf ${TARBALL} -C $TMP_DEST
rm -f ${TARBALL}
cd - > /dev/null
-make -C $TMP_DEST/perf*/tools/perf > /dev/null 2>&1
+make -C $TMP_DEST/perf*/tools/perf > /dev/null
RC=$?
rm -rf ${TMP_DEST}
exit $RC
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 4b9816555946..2a83414159a6 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -337,7 +337,7 @@ static void callchain_node__init_have_children(struct callchain_node *node,
chain = list_entry(node->val.next, struct callchain_list, list);
chain->has_children = has_sibling;
- if (node->val.next != node->val.prev) {
+ if (!list_empty(&node->val)) {
chain = list_entry(node->val.prev, struct callchain_list, list);
chain->has_children = !RB_EMPTY_ROOT(&node->rb_root);
}
diff --git a/tools/perf/ui/gtk/hists.c b/tools/perf/ui/gtk/hists.c
index bd9bf7e343b1..2aa45b606fa4 100644
--- a/tools/perf/ui/gtk/hists.c
+++ b/tools/perf/ui/gtk/hists.c
@@ -55,7 +55,7 @@ static u64 he_get_acc_##_field(struct hist_entry *he) \
return he->stat_acc->_field; \
} \
\
-static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt __maybe_unused, \
+static int perf_gtk__hpp_color_##_type(struct perf_hpp_fmt *fmt, \
struct perf_hpp *hpp, \
struct hist_entry *he) \
{ \
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index eea25e2424e9..da48fd843438 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -1,4 +1,3 @@
-libperf-y += abspath.o
libperf-y += alias.o
libperf-y += annotate.o
libperf-y += build-id.o
diff --git a/tools/perf/util/abspath.c b/tools/perf/util/abspath.c
deleted file mode 100644
index 0e76affe9c36..000000000000
--- a/tools/perf/util/abspath.c
+++ /dev/null
@@ -1,37 +0,0 @@
-#include "cache.h"
-
-static const char *get_pwd_cwd(void)
-{
- static char cwd[PATH_MAX + 1];
- char *pwd;
- struct stat cwd_stat, pwd_stat;
- if (getcwd(cwd, PATH_MAX) == NULL)
- return NULL;
- pwd = getenv("PWD");
- if (pwd && strcmp(pwd, cwd)) {
- stat(cwd, &cwd_stat);
- if (!stat(pwd, &pwd_stat) &&
- pwd_stat.st_dev == cwd_stat.st_dev &&
- pwd_stat.st_ino == cwd_stat.st_ino) {
- strlcpy(cwd, pwd, PATH_MAX);
- }
- }
- return cwd;
-}
-
-const char *make_nonrelative_path(const char *path)
-{
- static char buf[PATH_MAX + 1];
-
- if (is_absolute_path(path)) {
- if (strlcpy(buf, path, PATH_MAX) >= PATH_MAX)
- die("Too long path: %.*s", 60, path);
- } else {
- const char *cwd = get_pwd_cwd();
- if (!cwd)
- die("Cannot determine the current working directory");
- if (snprintf(buf, PATH_MAX, "%s/%s", cwd, path) >= PATH_MAX)
- die("Too long path: %.*s", 60, path);
- }
- return buf;
-}
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index cea323d9ee7e..9241f8c2b7e1 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -158,7 +158,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, size_t privsize);
int hist_entry__annotate(struct hist_entry *he, size_t privsize);
-int symbol__annotate_init(struct map *map __maybe_unused, struct symbol *sym);
+int symbol__annotate_init(struct map *map, struct symbol *sym);
int symbol__annotate_printf(struct symbol *sym, struct map *map,
struct perf_evsel *evsel, bool full_paths,
int min_pcnt, int max_lines, int context);
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index e5a8e2d4f2af..57ff31ecb8e4 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -517,7 +517,7 @@ static inline void auxtrace__free(struct perf_session *session)
static inline struct auxtrace_record *
auxtrace_record__init(struct perf_evlist *evlist __maybe_unused,
- int *err __maybe_unused)
+ int *err)
{
*err = 0;
return NULL;
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index f1479eeef7da..0573c2ec861d 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -28,7 +28,6 @@ int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused,
struct machine *machine)
{
struct addr_location al;
- u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
struct thread *thread = machine__findnew_thread(machine, sample->pid,
sample->tid);
@@ -38,7 +37,7 @@ int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused,
return -1;
}
- thread__find_addr_map(thread, cpumode, MAP__FUNCTION, sample->ip, &al);
+ thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, &al);
if (al.map != NULL)
al.map->dso->hit = 1;
diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h
index 3ca453f0c51f..1f5a93c2c9a2 100644
--- a/tools/perf/util/cache.h
+++ b/tools/perf/util/cache.h
@@ -26,14 +26,14 @@
extern const char *config_exclusive_filename;
typedef int (*config_fn_t)(const char *, const char *, void *);
-extern int perf_default_config(const char *, const char *, void *);
-extern int perf_config(config_fn_t fn, void *);
-extern int perf_config_int(const char *, const char *);
-extern u64 perf_config_u64(const char *, const char *);
-extern int perf_config_bool(const char *, const char *);
-extern int config_error_nonbool(const char *);
-extern const char *perf_config_dirname(const char *, const char *);
-extern const char *perf_etc_perfconfig(void);
+int perf_default_config(const char *, const char *, void *);
+int perf_config(config_fn_t fn, void *);
+int perf_config_int(const char *, const char *);
+u64 perf_config_u64(const char *, const char *);
+int perf_config_bool(const char *, const char *);
+int config_error_nonbool(const char *);
+const char *perf_config_dirname(const char *, const char *);
+const char *perf_etc_perfconfig(void);
char *alias_lookup(const char *alias);
int split_cmdline(char *cmdline, const char ***argv);
@@ -64,13 +64,9 @@ static inline int is_absolute_path(const char *path)
return path[0] == '/';
}
-const char *make_nonrelative_path(const char *path);
char *strip_path_suffix(const char *path, const char *suffix);
-extern char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
-extern char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
-
-extern char *perf_pathdup(const char *fmt, ...)
- __attribute__((format (printf, 1, 2)));
+char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
+char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
#endif /* __PERF_CACHE_H */
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 18dd22269764..d2a9e694810c 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -220,7 +220,7 @@ int fill_callchain_info(struct addr_location *al, struct callchain_cursor_node *
bool hide_unresolved);
extern const char record_callchain_help[];
-extern int parse_callchain_record(const char *arg, struct callchain_param *param);
+int parse_callchain_record(const char *arg, struct callchain_param *param);
int parse_callchain_record_opt(const char *arg, struct callchain_param *param);
int parse_callchain_report_opt(const char *arg);
int parse_callchain_top_opt(const char *arg);
@@ -236,7 +236,7 @@ static inline void callchain_cursor_snapshot(struct callchain_cursor *dest,
}
#ifdef HAVE_SKIP_CALLCHAIN_IDX
-extern int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain);
+int arch_skip_callchain_idx(struct thread *thread, struct ip_callchain *chain);
#else
static inline int arch_skip_callchain_idx(struct thread *thread __maybe_unused,
struct ip_callchain *chain __maybe_unused)
diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h
index b4b8cb42fe5e..31f8dcdbd7ef 100644
--- a/tools/perf/util/cgroup.h
+++ b/tools/perf/util/cgroup.h
@@ -13,7 +13,7 @@ struct cgroup_sel {
extern int nr_cgroups; /* number of explicit cgroups defined */
-extern void close_cgroup(struct cgroup_sel *cgrp);
-extern int parse_cgroups(const struct option *opt, const char *str, int unset);
+void close_cgroup(struct cgroup_sel *cgrp);
+int parse_cgroups(const struct option *opt, const char *str, int unset);
#endif /* __CGROUP_H__ */
diff --git a/tools/perf/util/cloexec.h b/tools/perf/util/cloexec.h
index 3bee6773ddb0..d0d465953d36 100644
--- a/tools/perf/util/cloexec.h
+++ b/tools/perf/util/cloexec.h
@@ -5,7 +5,7 @@ unsigned long perf_event_open_cloexec_flag(void);
#ifdef __GLIBC_PREREQ
#if !__GLIBC_PREREQ(2, 6) && !defined(__UCLIBC__)
-extern int sched_getcpu(void) __THROW;
+int sched_getcpu(void) __THROW;
#endif
#endif
diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 811af89ce0bb..bbf69d248ec5 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -632,7 +632,7 @@ static bool is_flush_needed(struct ctf_stream *cs)
}
static int process_sample_event(struct perf_tool *tool,
- union perf_event *_event __maybe_unused,
+ union perf_event *_event,
struct perf_sample *sample,
struct perf_evsel *evsel,
struct machine *machine __maybe_unused)
diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c
index 1c9689e4cc17..049438d51b9a 100644
--- a/tools/perf/util/db-export.c
+++ b/tools/perf/util/db-export.c
@@ -333,7 +333,7 @@ int db_export__sample(struct db_export *dbe, union perf_event *event,
sample_addr_correlates_sym(&evsel->attr)) {
struct addr_location addr_al;
- perf_event__preprocess_sample_addr(event, sample, thread, &addr_al);
+ thread__resolve(thread, &addr_al, sample);
err = db_ids_from_al(dbe, &addr_al, &es.addr_dso_db_id,
&es.addr_sym_db_id, &es.addr_offset);
if (err)
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 45ec4d0a50ed..0953280629cf 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -162,6 +162,7 @@ struct dso {
u8 loaded;
u8 rel;
u8 build_id[BUILD_ID_SIZE];
+ u64 text_offset;
const char *short_name;
const char *long_name;
u16 long_name_len;
@@ -301,7 +302,7 @@ int __kmod_path__parse(struct kmod_path *m, const char *path,
* TODO
*/
int dso__data_get_fd(struct dso *dso, struct machine *machine);
-void dso__data_put_fd(struct dso *dso __maybe_unused);
+void dso__data_put_fd(struct dso *dso);
void dso__data_close(struct dso *dso);
off_t dso__data_size(struct dso *dso, struct machine *machine);
diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index a509aa8433a1..577e600c8eb1 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -915,7 +915,7 @@ int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf)
tmp = "*";
else if (tag == DW_TAG_subroutine_type) {
/* Function pointer */
- strbuf_addf(buf, "(function_type)");
+ strbuf_add(buf, "(function_type)", 15);
return 0;
} else {
if (!dwarf_diename(&type))
@@ -932,7 +932,7 @@ int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf)
}
ret = die_get_typename(&type, buf);
if (ret == 0)
- strbuf_addf(buf, "%s", tmp);
+ strbuf_addstr(buf, tmp);
return ret;
}
@@ -951,7 +951,7 @@ int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf)
ret = die_get_typename(vr_die, buf);
if (ret < 0) {
pr_debug("Failed to get type, make it unknown.\n");
- strbuf_addf(buf, "(unknown_type)");
+		strbuf_add(buf, "(unknown_type)", 14);
}
strbuf_addf(buf, "\t%s", dwarf_diename(vr_die));
@@ -1013,7 +1013,7 @@ static int die_get_var_innermost_scope(Dwarf_Die *sp_die, Dwarf_Die *vr_die,
}
if (!first)
- strbuf_addf(buf, "]>");
+ strbuf_add(buf, "]>", 2);
out:
free(scopes);
@@ -1076,7 +1076,7 @@ int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf)
}
if (!first)
- strbuf_addf(buf, "]>");
+ strbuf_add(buf, "]>", 2);
return ret;
}
diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h
index c42ec366f2a7..dc0ce1adb075 100644
--- a/tools/perf/util/dwarf-aux.h
+++ b/tools/perf/util/dwarf-aux.h
@@ -25,48 +25,48 @@
#include <elfutils/version.h>
/* Find the realpath of the target file */
-extern const char *cu_find_realpath(Dwarf_Die *cu_die, const char *fname);
+const char *cu_find_realpath(Dwarf_Die *cu_die, const char *fname);
/* Get DW_AT_comp_dir (should be NULL with older gcc) */
-extern const char *cu_get_comp_dir(Dwarf_Die *cu_die);
+const char *cu_get_comp_dir(Dwarf_Die *cu_die);
/* Get a line number and file name for given address */
-extern int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr,
- const char **fname, int *lineno);
+int cu_find_lineinfo(Dwarf_Die *cudie, unsigned long addr,
+ const char **fname, int *lineno);
/* Walk on functions at given address */
-extern int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr,
- int (*callback)(Dwarf_Die *, void *), void *data);
+int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr,
+ int (*callback)(Dwarf_Die *, void *), void *data);
/* Ensure that this DIE is a subprogram and definition (not declaration) */
-extern bool die_is_func_def(Dwarf_Die *dw_die);
+bool die_is_func_def(Dwarf_Die *dw_die);
/* Ensure that this DIE is an instance of a subprogram */
-extern bool die_is_func_instance(Dwarf_Die *dw_die);
+bool die_is_func_instance(Dwarf_Die *dw_die);
/* Compare diename and tname */
-extern bool die_compare_name(Dwarf_Die *dw_die, const char *tname);
+bool die_compare_name(Dwarf_Die *dw_die, const char *tname);
/* Matching diename with glob pattern */
-extern bool die_match_name(Dwarf_Die *dw_die, const char *glob);
+bool die_match_name(Dwarf_Die *dw_die, const char *glob);
/* Get callsite line number of inline-function instance */
-extern int die_get_call_lineno(Dwarf_Die *in_die);
+int die_get_call_lineno(Dwarf_Die *in_die);
/* Get callsite file name of inlined function instance */
-extern const char *die_get_call_file(Dwarf_Die *in_die);
+const char *die_get_call_file(Dwarf_Die *in_die);
/* Get type die */
-extern Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem);
+Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem);
/* Get a type die, but skip qualifiers and typedef */
-extern Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem);
+Dwarf_Die *die_get_real_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem);
/* Check whether the DIE is signed or not */
-extern bool die_is_signed_type(Dwarf_Die *tp_die);
+bool die_is_signed_type(Dwarf_Die *tp_die);
/* Get data_member_location offset */
-extern int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs);
+int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs);
/* Return values for die_find_child() callbacks */
enum {
@@ -77,29 +77,29 @@ enum {
};
/* Search child DIEs */
-extern Dwarf_Die *die_find_child(Dwarf_Die *rt_die,
- int (*callback)(Dwarf_Die *, void *),
- void *data, Dwarf_Die *die_mem);
+Dwarf_Die *die_find_child(Dwarf_Die *rt_die,
+ int (*callback)(Dwarf_Die *, void *),
+ void *data, Dwarf_Die *die_mem);
/* Search a non-inlined function including given address */
-extern Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
- Dwarf_Die *die_mem);
+Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
+ Dwarf_Die *die_mem);
/* Search a non-inlined function with tail call at given address */
Dwarf_Die *die_find_tailfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
Dwarf_Die *die_mem);
/* Search the top inlined function including given address */
-extern Dwarf_Die *die_find_top_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
- Dwarf_Die *die_mem);
+Dwarf_Die *die_find_top_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
+ Dwarf_Die *die_mem);
/* Search the deepest inlined function including given address */
-extern Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
- Dwarf_Die *die_mem);
+Dwarf_Die *die_find_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
+ Dwarf_Die *die_mem);
/* Walk on the instances of given DIE */
-extern int die_walk_instances(Dwarf_Die *in_die,
- int (*callback)(Dwarf_Die *, void *), void *data);
+int die_walk_instances(Dwarf_Die *in_die,
+ int (*callback)(Dwarf_Die *, void *), void *data);
/* Walker on lines (Note: line number will not be sorted) */
typedef int (* line_walk_callback_t) (const char *fname, int lineno,
@@ -109,22 +109,20 @@ typedef int (* line_walk_callback_t) (const char *fname, int lineno,
* Walk on lines inside given DIE. If the DIE is a subprogram, walk only on
* the lines inside the subprogram, otherwise the DIE must be a CU DIE.
*/
-extern int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback,
- void *data);
+int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data);
/* Find a variable called 'name' at given address */
-extern Dwarf_Die *die_find_variable_at(Dwarf_Die *sp_die, const char *name,
- Dwarf_Addr addr, Dwarf_Die *die_mem);
+Dwarf_Die *die_find_variable_at(Dwarf_Die *sp_die, const char *name,
+ Dwarf_Addr addr, Dwarf_Die *die_mem);
/* Find a member called 'name' */
-extern Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name,
- Dwarf_Die *die_mem);
+Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name,
+ Dwarf_Die *die_mem);
/* Get the name of given variable DIE */
-extern int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf);
+int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf);
/* Get the name and type of given variable DIE, stored as "type\tname" */
-extern int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf);
-extern int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die,
- struct strbuf *buf);
+int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf);
+int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf);
#endif
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 7bad5c3fa7b7..dad55d04ffdd 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -56,13 +56,22 @@ const char *perf_event__name(unsigned int id)
return perf_event__names[id];
}
-static struct perf_sample synth_sample = {
+static int perf_tool__process_synth_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct machine *machine,
+ perf_event__handler_t process)
+{
+ struct perf_sample synth_sample = {
.pid = -1,
.tid = -1,
.time = -1,
.stream_id = -1,
.cpu = -1,
.period = 1,
+ .cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK,
+ };
+
+ return process(tool, event, &synth_sample, machine);
};
/*
@@ -186,7 +195,7 @@ pid_t perf_event__synthesize_comm(struct perf_tool *tool,
if (perf_event__prepare_comm(event, pid, machine, &tgid, &ppid) != 0)
return -1;
- if (process(tool, event, &synth_sample, machine) != 0)
+ if (perf_tool__process_synth_event(tool, event, machine, process) != 0)
return -1;
return tgid;
@@ -218,7 +227,7 @@ static int perf_event__synthesize_fork(struct perf_tool *tool,
event->fork.header.size = (sizeof(event->fork) + machine->id_hdr_size);
- if (process(tool, event, &synth_sample, machine) != 0)
+ if (perf_tool__process_synth_event(tool, event, machine, process) != 0)
return -1;
return 0;
@@ -344,7 +353,7 @@ out:
event->mmap2.pid = tgid;
event->mmap2.tid = pid;
- if (process(tool, event, &synth_sample, machine) != 0) {
+ if (perf_tool__process_synth_event(tool, event, machine, process) != 0) {
rc = -1;
break;
}
@@ -402,7 +411,7 @@ int perf_event__synthesize_modules(struct perf_tool *tool,
memcpy(event->mmap.filename, pos->dso->long_name,
pos->dso->long_name_len + 1);
- if (process(tool, event, &synth_sample, machine) != 0) {
+ if (perf_tool__process_synth_event(tool, event, machine, process) != 0) {
rc = -1;
break;
}
@@ -472,7 +481,7 @@ static int __event__synthesize_thread(union perf_event *comm_event,
/*
* Send the prepared comm event
*/
- if (process(tool, comm_event, &synth_sample, machine) != 0)
+ if (perf_tool__process_synth_event(tool, comm_event, machine, process) != 0)
break;
rc = 0;
@@ -701,7 +710,7 @@ int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
event->mmap.len = map->end - event->mmap.start;
event->mmap.pid = machine->pid;
- err = process(tool, event, &synth_sample, machine);
+ err = perf_tool__process_synth_event(tool, event, machine, process);
free(event);
return err;
@@ -1295,12 +1304,9 @@ void thread__find_addr_location(struct thread *thread,
* Callers need to drop the reference to al->thread, obtained in
* machine__findnew_thread()
*/
-int perf_event__preprocess_sample(const union perf_event *event,
- struct machine *machine,
- struct addr_location *al,
- struct perf_sample *sample)
+int machine__resolve(struct machine *machine, struct addr_location *al,
+ struct perf_sample *sample)
{
- u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
struct thread *thread = machine__findnew_thread(machine, sample->pid,
sample->tid);
@@ -1315,11 +1321,11 @@ int perf_event__preprocess_sample(const union perf_event *event,
* events, but for older perf.data files there was no such thing, so do
* it now.
*/
- if (cpumode == PERF_RECORD_MISC_KERNEL &&
+ if (sample->cpumode == PERF_RECORD_MISC_KERNEL &&
machine__kernel_map(machine) == NULL)
machine__create_kernel_maps(machine);
- thread__find_addr_map(thread, cpumode, MAP__FUNCTION, sample->ip, al);
+ thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->ip, al);
dump_printf(" ...... dso: %s\n",
al->map ? al->map->dso->long_name :
al->level == 'H' ? "[hypervisor]" : "<not found>");
@@ -1395,16 +1401,12 @@ bool sample_addr_correlates_sym(struct perf_event_attr *attr)
return false;
}
-void perf_event__preprocess_sample_addr(union perf_event *event,
- struct perf_sample *sample,
- struct thread *thread,
- struct addr_location *al)
+void thread__resolve(struct thread *thread, struct addr_location *al,
+ struct perf_sample *sample)
{
- u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
-
- thread__find_addr_map(thread, cpumode, MAP__FUNCTION, sample->addr, al);
+ thread__find_addr_map(thread, sample->cpumode, MAP__FUNCTION, sample->addr, al);
if (!al->map)
- thread__find_addr_map(thread, cpumode, MAP__VARIABLE,
+ thread__find_addr_map(thread, sample->cpumode, MAP__VARIABLE,
sample->addr, al);
al->cpu = sample->cpu;
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index b7ffb7ee9971..6bb1c928350d 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -192,6 +192,7 @@ struct perf_sample {
u64 data_src;
u32 flags;
u16 insn_len;
+ u8 cpumode;
void *raw_data;
struct ip_callchain *callchain;
struct branch_stack *branch_stack;
@@ -597,10 +598,8 @@ int perf_event__process(struct perf_tool *tool,
struct addr_location;
-int perf_event__preprocess_sample(const union perf_event *event,
- struct machine *machine,
- struct addr_location *al,
- struct perf_sample *sample);
+int machine__resolve(struct machine *machine, struct addr_location *al,
+ struct perf_sample *sample);
void addr_location__put(struct addr_location *al);
@@ -608,10 +607,8 @@ struct thread;
bool is_bts_event(struct perf_event_attr *attr);
bool sample_addr_correlates_sym(struct perf_event_attr *attr);
-void perf_event__preprocess_sample_addr(union perf_event *event,
- struct perf_sample *sample,
- struct thread *thread,
- struct addr_location *al);
+void thread__resolve(struct thread *thread, struct addr_location *al,
+ struct perf_sample *sample);
const char *perf_event__name(unsigned int id);
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 0902fe418754..738ce226002b 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1643,6 +1643,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event,
data->stream_id = data->id = data->time = -1ULL;
data->period = evsel->attr.sample_period;
data->weight = 0;
+ data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
if (event->header.type != PERF_RECORD_SAMPLE) {
if (!evsel->attr.sample_id_all)
diff --git a/tools/perf/util/genelf.h b/tools/perf/util/genelf.h
index 45bf9c6d3257..2fbeb59c4bdd 100644
--- a/tools/perf/util/genelf.h
+++ b/tools/perf/util/genelf.h
@@ -2,45 +2,39 @@
#define __GENELF_H__
/* genelf.c */
-extern int jit_write_elf(int fd, uint64_t code_addr, const char *sym,
- const void *code, int csize,
- void *debug, int nr_debug_entries);
+int jit_write_elf(int fd, uint64_t code_addr, const char *sym,
+ const void *code, int csize, void *debug, int nr_debug_entries);
/* genelf_debug.c */
-extern int jit_add_debug_info(Elf *e, uint64_t code_addr,
- void *debug, int nr_debug_entries);
+int jit_add_debug_info(Elf *e, uint64_t code_addr, void *debug, int nr_debug_entries);
#if defined(__arm__)
#define GEN_ELF_ARCH EM_ARM
-#define GEN_ELF_ENDIAN ELFDATA2LSB
#define GEN_ELF_CLASS ELFCLASS32
#elif defined(__aarch64__)
#define GEN_ELF_ARCH EM_AARCH64
-#define GEN_ELF_ENDIAN ELFDATA2LSB
#define GEN_ELF_CLASS ELFCLASS64
#elif defined(__x86_64__)
#define GEN_ELF_ARCH EM_X86_64
-#define GEN_ELF_ENDIAN ELFDATA2LSB
#define GEN_ELF_CLASS ELFCLASS64
#elif defined(__i386__)
#define GEN_ELF_ARCH EM_386
-#define GEN_ELF_ENDIAN ELFDATA2LSB
#define GEN_ELF_CLASS ELFCLASS32
-#elif defined(__ppcle__)
-#define GEN_ELF_ARCH EM_PPC
-#define GEN_ELF_ENDIAN ELFDATA2LSB
-#define GEN_ELF_CLASS ELFCLASS64
-#elif defined(__powerpc__)
-#define GEN_ELF_ARCH EM_PPC64
-#define GEN_ELF_ENDIAN ELFDATA2MSB
-#define GEN_ELF_CLASS ELFCLASS64
-#elif defined(__powerpcle__)
+#elif defined(__powerpc64__)
#define GEN_ELF_ARCH EM_PPC64
-#define GEN_ELF_ENDIAN ELFDATA2LSB
#define GEN_ELF_CLASS ELFCLASS64
+#elif defined(__powerpc__)
+#define GEN_ELF_ARCH EM_PPC
+#define GEN_ELF_CLASS ELFCLASS32
#else
#error "unsupported architecture"
#endif
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define GEN_ELF_ENDIAN ELFDATA2MSB
+#else
+#define GEN_ELF_ENDIAN ELFDATA2LSB
+#endif
+
#if GEN_ELF_CLASS == ELFCLASS64
#define elf_newehdr elf64_newehdr
#define elf_getshdr elf64_getshdr
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 73e38e472ecd..90680ec9f8b8 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1872,11 +1872,6 @@ static int process_cpu_topology(struct perf_file_section *section,
if (ph->needs_swap)
nr = bswap_32(nr);
- if (nr > (u32)cpu_nr) {
- pr_debug("core_id number is too big."
- "You may need to upgrade the perf tool.\n");
- goto free_cpu;
- }
ph->env.cpu[i].core_id = nr;
ret = readn(fd, &nr, sizeof(nr));
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 3d87ca823c0a..d306ca118449 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -121,7 +121,7 @@ int perf_event__synthesize_event_update_cpus(struct perf_tool *tool,
perf_event__handler_t process);
int perf_event__process_attr(struct perf_tool *tool, union perf_event *event,
struct perf_evlist **pevlist);
-int perf_event__process_event_update(struct perf_tool *tool __maybe_unused,
+int perf_event__process_event_update(struct perf_tool *tool,
union perf_event *event,
struct perf_evlist **pevlist);
size_t perf_event__fprintf_event_update(union perf_event *event, FILE *fp);
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 290b3cbf6877..31c4641fe5ff 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -670,7 +670,7 @@ iter_prepare_branch_entry(struct hist_entry_iter *iter, struct addr_location *al
}
static int
-iter_add_single_branch_entry(struct hist_entry_iter *iter __maybe_unused,
+iter_add_single_branch_entry(struct hist_entry_iter *iter,
struct addr_location *al __maybe_unused)
{
/* to avoid calling callback function */
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index ead18c82294f..bec0cd660fbd 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -433,8 +433,7 @@ void hist__account_cycles(struct branch_stack *bs, struct addr_location *al,
struct perf_sample *sample, bool nonany_branch_mode);
struct option;
-int parse_filter_percentage(const struct option *opt __maybe_unused,
- const char *arg, int unset __maybe_unused);
+int parse_filter_percentage(const struct option *opt, const char *arg, int unset);
int perf_hist_config(const char *var, const char *value);
void perf_hpp_list__init(struct perf_hpp_list *list);
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index eb0e7f8bf515..abf1366e2a24 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -279,6 +279,7 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
event.sample.header.misc = PERF_RECORD_MISC_USER;
event.sample.header.size = sizeof(struct perf_event_header);
+ sample.cpumode = PERF_RECORD_MISC_USER;
sample.ip = le64_to_cpu(branch->from);
sample.pid = btsq->pid;
sample.tid = btsq->tid;
@@ -678,7 +679,7 @@ static int intel_bts_process_auxtrace_event(struct perf_session *session,
return 0;
}
-static int intel_bts_flush(struct perf_session *session __maybe_unused,
+static int intel_bts_flush(struct perf_session *session,
struct perf_tool *tool __maybe_unused)
{
struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
diff --git a/tools/perf/util/intel-pt-decoder/insn.c b/tools/perf/util/intel-pt-decoder/insn.c
index 47314a64399c..9f26eae6c9f0 100644
--- a/tools/perf/util/intel-pt-decoder/insn.c
+++ b/tools/perf/util/intel-pt-decoder/insn.c
@@ -374,7 +374,7 @@ void insn_get_displacement(struct insn *insn)
if (mod == 3)
goto out;
if (mod == 1) {
- insn->displacement.value = get_next(char, insn);
+ insn->displacement.value = get_next(signed char, insn);
insn->displacement.nbytes = 1;
} else if (insn->addr_bytes == 2) {
if ((mod == 0 && rm == 6) || mod == 2) {
@@ -532,7 +532,7 @@ void insn_get_immediate(struct insn *insn)
switch (inat_immediate_size(insn->attr)) {
case INAT_IMM_BYTE:
- insn->immediate.value = get_next(char, insn);
+ insn->immediate.value = get_next(signed char, insn);
insn->immediate.nbytes = 1;
break;
case INAT_IMM_WORD:
@@ -566,7 +566,7 @@ void insn_get_immediate(struct insn *insn)
goto err_out;
}
if (inat_has_second_immediate(insn->attr)) {
- insn->immediate2.value = get_next(char, insn);
+ insn->immediate2.value = get_next(signed char, insn);
insn->immediate2.nbytes = 1;
}
done:
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 05d815851be1..617578440989 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -979,6 +979,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
if (!pt->timeless_decoding)
sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
+ sample.cpumode = PERF_RECORD_MISC_USER;
sample.ip = ptq->state->from_ip;
sample.pid = ptq->pid;
sample.tid = ptq->tid;
@@ -1035,6 +1036,7 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
if (!pt->timeless_decoding)
sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
+ sample.cpumode = PERF_RECORD_MISC_USER;
sample.ip = ptq->state->from_ip;
sample.pid = ptq->pid;
sample.tid = ptq->tid;
@@ -1092,6 +1094,7 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
if (!pt->timeless_decoding)
sample.time = tsc_to_perf_time(ptq->timestamp, &pt->tc);
+ sample.cpumode = PERF_RECORD_MISC_USER;
sample.ip = ptq->state->from_ip;
sample.pid = ptq->pid;
sample.tid = ptq->tid;
@@ -1127,7 +1130,7 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n",
ret);
- if (pt->synth_opts.callchain)
+ if (pt->synth_opts.last_branch)
intel_pt_reset_last_branch_rb(ptq);
return ret;
diff --git a/tools/perf/util/jit.h b/tools/perf/util/jit.h
index a1e99da0715a..3f42ee4d2a0b 100644
--- a/tools/perf/util/jit.h
+++ b/tools/perf/util/jit.h
@@ -3,13 +3,9 @@
#include <data.h>
-extern int jit_process(struct perf_session *session,
- struct perf_data_file *output,
- struct machine *machine,
- char *filename,
- pid_t pid,
- u64 *nbytes);
-
-extern int jit_inject_record(const char *filename);
+int jit_process(struct perf_session *session, struct perf_data_file *output,
+ struct machine *machine, char *filename, pid_t pid, u64 *nbytes);
+
+int jit_inject_record(const char *filename);
#endif /* __JIT_H__ */
diff --git a/tools/perf/util/jitdump.c b/tools/perf/util/jitdump.c
index cd272cc21e05..ad0c0bb1fbc7 100644
--- a/tools/perf/util/jitdump.c
+++ b/tools/perf/util/jitdump.c
@@ -417,6 +417,7 @@ static int jit_repipe_code_load(struct jit_buf_desc *jd, union jr_entry *jr)
* use first address as sample address
*/
memset(&sample, 0, sizeof(sample));
+ sample.cpumode = PERF_RECORD_MISC_USER;
sample.pid = pid;
sample.tid = tid;
sample.time = id->time;
@@ -505,6 +506,7 @@ static int jit_repipe_code_move(struct jit_buf_desc *jd, union jr_entry *jr)
* use first address as sample address
*/
memset(&sample, 0, sizeof(sample));
+ sample.cpumode = PERF_RECORD_MISC_USER;
sample.pid = pid;
sample.tid = tid;
sample.time = id->time;
diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c
index 00724d496d38..33071d6159bc 100644
--- a/tools/perf/util/llvm-utils.c
+++ b/tools/perf/util/llvm-utils.c
@@ -3,11 +3,11 @@
* Copyright (C) 2015, Huawei Inc.
*/
+#include <limits.h>
#include <stdio.h>
-#include "util.h"
+#include <stdlib.h>
#include "debug.h"
#include "llvm-utils.h"
-#include "cache.h"
#define CLANG_BPF_CMD_DEFAULT_TEMPLATE \
"$CLANG_EXEC -D__KERNEL__ -D__NR_CPUS__=$NR_CPUS "\
@@ -98,11 +98,12 @@ read_from_pipe(const char *cmd, void **p_buf, size_t *p_read_sz)
void *buf = NULL;
FILE *file = NULL;
size_t read_sz = 0, buf_sz = 0;
+ char serr[STRERR_BUFSIZE];
file = popen(cmd, "r");
if (!file) {
pr_err("ERROR: unable to popen cmd: %s\n",
- strerror(errno));
+ strerror_r(errno, serr, sizeof(serr)));
return -EINVAL;
}
@@ -136,7 +137,7 @@ read_from_pipe(const char *cmd, void **p_buf, size_t *p_read_sz)
if (ferror(file)) {
pr_err("ERROR: error occurred when reading from pipe: %s\n",
- strerror(errno));
+ strerror_r(errno, serr, sizeof(serr)));
err = -EIO;
goto errout;
}
@@ -334,10 +335,18 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf,
unsigned int kernel_version;
char linux_version_code_str[64];
const char *clang_opt = llvm_param.clang_opt;
- char clang_path[PATH_MAX], nr_cpus_avail_str[64];
+ char clang_path[PATH_MAX], abspath[PATH_MAX], nr_cpus_avail_str[64];
+ char serr[STRERR_BUFSIZE];
char *kbuild_dir = NULL, *kbuild_include_opts = NULL;
const char *template = llvm_param.clang_bpf_cmd_template;
+ if (path[0] != '-' && realpath(path, abspath) == NULL) {
+ err = errno;
+ pr_err("ERROR: problems with path %s: %s\n",
+ path, strerror_r(err, serr, sizeof(serr)));
+ return -err;
+ }
+
if (!template)
template = CLANG_BPF_CMD_DEFAULT_TEMPLATE;
@@ -362,7 +371,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf,
if (nr_cpus_avail <= 0) {
pr_err(
"WARNING:\tunable to get available CPUs in this system: %s\n"
-" \tUse 128 instead.\n", strerror(errno));
+" \tUse 128 instead.\n", strerror_r(errno, serr, sizeof(serr)));
nr_cpus_avail = 128;
}
snprintf(nr_cpus_avail_str, sizeof(nr_cpus_avail_str), "%d",
@@ -387,8 +396,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf,
* stdin to be source file (testing).
*/
force_set_env("CLANG_SOURCE",
- (path[0] == '-') ? path :
- make_nonrelative_path(path));
+ (path[0] == '-') ? path : abspath);
pr_debug("llvm compiling command template: %s\n", template);
err = read_from_pipe(template, &obj_buf, &obj_buf_sz);
diff --git a/tools/perf/util/llvm-utils.h b/tools/perf/util/llvm-utils.h
index 5b3cf1c229e2..23b9a743fe72 100644
--- a/tools/perf/util/llvm-utils.h
+++ b/tools/perf/util/llvm-utils.h
@@ -39,11 +39,10 @@ struct llvm_param {
};
extern struct llvm_param llvm_param;
-extern int perf_llvm_config(const char *var, const char *value);
+int perf_llvm_config(const char *var, const char *value);
-extern int llvm__compile_bpf(const char *path, void **p_obj_buf,
- size_t *p_obj_buf_sz);
+int llvm__compile_bpf(const char *path, void **p_obj_buf, size_t *p_obj_buf_sz);
/* This function is for test__llvm() use only */
-extern int llvm__search_clang(void);
+int llvm__search_clang(void);
#endif
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index ad79297c76c8..80b9b6a87990 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1301,9 +1301,8 @@ out_problem:
int machine__process_mmap2_event(struct machine *machine,
union perf_event *event,
- struct perf_sample *sample __maybe_unused)
+ struct perf_sample *sample)
{
- u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
struct thread *thread;
struct map *map;
enum map_type type;
@@ -1312,8 +1311,8 @@ int machine__process_mmap2_event(struct machine *machine,
if (dump_trace)
perf_event__fprintf_mmap2(event, stdout);
- if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
- cpumode == PERF_RECORD_MISC_KERNEL) {
+ if (sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
+ sample->cpumode == PERF_RECORD_MISC_KERNEL) {
ret = machine__process_kernel_mmap_event(machine, event);
if (ret < 0)
goto out_problem;
@@ -1355,9 +1354,8 @@ out_problem:
}
int machine__process_mmap_event(struct machine *machine, union perf_event *event,
- struct perf_sample *sample __maybe_unused)
+ struct perf_sample *sample)
{
- u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
struct thread *thread;
struct map *map;
enum map_type type;
@@ -1366,8 +1364,8 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event
if (dump_trace)
perf_event__fprintf_mmap(event, stdout);
- if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
- cpumode == PERF_RECORD_MISC_KERNEL) {
+ if (sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
+ sample->cpumode == PERF_RECORD_MISC_KERNEL) {
ret = machine__process_kernel_mmap_event(machine, event);
if (ret < 0)
goto out_problem;
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 1a3e45baf97f..8499db281158 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -94,7 +94,7 @@ int machine__process_aux_event(struct machine *machine,
union perf_event *event);
int machine__process_itrace_start_event(struct machine *machine,
union perf_event *event);
-int machine__process_switch_event(struct machine *machine __maybe_unused,
+int machine__process_switch_event(struct machine *machine,
union perf_event *event);
int machine__process_mmap_event(struct machine *machine, union perf_event *event,
struct perf_sample *sample);
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 67e493088e81..d740c3ca9a1d 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -22,19 +22,18 @@ struct tracepoint_path {
struct tracepoint_path *next;
};
-extern struct tracepoint_path *tracepoint_id_to_path(u64 config);
-extern struct tracepoint_path *tracepoint_name_to_path(const char *name);
-extern bool have_tracepoints(struct list_head *evlist);
+struct tracepoint_path *tracepoint_id_to_path(u64 config);
+struct tracepoint_path *tracepoint_name_to_path(const char *name);
+bool have_tracepoints(struct list_head *evlist);
const char *event_type(int type);
-extern int parse_events_option(const struct option *opt, const char *str,
- int unset);
-extern int parse_events(struct perf_evlist *evlist, const char *str,
- struct parse_events_error *error);
-extern int parse_events_terms(struct list_head *terms, const char *str);
-extern int parse_filter(const struct option *opt, const char *str, int unset);
-extern int exclude_perf(const struct option *opt, const char *arg, int unset);
+int parse_events_option(const struct option *opt, const char *str, int unset);
+int parse_events(struct perf_evlist *evlist, const char *str,
+ struct parse_events_error *error);
+int parse_events_terms(struct list_head *terms, const char *str);
+int parse_filter(const struct option *opt, const char *str, int unset);
+int exclude_perf(const struct option *opt, const char *arg, int unset);
#define EVENTS_HELP_MAX (128*1024)
@@ -183,7 +182,7 @@ void print_symbol_events(const char *event_glob, unsigned type,
void print_tracepoint_events(const char *subsys_glob, const char *event_glob,
bool name_only);
int print_hwcache_events(const char *event_glob, bool name_only);
-extern int is_valid_tracepoint(const char *event_string);
+int is_valid_tracepoint(const char *event_string);
int valid_event_mount(const char *eventfs);
char *parse_events_formats_error_string(char *additional_terms);
diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c
index 3654d964e49d..3bf6bf82ff2d 100644
--- a/tools/perf/util/path.c
+++ b/tools/perf/util/path.c
@@ -41,36 +41,6 @@ static char *cleanup_path(char *path)
return path;
}
-static char *perf_vsnpath(char *buf, size_t n, const char *fmt, va_list args)
-{
- const char *perf_dir = get_perf_dir();
- size_t len;
-
- len = strlen(perf_dir);
- if (n < len + 1)
- goto bad;
- memcpy(buf, perf_dir, len);
- if (len && !is_dir_sep(perf_dir[len-1]))
- buf[len++] = '/';
- len += vsnprintf(buf + len, n - len, fmt, args);
- if (len >= n)
- goto bad;
- return cleanup_path(buf);
-bad:
- strlcpy(buf, bad_path, n);
- return buf;
-}
-
-char *perf_pathdup(const char *fmt, ...)
-{
- char path[PATH_MAX];
- va_list args;
- va_start(args, fmt);
- (void)perf_vsnpath(path, sizeof(path), fmt, args);
- va_end(args);
- return xstrdup(path);
-}
-
char *mkpath(const char *fmt, ...)
{
va_list args;
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 93996ec4bbe3..8319fbb08636 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -2179,7 +2179,7 @@ static int perf_probe_event__sprintf(const char *group, const char *event,
strbuf_addf(result, " in %s", module);
if (pev->nargs > 0) {
- strbuf_addstr(result, " with");
+ strbuf_add(result, " with", 5);
for (i = 0; i < pev->nargs; i++) {
ret = synthesize_perf_probe_arg(&pev->args[i],
buf, 128);
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index ba926c30f8cd..e54e7b011577 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -114,49 +114,44 @@ int init_probe_symbol_maps(bool user_only);
void exit_probe_symbol_maps(void);
/* Command string to events */
-extern int parse_perf_probe_command(const char *cmd,
- struct perf_probe_event *pev);
-extern int parse_probe_trace_command(const char *cmd,
- struct probe_trace_event *tev);
+int parse_perf_probe_command(const char *cmd, struct perf_probe_event *pev);
+int parse_probe_trace_command(const char *cmd, struct probe_trace_event *tev);
/* Events to command string */
-extern char *synthesize_perf_probe_command(struct perf_probe_event *pev);
-extern char *synthesize_probe_trace_command(struct probe_trace_event *tev);
-extern int synthesize_perf_probe_arg(struct perf_probe_arg *pa, char *buf,
- size_t len);
+char *synthesize_perf_probe_command(struct perf_probe_event *pev);
+char *synthesize_probe_trace_command(struct probe_trace_event *tev);
+int synthesize_perf_probe_arg(struct perf_probe_arg *pa, char *buf, size_t len);
/* Check the perf_probe_event needs debuginfo */
-extern bool perf_probe_event_need_dwarf(struct perf_probe_event *pev);
+bool perf_probe_event_need_dwarf(struct perf_probe_event *pev);
/* Release event contents */
-extern void clear_perf_probe_event(struct perf_probe_event *pev);
-extern void clear_probe_trace_event(struct probe_trace_event *tev);
+void clear_perf_probe_event(struct perf_probe_event *pev);
+void clear_probe_trace_event(struct probe_trace_event *tev);
/* Command string to line-range */
-extern int parse_line_range_desc(const char *cmd, struct line_range *lr);
+int parse_line_range_desc(const char *cmd, struct line_range *lr);
/* Release line range members */
-extern void line_range__clear(struct line_range *lr);
+void line_range__clear(struct line_range *lr);
/* Initialize line range */
-extern int line_range__init(struct line_range *lr);
-
-extern int add_perf_probe_events(struct perf_probe_event *pevs, int npevs);
-extern int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs);
-extern int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs);
-extern void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs);
-extern int del_perf_probe_events(struct strfilter *filter);
-
-extern int show_perf_probe_event(const char *group, const char *event,
- struct perf_probe_event *pev,
- const char *module, bool use_stdout);
-extern int show_perf_probe_events(struct strfilter *filter);
-extern int show_line_range(struct line_range *lr, const char *module,
- bool user);
-extern int show_available_vars(struct perf_probe_event *pevs, int npevs,
- struct strfilter *filter);
-extern int show_available_funcs(const char *module, struct strfilter *filter,
- bool user);
+int line_range__init(struct line_range *lr);
+
+int add_perf_probe_events(struct perf_probe_event *pevs, int npevs);
+int convert_perf_probe_events(struct perf_probe_event *pevs, int npevs);
+int apply_perf_probe_events(struct perf_probe_event *pevs, int npevs);
+void cleanup_perf_probe_events(struct perf_probe_event *pevs, int npevs);
+int del_perf_probe_events(struct strfilter *filter);
+
+int show_perf_probe_event(const char *group, const char *event,
+ struct perf_probe_event *pev,
+ const char *module, bool use_stdout);
+int show_perf_probe_events(struct strfilter *filter);
+int show_line_range(struct line_range *lr, const char *module, bool user);
+int show_available_vars(struct perf_probe_event *pevs, int npevs,
+ struct strfilter *filter);
+int show_available_funcs(const char *module, struct strfilter *filter, bool user);
bool arch__prefers_symtab(void);
void arch__fix_tev_from_maps(struct perf_probe_event *pev,
struct probe_trace_event *tev, struct map *map);
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 4ce5c5e18f48..b3bd0fba0237 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -1314,18 +1314,18 @@ static int collect_variables_cb(Dwarf_Die *die_mem, void *data)
if (probe_conf.show_location_range) {
if (!externs) {
if (ret)
- strbuf_addf(&buf, "[INV]\t");
+ strbuf_add(&buf, "[INV]\t", 6);
else
- strbuf_addf(&buf, "[VAL]\t");
+ strbuf_add(&buf, "[VAL]\t", 6);
} else
- strbuf_addf(&buf, "[EXT]\t");
+ strbuf_add(&buf, "[EXT]\t", 6);
}
ret2 = die_get_varname(die_mem, &buf);
if (!ret2 && probe_conf.show_location_range &&
!externs) {
- strbuf_addf(&buf, "\t");
+ strbuf_addch(&buf, '\t');
ret2 = die_get_var_range(&af->pf.sp_die,
die_mem, &buf);
}
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index 0aec7704e395..51137fccb9c8 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -34,27 +34,25 @@ struct debuginfo {
};
/* This also tries to open distro debuginfo */
-extern struct debuginfo *debuginfo__new(const char *path);
-extern void debuginfo__delete(struct debuginfo *dbg);
+struct debuginfo *debuginfo__new(const char *path);
+void debuginfo__delete(struct debuginfo *dbg);
/* Find probe_trace_events specified by perf_probe_event from debuginfo */
-extern int debuginfo__find_trace_events(struct debuginfo *dbg,
- struct perf_probe_event *pev,
- struct probe_trace_event **tevs);
+int debuginfo__find_trace_events(struct debuginfo *dbg,
+ struct perf_probe_event *pev,
+ struct probe_trace_event **tevs);
/* Find a perf_probe_point from debuginfo */
-extern int debuginfo__find_probe_point(struct debuginfo *dbg,
- unsigned long addr,
- struct perf_probe_point *ppt);
+int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr,
+ struct perf_probe_point *ppt);
/* Find a line range */
-extern int debuginfo__find_line_range(struct debuginfo *dbg,
- struct line_range *lr);
+int debuginfo__find_line_range(struct debuginfo *dbg, struct line_range *lr);
/* Find available variables */
-extern int debuginfo__find_available_vars_at(struct debuginfo *dbg,
- struct perf_probe_event *pev,
- struct variable_list **vls);
+int debuginfo__find_available_vars_at(struct debuginfo *dbg,
+ struct perf_probe_event *pev,
+ struct variable_list **vls);
/* Find a src file from a DWARF tag path */
int get_real_path(const char *raw_path, const char *comp_dir,
diff --git a/tools/perf/util/quote.h b/tools/perf/util/quote.h
index 172889ea234f..3340c9c4a6ca 100644
--- a/tools/perf/util/quote.h
+++ b/tools/perf/util/quote.h
@@ -24,6 +24,6 @@
* sq_quote() in a real application.
*/
-extern void sq_quote_argv(struct strbuf *, const char **argv, size_t maxlen);
+void sq_quote_argv(struct strbuf *, const char **argv, size_t maxlen);
#endif /* __PERF_QUOTE_H */
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 60b3593d210d..4abd85c6346d 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1107,12 +1107,11 @@ static struct machine *machines__find_for_cpumode(struct machines *machines,
union perf_event *event,
struct perf_sample *sample)
{
- const u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
struct machine *machine;
if (perf_guest &&
- ((cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ||
- (cpumode == PERF_RECORD_MISC_GUEST_USER))) {
+ ((sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ||
+ (sample->cpumode == PERF_RECORD_MISC_GUEST_USER))) {
u32 pid;
if (event->header.type == PERF_RECORD_MMAP
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 93fa136b0025..47966a1618c7 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -2225,7 +2225,7 @@ int hpp_dimension__add_output(unsigned col)
}
static int sort_dimension__add(struct perf_hpp_list *list, const char *tok,
- struct perf_evlist *evlist __maybe_unused,
+ struct perf_evlist *evlist,
int level)
{
unsigned int i;
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
index b33ffb2af2cf..fdb71961143e 100644
--- a/tools/perf/util/stat-shadow.c
+++ b/tools/perf/util/stat-shadow.c
@@ -152,8 +152,7 @@ static const char *get_ratio_color(enum grc_type type, double ratio)
}
static void print_stalled_cycles_frontend(int cpu,
- struct perf_evsel *evsel
- __maybe_unused, double avg,
+ struct perf_evsel *evsel, double avg,
struct perf_stat_output_ctx *out)
{
double total, ratio = 0.0;
@@ -175,8 +174,7 @@ static void print_stalled_cycles_frontend(int cpu,
}
static void print_stalled_cycles_backend(int cpu,
- struct perf_evsel *evsel
- __maybe_unused, double avg,
+ struct perf_evsel *evsel, double avg,
struct perf_stat_output_ctx *out)
{
double total, ratio = 0.0;
@@ -194,7 +192,7 @@ static void print_stalled_cycles_backend(int cpu,
}
static void print_branch_misses(int cpu,
- struct perf_evsel *evsel __maybe_unused,
+ struct perf_evsel *evsel,
double avg,
struct perf_stat_output_ctx *out)
{
@@ -213,7 +211,7 @@ static void print_branch_misses(int cpu,
}
static void print_l1_dcache_misses(int cpu,
- struct perf_evsel *evsel __maybe_unused,
+ struct perf_evsel *evsel,
double avg,
struct perf_stat_output_ctx *out)
{
@@ -232,7 +230,7 @@ static void print_l1_dcache_misses(int cpu,
}
static void print_l1_icache_misses(int cpu,
- struct perf_evsel *evsel __maybe_unused,
+ struct perf_evsel *evsel,
double avg,
struct perf_stat_output_ctx *out)
{
@@ -250,7 +248,7 @@ static void print_l1_icache_misses(int cpu,
}
static void print_dtlb_cache_misses(int cpu,
- struct perf_evsel *evsel __maybe_unused,
+ struct perf_evsel *evsel,
double avg,
struct perf_stat_output_ctx *out)
{
@@ -268,7 +266,7 @@ static void print_dtlb_cache_misses(int cpu,
}
static void print_itlb_cache_misses(int cpu,
- struct perf_evsel *evsel __maybe_unused,
+ struct perf_evsel *evsel,
double avg,
struct perf_stat_output_ctx *out)
{
@@ -286,7 +284,7 @@ static void print_itlb_cache_misses(int cpu,
}
static void print_ll_cache_misses(int cpu,
- struct perf_evsel *evsel __maybe_unused,
+ struct perf_evsel *evsel,
double avg,
struct perf_stat_output_ctx *out)
{
diff --git a/tools/perf/util/strbuf.c b/tools/perf/util/strbuf.c
index d3d279275432..8fb73295ec34 100644
--- a/tools/perf/util/strbuf.c
+++ b/tools/perf/util/strbuf.c
@@ -51,6 +51,13 @@ void strbuf_grow(struct strbuf *sb, size_t extra)
ALLOC_GROW(sb->buf, sb->len + extra + 1, sb->alloc);
}
+void strbuf_addch(struct strbuf *sb, int c)
+{
+ strbuf_grow(sb, 1);
+ sb->buf[sb->len++] = c;
+ sb->buf[sb->len] = '\0';
+}
+
void strbuf_add(struct strbuf *sb, const void *data, size_t len)
{
strbuf_grow(sb, len);
@@ -58,7 +65,7 @@ void strbuf_add(struct strbuf *sb, const void *data, size_t len)
strbuf_setlen(sb, sb->len + len);
}
-void strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap)
+static void strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap)
{
int len;
va_list ap_saved;
diff --git a/tools/perf/util/strbuf.h b/tools/perf/util/strbuf.h
index 7a32c838884d..ab9be0fbbd40 100644
--- a/tools/perf/util/strbuf.h
+++ b/tools/perf/util/strbuf.h
@@ -51,16 +51,16 @@ struct strbuf {
#define STRBUF_INIT { 0, 0, strbuf_slopbuf }
/*----- strbuf life cycle -----*/
-extern void strbuf_init(struct strbuf *buf, ssize_t hint);
-extern void strbuf_release(struct strbuf *);
-extern char *strbuf_detach(struct strbuf *, size_t *);
+void strbuf_init(struct strbuf *buf, ssize_t hint);
+void strbuf_release(struct strbuf *buf);
+char *strbuf_detach(struct strbuf *buf, size_t *);
/*----- strbuf size related -----*/
static inline ssize_t strbuf_avail(const struct strbuf *sb) {
return sb->alloc ? sb->alloc - sb->len - 1 : 0;
}
-extern void strbuf_grow(struct strbuf *, size_t);
+void strbuf_grow(struct strbuf *buf, size_t);
static inline void strbuf_setlen(struct strbuf *sb, size_t len) {
if (!sb->alloc)
@@ -71,22 +71,17 @@ static inline void strbuf_setlen(struct strbuf *sb, size_t len) {
}
/*----- add data in your buffer -----*/
-static inline void strbuf_addch(struct strbuf *sb, int c) {
- strbuf_grow(sb, 1);
- sb->buf[sb->len++] = c;
- sb->buf[sb->len] = '\0';
-}
+void strbuf_addch(struct strbuf *sb, int c);
-extern void strbuf_add(struct strbuf *, const void *, size_t);
+void strbuf_add(struct strbuf *buf, const void *, size_t);
static inline void strbuf_addstr(struct strbuf *sb, const char *s) {
strbuf_add(sb, s, strlen(s));
}
__attribute__((format(printf,2,3)))
-extern void strbuf_addf(struct strbuf *sb, const char *fmt, ...);
-extern void strbuf_addv(struct strbuf *sb, const char *fmt, va_list ap);
+void strbuf_addf(struct strbuf *sb, const char *fmt, ...);
/* XXX: if read fails, any partial read is undone */
-extern ssize_t strbuf_read(struct strbuf *, int fd, ssize_t hint);
+ssize_t strbuf_read(struct strbuf *, int fd, ssize_t hint);
#endif /* __PERF_STRBUF_H */
diff --git a/tools/perf/util/svghelper.h b/tools/perf/util/svghelper.h
index 9292a5291445..946fdf2db97c 100644
--- a/tools/perf/util/svghelper.h
+++ b/tools/perf/util/svghelper.h
@@ -3,32 +3,31 @@
#include <linux/types.h>
-extern void open_svg(const char *filename, int cpus, int rows, u64 start, u64 end);
-extern void svg_ubox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges);
-extern void svg_lbox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges);
-extern void svg_fbox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges);
-extern void svg_box(int Yslot, u64 start, u64 end, const char *type);
-extern void svg_blocked(int Yslot, int cpu, u64 start, u64 end, const char *backtrace);
-extern void svg_running(int Yslot, int cpu, u64 start, u64 end, const char *backtrace);
-extern void svg_waiting(int Yslot, int cpu, u64 start, u64 end, const char *backtrace);
-extern void svg_cpu_box(int cpu, u64 max_frequency, u64 turbo_frequency);
-
-
-extern void svg_process(int cpu, u64 start, u64 end, int pid, const char *name, const char *backtrace);
-extern void svg_cstate(int cpu, u64 start, u64 end, int type);
-extern void svg_pstate(int cpu, u64 start, u64 end, u64 freq);
-
-
-extern void svg_time_grid(double min_thickness);
-extern void svg_io_legenda(void);
-extern void svg_legenda(void);
-extern void svg_wakeline(u64 start, int row1, int row2, const char *backtrace);
-extern void svg_partial_wakeline(u64 start, int row1, char *desc1, int row2, char *desc2, const char *backtrace);
-extern void svg_interrupt(u64 start, int row, const char *backtrace);
-extern void svg_text(int Yslot, u64 start, const char *text);
-extern void svg_close(void);
-extern int svg_build_topology_map(char *sib_core, int sib_core_nr,
- char *sib_thr, int sib_thr_nr);
+void open_svg(const char *filename, int cpus, int rows, u64 start, u64 end);
+void svg_ubox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges);
+void svg_lbox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges);
+void svg_fbox(int Yslot, u64 start, u64 end, double height, const char *type, int fd, int err, int merges);
+void svg_box(int Yslot, u64 start, u64 end, const char *type);
+void svg_blocked(int Yslot, int cpu, u64 start, u64 end, const char *backtrace);
+void svg_running(int Yslot, int cpu, u64 start, u64 end, const char *backtrace);
+void svg_waiting(int Yslot, int cpu, u64 start, u64 end, const char *backtrace);
+void svg_cpu_box(int cpu, u64 max_frequency, u64 turbo_frequency);
+
+
+void svg_process(int cpu, u64 start, u64 end, int pid, const char *name, const char *backtrace);
+void svg_cstate(int cpu, u64 start, u64 end, int type);
+void svg_pstate(int cpu, u64 start, u64 end, u64 freq);
+
+
+void svg_time_grid(double min_thickness);
+void svg_io_legenda(void);
+void svg_legenda(void);
+void svg_wakeline(u64 start, int row1, int row2, const char *backtrace);
+void svg_partial_wakeline(u64 start, int row1, char *desc1, int row2, char *desc2, const char *backtrace);
+void svg_interrupt(u64 start, int row, const char *backtrace);
+void svg_text(int Yslot, u64 start, const char *text);
+void svg_close(void);
+int svg_build_topology_map(char *sib_core, int sib_core_nr, char *sib_thr, int sib_thr_nr);
extern int svg_page_width;
extern u64 svg_highlight;
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index b1dd68f358fc..bc229a74c6a9 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -793,6 +793,7 @@ int dso__load_sym(struct dso *dso, struct map *map,
uint32_t idx;
GElf_Ehdr ehdr;
GElf_Shdr shdr;
+ GElf_Shdr tshdr;
Elf_Data *syms, *opddata = NULL;
GElf_Sym sym;
Elf_Scn *sec, *sec_strndx;
@@ -832,6 +833,9 @@ int dso__load_sym(struct dso *dso, struct map *map,
sec = syms_ss->symtab;
shdr = syms_ss->symshdr;
+ if (elf_section_by_name(elf, &ehdr, &tshdr, ".text", NULL))
+ dso->text_offset = tshdr.sh_addr - tshdr.sh_offset;
+
if (runtime_ss->opdsec)
opddata = elf_rawdata(runtime_ss->opdsec, NULL);
@@ -880,12 +884,8 @@ int dso__load_sym(struct dso *dso, struct map *map,
* Handle any relocation of vdso necessary because older kernels
* attempted to prelink vdso to its virtual address.
*/
- if (dso__is_vdso(dso)) {
- GElf_Shdr tshdr;
-
- if (elf_section_by_name(elf, &ehdr, &tshdr, ".text", NULL))
- map->reloc = map->start - tshdr.sh_addr + tshdr.sh_offset;
- }
+ if (dso__is_vdso(dso))
+ map->reloc = map->start - dso->text_offset;
dso->adjust_symbols = runtime_ss->adjust_symbols || ref_reloc(kmap);
/*
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index a937053a0ae0..c8b7544d9267 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -34,8 +34,8 @@
#endif
#ifdef HAVE_LIBELF_SUPPORT
-extern Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
- GElf_Shdr *shp, const char *name, size_t *idx);
+Elf_Scn *elf_section_by_name(Elf *elf, GElf_Ehdr *ep,
+ GElf_Shdr *shp, const char *name, size_t *idx);
#endif
#ifndef DMGL_PARAMS
diff --git a/tools/perf/util/usage.c b/tools/perf/util/usage.c
index 6adfa18cdd4e..996046a66fe5 100644
--- a/tools/perf/util/usage.c
+++ b/tools/perf/util/usage.c
@@ -41,15 +41,9 @@ static void warn_builtin(const char *warn, va_list params)
/* If we are in a dlopen()ed .so, a write to a global variable would segfault
* (ugh), so keep things static. */
static void (*usage_routine)(const char *err) NORETURN = usage_builtin;
-static void (*die_routine)(const char *err, va_list params) NORETURN = die_builtin;
static void (*error_routine)(const char *err, va_list params) = error_builtin;
static void (*warn_routine)(const char *err, va_list params) = warn_builtin;
-void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN)
-{
- die_routine = routine;
-}
-
void set_warning_routine(void (*routine)(const char *err, va_list params))
{
warn_routine = routine;
@@ -65,7 +59,7 @@ void die(const char *err, ...)
va_list params;
va_start(params, err);
- die_routine(err, params);
+ die_builtin(err, params);
va_end(params);
}
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index d0d50cef8b2a..8298d607c738 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -133,25 +133,15 @@ extern char buildid_dir[];
#define PERF_GTK_DSO "libperf-gtk.so"
/* General helper functions */
-extern void usage(const char *err) NORETURN;
-extern void die(const char *err, ...) NORETURN __attribute__((format (printf, 1, 2)));
-extern int error(const char *err, ...) __attribute__((format (printf, 1, 2)));
-extern void warning(const char *err, ...) __attribute__((format (printf, 1, 2)));
+void usage(const char *err) NORETURN;
+void die(const char *err, ...) NORETURN __attribute__((format (printf, 1, 2)));
+int error(const char *err, ...) __attribute__((format (printf, 1, 2)));
+void warning(const char *err, ...) __attribute__((format (printf, 1, 2)));
-#include "../../../include/linux/stringify.h"
+void set_warning_routine(void (*routine)(const char *err, va_list params));
-#define DIE_IF(cnd) \
- do { if (cnd) \
- die(" at (" __FILE__ ":" __stringify(__LINE__) "): " \
- __stringify(cnd) "\n"); \
- } while (0)
-
-
-extern void set_die_routine(void (*routine)(const char *err, va_list params) NORETURN);
-extern void set_warning_routine(void (*routine)(const char *err, va_list params));
-
-extern int prefixcmp(const char *str, const char *prefix);
-extern void set_buildid_dir(const char *dir);
+int prefixcmp(const char *str, const char *prefix);
+void set_buildid_dir(const char *dir);
#ifdef __GLIBC_PREREQ
#if __GLIBC_PREREQ(2, 1)
@@ -172,8 +162,7 @@ static inline char *gitstrchrnul(const char *s, int c)
/*
* Wrappers:
*/
-extern char *xstrdup(const char *str);
-extern void *xrealloc(void *ptr, size_t size) __attribute__((weak));
+void *xrealloc(void *ptr, size_t size) __attribute__((weak));
static inline void *zalloc(size_t size)
diff --git a/tools/perf/util/wrapper.c b/tools/perf/util/wrapper.c
index 19f15b650703..5f1a07c4b87b 100644
--- a/tools/perf/util/wrapper.c
+++ b/tools/perf/util/wrapper.c
@@ -12,18 +12,6 @@ static inline void release_pack_memory(size_t size __maybe_unused,
{
}
-char *xstrdup(const char *str)
-{
- char *ret = strdup(str);
- if (!ret) {
- release_pack_memory(strlen(str) + 1, -1);
- ret = strdup(str);
- if (!ret)
- die("Out of memory, strdup failed");
- }
- return ret;
-}
-
void *xrealloc(void *ptr, size_t size)
{
void *ret = realloc(ptr, size);
diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8
index 622db685b4f9..89a55d5e32f3 100644
--- a/tools/power/x86/turbostat/turbostat.8
+++ b/tools/power/x86/turbostat/turbostat.8
@@ -34,7 +34,10 @@ name as necessary to disambiguate it from others is necessary. Note that option
\fB--debug\fP displays additional system configuration information. Invoking this parameter
more than once may also enable internal turbostat debug information.
.PP
-\fB--interval seconds\fP overrides the default 5-second measurement interval.
+\fB--interval seconds\fP overrides the default 5.0 second measurement interval.
+.PP
+\fB--out output_file\fP writes turbostat output to the specified output_file.
+The file is truncated if it already exists, and it is created if it does not exist.
.PP
\fB--help\fP displays usage for the most common parameters.
.PP
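For example, combining the options above (an illustrative invocation; the output file name is arbitrary):

	turbostat --interval 10 --out turbostat.log

samples every 10 seconds and writes all output to turbostat.log rather than to the terminal.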
@@ -61,7 +64,7 @@ displays the statistics gathered since it was forked.
.nf
\fBCPU\fP Linux CPU (logical processor) number. Yes, it is okay that on many systems the CPUs are not listed in numerical order -- for efficiency reasons, turbostat runs in topology order, so HT siblings appear together.
\fBAVG_MHz\fP number of cycles executed divided by time elapsed.
-\fB%Busy\fP percent of the interval that the CPU retired instructions, aka. % of time in "C0" state.
+\fBBusy%\fP percent of the interval that the CPU retired instructions, aka. % of time in "C0" state.
\fBBzy_MHz\fP average clock rate while the CPU was busy (in "c0" state).
\fBTSC_MHz\fP average MHz that the TSC ran during the entire interval.
.fi
@@ -83,13 +86,14 @@ Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading T
\fBRAM_%\fP percent of the interval that RAPL throttling was active on DRAM.
.fi
.PP
-.SH EXAMPLE
+.SH PERIODIC EXAMPLE
Without any parameters, turbostat displays statistics every 5 seconds.
-(override interval with "-i sec" option, or specify a command
-for turbostat to fork).
+Periodic output goes to stdout, by default, unless --out is used to specify an output file.
+The 5-second interval can be changed with the "-i sec" option.
+Or a command may be specified as in "FORK EXAMPLE" below.
.nf
[root@hsw]# ./turbostat
- CPU Avg_MHz %Busy Bzy_MHz TSC_MHz
+ CPU Avg_MHz Busy% Bzy_MHz TSC_MHz
- 488 12.51 3898 3498
0 0 0.01 3885 3498
4 3897 99.99 3898 3498
@@ -145,7 +149,7 @@ cpu0: MSR_IA32_THERM_STATUS: 0x88340000 (48 C +/- 1)
cpu1: MSR_IA32_THERM_STATUS: 0x88440000 (32 C +/- 1)
cpu2: MSR_IA32_THERM_STATUS: 0x88450000 (31 C +/- 1)
cpu3: MSR_IA32_THERM_STATUS: 0x88490000 (27 C +/- 1)
- Core CPU Avg_MHz %Busy Bzy_MHz TSC_MHz SMI CPU%c1 CPU%c3 CPU%c6 CPU%c7 CoreTmp PkgTmp PkgWatt CorWatt GFXWatt
+ Core CPU Avg_MHz Busy% Bzy_MHz TSC_MHz SMI CPU%c1 CPU%c3 CPU%c6 CPU%c7 CoreTmp PkgTmp PkgWatt CorWatt GFXWatt
- - 493 12.64 3898 3498 0 12.64 0.00 0.00 74.72 47 47 21.62 13.74 0.00
0 0 4 0.11 3894 3498 0 99.89 0.00 0.00 0.00 47 47 21.62 13.74 0.00
0 4 3897 99.98 3898 3498 0 0.02
@@ -171,14 +175,16 @@ The --debug option adds additional columns to the measurement ouput, including C
See the field definitions above.
.SH FORK EXAMPLE
If turbostat is invoked with a command, it will fork that command
-and output the statistics gathered when the command exits.
+and output the statistics gathered after the command exits.
+In this case, turbostat output goes to stderr, by default.
+Output can instead be saved to a file using the --out option.
eg. Here a cycle soaker is run on 1 CPU (see %c0) for a few seconds
until ^C while the other CPUs are mostly idle:
.nf
root@hsw: turbostat cat /dev/zero > /dev/null
^C
- CPU Avg_MHz %Busy Bzy_MHz TSC_MHz
+ CPU Avg_MHz Busy% Bzy_MHz TSC_MHz
- 482 12.51 3854 3498
0 0 0.01 1960 3498
4 0 0.00 2128 3498
@@ -192,12 +198,12 @@ root@hsw: turbostat cat /dev/zero > /dev/null
.fi
Above the cycle soaker drives cpu5 up its 3.9 GHz turbo limit.
-The first row shows the average MHz and %Busy across all the processors in the system.
+The first row shows the average MHz and Busy% across all the processors in the system.
Note that the Avg_MHz column reflects the total number of cycles executed
-divided by the measurement interval. If the %Busy column is 100%,
+divided by the measurement interval. If the Busy% column is 100%,
then the processor was running at that speed the entire interval.
-The Avg_MHz multiplied by the %Busy results in the Bzy_MHz --
+The Avg_MHz multiplied by the Busy% results in the Bzy_MHz --
which is the average frequency while the processor was executing --
not including any non-busy idle time.
@@ -233,7 +239,7 @@ in the brand string in /proc/cpuinfo. On a system where
the TSC stops in idle, TSC_MHz will drop
below the processor's base frequency.
-%Busy = MPERF_delta/TSC_delta
+Busy% = MPERF_delta/TSC_delta
Bzy_MHz = TSC_delta/APERF_delta/MPERF_delta/measurement_interval
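
Editor's note: a minimal, self-contained sketch of how these statistics combine, assuming the conventional grouping Busy% = MPERF_delta/TSC_delta and Bzy_MHz = TSC_delta * APERF_delta / MPERF_delta / interval. Not part of the patch; the sample deltas are invented, chosen only to land near the example output shown earlier.

/* Editor's sketch: derive Busy%, Avg_MHz and Bzy_MHz from counter deltas. */
#include <stdio.h>

int main(void)
{
	/* assumed deltas over a 1.0 second interval (illustrative values) */
	unsigned long long tsc_delta   = 3498000000ULL;	/* TSC ticks        */
	unsigned long long mperf_delta =  437250000ULL;	/* C0 reference ticks */
	unsigned long long aperf_delta =  487000000ULL;	/* C0 actual ticks   */
	double interval_sec = 1.0;

	double busy_pct = 100.0 * mperf_delta / tsc_delta;
	double avg_mhz  = aperf_delta / interval_sec / 1000000.0;
	double bzy_mhz  = tsc_delta / 1000000.0 / interval_sec
			  * aperf_delta / mperf_delta;

	printf("Busy%% %.2f  Avg_MHz %.0f  Bzy_MHz %.0f\n",
	       busy_pct, avg_mhz, bzy_mhz);
	return 0;
}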
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index 3fa94e291d16..acbf7ff2ee6e 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -38,12 +38,15 @@
#include <string.h>
#include <ctype.h>
#include <sched.h>
+#include <time.h>
#include <cpuid.h>
#include <linux/capability.h>
#include <errno.h>
char *proc_stat = "/proc/stat";
-unsigned int interval_sec = 5;
+FILE *outf;
+int *fd_percpu;
+struct timespec interval_ts = {5, 0};
unsigned int debug;
unsigned int rapl_joules;
unsigned int summary_only;
@@ -63,6 +66,8 @@ unsigned int do_slm_cstates;
unsigned int use_c1_residency_msr;
unsigned int has_aperf;
unsigned int has_epb;
+unsigned int do_irtl_snb;
+unsigned int do_irtl_hsw;
unsigned int units = 1000000; /* MHz etc */
unsigned int genuine_intel;
unsigned int has_invariant_tsc;
@@ -72,6 +77,7 @@ unsigned int extra_msr_offset64;
unsigned int extra_delta_offset32;
unsigned int extra_delta_offset64;
unsigned int aperf_mperf_multiplier = 1;
+int do_irq = 1;
int do_smi;
double bclk;
double base_hz;
@@ -86,6 +92,10 @@ char *output_buffer, *outp;
unsigned int do_rapl;
unsigned int do_dts;
unsigned int do_ptm;
+unsigned int do_gfx_rc6_ms;
+unsigned long long gfx_cur_rc6_ms;
+unsigned int do_gfx_mhz;
+unsigned int gfx_cur_mhz;
unsigned int tcc_activation_temp;
unsigned int tcc_activation_temp_override;
double rapl_power_units, rapl_time_units;
@@ -98,6 +108,12 @@ unsigned int crystal_hz;
unsigned long long tsc_hz;
int base_cpu;
double discover_bclk(unsigned int family, unsigned int model);
+unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
+ /* IA32_HWP_REQUEST, IA32_HWP_STATUS */
+unsigned int has_hwp_notify; /* IA32_HWP_INTERRUPT */
+unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */
+unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */
+unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */
#define RAPL_PKG (1 << 0)
/* 0x610 MSR_PKG_POWER_LIMIT */
@@ -145,6 +161,7 @@ struct thread_data {
unsigned long long extra_delta64;
unsigned long long extra_msr32;
unsigned long long extra_delta32;
+ unsigned int irq_count;
unsigned int smi_count;
unsigned int cpu_id;
unsigned int flags;
@@ -172,6 +189,8 @@ struct pkg_data {
unsigned long long pkg_any_core_c0;
unsigned long long pkg_any_gfxe_c0;
unsigned long long pkg_both_core_gfxe_c0;
+ long long gfx_rc6_ms;
+ unsigned int gfx_mhz;
unsigned int package_id;
unsigned int energy_pkg; /* MSR_PKG_ENERGY_STATUS */
unsigned int energy_dram; /* MSR_DRAM_ENERGY_STATUS */
@@ -212,6 +231,9 @@ struct topo_params {
struct timeval tv_even, tv_odd, tv_delta;
+int *irq_column_2_cpu; /* /proc/interrupts column numbers */
+int *irqs_per_cpu; /* indexed by cpu_num */
+
void setup_all_buffers(void);
int cpu_is_not_present(int cpu)
@@ -262,23 +284,34 @@ int cpu_migrate(int cpu)
else
return 0;
}
-
-int get_msr(int cpu, off_t offset, unsigned long long *msr)
+int get_msr_fd(int cpu)
{
- ssize_t retval;
char pathname[32];
int fd;
+ fd = fd_percpu[cpu];
+
+ if (fd)
+ return fd;
+
sprintf(pathname, "/dev/cpu/%d/msr", cpu);
fd = open(pathname, O_RDONLY);
if (fd < 0)
err(-1, "%s open failed, try chown or chmod +r /dev/cpu/*/msr, or run as root", pathname);
- retval = pread(fd, msr, sizeof *msr, offset);
- close(fd);
+ fd_percpu[cpu] = fd;
+
+ return fd;
+}
+
+int get_msr(int cpu, off_t offset, unsigned long long *msr)
+{
+ ssize_t retval;
+
+ retval = pread(get_msr_fd(cpu), msr, sizeof(*msr), offset);
if (retval != sizeof *msr)
- err(-1, "%s offset 0x%llx read failed", pathname, (unsigned long long)offset);
+ err(-1, "msr %d offset 0x%llx read failed", cpu, (unsigned long long)offset);
return 0;
}
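
Editor's note: the hunk above replaces an open/read/close sequence per MSR access with a file descriptor cached per CPU. A stand-alone sketch of the same caching pattern, illustrative only (the fixed MAX_CPUS bound and function names are assumptions, and error handling is minimal):

/* Editor's sketch of the per-CPU fd cache pattern used above. */
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>

#define MAX_CPUS 4096			/* assumed upper bound */

static int msr_fd[MAX_CPUS];		/* 0 means "not opened yet" */

static int cached_msr_fd(int cpu)
{
	char path[32];

	if (msr_fd[cpu])		/* reuse an already-open descriptor */
		return msr_fd[cpu];

	snprintf(path, sizeof(path), "/dev/cpu/%d/msr", cpu);
	msr_fd[cpu] = open(path, O_RDONLY);
	if (msr_fd[cpu] < 0) {
		perror(path);
		exit(1);
	}
	return msr_fd[cpu];
}

static int read_msr(int cpu, off_t offset, unsigned long long *val)
{
	return pread(cached_msr_fd(cpu), val, sizeof(*val), offset)
		== sizeof(*val) ? 0 : -1;
}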
@@ -286,8 +319,8 @@ int get_msr(int cpu, off_t offset, unsigned long long *msr)
/*
* Example Format w/ field column widths:
*
- * Package Core CPU Avg_MHz Bzy_MHz TSC_MHz SMI %Busy CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 CoreTmp PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt
- * 123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678
+ * Package Core CPU Avg_MHz Bzy_MHz TSC_MHz IRQ SMI Busy% CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 CoreTmp PkgTmp GFXMHz Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt
+ * 12345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678
*/
void print_header(void)
@@ -301,7 +334,7 @@ void print_header(void)
if (has_aperf)
outp += sprintf(outp, " Avg_MHz");
if (has_aperf)
- outp += sprintf(outp, " %%Busy");
+ outp += sprintf(outp, " Busy%%");
if (has_aperf)
outp += sprintf(outp, " Bzy_MHz");
outp += sprintf(outp, " TSC_MHz");
@@ -318,6 +351,8 @@ void print_header(void)
if (!debug)
goto done;
+ if (do_irq)
+ outp += sprintf(outp, " IRQ");
if (do_smi)
outp += sprintf(outp, " SMI");
@@ -335,6 +370,12 @@ void print_header(void)
if (do_ptm)
outp += sprintf(outp, " PkgTmp");
+ if (do_gfx_rc6_ms)
+ outp += sprintf(outp, " GFX%%rc6");
+
+ if (do_gfx_mhz)
+ outp += sprintf(outp, " GFXMHz");
+
if (do_skl_residency) {
outp += sprintf(outp, " Totl%%C0");
outp += sprintf(outp, " Any%%C0");
@@ -409,6 +450,8 @@ int dump_counters(struct thread_data *t, struct core_data *c,
extra_msr_offset32, t->extra_msr32);
outp += sprintf(outp, "msr0x%x: %016llX\n",
extra_msr_offset64, t->extra_msr64);
+ if (do_irq)
+ outp += sprintf(outp, "IRQ: %08X\n", t->irq_count);
if (do_smi)
outp += sprintf(outp, "SMI: %08X\n", t->smi_count);
}
@@ -504,7 +547,7 @@ int format_counters(struct thread_data *t, struct core_data *c,
outp += sprintf(outp, "%8.0f",
1.0 / units * t->aperf / interval_float);
- /* %Busy */
+ /* Busy% */
if (has_aperf) {
if (!skip_c0)
outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc/tsc_tweak);
@@ -542,6 +585,10 @@ int format_counters(struct thread_data *t, struct core_data *c,
if (!debug)
goto done;
+ /* IRQ */
+ if (do_irq)
+ outp += sprintf(outp, "%8d", t->irq_count);
+
/* SMI */
if (do_smi)
outp += sprintf(outp, "%8d", t->smi_count);
@@ -575,6 +622,20 @@ int format_counters(struct thread_data *t, struct core_data *c,
if (do_ptm)
outp += sprintf(outp, "%8d", p->pkg_temp_c);
+ /* GFXrc6 */
+ if (do_gfx_rc6_ms) {
+ if (p->gfx_rc6_ms == -1) { /* detect counter reset */
+ outp += sprintf(outp, " ***.**");
+ } else {
+ outp += sprintf(outp, "%8.2f",
+ p->gfx_rc6_ms / 10.0 / interval_float);
+ }
+ }
+
+ /* GFXMHz */
+ if (do_gfx_mhz)
+ outp += sprintf(outp, "%8d", p->gfx_mhz);
+
/* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
if (do_skl_residency) {
outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_wtd_core_c0/t->tsc);
@@ -645,15 +706,24 @@ done:
return 0;
}
-void flush_stdout()
+void flush_output_stdout(void)
{
- fputs(output_buffer, stdout);
- fflush(stdout);
+ FILE *filep;
+
+ if (outf == stderr)
+ filep = stdout;
+ else
+ filep = outf;
+
+ fputs(output_buffer, filep);
+ fflush(filep);
+
outp = output_buffer;
}
-void flush_stderr()
+void flush_output_stderr(void)
{
- fputs(output_buffer, stderr);
+ fputs(output_buffer, outf);
+ fflush(outf);
outp = output_buffer;
}
void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
@@ -704,6 +774,14 @@ delta_package(struct pkg_data *new, struct pkg_data *old)
old->pc10 = new->pc10 - old->pc10;
old->pkg_temp_c = new->pkg_temp_c;
+ /* flag an error when rc6 counter resets/wraps */
+ if (old->gfx_rc6_ms > new->gfx_rc6_ms)
+ old->gfx_rc6_ms = -1;
+ else
+ old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms;
+
+ old->gfx_mhz = new->gfx_mhz;
+
DELTA_WRAP32(new->energy_pkg, old->energy_pkg);
DELTA_WRAP32(new->energy_cores, old->energy_cores);
DELTA_WRAP32(new->energy_gfx, old->energy_gfx);
@@ -745,9 +823,9 @@ delta_thread(struct thread_data *new, struct thread_data *old,
} else {
if (!aperf_mperf_unstable) {
- fprintf(stderr, "%s: APERF or MPERF went backwards *\n", progname);
- fprintf(stderr, "* Frequency results do not cover entire interval *\n");
- fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n");
+ fprintf(outf, "%s: APERF or MPERF went backwards *\n", progname);
+ fprintf(outf, "* Frequency results do not cover entire interval *\n");
+ fprintf(outf, "* fix this by running Linux-2.6.30 or later *\n");
aperf_mperf_unstable = 1;
}
@@ -782,7 +860,8 @@ delta_thread(struct thread_data *new, struct thread_data *old,
}
if (old->mperf == 0) {
- if (debug > 1) fprintf(stderr, "cpu%d MPERF 0!\n", old->cpu_id);
+ if (debug > 1)
+ fprintf(outf, "cpu%d MPERF 0!\n", old->cpu_id);
old->mperf = 1; /* divide by 0 protection */
}
@@ -797,6 +876,9 @@ delta_thread(struct thread_data *new, struct thread_data *old,
old->extra_msr32 = new->extra_msr32;
old->extra_msr64 = new->extra_msr64;
+ if (do_irq)
+ old->irq_count = new->irq_count - old->irq_count;
+
if (do_smi)
old->smi_count = new->smi_count - old->smi_count;
}
@@ -826,10 +908,12 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
t->mperf = 0;
t->c1 = 0;
- t->smi_count = 0;
t->extra_delta32 = 0;
t->extra_delta64 = 0;
+ t->irq_count = 0;
+ t->smi_count = 0;
+
/* tells format_counters to dump all fields from this set */
t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE;
@@ -861,6 +945,9 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data
p->rapl_pkg_perf_status = 0;
p->rapl_dram_perf_status = 0;
p->pkg_temp_c = 0;
+
+ p->gfx_rc6_ms = 0;
+ p->gfx_mhz = 0;
}
int sum_counters(struct thread_data *t, struct core_data *c,
struct pkg_data *p)
@@ -873,6 +960,9 @@ int sum_counters(struct thread_data *t, struct core_data *c,
average.threads.extra_delta32 += t->extra_delta32;
average.threads.extra_delta64 += t->extra_delta64;
+ average.threads.irq_count += t->irq_count;
+ average.threads.smi_count += t->smi_count;
+
/* sum per-core values only for 1st thread in core */
if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
return 0;
@@ -910,6 +1000,9 @@ int sum_counters(struct thread_data *t, struct core_data *c,
average.packages.energy_cores += p->energy_cores;
average.packages.energy_gfx += p->energy_gfx;
+ average.packages.gfx_rc6_ms = p->gfx_rc6_ms;
+ average.packages.gfx_mhz = p->gfx_mhz;
+
average.packages.pkg_temp_c = MAX(average.packages.pkg_temp_c, p->pkg_temp_c);
average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status;
@@ -970,7 +1063,6 @@ static unsigned long long rdtsc(void)
return low | ((unsigned long long)high) << 32;
}
-
/*
* get_counters(...)
* migrate to cpu
@@ -980,23 +1072,74 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
int cpu = t->cpu_id;
unsigned long long msr;
+ int aperf_mperf_retry_count = 0;
if (cpu_migrate(cpu)) {
- fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
+ fprintf(outf, "Could not migrate to CPU %d\n", cpu);
return -1;
}
+retry:
t->tsc = rdtsc(); /* we are running on local CPU of interest */
if (has_aperf) {
+ unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
+
+ /*
+ * The TSC, APERF and MPERF must be read together for
+ * APERF/MPERF and MPERF/TSC to give accurate results.
+ *
+ * Unfortunately, APERF and MPERF are read by
+ * individual system call, so delays may occur
+ * between them. If the time to read them
+ * varies by a large amount, we re-read them.
+ */
+
+ /*
+ * This initial dummy APERF read has been seen to
+ * reduce jitter in the subsequent reads.
+ */
+
+ if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
+ return -3;
+
+ t->tsc = rdtsc(); /* re-read close to APERF */
+
+ tsc_before = t->tsc;
+
if (get_msr(cpu, MSR_IA32_APERF, &t->aperf))
return -3;
+
+ tsc_between = rdtsc();
+
if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf))
return -4;
+
+ tsc_after = rdtsc();
+
+ aperf_time = tsc_between - tsc_before;
+ mperf_time = tsc_after - tsc_between;
+
+ /*
+ * If the system call latency to read APERF and MPERF
+ * differ by more than 2x, then try again.
+ */
+ if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) {
+ aperf_mperf_retry_count++;
+ if (aperf_mperf_retry_count < 5)
+ goto retry;
+ else
+ warnx("cpu%d jitter %lld %lld",
+ cpu, aperf_time, mperf_time);
+ }
+ aperf_mperf_retry_count = 0;
+
t->aperf = t->aperf * aperf_mperf_multiplier;
t->mperf = t->mperf * aperf_mperf_multiplier;
}
+ if (do_irq)
+ t->irq_count = irqs_per_cpu[cpu];
if (do_smi) {
if (get_msr(cpu, MSR_SMI_COUNT, &msr))
return -5;
@@ -1124,6 +1267,13 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
return -17;
p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
}
+
+ if (do_gfx_rc6_ms)
+ p->gfx_rc6_ms = gfx_cur_rc6_ms;
+
+ if (do_gfx_mhz)
+ p->gfx_mhz = gfx_cur_mhz;
+
return 0;
}
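
Editor's note: the retry logic added above guards against unequal system-call latency when reading APERF and MPERF back to back. A stand-alone sketch of the same bracketing technique (editor's illustration; the rdtsc helper and 5-retry limit mirror the patch, while the reads being timed are placeholders):

/* Editor's sketch: bracket two reads with timestamps and retry when
 * their latencies differ by more than 2x. */
#include <stdio.h>

static unsigned long long rdtsc(void)
{
	unsigned int low, high;

	asm volatile("rdtsc" : "=a" (low), "=d" (high));
	return low | ((unsigned long long)high) << 32;
}

int read_pair_with_retry(unsigned long long *a, unsigned long long *b)
{
	int retries = 0;

	do {
		unsigned long long t0 = rdtsc();
		*a = rdtsc();			/* stands in for the APERF read */
		unsigned long long t1 = rdtsc();
		*b = rdtsc();			/* stands in for the MPERF read */
		unsigned long long t2 = rdtsc();

		unsigned long long a_time = t1 - t0;
		unsigned long long b_time = t2 - t1;

		if (a_time <= 2 * b_time && b_time <= 2 * a_time)
			return 0;		/* latencies comparable: accept */
	} while (++retries < 5);

	fprintf(stderr, "jitter persists after %d retries\n", retries);
	return -1;
}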
@@ -1159,6 +1309,7 @@ int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S,
int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int amt_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
+int bxt_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
static void
@@ -1175,18 +1326,18 @@ dump_nhm_platform_info(void)
get_msr(base_cpu, MSR_PLATFORM_INFO, &msr);
- fprintf(stderr, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
+ fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
ratio = (msr >> 40) & 0xFF;
- fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency frequency\n",
+ fprintf(outf, "%d * %.0f = %.0f MHz max efficiency frequency\n",
ratio, bclk, ratio * bclk);
ratio = (msr >> 8) & 0xFF;
- fprintf(stderr, "%d * %.0f = %.0f MHz base frequency\n",
+ fprintf(outf, "%d * %.0f = %.0f MHz base frequency\n",
ratio, bclk, ratio * bclk);
get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
- fprintf(stderr, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
+ fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
return;
@@ -1200,16 +1351,16 @@ dump_hsw_turbo_ratio_limits(void)
get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr);
- fprintf(stderr, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr);
+ fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", base_cpu, msr);
ratio = (msr >> 8) & 0xFF;
if (ratio)
- fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 18 active cores\n",
+ fprintf(outf, "%d * %.0f = %.0f MHz max turbo 18 active cores\n",
ratio, bclk, ratio * bclk);
ratio = (msr >> 0) & 0xFF;
if (ratio)
- fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 17 active cores\n",
+ fprintf(outf, "%d * %.0f = %.0f MHz max turbo 17 active cores\n",
ratio, bclk, ratio * bclk);
return;
}
@@ -1222,46 +1373,46 @@ dump_ivt_turbo_ratio_limits(void)
get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr);
- fprintf(stderr, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr);
+ fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", base_cpu, msr);
ratio = (msr >> 56) & 0xFF;
if (ratio)
- fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 16 active cores\n",
+ fprintf(outf, "%d * %.0f = %.0f MHz max turbo 16 active cores\n",
ratio, bclk, ratio * bclk);
ratio = (msr >> 48) & 0xFF;
if (ratio)
- fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 15 active cores\n",
+ fprintf(outf, "%d * %.0f = %.0f MHz max turbo 15 active cores\n",
ratio, bclk, ratio * bclk);
ratio = (msr >> 40) & 0xFF;
if (ratio)
- fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 14 active cores\n",
+ fprintf(outf, "%d * %.0f = %.0f MHz max turbo 14 active cores\n",
ratio, bclk, ratio * bclk);
ratio = (msr >> 32) & 0xFF;
if (ratio)
- fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 13 active cores\n",
+ fprintf(outf, "%d * %.0f = %.0f MHz max turbo 13 active cores\n",
ratio, bclk, ratio * bclk);
ratio = (msr >> 24) & 0xFF;
if (ratio)
- fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 12 active cores\n",
+ fprintf(outf, "%d * %.0f = %.0f MHz max turbo 12 active cores\n",
ratio, bclk, ratio * bclk);
ratio = (msr >> 16) & 0xFF;
if (ratio)
- fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 11 active cores\n",
+ fprintf(outf, "%d * %.0f = %.0f MHz max turbo 11 active cores\n",
ratio, bclk, ratio * bclk);
ratio = (msr >> 8) & 0xFF;
if (ratio)
- fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 10 active cores\n",
+ fprintf(outf, "%d * %.0f = %.0f MHz max turbo 10 active cores\n",
ratio, bclk, ratio * bclk);
ratio = (msr >> 0) & 0xFF;
if (ratio)
- fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 9 active cores\n",
+ fprintf(outf, "%d * %.0f = %.0f MHz max turbo 9 active cores\n",
ratio, bclk, ratio * bclk);
return;
}
@@ -1274,46 +1425,46 @@ dump_nhm_turbo_ratio_limits(void)
get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
- fprintf(stderr, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);
+ fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);
ratio = (msr >> 56) & 0xFF;
if (ratio)
- fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 8 active cores\n",
+ fprintf(outf, "%d * %.0f = %.0f MHz max turbo 8 active cores\n",
ratio, bclk, ratio * bclk);
ratio = (msr >> 48) & 0xFF;
if (ratio)
- fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 7 active cores\n",
+ fprintf(outf, "%d * %.0f = %.0f MHz max turbo 7 active cores\n",
ratio, bclk, ratio * bclk);
ratio = (msr >> 40) & 0xFF;
if (ratio)
- fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 6 active cores\n",
+ fprintf(outf, "%d * %.0f = %.0f MHz max turbo 6 active cores\n",
ratio, bclk, ratio * bclk);
ratio = (msr >> 32) & 0xFF;
if (ratio)
- fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 5 active cores\n",
+ fprintf(outf, "%d * %.0f = %.0f MHz max turbo 5 active cores\n",
ratio, bclk, ratio * bclk);
ratio = (msr >> 24) & 0xFF;
if (ratio)
- fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 4 active cores\n",
+ fprintf(outf, "%d * %.0f = %.0f MHz max turbo 4 active cores\n",
ratio, bclk, ratio * bclk);
ratio = (msr >> 16) & 0xFF;
if (ratio)
- fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 3 active cores\n",
+ fprintf(outf, "%d * %.0f = %.0f MHz max turbo 3 active cores\n",
ratio, bclk, ratio * bclk);
ratio = (msr >> 8) & 0xFF;
if (ratio)
- fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 2 active cores\n",
+ fprintf(outf, "%d * %.0f = %.0f MHz max turbo 2 active cores\n",
ratio, bclk, ratio * bclk);
ratio = (msr >> 0) & 0xFF;
if (ratio)
- fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n",
+ fprintf(outf, "%d * %.0f = %.0f MHz max turbo 1 active cores\n",
ratio, bclk, ratio * bclk);
return;
}
@@ -1321,21 +1472,23 @@ dump_nhm_turbo_ratio_limits(void)
static void
dump_knl_turbo_ratio_limits(void)
{
- int cores;
- unsigned int ratio;
+ const unsigned int buckets_no = 7;
+
unsigned long long msr;
- int delta_cores;
- int delta_ratio;
- int i;
+ int delta_cores, delta_ratio;
+ int i, b_nr;
+ unsigned int cores[buckets_no];
+ unsigned int ratio[buckets_no];
get_msr(base_cpu, MSR_NHM_TURBO_RATIO_LIMIT, &msr);
- fprintf(stderr, "cpu%d: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n",
+ fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n",
base_cpu, msr);
/**
* Turbo encoding in KNL is as follows:
- * [7:0] -- Base value of number of active cores of bucket 1.
+ * [0] -- Reserved
+ * [7:1] -- Base value of number of active cores of bucket 1.
* [15:8] -- Base value of freq ratio of bucket 1.
* [20:16] -- +ve delta of number of active cores of bucket 2.
* i.e. active cores of bucket 2 =
@@ -1354,29 +1507,25 @@ dump_knl_turbo_ratio_limits(void)
* [60:56]-- +ve delta of number of active cores of bucket 7.
* [63:61]-- -ve delta of freq ratio of bucket 7.
*/
- cores = msr & 0xFF;
- ratio = (msr >> 8) && 0xFF;
- if (ratio > 0)
- fprintf(stderr,
- "%d * %.0f = %.0f MHz max turbo %d active cores\n",
- ratio, bclk, ratio * bclk, cores);
-
- for (i = 16; i < 64; i = i + 8) {
+
+ b_nr = 0;
+ cores[b_nr] = (msr & 0xFF) >> 1;
+ ratio[b_nr] = (msr >> 8) & 0xFF;
+
+ for (i = 16; i < 64; i += 8) {
delta_cores = (msr >> i) & 0x1F;
- delta_ratio = (msr >> (i + 5)) && 0x7;
- if (!delta_cores || !delta_ratio)
- return;
- cores = cores + delta_cores;
- ratio = ratio - delta_ratio;
-
- /** -ve ratios will make successive ratio calculations
- * negative. Hence return instead of carrying on.
- */
- if (ratio > 0)
- fprintf(stderr,
- "%d * %.0f = %.0f MHz max turbo %d active cores\n",
- ratio, bclk, ratio * bclk, cores);
+ delta_ratio = (msr >> (i + 5)) & 0x7;
+
+ cores[b_nr + 1] = cores[b_nr] + delta_cores;
+ ratio[b_nr + 1] = ratio[b_nr] - delta_ratio;
+ b_nr++;
}
+
+ for (i = buckets_no - 1; i >= 0; i--)
+ if (i > 0 ? ratio[i] != ratio[i - 1] : 1)
+ fprintf(outf,
+ "%d * %.0f = %.0f MHz max turbo %d active cores\n",
+ ratio[i], bclk, ratio[i] * bclk, cores[i]);
}
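
Editor's note: a compact restatement of the 7-bucket KNL turbo decoding introduced above, factored out as a pure bit-manipulation function (editor's sketch; the function name is illustrative and no MSR access is performed):

/* Editor's sketch: decode the bucket encoding documented in the comment above. */
#include <stdio.h>

void decode_knl_buckets(unsigned long long msr)
{
	unsigned int cores[7], ratio[7];
	int i, b;

	cores[0] = (msr & 0xFF) >> 1;		/* [7:1]  base active cores */
	ratio[0] = (msr >> 8) & 0xFF;		/* [15:8] base ratio        */

	for (b = 1, i = 16; i < 64; i += 8, b++) {
		cores[b] = cores[b - 1] + ((msr >> i) & 0x1F);	    /* +delta cores */
		ratio[b] = ratio[b - 1] - ((msr >> (i + 5)) & 0x7); /* -delta ratio */
	}

	for (b = 0; b < 7; b++)
		printf("bucket %d: ratio %u with up to %u active cores\n",
		       b, ratio[b], cores[b]);
}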
static void
@@ -1389,15 +1538,15 @@ dump_nhm_cst_cfg(void)
#define SNB_C1_AUTO_UNDEMOTE (1UL << 27)
#define SNB_C3_AUTO_UNDEMOTE (1UL << 28)
- fprintf(stderr, "cpu%d: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", base_cpu, msr);
+ fprintf(outf, "cpu%d: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", base_cpu, msr);
- fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n",
+ fprintf(outf, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n",
(msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "",
(msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
(msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
(msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
(msr & (1 << 15)) ? "" : "UN",
- (unsigned int)msr & 7,
+ (unsigned int)msr & 0xF,
pkg_cstate_limit_strings[pkg_cstate_limit]);
return;
}
@@ -1408,48 +1557,100 @@ dump_config_tdp(void)
unsigned long long msr;
get_msr(base_cpu, MSR_CONFIG_TDP_NOMINAL, &msr);
- fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr);
- fprintf(stderr, " (base_ratio=%d)\n", (unsigned int)msr & 0xEF);
+ fprintf(outf, "cpu%d: MSR_CONFIG_TDP_NOMINAL: 0x%08llx", base_cpu, msr);
+ fprintf(outf, " (base_ratio=%d)\n", (unsigned int)msr & 0xFF);
get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_1, &msr);
- fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr);
+ fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_1: 0x%08llx (", base_cpu, msr);
if (msr) {
- fprintf(stderr, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0xEFFF);
- fprintf(stderr, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0xEFFF);
- fprintf(stderr, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xEF);
- fprintf(stderr, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0xEFFF);
+ fprintf(outf, "PKG_MIN_PWR_LVL1=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
+ fprintf(outf, "PKG_MAX_PWR_LVL1=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
+ fprintf(outf, "LVL1_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
+ fprintf(outf, "PKG_TDP_LVL1=%d", (unsigned int)(msr) & 0x7FFF);
}
- fprintf(stderr, ")\n");
+ fprintf(outf, ")\n");
get_msr(base_cpu, MSR_CONFIG_TDP_LEVEL_2, &msr);
- fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr);
+ fprintf(outf, "cpu%d: MSR_CONFIG_TDP_LEVEL_2: 0x%08llx (", base_cpu, msr);
if (msr) {
- fprintf(stderr, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0xEFFF);
- fprintf(stderr, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0xEFFF);
- fprintf(stderr, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xEF);
- fprintf(stderr, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0xEFFF);
+ fprintf(outf, "PKG_MIN_PWR_LVL2=%d ", (unsigned int)(msr >> 48) & 0x7FFF);
+ fprintf(outf, "PKG_MAX_PWR_LVL2=%d ", (unsigned int)(msr >> 32) & 0x7FFF);
+ fprintf(outf, "LVL2_RATIO=%d ", (unsigned int)(msr >> 16) & 0xFF);
+ fprintf(outf, "PKG_TDP_LVL2=%d", (unsigned int)(msr) & 0x7FFF);
}
- fprintf(stderr, ")\n");
+ fprintf(outf, ")\n");
get_msr(base_cpu, MSR_CONFIG_TDP_CONTROL, &msr);
- fprintf(stderr, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr);
+ fprintf(outf, "cpu%d: MSR_CONFIG_TDP_CONTROL: 0x%08llx (", base_cpu, msr);
if ((msr) & 0x3)
- fprintf(stderr, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3);
- fprintf(stderr, " lock=%d", (unsigned int)(msr >> 31) & 1);
- fprintf(stderr, ")\n");
-
+ fprintf(outf, "TDP_LEVEL=%d ", (unsigned int)(msr) & 0x3);
+ fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
+ fprintf(outf, ")\n");
+
get_msr(base_cpu, MSR_TURBO_ACTIVATION_RATIO, &msr);
- fprintf(stderr, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr);
- fprintf(stderr, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0x7F);
- fprintf(stderr, " lock=%d", (unsigned int)(msr >> 31) & 1);
- fprintf(stderr, ")\n");
+ fprintf(outf, "cpu%d: MSR_TURBO_ACTIVATION_RATIO: 0x%08llx (", base_cpu, msr);
+ fprintf(outf, "MAX_NON_TURBO_RATIO=%d", (unsigned int)(msr) & 0xFF);
+ fprintf(outf, " lock=%d", (unsigned int)(msr >> 31) & 1);
+ fprintf(outf, ")\n");
+}
+
+unsigned int irtl_time_units[] = {1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };
+
+void print_irtl(void)
+{
+ unsigned long long msr;
+
+ get_msr(base_cpu, MSR_PKGC3_IRTL, &msr);
+ fprintf(outf, "cpu%d: MSR_PKGC3_IRTL: 0x%08llx (", base_cpu, msr);
+ fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+ (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+
+ get_msr(base_cpu, MSR_PKGC6_IRTL, &msr);
+ fprintf(outf, "cpu%d: MSR_PKGC6_IRTL: 0x%08llx (", base_cpu, msr);
+ fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+ (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+
+ get_msr(base_cpu, MSR_PKGC7_IRTL, &msr);
+ fprintf(outf, "cpu%d: MSR_PKGC7_IRTL: 0x%08llx (", base_cpu, msr);
+ fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+ (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+
+ if (!do_irtl_hsw)
+ return;
+
+ get_msr(base_cpu, MSR_PKGC8_IRTL, &msr);
+ fprintf(outf, "cpu%d: MSR_PKGC8_IRTL: 0x%08llx (", base_cpu, msr);
+ fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+ (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+
+ get_msr(base_cpu, MSR_PKGC9_IRTL, &msr);
+ fprintf(outf, "cpu%d: MSR_PKGC9_IRTL: 0x%08llx (", base_cpu, msr);
+ fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+ (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+
+ get_msr(base_cpu, MSR_PKGC10_IRTL, &msr);
+ fprintf(outf, "cpu%d: MSR_PKGC10_IRTL: 0x%08llx (", base_cpu, msr);
+ fprintf(outf, "%svalid, %lld ns)\n", msr & (1 << 15) ? "" : "NOT",
+ (msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
+
+}
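
Editor's note: a minimal sketch of how one PKGCn IRTL value decodes, mirroring the masks used by print_irtl() above (the sample input would be arbitrary; this is an illustration, not part of the patch):

/* Editor's sketch: decode a single IRTL MSR value as print_irtl() does. */
#include <stdio.h>

static const unsigned int irtl_units_ns[] =
	{1, 32, 1024, 32768, 1048576, 33554432, 0, 0};

void decode_irtl(unsigned long long msr)
{
	unsigned long long ns = (msr & 0x3FF) * irtl_units_ns[(msr >> 10) & 0x3];

	printf("0x%08llx: %svalid, %llu ns\n",
	       msr, (msr & (1 << 15)) ? "" : "NOT ", ns);
}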
+void free_fd_percpu(void)
+{
+ int i;
+
+ for (i = 0; i < topo.max_cpu_num; ++i) {
+ if (fd_percpu[i] != 0)
+ close(fd_percpu[i]);
+ }
+
+ free(fd_percpu);
}
void free_all_buffers(void)
{
CPU_FREE(cpu_present_set);
cpu_present_set = NULL;
- cpu_present_set = 0;
+ cpu_present_setsize = 0;
CPU_FREE(cpu_affinity_set);
cpu_affinity_set = NULL;
@@ -1474,6 +1675,11 @@ void free_all_buffers(void)
free(output_buffer);
output_buffer = NULL;
outp = NULL;
+
+ free_fd_percpu();
+
+ free(irq_column_2_cpu);
+ free(irqs_per_cpu);
}
/*
@@ -1481,7 +1687,7 @@ void free_all_buffers(void)
*/
FILE *fopen_or_die(const char *path, const char *mode)
{
- FILE *filep = fopen(path, "r");
+ FILE *filep = fopen(path, mode);
if (!filep)
err(1, "%s: open failed", path);
return filep;
@@ -1696,6 +1902,136 @@ int mark_cpu_present(int cpu)
return 0;
}
+/*
+ * snapshot_proc_interrupts()
+ *
+ * read and record summary of /proc/interrupts
+ *
+ * return 1 if config change requires a restart, else return 0
+ */
+int snapshot_proc_interrupts(void)
+{
+ static FILE *fp;
+ int column, retval;
+
+ if (fp == NULL)
+ fp = fopen_or_die("/proc/interrupts", "r");
+ else
+ rewind(fp);
+
+ /* read 1st line of /proc/interrupts to get cpu* name for each column */
+ for (column = 0; column < topo.num_cpus; ++column) {
+ int cpu_number;
+
+ retval = fscanf(fp, " CPU%d", &cpu_number);
+ if (retval != 1)
+ break;
+
+ if (cpu_number > topo.max_cpu_num) {
+ warn("/proc/interrupts: cpu%d: > %d", cpu_number, topo.max_cpu_num);
+ return 1;
+ }
+
+ irq_column_2_cpu[column] = cpu_number;
+ irqs_per_cpu[cpu_number] = 0;
+ }
+
+ /* read /proc/interrupt count lines and sum up irqs per cpu */
+ while (1) {
+ int column;
+ char buf[64];
+
+ retval = fscanf(fp, " %s:", buf); /* flush irq# "N:" */
+ if (retval != 1)
+ break;
+
+ /* read the count per cpu */
+ for (column = 0; column < topo.num_cpus; ++column) {
+
+ int cpu_number, irq_count;
+
+ retval = fscanf(fp, " %d", &irq_count);
+ if (retval != 1)
+ break;
+
+ cpu_number = irq_column_2_cpu[column];
+ irqs_per_cpu[cpu_number] += irq_count;
+
+ }
+
+ while (getc(fp) != '\n')
+ ; /* flush interrupt description */
+
+ }
+ return 0;
+}
+/*
+ * snapshot_gfx_rc6_ms()
+ *
+ * record snapshot of
+ * /sys/class/drm/card0/power/rc6_residency_ms
+ *
+ * return 1 if config change requires a restart, else return 0
+ */
+int snapshot_gfx_rc6_ms(void)
+{
+ FILE *fp;
+ int retval;
+
+ fp = fopen_or_die("/sys/class/drm/card0/power/rc6_residency_ms", "r");
+
+ retval = fscanf(fp, "%lld", &gfx_cur_rc6_ms);
+ if (retval != 1)
+ err(1, "GFX rc6");
+
+ fclose(fp);
+
+ return 0;
+}
+/*
+ * snapshot_gfx_mhz()
+ *
+ * record snapshot of
+ * /sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz
+ *
+ * return 1 if config change requires a restart, else return 0
+ */
+int snapshot_gfx_mhz(void)
+{
+ static FILE *fp;
+ int retval;
+
+ if (fp == NULL)
+ fp = fopen_or_die("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", "r");
+ else
+ rewind(fp);
+
+ retval = fscanf(fp, "%d", &gfx_cur_mhz);
+ if (retval != 1)
+ err(1, "GFX MHz");
+
+ return 0;
+}
+
+/*
+ * snapshot /proc and /sys files
+ *
+ * return 1 if configuration restart needed, else return 0
+ */
+int snapshot_proc_sysfs_files(void)
+{
+ if (snapshot_proc_interrupts())
+ return 1;
+
+ if (do_gfx_rc6_ms)
+ snapshot_gfx_rc6_ms();
+
+ if (do_gfx_mhz)
+ snapshot_gfx_mhz();
+
+ return 0;
+}
+
void turbostat_loop()
{
int retval;
@@ -1704,6 +2040,7 @@ void turbostat_loop()
restart:
restarted++;
+ snapshot_proc_sysfs_files();
retval = for_all_cpus(get_counters, EVEN_COUNTERS);
if (retval < -1) {
exit(retval);
@@ -1722,7 +2059,9 @@ restart:
re_initialize();
goto restart;
}
- sleep(interval_sec);
+ nanosleep(&interval_ts, NULL);
+ if (snapshot_proc_sysfs_files())
+ goto restart;
retval = for_all_cpus(get_counters, ODD_COUNTERS);
if (retval < -1) {
exit(retval);
@@ -1735,8 +2074,10 @@ restart:
for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS);
compute_average(EVEN_COUNTERS);
format_all_counters(EVEN_COUNTERS);
- flush_stdout();
- sleep(interval_sec);
+ flush_output_stdout();
+ nanosleep(&interval_ts, NULL);
+ if (snapshot_proc_sysfs_files())
+ goto restart;
retval = for_all_cpus(get_counters, EVEN_COUNTERS);
if (retval < -1) {
exit(retval);
@@ -1749,7 +2090,7 @@ restart:
for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS);
compute_average(ODD_COUNTERS);
format_all_counters(ODD_COUNTERS);
- flush_stdout();
+ flush_output_stdout();
}
}
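
Editor's note: the loop above now sleeps with nanosleep() on interval_ts instead of sleep() on whole seconds, which is what allows the man page's "5.0 second" default and fractional --interval values. A minimal sketch of turning a decimal-seconds interval into that timespec (editor's illustration; the option parsing itself is not shown in this section):

/* Editor's sketch: convert e.g. "--interval 2.5" into a struct timespec. */
#include <time.h>

struct timespec interval_from_seconds(double seconds)
{
	struct timespec ts;

	ts.tv_sec = (time_t)seconds;
	ts.tv_nsec = (long)((seconds - ts.tv_sec) * 1000000000.0);
	return ts;
}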
@@ -1858,6 +2199,9 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
case 0x56: /* BDX-DE */
case 0x4E: /* SKL */
case 0x5E: /* SKL */
+ case 0x8E: /* KBL */
+ case 0x9E: /* KBL */
+ case 0x55: /* SKX */
pkg_cstate_limits = hsw_pkg_cstate_limits;
break;
case 0x37: /* BYT */
@@ -1870,6 +2214,9 @@ int probe_nhm_msrs(unsigned int family, unsigned int model)
case 0x57: /* PHI */
pkg_cstate_limits = phi_pkg_cstate_limits;
break;
+ case 0x5C: /* BXT */
+ pkg_cstate_limits = bxt_pkg_cstate_limits;
+ break;
default:
return 0;
}
@@ -1889,6 +2236,7 @@ int has_nhm_turbo_ratio_limit(unsigned int family, unsigned int model)
/* Nehalem compatible, but do not include turbo-ratio limit support */
case 0x2E: /* Nehalem-EX Xeon - Beckton */
case 0x2F: /* Westmere-EX Xeon - Eagleton */
+ case 0x57: /* PHI - Knights Landing (different MSR definition) */
return 0;
default:
return 1;
@@ -1961,6 +2309,9 @@ int has_config_tdp(unsigned int family, unsigned int model)
case 0x56: /* BDX-DE */
case 0x4E: /* SKL */
case 0x5E: /* SKL */
+ case 0x8E: /* KBL */
+ case 0x9E: /* KBL */
+ case 0x55: /* SKX */
case 0x57: /* Knights Landing */
return 1;
@@ -2016,7 +2367,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
return 0;
if (cpu_migrate(cpu)) {
- fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
+ fprintf(outf, "Could not migrate to CPU %d\n", cpu);
return -1;
}
@@ -2037,7 +2388,98 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p)
epb_string = "custom";
break;
}
- fprintf(stderr, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string);
+ fprintf(outf, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string);
+
+ return 0;
+}
+/*
+ * print_hwp()
+ * Decode the MSR_HWP_CAPABILITIES
+ */
+int print_hwp(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+ unsigned long long msr;
+ int cpu;
+
+ if (!has_hwp)
+ return 0;
+
+ cpu = t->cpu_id;
+
+ /* MSR_HWP_CAPABILITIES is per-package */
+ if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
+ return 0;
+
+ if (cpu_migrate(cpu)) {
+ fprintf(outf, "Could not migrate to CPU %d\n", cpu);
+ return -1;
+ }
+
+ if (get_msr(cpu, MSR_PM_ENABLE, &msr))
+ return 0;
+
+ fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n",
+ cpu, msr, (msr & (1 << 0)) ? "" : "No-");
+
+ /* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */
+ if ((msr & (1 << 0)) == 0)
+ return 0;
+
+ if (get_msr(cpu, MSR_HWP_CAPABILITIES, &msr))
+ return 0;
+
+ fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx "
+ "(high 0x%x guar 0x%x eff 0x%x low 0x%x)\n",
+ cpu, msr,
+ (unsigned int)HWP_HIGHEST_PERF(msr),
+ (unsigned int)HWP_GUARANTEED_PERF(msr),
+ (unsigned int)HWP_MOSTEFFICIENT_PERF(msr),
+ (unsigned int)HWP_LOWEST_PERF(msr));
+
+ if (get_msr(cpu, MSR_HWP_REQUEST, &msr))
+ return 0;
+
+ fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx "
+ "(min 0x%x max 0x%x des 0x%x epp 0x%x window 0x%x pkg 0x%x)\n",
+ cpu, msr,
+ (unsigned int)(((msr) >> 0) & 0xff),
+ (unsigned int)(((msr) >> 8) & 0xff),
+ (unsigned int)(((msr) >> 16) & 0xff),
+ (unsigned int)(((msr) >> 24) & 0xff),
+ (unsigned int)(((msr) >> 32) & 0xff3),
+ (unsigned int)(((msr) >> 42) & 0x1));
+
+ if (has_hwp_pkg) {
+ if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr))
+ return 0;
+
+ fprintf(outf, "cpu%d: MSR_HWP_REQUEST_PKG: 0x%08llx "
+ "(min 0x%x max 0x%x des 0x%x epp 0x%x window 0x%x)\n",
+ cpu, msr,
+ (unsigned int)(((msr) >> 0) & 0xff),
+ (unsigned int)(((msr) >> 8) & 0xff),
+ (unsigned int)(((msr) >> 16) & 0xff),
+ (unsigned int)(((msr) >> 24) & 0xff),
+ (unsigned int)(((msr) >> 32) & 0xff3));
+ }
+ if (has_hwp_notify) {
+ if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr))
+ return 0;
+
+ fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx "
+ "(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n",
+ cpu, msr,
+ ((msr) & 0x1) ? "EN" : "Dis",
+ ((msr) & 0x2) ? "EN" : "Dis");
+ }
+ if (get_msr(cpu, MSR_HWP_STATUS, &msr))
+ return 0;
+
+ fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
+ "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n",
+ cpu, msr,
+ ((msr) & 0x1) ? "" : "No-",
+ ((msr) & 0x2) ? "" : "No-");
return 0;
}
@@ -2057,14 +2499,14 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
return 0;
if (cpu_migrate(cpu)) {
- fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
+ fprintf(outf, "Could not migrate to CPU %d\n", cpu);
return -1;
}
if (do_core_perf_limit_reasons) {
get_msr(cpu, MSR_CORE_PERF_LIMIT_REASONS, &msr);
- fprintf(stderr, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
- fprintf(stderr, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
+ fprintf(outf, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
+ fprintf(outf, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)",
(msr & 1 << 15) ? "bit15, " : "",
(msr & 1 << 14) ? "bit14, " : "",
(msr & 1 << 13) ? "Transitions, " : "",
@@ -2079,7 +2521,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
(msr & 1 << 2) ? "bit2, " : "",
(msr & 1 << 1) ? "ThermStatus, " : "",
(msr & 1 << 0) ? "PROCHOT, " : "");
- fprintf(stderr, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
+ fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
(msr & 1 << 31) ? "bit31, " : "",
(msr & 1 << 30) ? "bit30, " : "",
(msr & 1 << 29) ? "Transitions, " : "",
@@ -2098,8 +2540,8 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
}
if (do_gfx_perf_limit_reasons) {
get_msr(cpu, MSR_GFX_PERF_LIMIT_REASONS, &msr);
- fprintf(stderr, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
- fprintf(stderr, " (Active: %s%s%s%s%s%s%s%s)",
+ fprintf(outf, "cpu%d: MSR_GFX_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
+ fprintf(outf, " (Active: %s%s%s%s%s%s%s%s)",
(msr & 1 << 0) ? "PROCHOT, " : "",
(msr & 1 << 1) ? "ThermStatus, " : "",
(msr & 1 << 4) ? "Graphics, " : "",
@@ -2108,7 +2550,7 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
(msr & 1 << 9) ? "GFXPwr, " : "",
(msr & 1 << 10) ? "PkgPwrL1, " : "",
(msr & 1 << 11) ? "PkgPwrL2, " : "");
- fprintf(stderr, " (Logged: %s%s%s%s%s%s%s%s)\n",
+ fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n",
(msr & 1 << 16) ? "PROCHOT, " : "",
(msr & 1 << 17) ? "ThermStatus, " : "",
(msr & 1 << 20) ? "Graphics, " : "",
@@ -2120,15 +2562,15 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data
}
if (do_ring_perf_limit_reasons) {
get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr);
- fprintf(stderr, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
- fprintf(stderr, " (Active: %s%s%s%s%s%s)",
+ fprintf(outf, "cpu%d: MSR_RING_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr);
+ fprintf(outf, " (Active: %s%s%s%s%s%s)",
(msr & 1 << 0) ? "PROCHOT, " : "",
(msr & 1 << 1) ? "ThermStatus, " : "",
(msr & 1 << 6) ? "VR-Therm, " : "",
(msr & 1 << 8) ? "Amps, " : "",
(msr & 1 << 10) ? "PkgPwrL1, " : "",
(msr & 1 << 11) ? "PkgPwrL2, " : "");
- fprintf(stderr, " (Logged: %s%s%s%s%s%s)\n",
+ fprintf(outf, " (Logged: %s%s%s%s%s%s)\n",
(msr & 1 << 16) ? "PROCHOT, " : "",
(msr & 1 << 17) ? "ThermStatus, " : "",
(msr & 1 << 22) ? "VR-Therm, " : "",
@@ -2207,13 +2649,19 @@ void rapl_probe(unsigned int family, unsigned int model)
case 0x47: /* BDW */
do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO;
break;
+ case 0x5C: /* BXT */
+ do_rapl = RAPL_PKG | RAPL_PKG_POWER_INFO;
+ break;
case 0x4E: /* SKL */
case 0x5E: /* SKL */
+ case 0x8E: /* KBL */
+ case 0x9E: /* KBL */
do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
break;
case 0x3F: /* HSX */
case 0x4F: /* BDX */
case 0x56: /* BDX-DE */
+ case 0x55: /* SKX */
case 0x57: /* KNL */
do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
break;
@@ -2251,7 +2699,7 @@ void rapl_probe(unsigned int family, unsigned int model)
rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp;
if (debug)
- fprintf(stderr, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
+ fprintf(outf, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp);
return;
}
@@ -2293,7 +2741,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
return 0;
if (cpu_migrate(cpu)) {
- fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
+ fprintf(outf, "Could not migrate to CPU %d\n", cpu);
return -1;
}
@@ -2302,7 +2750,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
return 0;
dts = (msr >> 16) & 0x7F;
- fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n",
+ fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n",
cpu, msr, tcc_activation_temp - dts);
#ifdef THERM_DEBUG
@@ -2311,7 +2759,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
dts = (msr >> 16) & 0x7F;
dts2 = (msr >> 8) & 0x7F;
- fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
+ fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
#endif
}
@@ -2325,7 +2773,7 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
dts = (msr >> 16) & 0x7F;
resolution = (msr >> 27) & 0xF;
- fprintf(stderr, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
+ fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
cpu, msr, tcc_activation_temp - dts, resolution);
#ifdef THERM_DEBUG
@@ -2334,17 +2782,17 @@ int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p
dts = (msr >> 16) & 0x7F;
dts2 = (msr >> 8) & 0x7F;
- fprintf(stderr, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
+ fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
#endif
}
return 0;
}
-
+
void print_power_limit_msr(int cpu, unsigned long long msr, char *label)
{
- fprintf(stderr, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n",
+ fprintf(outf, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n",
cpu, label,
((msr >> 15) & 1) ? "EN" : "DIS",
((msr >> 0) & 0x7FFF) * rapl_power_units,
@@ -2368,7 +2816,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
cpu = t->cpu_id;
if (cpu_migrate(cpu)) {
- fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
+ fprintf(outf, "Could not migrate to CPU %d\n", cpu);
return -1;
}
@@ -2376,7 +2824,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
return -1;
if (debug) {
- fprintf(stderr, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx "
+ fprintf(outf, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx "
"(%f Watts, %f Joules, %f sec.)\n", cpu, msr,
rapl_power_units, rapl_energy_units, rapl_time_units);
}
@@ -2386,7 +2834,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
return -5;
- fprintf(stderr, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
+ fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
cpu, msr,
((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
@@ -2399,11 +2847,11 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr))
return -9;
- fprintf(stderr, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
+ fprintf(outf, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n",
cpu, msr, (msr >> 63) & 1 ? "": "UN");
print_power_limit_msr(cpu, msr, "PKG Limit #1");
- fprintf(stderr, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n",
+ fprintf(outf, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n",
cpu,
((msr >> 47) & 1) ? "EN" : "DIS",
((msr >> 32) & 0x7FFF) * rapl_power_units,
@@ -2415,7 +2863,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
return -6;
- fprintf(stderr, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
+ fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
cpu, msr,
((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
@@ -2425,7 +2873,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
if (do_rapl & RAPL_DRAM) {
if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
return -9;
- fprintf(stderr, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
+ fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
cpu, msr, (msr >> 31) & 1 ? "": "UN");
print_power_limit_msr(cpu, msr, "DRAM Limit");
@@ -2435,7 +2883,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
if (get_msr(cpu, MSR_PP0_POLICY, &msr))
return -7;
- fprintf(stderr, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
+ fprintf(outf, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF);
}
}
if (do_rapl & RAPL_CORES) {
@@ -2443,7 +2891,7 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
return -9;
- fprintf(stderr, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
+ fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
cpu, msr, (msr >> 31) & 1 ? "": "UN");
print_power_limit_msr(cpu, msr, "Cores Limit");
}
@@ -2453,11 +2901,11 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p)
if (get_msr(cpu, MSR_PP1_POLICY, &msr))
return -8;
- fprintf(stderr, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);
+ fprintf(outf, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF);
if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
return -9;
- fprintf(stderr, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
+ fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
cpu, msr, (msr >> 31) & 1 ? "": "UN");
print_power_limit_msr(cpu, msr, "GFX Limit");
}
@@ -2493,6 +2941,10 @@ int has_snb_msrs(unsigned int family, unsigned int model)
case 0x56: /* BDX-DE */
case 0x4E: /* SKL */
case 0x5E: /* SKL */
+ case 0x8E: /* KBL */
+ case 0x9E: /* KBL */
+ case 0x55: /* SKX */
+ case 0x5C: /* BXT */
return 1;
}
return 0;
@@ -2501,9 +2953,14 @@ int has_snb_msrs(unsigned int family, unsigned int model)
/*
* HSW adds support for additional MSRs:
*
- * MSR_PKG_C8_RESIDENCY 0x00000630
- * MSR_PKG_C9_RESIDENCY 0x00000631
- * MSR_PKG_C10_RESIDENCY 0x00000632
+ * MSR_PKG_C8_RESIDENCY 0x00000630
+ * MSR_PKG_C9_RESIDENCY 0x00000631
+ * MSR_PKG_C10_RESIDENCY 0x00000632
+ *
+ * MSR_PKGC8_IRTL 0x00000633
+ * MSR_PKGC9_IRTL 0x00000634
+ * MSR_PKGC10_IRTL 0x00000635
+ *
*/
int has_hsw_msrs(unsigned int family, unsigned int model)
{
@@ -2515,6 +2972,9 @@ int has_hsw_msrs(unsigned int family, unsigned int model)
case 0x3D: /* BDW */
case 0x4E: /* SKL */
case 0x5E: /* SKL */
+ case 0x8E: /* KBL */
+ case 0x9E: /* KBL */
+ case 0x5C: /* BXT */
return 1;
}
return 0;
@@ -2536,6 +2996,8 @@ int has_skl_msrs(unsigned int family, unsigned int model)
switch (model) {
case 0x4E: /* SKL */
case 0x5E: /* SKL */
+ case 0x8E: /* KBL */
+ case 0x9E: /* KBL */
return 1;
}
return 0;
@@ -2583,23 +3045,23 @@ double slm_bclk(void)
double freq;
if (get_msr(base_cpu, MSR_FSB_FREQ, &msr))
- fprintf(stderr, "SLM BCLK: unknown\n");
+ fprintf(outf, "SLM BCLK: unknown\n");
i = msr & 0xf;
if (i >= SLM_BCLK_FREQS) {
- fprintf(stderr, "SLM BCLK[%d] invalid\n", i);
+ fprintf(outf, "SLM BCLK[%d] invalid\n", i);
msr = 3;
}
freq = slm_freq_table[i];
- fprintf(stderr, "SLM BCLK: %.1f Mhz\n", freq);
+ fprintf(outf, "SLM BCLK: %.1f Mhz\n", freq);
return freq;
}
double discover_bclk(unsigned int family, unsigned int model)
{
- if (has_snb_msrs(family, model))
+ if (has_snb_msrs(family, model) || is_knl(family, model))
return 100.00;
else if (is_slm(family, model))
return slm_bclk();
@@ -2635,13 +3097,13 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
cpu = t->cpu_id;
if (cpu_migrate(cpu)) {
- fprintf(stderr, "Could not migrate to CPU %d\n", cpu);
+ fprintf(outf, "Could not migrate to CPU %d\n", cpu);
return -1;
}
if (tcc_activation_temp_override != 0) {
tcc_activation_temp = tcc_activation_temp_override;
- fprintf(stderr, "cpu%d: Using cmdline TCC Target (%d C)\n",
+ fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n",
cpu, tcc_activation_temp);
return 0;
}
@@ -2656,7 +3118,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
target_c_local = (msr >> 16) & 0xFF;
if (debug)
- fprintf(stderr, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
+ fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n",
cpu, msr, target_c_local);
if (!target_c_local)
@@ -2668,37 +3130,93 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk
guess:
tcc_activation_temp = TJMAX_DEFAULT;
- fprintf(stderr, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n",
+ fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n",
cpu, tcc_activation_temp);
return 0;
}
+
+void decode_feature_control_msr(void)
+{
+ unsigned long long msr;
+
+ if (!get_msr(base_cpu, MSR_IA32_FEATURE_CONTROL, &msr))
+ fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n",
+ base_cpu, msr,
+ msr & FEATURE_CONTROL_LOCKED ? "" : "UN-",
+ msr & (1 << 18) ? "SGX" : "");
+}
+
+void decode_misc_enable_msr(void)
+{
+ unsigned long long msr;
+
+ if (!get_msr(base_cpu, MSR_IA32_MISC_ENABLE, &msr))
+ fprintf(outf, "cpu%d: MSR_IA32_MISC_ENABLE: 0x%08llx (%s %s %s)\n",
+ base_cpu, msr,
+ msr & (1 << 3) ? "TCC" : "",
+ msr & (1 << 16) ? "EIST" : "",
+ msr & (1 << 18) ? "MONITOR" : "");
+}
+
+/*
+ * Decode MSR_MISC_PWR_MGMT
+ *
+ * Decode the bits according to the Nehalem documentation
+ * bit[0] seems to continue to have same meaning going forward
+ * bit[1] less so...
+ */
+void decode_misc_pwr_mgmt_msr(void)
+{
+ unsigned long long msr;
+
+ if (!do_nhm_platform_info)
+ return;
+
+ if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
+ fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB)\n",
+ base_cpu, msr,
+ msr & (1 << 0) ? "DIS" : "EN",
+ msr & (1 << 1) ? "EN" : "DIS");
+}
+
void process_cpuid()
{
- unsigned int eax, ebx, ecx, edx, max_level;
+ unsigned int eax, ebx, ecx, edx, max_level, max_extended_level;
unsigned int fms, family, model, stepping;
eax = ebx = ecx = edx = 0;
- __get_cpuid(0, &max_level, &ebx, &ecx, &edx);
+ __cpuid(0, max_level, ebx, ecx, edx);
if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e)
genuine_intel = 1;
if (debug)
- fprintf(stderr, "CPUID(0): %.4s%.4s%.4s ",
+ fprintf(outf, "CPUID(0): %.4s%.4s%.4s ",
(char *)&ebx, (char *)&edx, (char *)&ecx);
- __get_cpuid(1, &fms, &ebx, &ecx, &edx);
+ __cpuid(1, fms, ebx, ecx, edx);
family = (fms >> 8) & 0xf;
model = (fms >> 4) & 0xf;
stepping = fms & 0xf;
if (family == 6 || family == 0xf)
model += ((fms >> 16) & 0xf) << 4;
- if (debug)
- fprintf(stderr, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
+ if (debug) {
+ fprintf(outf, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
max_level, family, model, stepping, family, model, stepping);
+ fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s\n",
+ ecx & (1 << 0) ? "SSE3" : "-",
+ ecx & (1 << 3) ? "MONITOR" : "-",
+ ecx & (1 << 6) ? "SMX" : "-",
+ ecx & (1 << 7) ? "EIST" : "-",
+ ecx & (1 << 8) ? "TM2" : "-",
+ edx & (1 << 4) ? "TSC" : "-",
+ edx & (1 << 5) ? "MSR" : "-",
+ edx & (1 << 22) ? "ACPI-TM" : "-",
+ edx & (1 << 29) ? "TM" : "-");
+ }
if (!(edx & (1 << 5)))
errx(1, "CPUID: no MSR");
@@ -2709,15 +3227,15 @@ void process_cpuid()
* This check is valid for both Intel and AMD.
*/
ebx = ecx = edx = 0;
- __get_cpuid(0x80000000, &max_level, &ebx, &ecx, &edx);
+ __cpuid(0x80000000, max_extended_level, ebx, ecx, edx);
- if (max_level >= 0x80000007) {
+ if (max_extended_level >= 0x80000007) {
/*
* Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8
* this check is valid for both Intel and AMD
*/
- __get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx);
+ __cpuid(0x80000007, eax, ebx, ecx, edx);
has_invariant_tsc = edx & (1 << 8);
}
@@ -2726,20 +3244,48 @@ void process_cpuid()
* this check is valid for both Intel and AMD
*/
- __get_cpuid(0x6, &eax, &ebx, &ecx, &edx);
+ __cpuid(0x6, eax, ebx, ecx, edx);
has_aperf = ecx & (1 << 0);
do_dts = eax & (1 << 0);
do_ptm = eax & (1 << 6);
+ has_hwp = eax & (1 << 7);
+ has_hwp_notify = eax & (1 << 8);
+ has_hwp_activity_window = eax & (1 << 9);
+ has_hwp_epp = eax & (1 << 10);
+ has_hwp_pkg = eax & (1 << 11);
has_epb = ecx & (1 << 3);
if (debug)
- fprintf(stderr, "CPUID(6): %sAPERF, %sDTS, %sPTM, %sEPB\n",
- has_aperf ? "" : "No ",
- do_dts ? "" : "No ",
- do_ptm ? "" : "No ",
- has_epb ? "" : "No ");
+ fprintf(outf, "CPUID(6): %sAPERF, %sDTS, %sPTM, %sHWP, "
+ "%sHWPnotify, %sHWPwindow, %sHWPepp, %sHWPpkg, %sEPB\n",
+ has_aperf ? "" : "No-",
+ do_dts ? "" : "No-",
+ do_ptm ? "" : "No-",
+ has_hwp ? "" : "No-",
+ has_hwp_notify ? "" : "No-",
+ has_hwp_activity_window ? "" : "No-",
+ has_hwp_epp ? "" : "No-",
+ has_hwp_pkg ? "" : "No-",
+ has_epb ? "" : "No-");
+
+ if (debug)
+ decode_misc_enable_msr();
+
+ if (max_level >= 0x7 && debug) {
+ int has_sgx;
+
+ ecx = 0;
+
+ __cpuid_count(0x7, 0, eax, ebx, ecx, edx);
+
+ has_sgx = ebx & (1 << 2);
+ fprintf(outf, "CPUID(7): %sSGX\n", has_sgx ? "" : "No-");
+
+ if (has_sgx)
+ decode_feature_control_msr();
+ }
- if (max_level > 0x15) {
+ if (max_level >= 0x15) {
unsigned int eax_crystal;
unsigned int ebx_tsc;
@@ -2747,19 +3293,27 @@ void process_cpuid()
* CPUID 15H TSC/Crystal ratio, possibly Crystal Hz
*/
eax_crystal = ebx_tsc = crystal_hz = edx = 0;
- __get_cpuid(0x15, &eax_crystal, &ebx_tsc, &crystal_hz, &edx);
+ __cpuid(0x15, eax_crystal, ebx_tsc, crystal_hz, edx);
if (ebx_tsc != 0) {
if (debug && (ebx != 0))
- fprintf(stderr, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
+ fprintf(outf, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n",
eax_crystal, ebx_tsc, crystal_hz);
if (crystal_hz == 0)
switch(model) {
case 0x4E: /* SKL */
case 0x5E: /* SKL */
- crystal_hz = 24000000; /* 24 MHz */
+ case 0x8E: /* KBL */
+ case 0x9E: /* KBL */
+ crystal_hz = 24000000; /* 24.0 MHz */
+ break;
+ case 0x55: /* SKX */
+ crystal_hz = 25000000; /* 25.0 MHz */
+ break;
+ case 0x5C: /* BXT */
+ crystal_hz = 19200000; /* 19.2 MHz */
break;
default:
crystal_hz = 0;
@@ -2768,26 +3322,44 @@ void process_cpuid()
if (crystal_hz) {
tsc_hz = (unsigned long long) crystal_hz * ebx_tsc / eax_crystal;
if (debug)
- fprintf(stderr, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
+ fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal);
}
}
}
+ if (max_level >= 0x16) {
+ unsigned int base_mhz, max_mhz, bus_mhz, edx;
+
+ /*
+ * CPUID 16H Base MHz, Max MHz, Bus MHz
+ */
+ base_mhz = max_mhz = bus_mhz = edx = 0;
+
+ __cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);
+ if (debug)
+ fprintf(outf, "CPUID(0x16): base_mhz: %d max_mhz: %d bus_mhz: %d\n",
+ base_mhz, max_mhz, bus_mhz);
+ }
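
Editor's note: a stand-alone illustration of the __cpuid() macro from <cpuid.h> that the patch switches to, reading the leaf-0x16 frequency fields the same way as the hunk above (assumes an x86 CPU that implements leaf 0x16):

/* Editor's sketch: query CPUID leaf 0x16 with the __cpuid() macro. */
#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int max_level, base_mhz, max_mhz, bus_mhz, ebx, ecx, edx;

	__cpuid(0, max_level, ebx, ecx, edx);
	if (max_level < 0x16)
		return 1;		/* leaf not implemented */

	__cpuid(0x16, base_mhz, max_mhz, bus_mhz, edx);
	printf("base %u MHz, max %u MHz, bus %u MHz\n",
	       base_mhz, max_mhz, bus_mhz);
	return 0;
}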
if (has_aperf)
aperf_mperf_multiplier = get_aperf_mperf_multiplier(family, model);
do_nhm_platform_info = do_nhm_cstates = do_smi = probe_nhm_msrs(family, model);
do_snb_cstates = has_snb_msrs(family, model);
+ do_irtl_snb = has_snb_msrs(family, model);
do_pc2 = do_snb_cstates && (pkg_cstate_limit >= PCL__2);
do_pc3 = (pkg_cstate_limit >= PCL__3);
do_pc6 = (pkg_cstate_limit >= PCL__6);
do_pc7 = do_snb_cstates && (pkg_cstate_limit >= PCL__7);
do_c8_c9_c10 = has_hsw_msrs(family, model);
+ do_irtl_hsw = has_hsw_msrs(family, model);
do_skl_residency = has_skl_msrs(family, model);
do_slm_cstates = is_slm(family, model);
do_knl_cstates = is_knl(family, model);
+ if (debug)
+ decode_misc_pwr_mgmt_msr();
+
rapl_probe(family, model);
perf_limit_reasons_probe(family, model);
@@ -2797,12 +3369,16 @@ void process_cpuid()
if (has_skl_msrs(family, model))
calculate_tsc_tweak();
+ do_gfx_rc6_ms = !access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK);
+
+ do_gfx_mhz = !access("/sys/class/graphics/fb0/device/drm/card0/gt_cur_freq_mhz", R_OK);
+
return;
}
void help()
{
- fprintf(stderr,
+ fprintf(outf,
"Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n"
"\n"
"Turbostat forks the specified COMMAND and prints statistics\n"
@@ -2814,6 +3390,7 @@ void help()
"--help print this help message\n"
"--counter msr print 32-bit counter at address \"msr\"\n"
"--Counter msr print 64-bit Counter at address \"msr\"\n"
+ "--out file create or truncate \"file\" for all output\n"
"--msr msr print 32-bit value at address \"msr\"\n"
"--MSR msr print 64-bit Value at address \"msr\"\n"
"--version print version information\n"
@@ -2858,7 +3435,7 @@ void topology_probe()
show_cpu = 1;
if (debug > 1)
- fprintf(stderr, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);
+ fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);
cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology));
if (cpus == NULL)
@@ -2893,7 +3470,7 @@ void topology_probe()
if (cpu_is_not_present(i)) {
if (debug > 1)
- fprintf(stderr, "cpu%d NOT PRESENT\n", i);
+ fprintf(outf, "cpu%d NOT PRESENT\n", i);
continue;
}
cpus[i].core_id = get_core_id(i);
@@ -2908,26 +3485,26 @@ void topology_probe()
if (siblings > max_siblings)
max_siblings = siblings;
if (debug > 1)
- fprintf(stderr, "cpu %d pkg %d core %d\n",
+ fprintf(outf, "cpu %d pkg %d core %d\n",
i, cpus[i].physical_package_id, cpus[i].core_id);
}
topo.num_cores_per_pkg = max_core_id + 1;
if (debug > 1)
- fprintf(stderr, "max_core_id %d, sizing for %d cores per package\n",
+ fprintf(outf, "max_core_id %d, sizing for %d cores per package\n",
max_core_id, topo.num_cores_per_pkg);
if (debug && !summary_only && topo.num_cores_per_pkg > 1)
show_core = 1;
topo.num_packages = max_package_id + 1;
if (debug > 1)
- fprintf(stderr, "max_package_id %d, sizing for %d packages\n",
+ fprintf(outf, "max_package_id %d, sizing for %d packages\n",
max_package_id, topo.num_packages);
if (debug && !summary_only && topo.num_packages > 1)
show_pkg = 1;
topo.num_threads_per_core = max_siblings;
if (debug > 1)
- fprintf(stderr, "max_siblings %d\n", max_siblings);
+ fprintf(outf, "max_siblings %d\n", max_siblings);
free(cpus);
}
@@ -3019,10 +3596,27 @@ void allocate_output_buffer()
if (outp == NULL)
err(-1, "calloc output buffer");
}
+void allocate_fd_percpu(void)
+{
+ fd_percpu = calloc(topo.max_cpu_num, sizeof(int));
+ if (fd_percpu == NULL)
+ err(-1, "calloc fd_percpu");
+}
+void allocate_irq_buffers(void)
+{
+ irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int));
+ if (irq_column_2_cpu == NULL)
+ err(-1, "calloc %d", topo.num_cpus);
+ irqs_per_cpu = calloc(topo.max_cpu_num, sizeof(int));
+ if (irqs_per_cpu == NULL)
+ err(-1, "calloc %d", topo.max_cpu_num);
+}
void setup_all_buffers(void)
{
topology_probe();
+ allocate_irq_buffers();
+ allocate_fd_percpu();
allocate_counters(&thread_even, &core_even, &package_even);
allocate_counters(&thread_odd, &core_odd, &package_odd);
allocate_output_buffer();
@@ -3036,7 +3630,7 @@ void set_base_cpu(void)
err(-ENODEV, "No valid cpus found");
if (debug > 1)
- fprintf(stderr, "base_cpu = %d\n", base_cpu);
+ fprintf(outf, "base_cpu = %d\n", base_cpu);
}
void turbostat_init()
@@ -3049,6 +3643,9 @@ void turbostat_init()
if (debug)
+ for_all_cpus(print_hwp, ODD_COUNTERS);
+
+ if (debug)
for_all_cpus(print_epb, ODD_COUNTERS);
if (debug)
@@ -3061,6 +3658,9 @@ void turbostat_init()
if (debug)
for_all_cpus(print_thermal, ODD_COUNTERS);
+
+ if (debug && do_irtl_snb)
+ print_irtl();
}
int fork_it(char **argv)
@@ -3100,9 +3700,10 @@ int fork_it(char **argv)
for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS);
compute_average(EVEN_COUNTERS);
format_all_counters(EVEN_COUNTERS);
- flush_stderr();
- fprintf(stderr, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);
+ fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);
+
+ flush_output_stderr();
return status;
}
@@ -3119,13 +3720,13 @@ int get_and_dump_counters(void)
if (status)
return status;
- flush_stdout();
+ flush_output_stdout();
return status;
}
void print_version() {
- fprintf(stderr, "turbostat version 4.8 26-Sep, 2015"
+ fprintf(outf, "turbostat version 4.12 5 Apr 2016"
" - Len Brown <lenb@kernel.org>\n");
}
@@ -3143,6 +3744,7 @@ void cmdline(int argc, char **argv)
{"Joules", no_argument, 0, 'J'},
{"MSR", required_argument, 0, 'M'},
{"msr", required_argument, 0, 'm'},
+ {"out", required_argument, 0, 'o'},
{"Package", no_argument, 0, 'p'},
{"processor", no_argument, 0, 'p'},
{"Summary", no_argument, 0, 'S'},
@@ -3153,7 +3755,7 @@ void cmdline(int argc, char **argv)
progname = argv[0];
- while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:PpST:v",
+ while ((opt = getopt_long_only(argc, argv, "+C:c:Ddhi:JM:m:o:PpST:v",
long_options, &option_index)) != -1) {
switch (opt) {
case 'C':
@@ -3173,7 +3775,18 @@ void cmdline(int argc, char **argv)
help();
exit(1);
case 'i':
- interval_sec = atoi(optarg);
+ {
+ double interval = strtod(optarg, NULL);
+
+ if (interval < 0.001) {
+ fprintf(outf, "interval %f seconds is too small\n",
+ interval);
+ exit(2);
+ }
+
+ interval_ts.tv_sec = interval;
+ interval_ts.tv_nsec = (interval - interval_ts.tv_sec) * 1000000000;
+ }
break;
case 'J':
rapl_joules++;
@@ -3184,6 +3797,9 @@ void cmdline(int argc, char **argv)
case 'm':
sscanf(optarg, "%x", &extra_msr_offset32);
break;
+ case 'o':
+ outf = fopen_or_die(optarg, "w");
+ break;
case 'P':
show_pkg_only++;
break;
@@ -3206,6 +3822,8 @@ void cmdline(int argc, char **argv)
int main(int argc, char **argv)
{
+ outf = stderr;
+
cmdline(argc, argv);
if (debug)
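
A short usage note for the two additions above, with an illustrative command line and a hypothetical file name: `turbostat --interval 0.25 --out turbostat.log` samples every 250 ms (the fractional interval is stored as tv_sec = 0, tv_nsec = 250000000; anything below 0.001 s is rejected) and writes everything that previously went to stderr to turbostat.log instead, since outf defaults to stderr and is only reopened when --out is given.
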
diff --git a/tools/perf/config/utilities.mak b/tools/scripts/utilities.mak
index c16ce833079c..c16ce833079c 100644
--- a/tools/perf/config/utilities.mak
+++ b/tools/scripts/utilities.mak
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index b3281dcd4a5d..3187322eeed7 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -151,6 +151,11 @@ struct nfit_test {
int (*alloc)(struct nfit_test *t);
void (*setup)(struct nfit_test *t);
int setup_hotplug;
+ struct ars_state {
+ struct nd_cmd_ars_status *ars_status;
+ unsigned long deadline;
+ spinlock_t lock;
+ } ars_state;
};
static struct nfit_test *to_nfit_test(struct device *dev)
@@ -218,6 +223,7 @@ static int nfit_test_cmd_set_config_data(struct nd_cmd_set_config_hdr *nd_cmd,
}
#define NFIT_TEST_ARS_RECORDS 4
+#define NFIT_TEST_CLEAR_ERR_UNIT 256
static int nfit_test_cmd_ars_cap(struct nd_cmd_ars_cap *nd_cmd,
unsigned int buf_len)
@@ -228,44 +234,113 @@ static int nfit_test_cmd_ars_cap(struct nd_cmd_ars_cap *nd_cmd,
nd_cmd->max_ars_out = sizeof(struct nd_cmd_ars_status)
+ NFIT_TEST_ARS_RECORDS * sizeof(struct nd_ars_record);
nd_cmd->status = (ND_ARS_PERSISTENT | ND_ARS_VOLATILE) << 16;
+ nd_cmd->clear_err_unit = NFIT_TEST_CLEAR_ERR_UNIT;
return 0;
}
-static int nfit_test_cmd_ars_start(struct nd_cmd_ars_start *nd_cmd,
- unsigned int buf_len)
+/*
+ * Initialize the ars_state to return an ars_result 1 second in the future with
+ * a 4K error range in the middle of the requested address range.
+ */
+static void post_ars_status(struct ars_state *ars_state, u64 addr, u64 len)
{
- if (buf_len < sizeof(*nd_cmd))
+ struct nd_cmd_ars_status *ars_status;
+ struct nd_ars_record *ars_record;
+
+ ars_state->deadline = jiffies + 1*HZ;
+ ars_status = ars_state->ars_status;
+ ars_status->status = 0;
+ ars_status->out_length = sizeof(struct nd_cmd_ars_status)
+ + sizeof(struct nd_ars_record);
+ ars_status->address = addr;
+ ars_status->length = len;
+ ars_status->type = ND_ARS_PERSISTENT;
+ ars_status->num_records = 1;
+ ars_record = &ars_status->records[0];
+ ars_record->handle = 0;
+ ars_record->err_address = addr + len / 2;
+ ars_record->length = SZ_4K;
+}
+
+static int nfit_test_cmd_ars_start(struct ars_state *ars_state,
+ struct nd_cmd_ars_start *ars_start, unsigned int buf_len,
+ int *cmd_rc)
+{
+ if (buf_len < sizeof(*ars_start))
return -EINVAL;
- nd_cmd->status = 0;
+ spin_lock(&ars_state->lock);
+ if (time_before(jiffies, ars_state->deadline)) {
+ ars_start->status = NFIT_ARS_START_BUSY;
+ *cmd_rc = -EBUSY;
+ } else {
+ ars_start->status = 0;
+ ars_start->scrub_time = 1;
+ post_ars_status(ars_state, ars_start->address,
+ ars_start->length);
+ *cmd_rc = 0;
+ }
+ spin_unlock(&ars_state->lock);
return 0;
}
-static int nfit_test_cmd_ars_status(struct nd_cmd_ars_status *nd_cmd,
- unsigned int buf_len)
+static int nfit_test_cmd_ars_status(struct ars_state *ars_state,
+ struct nd_cmd_ars_status *ars_status, unsigned int buf_len,
+ int *cmd_rc)
{
- if (buf_len < sizeof(*nd_cmd))
+ if (buf_len < ars_state->ars_status->out_length)
return -EINVAL;
- nd_cmd->out_length = sizeof(struct nd_cmd_ars_status);
- /* TODO: emit error records */
- nd_cmd->num_records = 0;
- nd_cmd->address = 0;
- nd_cmd->length = -1ULL;
- nd_cmd->status = 0;
+ spin_lock(&ars_state->lock);
+ if (time_before(jiffies, ars_state->deadline)) {
+ memset(ars_status, 0, buf_len);
+ ars_status->status = NFIT_ARS_STATUS_BUSY;
+ ars_status->out_length = sizeof(*ars_status);
+ *cmd_rc = -EBUSY;
+ } else {
+ memcpy(ars_status, ars_state->ars_status,
+ ars_state->ars_status->out_length);
+ *cmd_rc = 0;
+ }
+ spin_unlock(&ars_state->lock);
+ return 0;
+}
+static int nfit_test_cmd_clear_error(struct nd_cmd_clear_error *clear_err,
+ unsigned int buf_len, int *cmd_rc)
+{
+ const u64 mask = NFIT_TEST_CLEAR_ERR_UNIT - 1;
+ if (buf_len < sizeof(*clear_err))
+ return -EINVAL;
+
+ if ((clear_err->address & mask) || (clear_err->length & mask))
+ return -EINVAL;
+
+ /*
+ * Report 'all clear' success for all commands even though a new
+ * scrub will find errors again. This is enough to have the
+ * error removed from the 'badblocks' tracking in the pmem
+ * driver.
+ */
+ clear_err->status = 0;
+ clear_err->cleared = clear_err->length;
+ *cmd_rc = 0;
return 0;
}
static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
struct nvdimm *nvdimm, unsigned int cmd, void *buf,
- unsigned int buf_len)
+ unsigned int buf_len, int *cmd_rc)
{
struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
struct nfit_test *t = container_of(acpi_desc, typeof(*t), acpi_desc);
- int i, rc = 0;
+ int i, rc = 0, __cmd_rc;
+
+ if (!cmd_rc)
+ cmd_rc = &__cmd_rc;
+ *cmd_rc = 0;
if (nvdimm) {
struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm);
@@ -297,6 +372,8 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
return -ENOTTY;
}
} else {
+ struct ars_state *ars_state = &t->ars_state;
+
if (!nd_desc || !test_bit(cmd, &nd_desc->dsm_mask))
return -ENOTTY;
@@ -305,10 +382,15 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
rc = nfit_test_cmd_ars_cap(buf, buf_len);
break;
case ND_CMD_ARS_START:
- rc = nfit_test_cmd_ars_start(buf, buf_len);
+ rc = nfit_test_cmd_ars_start(ars_state, buf, buf_len,
+ cmd_rc);
break;
case ND_CMD_ARS_STATUS:
- rc = nfit_test_cmd_ars_status(buf, buf_len);
+ rc = nfit_test_cmd_ars_status(ars_state, buf, buf_len,
+ cmd_rc);
+ break;
+ case ND_CMD_CLEAR_ERROR:
+ rc = nfit_test_cmd_clear_error(buf, buf_len, cmd_rc);
break;
default:
return -ENOTTY;
@@ -424,11 +506,25 @@ static struct nfit_test_resource *nfit_test_lookup(resource_size_t addr)
return NULL;
}
+static int ars_state_init(struct device *dev, struct ars_state *ars_state)
+{
+ ars_state->ars_status = devm_kzalloc(dev,
+ sizeof(struct nd_cmd_ars_status)
+ + sizeof(struct nd_ars_record) * NFIT_TEST_ARS_RECORDS,
+ GFP_KERNEL);
+ if (!ars_state->ars_status)
+ return -ENOMEM;
+ spin_lock_init(&ars_state->lock);
+ return 0;
+}
+
static int nfit_test0_alloc(struct nfit_test *t)
{
size_t nfit_size = sizeof(struct acpi_nfit_system_address) * NUM_SPA
+ sizeof(struct acpi_nfit_memory_map) * NUM_MEM
+ sizeof(struct acpi_nfit_control_region) * NUM_DCR
+ + offsetof(struct acpi_nfit_control_region,
+ window_size) * NUM_DCR
+ sizeof(struct acpi_nfit_data_region) * NUM_BDW
+ sizeof(struct acpi_nfit_flush_address) * NUM_DCR;
int i;
@@ -471,14 +567,14 @@ static int nfit_test0_alloc(struct nfit_test *t)
return -ENOMEM;
}
- return 0;
+ return ars_state_init(&t->pdev.dev, &t->ars_state);
}
static int nfit_test1_alloc(struct nfit_test *t)
{
size_t nfit_size = sizeof(struct acpi_nfit_system_address)
+ sizeof(struct acpi_nfit_memory_map)
- + sizeof(struct acpi_nfit_control_region);
+ + offsetof(struct acpi_nfit_control_region, window_size);
t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma);
if (!t->nfit_buf)
@@ -489,12 +585,11 @@ static int nfit_test1_alloc(struct nfit_test *t)
if (!t->spa_set[0])
return -ENOMEM;
- return 0;
+ return ars_state_init(&t->pdev.dev, &t->ars_state);
}
static void nfit_test0_setup(struct nfit_test *t)
{
- struct nvdimm_bus_descriptor *nd_desc;
struct acpi_nfit_desc *acpi_desc;
struct acpi_nfit_memory_map *memdev;
void *nfit_buf = t->nfit_buf;
@@ -611,7 +706,7 @@ static void nfit_test0_setup(struct nfit_test *t)
memdev->physical_id = 0;
memdev->region_id = 0;
memdev->range_index = 0+1;
- memdev->region_index = 0+1;
+ memdev->region_index = 4+1;
memdev->region_size = SPA0_SIZE/2;
memdev->region_offset = t->spa_set_dma[0];
memdev->address = 0;
@@ -626,7 +721,7 @@ static void nfit_test0_setup(struct nfit_test *t)
memdev->physical_id = 1;
memdev->region_id = 0;
memdev->range_index = 0+1;
- memdev->region_index = 1+1;
+ memdev->region_index = 5+1;
memdev->region_size = SPA0_SIZE/2;
memdev->region_offset = t->spa_set_dma[0] + SPA0_SIZE/2;
memdev->address = 0;
@@ -641,7 +736,7 @@ static void nfit_test0_setup(struct nfit_test *t)
memdev->physical_id = 0;
memdev->region_id = 1;
memdev->range_index = 1+1;
- memdev->region_index = 0+1;
+ memdev->region_index = 4+1;
memdev->region_size = SPA1_SIZE/4;
memdev->region_offset = t->spa_set_dma[1];
memdev->address = SPA0_SIZE/2;
@@ -656,7 +751,7 @@ static void nfit_test0_setup(struct nfit_test *t)
memdev->physical_id = 1;
memdev->region_id = 1;
memdev->range_index = 1+1;
- memdev->region_index = 1+1;
+ memdev->region_index = 5+1;
memdev->region_size = SPA1_SIZE/4;
memdev->region_offset = t->spa_set_dma[1] + SPA1_SIZE/4;
memdev->address = SPA0_SIZE/2;
@@ -671,7 +766,7 @@ static void nfit_test0_setup(struct nfit_test *t)
memdev->physical_id = 2;
memdev->region_id = 0;
memdev->range_index = 1+1;
- memdev->region_index = 2+1;
+ memdev->region_index = 6+1;
memdev->region_size = SPA1_SIZE/4;
memdev->region_offset = t->spa_set_dma[1] + 2*SPA1_SIZE/4;
memdev->address = SPA0_SIZE/2;
@@ -686,7 +781,7 @@ static void nfit_test0_setup(struct nfit_test *t)
memdev->physical_id = 3;
memdev->region_id = 0;
memdev->range_index = 1+1;
- memdev->region_index = 3+1;
+ memdev->region_index = 7+1;
memdev->region_size = SPA1_SIZE/4;
memdev->region_offset = t->spa_set_dma[1] + 3*SPA1_SIZE/4;
memdev->address = SPA0_SIZE/2;
@@ -814,7 +909,7 @@ static void nfit_test0_setup(struct nfit_test *t)
memdev->interleave_ways = 1;
offset = offset + sizeof(struct acpi_nfit_memory_map) * 14;
- /* dcr-descriptor0 */
+ /* dcr-descriptor0: blk */
dcr = nfit_buf + offset;
dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
dcr->header.length = sizeof(struct acpi_nfit_control_region);
@@ -823,6 +918,7 @@ static void nfit_test0_setup(struct nfit_test *t)
dcr->device_id = 0;
dcr->revision_id = 1;
dcr->serial_number = ~handle[0];
+ dcr->code = NFIT_FIC_BLK;
dcr->windows = 1;
dcr->window_size = DCR_SIZE;
dcr->command_offset = 0;
@@ -830,7 +926,7 @@ static void nfit_test0_setup(struct nfit_test *t)
dcr->status_offset = 8;
dcr->status_size = 4;
- /* dcr-descriptor1 */
+ /* dcr-descriptor1: blk */
dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region);
dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
dcr->header.length = sizeof(struct acpi_nfit_control_region);
@@ -839,6 +935,7 @@ static void nfit_test0_setup(struct nfit_test *t)
dcr->device_id = 0;
dcr->revision_id = 1;
dcr->serial_number = ~handle[1];
+ dcr->code = NFIT_FIC_BLK;
dcr->windows = 1;
dcr->window_size = DCR_SIZE;
dcr->command_offset = 0;
@@ -846,7 +943,7 @@ static void nfit_test0_setup(struct nfit_test *t)
dcr->status_offset = 8;
dcr->status_size = 4;
- /* dcr-descriptor2 */
+ /* dcr-descriptor2: blk */
dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region) * 2;
dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
dcr->header.length = sizeof(struct acpi_nfit_control_region);
@@ -855,6 +952,7 @@ static void nfit_test0_setup(struct nfit_test *t)
dcr->device_id = 0;
dcr->revision_id = 1;
dcr->serial_number = ~handle[2];
+ dcr->code = NFIT_FIC_BLK;
dcr->windows = 1;
dcr->window_size = DCR_SIZE;
dcr->command_offset = 0;
@@ -862,7 +960,7 @@ static void nfit_test0_setup(struct nfit_test *t)
dcr->status_offset = 8;
dcr->status_size = 4;
- /* dcr-descriptor3 */
+ /* dcr-descriptor3: blk */
dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region) * 3;
dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
dcr->header.length = sizeof(struct acpi_nfit_control_region);
@@ -871,6 +969,7 @@ static void nfit_test0_setup(struct nfit_test *t)
dcr->device_id = 0;
dcr->revision_id = 1;
dcr->serial_number = ~handle[3];
+ dcr->code = NFIT_FIC_BLK;
dcr->windows = 1;
dcr->window_size = DCR_SIZE;
dcr->command_offset = 0;
@@ -879,6 +978,63 @@ static void nfit_test0_setup(struct nfit_test *t)
dcr->status_size = 4;
offset = offset + sizeof(struct acpi_nfit_control_region) * 4;
+ /* dcr-descriptor0: pmem */
+ dcr = nfit_buf + offset;
+ dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+ dcr->header.length = offsetof(struct acpi_nfit_control_region,
+ window_size);
+ dcr->region_index = 4+1;
+ dcr->vendor_id = 0xabcd;
+ dcr->device_id = 0;
+ dcr->revision_id = 1;
+ dcr->serial_number = ~handle[0];
+ dcr->code = NFIT_FIC_BYTEN;
+ dcr->windows = 0;
+
+ /* dcr-descriptor1: pmem */
+ dcr = nfit_buf + offset + offsetof(struct acpi_nfit_control_region,
+ window_size);
+ dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+ dcr->header.length = offsetof(struct acpi_nfit_control_region,
+ window_size);
+ dcr->region_index = 5+1;
+ dcr->vendor_id = 0xabcd;
+ dcr->device_id = 0;
+ dcr->revision_id = 1;
+ dcr->serial_number = ~handle[1];
+ dcr->code = NFIT_FIC_BYTEN;
+ dcr->windows = 0;
+
+ /* dcr-descriptor2: pmem */
+ dcr = nfit_buf + offset + offsetof(struct acpi_nfit_control_region,
+ window_size) * 2;
+ dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+ dcr->header.length = offsetof(struct acpi_nfit_control_region,
+ window_size);
+ dcr->region_index = 6+1;
+ dcr->vendor_id = 0xabcd;
+ dcr->device_id = 0;
+ dcr->revision_id = 1;
+ dcr->serial_number = ~handle[2];
+ dcr->code = NFIT_FIC_BYTEN;
+ dcr->windows = 0;
+
+ /* dcr-descriptor3: pmem */
+ dcr = nfit_buf + offset + offsetof(struct acpi_nfit_control_region,
+ window_size) * 3;
+ dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+ dcr->header.length = offsetof(struct acpi_nfit_control_region,
+ window_size);
+ dcr->region_index = 7+1;
+ dcr->vendor_id = 0xabcd;
+ dcr->device_id = 0;
+ dcr->revision_id = 1;
+ dcr->serial_number = ~handle[3];
+ dcr->code = NFIT_FIC_BYTEN;
+ dcr->windows = 0;
+
+ offset = offset + offsetof(struct acpi_nfit_control_region,
+ window_size) * 4;
/* bdw0 (spa/dcr0, dimm0) */
bdw = nfit_buf + offset;
bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
@@ -958,15 +1114,16 @@ static void nfit_test0_setup(struct nfit_test *t)
if (t->setup_hotplug) {
offset = offset + sizeof(struct acpi_nfit_flush_address) * 4;
- /* dcr-descriptor4 */
+ /* dcr-descriptor4: blk */
dcr = nfit_buf + offset;
dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
dcr->header.length = sizeof(struct acpi_nfit_control_region);
- dcr->region_index = 4+1;
+ dcr->region_index = 8+1;
dcr->vendor_id = 0xabcd;
dcr->device_id = 0;
dcr->revision_id = 1;
dcr->serial_number = ~handle[4];
+ dcr->code = NFIT_FIC_BLK;
dcr->windows = 1;
dcr->window_size = DCR_SIZE;
dcr->command_offset = 0;
@@ -975,11 +1132,26 @@ static void nfit_test0_setup(struct nfit_test *t)
dcr->status_size = 4;
offset = offset + sizeof(struct acpi_nfit_control_region);
+ /* dcr-descriptor4: pmem */
+ dcr = nfit_buf + offset;
+ dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
+ dcr->header.length = offsetof(struct acpi_nfit_control_region,
+ window_size);
+ dcr->region_index = 9+1;
+ dcr->vendor_id = 0xabcd;
+ dcr->device_id = 0;
+ dcr->revision_id = 1;
+ dcr->serial_number = ~handle[4];
+ dcr->code = NFIT_FIC_BYTEN;
+ dcr->windows = 0;
+
+ offset = offset + offsetof(struct acpi_nfit_control_region,
+ window_size);
/* bdw4 (spa/dcr4, dimm4) */
bdw = nfit_buf + offset;
bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
bdw->header.length = sizeof(struct acpi_nfit_data_region);
- bdw->region_index = 4+1;
+ bdw->region_index = 8+1;
bdw->windows = 1;
bdw->offset = 0;
bdw->size = BDW_SIZE;
@@ -1027,7 +1199,7 @@ static void nfit_test0_setup(struct nfit_test *t)
memdev->physical_id = 4;
memdev->region_id = 0;
memdev->range_index = 10+1;
- memdev->region_index = 4+1;
+ memdev->region_index = 8+1;
memdev->region_size = 0;
memdev->region_offset = 0;
memdev->address = 0;
@@ -1043,14 +1215,14 @@ static void nfit_test0_setup(struct nfit_test *t)
memdev->physical_id = 4;
memdev->region_id = 0;
memdev->range_index = 11+1;
- memdev->region_index = 4+1;
+ memdev->region_index = 9+1;
memdev->region_size = SPA0_SIZE;
memdev->region_offset = t->spa_set_dma[2];
memdev->address = 0;
memdev->interleave_index = 0;
memdev->interleave_ways = 1;
- /* mem-region16 (spa/dcr4, dimm4) */
+ /* mem-region16 (spa/bdw4, dimm4) */
memdev = nfit_buf + offset +
sizeof(struct acpi_nfit_memory_map) * 2;
memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
@@ -1059,7 +1231,7 @@ static void nfit_test0_setup(struct nfit_test *t)
memdev->physical_id = 4;
memdev->region_id = 0;
memdev->range_index = 12+1;
- memdev->region_index = 4+1;
+ memdev->region_index = 8+1;
memdev->region_size = 0;
memdev->region_offset = 0;
memdev->address = 0;
@@ -1076,6 +1248,8 @@ static void nfit_test0_setup(struct nfit_test *t)
flush->hint_address[0] = t->flush_dma[4];
}
+ post_ars_status(&t->ars_state, t->spa_set_dma[0], SPA0_SIZE);
+
acpi_desc = &t->acpi_desc;
set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_dsm_force_en);
set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en);
@@ -1083,8 +1257,7 @@ static void nfit_test0_setup(struct nfit_test *t)
set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_dsm_force_en);
set_bit(ND_CMD_ARS_START, &acpi_desc->bus_dsm_force_en);
set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_dsm_force_en);
- nd_desc = &acpi_desc->nd_desc;
- nd_desc->ndctl = nfit_test_ctl;
+ set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_dsm_force_en);
}
static void nfit_test1_setup(struct nfit_test *t)
@@ -1094,7 +1267,6 @@ static void nfit_test1_setup(struct nfit_test *t)
struct acpi_nfit_memory_map *memdev;
struct acpi_nfit_control_region *dcr;
struct acpi_nfit_system_address *spa;
- struct nvdimm_bus_descriptor *nd_desc;
struct acpi_nfit_desc *acpi_desc;
offset = 0;
@@ -1130,26 +1302,23 @@ static void nfit_test1_setup(struct nfit_test *t)
/* dcr-descriptor0 */
dcr = nfit_buf + offset;
dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
- dcr->header.length = sizeof(struct acpi_nfit_control_region);
+ dcr->header.length = offsetof(struct acpi_nfit_control_region,
+ window_size);
dcr->region_index = 0+1;
dcr->vendor_id = 0xabcd;
dcr->device_id = 0;
dcr->revision_id = 1;
dcr->serial_number = ~0;
- dcr->code = 0x201;
+ dcr->code = NFIT_FIC_BYTE;
dcr->windows = 0;
- dcr->window_size = 0;
- dcr->command_offset = 0;
- dcr->command_size = 0;
- dcr->status_offset = 0;
- dcr->status_size = 0;
+
+ post_ars_status(&t->ars_state, t->spa_set_dma[0], SPA2_SIZE);
acpi_desc = &t->acpi_desc;
set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_dsm_force_en);
set_bit(ND_CMD_ARS_START, &acpi_desc->bus_dsm_force_en);
set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_dsm_force_en);
- nd_desc = &acpi_desc->nd_desc;
- nd_desc->ndctl = nfit_test_ctl;
+ set_bit(ND_CMD_CLEAR_ERROR, &acpi_desc->bus_dsm_force_en);
}
static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa,
@@ -1232,26 +1401,16 @@ static int nfit_test_probe(struct platform_device *pdev)
nfit_test->setup(nfit_test);
acpi_desc = &nfit_test->acpi_desc;
- acpi_desc->dev = &pdev->dev;
+ acpi_nfit_desc_init(acpi_desc, &pdev->dev);
acpi_desc->nfit = nfit_test->nfit_buf;
acpi_desc->blk_do_io = nfit_test_blk_do_io;
nd_desc = &acpi_desc->nd_desc;
- nd_desc->attr_groups = acpi_nfit_attribute_groups;
+ nd_desc->provider_name = NULL;
+ nd_desc->ndctl = nfit_test_ctl;
acpi_desc->nvdimm_bus = nvdimm_bus_register(&pdev->dev, nd_desc);
if (!acpi_desc->nvdimm_bus)
return -ENXIO;
- INIT_LIST_HEAD(&acpi_desc->spa_maps);
- INIT_LIST_HEAD(&acpi_desc->spas);
- INIT_LIST_HEAD(&acpi_desc->dcrs);
- INIT_LIST_HEAD(&acpi_desc->bdws);
- INIT_LIST_HEAD(&acpi_desc->idts);
- INIT_LIST_HEAD(&acpi_desc->flushes);
- INIT_LIST_HEAD(&acpi_desc->memdevs);
- INIT_LIST_HEAD(&acpi_desc->dimms);
- mutex_init(&acpi_desc->spa_map_mutex);
- mutex_init(&acpi_desc->init_mutex);
-
rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_size);
if (rc) {
nvdimm_bus_unregister(acpi_desc->nvdimm_bus);
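
To make the simulated ARS flow above concrete, here is a minimal sketch (not part of the patch) of how a caller could drive it through nfit_test_ctl(): start a scrub, poll status until the one-second deadline set by post_ars_status() expires, then read back the single 4K error record. The helper name poll_ars() and the local buffer are made up for illustration; the command structures, NFIT_TEST_ARS_RECORDS and the -EBUSY convention come from the code above.

static int poll_ars(struct nvdimm_bus_descriptor *nd_desc, u64 addr, u64 len)
{
	struct nd_cmd_ars_start start = {
		.address = addr,
		.length = len,
		.type = ND_ARS_PERSISTENT,
	};
	unsigned char buf[sizeof(struct nd_cmd_ars_status)
		+ NFIT_TEST_ARS_RECORDS * sizeof(struct nd_ars_record)];
	struct nd_cmd_ars_status *status = (struct nd_cmd_ars_status *) buf;
	int rc, cmd_rc;

	/* kick off a scrub; cmd_rc is -EBUSY if one is already in flight */
	rc = nfit_test_ctl(nd_desc, NULL, ND_CMD_ARS_START, &start,
			sizeof(start), &cmd_rc);
	if (rc || cmd_rc)
		return rc ? rc : cmd_rc;

	/* busy-poll (a real caller would sleep) until the 1*HZ deadline */
	do {
		rc = nfit_test_ctl(nd_desc, NULL, ND_CMD_ARS_STATUS, status,
				sizeof(buf), &cmd_rc);
		if (rc)
			return rc;
	} while (cmd_rc == -EBUSY);

	/* post_ars_status() queued one 4K record at addr + len / 2 */
	return status->num_records == 1 ? 0 : -EIO;
}

This -EBUSY handshake is exactly what the new cmd_rc argument threaded through nfit_test_ctl() exists to carry back to the caller.
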
diff --git a/tools/testing/radix-tree/.gitignore b/tools/testing/radix-tree/.gitignore
new file mode 100644
index 000000000000..11d888ca6a92
--- /dev/null
+++ b/tools/testing/radix-tree/.gitignore
@@ -0,0 +1,2 @@
+main
+radix-tree.c
diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
new file mode 100644
index 000000000000..604212db9d4b
--- /dev/null
+++ b/tools/testing/radix-tree/Makefile
@@ -0,0 +1,19 @@
+
+CFLAGS += -I. -g -Wall -D_LGPL_SOURCE
+LDFLAGS += -lpthread -lurcu
+TARGETS = main
+OFILES = main.o radix-tree.o linux.o test.o tag_check.o find_next_bit.o \
+ regression1.o regression2.o regression3.o
+
+targets: $(TARGETS)
+
+main: $(OFILES)
+ $(CC) $(CFLAGS) $(LDFLAGS) $(OFILES) -o main
+
+clean:
+ $(RM) -f $(TARGETS) *.o radix-tree.c
+
+$(OFILES): *.h */*.h
+
+radix-tree.c: ../../../lib/radix-tree.c
+ sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@
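
As a usage note, assuming liburcu and its development headers are installed (per the -lurcu/-lpthread link flags above): running `make` in tools/testing/radix-tree generates radix-tree.c from lib/radix-tree.c via the sed rule that strips `static` and `inline`, so the kernel implementation compiles unmodified against the userspace shim headers added below, and the resulting `./main` binary runs the regression and stress tests.
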
diff --git a/tools/testing/radix-tree/find_next_bit.c b/tools/testing/radix-tree/find_next_bit.c
new file mode 100644
index 000000000000..d1c2178bb2d4
--- /dev/null
+++ b/tools/testing/radix-tree/find_next_bit.c
@@ -0,0 +1,57 @@
+/* find_next_bit.c: fallback find next bit implementation
+ *
+ * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/types.h>
+#include <linux/bitops.h>
+
+#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
+
+/*
+ * Find the next set bit in a memory region.
+ */
+unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
+ unsigned long offset)
+{
+ const unsigned long *p = addr + BITOP_WORD(offset);
+ unsigned long result = offset & ~(BITS_PER_LONG-1);
+ unsigned long tmp;
+
+ if (offset >= size)
+ return size;
+ size -= result;
+ offset %= BITS_PER_LONG;
+ if (offset) {
+ tmp = *(p++);
+ tmp &= (~0UL << offset);
+ if (size < BITS_PER_LONG)
+ goto found_first;
+ if (tmp)
+ goto found_middle;
+ size -= BITS_PER_LONG;
+ result += BITS_PER_LONG;
+ }
+ while (size & ~(BITS_PER_LONG-1)) {
+ if ((tmp = *(p++)))
+ goto found_middle;
+ result += BITS_PER_LONG;
+ size -= BITS_PER_LONG;
+ }
+ if (!size)
+ return result;
+ tmp = *p;
+
+found_first:
+ tmp &= (~0UL >> (BITS_PER_LONG - size));
+ if (tmp == 0UL) /* Are any bits set? */
+ return result + size; /* Nope. */
+found_middle:
+ return result + __ffs(tmp);
+}
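
A small worked example of the fallback above (hypothetical test snippet, not part of the patch, assuming <assert.h> is available): with bit 3 set in the first word and bit 1 set in the second, the scan skips the cleared tail of word 0 and continues into word 1.

static void find_next_bit_example(void)
{
	unsigned long map[2] = { 1UL << 3, 1UL << 1 };

	/* first set bit at or after offset 0 is bit 3 of word 0 */
	assert(find_next_bit(map, 2 * BITS_PER_LONG, 0) == 3);
	/* searching past it lands on bit 1 of word 1 */
	assert(find_next_bit(map, 2 * BITS_PER_LONG, 4) == BITS_PER_LONG + 1);
	/* no further bits: the size is returned as "not found" */
	assert(find_next_bit(map, 2 * BITS_PER_LONG, BITS_PER_LONG + 2)
			== 2 * BITS_PER_LONG);
}
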
diff --git a/tools/testing/radix-tree/linux.c b/tools/testing/radix-tree/linux.c
new file mode 100644
index 000000000000..154823737b20
--- /dev/null
+++ b/tools/testing/radix-tree/linux.c
@@ -0,0 +1,60 @@
+#include <stdlib.h>
+#include <string.h>
+#include <malloc.h>
+#include <unistd.h>
+#include <assert.h>
+
+#include <linux/mempool.h>
+#include <linux/slab.h>
+#include <urcu/uatomic.h>
+
+int nr_allocated;
+
+void *mempool_alloc(mempool_t *pool, int gfp_mask)
+{
+ return pool->alloc(gfp_mask, pool->data);
+}
+
+void mempool_free(void *element, mempool_t *pool)
+{
+ pool->free(element, pool->data);
+}
+
+mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
+ mempool_free_t *free_fn, void *pool_data)
+{
+ mempool_t *ret = malloc(sizeof(*ret));
+
+ ret->alloc = alloc_fn;
+ ret->free = free_fn;
+ ret->data = pool_data;
+ return ret;
+}
+
+void *kmem_cache_alloc(struct kmem_cache *cachep, int flags)
+{
+ void *ret = malloc(cachep->size);
+ if (cachep->ctor)
+ cachep->ctor(ret);
+ uatomic_inc(&nr_allocated);
+ return ret;
+}
+
+void kmem_cache_free(struct kmem_cache *cachep, void *objp)
+{
+ assert(objp);
+ uatomic_dec(&nr_allocated);
+ memset(objp, 0, cachep->size);
+ free(objp);
+}
+
+struct kmem_cache *
+kmem_cache_create(const char *name, size_t size, size_t offset,
+ unsigned long flags, void (*ctor)(void *))
+{
+ struct kmem_cache *ret = malloc(sizeof(*ret));
+
+ ret->size = size;
+ ret->ctor = ctor;
+ return ret;
+}
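
A hypothetical snippet (not from the patch) showing how the shims above behave: allocations are plain malloc() with the constructor run eagerly, and the uatomic nr_allocated counter is what the test driver later prints to spot leaks.

static void slab_shim_example(void)
{
	struct kmem_cache *cache =
		kmem_cache_create("example", 64, 0, 0, NULL);
	void *obj = kmem_cache_alloc(cache, GFP_KERNEL);	/* nr_allocated -> 1 */

	kmem_cache_free(cache, obj);				/* nr_allocated -> 0 */
	free(cache);
}
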
diff --git a/tools/testing/radix-tree/linux/bitops.h b/tools/testing/radix-tree/linux/bitops.h
new file mode 100644
index 000000000000..71d58427ab60
--- /dev/null
+++ b/tools/testing/radix-tree/linux/bitops.h
@@ -0,0 +1,150 @@
+#ifndef _ASM_GENERIC_BITOPS_NON_ATOMIC_H_
+#define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_
+
+#include <linux/types.h>
+
+#define BITOP_MASK(nr) (1UL << ((nr) % BITS_PER_LONG))
+#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
+
+/**
+ * __set_bit - Set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * Unlike set_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static inline void __set_bit(int nr, volatile unsigned long *addr)
+{
+ unsigned long mask = BITOP_MASK(nr);
+ unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+
+ *p |= mask;
+}
+
+static inline void __clear_bit(int nr, volatile unsigned long *addr)
+{
+ unsigned long mask = BITOP_MASK(nr);
+ unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+
+ *p &= ~mask;
+}
+
+/**
+ * __change_bit - Toggle a bit in memory
+ * @nr: the bit to change
+ * @addr: the address to start counting from
+ *
+ * Unlike change_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static inline void __change_bit(int nr, volatile unsigned long *addr)
+{
+ unsigned long mask = BITOP_MASK(nr);
+ unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+
+ *p ^= mask;
+}
+
+/**
+ * __test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.
+ * If two examples of this operation race, one can appear to succeed
+ * but actually fail. You must protect multiple accesses with a lock.
+ */
+static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
+{
+ unsigned long mask = BITOP_MASK(nr);
+ unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+ unsigned long old = *p;
+
+ *p = old | mask;
+ return (old & mask) != 0;
+}
+
+/**
+ * __test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.
+ * If two examples of this operation race, one can appear to succeed
+ * but actually fail. You must protect multiple accesses with a lock.
+ */
+static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
+{
+ unsigned long mask = BITOP_MASK(nr);
+ unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+ unsigned long old = *p;
+
+ *p = old & ~mask;
+ return (old & mask) != 0;
+}
+
+/* WARNING: non atomic and it can be reordered! */
+static inline int __test_and_change_bit(int nr,
+ volatile unsigned long *addr)
+{
+ unsigned long mask = BITOP_MASK(nr);
+ unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+ unsigned long old = *p;
+
+ *p = old ^ mask;
+ return (old & mask) != 0;
+}
+
+/**
+ * test_bit - Determine whether a bit is set
+ * @nr: bit number to test
+ * @addr: Address to start counting from
+ */
+static inline int test_bit(int nr, const volatile unsigned long *addr)
+{
+ return 1UL & (addr[BITOP_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
+}
+
+/**
+ * __ffs - find first bit in word.
+ * @word: The word to search
+ *
+ * Undefined if no bit exists, so code should check against 0 first.
+ */
+static inline unsigned long __ffs(unsigned long word)
+{
+ int num = 0;
+
+ if ((word & 0xffffffff) == 0) {
+ num += 32;
+ word >>= 32;
+ }
+ if ((word & 0xffff) == 0) {
+ num += 16;
+ word >>= 16;
+ }
+ if ((word & 0xff) == 0) {
+ num += 8;
+ word >>= 8;
+ }
+ if ((word & 0xf) == 0) {
+ num += 4;
+ word >>= 4;
+ }
+ if ((word & 0x3) == 0) {
+ num += 2;
+ word >>= 2;
+ }
+ if ((word & 0x1) == 0)
+ num += 1;
+ return num;
+}
+
+unsigned long find_next_bit(const unsigned long *addr,
+ unsigned long size,
+ unsigned long offset);
+
+#endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */
diff --git a/tools/testing/radix-tree/linux/bitops/__ffs.h b/tools/testing/radix-tree/linux/bitops/__ffs.h
new file mode 100644
index 000000000000..9a3274aecf83
--- /dev/null
+++ b/tools/testing/radix-tree/linux/bitops/__ffs.h
@@ -0,0 +1,43 @@
+#ifndef _ASM_GENERIC_BITOPS___FFS_H_
+#define _ASM_GENERIC_BITOPS___FFS_H_
+
+#include <asm/types.h>
+
+/**
+ * __ffs - find first bit in word.
+ * @word: The word to search
+ *
+ * Undefined if no bit exists, so code should check against 0 first.
+ */
+static inline unsigned long __ffs(unsigned long word)
+{
+ int num = 0;
+
+#if BITS_PER_LONG == 64
+ if ((word & 0xffffffff) == 0) {
+ num += 32;
+ word >>= 32;
+ }
+#endif
+ if ((word & 0xffff) == 0) {
+ num += 16;
+ word >>= 16;
+ }
+ if ((word & 0xff) == 0) {
+ num += 8;
+ word >>= 8;
+ }
+ if ((word & 0xf) == 0) {
+ num += 4;
+ word >>= 4;
+ }
+ if ((word & 0x3) == 0) {
+ num += 2;
+ word >>= 2;
+ }
+ if ((word & 0x1) == 0)
+ num += 1;
+ return num;
+}
+
+#endif /* _ASM_GENERIC_BITOPS___FFS_H_ */
diff --git a/tools/testing/radix-tree/linux/bitops/ffs.h b/tools/testing/radix-tree/linux/bitops/ffs.h
new file mode 100644
index 000000000000..fbbb43af7dc0
--- /dev/null
+++ b/tools/testing/radix-tree/linux/bitops/ffs.h
@@ -0,0 +1,41 @@
+#ifndef _ASM_GENERIC_BITOPS_FFS_H_
+#define _ASM_GENERIC_BITOPS_FFS_H_
+
+/**
+ * ffs - find first bit set
+ * @x: the word to search
+ *
+ * This is defined the same way as
+ * the libc and compiler builtin ffs routines, therefore
+ * differs in spirit from the above ffz (man ffs).
+ */
+static inline int ffs(int x)
+{
+ int r = 1;
+
+ if (!x)
+ return 0;
+ if (!(x & 0xffff)) {
+ x >>= 16;
+ r += 16;
+ }
+ if (!(x & 0xff)) {
+ x >>= 8;
+ r += 8;
+ }
+ if (!(x & 0xf)) {
+ x >>= 4;
+ r += 4;
+ }
+ if (!(x & 3)) {
+ x >>= 2;
+ r += 2;
+ }
+ if (!(x & 1)) {
+ x >>= 1;
+ r += 1;
+ }
+ return r;
+}
+
+#endif /* _ASM_GENERIC_BITOPS_FFS_H_ */
diff --git a/tools/testing/radix-tree/linux/bitops/ffz.h b/tools/testing/radix-tree/linux/bitops/ffz.h
new file mode 100644
index 000000000000..6744bd4cdf46
--- /dev/null
+++ b/tools/testing/radix-tree/linux/bitops/ffz.h
@@ -0,0 +1,12 @@
+#ifndef _ASM_GENERIC_BITOPS_FFZ_H_
+#define _ASM_GENERIC_BITOPS_FFZ_H_
+
+/*
+ * ffz - find first zero in word.
+ * @word: The word to search
+ *
+ * Undefined if no zero exists, so code should check against ~0UL first.
+ */
+#define ffz(x) __ffs(~(x))
+
+#endif /* _ASM_GENERIC_BITOPS_FFZ_H_ */
diff --git a/tools/testing/radix-tree/linux/bitops/find.h b/tools/testing/radix-tree/linux/bitops/find.h
new file mode 100644
index 000000000000..72a51e5a12ef
--- /dev/null
+++ b/tools/testing/radix-tree/linux/bitops/find.h
@@ -0,0 +1,13 @@
+#ifndef _ASM_GENERIC_BITOPS_FIND_H_
+#define _ASM_GENERIC_BITOPS_FIND_H_
+
+extern unsigned long find_next_bit(const unsigned long *addr, unsigned long
+ size, unsigned long offset);
+
+extern unsigned long find_next_zero_bit(const unsigned long *addr, unsigned
+ long size, unsigned long offset);
+
+#define find_first_bit(addr, size) find_next_bit((addr), (size), 0)
+#define find_first_zero_bit(addr, size) find_next_zero_bit((addr), (size), 0)
+
+#endif /*_ASM_GENERIC_BITOPS_FIND_H_ */
diff --git a/tools/testing/radix-tree/linux/bitops/fls.h b/tools/testing/radix-tree/linux/bitops/fls.h
new file mode 100644
index 000000000000..850859bc5069
--- /dev/null
+++ b/tools/testing/radix-tree/linux/bitops/fls.h
@@ -0,0 +1,41 @@
+#ifndef _ASM_GENERIC_BITOPS_FLS_H_
+#define _ASM_GENERIC_BITOPS_FLS_H_
+
+/**
+ * fls - find last (most-significant) bit set
+ * @x: the word to search
+ *
+ * This is defined the same way as ffs.
+ * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
+ */
+
+static inline int fls(int x)
+{
+ int r = 32;
+
+ if (!x)
+ return 0;
+ if (!(x & 0xffff0000u)) {
+ x <<= 16;
+ r -= 16;
+ }
+ if (!(x & 0xff000000u)) {
+ x <<= 8;
+ r -= 8;
+ }
+ if (!(x & 0xf0000000u)) {
+ x <<= 4;
+ r -= 4;
+ }
+ if (!(x & 0xc0000000u)) {
+ x <<= 2;
+ r -= 2;
+ }
+ if (!(x & 0x80000000u)) {
+ x <<= 1;
+ r -= 1;
+ }
+ return r;
+}
+
+#endif /* _ASM_GENERIC_BITOPS_FLS_H_ */
diff --git a/tools/testing/radix-tree/linux/bitops/fls64.h b/tools/testing/radix-tree/linux/bitops/fls64.h
new file mode 100644
index 000000000000..1b6b17ce2428
--- /dev/null
+++ b/tools/testing/radix-tree/linux/bitops/fls64.h
@@ -0,0 +1,14 @@
+#ifndef _ASM_GENERIC_BITOPS_FLS64_H_
+#define _ASM_GENERIC_BITOPS_FLS64_H_
+
+#include <asm/types.h>
+
+static inline int fls64(__u64 x)
+{
+ __u32 h = x >> 32;
+ if (h)
+ return fls(h) + 32;
+ return fls(x);
+}
+
+#endif /* _ASM_GENERIC_BITOPS_FLS64_H_ */
diff --git a/tools/testing/radix-tree/linux/bitops/hweight.h b/tools/testing/radix-tree/linux/bitops/hweight.h
new file mode 100644
index 000000000000..fbbc383771da
--- /dev/null
+++ b/tools/testing/radix-tree/linux/bitops/hweight.h
@@ -0,0 +1,11 @@
+#ifndef _ASM_GENERIC_BITOPS_HWEIGHT_H_
+#define _ASM_GENERIC_BITOPS_HWEIGHT_H_
+
+#include <asm/types.h>
+
+extern unsigned int hweight32(unsigned int w);
+extern unsigned int hweight16(unsigned int w);
+extern unsigned int hweight8(unsigned int w);
+extern unsigned long hweight64(__u64 w);
+
+#endif /* _ASM_GENERIC_BITOPS_HWEIGHT_H_ */
diff --git a/tools/testing/radix-tree/linux/bitops/le.h b/tools/testing/radix-tree/linux/bitops/le.h
new file mode 100644
index 000000000000..b9c7e5d2d2ad
--- /dev/null
+++ b/tools/testing/radix-tree/linux/bitops/le.h
@@ -0,0 +1,53 @@
+#ifndef _ASM_GENERIC_BITOPS_LE_H_
+#define _ASM_GENERIC_BITOPS_LE_H_
+
+#include <asm/types.h>
+#include <asm/byteorder.h>
+
+#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
+#define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7)
+
+#if defined(__LITTLE_ENDIAN)
+
+#define generic_test_le_bit(nr, addr) test_bit(nr, addr)
+#define generic___set_le_bit(nr, addr) __set_bit(nr, addr)
+#define generic___clear_le_bit(nr, addr) __clear_bit(nr, addr)
+
+#define generic_test_and_set_le_bit(nr, addr) test_and_set_bit(nr, addr)
+#define generic_test_and_clear_le_bit(nr, addr) test_and_clear_bit(nr, addr)
+
+#define generic___test_and_set_le_bit(nr, addr) __test_and_set_bit(nr, addr)
+#define generic___test_and_clear_le_bit(nr, addr) __test_and_clear_bit(nr, addr)
+
+#define generic_find_next_zero_le_bit(addr, size, offset) find_next_zero_bit(addr, size, offset)
+
+#elif defined(__BIG_ENDIAN)
+
+#define generic_test_le_bit(nr, addr) \
+ test_bit((nr) ^ BITOP_LE_SWIZZLE, (addr))
+#define generic___set_le_bit(nr, addr) \
+ __set_bit((nr) ^ BITOP_LE_SWIZZLE, (addr))
+#define generic___clear_le_bit(nr, addr) \
+ __clear_bit((nr) ^ BITOP_LE_SWIZZLE, (addr))
+
+#define generic_test_and_set_le_bit(nr, addr) \
+ test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, (addr))
+#define generic_test_and_clear_le_bit(nr, addr) \
+ test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, (addr))
+
+#define generic___test_and_set_le_bit(nr, addr) \
+ __test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, (addr))
+#define generic___test_and_clear_le_bit(nr, addr) \
+ __test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, (addr))
+
+extern unsigned long generic_find_next_zero_le_bit(const unsigned long *addr,
+ unsigned long size, unsigned long offset);
+
+#else
+#error "Please fix <asm/byteorder.h>"
+#endif
+
+#define generic_find_first_zero_le_bit(addr, size) \
+ generic_find_next_zero_le_bit((addr), (size), 0)
+
+#endif /* _ASM_GENERIC_BITOPS_LE_H_ */
diff --git a/tools/testing/radix-tree/linux/bitops/non-atomic.h b/tools/testing/radix-tree/linux/bitops/non-atomic.h
new file mode 100644
index 000000000000..46a825cf2ae1
--- /dev/null
+++ b/tools/testing/radix-tree/linux/bitops/non-atomic.h
@@ -0,0 +1,111 @@
+#ifndef _ASM_GENERIC_BITOPS_NON_ATOMIC_H_
+#define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_
+
+#include <asm/types.h>
+
+#define BITOP_MASK(nr) (1UL << ((nr) % BITS_PER_LONG))
+#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
+
+/**
+ * __set_bit - Set a bit in memory
+ * @nr: the bit to set
+ * @addr: the address to start counting from
+ *
+ * Unlike set_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static inline void __set_bit(int nr, volatile unsigned long *addr)
+{
+ unsigned long mask = BITOP_MASK(nr);
+ unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+
+ *p |= mask;
+}
+
+static inline void __clear_bit(int nr, volatile unsigned long *addr)
+{
+ unsigned long mask = BITOP_MASK(nr);
+ unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+
+ *p &= ~mask;
+}
+
+/**
+ * __change_bit - Toggle a bit in memory
+ * @nr: the bit to change
+ * @addr: the address to start counting from
+ *
+ * Unlike change_bit(), this function is non-atomic and may be reordered.
+ * If it's called on the same region of memory simultaneously, the effect
+ * may be that only one operation succeeds.
+ */
+static inline void __change_bit(int nr, volatile unsigned long *addr)
+{
+ unsigned long mask = BITOP_MASK(nr);
+ unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+
+ *p ^= mask;
+}
+
+/**
+ * __test_and_set_bit - Set a bit and return its old value
+ * @nr: Bit to set
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.
+ * If two examples of this operation race, one can appear to succeed
+ * but actually fail. You must protect multiple accesses with a lock.
+ */
+static inline int __test_and_set_bit(int nr, volatile unsigned long *addr)
+{
+ unsigned long mask = BITOP_MASK(nr);
+ unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+ unsigned long old = *p;
+
+ *p = old | mask;
+ return (old & mask) != 0;
+}
+
+/**
+ * __test_and_clear_bit - Clear a bit and return its old value
+ * @nr: Bit to clear
+ * @addr: Address to count from
+ *
+ * This operation is non-atomic and can be reordered.
+ * If two examples of this operation race, one can appear to succeed
+ * but actually fail. You must protect multiple accesses with a lock.
+ */
+static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr)
+{
+ unsigned long mask = BITOP_MASK(nr);
+ unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+ unsigned long old = *p;
+
+ *p = old & ~mask;
+ return (old & mask) != 0;
+}
+
+/* WARNING: non atomic and it can be reordered! */
+static inline int __test_and_change_bit(int nr,
+ volatile unsigned long *addr)
+{
+ unsigned long mask = BITOP_MASK(nr);
+ unsigned long *p = ((unsigned long *)addr) + BITOP_WORD(nr);
+ unsigned long old = *p;
+
+ *p = old ^ mask;
+ return (old & mask) != 0;
+}
+
+/**
+ * test_bit - Determine whether a bit is set
+ * @nr: bit number to test
+ * @addr: Address to start counting from
+ */
+static inline int test_bit(int nr, const volatile unsigned long *addr)
+{
+ return 1UL & (addr[BITOP_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
+}
+
+#endif /* _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ */
diff --git a/tools/testing/radix-tree/linux/bug.h b/tools/testing/radix-tree/linux/bug.h
new file mode 100644
index 000000000000..ccbe444977df
--- /dev/null
+++ b/tools/testing/radix-tree/linux/bug.h
@@ -0,0 +1 @@
+#define WARN_ON_ONCE(x) assert(x)
diff --git a/tools/testing/radix-tree/linux/cpu.h b/tools/testing/radix-tree/linux/cpu.h
new file mode 100644
index 000000000000..60a40459f269
--- /dev/null
+++ b/tools/testing/radix-tree/linux/cpu.h
@@ -0,0 +1,34 @@
+
+#define hotcpu_notifier(a, b)
+
+#define CPU_ONLINE 0x0002 /* CPU (unsigned)v is up */
+#define CPU_UP_PREPARE 0x0003 /* CPU (unsigned)v coming up */
+#define CPU_UP_CANCELED 0x0004 /* CPU (unsigned)v NOT coming up */
+#define CPU_DOWN_PREPARE 0x0005 /* CPU (unsigned)v going down */
+#define CPU_DOWN_FAILED 0x0006 /* CPU (unsigned)v NOT going down */
+#define CPU_DEAD 0x0007 /* CPU (unsigned)v dead */
+#define CPU_DYING 0x0008 /* CPU (unsigned)v not running any task,
+ * not handling interrupts, soon dead.
+ * Called on the dying cpu, interrupts
+ * are already disabled. Must not
+ * sleep, must not fail */
+#define CPU_POST_DEAD 0x0009 /* CPU (unsigned)v dead, cpu_hotplug
+ * lock is dropped */
+#define CPU_STARTING 0x000A /* CPU (unsigned)v soon running.
+ * Called on the new cpu, just before
+ * enabling interrupts. Must not sleep,
+ * must not fail */
+#define CPU_DYING_IDLE 0x000B /* CPU (unsigned)v dying, reached
+ * idle loop. */
+#define CPU_BROKEN 0x000C /* CPU (unsigned)v did not die properly,
+ * perhaps due to preemption. */
+#define CPU_TASKS_FROZEN 0x0010
+
+#define CPU_ONLINE_FROZEN (CPU_ONLINE | CPU_TASKS_FROZEN)
+#define CPU_UP_PREPARE_FROZEN (CPU_UP_PREPARE | CPU_TASKS_FROZEN)
+#define CPU_UP_CANCELED_FROZEN (CPU_UP_CANCELED | CPU_TASKS_FROZEN)
+#define CPU_DOWN_PREPARE_FROZEN (CPU_DOWN_PREPARE | CPU_TASKS_FROZEN)
+#define CPU_DOWN_FAILED_FROZEN (CPU_DOWN_FAILED | CPU_TASKS_FROZEN)
+#define CPU_DEAD_FROZEN (CPU_DEAD | CPU_TASKS_FROZEN)
+#define CPU_DYING_FROZEN (CPU_DYING | CPU_TASKS_FROZEN)
+#define CPU_STARTING_FROZEN (CPU_STARTING | CPU_TASKS_FROZEN)
diff --git a/tools/testing/radix-tree/linux/export.h b/tools/testing/radix-tree/linux/export.h
new file mode 100644
index 000000000000..b6afd131998d
--- /dev/null
+++ b/tools/testing/radix-tree/linux/export.h
@@ -0,0 +1,2 @@
+
+#define EXPORT_SYMBOL(sym)
diff --git a/tools/testing/radix-tree/linux/gfp.h b/tools/testing/radix-tree/linux/gfp.h
new file mode 100644
index 000000000000..0e37f7a760eb
--- /dev/null
+++ b/tools/testing/radix-tree/linux/gfp.h
@@ -0,0 +1,10 @@
+#ifndef _GFP_H
+#define _GFP_H
+
+#define __GFP_BITS_SHIFT 22
+#define __GFP_BITS_MASK ((gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
+#define __GFP_WAIT 1
+#define __GFP_ACCOUNT 0
+#define __GFP_NOWARN 0
+
+#endif
diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h
new file mode 100644
index 000000000000..ae013b0160ac
--- /dev/null
+++ b/tools/testing/radix-tree/linux/kernel.h
@@ -0,0 +1,35 @@
+#ifndef _KERNEL_H
+#define _KERNEL_H
+
+#include <assert.h>
+#include <string.h>
+#include <stdio.h>
+#include <stddef.h>
+#include <limits.h>
+
+#ifndef NULL
+#define NULL 0
+#endif
+
+#define BUG_ON(expr) assert(!(expr))
+#define __init
+#define __must_check
+#define panic(expr)
+#define printk printf
+#define __force
+#define likely(c) (c)
+#define unlikely(c) (c)
+#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+#define container_of(ptr, type, member) ({ \
+ const typeof( ((type *)0)->member ) *__mptr = (ptr); \
+ (type *)( (char *)__mptr - offsetof(type, member) );})
+#define min(a, b) ((a) < (b) ? (a) : (b))
+
+static inline int in_interrupt(void)
+{
+ return 0;
+}
+#endif /* _KERNEL_H */
diff --git a/tools/testing/radix-tree/linux/kmemleak.h b/tools/testing/radix-tree/linux/kmemleak.h
new file mode 100644
index 000000000000..155f112786c4
--- /dev/null
+++ b/tools/testing/radix-tree/linux/kmemleak.h
@@ -0,0 +1 @@
+static inline void kmemleak_update_trace(const void *ptr) { }
diff --git a/tools/testing/radix-tree/linux/mempool.h b/tools/testing/radix-tree/linux/mempool.h
new file mode 100644
index 000000000000..6a2dc55b41d6
--- /dev/null
+++ b/tools/testing/radix-tree/linux/mempool.h
@@ -0,0 +1,16 @@
+
+#include <linux/slab.h>
+
+typedef void *(mempool_alloc_t)(int gfp_mask, void *pool_data);
+typedef void (mempool_free_t)(void *element, void *pool_data);
+
+typedef struct {
+ mempool_alloc_t *alloc;
+ mempool_free_t *free;
+ void *data;
+} mempool_t;
+
+void *mempool_alloc(mempool_t *pool, int gfp_mask);
+void mempool_free(void *element, mempool_t *pool);
+mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
+ mempool_free_t *free_fn, void *pool_data);
diff --git a/tools/testing/radix-tree/linux/notifier.h b/tools/testing/radix-tree/linux/notifier.h
new file mode 100644
index 000000000000..70e4797d5a46
--- /dev/null
+++ b/tools/testing/radix-tree/linux/notifier.h
@@ -0,0 +1,8 @@
+#ifndef _NOTIFIER_H
+#define _NOTIFIER_H
+
+struct notifier_block;
+
+#define NOTIFY_OK 0x0001 /* Suits me */
+
+#endif
diff --git a/tools/testing/radix-tree/linux/percpu.h b/tools/testing/radix-tree/linux/percpu.h
new file mode 100644
index 000000000000..5837f1d56f17
--- /dev/null
+++ b/tools/testing/radix-tree/linux/percpu.h
@@ -0,0 +1,7 @@
+
+#define DEFINE_PER_CPU(type, val) type val
+
+#define __get_cpu_var(var) var
+#define this_cpu_ptr(var) var
+#define per_cpu_ptr(ptr, cpu) ({ (void)(cpu); (ptr); })
+#define per_cpu(var, cpu) (*per_cpu_ptr(&(var), cpu))
diff --git a/tools/testing/radix-tree/linux/preempt.h b/tools/testing/radix-tree/linux/preempt.h
new file mode 100644
index 000000000000..6210672e3baa
--- /dev/null
+++ b/tools/testing/radix-tree/linux/preempt.h
@@ -0,0 +1,4 @@
+/* */
+
+#define preempt_disable() do { } while (0)
+#define preempt_enable() do { } while (0)
diff --git a/tools/testing/radix-tree/linux/radix-tree.h b/tools/testing/radix-tree/linux/radix-tree.h
new file mode 100644
index 000000000000..ce694ddd4aea
--- /dev/null
+++ b/tools/testing/radix-tree/linux/radix-tree.h
@@ -0,0 +1 @@
+#include "../../../../include/linux/radix-tree.h"
diff --git a/tools/testing/radix-tree/linux/rcupdate.h b/tools/testing/radix-tree/linux/rcupdate.h
new file mode 100644
index 000000000000..f7129ea2a899
--- /dev/null
+++ b/tools/testing/radix-tree/linux/rcupdate.h
@@ -0,0 +1,9 @@
+#ifndef _RCUPDATE_H
+#define _RCUPDATE_H
+
+#include <urcu.h>
+
+#define rcu_dereference_raw(p) rcu_dereference(p)
+#define rcu_dereference_protected(p, cond) rcu_dereference(p)
+
+#endif
diff --git a/tools/testing/radix-tree/linux/slab.h b/tools/testing/radix-tree/linux/slab.h
new file mode 100644
index 000000000000..57282506c21d
--- /dev/null
+++ b/tools/testing/radix-tree/linux/slab.h
@@ -0,0 +1,28 @@
+#ifndef SLAB_H
+#define SLAB_H
+
+#include <linux/types.h>
+
+#define GFP_KERNEL 1
+#define SLAB_HWCACHE_ALIGN 1
+#define SLAB_PANIC 2
+#define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */
+
+static inline int gfpflags_allow_blocking(gfp_t mask)
+{
+ return 1;
+}
+
+struct kmem_cache {
+ int size;
+ void (*ctor)(void *);
+};
+
+void *kmem_cache_alloc(struct kmem_cache *cachep, int flags);
+void kmem_cache_free(struct kmem_cache *cachep, void *objp);
+
+struct kmem_cache *
+kmem_cache_create(const char *name, size_t size, size_t offset,
+ unsigned long flags, void (*ctor)(void *));
+
+#endif /* SLAB_H */
diff --git a/tools/testing/radix-tree/linux/types.h b/tools/testing/radix-tree/linux/types.h
new file mode 100644
index 000000000000..72a9d85f6c76
--- /dev/null
+++ b/tools/testing/radix-tree/linux/types.h
@@ -0,0 +1,28 @@
+#ifndef _TYPES_H
+#define _TYPES_H
+
+#define __rcu
+#define __read_mostly
+
+#define BITS_PER_LONG (sizeof(long) * 8)
+
+struct list_head {
+ struct list_head *next, *prev;
+};
+
+static inline void INIT_LIST_HEAD(struct list_head *list)
+{
+ list->next = list;
+ list->prev = list;
+}
+
+typedef struct {
+ unsigned int x;
+} spinlock_t;
+
+#define uninitialized_var(x) x = x
+
+typedef unsigned gfp_t;
+#include <linux/gfp.h>
+
+#endif
diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c
new file mode 100644
index 000000000000..0e83cad27a9f
--- /dev/null
+++ b/tools/testing/radix-tree/main.c
@@ -0,0 +1,272 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <time.h>
+#include <assert.h>
+
+#include <linux/slab.h>
+#include <linux/radix-tree.h>
+
+#include "test.h"
+#include "regression.h"
+
+void __gang_check(unsigned long middle, long down, long up, int chunk, int hop)
+{
+ long idx;
+ RADIX_TREE(tree, GFP_KERNEL);
+
+ middle = 1 << 30;
+
+ for (idx = -down; idx < up; idx++)
+ item_insert(&tree, middle + idx);
+
+ item_check_absent(&tree, middle - down - 1);
+ for (idx = -down; idx < up; idx++)
+ item_check_present(&tree, middle + idx);
+ item_check_absent(&tree, middle + up);
+
+ item_gang_check_present(&tree, middle - down,
+ up + down, chunk, hop);
+ item_full_scan(&tree, middle - down, down + up, chunk);
+ item_kill_tree(&tree);
+}
+
+void gang_check(void)
+{
+ __gang_check(1 << 30, 128, 128, 35, 2);
+ __gang_check(1 << 31, 128, 128, 32, 32);
+ __gang_check(1 << 31, 128, 128, 32, 100);
+ __gang_check(1 << 31, 128, 128, 17, 7);
+ __gang_check(0xffff0000, 0, 65536, 17, 7);
+ __gang_check(0xfffffffe, 1, 1, 17, 7);
+}
+
+void __big_gang_check(void)
+{
+ unsigned long start;
+ int wrapped = 0;
+
+ start = 0;
+ do {
+ unsigned long old_start;
+
+// printf("0x%08lx\n", start);
+ __gang_check(start, rand() % 113 + 1, rand() % 71,
+ rand() % 157, rand() % 91 + 1);
+ old_start = start;
+ start += rand() % 1000000;
+ start %= 1ULL << 33;
+ if (start < old_start)
+ wrapped = 1;
+ } while (!wrapped);
+}
+
+void big_gang_check(void)
+{
+ int i;
+
+ for (i = 0; i < 1000; i++) {
+ __big_gang_check();
+ srand(time(0));
+ printf("%d ", i);
+ fflush(stdout);
+ }
+}
+
+void add_and_check(void)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+
+ item_insert(&tree, 44);
+ item_check_present(&tree, 44);
+ item_check_absent(&tree, 43);
+ item_kill_tree(&tree);
+}
+
+void dynamic_height_check(void)
+{
+ int i;
+ RADIX_TREE(tree, GFP_KERNEL);
+ tree_verify_min_height(&tree, 0);
+
+ item_insert(&tree, 42);
+ tree_verify_min_height(&tree, 42);
+
+ item_insert(&tree, 1000000);
+ tree_verify_min_height(&tree, 1000000);
+
+ assert(item_delete(&tree, 1000000));
+ tree_verify_min_height(&tree, 42);
+
+ assert(item_delete(&tree, 42));
+ tree_verify_min_height(&tree, 0);
+
+ for (i = 0; i < 1000; i++) {
+ item_insert(&tree, i);
+ tree_verify_min_height(&tree, i);
+ }
+
+ i--;
+ for (;;) {
+ assert(item_delete(&tree, i));
+ if (i == 0) {
+ tree_verify_min_height(&tree, 0);
+ break;
+ }
+ i--;
+ tree_verify_min_height(&tree, i);
+ }
+
+ item_kill_tree(&tree);
+}
+
+void check_copied_tags(struct radix_tree_root *tree, unsigned long start, unsigned long end, unsigned long *idx, int count, int fromtag, int totag)
+{
+ int i;
+
+ for (i = 0; i < count; i++) {
+/* if (i % 1000 == 0)
+ putchar('.'); */
+ if (idx[i] < start || idx[i] > end) {
+ if (item_tag_get(tree, idx[i], totag)) {
+ printf("%lu-%lu: %lu, tags %d-%d\n", start, end, idx[i], item_tag_get(tree, idx[i], fromtag), item_tag_get(tree, idx[i], totag));
+ }
+ assert(!item_tag_get(tree, idx[i], totag));
+ continue;
+ }
+ if (item_tag_get(tree, idx[i], fromtag) ^
+ item_tag_get(tree, idx[i], totag)) {
+ printf("%lu-%lu: %lu, tags %d-%d\n", start, end, idx[i], item_tag_get(tree, idx[i], fromtag), item_tag_get(tree, idx[i], totag));
+ }
+ assert(!(item_tag_get(tree, idx[i], fromtag) ^
+ item_tag_get(tree, idx[i], totag)));
+ }
+}
+
+#define ITEMS 50000
+
+void copy_tag_check(void)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+ unsigned long idx[ITEMS];
+ unsigned long start, end, count = 0, tagged, cur, tmp;
+ int i;
+
+// printf("generating radix tree indices...\n");
+ start = rand();
+ end = rand();
+ if (start > end && (rand() % 10)) {
+ cur = start;
+ start = end;
+ end = cur;
+ }
+ /* Specifically create items around the start and the end of the range
+ * with high probability to check for off by one errors */
+ cur = rand();
+ if (cur & 1) {
+ item_insert(&tree, start);
+ if (cur & 2) {
+ if (start <= end)
+ count++;
+ item_tag_set(&tree, start, 0);
+ }
+ }
+ if (cur & 4) {
+ item_insert(&tree, start-1);
+ if (cur & 8)
+ item_tag_set(&tree, start-1, 0);
+ }
+ if (cur & 16) {
+ item_insert(&tree, end);
+ if (cur & 32) {
+ if (start <= end)
+ count++;
+ item_tag_set(&tree, end, 0);
+ }
+ }
+ if (cur & 64) {
+ item_insert(&tree, end+1);
+ if (cur & 128)
+ item_tag_set(&tree, end+1, 0);
+ }
+
+ for (i = 0; i < ITEMS; i++) {
+ do {
+ idx[i] = rand();
+ } while (item_lookup(&tree, idx[i]));
+
+ item_insert(&tree, idx[i]);
+ if (rand() & 1) {
+ item_tag_set(&tree, idx[i], 0);
+ if (idx[i] >= start && idx[i] <= end)
+ count++;
+ }
+/* if (i % 1000 == 0)
+ putchar('.'); */
+ }
+
+// printf("\ncopying tags...\n");
+ cur = start;
+ tagged = radix_tree_range_tag_if_tagged(&tree, &cur, end, ITEMS, 0, 1);
+
+// printf("checking copied tags\n");
+ assert(tagged == count);
+ check_copied_tags(&tree, start, end, idx, ITEMS, 0, 1);
+
+ /* Copy tags in several rounds */
+// printf("\ncopying tags...\n");
+ cur = start;
+ do {
+ tmp = rand() % (count/10+2);
+ tagged = radix_tree_range_tag_if_tagged(&tree, &cur, end, tmp, 0, 2);
+ } while (tmp == tagged);
+
+// printf("%lu %lu %lu\n", tagged, tmp, count);
+// printf("checking copied tags\n");
+ check_copied_tags(&tree, start, end, idx, ITEMS, 0, 2);
+ assert(tagged < tmp);
+ verify_tag_consistency(&tree, 0);
+ verify_tag_consistency(&tree, 1);
+ verify_tag_consistency(&tree, 2);
+// printf("\n");
+ item_kill_tree(&tree);
+}
+
+static void single_thread_tests(void)
+{
+ int i;
+
+ tag_check();
+ printf("after tag_check: %d allocated\n", nr_allocated);
+ gang_check();
+ printf("after gang_check: %d allocated\n", nr_allocated);
+ add_and_check();
+ printf("after add_and_check: %d allocated\n", nr_allocated);
+ dynamic_height_check();
+ printf("after dynamic_height_check: %d allocated\n", nr_allocated);
+ big_gang_check();
+ printf("after big_gang_check: %d allocated\n", nr_allocated);
+ for (i = 0; i < 2000; i++) {
+ copy_tag_check();
+ printf("%d ", i);
+ fflush(stdout);
+ }
+ printf("after copy_tag_check: %d allocated\n", nr_allocated);
+}
+
+int main(void)
+{
+ rcu_register_thread();
+ radix_tree_init();
+
+ regression1_test();
+ regression2_test();
+ regression3_test();
+ single_thread_tests();
+
+ sleep(1);
+ printf("after sleep(1): %d allocated\n", nr_allocated);
+ rcu_unregister_thread();
+
+ exit(0);
+}
diff --git a/tools/testing/radix-tree/rcupdate.c b/tools/testing/radix-tree/rcupdate.c
new file mode 100644
index 000000000000..31a2d14225d6
--- /dev/null
+++ b/tools/testing/radix-tree/rcupdate.c
@@ -0,0 +1,86 @@
+#include <linux/rcupdate.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <assert.h>
+
+static pthread_mutex_t rculock = PTHREAD_MUTEX_INITIALIZER;
+static struct rcu_head *rcuhead_global = NULL;
+static __thread int nr_rcuhead = 0;
+static __thread struct rcu_head *rcuhead = NULL;
+static __thread struct rcu_head *rcutail = NULL;
+
+static pthread_cond_t rcu_worker_cond = PTHREAD_COND_INITIALIZER;
+
+/* switch to urcu implementation when it is merged. */
+void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *head))
+{
+ head->func = func;
+ head->next = rcuhead;
+ rcuhead = head;
+ if (!rcutail)
+ rcutail = head;
+ nr_rcuhead++;
+ if (nr_rcuhead >= 1000) {
+ int signal = 0;
+
+ pthread_mutex_lock(&rculock);
+ if (!rcuhead_global)
+ signal = 1;
+ rcutail->next = rcuhead_global;
+ rcuhead_global = head;
+ pthread_mutex_unlock(&rculock);
+
+ nr_rcuhead = 0;
+ rcuhead = NULL;
+ rcutail = NULL;
+
+ if (signal) {
+ pthread_cond_signal(&rcu_worker_cond);
+ }
+ }
+}
+
+static void *rcu_worker(void *arg)
+{
+ struct rcu_head *r;
+
+ rcupdate_thread_init();
+
+ while (1) {
+ pthread_mutex_lock(&rculock);
+ while (!rcuhead_global) {
+ pthread_cond_wait(&rcu_worker_cond, &rculock);
+ }
+ r = rcuhead_global;
+ rcuhead_global = NULL;
+
+ pthread_mutex_unlock(&rculock);
+
+ synchronize_rcu();
+
+ while (r) {
+ struct rcu_head *tmp = r->next;
+ r->func(r);
+ r = tmp;
+ }
+ }
+
+ rcupdate_thread_exit();
+
+ return NULL;
+}
+
+static pthread_t worker_thread;
+void rcupdate_init(void)
+{
+ pthread_create(&worker_thread, NULL, rcu_worker, NULL);
+}
+
+void rcupdate_thread_init(void)
+{
+ rcu_register_thread();
+}
+void rcupdate_thread_exit(void)
+{
+ rcu_unregister_thread();
+}
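The call_rcu() shim above queues callbacks on a per-thread list and hands batches of 1000 to the worker thread, which waits for a grace period before invoking them. A minimal sketch of how a caller defers a free through this shim, mirroring page_free() in regression1.c below; struct obj and its functions are illustrative names, not part of the patch:

#include <linux/kernel.h>	/* container_of(), via the test shims */
#include <linux/rcupdate.h>
#include <stdlib.h>

struct obj {
	int value;
	struct rcu_head rcu;		/* embedded head passed to call_rcu() */
};

static void obj_rcu_free(struct rcu_head *head)
{
	/* Runs in rcu_worker() after synchronize_rcu() has completed. */
	free(container_of(head, struct obj, rcu));
}

static void obj_free(struct obj *obj)
{
	/* Readers inside rcu_read_lock() may still see obj; defer the free. */
	call_rcu(&obj->rcu, obj_rcu_free);
}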
diff --git a/tools/testing/radix-tree/regression.h b/tools/testing/radix-tree/regression.h
new file mode 100644
index 000000000000..e018c4816688
--- /dev/null
+++ b/tools/testing/radix-tree/regression.h
@@ -0,0 +1,8 @@
+#ifndef __REGRESSION_H__
+#define __REGRESSION_H__
+
+void regression1_test(void);
+void regression2_test(void);
+void regression3_test(void);
+
+#endif
diff --git a/tools/testing/radix-tree/regression1.c b/tools/testing/radix-tree/regression1.c
new file mode 100644
index 000000000000..2d03a63bb79c
--- /dev/null
+++ b/tools/testing/radix-tree/regression1.c
@@ -0,0 +1,220 @@
+/*
+ * Regression1
+ * Description:
+ * Salman Qazi describes the following radix-tree bug:
+ *
+ * In the following case, we can get a deadlock:
+ *
+ * 0. The radix tree contains two items, one of which has index 0.
+ * 1. The reader (in this case find_get_pages) takes the rcu_read_lock.
+ * 2. The reader acquires slot(s) for item(s) including the index 0 item.
+ * 3. The non-zero index item is deleted, and as a consequence the other item
+ * is moved to the root of the tree. The place where it used to be is queued
+ * for deletion after the readers finish.
+ * 3b. The zero item is deleted, removing it from the direct slot; it remains
+ * in the rcu-delayed indirect node.
+ * 4. The reader looks at the index 0 slot, and finds that the page has a
+ * refcount of 0.
+ * 5. The reader looks at it again, hoping that the item will either be freed
+ * or the ref count will increase. This never happens, as the slot it is
+ * looking at will never be updated. Also, this slot can never be reclaimed
+ * because the reader is holding rcu_read_lock and is in an infinite loop.
+ *
+ * The fix is to generalize the existing "indirect" pointer case, which
+ * already forces a slot-lookup retry, into a general "retry the lookup" bit.
+ *
+ * Running:
+ * This test should run to completion in a few seconds. The above bug would
+ * cause it to hang indefinitely.
+ *
+ * Upstream commit:
+ * Not yet
+ */
+#include <linux/kernel.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/radix-tree.h>
+#include <linux/rcupdate.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <assert.h>
+
+#include "regression.h"
+
+static RADIX_TREE(mt_tree, GFP_KERNEL);
+static pthread_mutex_t mt_lock = PTHREAD_MUTEX_INITIALIZER;
+
+struct page {
+ pthread_mutex_t lock;
+ struct rcu_head rcu;
+ int count;
+ unsigned long index;
+};
+
+static struct page *page_alloc(void)
+{
+ struct page *p;
+ p = malloc(sizeof(struct page));
+ p->count = 1;
+ p->index = 1;
+ pthread_mutex_init(&p->lock, NULL);
+
+ return p;
+}
+
+static void page_rcu_free(struct rcu_head *rcu)
+{
+ struct page *p = container_of(rcu, struct page, rcu);
+ assert(!p->count);
+ pthread_mutex_destroy(&p->lock);
+ free(p);
+}
+
+static void page_free(struct page *p)
+{
+ call_rcu(&p->rcu, page_rcu_free);
+}
+
+static unsigned find_get_pages(unsigned long start,
+ unsigned int nr_pages, struct page **pages)
+{
+ unsigned int i;
+ unsigned int ret;
+ unsigned int nr_found;
+
+ rcu_read_lock();
+restart:
+ nr_found = radix_tree_gang_lookup_slot(&mt_tree,
+ (void ***)pages, NULL, start, nr_pages);
+ ret = 0;
+ for (i = 0; i < nr_found; i++) {
+ struct page *page;
+repeat:
+ page = radix_tree_deref_slot((void **)pages[i]);
+ if (unlikely(!page))
+ continue;
+
+ if (radix_tree_exception(page)) {
+ if (radix_tree_deref_retry(page)) {
+ /*
+ * Transient condition which can only trigger
+ * when entry at index 0 moves out of or back
+ * to root: none yet gotten, safe to restart.
+ */
+ assert((start | i) == 0);
+ goto restart;
+ }
+ /*
+ * No exceptional entries are inserted in this test.
+ */
+ assert(0);
+ }
+
+ pthread_mutex_lock(&page->lock);
+ if (!page->count) {
+ pthread_mutex_unlock(&page->lock);
+ goto repeat;
+ }
+ /* don't actually update page refcount */
+ pthread_mutex_unlock(&page->lock);
+
+ /* Has the page moved? */
+ if (unlikely(page != *((void **)pages[i]))) {
+ goto repeat;
+ }
+
+ pages[ret] = page;
+ ret++;
+ }
+ rcu_read_unlock();
+ return ret;
+}
+
+static pthread_barrier_t worker_barrier;
+
+static void *regression1_fn(void *arg)
+{
+ rcu_register_thread();
+
+ if (pthread_barrier_wait(&worker_barrier) ==
+ PTHREAD_BARRIER_SERIAL_THREAD) {
+ int j;
+
+ for (j = 0; j < 1000000; j++) {
+ struct page *p;
+
+ p = page_alloc();
+ pthread_mutex_lock(&mt_lock);
+ radix_tree_insert(&mt_tree, 0, p);
+ pthread_mutex_unlock(&mt_lock);
+
+ p = page_alloc();
+ pthread_mutex_lock(&mt_lock);
+ radix_tree_insert(&mt_tree, 1, p);
+ pthread_mutex_unlock(&mt_lock);
+
+ pthread_mutex_lock(&mt_lock);
+ p = radix_tree_delete(&mt_tree, 1);
+ pthread_mutex_lock(&p->lock);
+ p->count--;
+ pthread_mutex_unlock(&p->lock);
+ pthread_mutex_unlock(&mt_lock);
+ page_free(p);
+
+ pthread_mutex_lock(&mt_lock);
+ p = radix_tree_delete(&mt_tree, 0);
+ pthread_mutex_lock(&p->lock);
+ p->count--;
+ pthread_mutex_unlock(&p->lock);
+ pthread_mutex_unlock(&mt_lock);
+ page_free(p);
+ }
+ } else {
+ int j;
+
+ for (j = 0; j < 100000000; j++) {
+ struct page *pages[10];
+
+ find_get_pages(0, 10, pages);
+ }
+ }
+
+ rcu_unregister_thread();
+
+ return NULL;
+}
+
+static pthread_t *threads;
+void regression1_test(void)
+{
+ int nr_threads;
+ int i;
+ long arg;
+
+ /* Regression #1 */
+ printf("running regression test 1, should finish in under a minute\n");
+ nr_threads = 2;
+ pthread_barrier_init(&worker_barrier, NULL, nr_threads);
+
+ threads = malloc(nr_threads * sizeof(*threads));
+
+ for (i = 0; i < nr_threads; i++) {
+ arg = i;
+ if (pthread_create(&threads[i], NULL, regression1_fn, (void *)arg)) {
+ perror("pthread_create");
+ exit(1);
+ }
+ }
+
+ for (i = 0; i < nr_threads; i++) {
+ if (pthread_join(threads[i], NULL)) {
+ perror("pthread_join");
+ exit(1);
+ }
+ }
+
+ free(threads);
+
+ printf("regression test 1, done\n");
+}
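The header of regression1.c describes the lockless lookup pattern that find_get_pages() above exercises. A distilled, single-index sketch of that idiom, assuming the same-era radix-tree API (radix_tree_lookup_slot(), radix_tree_deref_slot(), radix_tree_deref_retry()); illustrative only, not part of the patch:

static void *lockless_lookup(struct radix_tree_root *root, unsigned long index)
{
	void **slot;
	void *entry;

	rcu_read_lock();
repeat:
	entry = NULL;
	slot = radix_tree_lookup_slot(root, index);
	if (slot) {
		entry = radix_tree_deref_slot(slot);
		/* The entry moved (e.g. out of or back to the root): retry. */
		if (radix_tree_exception(entry) &&
		    radix_tree_deref_retry(entry))
			goto repeat;
	}
	rcu_read_unlock();
	return entry;
}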
diff --git a/tools/testing/radix-tree/regression2.c b/tools/testing/radix-tree/regression2.c
new file mode 100644
index 000000000000..5d2fa28cdca3
--- /dev/null
+++ b/tools/testing/radix-tree/regression2.c
@@ -0,0 +1,126 @@
+/*
+ * Regression2
+ * Description:
+ * Toshiyuki Okajima describes the following radix-tree bug:
+ *
+ * In the following case, we can get a hang in
+ * radix_tree_gang_lookup_tag_slot.
+ *
+ * 0. The radix tree contains RADIX_TREE_MAP_SIZE items, and one of them is
+ * tagged PAGECACHE_TAG_DIRTY.
+ * 1. radix_tree_range_tag_if_tagged(, start, end, , PAGECACHE_TAG_DIRTY,
+ * PAGECACHE_TAG_TOWRITE) is called to add the PAGECACHE_TAG_TOWRITE tag
+ * to items tagged PAGECACHE_TAG_DIRTY. However, there is no item tagged
+ * PAGECACHE_TAG_DIRTY within the range from start to end, so no slot gains
+ * PAGECACHE_TAG_TOWRITE, yet the root tag is still set to
+ * PAGECACHE_TAG_TOWRITE.
+ * 2. An item is added to the radix tree and the tree's height grows from
+ * 1 to 2. The new radix tree node inherits the tag state of the root, so
+ * the new node carries PAGECACHE_TAG_TOWRITE even though no slot in its
+ * child node is tagged PAGECACHE_TAG_TOWRITE.
+ * 3. The PAGECACHE_TAG_DIRTY tag of a certain item is cleared.
+ * 4. All items with indices from 0 to RADIX_TREE_MAP_SIZE - 1 are released.
+ * (Only the item whose index is RADIX_TREE_MAP_SIZE remains in the radix
+ * tree.) As a result, the first slot of the radix tree node is NULL, but
+ * the tag that corresponds to it still has PAGECACHE_TAG_TOWRITE.
+ * 5. radix_tree_gang_lookup_tag_slot(PAGECACHE_TAG_TOWRITE) calls
+ * __lookup_tag, which returns 0 without advancing the index (its
+ * input/output parameter), because the first slot of the radix tree node
+ * is NULL while the corresponding tag has PAGECACHE_TAG_TOWRITE.
+ * radix_tree_gang_lookup_tag_slot therefore keeps calling __lookup_tag,
+ * but it never gets any items and loops forever.
+ *
+ * The fix is to make radix_tree_range_tag_if_tagged() leave the root tag
+ * untouched when it does not set any tags within the specified range.
+ *
+ * Running:
+ * This test should run to completion immediately. The above bug would cause it
+ * to hang indefinitely.
+ *
+ * Upstream commit:
+ * Not yet
+ */
+#include <linux/kernel.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/radix-tree.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "regression.h"
+
+#ifdef __KERNEL__
+#define RADIX_TREE_MAP_SHIFT (CONFIG_BASE_SMALL ? 4 : 6)
+#else
+#define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */
+#endif
+
+#define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT)
+#define PAGECACHE_TAG_DIRTY 0
+#define PAGECACHE_TAG_WRITEBACK 1
+#define PAGECACHE_TAG_TOWRITE 2
+
+static RADIX_TREE(mt_tree, GFP_KERNEL);
+unsigned long page_count = 0;
+
+struct page {
+ unsigned long index;
+};
+
+static struct page *page_alloc(void)
+{
+ struct page *p;
+ p = malloc(sizeof(struct page));
+ p->index = page_count++;
+
+ return p;
+}
+
+void regression2_test(void)
+{
+ int i;
+ struct page *p;
+ int max_slots = RADIX_TREE_MAP_SIZE;
+ unsigned long int start, end;
+ struct page *pages[1];
+
+ printf("running regression test 2 (should take milliseconds)\n");
+ /* 0. */
+ for (i = 0; i <= max_slots - 1; i++) {
+ p = page_alloc();
+ radix_tree_insert(&mt_tree, i, p);
+ }
+ radix_tree_tag_set(&mt_tree, max_slots - 1, PAGECACHE_TAG_DIRTY);
+
+ /* 1. */
+ start = 0;
+ end = max_slots - 2;
+ radix_tree_range_tag_if_tagged(&mt_tree, &start, end, 1,
+ PAGECACHE_TAG_DIRTY, PAGECACHE_TAG_TOWRITE);
+
+ /* 2. */
+ p = page_alloc();
+ radix_tree_insert(&mt_tree, max_slots, p);
+
+ /* 3. */
+ radix_tree_tag_clear(&mt_tree, max_slots - 1, PAGECACHE_TAG_DIRTY);
+
+ /* 4. */
+ for (i = max_slots - 1; i >= 0; i--)
+ radix_tree_delete(&mt_tree, i);
+
+ /* 5. */
+ // NOTE: start should not be 0, because with start == 0
+ // radix_tree_gang_lookup_tag_slot can return and the hang would not be
+ // reproduced.
+ start = 1;
+ end = max_slots - 2;
+ radix_tree_gang_lookup_tag_slot(&mt_tree, (void ***)pages, start, end,
+ PAGECACHE_TAG_TOWRITE);
+
+ /* Remove the remaining item */
+ radix_tree_delete(&mt_tree, max_slots);
+
+ printf("regression test 2, done\n");
+}
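regression2_test() above mimics the write-back pattern its header describes: copy DIRTY tags to TOWRITE over a range, then walk the TOWRITE entries in batches. A compact sketch of that pattern using the same two API calls and the PAGECACHE_TAG_* values defined in the test; the function name and batch size are illustrative, not part of the patch:

static void copy_and_walk(struct radix_tree_root *root,
			  unsigned long start, unsigned long end)
{
	void **slots[16];
	unsigned long first = start;
	unsigned int found, i;

	/* Tag every DIRTY entry in [start, end] as TOWRITE as well. */
	radix_tree_range_tag_if_tagged(root, &first, end, ~0UL,
				       PAGECACHE_TAG_DIRTY,
				       PAGECACHE_TAG_TOWRITE);

	/* Walk one batch of TOWRITE entries; real callers loop on this. */
	found = radix_tree_gang_lookup_tag_slot(root, slots, start, 16,
						PAGECACHE_TAG_TOWRITE);
	for (i = 0; i < found; i++) {
		/* *slots[i] is an entry whose tag was copied above. */
	}
}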
diff --git a/tools/testing/radix-tree/regression3.c b/tools/testing/radix-tree/regression3.c
new file mode 100644
index 000000000000..1f06ed73d0a8
--- /dev/null
+++ b/tools/testing/radix-tree/regression3.c
@@ -0,0 +1,117 @@
+/*
+ * Regression3
+ * Description:
+ * Helper radix_tree_iter_retry resets next_index to the current index.
+ * In the following radix_tree_next_slot the current chunk size becomes zero.
+ * This isn't checked, and it tries to dereference a NULL pointer in slot.
+ *
+ * Helper radix_tree_iter_next resets slot to NULL and next_index to
+ * index + 1; for tagged iteration it must also reset the cached tags in the
+ * iterator, so that the next radix_tree_next_slot aborts and takes the
+ * slow path into radix_tree_next_chunk.
+ *
+ * Running:
+ * This test should run to completion immediately. The above bug would
+ * cause it to segfault.
+ *
+ * Upstream commit:
+ * Not yet
+ */
+#include <linux/kernel.h>
+#include <linux/gfp.h>
+#include <linux/slab.h>
+#include <linux/radix-tree.h>
+#include <stdlib.h>
+#include <stdio.h>
+
+#include "regression.h"
+
+void regression3_test(void)
+{
+ RADIX_TREE(root, GFP_KERNEL);
+ void *ptr0 = (void *)4ul;
+ void *ptr = (void *)8ul;
+ struct radix_tree_iter iter;
+ void **slot;
+ bool first;
+
+ printf("running regression test 3 (should take milliseconds)\n");
+
+ radix_tree_insert(&root, 0, ptr0);
+ radix_tree_tag_set(&root, 0, 0);
+
+ first = true;
+ radix_tree_for_each_tagged(slot, &root, &iter, 0, 0) {
+ printf("tagged %ld %p\n", iter.index, *slot);
+ if (first) {
+ radix_tree_insert(&root, 1, ptr);
+ radix_tree_tag_set(&root, 1, 0);
+ first = false;
+ }
+ if (radix_tree_deref_retry(*slot)) {
+ printf("retry at %ld\n", iter.index);
+ slot = radix_tree_iter_retry(&iter);
+ continue;
+ }
+ }
+ radix_tree_delete(&root, 1);
+
+ first = true;
+ radix_tree_for_each_slot(slot, &root, &iter, 0) {
+ printf("slot %ld %p\n", iter.index, *slot);
+ if (first) {
+ radix_tree_insert(&root, 1, ptr);
+ first = false;
+ }
+ if (radix_tree_deref_retry(*slot)) {
+ printk("retry at %ld\n", iter.index);
+ slot = radix_tree_iter_retry(&iter);
+ continue;
+ }
+ }
+ radix_tree_delete(&root, 1);
+
+ first = true;
+ radix_tree_for_each_contig(slot, &root, &iter, 0) {
+ printk("contig %ld %p\n", iter.index, *slot);
+ if (first) {
+ radix_tree_insert(&root, 1, ptr);
+ first = false;
+ }
+ if (radix_tree_deref_retry(*slot)) {
+ printk("retry at %ld\n", iter.index);
+ slot = radix_tree_iter_retry(&iter);
+ continue;
+ }
+ }
+
+ radix_tree_for_each_slot(slot, &root, &iter, 0) {
+ printf("slot %ld %p\n", iter.index, *slot);
+ if (!iter.index) {
+ printf("next at %ld\n", iter.index);
+ slot = radix_tree_iter_next(&iter);
+ }
+ }
+
+ radix_tree_for_each_contig(slot, &root, &iter, 0) {
+ printf("contig %ld %p\n", iter.index, *slot);
+ if (!iter.index) {
+ printf("next at %ld\n", iter.index);
+ slot = radix_tree_iter_next(&iter);
+ }
+ }
+
+ radix_tree_tag_set(&root, 0, 0);
+ radix_tree_tag_set(&root, 1, 0);
+ radix_tree_for_each_tagged(slot, &root, &iter, 0, 0) {
+ printf("tagged %ld %p\n", iter.index, *slot);
+ if (!iter.index) {
+ printf("next at %ld\n", iter.index);
+ slot = radix_tree_iter_next(&iter);
+ }
+ }
+
+ radix_tree_delete(&root, 0);
+ radix_tree_delete(&root, 1);
+
+ printf("regression test 3 passed\n");
+}
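regression3_test() above mixes several iterator cases; the two helpers it exercises have a canonical shape when walking a tree. A sketch of that shape, assuming the same iterator API (radix_tree_for_each_slot(), radix_tree_iter_retry(), radix_tree_iter_next()); illustrative only, not part of the patch:

static void iterate_all(struct radix_tree_root *root)
{
	struct radix_tree_iter iter;
	void **slot;

	radix_tree_for_each_slot(slot, root, &iter, 0) {
		void *entry = radix_tree_deref_slot(slot);

		/* The entry moved under us: retry the current chunk. */
		if (radix_tree_deref_retry(entry)) {
			slot = radix_tree_iter_retry(&iter);
			continue;
		}

		/* ... use entry ... */

		/*
		 * Discard the iterator's cached state and resume at
		 * index + 1 via the slow path; callers do this after
		 * modifying the tree under the iteration (doing it
		 * unconditionally, as here, is merely slower).
		 */
		slot = radix_tree_iter_next(&iter);
	}
}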
diff --git a/tools/testing/radix-tree/tag_check.c b/tools/testing/radix-tree/tag_check.c
new file mode 100644
index 000000000000..83136be552a0
--- /dev/null
+++ b/tools/testing/radix-tree/tag_check.c
@@ -0,0 +1,332 @@
+#include <stdlib.h>
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <linux/slab.h>
+#include <linux/radix-tree.h>
+
+#include "test.h"
+
+
+static void
+__simple_checks(struct radix_tree_root *tree, unsigned long index, int tag)
+{
+ int ret;
+
+ item_check_absent(tree, index);
+ assert(item_tag_get(tree, index, tag) == 0);
+
+ item_insert(tree, index);
+ assert(item_tag_get(tree, index, tag) == 0);
+ item_tag_set(tree, index, tag);
+ ret = item_tag_get(tree, index, tag);
+ assert(ret != 0);
+ ret = item_delete(tree, index);
+ assert(ret != 0);
+ item_insert(tree, index);
+ ret = item_tag_get(tree, index, tag);
+ assert(ret == 0);
+ ret = item_delete(tree, index);
+ assert(ret != 0);
+ ret = item_delete(tree, index);
+ assert(ret == 0);
+}
+
+void simple_checks(void)
+{
+ unsigned long index;
+ RADIX_TREE(tree, GFP_KERNEL);
+
+ for (index = 0; index < 10000; index++) {
+ __simple_checks(&tree, index, 0);
+ __simple_checks(&tree, index, 1);
+ }
+ verify_tag_consistency(&tree, 0);
+ verify_tag_consistency(&tree, 1);
+ printf("before item_kill_tree: %d allocated\n", nr_allocated);
+ item_kill_tree(&tree);
+ printf("after item_kill_tree: %d allocated\n", nr_allocated);
+}
+
+/*
+ * Check that tags propagate correctly when extending a tree.
+ */
+static void extend_checks(void)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+
+ item_insert(&tree, 43);
+ assert(item_tag_get(&tree, 43, 0) == 0);
+ item_tag_set(&tree, 43, 0);
+ assert(item_tag_get(&tree, 43, 0) == 1);
+ item_insert(&tree, 1000000);
+ assert(item_tag_get(&tree, 43, 0) == 1);
+
+ item_insert(&tree, 0);
+ item_tag_set(&tree, 0, 0);
+ item_delete(&tree, 1000000);
+ assert(item_tag_get(&tree, 43, 0) != 0);
+ item_delete(&tree, 43);
+ assert(item_tag_get(&tree, 43, 0) == 0); /* crash */
+ assert(item_tag_get(&tree, 0, 0) == 1);
+
+ verify_tag_consistency(&tree, 0);
+
+ item_kill_tree(&tree);
+}
+
+/*
+ * Check that tags propagate correctly when contracting a tree.
+ */
+static void contract_checks(void)
+{
+ struct item *item;
+ int tmp;
+ RADIX_TREE(tree, GFP_KERNEL);
+
+ tmp = 1<<RADIX_TREE_MAP_SHIFT;
+ item_insert(&tree, tmp);
+ item_insert(&tree, tmp+1);
+ item_tag_set(&tree, tmp, 0);
+ item_tag_set(&tree, tmp, 1);
+ item_tag_set(&tree, tmp+1, 0);
+ item_delete(&tree, tmp+1);
+ item_tag_clear(&tree, tmp, 1);
+
+ assert(radix_tree_gang_lookup_tag(&tree, (void **)&item, 0, 1, 0) == 1);
+ assert(radix_tree_gang_lookup_tag(&tree, (void **)&item, 0, 1, 1) == 0);
+
+ assert(item_tag_get(&tree, tmp, 0) == 1);
+ assert(item_tag_get(&tree, tmp, 1) == 0);
+
+ verify_tag_consistency(&tree, 0);
+ item_kill_tree(&tree);
+}
+
+/*
+ * Stupid tag thrasher
+ *
+ * Create a large linear array that mirrors the tree. Each element of the
+ * array is kept coherent with the corresponding index in the tree.
+ */
+
+enum {
+ NODE_ABSENT = 0,
+ NODE_PRESENT = 1,
+ NODE_TAGGED = 2,
+};
+
+#define THRASH_SIZE (1000 * 1000)
+#define N 127
+#define BATCH 33
+
+static void gang_check(struct radix_tree_root *tree,
+ char *thrash_state, int tag)
+{
+ struct item *items[BATCH];
+ int nr_found;
+ unsigned long index = 0;
+ unsigned long last_index = 0;
+
+ while ((nr_found = radix_tree_gang_lookup_tag(tree, (void **)items,
+ index, BATCH, tag))) {
+ int i;
+
+ for (i = 0; i < nr_found; i++) {
+ struct item *item = items[i];
+
+ while (last_index < item->index) {
+ assert(thrash_state[last_index] != NODE_TAGGED);
+ last_index++;
+ }
+ assert(thrash_state[last_index] == NODE_TAGGED);
+ last_index++;
+ }
+ index = items[nr_found - 1]->index + 1;
+ }
+}
+
+static void do_thrash(struct radix_tree_root *tree, char *thrash_state, int tag)
+{
+ int insert_chunk;
+ int delete_chunk;
+ int tag_chunk;
+ int untag_chunk;
+ int total_tagged = 0;
+ int total_present = 0;
+
+ for (insert_chunk = 1; insert_chunk < THRASH_SIZE; insert_chunk *= N)
+ for (delete_chunk = 1; delete_chunk < THRASH_SIZE; delete_chunk *= N)
+ for (tag_chunk = 1; tag_chunk < THRASH_SIZE; tag_chunk *= N)
+ for (untag_chunk = 1; untag_chunk < THRASH_SIZE; untag_chunk *= N) {
+ int i;
+ unsigned long index;
+ int nr_inserted = 0;
+ int nr_deleted = 0;
+ int nr_tagged = 0;
+ int nr_untagged = 0;
+ int actual_total_tagged;
+ int actual_total_present;
+
+ for (i = 0; i < insert_chunk; i++) {
+ index = rand() % THRASH_SIZE;
+ if (thrash_state[index] != NODE_ABSENT)
+ continue;
+ item_check_absent(tree, index);
+ item_insert(tree, index);
+ assert(thrash_state[index] != NODE_PRESENT);
+ thrash_state[index] = NODE_PRESENT;
+ nr_inserted++;
+ total_present++;
+ }
+
+ for (i = 0; i < delete_chunk; i++) {
+ index = rand() % THRASH_SIZE;
+ if (thrash_state[index] == NODE_ABSENT)
+ continue;
+ item_check_present(tree, index);
+ if (item_tag_get(tree, index, tag)) {
+ assert(thrash_state[index] == NODE_TAGGED);
+ total_tagged--;
+ } else {
+ assert(thrash_state[index] == NODE_PRESENT);
+ }
+ item_delete(tree, index);
+ assert(thrash_state[index] != NODE_ABSENT);
+ thrash_state[index] = NODE_ABSENT;
+ nr_deleted++;
+ total_present--;
+ }
+
+ for (i = 0; i < tag_chunk; i++) {
+ index = rand() % THRASH_SIZE;
+ if (thrash_state[index] != NODE_PRESENT) {
+ if (item_lookup(tree, index))
+ assert(item_tag_get(tree, index, tag));
+ continue;
+ }
+ item_tag_set(tree, index, tag);
+ item_tag_set(tree, index, tag);
+ assert(thrash_state[index] != NODE_TAGGED);
+ thrash_state[index] = NODE_TAGGED;
+ nr_tagged++;
+ total_tagged++;
+ }
+
+ for (i = 0; i < untag_chunk; i++) {
+ index = rand() % THRASH_SIZE;
+ if (thrash_state[index] != NODE_TAGGED)
+ continue;
+ item_check_present(tree, index);
+ assert(item_tag_get(tree, index, tag));
+ item_tag_clear(tree, index, tag);
+ item_tag_clear(tree, index, tag);
+ assert(thrash_state[index] != NODE_PRESENT);
+ thrash_state[index] = NODE_PRESENT;
+ nr_untagged++;
+ total_tagged--;
+ }
+
+ actual_total_tagged = 0;
+ actual_total_present = 0;
+ for (index = 0; index < THRASH_SIZE; index++) {
+ switch (thrash_state[index]) {
+ case NODE_ABSENT:
+ item_check_absent(tree, index);
+ break;
+ case NODE_PRESENT:
+ item_check_present(tree, index);
+ assert(!item_tag_get(tree, index, tag));
+ actual_total_present++;
+ break;
+ case NODE_TAGGED:
+ item_check_present(tree, index);
+ assert(item_tag_get(tree, index, tag));
+ actual_total_present++;
+ actual_total_tagged++;
+ break;
+ }
+ }
+
+ gang_check(tree, thrash_state, tag);
+
+ printf("%d(%d) %d(%d) %d(%d) %d(%d) / "
+ "%d(%d) present, %d(%d) tagged\n",
+ insert_chunk, nr_inserted,
+ delete_chunk, nr_deleted,
+ tag_chunk, nr_tagged,
+ untag_chunk, nr_untagged,
+ total_present, actual_total_present,
+ total_tagged, actual_total_tagged);
+ }
+}
+
+static void thrash_tags(void)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+ char *thrash_state;
+
+ thrash_state = malloc(THRASH_SIZE);
+ memset(thrash_state, 0, THRASH_SIZE);
+
+ do_thrash(&tree, thrash_state, 0);
+
+ verify_tag_consistency(&tree, 0);
+ item_kill_tree(&tree);
+ free(thrash_state);
+}
+
+static void leak_check(void)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+
+ item_insert(&tree, 1000000);
+ item_delete(&tree, 1000000);
+ item_kill_tree(&tree);
+}
+
+static void __leak_check(void)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+
+ printf("%d: nr_allocated=%d\n", __LINE__, nr_allocated);
+ item_insert(&tree, 1000000);
+ printf("%d: nr_allocated=%d\n", __LINE__, nr_allocated);
+ item_delete(&tree, 1000000);
+ printf("%d: nr_allocated=%d\n", __LINE__, nr_allocated);
+ item_kill_tree(&tree);
+ printf("%d: nr_allocated=%d\n", __LINE__, nr_allocated);
+}
+
+static void single_check(void)
+{
+ struct item *items[BATCH];
+ RADIX_TREE(tree, GFP_KERNEL);
+ int ret;
+
+ item_insert(&tree, 0);
+ item_tag_set(&tree, 0, 0);
+ ret = radix_tree_gang_lookup_tag(&tree, (void **)items, 0, BATCH, 0);
+ assert(ret == 1);
+ ret = radix_tree_gang_lookup_tag(&tree, (void **)items, 1, BATCH, 0);
+ assert(ret == 0);
+ verify_tag_consistency(&tree, 0);
+ verify_tag_consistency(&tree, 1);
+ item_kill_tree(&tree);
+}
+
+void tag_check(void)
+{
+ single_check();
+ extend_checks();
+ contract_checks();
+ printf("after extend_checks: %d allocated\n", nr_allocated);
+ __leak_check();
+ leak_check();
+ printf("after leak_check: %d allocated\n", nr_allocated);
+ simple_checks();
+ printf("after simple_checks: %d allocated\n", nr_allocated);
+ thrash_tags();
+ printf("after thrash_tags: %d allocated\n", nr_allocated);
+}
diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c
new file mode 100644
index 000000000000..2bebf34cdc27
--- /dev/null
+++ b/tools/testing/radix-tree/test.c
@@ -0,0 +1,219 @@
+#include <stdlib.h>
+#include <assert.h>
+#include <stdio.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/bitops.h>
+
+#include "test.h"
+
+struct item *
+item_tag_set(struct radix_tree_root *root, unsigned long index, int tag)
+{
+ return radix_tree_tag_set(root, index, tag);
+}
+
+struct item *
+item_tag_clear(struct radix_tree_root *root, unsigned long index, int tag)
+{
+ return radix_tree_tag_clear(root, index, tag);
+}
+
+int item_tag_get(struct radix_tree_root *root, unsigned long index, int tag)
+{
+ return radix_tree_tag_get(root, index, tag);
+}
+
+int __item_insert(struct radix_tree_root *root, struct item *item)
+{
+ return radix_tree_insert(root, item->index, item);
+}
+
+int item_insert(struct radix_tree_root *root, unsigned long index)
+{
+ return __item_insert(root, item_create(index));
+}
+
+int item_delete(struct radix_tree_root *root, unsigned long index)
+{
+ struct item *item = radix_tree_delete(root, index);
+
+ if (item) {
+ assert(item->index == index);
+ free(item);
+ return 1;
+ }
+ return 0;
+}
+
+struct item *item_create(unsigned long index)
+{
+ struct item *ret = malloc(sizeof(*ret));
+
+ ret->index = index;
+ return ret;
+}
+
+void item_check_present(struct radix_tree_root *root, unsigned long index)
+{
+ struct item *item;
+
+ item = radix_tree_lookup(root, index);
+ assert(item != 0);
+ assert(item->index == index);
+}
+
+struct item *item_lookup(struct radix_tree_root *root, unsigned long index)
+{
+ return radix_tree_lookup(root, index);
+}
+
+void item_check_absent(struct radix_tree_root *root, unsigned long index)
+{
+ struct item *item;
+
+ item = radix_tree_lookup(root, index);
+ assert(item == 0);
+}
+
+/*
+ * Scan only the passed range [start, start+nr), expecting the items to be present
+ */
+void item_gang_check_present(struct radix_tree_root *root,
+ unsigned long start, unsigned long nr,
+ int chunk, int hop)
+{
+ struct item *items[chunk];
+ unsigned long into;
+
+ for (into = 0; into < nr; ) {
+ int nfound;
+ int nr_to_find = chunk;
+ int i;
+
+ if (nr_to_find > (nr - into))
+ nr_to_find = nr - into;
+
+ nfound = radix_tree_gang_lookup(root, (void **)items,
+ start + into, nr_to_find);
+ assert(nfound == nr_to_find);
+ for (i = 0; i < nfound; i++)
+ assert(items[i]->index == start + into + i);
+ into += hop;
+ }
+}
+
+/*
+ * Scan the entire tree, expecting only the items in [start, start+nr) to be present
+ */
+void item_full_scan(struct radix_tree_root *root, unsigned long start,
+ unsigned long nr, int chunk)
+{
+ struct item *items[chunk];
+ unsigned long into = 0;
+ unsigned long this_index = start;
+ int nfound;
+ int i;
+
+// printf("%s(0x%08lx, 0x%08lx, %d)\n", __FUNCTION__, start, nr, chunk);
+
+ while ((nfound = radix_tree_gang_lookup(root, (void **)items, into,
+ chunk))) {
+// printf("At 0x%08lx, nfound=%d\n", into, nfound);
+ for (i = 0; i < nfound; i++) {
+ assert(items[i]->index == this_index);
+ this_index++;
+ }
+// printf("Found 0x%08lx->0x%08lx\n",
+// items[0]->index, items[nfound-1]->index);
+ into = this_index;
+ }
+ if (chunk)
+ assert(this_index == start + nr);
+ nfound = radix_tree_gang_lookup(root, (void **)items,
+ this_index, chunk);
+ assert(nfound == 0);
+}
+
+static int verify_node(struct radix_tree_node *slot, unsigned int tag,
+ unsigned int height, int tagged)
+{
+ int anyset = 0;
+ int i;
+ int j;
+
+ slot = indirect_to_ptr(slot);
+
+ /* Verify consistency at this level */
+ for (i = 0; i < RADIX_TREE_TAG_LONGS; i++) {
+ if (slot->tags[tag][i]) {
+ anyset = 1;
+ break;
+ }
+ }
+ if (tagged != anyset) {
+ printf("tag: %u, height %u, tagged: %d, anyset: %d\n", tag, height, tagged, anyset);
+ for (j = 0; j < RADIX_TREE_MAX_TAGS; j++) {
+ printf("tag %d: ", j);
+ for (i = 0; i < RADIX_TREE_TAG_LONGS; i++)
+ printf("%016lx ", slot->tags[j][i]);
+ printf("\n");
+ }
+ return 1;
+ }
+ assert(tagged == anyset);
+
+ /* Go for next level */
+ if (height > 1) {
+ for (i = 0; i < RADIX_TREE_MAP_SIZE; i++)
+ if (slot->slots[i])
+ if (verify_node(slot->slots[i], tag, height - 1,
+ !!test_bit(i, slot->tags[tag]))) {
+ printf("Failure at off %d\n", i);
+ for (j = 0; j < RADIX_TREE_MAX_TAGS; j++) {
+ printf("tag %d: ", j);
+ for (i = 0; i < RADIX_TREE_TAG_LONGS; i++)
+ printf("%016lx ", slot->tags[j][i]);
+ printf("\n");
+ }
+ return 1;
+ }
+ }
+ return 0;
+}
+
+void verify_tag_consistency(struct radix_tree_root *root, unsigned int tag)
+{
+ if (!root->height)
+ return;
+ verify_node(root->rnode, tag, root->height, !!root_tag_get(root, tag));
+}
+
+void item_kill_tree(struct radix_tree_root *root)
+{
+ struct item *items[32];
+ int nfound;
+
+ while ((nfound = radix_tree_gang_lookup(root, (void **)items, 0, 32))) {
+ int i;
+
+ for (i = 0; i < nfound; i++) {
+ void *ret;
+
+ ret = radix_tree_delete(root, items[i]->index);
+ assert(ret == items[i]);
+ free(items[i]);
+ }
+ }
+ assert(radix_tree_gang_lookup(root, (void **)items, 0, 32) == 0);
+ assert(root->rnode == NULL);
+}
+
+void tree_verify_min_height(struct radix_tree_root *root, int maxindex)
+{
+ assert(radix_tree_maxindex(root->height) >= maxindex);
+ if (root->height > 1)
+ assert(radix_tree_maxindex(root->height-1) < maxindex);
+ else if (root->height == 1)
+ assert(radix_tree_maxindex(root->height-1) <= maxindex);
+}
diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h
new file mode 100644
index 000000000000..4e1d95faaa94
--- /dev/null
+++ b/tools/testing/radix-tree/test.h
@@ -0,0 +1,40 @@
+#include <linux/gfp.h>
+#include <linux/types.h>
+#include <linux/radix-tree.h>
+#include <linux/rcupdate.h>
+
+struct item {
+ unsigned long index;
+};
+
+struct item *item_create(unsigned long index);
+int __item_insert(struct radix_tree_root *root, struct item *item);
+int item_insert(struct radix_tree_root *root, unsigned long index);
+int item_delete(struct radix_tree_root *root, unsigned long index);
+struct item *item_lookup(struct radix_tree_root *root, unsigned long index);
+
+void item_check_present(struct radix_tree_root *root, unsigned long index);
+void item_check_absent(struct radix_tree_root *root, unsigned long index);
+void item_gang_check_present(struct radix_tree_root *root,
+ unsigned long start, unsigned long nr,
+ int chunk, int hop);
+void item_full_scan(struct radix_tree_root *root, unsigned long start,
+ unsigned long nr, int chunk);
+void item_kill_tree(struct radix_tree_root *root);
+
+void tag_check(void);
+
+struct item *
+item_tag_set(struct radix_tree_root *root, unsigned long index, int tag);
+struct item *
+item_tag_clear(struct radix_tree_root *root, unsigned long index, int tag);
+int item_tag_get(struct radix_tree_root *root, unsigned long index, int tag);
+void tree_verify_min_height(struct radix_tree_root *root, int maxindex);
+void verify_tag_consistency(struct radix_tree_root *root, unsigned int tag);
+
+extern int nr_allocated;
+
+/* Normally private parts of lib/radix-tree.c */
+void *indirect_to_ptr(void *ptr);
+int root_tag_get(struct radix_tree_root *root, unsigned int tag);
+unsigned long radix_tree_maxindex(unsigned int height);
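For reference, a minimal sketch of a test built from the helpers declared here, following the add_and_check() pattern in main.c; example_check() is an illustrative name, not part of the patch:

#include <assert.h>
#include <linux/radix-tree.h>

#include "test.h"

static void example_check(void)
{
	RADIX_TREE(tree, GFP_KERNEL);

	item_insert(&tree, 7);		/* allocates and inserts an item */
	item_tag_set(&tree, 7, 0);
	item_check_present(&tree, 7);
	assert(item_tag_get(&tree, 7, 0));
	item_check_absent(&tree, 8);
	item_kill_tree(&tree);		/* frees every item, empties the tree */
}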
diff --git a/tools/testing/selftests/breakpoints/.gitignore b/tools/testing/selftests/breakpoints/.gitignore
index 9b3193d06608..a23bb4a6f06c 100644
--- a/tools/testing/selftests/breakpoints/.gitignore
+++ b/tools/testing/selftests/breakpoints/.gitignore
@@ -1 +1,2 @@
breakpoint_test
+step_after_suspend_test
diff --git a/tools/testing/selftests/breakpoints/Makefile b/tools/testing/selftests/breakpoints/Makefile
index c0d957015f52..74e533fd4bc5 100644
--- a/tools/testing/selftests/breakpoints/Makefile
+++ b/tools/testing/selftests/breakpoints/Makefile
@@ -6,9 +6,11 @@ ifeq ($(ARCH),x86)
TEST_PROGS := breakpoint_test
endif
+TEST_PROGS += step_after_suspend_test
+
all: $(TEST_PROGS)
include ../lib.mk
clean:
- rm -fr breakpoint_test
+ rm -fr breakpoint_test step_after_suspend_test
diff --git a/tools/testing/selftests/breakpoints/step_after_suspend_test.c b/tools/testing/selftests/breakpoints/step_after_suspend_test.c
new file mode 100644
index 000000000000..60b8a95dac26
--- /dev/null
+++ b/tools/testing/selftests/breakpoints/step_after_suspend_test.c
@@ -0,0 +1,218 @@
+/*
+ * Copyright (C) 2016 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/ptrace.h>
+#include <sys/stat.h>
+#include <sys/timerfd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "../kselftest.h"
+
+void child(int cpu)
+{
+ cpu_set_t set;
+
+ CPU_ZERO(&set);
+ CPU_SET(cpu, &set);
+ if (sched_setaffinity(0, sizeof(set), &set) != 0) {
+ perror("sched_setaffinity() failed");
+ _exit(1);
+ }
+
+ if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0) {
+ perror("ptrace(PTRACE_TRACEME) failed");
+ _exit(1);
+ }
+
+ if (raise(SIGSTOP) != 0) {
+ perror("raise(SIGSTOP) failed");
+ _exit(1);
+ }
+
+ _exit(0);
+}
+
+bool run_test(int cpu)
+{
+ int status;
+ pid_t pid = fork();
+ pid_t wpid;
+
+ if (pid < 0) {
+ perror("fork() failed");
+ return false;
+ }
+ if (pid == 0)
+ child(cpu);
+
+ wpid = waitpid(pid, &status, __WALL);
+ if (wpid != pid) {
+ perror("waitpid() failed");
+ return false;
+ }
+ if (!WIFSTOPPED(status)) {
+ printf("child did not stop\n");
+ return false;
+ }
+ if (WSTOPSIG(status) != SIGSTOP) {
+ printf("child did not stop with SIGSTOP\n");
+ return false;
+ }
+
+ if (ptrace(PTRACE_SINGLESTEP, pid, NULL, NULL) < 0) {
+ if (errno == EIO) {
+ printf("ptrace(PTRACE_SINGLESTEP) not supported on this architecture\n");
+ ksft_exit_skip();
+ }
+ perror("ptrace(PTRACE_SINGLESTEP) failed");
+ return false;
+ }
+
+ wpid = waitpid(pid, &status, __WALL);
+ if (wpid != pid) {
+ perror("waitpid() failed");
+ return false;
+ }
+ if (WIFEXITED(status)) {
+ printf("child did not single-step\n");
+ return false;
+ }
+ if (!WIFSTOPPED(status)) {
+ printf("child did not stop\n");
+ return false;
+ }
+ if (WSTOPSIG(status) != SIGTRAP) {
+ printf("child did not stop with SIGTRAP\n");
+ return false;
+ }
+
+ if (ptrace(PTRACE_CONT, pid, NULL, NULL) < 0) {
+ perror("ptrace(PTRACE_CONT) failed");
+ return false;
+ }
+
+ wpid = waitpid(pid, &status, __WALL);
+ if (wpid != pid) {
+ perror("waitpid() failed");
+ return false;
+ }
+ if (!WIFEXITED(status)) {
+ printf("child did not exit after PTRACE_CONT\n");
+ return false;
+ }
+
+ return true;
+}
+
+void suspend(void)
+{
+ int power_state_fd;
+ int timerfd;
+ int err;
+ struct itimerspec spec = {};
+
+ power_state_fd = open("/sys/power/state", O_RDWR);
+ if (power_state_fd < 0) {
+ perror("open(\"/sys/power/state\") failed (is this test running as root?)");
+ ksft_exit_fail();
+ }
+
+ timerfd = timerfd_create(CLOCK_BOOTTIME_ALARM, 0);
+ if (timerfd < 0) {
+ perror("timerfd_create() failed");
+ ksft_exit_fail();
+ }
+
+ spec.it_value.tv_sec = 5;
+ err = timerfd_settime(timerfd, 0, &spec, NULL);
+ if (err < 0) {
+ perror("timerfd_settime() failed");
+ ksft_exit_fail();
+ }
+
+ if (write(power_state_fd, "mem", strlen("mem")) != strlen("mem")) {
+ perror("entering suspend failed");
+ ksft_exit_fail();
+ }
+
+ close(timerfd);
+ close(power_state_fd);
+}
+
+int main(int argc, char **argv)
+{
+ int opt;
+ bool do_suspend = true;
+ bool succeeded = true;
+ cpu_set_t available_cpus;
+ int err;
+ int cpu;
+
+ while ((opt = getopt(argc, argv, "n")) != -1) {
+ switch (opt) {
+ case 'n':
+ do_suspend = false;
+ break;
+ default:
+ printf("Usage: %s [-n]\n", argv[0]);
+ printf(" -n: do not trigger a suspend/resume cycle before the test\n");
+ return -1;
+ }
+ }
+
+ if (do_suspend)
+ suspend();
+
+ err = sched_getaffinity(0, sizeof(available_cpus), &available_cpus);
+ if (err < 0) {
+ perror("sched_getaffinity() failed");
+ ksft_exit_fail();
+ }
+
+ for (cpu = 0; cpu < CPU_SETSIZE; cpu++) {
+ bool test_success;
+
+ if (!CPU_ISSET(cpu, &available_cpus))
+ continue;
+
+ test_success = run_test(cpu);
+ printf("CPU %d: ", cpu);
+ if (test_success) {
+ printf("[OK]\n");
+ ksft_inc_pass_cnt();
+ } else {
+ printf("[FAILED]\n");
+ ksft_inc_fail_cnt();
+ succeeded = false;
+ }
+ }
+
+ ksft_print_cnts();
+ if (succeeded)
+ ksft_exit_pass();
+ else
+ ksft_exit_fail();
+}
diff --git a/tools/testing/selftests/cpu-hotplug/config b/tools/testing/selftests/cpu-hotplug/config
new file mode 100644
index 000000000000..e6ab090cfbf3
--- /dev/null
+++ b/tools/testing/selftests/cpu-hotplug/config
@@ -0,0 +1,2 @@
+CONFIG_NOTIFIER_ERROR_INJECTION=y
+CONFIG_CPU_NOTIFIER_ERROR_INJECT=m
diff --git a/tools/testing/selftests/firmware/config b/tools/testing/selftests/firmware/config
new file mode 100644
index 000000000000..c8137f70e291
--- /dev/null
+++ b/tools/testing/selftests/firmware/config
@@ -0,0 +1 @@
+CONFIG_TEST_FIRMWARE=y
diff --git a/tools/testing/selftests/ftrace/config b/tools/testing/selftests/ftrace/config
new file mode 100644
index 000000000000..ef8214661612
--- /dev/null
+++ b/tools/testing/selftests/ftrace/config
@@ -0,0 +1 @@
+CONFIG_FTRACE=y
diff --git a/tools/testing/selftests/ipc/.gitignore b/tools/testing/selftests/ipc/.gitignore
new file mode 100644
index 000000000000..84b66a3c1f74
--- /dev/null
+++ b/tools/testing/selftests/ipc/.gitignore
@@ -0,0 +1 @@
+msgque_test
diff --git a/tools/testing/selftests/ipc/config b/tools/testing/selftests/ipc/config
new file mode 100644
index 000000000000..0702447109f5
--- /dev/null
+++ b/tools/testing/selftests/ipc/config
@@ -0,0 +1,2 @@
+CONFIG_EXPERT=y
+CONFIG_CHECKPOINT_RESTORE=y
diff --git a/tools/testing/selftests/lib/Makefile b/tools/testing/selftests/lib/Makefile
index 47147b968514..08360060ab14 100644
--- a/tools/testing/selftests/lib/Makefile
+++ b/tools/testing/selftests/lib/Makefile
@@ -3,6 +3,6 @@
# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
all:
-TEST_PROGS := printf.sh
+TEST_PROGS := printf.sh bitmap.sh
include ../lib.mk
diff --git a/tools/testing/selftests/lib/bitmap.sh b/tools/testing/selftests/lib/bitmap.sh
new file mode 100755
index 000000000000..2da187b6ddad
--- /dev/null
+++ b/tools/testing/selftests/lib/bitmap.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+# Runs bitmap infrastructure tests using test_bitmap kernel module
+
+if /sbin/modprobe -q test_bitmap; then
+ /sbin/modprobe -q -r test_bitmap
+ echo "bitmap: ok"
+else
+ echo "bitmap: [FAIL]"
+ exit 1
+fi
diff --git a/tools/testing/selftests/media_tests/.gitignore b/tools/testing/selftests/media_tests/.gitignore
new file mode 100644
index 000000000000..1c0711708b98
--- /dev/null
+++ b/tools/testing/selftests/media_tests/.gitignore
@@ -0,0 +1 @@
+media_device_test
diff --git a/tools/testing/selftests/media_tests/Makefile b/tools/testing/selftests/media_tests/Makefile
new file mode 100644
index 000000000000..7071bcc1d066
--- /dev/null
+++ b/tools/testing/selftests/media_tests/Makefile
@@ -0,0 +1,7 @@
+TEST_PROGS := media_device_test
+all: $(TEST_PROGS)
+
+include ../lib.mk
+
+clean:
+ rm -fr media_device_test
diff --git a/tools/testing/selftests/media_tests/media_device_test.c b/tools/testing/selftests/media_tests/media_device_test.c
new file mode 100644
index 000000000000..cbf53a032ab5
--- /dev/null
+++ b/tools/testing/selftests/media_tests/media_device_test.c
@@ -0,0 +1,95 @@
+/*
+ * media_device_test.c - Media Controller Device Kref API Test
+ *
+ * Copyright (c) 2016 Shuah Khan <shuahkh@osg.samsung.com>
+ * Copyright (c) 2016 Samsung Electronics Co., Ltd.
+ *
+ * This file is released under the GPLv2.
+ */
+
+/*
+ * This file adds a test for the Media Controller API.
+ * This test should be run as root and should not be
+ * included in the Kselftest run. This test should be
+ * run when hardware and a driver that make use of the
+ * Media Controller API are present in the system.
+ *
+ * This test opens the user-specified media device and calls
+ * the MEDIA_IOC_DEVICE_INFO ioctl in a loop, once every 10
+ * seconds.
+ *
+ * Usage:
+ * sudo ./media_device_test -d /dev/mediaX
+ *
+ * While the test is running, remove the device and
+ * ensure there are no use-after-free errors or
+ * other oopses in dmesg. Enable the KASAN kernel
+ * config option for use-after-free error detection.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <linux/media.h>
+
+int main(int argc, char **argv)
+{
+ int opt;
+ char media_device[256];
+ int count = 0;
+ struct media_device_info mdi;
+ int ret;
+ int fd;
+
+ if (argc < 2) {
+ printf("Usage: %s [-d </dev/mediaX>]\n", argv[0]);
+ exit(-1);
+ }
+
+ /* Process arguments */
+ while ((opt = getopt(argc, argv, "d:")) != -1) {
+ switch (opt) {
+ case 'd':
+ strncpy(media_device, optarg, sizeof(media_device) - 1);
+ media_device[sizeof(media_device)-1] = '\0';
+ break;
+ default:
+ printf("Usage: %s [-d </dev/mediaX>]\n", argv[0]);
+ exit(-1);
+ }
+ }
+
+ if (getuid() != 0) {
+ printf("Please run the test as root - Exiting.\n");
+ exit(-1);
+ }
+
+ /* Open Media device and keep it open */
+ fd = open(media_device, O_RDWR);
+ if (fd == -1) {
+ printf("Media Device open errno %s\n", strerror(errno));
+ exit(-1);
+ }
+
+ printf("\nNote:\n"
+ "While test is running, remove the device and\n"
+ "ensure there are no use after free errors and\n"
+ "other Oops in the dmesg. Enable KaSan kernel\n"
+ "config option for use-after-free error detection.\n\n");
+
+ while (count < 100) {
+ ret = ioctl(fd, MEDIA_IOC_DEVICE_INFO, &mdi);
+ if (ret < 0)
+ printf("Media Device Info errno %s\n", strerror(errno));
+ else
+ printf("Media device model %s driver %s\n",
+ mdi.model, mdi.driver);
+ sleep(10);
+ count++;
+ }
+}
diff --git a/tools/testing/selftests/memory-hotplug/config b/tools/testing/selftests/memory-hotplug/config
new file mode 100644
index 000000000000..2fde30191a47
--- /dev/null
+++ b/tools/testing/selftests/memory-hotplug/config
@@ -0,0 +1,4 @@
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTPLUG_SPARSE=y
+CONFIG_NOTIFIER_ERROR_INJECTION=y
+CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
diff --git a/tools/testing/selftests/mount/config b/tools/testing/selftests/mount/config
new file mode 100644
index 000000000000..b5d881e48548
--- /dev/null
+++ b/tools/testing/selftests/mount/config
@@ -0,0 +1,2 @@
+CONFIG_USER_NS=y
+CONFIG_DEVPTS_MULTIPLE_INSTANCES=y
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 6fb23366b258..0840684deb7d 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -2,3 +2,5 @@ socket
psock_fanout
psock_tpacket
reuseport_bpf
+reuseport_bpf_cpu
+reuseport_dualstack
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 41449b5ad0a9..0e5340742620 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -4,7 +4,7 @@ CFLAGS = -Wall -O2 -g
CFLAGS += -I../../../../usr/include/
-NET_PROGS = socket psock_fanout psock_tpacket reuseport_bpf
+NET_PROGS = socket psock_fanout psock_tpacket reuseport_bpf reuseport_bpf_cpu reuseport_dualstack
all: $(NET_PROGS)
%: %.c
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
new file mode 100644
index 000000000000..e57b4ac40e72
--- /dev/null
+++ b/tools/testing/selftests/net/config
@@ -0,0 +1,3 @@
+CONFIG_USER_NS=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_TEST_BPF=m
diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c
index bec1b5dd2530..96ba386b1b7b 100644
--- a/tools/testing/selftests/net/reuseport_bpf.c
+++ b/tools/testing/selftests/net/reuseport_bpf.c
@@ -9,10 +9,12 @@
#include <errno.h>
#include <error.h>
+#include <fcntl.h>
#include <linux/bpf.h>
#include <linux/filter.h>
#include <linux/unistd.h>
#include <netinet/in.h>
+#include <netinet/tcp.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -169,9 +171,15 @@ static void build_recv_group(const struct test_params p, int fd[], uint16_t mod,
if (bind(fd[i], addr, sockaddr_size()))
error(1, errno, "failed to bind recv socket %d", i);
- if (p.protocol == SOCK_STREAM)
+ if (p.protocol == SOCK_STREAM) {
+ opt = 4;
+ if (setsockopt(fd[i], SOL_TCP, TCP_FASTOPEN, &opt,
+ sizeof(opt)))
+ error(1, errno,
+ "failed to set TCP_FASTOPEN on %d", i);
if (listen(fd[i], p.recv_socks * 10))
error(1, errno, "failed to listen on socket");
+ }
}
free(addr);
}
@@ -189,10 +197,8 @@ static void send_from(struct test_params p, uint16_t sport, char *buf,
if (bind(fd, saddr, sockaddr_size()))
error(1, errno, "failed to bind send socket");
- if (connect(fd, daddr, sockaddr_size()))
- error(1, errno, "failed to connect");
- if (send(fd, buf, len, 0) < 0)
+ if (sendto(fd, buf, len, MSG_FASTOPEN, daddr, sockaddr_size()) < 0)
error(1, errno, "failed to send message");
close(fd);
@@ -260,7 +266,7 @@ static void test_recv_order(const struct test_params p, int fd[], int mod)
}
}
-static void test_reuseport_ebpf(const struct test_params p)
+static void test_reuseport_ebpf(struct test_params p)
{
int i, fd[p.recv_socks];
@@ -268,6 +274,7 @@ static void test_reuseport_ebpf(const struct test_params p)
build_recv_group(p, fd, p.recv_socks, attach_ebpf);
test_recv_order(p, fd, p.recv_socks);
+ p.send_port_min += p.recv_socks * 2;
fprintf(stderr, "Reprograming, testing mod %zd...\n", p.recv_socks / 2);
attach_ebpf(fd[0], p.recv_socks / 2);
test_recv_order(p, fd, p.recv_socks / 2);
@@ -276,7 +283,7 @@ static void test_reuseport_ebpf(const struct test_params p)
close(fd[i]);
}
-static void test_reuseport_cbpf(const struct test_params p)
+static void test_reuseport_cbpf(struct test_params p)
{
int i, fd[p.recv_socks];
@@ -284,6 +291,7 @@ static void test_reuseport_cbpf(const struct test_params p)
build_recv_group(p, fd, p.recv_socks, attach_cbpf);
test_recv_order(p, fd, p.recv_socks);
+ p.send_port_min += p.recv_socks * 2;
fprintf(stderr, "Reprograming, testing mod %zd...\n", p.recv_socks / 2);
attach_cbpf(fd[0], p.recv_socks / 2);
test_recv_order(p, fd, p.recv_socks / 2);
@@ -377,7 +385,7 @@ static void test_filter_no_reuseport(const struct test_params p)
static void test_filter_without_bind(void)
{
- int fd1, fd2;
+ int fd1, fd2, opt = 1;
fprintf(stderr, "Testing filter add without bind...\n");
fd1 = socket(AF_INET, SOCK_DGRAM, 0);
@@ -386,6 +394,10 @@ static void test_filter_without_bind(void)
fd2 = socket(AF_INET, SOCK_DGRAM, 0);
if (fd2 < 0)
error(1, errno, "failed to create socket 2");
+ if (setsockopt(fd1, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
+ error(1, errno, "failed to set SO_REUSEPORT on socket 1");
+ if (setsockopt(fd2, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)))
+ error(1, errno, "failed to set SO_REUSEPORT on socket 2");
attach_ebpf(fd1, 10);
attach_cbpf(fd2, 10);
@@ -394,6 +406,32 @@ static void test_filter_without_bind(void)
close(fd2);
}
+void enable_fastopen(void)
+{
+ int fd = open("/proc/sys/net/ipv4/tcp_fastopen", 0);
+ int rw_mask = 3; /* 0x1: client side; 0x2: server side */
+ int val, size;
+ char buf[16];
+
+ if (fd < 0)
+ error(1, errno, "Unable to open tcp_fastopen sysctl");
+ if (read(fd, buf, sizeof(buf)) <= 0)
+ error(1, errno, "Unable to read tcp_fastopen sysctl");
+ val = atoi(buf);
+ close(fd);
+
+ if ((val & rw_mask) != rw_mask) {
+ fd = open("/proc/sys/net/ipv4/tcp_fastopen", O_RDWR);
+ if (fd < 0)
+ error(1, errno,
+ "Unable to open tcp_fastopen sysctl for writing");
+ val |= rw_mask;
+ size = snprintf(buf, 16, "%d", val);
+ if (write(fd, buf, size) <= 0)
+ error(1, errno, "Unable to write tcp_fastopen sysctl");
+ close(fd);
+ }
+}
int main(void)
{
@@ -506,6 +544,71 @@ int main(void)
.recv_port = 8007,
.send_port_min = 9100});
+ /* TCP fastopen is required for the TCP tests */
+ enable_fastopen();
+ fprintf(stderr, "---- IPv4 TCP ----\n");
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET,
+ .send_family = AF_INET,
+ .protocol = SOCK_STREAM,
+ .recv_socks = 10,
+ .recv_port = 8008,
+ .send_port_min = 9120});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET,
+ .send_family = AF_INET,
+ .protocol = SOCK_STREAM,
+ .recv_socks = 10,
+ .recv_port = 8009,
+ .send_port_min = 9160});
+ test_extra_filter((struct test_params) {
+ .recv_family = AF_INET,
+ .protocol = SOCK_STREAM,
+ .recv_port = 8010});
+ test_filter_no_reuseport((struct test_params) {
+ .recv_family = AF_INET,
+ .protocol = SOCK_STREAM,
+ .recv_port = 8011});
+
+ fprintf(stderr, "---- IPv6 TCP ----\n");
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET6,
+ .protocol = SOCK_STREAM,
+ .recv_socks = 10,
+ .recv_port = 8012,
+ .send_port_min = 9200});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET6,
+ .protocol = SOCK_STREAM,
+ .recv_socks = 10,
+ .recv_port = 8013,
+ .send_port_min = 9240});
+ test_extra_filter((struct test_params) {
+ .recv_family = AF_INET6,
+ .protocol = SOCK_STREAM,
+ .recv_port = 8014});
+ test_filter_no_reuseport((struct test_params) {
+ .recv_family = AF_INET6,
+ .protocol = SOCK_STREAM,
+ .recv_port = 8015});
+
+ fprintf(stderr, "---- IPv6 TCP w/ mapped IPv4 ----\n");
+ test_reuseport_ebpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET,
+ .protocol = SOCK_STREAM,
+ .recv_socks = 10,
+ .recv_port = 8016,
+ .send_port_min = 9320});
+ test_reuseport_cbpf((struct test_params) {
+ .recv_family = AF_INET6,
+ .send_family = AF_INET,
+ .protocol = SOCK_STREAM,
+ .recv_socks = 10,
+ .recv_port = 8017,
+ .send_port_min = 9360});
test_filter_without_bind();
diff --git a/tools/testing/selftests/net/reuseport_bpf_cpu.c b/tools/testing/selftests/net/reuseport_bpf_cpu.c
new file mode 100644
index 000000000000..b23d6f54de7b
--- /dev/null
+++ b/tools/testing/selftests/net/reuseport_bpf_cpu.c
@@ -0,0 +1,258 @@
+/*
+ * Test functionality of BPF filters with SO_REUSEPORT. This program creates
+ * an SO_REUSEPORT receiver group containing one socket per CPU core. It then
+ * creates a BPF program that will select a socket from this group based
+ * on the core id that receives the packet. The sending code artificially
+ * moves itself to run on different core ids and sends one message from
+ * each core. Since these packets are delivered over loopback, they should
+ * arrive on the same core that sent them. The receiving code then ensures
+ * that the packet was received on the socket for the corresponding core id.
+ * This entire process is done for several different core id permutations
+ * and for each IPv4/IPv6 and TCP/UDP combination.
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <linux/filter.h>
+#include <linux/in.h>
+#include <linux/unistd.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+static const int PORT = 8888;
+
+static void build_rcv_group(int *rcv_fd, size_t len, int family, int proto)
+{
+ struct sockaddr_storage addr;
+ struct sockaddr_in *addr4;
+ struct sockaddr_in6 *addr6;
+ size_t i;
+ int opt;
+
+ switch (family) {
+ case AF_INET:
+ addr4 = (struct sockaddr_in *)&addr;
+ addr4->sin_family = AF_INET;
+ addr4->sin_addr.s_addr = htonl(INADDR_ANY);
+ addr4->sin_port = htons(PORT);
+ break;
+ case AF_INET6:
+ addr6 = (struct sockaddr_in6 *)&addr;
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_addr = in6addr_any;
+ addr6->sin6_port = htons(PORT);
+ break;
+ default:
+ error(1, 0, "Unsupported family %d", family);
+ }
+
+ for (i = 0; i < len; ++i) {
+ rcv_fd[i] = socket(family, proto, 0);
+ if (rcv_fd[i] < 0)
+ error(1, errno, "failed to create receive socket");
+
+ opt = 1;
+ if (setsockopt(rcv_fd[i], SOL_SOCKET, SO_REUSEPORT, &opt,
+ sizeof(opt)))
+ error(1, errno, "failed to set SO_REUSEPORT");
+
+ if (bind(rcv_fd[i], (struct sockaddr *)&addr, sizeof(addr)))
+ error(1, errno, "failed to bind receive socket");
+
+ if (proto == SOCK_STREAM && listen(rcv_fd[i], len * 10))
+ error(1, errno, "failed to listen on receive port");
+ }
+}
+
+static void attach_bpf(int fd)
+{
+ struct sock_filter code[] = {
+ /* A = raw_smp_processor_id() */
+ { BPF_LD | BPF_W | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_CPU },
+ /* return A */
+ { BPF_RET | BPF_A, 0, 0, 0 },
+ };
+ struct sock_fprog p = {
+ .len = 2,
+ .filter = code,
+ };
+
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_CBPF, &p, sizeof(p)))
+ error(1, errno, "failed to set SO_ATTACH_REUSEPORT_CBPF");
+}
+
+static void send_from_cpu(int cpu_id, int family, int proto)
+{
+ struct sockaddr_storage saddr, daddr;
+ struct sockaddr_in *saddr4, *daddr4;
+ struct sockaddr_in6 *saddr6, *daddr6;
+ cpu_set_t cpu_set;
+ int fd;
+
+ switch (family) {
+ case AF_INET:
+ saddr4 = (struct sockaddr_in *)&saddr;
+ saddr4->sin_family = AF_INET;
+ saddr4->sin_addr.s_addr = htonl(INADDR_ANY);
+ saddr4->sin_port = 0;
+
+ daddr4 = (struct sockaddr_in *)&daddr;
+ daddr4->sin_family = AF_INET;
+ daddr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ daddr4->sin_port = htons(PORT);
+ break;
+ case AF_INET6:
+ saddr6 = (struct sockaddr_in6 *)&saddr;
+ saddr6->sin6_family = AF_INET6;
+ saddr6->sin6_addr = in6addr_any;
+ saddr6->sin6_port = 0;
+
+ daddr6 = (struct sockaddr_in6 *)&daddr;
+ daddr6->sin6_family = AF_INET6;
+ daddr6->sin6_addr = in6addr_loopback;
+ daddr6->sin6_port = htons(PORT);
+ break;
+ default:
+ error(1, 0, "Unsupported family %d", family);
+ }
+
+ memset(&cpu_set, 0, sizeof(cpu_set));
+ CPU_SET(cpu_id, &cpu_set);
+ if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set) < 0)
+ error(1, errno, "failed to pin to cpu");
+
+ fd = socket(family, proto, 0);
+ if (fd < 0)
+ error(1, errno, "failed to create send socket");
+
+ if (bind(fd, (struct sockaddr *)&saddr, sizeof(saddr)))
+ error(1, errno, "failed to bind send socket");
+
+ if (connect(fd, (struct sockaddr *)&daddr, sizeof(daddr)))
+ error(1, errno, "failed to connect send socket");
+
+ if (send(fd, "a", 1, 0) < 0)
+ error(1, errno, "failed to send message");
+
+ close(fd);
+}
+
+static
+void receive_on_cpu(int *rcv_fd, int len, int epfd, int cpu_id, int proto)
+{
+ struct epoll_event ev;
+ int i, fd;
+ char buf[8];
+
+ i = epoll_wait(epfd, &ev, 1, -1);
+ if (i < 0)
+ error(1, errno, "epoll_wait failed");
+
+ if (proto == SOCK_STREAM) {
+ fd = accept(ev.data.fd, NULL, NULL);
+ if (fd < 0)
+ error(1, errno, "failed to accept");
+ i = recv(fd, buf, sizeof(buf), 0);
+ close(fd);
+ } else {
+ i = recv(ev.data.fd, buf, sizeof(buf), 0);
+ }
+
+ if (i < 0)
+ error(1, errno, "failed to recv");
+
+ for (i = 0; i < len; ++i)
+ if (ev.data.fd == rcv_fd[i])
+ break;
+ if (i == len)
+ error(1, 0, "failed to find socket");
+ fprintf(stderr, "send cpu %d, receive socket %d\n", cpu_id, i);
+ if (cpu_id != i)
+ error(1, 0, "cpu id/receive socket mismatch");
+}
+
+static void test(int *rcv_fd, int len, int family, int proto)
+{
+ struct epoll_event ev;
+ int epfd, cpu;
+
+ build_rcv_group(rcv_fd, len, family, proto);
+ attach_bpf(rcv_fd[0]);
+
+ epfd = epoll_create(1);
+ if (epfd < 0)
+ error(1, errno, "failed to create epoll");
+ for (cpu = 0; cpu < len; ++cpu) {
+ ev.events = EPOLLIN;
+ ev.data.fd = rcv_fd[cpu];
+ if (epoll_ctl(epfd, EPOLL_CTL_ADD, rcv_fd[cpu], &ev))
+ error(1, errno, "failed to register sock epoll");
+ }
+
+ /* Forward iterate */
+ for (cpu = 0; cpu < len; ++cpu) {
+ send_from_cpu(cpu, family, proto);
+ receive_on_cpu(rcv_fd, len, epfd, cpu, proto);
+ }
+
+ /* Reverse iterate */
+ for (cpu = len - 1; cpu >= 0; --cpu) {
+ send_from_cpu(cpu, family, proto);
+ receive_on_cpu(rcv_fd, len, epfd, cpu, proto);
+ }
+
+ /* Even cores */
+ for (cpu = 0; cpu < len; cpu += 2) {
+ send_from_cpu(cpu, family, proto);
+ receive_on_cpu(rcv_fd, len, epfd, cpu, proto);
+ }
+
+ /* Odd cores */
+ for (cpu = 1; cpu < len; cpu += 2) {
+ send_from_cpu(cpu, family, proto);
+ receive_on_cpu(rcv_fd, len, epfd, cpu, proto);
+ }
+
+ close(epfd);
+ for (cpu = 0; cpu < len; ++cpu)
+ close(rcv_fd[cpu]);
+}
+
+int main(void)
+{
+ int *rcv_fd, cpus;
+
+ cpus = sysconf(_SC_NPROCESSORS_ONLN);
+ if (cpus <= 0)
+ error(1, errno, "failed counting cpus");
+
+ rcv_fd = calloc(cpus, sizeof(int));
+ if (!rcv_fd)
+ error(1, 0, "failed to allocate array");
+
+ fprintf(stderr, "---- IPv4 UDP ----\n");
+ test(rcv_fd, cpus, AF_INET, SOCK_DGRAM);
+
+ fprintf(stderr, "---- IPv6 UDP ----\n");
+ test(rcv_fd, cpus, AF_INET6, SOCK_DGRAM);
+
+ fprintf(stderr, "---- IPv4 TCP ----\n");
+ test(rcv_fd, cpus, AF_INET, SOCK_STREAM);
+
+ fprintf(stderr, "---- IPv6 TCP ----\n");
+ test(rcv_fd, cpus, AF_INET6, SOCK_STREAM);
+
+ free(rcv_fd);
+
+ fprintf(stderr, "SUCCESS\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/net/reuseport_dualstack.c b/tools/testing/selftests/net/reuseport_dualstack.c
new file mode 100644
index 000000000000..90958aaaafb9
--- /dev/null
+++ b/tools/testing/selftests/net/reuseport_dualstack.c
@@ -0,0 +1,208 @@
+/*
+ * It is possible to use SO_REUSEPORT to open multiple sockets bound to
+ * equivalent local addresses using AF_INET and AF_INET6 at the same time. If
+ * the AF_INET6 socket has IPV6_V6ONLY set, it's clear which socket should
+ * receive a given incoming packet. However, when it is not set, incoming v4
+ * packets should prefer the AF_INET socket(s). This behavior was defined with
+ * the original SO_REUSEPORT implementation, but was broken by commit
+ * e32ea7e74727 ("soreuseport: fast reuseport UDP socket selection").
+ * This test creates these mixed AF_INET/AF_INET6 sockets and asserts the
+ * AF_INET preference for v4 packets.
+ */
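
For contrast, the ambiguity described above disappears entirely when the AF_INET6 sockets are marked v6-only, which this test deliberately does not do. A minimal sketch (fd6 stands for any AF_INET6 socket in the group):

	/* Not done by this test: with IPV6_V6ONLY set, v4 traffic can only
	 * ever match the AF_INET sockets, so there is nothing to prefer.
	 */
	int one = 1;
	if (setsockopt(fd6, IPPROTO_IPV6, IPV6_V6ONLY, &one, sizeof(one)))
		error(1, errno, "failed to set IPV6_V6ONLY");
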
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <linux/in.h>
+#include <linux/unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/epoll.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+static const int PORT = 8888;
+
+static void build_rcv_fd(int family, int proto, int *rcv_fds, int count)
+{
+ struct sockaddr_storage addr;
+ struct sockaddr_in *addr4;
+ struct sockaddr_in6 *addr6;
+ int opt, i;
+
+ switch (family) {
+ case AF_INET:
+ addr4 = (struct sockaddr_in *)&addr;
+ addr4->sin_family = AF_INET;
+ addr4->sin_addr.s_addr = htonl(INADDR_ANY);
+ addr4->sin_port = htons(PORT);
+ break;
+ case AF_INET6:
+ addr6 = (struct sockaddr_in6 *)&addr;
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_addr = in6addr_any;
+ addr6->sin6_port = htons(PORT);
+ break;
+ default:
+ error(1, 0, "Unsupported family %d", family);
+ }
+
+ for (i = 0; i < count; ++i) {
+ rcv_fds[i] = socket(family, proto, 0);
+ if (rcv_fds[i] < 0)
+ error(1, errno, "failed to create receive socket");
+
+ opt = 1;
+ if (setsockopt(rcv_fds[i], SOL_SOCKET, SO_REUSEPORT, &opt,
+ sizeof(opt)))
+ error(1, errno, "failed to set SO_REUSEPORT");
+
+ if (bind(rcv_fds[i], (struct sockaddr *)&addr, sizeof(addr)))
+ error(1, errno, "failed to bind receive socket");
+
+ if (proto == SOCK_STREAM && listen(rcv_fds[i], 10))
+ error(1, errno, "failed to listen on receive port");
+ }
+}
+
+static void send_from_v4(int proto)
+{
+ struct sockaddr_in saddr, daddr;
+ int fd;
+
+ saddr.sin_family = AF_INET;
+ saddr.sin_addr.s_addr = htonl(INADDR_ANY);
+ saddr.sin_port = 0;
+
+ daddr.sin_family = AF_INET;
+ daddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ daddr.sin_port = htons(PORT);
+
+ fd = socket(AF_INET, proto, 0);
+ if (fd < 0)
+ error(1, errno, "failed to create send socket");
+
+ if (bind(fd, (struct sockaddr *)&saddr, sizeof(saddr)))
+ error(1, errno, "failed to bind send socket");
+
+ if (connect(fd, (struct sockaddr *)&daddr, sizeof(daddr)))
+ error(1, errno, "failed to connect send socket");
+
+ if (send(fd, "a", 1, 0) < 0)
+ error(1, errno, "failed to send message");
+
+ close(fd);
+}
+
+static int receive_once(int epfd, int proto)
+{
+ struct epoll_event ev;
+ int i, fd;
+ char buf[8];
+
+ i = epoll_wait(epfd, &ev, 1, -1);
+ if (i < 0)
+ error(1, errno, "epoll_wait failed");
+
+ if (proto == SOCK_STREAM) {
+ fd = accept(ev.data.fd, NULL, NULL);
+ if (fd < 0)
+ error(1, errno, "failed to accept");
+ i = recv(fd, buf, sizeof(buf), 0);
+ close(fd);
+ } else {
+ i = recv(ev.data.fd, buf, sizeof(buf), 0);
+ }
+
+ if (i < 0)
+ error(1, errno, "failed to recv");
+
+ return ev.data.fd;
+}
+
+static void test(int *rcv_fds, int count, int proto)
+{
+ struct epoll_event ev;
+ int epfd, i, test_fd;
+ uint16_t test_family;
+ socklen_t len = sizeof(test_family);
+
+ epfd = epoll_create(1);
+ if (epfd < 0)
+ error(1, errno, "failed to create epoll");
+
+ ev.events = EPOLLIN;
+ for (i = 0; i < count; ++i) {
+ ev.data.fd = rcv_fds[i];
+ if (epoll_ctl(epfd, EPOLL_CTL_ADD, rcv_fds[i], &ev))
+ error(1, errno, "failed to register sock epoll");
+ }
+
+ send_from_v4(proto);
+
+ test_fd = receive_once(epfd, proto);
+ if (getsockopt(test_fd, SOL_SOCKET, SO_DOMAIN, &test_family, &len))
+ error(1, errno, "failed to read socket domain");
+ if (test_family != AF_INET)
+ error(1, 0, "expected to receive on v4 socket but got v6 (%d)",
+ test_family);
+
+ close(epfd);
+}
+
+int main(void)
+{
+ int rcv_fds[32], i;
+
+ fprintf(stderr, "---- UDP IPv4 created before IPv6 ----\n");
+ build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds, 5);
+ build_rcv_fd(AF_INET6, SOCK_DGRAM, &(rcv_fds[5]), 5);
+ test(rcv_fds, 10, SOCK_DGRAM);
+ for (i = 0; i < 10; ++i)
+ close(rcv_fds[i]);
+
+ fprintf(stderr, "---- UDP IPv6 created before IPv4 ----\n");
+ build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds, 5);
+ build_rcv_fd(AF_INET, SOCK_DGRAM, &(rcv_fds[5]), 5);
+ test(rcv_fds, 10, SOCK_DGRAM);
+ for (i = 0; i < 10; ++i)
+ close(rcv_fds[i]);
+
+ /* NOTE: UDP socket lookups traverse a different code path when there
+ * are > 10 sockets in a group.
+ */
+ fprintf(stderr, "---- UDP IPv4 created before IPv6 (large) ----\n");
+ build_rcv_fd(AF_INET, SOCK_DGRAM, rcv_fds, 16);
+ build_rcv_fd(AF_INET6, SOCK_DGRAM, &(rcv_fds[16]), 16);
+ test(rcv_fds, 32, SOCK_DGRAM);
+ for (i = 0; i < 32; ++i)
+ close(rcv_fds[i]);
+
+ fprintf(stderr, "---- UDP IPv6 created before IPv4 (large) ----\n");
+ build_rcv_fd(AF_INET6, SOCK_DGRAM, rcv_fds, 16);
+ build_rcv_fd(AF_INET, SOCK_DGRAM, &(rcv_fds[16]), 16);
+ test(rcv_fds, 32, SOCK_DGRAM);
+ for (i = 0; i < 32; ++i)
+ close(rcv_fds[i]);
+
+ fprintf(stderr, "---- TCP IPv4 created before IPv6 ----\n");
+ build_rcv_fd(AF_INET, SOCK_STREAM, rcv_fds, 5);
+ build_rcv_fd(AF_INET6, SOCK_STREAM, &(rcv_fds[5]), 5);
+ test(rcv_fds, 10, SOCK_STREAM);
+ for (i = 0; i < 10; ++i)
+ close(rcv_fds[i]);
+
+ fprintf(stderr, "---- TCP IPv6 created before IPv4 ----\n");
+ build_rcv_fd(AF_INET6, SOCK_STREAM, rcv_fds, 5);
+ build_rcv_fd(AF_INET, SOCK_STREAM, &(rcv_fds[5]), 5);
+ test(rcv_fds, 10, SOCK_STREAM);
+ for (i = 0; i < 10; ++i)
+ close(rcv_fds[i]);
+
+ fprintf(stderr, "SUCCESS\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile
index 0c2706bda330..b08f77cbe31b 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -8,7 +8,7 @@ ifeq ($(ARCH),powerpc)
GIT_VERSION = $(shell git describe --always --long --dirty || echo "unknown")
-CFLAGS := -Wall -O2 -flto -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR) $(CFLAGS)
+CFLAGS := -Wall -O2 -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR) $(CFLAGS)
export CFLAGS
@@ -22,7 +22,8 @@ SUB_DIRS = benchmarks \
switch_endian \
syscalls \
tm \
- vphn
+ vphn \
+ math
endif
diff --git a/tools/testing/selftests/powerpc/basic_asm.h b/tools/testing/selftests/powerpc/basic_asm.h
new file mode 100644
index 000000000000..3349a0704d1a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/basic_asm.h
@@ -0,0 +1,70 @@
+#ifndef _SELFTESTS_POWERPC_BASIC_ASM_H
+#define _SELFTESTS_POWERPC_BASIC_ASM_H
+
+#include <ppc-asm.h>
+#include <asm/unistd.h>
+
+#define LOAD_REG_IMMEDIATE(reg,expr) \
+ lis reg,(expr)@highest; \
+ ori reg,reg,(expr)@higher; \
+ rldicr reg,reg,32,31; \
+ oris reg,reg,(expr)@high; \
+ ori reg,reg,(expr)@l;
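
As a worked example, LOAD_REG_IMMEDIATE(r3, 0x1122334455667788) builds the constant in 16-bit pieces: lis loads 0x1122 into the upper half of the low word, ori fills in 0x3344, rldicr shifts the partial result left by 32 bits, and the final oris/ori pair supplies 0x5566 and 0x7788.
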
+
+/*
+ * Note: these macros assume that variables being stored on the stack are
+ * doublewords. While this is usually the case, it may not hold for every
+ * use case.
+ */
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+#define STACK_FRAME_MIN_SIZE 32
+#define STACK_FRAME_TOC_POS 24
+#define __STACK_FRAME_PARAM(_param) (32 + ((_param)*8))
+#define __STACK_FRAME_LOCAL(_num_params,_var_num) ((STACK_FRAME_PARAM(_num_params)) + ((_var_num)*8))
+#else
+#define STACK_FRAME_MIN_SIZE 112
+#define STACK_FRAME_TOC_POS 40
+#define __STACK_FRAME_PARAM(i) (48 + ((i)*8))
+
+/*
+ * Caveat: if a function is passed more than 8 doublewords, the caller will
+ * have made more space, which would render the 112 incorrect.
+ */
+#define __STACK_FRAME_LOCAL(_num_params,_var_num) (112 + ((_var_num)*8))
+#endif
+
+/* Parameter x saved to the stack */
+#define STACK_FRAME_PARAM(var) __STACK_FRAME_PARAM(var)
+
+/* Local variable x saved to the stack after x parameters */
+#define STACK_FRAME_LOCAL(num_params,var) __STACK_FRAME_LOCAL(num_params,var)
+#define STACK_FRAME_LR_POS 16
+#define STACK_FRAME_CR_POS 8
+
+/*
+ * It is very important to note here that _extra is the extra amount of
+ * stack space needed. This space can be accessed using STACK_FRAME_PARAM()
+ * or STACK_FRAME_LOCAL() macros.
+ *
+ * r1 and r2 are not defined in ppc-asm.h (instead they are defined as sp
+ * and toc). Kernel programmers tend to prefer rX even for r1 and r2, hence
+ * %r1 and %r2. r0 is defined in ppc-asm.h and therefore %r0 gets
+ * preprocessed incorrectly, hence r0.
+ */
+#define PUSH_BASIC_STACK(_extra) \
+ mflr r0; \
+ std r0,STACK_FRAME_LR_POS(%r1); \
+ stdu %r1,-(_extra + STACK_FRAME_MIN_SIZE)(%r1); \
+ mfcr r0; \
+ stw r0,STACK_FRAME_CR_POS(%r1); \
+ std %r2,STACK_FRAME_TOC_POS(%r1);
+
+#define POP_BASIC_STACK(_extra) \
+ ld %r2,STACK_FRAME_TOC_POS(%r1); \
+ lwz r0,STACK_FRAME_CR_POS(%r1); \
+ mtcr r0; \
+ addi %r1,%r1,(_extra + STACK_FRAME_MIN_SIZE); \
+ ld r0,STACK_FRAME_LR_POS(%r1); \
+ mtlr r0;
+
+#endif /* _SELFTESTS_POWERPC_BASIC_ASM_H */
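
To make the offsets concrete: under the ELFv2 branch above, STACK_FRAME_PARAM(0) evaluates to 32, STACK_FRAME_PARAM(1) to 40 and STACK_FRAME_LOCAL(2,0) to 48, which is exactly how the FPU/VMX assembly below saves its two pointer arguments and then spills the non-volatile registers. Under the ELFv1 branch the same expressions evaluate to 48, 56 and 112.
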
diff --git a/tools/testing/selftests/powerpc/math/.gitignore b/tools/testing/selftests/powerpc/math/.gitignore
new file mode 100644
index 000000000000..4fe13a439fd7
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/.gitignore
@@ -0,0 +1,6 @@
+fpu_syscall
+vmx_syscall
+fpu_preempt
+vmx_preempt
+fpu_signal
+vmx_signal
diff --git a/tools/testing/selftests/powerpc/math/Makefile b/tools/testing/selftests/powerpc/math/Makefile
new file mode 100644
index 000000000000..5b88875d5955
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/Makefile
@@ -0,0 +1,19 @@
+TEST_PROGS := fpu_syscall fpu_preempt fpu_signal vmx_syscall vmx_preempt vmx_signal
+
+all: $(TEST_PROGS)
+
+$(TEST_PROGS): ../harness.c
+$(TEST_PROGS): CFLAGS += -O2 -g -pthread -m64 -maltivec
+
+fpu_syscall: fpu_asm.S
+fpu_preempt: fpu_asm.S
+fpu_signal: fpu_asm.S
+
+vmx_syscall: vmx_asm.S
+vmx_preempt: vmx_asm.S
+vmx_signal: vmx_asm.S
+
+include ../../lib.mk
+
+clean:
+ rm -f $(TEST_PROGS) *.o
diff --git a/tools/testing/selftests/powerpc/math/fpu_asm.S b/tools/testing/selftests/powerpc/math/fpu_asm.S
new file mode 100644
index 000000000000..f3711d80e709
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/fpu_asm.S
@@ -0,0 +1,198 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "../basic_asm.h"
+
+#define PUSH_FPU(pos) \
+ stfd f14,pos(sp); \
+ stfd f15,pos+8(sp); \
+ stfd f16,pos+16(sp); \
+ stfd f17,pos+24(sp); \
+ stfd f18,pos+32(sp); \
+ stfd f19,pos+40(sp); \
+ stfd f20,pos+48(sp); \
+ stfd f21,pos+56(sp); \
+ stfd f22,pos+64(sp); \
+ stfd f23,pos+72(sp); \
+ stfd f24,pos+80(sp); \
+ stfd f25,pos+88(sp); \
+ stfd f26,pos+96(sp); \
+ stfd f27,pos+104(sp); \
+ stfd f28,pos+112(sp); \
+ stfd f29,pos+120(sp); \
+ stfd f30,pos+128(sp); \
+ stfd f31,pos+136(sp);
+
+#define POP_FPU(pos) \
+ lfd f14,pos(sp); \
+ lfd f15,pos+8(sp); \
+ lfd f16,pos+16(sp); \
+ lfd f17,pos+24(sp); \
+ lfd f18,pos+32(sp); \
+ lfd f19,pos+40(sp); \
+ lfd f20,pos+48(sp); \
+ lfd f21,pos+56(sp); \
+ lfd f22,pos+64(sp); \
+ lfd f23,pos+72(sp); \
+ lfd f24,pos+80(sp); \
+ lfd f25,pos+88(sp); \
+ lfd f26,pos+96(sp); \
+ lfd f27,pos+104(sp); \
+ lfd f28,pos+112(sp); \
+ lfd f29,pos+120(sp); \
+ lfd f30,pos+128(sp); \
+ lfd f31,pos+136(sp);
+
+# Careful calling this, it will 'clobber' fpu (by design)
+# Don't call this from C
+FUNC_START(load_fpu)
+ lfd f14,0(r3)
+ lfd f15,8(r3)
+ lfd f16,16(r3)
+ lfd f17,24(r3)
+ lfd f18,32(r3)
+ lfd f19,40(r3)
+ lfd f20,48(r3)
+ lfd f21,56(r3)
+ lfd f22,64(r3)
+ lfd f23,72(r3)
+ lfd f24,80(r3)
+ lfd f25,88(r3)
+ lfd f26,96(r3)
+ lfd f27,104(r3)
+ lfd f28,112(r3)
+ lfd f29,120(r3)
+ lfd f30,128(r3)
+ lfd f31,136(r3)
+ blr
+FUNC_END(load_fpu)
+
+FUNC_START(check_fpu)
+ mr r4,r3
+ li r3,1 # assume a bad result
+ lfd f0,0(r4)
+ fcmpu cr1,f0,f14
+ bne cr1,1f
+ lfd f0,8(r4)
+ fcmpu cr1,f0,f15
+ bne cr1,1f
+ lfd f0,16(r4)
+ fcmpu cr1,f0,f16
+ bne cr1,1f
+ lfd f0,24(r4)
+ fcmpu cr1,f0,f17
+ bne cr1,1f
+ lfd f0,32(r4)
+ fcmpu cr1,f0,f18
+ bne cr1,1f
+ lfd f0,40(r4)
+ fcmpu cr1,f0,f19
+ bne cr1,1f
+ lfd f0,48(r4)
+ fcmpu cr1,f0,f20
+ bne cr1,1f
+ lfd f0,56(r4)
+ fcmpu cr1,f0,f21
+ bne cr1,1f
+ lfd f0,64(r4)
+ fcmpu cr1,f0,f22
+ bne cr1,1f
+ lfd f0,72(r4)
+ fcmpu cr1,f0,f23
+ bne cr1,1f
+ lfd f0,80(r4)
+ fcmpu cr1,f0,f24
+ bne cr1,1f
+ lfd f0,88(r4)
+ fcmpu cr1,f0,f25
+ bne cr1,1f
+ lfd f0,96(r4)
+ fcmpu cr1,f0,f26
+ bne cr1,1f
+ lfd f0,104(r4)
+ fcmpu cr1,f0,f27
+ bne cr1,1f
+ lfd f0,112(r4)
+ fcmpu cr1,f0,f28
+ bne cr1,1f
+ lfd f0,120(r4)
+ fcmpu cr1,f0,f29
+ bne cr1,1f
+ lfd f0,128(r4)
+ fcmpu cr1,f0,f30
+ bne cr1,1f
+ lfd f0,136(r4)
+ fcmpu cr1,f0,f31
+ bne cr1,1f
+ li r3,0 # Success!!!
+1: blr
+
+FUNC_START(test_fpu)
+ # r3 holds pointer to where to put the result of fork
+ # r4 holds pointer to the pid
+ # f14-f31 are non volatiles
+ PUSH_BASIC_STACK(256)
+ std r3,STACK_FRAME_PARAM(0)(sp) # Address of darray
+ std r4,STACK_FRAME_PARAM(1)(sp) # Address of pid
+ PUSH_FPU(STACK_FRAME_LOCAL(2,0))
+
+ bl load_fpu
+ nop
+ li r0,__NR_fork
+ sc
+
+ # pass the result of the fork to the caller
+ ld r9,STACK_FRAME_PARAM(1)(sp)
+ std r3,0(r9)
+
+ ld r3,STACK_FRAME_PARAM(0)(sp)
+ bl check_fpu
+ nop
+
+ POP_FPU(STACK_FRAME_LOCAL(2,0))
+ POP_BASIC_STACK(256)
+ blr
+FUNC_END(test_fpu)
+
+# int preempt_fpu(double *darray, int *threads_starting, int *running)
+# On starting will (atomically) decrement threads_starting as a signal that the
+# FPU has been loaded with darray. Will proceed to check the validity of the FPU
+# registers while running is not zero.
+FUNC_START(preempt_fpu)
+ PUSH_BASIC_STACK(256)
+ std r3,STACK_FRAME_PARAM(0)(sp) # double *darray
+ std r4,STACK_FRAME_PARAM(1)(sp) # int *threads_starting
+ std r5,STACK_FRAME_PARAM(2)(sp) # int *running
+ PUSH_FPU(STACK_FRAME_LOCAL(3,0))
+
+ bl load_fpu
+ nop
+
+ sync
+ # Atomic DEC
+ ld r3,STACK_FRAME_PARAM(1)(sp)
+1: lwarx r4,0,r3
+ addi r4,r4,-1
+ stwcx. r4,0,r3
+ bne- 1b
+
+2: ld r3,STACK_FRAME_PARAM(0)(sp)
+ bl check_fpu
+ nop
+ cmpdi r3,0
+ bne 3f
+ ld r4,STACK_FRAME_PARAM(2)(sp)
+ ld r5,0(r4)
+ cmpwi r5,0
+ bne 2b
+
+3: POP_FPU(STACK_FRAME_LOCAL(3,0))
+ POP_BASIC_STACK(256)
+ blr
+FUNC_END(preempt_fpu)
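
For readers following the assembly, the lwarx/stwcx. loop above is simply an atomic decrement of *threads_starting. A rough C equivalent, shown only as an illustration (announce_loaded() is not part of the tests), using the GCC __atomic builtin:

	/* Atomically subtract one from *threads_starting so the C side can
	 * tell when every worker has loaded its registers -- the same thing
	 * the "Atomic DEC" sequence in preempt_fpu does.
	 */
	static void announce_loaded(int *threads_starting)
	{
		__atomic_fetch_sub(threads_starting, 1, __ATOMIC_SEQ_CST);
	}
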
diff --git a/tools/testing/selftests/powerpc/math/fpu_preempt.c b/tools/testing/selftests/powerpc/math/fpu_preempt.c
new file mode 100644
index 000000000000..0f85b79d883d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/fpu_preempt.c
@@ -0,0 +1,113 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test attempts to see if the FPU registers change across preemption.
+ * Two things should be noted here: a) the check_fpu function in asm only
+ * checks the non-volatile registers, as it is reused from the syscall test;
+ * b) there is no way to be sure preemption happened, so this test just uses
+ * many threads and a long wait. As such, a successful test doesn't mean much,
+ * but a failure is bad.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <pthread.h>
+
+#include "utils.h"
+
+/* Time to wait for workers to get preempted (seconds) */
+#define PREEMPT_TIME 20
+/*
+ * Factor by which to multiply number of online CPUs for total number of
+ * worker threads
+ */
+#define THREAD_FACTOR 8
+
+
+__thread double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
+ 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0,
+ 2.1};
+
+int threads_starting;
+int running;
+
+extern void preempt_fpu(double *darray, int *threads_starting, int *running);
+
+void *preempt_fpu_c(void *p)
+{
+ int i;
+ srand(pthread_self());
+ for (i = 0; i < 21; i++)
+ darray[i] = rand();
+
+ /* Test failed if it ever returns */
+ preempt_fpu(darray, &threads_starting, &running);
+
+ return p;
+}
+
+int test_preempt_fpu(void)
+{
+ int i, rc, threads;
+ pthread_t *tids;
+
+ threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
+ tids = malloc((threads) * sizeof(pthread_t));
+ FAIL_IF(!tids);
+
+ running = true;
+ threads_starting = threads;
+ for (i = 0; i < threads; i++) {
+ rc = pthread_create(&tids[i], NULL, preempt_fpu_c, NULL);
+ FAIL_IF(rc);
+ }
+
+ setbuf(stdout, NULL);
+ /* Not really necessary but nice to wait for every thread to start */
+ printf("\tWaiting for all workers to start...");
+ while (threads_starting)
+ asm volatile("": : :"memory");
+ printf("done\n");
+
+ printf("\tWaiting for %d seconds to let some workers get preempted...", PREEMPT_TIME);
+ sleep(PREEMPT_TIME);
+ printf("done\n");
+
+ printf("\tStopping workers...");
+ /*
+ * Workers are checking this value every loop. In preempt_fpu: 'cmpwi r5,0; bne 2b'.
+ * r5 will have loaded the value of running.
+ */
+ running = 0;
+ for (i = 0; i < threads; i++) {
+ void *rc_p;
+ pthread_join(tids[i], &rc_p);
+
+ /*
+ * Harness will say the fail was here, look at why preempt_fpu
+ * returned
+ */
+ if ((long) rc_p)
+ printf("oops\n");
+ FAIL_IF((long) rc_p);
+ }
+ printf("done\n");
+
+ free(tids);
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(test_preempt_fpu, "fpu_preempt");
+}
diff --git a/tools/testing/selftests/powerpc/math/fpu_signal.c b/tools/testing/selftests/powerpc/math/fpu_signal.c
new file mode 100644
index 000000000000..888aa51b4204
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/fpu_signal.c
@@ -0,0 +1,135 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test attempts to see if the FPU registers are correctly reported in a
+ * signal context. Each worker just spins checking its FPU registers, at some
+ * point a signal will interrupt it and C code will check the signal context
+ * ensuring it is also the same.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <pthread.h>
+
+#include "utils.h"
+
+/* Number of times each thread should receive the signal */
+#define ITERATIONS 10
+/*
+ * Factor by which to multiply number of online CPUs for total number of
+ * worker threads
+ */
+#define THREAD_FACTOR 8
+
+__thread double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
+ 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0,
+ 2.1};
+
+bool bad_context;
+int threads_starting;
+int running;
+
+extern long preempt_fpu(double *darray, int *threads_starting, int *running);
+
+void signal_fpu_sig(int sig, siginfo_t *info, void *context)
+{
+ int i;
+ ucontext_t *uc = context;
+ mcontext_t *mc = &uc->uc_mcontext;
+
+ /* Only the non volatiles were loaded up */
+ for (i = 14; i < 32; i++) {
+ if (mc->fp_regs[i] != darray[i - 14]) {
+ bad_context = true;
+ break;
+ }
+ }
+}
+
+void *signal_fpu_c(void *p)
+{
+ int i;
+ long rc;
+ struct sigaction act;
+ act.sa_sigaction = signal_fpu_sig;
+ act.sa_flags = SA_SIGINFO;
+ rc = sigaction(SIGUSR1, &act, NULL);
+ if (rc)
+ return p;
+
+ srand(pthread_self());
+ for (i = 0; i < 21; i++)
+ darray[i] = rand();
+
+ rc = preempt_fpu(darray, &threads_starting, &running);
+
+ return (void *) rc;
+}
+
+int test_signal_fpu(void)
+{
+ int i, j, rc, threads;
+ void *rc_p;
+ pthread_t *tids;
+
+ threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
+ tids = malloc(threads * sizeof(pthread_t));
+ FAIL_IF(!tids);
+
+ running = true;
+ threads_starting = threads;
+ for (i = 0; i < threads; i++) {
+ rc = pthread_create(&tids[i], NULL, signal_fpu_c, NULL);
+ FAIL_IF(rc);
+ }
+
+ setbuf(stdout, NULL);
+ printf("\tWaiting for all workers to start...");
+ while (threads_starting)
+ asm volatile("": : :"memory");
+ printf("done\n");
+
+ printf("\tSending signals to all threads %d times...", ITERATIONS);
+ for (i = 0; i < ITERATIONS; i++) {
+ for (j = 0; j < threads; j++) {
+ pthread_kill(tids[j], SIGUSR1);
+ }
+ sleep(1);
+ }
+ printf("done\n");
+
+ printf("\tStopping workers...");
+ running = 0;
+ for (i = 0; i < threads; i++) {
+ pthread_join(tids[i], &rc_p);
+
+ /*
+ * Harness will say the fail was here, look at why signal_fpu
+ * returned
+ */
+ if ((long) rc_p || bad_context)
+ printf("oops\n");
+ if (bad_context)
+ fprintf(stderr, "\t!! bad_context is true\n");
+ FAIL_IF((long) rc_p || bad_context);
+ }
+ printf("done\n");
+
+ free(tids);
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(test_signal_fpu, "fpu_signal");
+}
diff --git a/tools/testing/selftests/powerpc/math/fpu_syscall.c b/tools/testing/selftests/powerpc/math/fpu_syscall.c
new file mode 100644
index 000000000000..949e6721256d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/fpu_syscall.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test attempts to see if the FPU registers change across a syscall (fork).
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+
+#include "utils.h"
+
+extern int test_fpu(double *darray, pid_t *pid);
+
+double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
+ 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0,
+ 2.1};
+
+int syscall_fpu(void)
+{
+ pid_t fork_pid;
+ int i;
+ int ret;
+ int child_ret;
+ for (i = 0; i < 1000; i++) {
+ /* test_fpu will fork() */
+ ret = test_fpu(darray, &fork_pid);
+ if (fork_pid == -1)
+ return -1;
+ if (fork_pid == 0)
+ exit(ret);
+ waitpid(fork_pid, &child_ret, 0);
+ if (ret || child_ret)
+ return 1;
+ }
+
+ return 0;
+}
+
+int test_syscall_fpu(void)
+{
+ /*
+ * Set up an environment with a lot of context switching
+ */
+ pid_t pid2;
+ pid_t pid = fork();
+ int ret;
+ int child_ret;
+ FAIL_IF(pid == -1);
+
+ pid2 = fork();
+ /* Can't FAIL_IF(pid2 == -1); because we've already forked once */
+ if (pid2 == -1) {
+ /*
+ * Couldn't fork, ensure test is a fail
+ */
+ child_ret = ret = 1;
+ } else {
+ ret = syscall_fpu();
+ if (pid2)
+ waitpid(pid2, &child_ret, 0);
+ else
+ exit(ret);
+ }
+
+ ret |= child_ret;
+
+ if (pid)
+ waitpid(pid, &child_ret, 0);
+ else
+ exit(ret);
+
+ FAIL_IF(ret || child_ret);
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(test_syscall_fpu, "syscall_fpu");
+
+}
diff --git a/tools/testing/selftests/powerpc/math/vmx_asm.S b/tools/testing/selftests/powerpc/math/vmx_asm.S
new file mode 100644
index 000000000000..1b8c248b3ac1
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/vmx_asm.S
@@ -0,0 +1,235 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include "../basic_asm.h"
+
+# POS MUST BE 16 ALIGNED!
+#define PUSH_VMX(pos,reg) \
+ li reg,pos; \
+ stvx v20,reg,sp; \
+ addi reg,reg,16; \
+ stvx v21,reg,sp; \
+ addi reg,reg,16; \
+ stvx v22,reg,sp; \
+ addi reg,reg,16; \
+ stvx v23,reg,sp; \
+ addi reg,reg,16; \
+ stvx v24,reg,sp; \
+ addi reg,reg,16; \
+ stvx v25,reg,sp; \
+ addi reg,reg,16; \
+ stvx v26,reg,sp; \
+ addi reg,reg,16; \
+ stvx v27,reg,sp; \
+ addi reg,reg,16; \
+ stvx v28,reg,sp; \
+ addi reg,reg,16; \
+ stvx v29,reg,sp; \
+ addi reg,reg,16; \
+ stvx v30,reg,sp; \
+ addi reg,reg,16; \
+ stvx v31,reg,sp;
+
+# POS MUST BE 16 ALIGNED!
+#define POP_VMX(pos,reg) \
+ li reg,pos; \
+ lvx v20,reg,sp; \
+ addi reg,reg,16; \
+ lvx v21,reg,sp; \
+ addi reg,reg,16; \
+ lvx v22,reg,sp; \
+ addi reg,reg,16; \
+ lvx v23,reg,sp; \
+ addi reg,reg,16; \
+ lvx v24,reg,sp; \
+ addi reg,reg,16; \
+ lvx v25,reg,sp; \
+ addi reg,reg,16; \
+ lvx v26,reg,sp; \
+ addi reg,reg,16; \
+ lvx v27,reg,sp; \
+ addi reg,reg,16; \
+ lvx v28,reg,sp; \
+ addi reg,reg,16; \
+ lvx v29,reg,sp; \
+ addi reg,reg,16; \
+ lvx v30,reg,sp; \
+ addi reg,reg,16; \
+ lvx v31,reg,sp;
+
+# Careful, this will 'clobber' vmx (by design)
+# Don't call this from C
+FUNC_START(load_vmx)
+ li r5,0
+ lvx v20,r5,r3
+ addi r5,r5,16
+ lvx v21,r5,r3
+ addi r5,r5,16
+ lvx v22,r5,r3
+ addi r5,r5,16
+ lvx v23,r5,r3
+ addi r5,r5,16
+ lvx v24,r5,r3
+ addi r5,r5,16
+ lvx v25,r5,r3
+ addi r5,r5,16
+ lvx v26,r5,r3
+ addi r5,r5,16
+ lvx v27,r5,r3
+ addi r5,r5,16
+ lvx v28,r5,r3
+ addi r5,r5,16
+ lvx v29,r5,r3
+ addi r5,r5,16
+ lvx v30,r5,r3
+ addi r5,r5,16
+ lvx v31,r5,r3
+ blr
+FUNC_END(load_vmx)
+
+# Should be safe from C, only touches r4, r5 and v0,v1,v2
+FUNC_START(check_vmx)
+ PUSH_BASIC_STACK(32)
+ mr r4,r3
+ li r3,1 # assume a bad result
+ li r5,0
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v20
+ vmr v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v21
+ vand v2,v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v22
+ vand v2,v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v23
+ vand v2,v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v24
+ vand v2,v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v25
+ vand v2,v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v26
+ vand v2,v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v27
+ vand v2,v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v28
+ vand v2,v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v29
+ vand v2,v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v30
+ vand v2,v2,v1
+
+ addi r5,r5,16
+ lvx v0,r5,r4
+ vcmpequd. v1,v0,v31
+ vand v2,v2,v1
+
+ li r5,STACK_FRAME_LOCAL(0,0)
+ stvx v2,r5,sp
+ ldx r0,r5,sp
+ cmpdi r0,0xffffffffffffffff
+ bne 1f
+ li r3,0
+1: POP_BASIC_STACK(32)
+ blr
+FUNC_END(check_vmx)
+
+# Safe from C
+FUNC_START(test_vmx)
+ # r3 holds pointer to where to put the result of fork
+ # r4 holds pointer to the pid
+ # v20-v31 are non-volatile
+ PUSH_BASIC_STACK(512)
+ std r3,STACK_FRAME_PARAM(0)(sp) # Address of varray
+ std r4,STACK_FRAME_PARAM(1)(sp) # address of pid
+ PUSH_VMX(STACK_FRAME_LOCAL(2,0),r4)
+
+ bl load_vmx
+ nop
+
+ li r0,__NR_fork
+ sc
+ # Pass the result of fork back to the caller
+ ld r9,STACK_FRAME_PARAM(1)(sp)
+ std r3,0(r9)
+
+ ld r3,STACK_FRAME_PARAM(0)(sp)
+ bl check_vmx
+ nop
+
+ POP_VMX(STACK_FRAME_LOCAL(2,0),r4)
+ POP_BASIC_STACK(512)
+ blr
+FUNC_END(test_vmx)
+
+# int preempt_vmx(vector int *varray, int *threads_starting, int *running)
+# On starting will (atomically) decrement threads_starting as a signal that
+# the VMX registers have been loaded with varray. Will proceed to check the
+# validity of the VMX registers while running is not zero.
+FUNC_START(preempt_vmx)
+ PUSH_BASIC_STACK(512)
+ std r3,STACK_FRAME_PARAM(0)(sp) # vector int *varray
+ std r4,STACK_FRAME_PARAM(1)(sp) # int *threads_starting
+ std r5,STACK_FRAME_PARAM(2)(sp) # int *running
+ # VMX needs to write to 16-byte-aligned addresses, so skip STACK_FRAME_LOCAL(3,0)
+ PUSH_VMX(STACK_FRAME_LOCAL(4,0),r4)
+
+ bl load_vmx
+ nop
+
+ sync
+ # Atomic DEC
+ ld r3,STACK_FRAME_PARAM(1)(sp)
+1: lwarx r4,0,r3
+ addi r4,r4,-1
+ stwcx. r4,0,r3
+ bne- 1b
+
+2: ld r3,STACK_FRAME_PARAM(0)(sp)
+ bl check_vmx
+ nop
+ cmpdi r3,0
+ bne 3f
+ ld r4,STACK_FRAME_PARAM(2)(sp)
+ ld r5,0(r4)
+ cmpwi r5,0
+ bne 2b
+
+3: POP_VMX(STACK_FRAME_LOCAL(4,0),r4)
+ POP_BASIC_STACK(512)
+ blr
+FUNC_END(preempt_vmx)
diff --git a/tools/testing/selftests/powerpc/math/vmx_preempt.c b/tools/testing/selftests/powerpc/math/vmx_preempt.c
new file mode 100644
index 000000000000..9ef376c55b13
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/vmx_preempt.c
@@ -0,0 +1,112 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test attempts to see if the VMX registers change across preemption.
+ * Two things should be noted here: a) the check_vmx function in asm only
+ * checks the non-volatile registers, as it is reused from the syscall test;
+ * b) there is no way to be sure preemption happened, so this test just uses
+ * many threads and a long wait. As such, a successful test doesn't mean much,
+ * but a failure is bad.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <pthread.h>
+
+#include "utils.h"
+
+/* Time to wait for workers to get preempted (seconds) */
+#define PREEMPT_TIME 20
+/*
+ * Factor by which to multiply number of online CPUs for total number of
+ * worker threads
+ */
+#define THREAD_FACTOR 8
+
+__thread vector int varray[] = {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10,11,12},
+ {13,14,15,16},{17,18,19,20},{21,22,23,24},
+ {25,26,27,28},{29,30,31,32},{33,34,35,36},
+ {37,38,39,40},{41,42,43,44},{45,46,47,48}};
+
+int threads_starting;
+int running;
+
+extern void preempt_vmx(vector int *varray, int *threads_starting, int *running);
+
+void *preempt_vmx_c(void *p)
+{
+ int i, j;
+ srand(pthread_self());
+ for (i = 0; i < 12; i++)
+ for (j = 0; j < 4; j++)
+ varray[i][j] = rand();
+
+ /* Test fails if it ever returns */
+ preempt_vmx(varray, &threads_starting, &running);
+ return p;
+}
+
+int test_preempt_vmx(void)
+{
+ int i, rc, threads;
+ pthread_t *tids;
+
+ threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
+ tids = malloc(threads * sizeof(pthread_t));
+ FAIL_IF(!tids);
+
+ running = true;
+ threads_starting = threads;
+ for (i = 0; i < threads; i++) {
+ rc = pthread_create(&tids[i], NULL, preempt_vmx_c, NULL);
+ FAIL_IF(rc);
+ }
+
+ setbuf(stdout, NULL);
+ /* Not really necessary but nice to wait for every thread to start */
+ printf("\tWaiting for all workers to start...");
+ while (threads_starting)
+ asm volatile("": : :"memory");
+ printf("done\n");
+
+ printf("\tWaiting for %d seconds to let some workers get preempted...", PREEMPT_TIME);
+ sleep(PREEMPT_TIME);
+ printf("done\n");
+
+ printf("\tStopping workers...");
+ /*
+ * Workers are checking this value every loop. In preempt_vmx: 'cmpwi r5,0; bne 2b'.
+ * r5 will have loaded the value of running.
+ */
+ running = 0;
+ for (i = 0; i < threads; i++) {
+ void *rc_p;
+ pthread_join(tids[i], &rc_p);
+
+ /*
+ * Harness will say the fail was here, look at why preempt_vmx
+ * returned
+ */
+ if ((long) rc_p)
+ printf("oops\n");
+ FAIL_IF((long) rc_p);
+ }
+ printf("done\n");
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(test_preempt_vmx, "vmx_preempt");
+}
diff --git a/tools/testing/selftests/powerpc/math/vmx_signal.c b/tools/testing/selftests/powerpc/math/vmx_signal.c
new file mode 100644
index 000000000000..671d7533a557
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/vmx_signal.c
@@ -0,0 +1,156 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test attempts to see if the VMX registers are correctly reported in a
+ * signal context. Each worker just spins checking its VMX registers, at some
+ * point a signal will interrupt it and C code will check the signal context
+ * ensuring it is also the same.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <string.h>
+#include <pthread.h>
+#include <altivec.h>
+
+#include "utils.h"
+
+/* Number of times each thread should receive the signal */
+#define ITERATIONS 10
+/*
+ * Factor by which to multiply number of online CPUs for total number of
+ * worker threads
+ */
+#define THREAD_FACTOR 8
+
+__thread vector int varray[] = {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10,11,12},
+ {13,14,15,16},{17,18,19,20},{21,22,23,24},
+ {25,26,27,28},{29,30,31,32},{33,34,35,36},
+ {37,38,39,40},{41,42,43,44},{45,46,47,48}};
+
+bool bad_context;
+int running;
+int threads_starting;
+
+extern int preempt_vmx(vector int *varray, int *threads_starting, int *running);
+
+void signal_vmx_sig(int sig, siginfo_t *info, void *context)
+{
+ int i;
+ ucontext_t *uc = context;
+ mcontext_t *mc = &uc->uc_mcontext;
+
+ /* Only the non volatiles were loaded up */
+ for (i = 20; i < 32; i++) {
+ if (memcmp(mc->v_regs->vrregs[i], &varray[i - 20], 16)) {
+ int j;
+ /*
+ * We shouldn't printf() in a signal handler; however, this is a
+ * test and we've detected a failure. Understanding what failed
+ * is paramount. All that happens after this is that the test
+ * exits with failure.
+ */
+ printf("VMX mismatch at reg %d!\n", i);
+ printf("Reg | Actual | Expected\n");
+ for (j = 20; j < 32; j++) {
+ printf("%d | 0x%04x%04x%04x%04x | 0x%04x%04x%04x%04x\n", j, mc->v_regs->vrregs[j][0],
+ mc->v_regs->vrregs[j][1], mc->v_regs->vrregs[j][2], mc->v_regs->vrregs[j][3],
+ varray[j - 20][0], varray[j - 20][1], varray[j - 20][2], varray[j - 20][3]);
+ }
+ bad_context = true;
+ break;
+ }
+ }
+}
+
+void *signal_vmx_c(void *p)
+{
+ int i, j;
+ long rc;
+ struct sigaction act;
+ act.sa_sigaction = signal_vmx_sig;
+ act.sa_flags = SA_SIGINFO;
+ rc = sigaction(SIGUSR1, &act, NULL);
+ if (rc)
+ return p;
+
+ srand(pthread_self());
+ for (i = 0; i < 12; i++)
+ for (j = 0; j < 4; j++)
+ varray[i][j] = rand();
+
+ rc = preempt_vmx(varray, &threads_starting, &running);
+
+ return (void *) rc;
+}
+
+int test_signal_vmx(void)
+{
+ int i, j, rc, threads;
+ void *rc_p;
+ pthread_t *tids;
+
+ threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
+ tids = malloc(threads * sizeof(pthread_t));
+ FAIL_IF(!tids);
+
+ running = true;
+ threads_starting = threads;
+ for (i = 0; i < threads; i++) {
+ rc = pthread_create(&tids[i], NULL, signal_vmx_c, NULL);
+ FAIL_IF(rc);
+ }
+
+ setbuf(stdout, NULL);
+ printf("\tWaiting for %d workers to start... %d", threads, threads_starting);
+ while (threads_starting) {
+ asm volatile("": : :"memory");
+ usleep(1000);
+ printf(", %d", threads_starting);
+ }
+ printf(" ...done\n");
+
+ printf("\tSending signals to all threads %d times...", ITERATIONS);
+ for (i = 0; i < ITERATIONS; i++) {
+ for (j = 0; j < threads; j++) {
+ pthread_kill(tids[j], SIGUSR1);
+ }
+ sleep(1);
+ }
+ printf("done\n");
+
+ printf("\tKilling workers...");
+ running = 0;
+ for (i = 0; i < threads; i++) {
+ pthread_join(tids[i], &rc_p);
+
+ /*
+ * Harness will say the fail was here, look at why signal_vmx
+ * returned
+ */
+ if ((long) rc_p || bad_context)
+ printf("oops\n");
+ if (bad_context)
+ fprintf(stderr, "\t!! bad_context is true\n");
+ FAIL_IF((long) rc_p || bad_context);
+ }
+ printf("done\n");
+
+ free(tids);
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(test_signal_vmx, "vmx_signal");
+}
diff --git a/tools/testing/selftests/powerpc/math/vmx_syscall.c b/tools/testing/selftests/powerpc/math/vmx_syscall.c
new file mode 100644
index 000000000000..a017918ee1ca
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/vmx_syscall.c
@@ -0,0 +1,91 @@
+/*
+ * Copyright 2015, Cyril Bur, IBM Corp.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This test attempts to see if the VMX registers change across a syscall (fork).
+ */
+
+#include <altivec.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/time.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include "utils.h"
+
+vector int varray[] = {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10,11,12},
+ {13,14,15,16},{17,18,19,20},{21,22,23,24},
+ {25,26,27,28},{29,30,31,32},{33,34,35,36},
+ {37,38,39,40},{41,42,43,44},{45,46,47,48}};
+
+extern int test_vmx(vector int *varray, pid_t *pid);
+
+int vmx_syscall(void)
+{
+ pid_t fork_pid;
+ int i;
+ int ret;
+ int child_ret;
+ for (i = 0; i < 1000; i++) {
+ /* test_vmx will fork() */
+ ret = test_vmx(varray, &fork_pid);
+ if (fork_pid == -1)
+ return -1;
+ if (fork_pid == 0)
+ exit(ret);
+ waitpid(fork_pid, &child_ret, 0);
+ if (ret || child_ret)
+ return 1;
+ }
+
+ return 0;
+}
+
+int test_vmx_syscall(void)
+{
+ /*
+ * Set up an environment with a lot of context switching
+ */
+ pid_t pid2;
+ pid_t pid = fork();
+ int ret;
+ int child_ret;
+ FAIL_IF(pid == -1);
+
+ pid2 = fork();
+ ret = vmx_syscall();
+ /* Can't FAIL_IF(pid2 == -1); because we've already forked */
+ if (pid2 == -1) {
+ /*
+ * Couldn't fork, ensure child_ret is set and is a fail
+ */
+ ret = child_ret = 1;
+ } else {
+ if (pid2)
+ waitpid(pid2, &child_ret, 0);
+ else
+ exit(ret);
+ }
+
+ ret |= child_ret;
+
+ if (pid)
+ waitpid(pid, &child_ret, 0);
+ else
+ exit(ret);
+
+ FAIL_IF(ret || child_ret);
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(test_vmx_syscall, "vmx_syscall");
+
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-msr-resv.c b/tools/testing/selftests/powerpc/tm/tm-signal-msr-resv.c
index d86653f282b1..8c54d18b3e9a 100644
--- a/tools/testing/selftests/powerpc/tm/tm-signal-msr-resv.c
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-msr-resv.c
@@ -40,7 +40,7 @@ void signal_usr1(int signum, siginfo_t *info, void *uc)
#ifdef __powerpc64__
ucp->uc_mcontext.gp_regs[PT_MSR] |= (7ULL << 32);
#else
- ucp->uc_mcontext.regs->gpr[PT_MSR] |= (7ULL);
+ ucp->uc_mcontext.uc_regs->gregs[PT_MSR] |= (7ULL);
#endif
/* Should segv on return because of invalid context */
segv_expected = 1;
diff --git a/tools/testing/selftests/pstore/config b/tools/testing/selftests/pstore/config
new file mode 100644
index 000000000000..6a8e5a9bfc10
--- /dev/null
+++ b/tools/testing/selftests/pstore/config
@@ -0,0 +1,4 @@
+CONFIG_MISC_FILESYSTEMS=y
+CONFIG_PSTORE=y
+CONFIG_PSTORE_PMSG=y
+CONFIG_PSTORE_CONSOLE=y
diff --git a/tools/testing/selftests/seccomp/config b/tools/testing/selftests/seccomp/config
new file mode 100644
index 000000000000..db1e11b08c8a
--- /dev/null
+++ b/tools/testing/selftests/seccomp/config
@@ -0,0 +1,2 @@
+CONFIG_SECCOMP=y
+CONFIG_SECCOMP_FILTER=y
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index b9453b838162..150829dd7998 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -1497,15 +1497,15 @@ TEST_F(TRACE_syscall, syscall_dropped)
#define SECCOMP_SET_MODE_FILTER 1
#endif
-#ifndef SECCOMP_FLAG_FILTER_TSYNC
-#define SECCOMP_FLAG_FILTER_TSYNC 1
+#ifndef SECCOMP_FILTER_FLAG_TSYNC
+#define SECCOMP_FILTER_FLAG_TSYNC 1
#endif
#ifndef seccomp
-int seccomp(unsigned int op, unsigned int flags, struct sock_fprog *filter)
+int seccomp(unsigned int op, unsigned int flags, void *args)
{
errno = 0;
- return syscall(__NR_seccomp, op, flags, filter);
+ return syscall(__NR_seccomp, op, flags, args);
}
#endif
@@ -1613,7 +1613,7 @@ TEST(TSYNC_first)
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
}
- ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
&prog);
ASSERT_NE(ENOSYS, errno) {
TH_LOG("Kernel does not support seccomp syscall!");
@@ -1831,7 +1831,7 @@ TEST_F(TSYNC, two_siblings_with_ancestor)
self->sibling_count++;
}
- ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
&self->apply_prog);
ASSERT_EQ(0, ret) {
TH_LOG("Could install filter on all threads!");
@@ -1892,7 +1892,7 @@ TEST_F(TSYNC, two_siblings_with_no_filter)
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
}
- ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
&self->apply_prog);
ASSERT_NE(ENOSYS, errno) {
TH_LOG("Kernel does not support seccomp syscall!");
@@ -1940,7 +1940,7 @@ TEST_F(TSYNC, two_siblings_with_one_divergence)
self->sibling_count++;
}
- ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
&self->apply_prog);
ASSERT_EQ(self->sibling[0].system_tid, ret) {
TH_LOG("Did not fail on diverged sibling.");
@@ -1992,7 +1992,7 @@ TEST_F(TSYNC, two_siblings_not_under_filter)
TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
}
- ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
&self->apply_prog);
ASSERT_EQ(ret, self->sibling[0].system_tid) {
TH_LOG("Did not fail on diverged sibling.");
@@ -2021,7 +2021,7 @@ TEST_F(TSYNC, two_siblings_not_under_filter)
/* Switch to the remaining sibling */
sib = !sib;
- ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
&self->apply_prog);
ASSERT_EQ(0, ret) {
TH_LOG("Expected the remaining sibling to sync");
@@ -2044,7 +2044,7 @@ TEST_F(TSYNC, two_siblings_not_under_filter)
while (!kill(self->sibling[sib].system_tid, 0))
sleep(0.1);
- ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+ ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_TSYNC,
&self->apply_prog);
ASSERT_EQ(0, ret); /* just us chickens */
}
diff --git a/tools/testing/selftests/static_keys/config b/tools/testing/selftests/static_keys/config
new file mode 100644
index 000000000000..d538fb774b96
--- /dev/null
+++ b/tools/testing/selftests/static_keys/config
@@ -0,0 +1 @@
+CONFIG_TEST_STATIC_KEYS=m
diff --git a/tools/testing/selftests/timers/alarmtimer-suspend.c b/tools/testing/selftests/timers/alarmtimer-suspend.c
index 72cacf5383dd..2b361b830395 100644
--- a/tools/testing/selftests/timers/alarmtimer-suspend.c
+++ b/tools/testing/selftests/timers/alarmtimer-suspend.c
@@ -153,7 +153,7 @@ int main(void)
alarmcount = 0;
if (timer_create(alarm_clock_id, &se, &tm1) == -1) {
- printf("timer_create failled, %s unspported?\n",
+ printf("timer_create failed, %s unsupported?\n",
clockstring(alarm_clock_id));
break;
}
diff --git a/tools/testing/selftests/user/config b/tools/testing/selftests/user/config
new file mode 100644
index 000000000000..784ed8416324
--- /dev/null
+++ b/tools/testing/selftests/user/config
@@ -0,0 +1 @@
+CONFIG_TEST_USER_COPY=m
diff --git a/tools/testing/selftests/vm/config b/tools/testing/selftests/vm/config
new file mode 100644
index 000000000000..698c7ed28a26
--- /dev/null
+++ b/tools/testing/selftests/vm/config
@@ -0,0 +1 @@
+CONFIG_USERFAULTFD=y
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index d5ce7d7aae3e..b47ebd170690 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -5,7 +5,7 @@ include ../lib.mk
.PHONY: all all_32 all_64 warn_32bit_failure clean
TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall \
- check_initial_reg_state sigreturn ldt_gdt
+ check_initial_reg_state sigreturn ldt_gdt iopl
TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer
diff --git a/tools/testing/selftests/x86/iopl.c b/tools/testing/selftests/x86/iopl.c
new file mode 100644
index 000000000000..c496ca97bc18
--- /dev/null
+++ b/tools/testing/selftests/x86/iopl.c
@@ -0,0 +1,135 @@
+/*
+ * iopl.c - Test case for a Linux on Xen 64-bit bug
+ * Copyright (c) 2015 Andrew Lutomirski
+ */
+
+#define _GNU_SOURCE
+#include <err.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <stdbool.h>
+#include <sched.h>
+#include <sys/io.h>
+
+static int nerrs = 0;
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+
+}
+
+static jmp_buf jmpbuf;
+
+static void sigsegv(int sig, siginfo_t *si, void *ctx_void)
+{
+ siglongjmp(jmpbuf, 1);
+}
+
+int main(void)
+{
+ cpu_set_t cpuset;
+ CPU_ZERO(&cpuset);
+ CPU_SET(0, &cpuset);
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
+ err(1, "sched_setaffinity to CPU 0");
+
+ /* Probe for iopl support. Note that iopl(0) works even as nonroot. */
+ if (iopl(3) != 0) {
+ printf("[OK]\tiopl(3) failed (%d) -- try running as root\n",
+ errno);
+ return 0;
+ }
+
+ /* Restore our original state prior to starting the test. */
+ if (iopl(0) != 0)
+ err(1, "iopl(0)");
+
+ pid_t child = fork();
+ if (child == -1)
+ err(1, "fork");
+
+ if (child == 0) {
+ printf("\tchild: set IOPL to 3\n");
+ if (iopl(3) != 0)
+ err(1, "iopl");
+
+ printf("[RUN]\tchild: write to 0x80\n");
+ asm volatile ("outb %%al, $0x80" : : "a" (0));
+
+ return 0;
+ } else {
+ int status;
+ if (waitpid(child, &status, 0) != child ||
+ !WIFEXITED(status)) {
+ printf("[FAIL]\tChild died\n");
+ nerrs++;
+ } else if (WEXITSTATUS(status) != 0) {
+ printf("[FAIL]\tChild failed\n");
+ nerrs++;
+ } else {
+ printf("[OK]\tChild succeeded\n");
+ }
+ }
+
+ printf("[RUN]\tparent: write to 0x80 (should fail)\n");
+
+ sethandler(SIGSEGV, sigsegv, 0);
+ if (sigsetjmp(jmpbuf, 1) != 0) {
+ printf("[OK]\twrite was denied\n");
+ } else {
+ asm volatile ("outb %%al, $0x80" : : "a" (0));
+ printf("[FAIL]\twrite was allowed\n");
+ nerrs++;
+ }
+
+ /* Test the capability checks. */
+ printf("\tiopl(3)\n");
+ if (iopl(3) != 0)
+ err(1, "iopl(3)");
+
+ printf("\tDrop privileges\n");
+ if (setresuid(1, 1, 1) != 0) {
+ printf("[WARN]\tDropping privileges failed\n");
+ goto done;
+ }
+
+ printf("[RUN]\tiopl(3) unprivileged but with IOPL==3\n");
+ if (iopl(3) != 0) {
+ printf("[FAIL]\tiopl(3) should work if iopl is already 3 even if unprivileged\n");
+ nerrs++;
+ }
+
+ printf("[RUN]\tiopl(0) unprivileged\n");
+ if (iopl(0) != 0) {
+ printf("[FAIL]\tiopl(0) should work if iopl is already 3 even if unprivileged\n");
+ nerrs++;
+ }
+
+ printf("[RUN]\tiopl(3) unprivileged\n");
+ if (iopl(3) == 0) {
+ printf("[FAIL]\tiopl(3) should fail if when unprivileged if iopl==0\n");
+ nerrs++;
+ } else {
+ printf("[OK]\tFailed as expected\n");
+ }
+
+done:
+ return nerrs ? 1 : 0;
+}
+
diff --git a/tools/testing/selftests/zram/config b/tools/testing/selftests/zram/config
new file mode 100644
index 000000000000..e0cc47e2c7e2
--- /dev/null
+++ b/tools/testing/selftests/zram/config
@@ -0,0 +1,2 @@
+CONFIG_ZSMALLOC=y
+CONFIG_ZRAM=m
diff --git a/tools/virtio/linux/dma-mapping.h b/tools/virtio/linux/dma-mapping.h
new file mode 100644
index 000000000000..4f93af89ae16
--- /dev/null
+++ b/tools/virtio/linux/dma-mapping.h
@@ -0,0 +1,17 @@
+#ifndef _LINUX_DMA_MAPPING_H
+#define _LINUX_DMA_MAPPING_H
+
+#ifdef CONFIG_HAS_DMA
+# error Virtio userspace code does not support CONFIG_HAS_DMA
+#endif
+
+#define PCI_DMA_BUS_IS_PHYS 1
+
+enum dma_data_direction {
+ DMA_BIDIRECTIONAL = 0,
+ DMA_TO_DEVICE = 1,
+ DMA_FROM_DEVICE = 2,
+ DMA_NONE = 3,
+};
+
+#endif
diff --git a/tools/vm/page-types.c b/tools/vm/page-types.c
index 5a6016224bb9..e92903fc7113 100644
--- a/tools/vm/page-types.c
+++ b/tools/vm/page-types.c
@@ -61,6 +61,8 @@
#define PM_PFRAME_BITS 55
#define PM_PFRAME_MASK ((1LL << PM_PFRAME_BITS) - 1)
#define PM_PFRAME(x) ((x) & PM_PFRAME_MASK)
+#define MAX_SWAPFILES_SHIFT 5
+#define PM_SWAP_OFFSET(x) (((x) & PM_PFRAME_MASK) >> MAX_SWAPFILES_SHIFT)
#define PM_SOFT_DIRTY (1ULL << 55)
#define PM_MMAP_EXCLUSIVE (1ULL << 56)
#define PM_FILE (1ULL << 61)
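
Together with the existing PM_SWAP bit, these macros describe how a swapped-out page is encoded in a pagemap entry: the swap type occupies the low MAX_SWAPFILES_SHIFT bits and the swap offset the bits above it, up to bit 54 (see Documentation/vm/pagemap.txt). A small decoding sketch, where decode_swap_entry() is a hypothetical helper and not part of the patch:

	/* Split a pagemap entry that has PM_SWAP set into its swap type and
	 * swap offset, mirroring the PM_SWAP_OFFSET() macro above.
	 */
	static void decode_swap_entry(uint64_t val, unsigned *type, uint64_t *offset)
	{
		*type = val & ((1 << MAX_SWAPFILES_SHIFT) - 1);
		*offset = PM_SWAP_OFFSET(val);
	}
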
@@ -73,6 +75,7 @@
#define KPF_BYTES 8
#define PROC_KPAGEFLAGS "/proc/kpageflags"
+#define PROC_KPAGECGROUP "/proc/kpagecgroup"
/* [32-] kernel hacking assistances */
#define KPF_RESERVED 32
@@ -92,7 +95,8 @@
#define KPF_SLOB_FREE 49
#define KPF_SLUB_FROZEN 50
#define KPF_SLUB_DEBUG 51
-#define KPF_FILE 62
+#define KPF_FILE 61
+#define KPF_SWAP 62
#define KPF_MMAP_EXCLUSIVE 63
#define KPF_ALL_BITS ((uint64_t)~0ULL)
@@ -146,6 +150,7 @@ static const char * const page_flag_names[] = {
[KPF_SLUB_DEBUG] = "E:slub_debug",
[KPF_FILE] = "F:file",
+ [KPF_SWAP] = "w:swap",
[KPF_MMAP_EXCLUSIVE] = "1:mmap_exclusive",
};
@@ -164,7 +169,9 @@ static int opt_raw; /* for kernel developers */
static int opt_list; /* list pages (in ranges) */
static int opt_no_summary; /* don't show summary */
static pid_t opt_pid; /* process to walk */
-const char * opt_file;
+const char * opt_file; /* file or directory path */
+static uint64_t opt_cgroup; /* cgroup inode */
+static int opt_list_cgroup; /* list page cgroup */
#define MAX_ADDR_RANGES 1024
static int nr_addr_ranges;
@@ -185,6 +192,7 @@ static int page_size;
static int pagemap_fd;
static int kpageflags_fd;
+static int kpagecgroup_fd = -1;
static int opt_hwpoison;
static int opt_unpoison;
@@ -278,6 +286,16 @@ static unsigned long kpageflags_read(uint64_t *buf,
return do_u64_read(kpageflags_fd, PROC_KPAGEFLAGS, buf, index, pages);
}
+static unsigned long kpagecgroup_read(uint64_t *buf,
+ unsigned long index,
+ unsigned long pages)
+{
+ if (kpagecgroup_fd < 0)
+ return pages;
+
+ return do_u64_read(kpagecgroup_fd, PROC_KPAGECGROUP, buf, index, pages);
+}
+
static unsigned long pagemap_read(uint64_t *buf,
unsigned long index,
unsigned long pages)
@@ -297,6 +315,10 @@ static unsigned long pagemap_pfn(uint64_t val)
return pfn;
}
+static unsigned long pagemap_swap_offset(uint64_t val)
+{
+ return val & PM_SWAP ? PM_SWAP_OFFSET(val) : 0;
+}
/*
* page flag names
@@ -346,14 +368,15 @@ static char *page_flag_longname(uint64_t flags)
*/
static void show_page_range(unsigned long voffset, unsigned long offset,
- unsigned long size, uint64_t flags)
+ unsigned long size, uint64_t flags, uint64_t cgroup)
{
static uint64_t flags0;
+ static uint64_t cgroup0;
static unsigned long voff;
static unsigned long index;
static unsigned long count;
- if (flags == flags0 && offset == index + count &&
+ if (flags == flags0 && cgroup == cgroup0 && offset == index + count &&
size && voffset == voff + count) {
count += size;
return;
@@ -364,11 +387,14 @@ static void show_page_range(unsigned long voffset, unsigned long offset,
printf("%lx\t", voff);
if (opt_file)
printf("%lu\t", voff);
+ if (opt_list_cgroup)
+ printf("@%llu\t", (unsigned long long)cgroup0);
printf("%lx\t%lx\t%s\n",
index, count, page_flag_name(flags0));
}
flags0 = flags;
+ cgroup0 = cgroup;
index = offset;
voff = voffset;
count = size;
@@ -376,16 +402,18 @@ static void show_page_range(unsigned long voffset, unsigned long offset,
static void flush_page_range(void)
{
- show_page_range(0, 0, 0, 0);
+ show_page_range(0, 0, 0, 0, 0);
}
-static void show_page(unsigned long voffset,
- unsigned long offset, uint64_t flags)
+static void show_page(unsigned long voffset, unsigned long offset,
+ uint64_t flags, uint64_t cgroup)
{
if (opt_pid)
printf("%lx\t", voffset);
if (opt_file)
printf("%lu\t", voffset);
+ if (opt_list_cgroup)
+ printf("@%llu\t", (unsigned long long)cgroup);
printf("%lx\t%s\n", offset, page_flag_name(flags));
}
@@ -452,6 +480,8 @@ static uint64_t expand_overloaded_flags(uint64_t flags, uint64_t pme)
flags |= BIT(SOFTDIRTY);
if (pme & PM_FILE)
flags |= BIT(FILE);
+ if (pme & PM_SWAP)
+ flags |= BIT(SWAP);
if (pme & PM_MMAP_EXCLUSIVE)
flags |= BIT(MMAP_EXCLUSIVE);
@@ -566,23 +596,26 @@ static size_t hash_slot(uint64_t flags)
exit(EXIT_FAILURE);
}
-static void add_page(unsigned long voffset,
- unsigned long offset, uint64_t flags, uint64_t pme)
+static void add_page(unsigned long voffset, unsigned long offset,
+ uint64_t flags, uint64_t cgroup, uint64_t pme)
{
flags = kpageflags_flags(flags, pme);
if (!bit_mask_ok(flags))
return;
+ if (opt_cgroup && cgroup != (uint64_t)opt_cgroup)
+ return;
+
if (opt_hwpoison)
hwpoison_page(offset);
if (opt_unpoison)
unpoison_page(offset);
if (opt_list == 1)
- show_page_range(voffset, offset, 1, flags);
+ show_page_range(voffset, offset, 1, flags, cgroup);
else if (opt_list == 2)
- show_page(voffset, offset, flags);
+ show_page(voffset, offset, flags, cgroup);
nr_pages[hash_slot(flags)]++;
total_pages++;
@@ -595,24 +628,57 @@ static void walk_pfn(unsigned long voffset,
uint64_t pme)
{
uint64_t buf[KPAGEFLAGS_BATCH];
+ uint64_t cgi[KPAGEFLAGS_BATCH];
unsigned long batch;
unsigned long pages;
unsigned long i;
+ /*
+ * kpagecgroup_read() only reads if kpagecgroup was opened, but
+ * /proc/kpagecgroup might not even exist, so it's better to fill
+ * the buffer with zeros here.
+ */
+ if (count == 1)
+ cgi[0] = 0;
+ else
+ memset(cgi, 0, sizeof cgi);
+
while (count) {
batch = min_t(unsigned long, count, KPAGEFLAGS_BATCH);
pages = kpageflags_read(buf, index, batch);
if (pages == 0)
break;
+ if (kpagecgroup_read(cgi, index, pages) != pages)
+ fatal("kpagecgroup returned fewer pages than expected");
+
for (i = 0; i < pages; i++)
- add_page(voffset + i, index + i, buf[i], pme);
+ add_page(voffset + i, index + i, buf[i], cgi[i], pme);
index += pages;
count -= pages;
}
}
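+/* account (and optionally list) a swapped-out page using only its pagemap entry */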
+static void walk_swap(unsigned long voffset, uint64_t pme)
+{
+ uint64_t flags = kpageflags_flags(0, pme);
+
+ if (!bit_mask_ok(flags))
+ return;
+
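+ /* kpagecgroup is indexed by pfn, so a swapped page cannot be matched against a cgroup filter */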
+ if (opt_cgroup)
+ return;
+
+ if (opt_list == 1)
+ show_page_range(voffset, pagemap_swap_offset(pme), 1, flags, 0);
+ else if (opt_list == 2)
+ show_page(voffset, pagemap_swap_offset(pme), flags, 0);
+
+ nr_pages[hash_slot(flags)]++;
+ total_pages++;
+}
+
#define PAGEMAP_BATCH (64 << 10)
static void walk_vma(unsigned long index, unsigned long count)
{
@@ -632,6 +698,8 @@ static void walk_vma(unsigned long index, unsigned long count)
pfn = pagemap_pfn(buf[i]);
if (pfn)
walk_pfn(index + i, pfn, 1, buf[i]);
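+ /* a swapped-out entry has no pfn; account it separately */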
+ if (buf[i] & PM_SWAP)
+ walk_swap(index + i, buf[i]);
}
index += pages;
@@ -713,10 +781,12 @@ static void usage(void)
" -d|--describe flags Describe flags\n"
" -a|--addr addr-spec Walk a range of pages\n"
" -b|--bits bits-spec Walk pages with specified bits\n"
+" -c|--cgroup path|@inode Walk pages within memory cgroup\n"
" -p|--pid pid Walk process address space\n"
" -f|--file filename Walk file address space\n"
" -l|--list Show page details in ranges\n"
" -L|--list-each Show page details one by one\n"
+" -C|--list-cgroup Show cgroup inode for pages\n"
" -N|--no-summary Don't show summary info\n"
" -X|--hwpoison hwpoison pages\n"
" -x|--unpoison unpoison pages\n"
@@ -851,6 +921,7 @@ static void walk_file(const char *name, const struct stat *st)
{
uint8_t vec[PAGEMAP_BATCH];
uint64_t buf[PAGEMAP_BATCH], flags;
+ uint64_t cgroup = 0;
unsigned long nr_pages, pfn, i;
off_t off, end = st->st_size;
int fd;
@@ -908,12 +979,15 @@ got_sigbus:
continue;
if (!kpageflags_read(&flags, pfn, 1))
continue;
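+ /* also look up the page's memory cgroup inode */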
+ if (!kpagecgroup_read(&cgroup, pfn, 1))
+ fatal("kpagecgroup_read failed");
if (first && opt_list) {
first = 0;
flush_page_range();
show_file(name, st);
}
- add_page(off / page_size + i, pfn, flags, buf[i]);
+ add_page(off / page_size + i, pfn,
+ flags, cgroup, buf[i]);
}
}
@@ -965,6 +1039,24 @@ static void parse_file(const char *name)
opt_file = name;
}
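+/* accept either a cgroup directory path or a raw inode number prefixed with '@' */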
+static void parse_cgroup(const char *path)
+{
+ if (path[0] == '@') {
+ opt_cgroup = parse_number(path + 1);
+ return;
+ }
+
+ struct stat st;
+
+ if (stat(path, &st))
+ fatal("stat failed: %s: %m\n", path);
+
+ if (!S_ISDIR(st.st_mode))
+ fatal("cgroup supposed to be a directory: %s\n", path);
+
+ opt_cgroup = st.st_ino;
+}
+
static void parse_addr_range(const char *optarg)
{
unsigned long offset;
@@ -1088,9 +1180,11 @@ static const struct option opts[] = {
{ "file" , 1, NULL, 'f' },
{ "addr" , 1, NULL, 'a' },
{ "bits" , 1, NULL, 'b' },
+ { "cgroup" , 1, NULL, 'c' },
{ "describe" , 1, NULL, 'd' },
{ "list" , 0, NULL, 'l' },
{ "list-each" , 0, NULL, 'L' },
+ { "list-cgroup", 0, NULL, 'C' },
{ "no-summary", 0, NULL, 'N' },
{ "hwpoison" , 0, NULL, 'X' },
{ "unpoison" , 0, NULL, 'x' },
@@ -1105,7 +1199,7 @@ int main(int argc, char *argv[])
page_size = getpagesize();
while ((c = getopt_long(argc, argv,
- "rp:f:a:b:d:lLNXxh", opts, NULL)) != -1) {
+ "rp:f:a:b:d:c:ClLNXxh", opts, NULL)) != -1) {
switch (c) {
case 'r':
opt_raw = 1;
@@ -1122,6 +1216,12 @@ int main(int argc, char *argv[])
case 'b':
parse_bits_mask(optarg);
break;
+ case 'c':
+ parse_cgroup(optarg);
+ break;
+ case 'C':
+ opt_list_cgroup = 1;
+ break;
case 'd':
describe_flags(optarg);
exit(0);
@@ -1151,10 +1251,15 @@ int main(int argc, char *argv[])
}
}
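+ /* /proc/kpagecgroup is needed only for cgroup filtering or listing */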
+ if (opt_cgroup || opt_list_cgroup)
+ kpagecgroup_fd = checked_open(PROC_KPAGECGROUP, O_RDONLY);
+
if (opt_list && opt_pid)
printf("voffset\t");
if (opt_list && opt_file)
printf("foffset\t");
+ if (opt_list && opt_list_cgroup)
+ printf("cgroup\t");
if (opt_list == 1)
printf("offset\tlen\tflags\n");
if (opt_list == 2)
diff --git a/tools/vm/slabinfo.c b/tools/vm/slabinfo.c
index 86e698d07e20..1889163f2f05 100644
--- a/tools/vm/slabinfo.c
+++ b/tools/vm/slabinfo.c
@@ -135,7 +135,7 @@ static void usage(void)
"\nValid debug options (FZPUT may be combined)\n"
"a / A Switch on all debug options (=FZUP)\n"
"- Switch off all debug options\n"
- "f / F Sanity Checks (SLAB_DEBUG_FREE)\n"
+ "f / F Sanity Checks (SLAB_CONSISTENCY_CHECKS)\n"
"z / Z Redzoning\n"
"p / P Poisoning\n"
"u / U Tracking\n"