summaryrefslogtreecommitdiff
path: root/tools/include
diff options
context:
space:
mode:
Diffstat (limited to 'tools/include')
-rw-r--r--tools/include/linux/build_bug.h5
-rw-r--r--tools/include/linux/filter.h24
-rw-r--r--tools/include/linux/rbtree.h192
-rw-r--r--tools/include/linux/static_call_types.h50
-rw-r--r--tools/include/linux/types.h3
-rw-r--r--tools/include/nolibc/nolibc.h153
-rw-r--r--tools/include/uapi/linux/bpf.h123
-rw-r--r--tools/include/uapi/linux/bpf_perf_event.h1
-rw-r--r--tools/include/uapi/linux/kvm.h3
-rw-r--r--tools/include/uapi/linux/perf_event.h96
-rw-r--r--tools/include/uapi/linux/pkt_sched.h1
-rw-r--r--tools/include/uapi/linux/prctl.h3
-rw-r--r--tools/include/uapi/linux/tcp.h357
13 files changed, 941 insertions, 70 deletions
diff --git a/tools/include/linux/build_bug.h b/tools/include/linux/build_bug.h
index ce365d212768..cc7070c7439b 100644
--- a/tools/include/linux/build_bug.h
+++ b/tools/include/linux/build_bug.h
@@ -79,9 +79,4 @@
#define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
#endif // static_assert
-#ifdef __GENKSYMS__
-/* genksyms gets confused by _Static_assert */
-#define _Static_assert(expr, ...)
-#endif
-
#endif /* _LINUX_BUILD_BUG_H */
diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h
index ca28b6ab8db7..736bdeccdfe4 100644
--- a/tools/include/linux/filter.h
+++ b/tools/include/linux/filter.h
@@ -169,15 +169,31 @@
.off = OFF, \
.imm = 0 })
-/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */
+/*
+ * Atomic operations:
+ *
+ * BPF_ADD *(uint *) (dst_reg + off16) += src_reg
+ * BPF_AND *(uint *) (dst_reg + off16) &= src_reg
+ * BPF_OR *(uint *) (dst_reg + off16) |= src_reg
+ * BPF_XOR *(uint *) (dst_reg + off16) ^= src_reg
+ * BPF_ADD | BPF_FETCH src_reg = atomic_fetch_add(dst_reg + off16, src_reg);
+ * BPF_AND | BPF_FETCH src_reg = atomic_fetch_and(dst_reg + off16, src_reg);
+ * BPF_OR | BPF_FETCH src_reg = atomic_fetch_or(dst_reg + off16, src_reg);
+ * BPF_XOR | BPF_FETCH src_reg = atomic_fetch_xor(dst_reg + off16, src_reg);
+ * BPF_XCHG src_reg = atomic_xchg(dst_reg + off16, src_reg)
+ * BPF_CMPXCHG r0 = atomic_cmpxchg(dst_reg + off16, r0, src_reg)
+ */
-#define BPF_STX_XADD(SIZE, DST, SRC, OFF) \
+#define BPF_ATOMIC_OP(SIZE, OP, DST, SRC, OFF) \
((struct bpf_insn) { \
- .code = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD, \
+ .code = BPF_STX | BPF_SIZE(SIZE) | BPF_ATOMIC, \
.dst_reg = DST, \
.src_reg = SRC, \
.off = OFF, \
- .imm = 0 })
+ .imm = OP })
+
+/* Legacy alias */
+#define BPF_STX_XADD(SIZE, DST, SRC, OFF) BPF_ATOMIC_OP(SIZE, BPF_ADD, DST, SRC, OFF)
/* Memory store, *(uint *) (dst_reg + off16) = imm32 */
diff --git a/tools/include/linux/rbtree.h b/tools/include/linux/rbtree.h
index 30dd21f976c3..2680f2edb837 100644
--- a/tools/include/linux/rbtree.h
+++ b/tools/include/linux/rbtree.h
@@ -152,4 +152,194 @@ static inline void rb_replace_node_cached(struct rb_node *victim,
rb_replace_node(victim, new, &root->rb_root);
}
-#endif /* __TOOLS_LINUX_PERF_RBTREE_H */
+/*
+ * The below helper functions use 2 operators with 3 different
+ * calling conventions. The operators are related like:
+ *
+ * comp(a->key,b) < 0 := less(a,b)
+ * comp(a->key,b) > 0 := less(b,a)
+ * comp(a->key,b) == 0 := !less(a,b) && !less(b,a)
+ *
+ * If these operators define a partial order on the elements we make no
+ * guarantee on which of the elements matching the key is found. See
+ * rb_find().
+ *
+ * The reason for this is to allow the find() interface without requiring an
+ * on-stack dummy object, which might not be feasible due to object size.
+ */
+
+/**
+ * rb_add_cached() - insert @node into the leftmost cached tree @tree
+ * @node: node to insert
+ * @tree: leftmost cached tree to insert @node into
+ * @less: operator defining the (partial) node order
+ */
+static __always_inline void
+rb_add_cached(struct rb_node *node, struct rb_root_cached *tree,
+ bool (*less)(struct rb_node *, const struct rb_node *))
+{
+ struct rb_node **link = &tree->rb_root.rb_node;
+ struct rb_node *parent = NULL;
+ bool leftmost = true;
+
+ while (*link) {
+ parent = *link;
+ if (less(node, parent)) {
+ link = &parent->rb_left;
+ } else {
+ link = &parent->rb_right;
+ leftmost = false;
+ }
+ }
+
+ rb_link_node(node, parent, link);
+ rb_insert_color_cached(node, tree, leftmost);
+}
+
+/**
+ * rb_add() - insert @node into @tree
+ * @node: node to insert
+ * @tree: tree to insert @node into
+ * @less: operator defining the (partial) node order
+ */
+static __always_inline void
+rb_add(struct rb_node *node, struct rb_root *tree,
+ bool (*less)(struct rb_node *, const struct rb_node *))
+{
+ struct rb_node **link = &tree->rb_node;
+ struct rb_node *parent = NULL;
+
+ while (*link) {
+ parent = *link;
+ if (less(node, parent))
+ link = &parent->rb_left;
+ else
+ link = &parent->rb_right;
+ }
+
+ rb_link_node(node, parent, link);
+ rb_insert_color(node, tree);
+}
+
+/**
+ * rb_find_add() - find equivalent @node in @tree, or add @node
+ * @node: node to look-for / insert
+ * @tree: tree to search / modify
+ * @cmp: operator defining the node order
+ *
+ * Returns the rb_node matching @node, or NULL when no match is found and @node
+ * is inserted.
+ */
+static __always_inline struct rb_node *
+rb_find_add(struct rb_node *node, struct rb_root *tree,
+ int (*cmp)(struct rb_node *, const struct rb_node *))
+{
+ struct rb_node **link = &tree->rb_node;
+ struct rb_node *parent = NULL;
+ int c;
+
+ while (*link) {
+ parent = *link;
+ c = cmp(node, parent);
+
+ if (c < 0)
+ link = &parent->rb_left;
+ else if (c > 0)
+ link = &parent->rb_right;
+ else
+ return parent;
+ }
+
+ rb_link_node(node, parent, link);
+ rb_insert_color(node, tree);
+ return NULL;
+}
+
+/**
+ * rb_find() - find @key in tree @tree
+ * @key: key to match
+ * @tree: tree to search
+ * @cmp: operator defining the node order
+ *
+ * Returns the rb_node matching @key or NULL.
+ */
+static __always_inline struct rb_node *
+rb_find(const void *key, const struct rb_root *tree,
+ int (*cmp)(const void *key, const struct rb_node *))
+{
+ struct rb_node *node = tree->rb_node;
+
+ while (node) {
+ int c = cmp(key, node);
+
+ if (c < 0)
+ node = node->rb_left;
+ else if (c > 0)
+ node = node->rb_right;
+ else
+ return node;
+ }
+
+ return NULL;
+}
+
+/**
+ * rb_find_first() - find the first @key in @tree
+ * @key: key to match
+ * @tree: tree to search
+ * @cmp: operator defining node order
+ *
+ * Returns the leftmost node matching @key, or NULL.
+ */
+static __always_inline struct rb_node *
+rb_find_first(const void *key, const struct rb_root *tree,
+ int (*cmp)(const void *key, const struct rb_node *))
+{
+ struct rb_node *node = tree->rb_node;
+ struct rb_node *match = NULL;
+
+ while (node) {
+ int c = cmp(key, node);
+
+ if (c <= 0) {
+ if (!c)
+ match = node;
+ node = node->rb_left;
+ } else if (c > 0) {
+ node = node->rb_right;
+ }
+ }
+
+ return match;
+}
+
+/**
+ * rb_next_match() - find the next @key in @tree
+ * @key: key to match
+ * @tree: tree to search
+ * @cmp: operator defining node order
+ *
+ * Returns the next node matching @key, or NULL.
+ */
+static __always_inline struct rb_node *
+rb_next_match(const void *key, struct rb_node *node,
+ int (*cmp)(const void *key, const struct rb_node *))
+{
+ node = rb_next(node);
+ if (node && cmp(key, node))
+ node = NULL;
+ return node;
+}
+
+/**
+ * rb_for_each() - iterates a subtree matching @key
+ * @node: iterator
+ * @key: key to match
+ * @tree: tree to search
+ * @cmp: operator defining node order
+ */
+#define rb_for_each(node, key, tree, cmp) \
+ for ((node) = rb_find_first((key), (tree), (cmp)); \
+ (node); (node) = rb_next_match((key), (node), (cmp)))
+
+#endif /* __TOOLS_LINUX_PERF_RBTREE_H */
diff --git a/tools/include/linux/static_call_types.h b/tools/include/linux/static_call_types.h
index 89135bb35bf7..ae5662d368b9 100644
--- a/tools/include/linux/static_call_types.h
+++ b/tools/include/linux/static_call_types.h
@@ -4,11 +4,13 @@
#include <linux/types.h>
#include <linux/stringify.h>
+#include <linux/compiler.h>
#define STATIC_CALL_KEY_PREFIX __SCK__
#define STATIC_CALL_KEY_PREFIX_STR __stringify(STATIC_CALL_KEY_PREFIX)
#define STATIC_CALL_KEY_PREFIX_LEN (sizeof(STATIC_CALL_KEY_PREFIX_STR) - 1)
#define STATIC_CALL_KEY(name) __PASTE(STATIC_CALL_KEY_PREFIX, name)
+#define STATIC_CALL_KEY_STR(name) __stringify(STATIC_CALL_KEY(name))
#define STATIC_CALL_TRAMP_PREFIX __SCT__
#define STATIC_CALL_TRAMP_PREFIX_STR __stringify(STATIC_CALL_TRAMP_PREFIX)
@@ -32,4 +34,52 @@ struct static_call_site {
s32 key;
};
+#define DECLARE_STATIC_CALL(name, func) \
+ extern struct static_call_key STATIC_CALL_KEY(name); \
+ extern typeof(func) STATIC_CALL_TRAMP(name);
+
+#ifdef CONFIG_HAVE_STATIC_CALL
+
+#define __raw_static_call(name) (&STATIC_CALL_TRAMP(name))
+
+#ifdef CONFIG_HAVE_STATIC_CALL_INLINE
+
+/*
+ * __ADDRESSABLE() is used to ensure the key symbol doesn't get stripped from
+ * the symbol table so that objtool can reference it when it generates the
+ * .static_call_sites section.
+ */
+#define __STATIC_CALL_ADDRESSABLE(name) \
+ __ADDRESSABLE(STATIC_CALL_KEY(name))
+
+#define __static_call(name) \
+({ \
+ __STATIC_CALL_ADDRESSABLE(name); \
+ __raw_static_call(name); \
+})
+
+#else /* !CONFIG_HAVE_STATIC_CALL_INLINE */
+
+#define __STATIC_CALL_ADDRESSABLE(name)
+#define __static_call(name) __raw_static_call(name)
+
+#endif /* CONFIG_HAVE_STATIC_CALL_INLINE */
+
+#ifdef MODULE
+#define __STATIC_CALL_MOD_ADDRESSABLE(name)
+#define static_call_mod(name) __raw_static_call(name)
+#else
+#define __STATIC_CALL_MOD_ADDRESSABLE(name) __STATIC_CALL_ADDRESSABLE(name)
+#define static_call_mod(name) __static_call(name)
+#endif
+
+#define static_call(name) __static_call(name)
+
+#else
+
+#define static_call(name) \
+ ((typeof(STATIC_CALL_TRAMP(name))*)(STATIC_CALL_KEY(name).func))
+
+#endif /* CONFIG_HAVE_STATIC_CALL */
+
#endif /* _STATIC_CALL_TYPES_H */
diff --git a/tools/include/linux/types.h b/tools/include/linux/types.h
index 154eb4e3ca7c..e9c5a215837d 100644
--- a/tools/include/linux/types.h
+++ b/tools/include/linux/types.h
@@ -6,7 +6,10 @@
#include <stddef.h>
#include <stdint.h>
+#ifndef __SANE_USERSPACE_TYPES__
#define __SANE_USERSPACE_TYPES__ /* For PPC64, to get LL64 types */
+#endif
+
#include <asm/types.h>
#include <asm/posix_types.h>
diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h
index e61d36cd4e50..8b7a9830dd22 100644
--- a/tools/include/nolibc/nolibc.h
+++ b/tools/include/nolibc/nolibc.h
@@ -71,7 +71,7 @@
*
* A simple static executable may be built this way :
* $ gcc -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \
- * -static -include nolibc.h -lgcc -o hello hello.c
+ * -static -include nolibc.h -o hello hello.c -lgcc
*
* A very useful calling convention table may be found here :
* http://man7.org/linux/man-pages/man2/syscall.2.html
@@ -81,19 +81,12 @@
*
*/
-/* Some archs (at least aarch64) don't expose the regular syscalls anymore by
- * default, either because they have an "_at" replacement, or because there are
- * more modern alternatives. For now we'd rather still use them.
- */
-#define __ARCH_WANT_SYSCALL_NO_AT
-#define __ARCH_WANT_SYSCALL_NO_FLAGS
-#define __ARCH_WANT_SYSCALL_DEPRECATED
-
#include <asm/unistd.h>
#include <asm/ioctls.h>
#include <asm/errno.h>
#include <linux/fs.h>
#include <linux/loop.h>
+#include <linux/time.h>
#define NOLIBC
@@ -152,24 +145,6 @@ struct pollfd {
short int revents;
};
-/* for select() */
-struct timeval {
- long tv_sec;
- long tv_usec;
-};
-
-/* for pselect() */
-struct timespec {
- long tv_sec;
- long tv_nsec;
-};
-
-/* for gettimeofday() */
-struct timezone {
- int tz_minuteswest;
- int tz_dsttime;
-};
-
/* for getdents64() */
struct linux_dirent64 {
uint64_t d_ino;
@@ -271,6 +246,8 @@ struct stat {
#define WEXITSTATUS(status) (((status) & 0xff00) >> 8)
#define WIFEXITED(status) (((status) & 0x7f) == 0)
+/* for SIGCHLD */
+#include <asm/signal.h>
/* Below comes the architecture-specific code. For each architecture, we have
* the syscall declarations and the _start code definition. This is the only
@@ -1469,8 +1446,10 @@ int sys_chmod(const char *path, mode_t mode)
{
#ifdef __NR_fchmodat
return my_syscall4(__NR_fchmodat, AT_FDCWD, path, mode, 0);
-#else
+#elif defined(__NR_chmod)
return my_syscall2(__NR_chmod, path, mode);
+#else
+#error Neither __NR_fchmodat nor __NR_chmod defined, cannot implement sys_chmod()
#endif
}
@@ -1479,8 +1458,10 @@ int sys_chown(const char *path, uid_t owner, gid_t group)
{
#ifdef __NR_fchownat
return my_syscall5(__NR_fchownat, AT_FDCWD, path, owner, group, 0);
-#else
+#elif defined(__NR_chown)
return my_syscall3(__NR_chown, path, owner, group);
+#else
+#error Neither __NR_fchownat nor __NR_chown defined, cannot implement sys_chown()
#endif
}
@@ -1502,10 +1483,24 @@ int sys_dup(int fd)
return my_syscall1(__NR_dup, fd);
}
+#ifdef __NR_dup3
+static __attribute__((unused))
+int sys_dup3(int old, int new, int flags)
+{
+ return my_syscall3(__NR_dup3, old, new, flags);
+}
+#endif
+
static __attribute__((unused))
int sys_dup2(int old, int new)
{
+#ifdef __NR_dup3
+ return my_syscall3(__NR_dup3, old, new, 0);
+#elif defined(__NR_dup2)
return my_syscall2(__NR_dup2, old, new);
+#else
+#error Neither __NR_dup3 nor __NR_dup2 defined, cannot implement sys_dup2()
+#endif
}
static __attribute__((unused))
@@ -1517,7 +1512,17 @@ int sys_execve(const char *filename, char *const argv[], char *const envp[])
static __attribute__((unused))
pid_t sys_fork(void)
{
+#ifdef __NR_clone
+ /* note: some archs only have clone() and not fork(). Different archs
+ * have a different API, but most archs have the flags on first arg and
+ * will not use the rest with no other flag.
+ */
+ return my_syscall5(__NR_clone, SIGCHLD, 0, 0, 0, 0);
+#elif defined(__NR_fork)
return my_syscall0(__NR_fork);
+#else
+#error Neither __NR_clone nor __NR_fork defined, cannot implement sys_fork()
+#endif
}
static __attribute__((unused))
@@ -1533,9 +1538,15 @@ int sys_getdents64(int fd, struct linux_dirent64 *dirp, int count)
}
static __attribute__((unused))
+pid_t sys_getpgid(pid_t pid)
+{
+ return my_syscall1(__NR_getpgid, pid);
+}
+
+static __attribute__((unused))
pid_t sys_getpgrp(void)
{
- return my_syscall0(__NR_getpgrp);
+ return sys_getpgid(0);
}
static __attribute__((unused))
@@ -1567,8 +1578,10 @@ int sys_link(const char *old, const char *new)
{
#ifdef __NR_linkat
return my_syscall5(__NR_linkat, AT_FDCWD, old, AT_FDCWD, new, 0);
-#else
+#elif defined(__NR_link)
return my_syscall2(__NR_link, old, new);
+#else
+#error Neither __NR_linkat nor __NR_link defined, cannot implement sys_link()
#endif
}
@@ -1583,8 +1596,10 @@ int sys_mkdir(const char *path, mode_t mode)
{
#ifdef __NR_mkdirat
return my_syscall3(__NR_mkdirat, AT_FDCWD, path, mode);
-#else
+#elif defined(__NR_mkdir)
return my_syscall2(__NR_mkdir, path, mode);
+#else
+#error Neither __NR_mkdirat nor __NR_mkdir defined, cannot implement sys_mkdir()
#endif
}
@@ -1593,8 +1608,10 @@ long sys_mknod(const char *path, mode_t mode, dev_t dev)
{
#ifdef __NR_mknodat
return my_syscall4(__NR_mknodat, AT_FDCWD, path, mode, dev);
-#else
+#elif defined(__NR_mknod)
return my_syscall3(__NR_mknod, path, mode, dev);
+#else
+#error Neither __NR_mknodat nor __NR_mknod defined, cannot implement sys_mknod()
#endif
}
@@ -1610,8 +1627,10 @@ int sys_open(const char *path, int flags, mode_t mode)
{
#ifdef __NR_openat
return my_syscall4(__NR_openat, AT_FDCWD, path, flags, mode);
-#else
+#elif defined(__NR_open)
return my_syscall3(__NR_open, path, flags, mode);
+#else
+#error Neither __NR_openat nor __NR_open defined, cannot implement sys_open()
#endif
}
@@ -1624,7 +1643,19 @@ int sys_pivot_root(const char *new, const char *old)
static __attribute__((unused))
int sys_poll(struct pollfd *fds, int nfds, int timeout)
{
+#if defined(__NR_ppoll)
+ struct timespec t;
+
+ if (timeout >= 0) {
+ t.tv_sec = timeout / 1000;
+ t.tv_nsec = (timeout % 1000) * 1000000;
+ }
+ return my_syscall4(__NR_ppoll, fds, nfds, (timeout >= 0) ? &t : NULL, NULL);
+#elif defined(__NR_poll)
return my_syscall3(__NR_poll, fds, nfds, timeout);
+#else
+#error Neither __NR_ppoll nor __NR_poll defined, cannot implement sys_poll()
+#endif
}
static __attribute__((unused))
@@ -1663,11 +1694,13 @@ int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeva
t.tv_nsec = timeout->tv_usec * 1000;
}
return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL);
-#else
+#elif defined(__NR__newselect) || defined(__NR_select)
#ifndef __NR__newselect
#define __NR__newselect __NR_select
#endif
return my_syscall5(__NR__newselect, nfds, rfds, wfds, efds, timeout);
+#else
+#error None of __NR_select, __NR_pselect6, nor __NR__newselect defined, cannot implement sys_select()
#endif
}
@@ -1692,8 +1725,10 @@ int sys_stat(const char *path, struct stat *buf)
#ifdef __NR_newfstatat
/* only solution for arm64 */
ret = my_syscall4(__NR_newfstatat, AT_FDCWD, path, &stat, 0);
-#else
+#elif defined(__NR_stat)
ret = my_syscall2(__NR_stat, path, &stat);
+#else
+#error Neither __NR_newfstatat nor __NR_stat defined, cannot implement sys_stat()
#endif
buf->st_dev = stat.st_dev;
buf->st_ino = stat.st_ino;
@@ -1717,8 +1752,10 @@ int sys_symlink(const char *old, const char *new)
{
#ifdef __NR_symlinkat
return my_syscall3(__NR_symlinkat, old, AT_FDCWD, new);
-#else
+#elif defined(__NR_symlink)
return my_syscall2(__NR_symlink, old, new);
+#else
+#error Neither __NR_symlinkat nor __NR_symlink defined, cannot implement sys_symlink()
#endif
}
@@ -1739,8 +1776,10 @@ int sys_unlink(const char *path)
{
#ifdef __NR_unlinkat
return my_syscall3(__NR_unlinkat, AT_FDCWD, path, 0);
-#else
+#elif defined(__NR_unlink)
return my_syscall1(__NR_unlink, path);
+#else
+#error Neither __NR_unlinkat nor __NR_unlink defined, cannot implement sys_unlink()
#endif
}
@@ -1853,6 +1892,18 @@ int close(int fd)
}
static __attribute__((unused))
+int dup(int fd)
+{
+ int ret = sys_dup(fd);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
int dup2(int old, int new)
{
int ret = sys_dup2(old, new);
@@ -1864,6 +1915,20 @@ int dup2(int old, int new)
return ret;
}
+#ifdef __NR_dup3
+static __attribute__((unused))
+int dup3(int old, int new, int flags)
+{
+ int ret = sys_dup3(old, new, flags);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+#endif
+
static __attribute__((unused))
int execve(const char *filename, char *const argv[], char *const envp[])
{
@@ -1913,6 +1978,18 @@ int getdents64(int fd, struct linux_dirent64 *dirp, int count)
}
static __attribute__((unused))
+pid_t getpgid(pid_t pid)
+{
+ pid_t ret = sys_getpgid(pid);
+
+ if (ret < 0) {
+ SET_ERRNO(-ret);
+ ret = -1;
+ }
+ return ret;
+}
+
+static __attribute__((unused))
pid_t getpgrp(void)
{
pid_t ret = sys_getpgrp();
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 77d7c1bb2923..4c24daa43bac 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -19,7 +19,8 @@
/* ld/ldx fields */
#define BPF_DW 0x18 /* double word (64-bit) */
-#define BPF_XADD 0xc0 /* exclusive add */
+#define BPF_ATOMIC 0xc0 /* atomic memory ops - op type in immediate */
+#define BPF_XADD 0xc0 /* exclusive add - legacy name */
/* alu/jmp fields */
#define BPF_MOV 0xb0 /* mov reg to reg */
@@ -43,6 +44,11 @@
#define BPF_CALL 0x80 /* function call */
#define BPF_EXIT 0x90 /* function return */
+/* atomic op type fields (stored in immediate) */
+#define BPF_FETCH 0x01 /* not an opcode on its own, used to build others */
+#define BPF_XCHG (0xe0 | BPF_FETCH) /* atomic exchange */
+#define BPF_CMPXCHG (0xf0 | BPF_FETCH) /* atomic compare-and-write */
+
/* Register numbers */
enum {
BPF_REG_0 = 0,
@@ -1650,22 +1656,30 @@ union bpf_attr {
* networking traffic statistics as it provides a global socket
* identifier that can be assumed unique.
* Return
- * A 8-byte long non-decreasing number on success, or 0 if the
- * socket field is missing inside *skb*.
+ * A 8-byte long unique number on success, or 0 if the socket
+ * field is missing inside *skb*.
*
* u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx)
* Description
* Equivalent to bpf_get_socket_cookie() helper that accepts
* *skb*, but gets socket from **struct bpf_sock_addr** context.
* Return
- * A 8-byte long non-decreasing number.
+ * A 8-byte long unique number.
*
* u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx)
* Description
* Equivalent to **bpf_get_socket_cookie**\ () helper that accepts
* *skb*, but gets socket from **struct bpf_sock_ops** context.
* Return
- * A 8-byte long non-decreasing number.
+ * A 8-byte long unique number.
+ *
+ * u64 bpf_get_socket_cookie(struct sock *sk)
+ * Description
+ * Equivalent to **bpf_get_socket_cookie**\ () helper that accepts
+ * *sk*, but gets socket from a BTF **struct sock**. This helper
+ * also works for sleepable programs.
+ * Return
+ * A 8-byte long unique number or 0 if *sk* is NULL.
*
* u32 bpf_get_socket_uid(struct sk_buff *skb)
* Return
@@ -2225,6 +2239,9 @@ union bpf_attr {
* * > 0 one of **BPF_FIB_LKUP_RET_** codes explaining why the
* packet is not forwarded or needs assist from full stack
*
+ * If lookup fails with BPF_FIB_LKUP_RET_FRAG_NEEDED, then the MTU
+ * was exceeded and output params->mtu_result contains the MTU.
+ *
* long bpf_sock_hash_update(struct bpf_sock_ops *skops, struct bpf_map *map, void *key, u64 flags)
* Description
* Add an entry to, or update a sockhash *map* referencing sockets.
@@ -2448,7 +2465,7 @@ union bpf_attr {
* running simultaneously.
*
* A user should care about the synchronization by himself.
- * For example, by using the **BPF_STX_XADD** instruction to alter
+ * For example, by using the **BPF_ATOMIC** instructions to alter
* the shared data.
* Return
* A pointer to the local storage area.
@@ -2993,10 +3010,10 @@ union bpf_attr {
* string length is larger than *size*, just *size*-1 bytes are
* copied and the last byte is set to NUL.
*
- * On success, the length of the copied string is returned. This
- * makes this helper useful in tracing programs for reading
- * strings, and more importantly to get its length at runtime. See
- * the following snippet:
+ * On success, returns the number of bytes that were written,
+ * including the terminal NUL. This makes this helper useful in
+ * tracing programs for reading strings, and more importantly to
+ * get its length at runtime. See the following snippet:
*
* ::
*
@@ -3024,7 +3041,7 @@ union bpf_attr {
* **->mm->env_start**: using this helper and the return value,
* one can quickly iterate at the right offset of the memory area.
* Return
- * On success, the strictly positive length of the string,
+ * On success, the strictly positive length of the output string,
* including the trailing NUL character. On error, a negative
* value.
*
@@ -3830,6 +3847,69 @@ union bpf_attr {
* Return
* A pointer to a struct socket on success or NULL if the file is
* not a socket.
+ *
+ * long bpf_check_mtu(void *ctx, u32 ifindex, u32 *mtu_len, s32 len_diff, u64 flags)
+ * Description
+
+ * Check ctx packet size against exceeding MTU of net device (based
+ * on *ifindex*). This helper will likely be used in combination
+ * with helpers that adjust/change the packet size.
+ *
+ * The argument *len_diff* can be used for querying with a planned
+ * size change. This allows to check MTU prior to changing packet
+ * ctx. Providing an *len_diff* adjustment that is larger than the
+ * actual packet size (resulting in negative packet size) will in
+ * principle not exceed the MTU, why it is not considered a
+ * failure. Other BPF-helpers are needed for performing the
+ * planned size change, why the responsability for catch a negative
+ * packet size belong in those helpers.
+ *
+ * Specifying *ifindex* zero means the MTU check is performed
+ * against the current net device. This is practical if this isn't
+ * used prior to redirect.
+ *
+ * The Linux kernel route table can configure MTUs on a more
+ * specific per route level, which is not provided by this helper.
+ * For route level MTU checks use the **bpf_fib_lookup**\ ()
+ * helper.
+ *
+ * *ctx* is either **struct xdp_md** for XDP programs or
+ * **struct sk_buff** for tc cls_act programs.
+ *
+ * The *flags* argument can be a combination of one or more of the
+ * following values:
+ *
+ * **BPF_MTU_CHK_SEGS**
+ * This flag will only works for *ctx* **struct sk_buff**.
+ * If packet context contains extra packet segment buffers
+ * (often knows as GSO skb), then MTU check is harder to
+ * check at this point, because in transmit path it is
+ * possible for the skb packet to get re-segmented
+ * (depending on net device features). This could still be
+ * a MTU violation, so this flag enables performing MTU
+ * check against segments, with a different violation
+ * return code to tell it apart. Check cannot use len_diff.
+ *
+ * On return *mtu_len* pointer contains the MTU value of the net
+ * device. Remember the net device configured MTU is the L3 size,
+ * which is returned here and XDP and TX length operate at L2.
+ * Helper take this into account for you, but remember when using
+ * MTU value in your BPF-code. On input *mtu_len* must be a valid
+ * pointer and be initialized (to zero), else verifier will reject
+ * BPF program.
+ *
+ * Return
+ * * 0 on success, and populate MTU value in *mtu_len* pointer.
+ *
+ * * < 0 if any input argument is invalid (*mtu_len* not updated)
+ *
+ * MTU violations return positive values, but also populate MTU
+ * value in *mtu_len* pointer, as this can be needed for
+ * implementing PMTU handing:
+ *
+ * * **BPF_MTU_CHK_RET_FRAG_NEEDED**
+ * * **BPF_MTU_CHK_RET_SEGS_TOOBIG**
+ *
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -3995,6 +4075,7 @@ union bpf_attr {
FN(ktime_get_coarse_ns), \
FN(ima_inode_hash), \
FN(sock_from_file), \
+ FN(check_mtu), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
@@ -4495,6 +4576,7 @@ struct bpf_prog_info {
__aligned_u64 prog_tags;
__u64 run_time_ns;
__u64 run_cnt;
+ __u64 recursion_misses;
} __attribute__((aligned(8)));
struct bpf_map_info {
@@ -4975,9 +5057,13 @@ struct bpf_fib_lookup {
__be16 sport;
__be16 dport;
- /* total length of packet from network header - used for MTU check */
- __u16 tot_len;
+ union { /* used for MTU check */
+ /* input to lookup */
+ __u16 tot_len; /* L3 length from network hdr (iph->tot_len) */
+ /* output: MTU value */
+ __u16 mtu_result;
+ };
/* input: L3 device index for lookup
* output: device index from FIB lookup
*/
@@ -5023,6 +5109,17 @@ struct bpf_redir_neigh {
};
};
+/* bpf_check_mtu flags*/
+enum bpf_check_mtu_flags {
+ BPF_MTU_CHK_SEGS = (1U << 0),
+};
+
+enum bpf_check_mtu_ret {
+ BPF_MTU_CHK_RET_SUCCESS, /* check and lookup successful */
+ BPF_MTU_CHK_RET_FRAG_NEEDED, /* fragmentation required to fwd */
+ BPF_MTU_CHK_RET_SEGS_TOOBIG, /* GSO re-segmentation needed to fwd */
+};
+
enum bpf_task_fd_type {
BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */
BPF_FD_TYPE_TRACEPOINT, /* tp name */
diff --git a/tools/include/uapi/linux/bpf_perf_event.h b/tools/include/uapi/linux/bpf_perf_event.h
index 8f95303f9d80..eb1b9d21250c 100644
--- a/tools/include/uapi/linux/bpf_perf_event.h
+++ b/tools/include/uapi/linux/bpf_perf_event.h
@@ -13,6 +13,7 @@
struct bpf_perf_event_data {
bpf_user_pt_regs_t regs;
__u64 sample_period;
+ __u64 addr;
};
#endif /* _UAPI__LINUX_BPF_PERF_EVENT_H__ */
diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h
index 886802b8ffba..abb89bbe5635 100644
--- a/tools/include/uapi/linux/kvm.h
+++ b/tools/include/uapi/linux/kvm.h
@@ -251,6 +251,7 @@ struct kvm_hyperv_exit {
#define KVM_EXIT_X86_RDMSR 29
#define KVM_EXIT_X86_WRMSR 30
#define KVM_EXIT_DIRTY_RING_FULL 31
+#define KVM_EXIT_AP_RESET_HOLD 32
/* For KVM_EXIT_INTERNAL_ERROR */
/* Emulate instruction failed. */
@@ -573,6 +574,7 @@ struct kvm_vapic_addr {
#define KVM_MP_STATE_CHECK_STOP 6
#define KVM_MP_STATE_OPERATING 7
#define KVM_MP_STATE_LOAD 8
+#define KVM_MP_STATE_AP_RESET_HOLD 9
struct kvm_mp_state {
__u32 mp_state;
@@ -1056,6 +1058,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190
#define KVM_CAP_SYS_HYPERV_CPUID 191
#define KVM_CAP_DIRTY_LOG_RING 192
+#define KVM_CAP_PPC_DAWR1 194
#ifdef KVM_CAP_IRQ_ROUTING
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index b15e3447cd9f..ad15e40d7f5d 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -145,12 +145,14 @@ enum perf_event_sample_format {
PERF_SAMPLE_CGROUP = 1U << 21,
PERF_SAMPLE_DATA_PAGE_SIZE = 1U << 22,
PERF_SAMPLE_CODE_PAGE_SIZE = 1U << 23,
+ PERF_SAMPLE_WEIGHT_STRUCT = 1U << 24,
- PERF_SAMPLE_MAX = 1U << 24, /* non-ABI */
+ PERF_SAMPLE_MAX = 1U << 25, /* non-ABI */
__PERF_SAMPLE_CALLCHAIN_EARLY = 1ULL << 63, /* non-ABI; internal use */
};
+#define PERF_SAMPLE_WEIGHT_TYPE (PERF_SAMPLE_WEIGHT | PERF_SAMPLE_WEIGHT_STRUCT)
/*
* values to program into branch_sample_type when PERF_SAMPLE_BRANCH is set
*
@@ -386,7 +388,8 @@ struct perf_event_attr {
aux_output : 1, /* generate AUX records instead of events */
cgroup : 1, /* include cgroup events */
text_poke : 1, /* include text poke events */
- __reserved_1 : 30;
+ build_id : 1, /* use build id in mmap2 events */
+ __reserved_1 : 29;
union {
__u32 wakeup_events; /* wakeup every n events */
@@ -659,6 +662,22 @@ struct perf_event_mmap_page {
__u64 aux_size;
};
+/*
+ * The current state of perf_event_header::misc bits usage:
+ * ('|' used bit, '-' unused bit)
+ *
+ * 012 CDEF
+ * |||---------||||
+ *
+ * Where:
+ * 0-2 CPUMODE_MASK
+ *
+ * C PROC_MAP_PARSE_TIMEOUT
+ * D MMAP_DATA / COMM_EXEC / FORK_EXEC / SWITCH_OUT
+ * E MMAP_BUILD_ID / EXACT_IP / SCHED_OUT_PREEMPT
+ * F (reserved)
+ */
+
#define PERF_RECORD_MISC_CPUMODE_MASK (7 << 0)
#define PERF_RECORD_MISC_CPUMODE_UNKNOWN (0 << 0)
#define PERF_RECORD_MISC_KERNEL (1 << 0)
@@ -690,6 +709,7 @@ struct perf_event_mmap_page {
*
* PERF_RECORD_MISC_EXACT_IP - PERF_RECORD_SAMPLE of precise events
* PERF_RECORD_MISC_SWITCH_OUT_PREEMPT - PERF_RECORD_SWITCH* events
+ * PERF_RECORD_MISC_MMAP_BUILD_ID - PERF_RECORD_MMAP2 event
*
*
* PERF_RECORD_MISC_EXACT_IP:
@@ -699,9 +719,13 @@ struct perf_event_mmap_page {
*
* PERF_RECORD_MISC_SWITCH_OUT_PREEMPT:
* Indicates that thread was preempted in TASK_RUNNING state.
+ *
+ * PERF_RECORD_MISC_MMAP_BUILD_ID:
+ * Indicates that mmap2 event carries build id data.
*/
#define PERF_RECORD_MISC_EXACT_IP (1 << 14)
#define PERF_RECORD_MISC_SWITCH_OUT_PREEMPT (1 << 14)
+#define PERF_RECORD_MISC_MMAP_BUILD_ID (1 << 14)
/*
* Reserve the last bit to indicate some extended misc field
*/
@@ -890,7 +914,24 @@ enum perf_event_type {
* char data[size];
* u64 dyn_size; } && PERF_SAMPLE_STACK_USER
*
- * { u64 weight; } && PERF_SAMPLE_WEIGHT
+ * { union perf_sample_weight
+ * {
+ * u64 full; && PERF_SAMPLE_WEIGHT
+ * #if defined(__LITTLE_ENDIAN_BITFIELD)
+ * struct {
+ * u32 var1_dw;
+ * u16 var2_w;
+ * u16 var3_w;
+ * } && PERF_SAMPLE_WEIGHT_STRUCT
+ * #elif defined(__BIG_ENDIAN_BITFIELD)
+ * struct {
+ * u16 var3_w;
+ * u16 var2_w;
+ * u32 var1_dw;
+ * } && PERF_SAMPLE_WEIGHT_STRUCT
+ * #endif
+ * }
+ * }
* { u64 data_src; } && PERF_SAMPLE_DATA_SRC
* { u64 transaction; } && PERF_SAMPLE_TRANSACTION
* { u64 abi; # enum perf_sample_regs_abi
@@ -915,10 +956,20 @@ enum perf_event_type {
* u64 addr;
* u64 len;
* u64 pgoff;
- * u32 maj;
- * u32 min;
- * u64 ino;
- * u64 ino_generation;
+ * union {
+ * struct {
+ * u32 maj;
+ * u32 min;
+ * u64 ino;
+ * u64 ino_generation;
+ * };
+ * struct {
+ * u8 build_id_size;
+ * u8 __reserved_1;
+ * u16 __reserved_2;
+ * u8 build_id[20];
+ * };
+ * };
* u32 prot, flags;
* char filename[];
* struct sample_id sample_id;
@@ -1127,14 +1178,16 @@ union perf_mem_data_src {
mem_lvl_num:4, /* memory hierarchy level number */
mem_remote:1, /* remote */
mem_snoopx:2, /* snoop mode, ext */
- mem_rsvd:24;
+ mem_blk:3, /* access blocked */
+ mem_rsvd:21;
};
};
#elif defined(__BIG_ENDIAN_BITFIELD)
union perf_mem_data_src {
__u64 val;
struct {
- __u64 mem_rsvd:24,
+ __u64 mem_rsvd:21,
+ mem_blk:3, /* access blocked */
mem_snoopx:2, /* snoop mode, ext */
mem_remote:1, /* remote */
mem_lvl_num:4, /* memory hierarchy level number */
@@ -1217,6 +1270,12 @@ union perf_mem_data_src {
#define PERF_MEM_TLB_OS 0x40 /* OS fault handler */
#define PERF_MEM_TLB_SHIFT 26
+/* Access blocked */
+#define PERF_MEM_BLK_NA 0x01 /* not available */
+#define PERF_MEM_BLK_DATA 0x02 /* data could not be forwarded */
+#define PERF_MEM_BLK_ADDR 0x04 /* address conflict */
+#define PERF_MEM_BLK_SHIFT 40
+
#define PERF_MEM_S(a, s) \
(((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT)
@@ -1248,4 +1307,23 @@ struct perf_branch_entry {
reserved:40;
};
+union perf_sample_weight {
+ __u64 full;
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ struct {
+ __u32 var1_dw;
+ __u16 var2_w;
+ __u16 var3_w;
+ };
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ struct {
+ __u16 var3_w;
+ __u16 var2_w;
+ __u32 var1_dw;
+ };
+#else
+#error "Unknown endianness"
+#endif
+};
+
#endif /* _UAPI_LINUX_PERF_EVENT_H */
diff --git a/tools/include/uapi/linux/pkt_sched.h b/tools/include/uapi/linux/pkt_sched.h
index 0d18b1d1fbbc..5c903abc9fa5 100644
--- a/tools/include/uapi/linux/pkt_sched.h
+++ b/tools/include/uapi/linux/pkt_sched.h
@@ -414,6 +414,7 @@ enum {
TCA_HTB_RATE64,
TCA_HTB_CEIL64,
TCA_HTB_PAD,
+ TCA_HTB_OFFLOAD,
__TCA_HTB_MAX,
};
diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h
index 90deb41c8a34..667f1aed091c 100644
--- a/tools/include/uapi/linux/prctl.h
+++ b/tools/include/uapi/linux/prctl.h
@@ -251,5 +251,8 @@ struct prctl_mm_map {
#define PR_SET_SYSCALL_USER_DISPATCH 59
# define PR_SYS_DISPATCH_OFF 0
# define PR_SYS_DISPATCH_ON 1
+/* The control values for the user space selector when dispatch is enabled */
+# define SYSCALL_DISPATCH_FILTER_ALLOW 0
+# define SYSCALL_DISPATCH_FILTER_BLOCK 1
#endif /* _LINUX_PRCTL_H */
diff --git a/tools/include/uapi/linux/tcp.h b/tools/include/uapi/linux/tcp.h
new file mode 100644
index 000000000000..13ceeb395eb8
--- /dev/null
+++ b/tools/include/uapi/linux/tcp.h
@@ -0,0 +1,357 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * INET An implementation of the TCP/IP protocol suite for the LINUX
+ * operating system. INET is implemented using the BSD Socket
+ * interface as the means of communication with the user level.
+ *
+ * Definitions for the TCP protocol.
+ *
+ * Version: @(#)tcp.h 1.0.2 04/28/93
+ *
+ * Author: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#ifndef _UAPI_LINUX_TCP_H
+#define _UAPI_LINUX_TCP_H
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+#include <linux/socket.h>
+
+struct tcphdr {
+ __be16 source;
+ __be16 dest;
+ __be32 seq;
+ __be32 ack_seq;
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+ __u16 res1:4,
+ doff:4,
+ fin:1,
+ syn:1,
+ rst:1,
+ psh:1,
+ ack:1,
+ urg:1,
+ ece:1,
+ cwr:1;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+ __u16 doff:4,
+ res1:4,
+ cwr:1,
+ ece:1,
+ urg:1,
+ ack:1,
+ psh:1,
+ rst:1,
+ syn:1,
+ fin:1;
+#else
+#error "Adjust your <asm/byteorder.h> defines"
+#endif
+ __be16 window;
+ __sum16 check;
+ __be16 urg_ptr;
+};
+
+/*
+ * The union cast uses a gcc extension to avoid aliasing problems
+ * (union is compatible to any of its members)
+ * This means this part of the code is -fstrict-aliasing safe now.
+ */
+union tcp_word_hdr {
+ struct tcphdr hdr;
+ __be32 words[5];
+};
+
+#define tcp_flag_word(tp) ( ((union tcp_word_hdr *)(tp))->words [3])
+
+enum {
+ TCP_FLAG_CWR = __constant_cpu_to_be32(0x00800000),
+ TCP_FLAG_ECE = __constant_cpu_to_be32(0x00400000),
+ TCP_FLAG_URG = __constant_cpu_to_be32(0x00200000),
+ TCP_FLAG_ACK = __constant_cpu_to_be32(0x00100000),
+ TCP_FLAG_PSH = __constant_cpu_to_be32(0x00080000),
+ TCP_FLAG_RST = __constant_cpu_to_be32(0x00040000),
+ TCP_FLAG_SYN = __constant_cpu_to_be32(0x00020000),
+ TCP_FLAG_FIN = __constant_cpu_to_be32(0x00010000),
+ TCP_RESERVED_BITS = __constant_cpu_to_be32(0x0F000000),
+ TCP_DATA_OFFSET = __constant_cpu_to_be32(0xF0000000)
+};
+
+/*
+ * TCP general constants
+ */
+#define TCP_MSS_DEFAULT 536U /* IPv4 (RFC1122, RFC2581) */
+#define TCP_MSS_DESIRED 1220U /* IPv6 (tunneled), EDNS0 (RFC3226) */
+
+/* TCP socket options */
+#define TCP_NODELAY 1 /* Turn off Nagle's algorithm. */
+#define TCP_MAXSEG 2 /* Limit MSS */
+#define TCP_CORK 3 /* Never send partially complete segments */
+#define TCP_KEEPIDLE 4 /* Start keeplives after this period */
+#define TCP_KEEPINTVL 5 /* Interval between keepalives */
+#define TCP_KEEPCNT 6 /* Number of keepalives before death */
+#define TCP_SYNCNT 7 /* Number of SYN retransmits */
+#define TCP_LINGER2 8 /* Life time of orphaned FIN-WAIT-2 state */
+#define TCP_DEFER_ACCEPT 9 /* Wake up listener only when data arrive */
+#define TCP_WINDOW_CLAMP 10 /* Bound advertised window */
+#define TCP_INFO 11 /* Information about this connection. */
+#define TCP_QUICKACK 12 /* Block/reenable quick acks */
+#define TCP_CONGESTION 13 /* Congestion control algorithm */
+#define TCP_MD5SIG 14 /* TCP MD5 Signature (RFC2385) */
+#define TCP_THIN_LINEAR_TIMEOUTS 16 /* Use linear timeouts for thin streams*/
+#define TCP_THIN_DUPACK 17 /* Fast retrans. after 1 dupack */
+#define TCP_USER_TIMEOUT 18 /* How long for loss retry before timeout */
+#define TCP_REPAIR 19 /* TCP sock is under repair right now */
+#define TCP_REPAIR_QUEUE 20
+#define TCP_QUEUE_SEQ 21
+#define TCP_REPAIR_OPTIONS 22
+#define TCP_FASTOPEN 23 /* Enable FastOpen on listeners */
+#define TCP_TIMESTAMP 24
+#define TCP_NOTSENT_LOWAT 25 /* limit number of unsent bytes in write queue */
+#define TCP_CC_INFO 26 /* Get Congestion Control (optional) info */
+#define TCP_SAVE_SYN 27 /* Record SYN headers for new connections */
+#define TCP_SAVED_SYN 28 /* Get SYN headers recorded for connection */
+#define TCP_REPAIR_WINDOW 29 /* Get/set window parameters */
+#define TCP_FASTOPEN_CONNECT 30 /* Attempt FastOpen with connect */
+#define TCP_ULP 31 /* Attach a ULP to a TCP connection */
+#define TCP_MD5SIG_EXT 32 /* TCP MD5 Signature with extensions */
+#define TCP_FASTOPEN_KEY 33 /* Set the key for Fast Open (cookie) */
+#define TCP_FASTOPEN_NO_COOKIE 34 /* Enable TFO without a TFO cookie */
+#define TCP_ZEROCOPY_RECEIVE 35
+#define TCP_INQ 36 /* Notify bytes available to read as a cmsg on read */
+
+#define TCP_CM_INQ TCP_INQ
+
+#define TCP_TX_DELAY 37 /* delay outgoing packets by XX usec */
+
+
+#define TCP_REPAIR_ON 1
+#define TCP_REPAIR_OFF 0
+#define TCP_REPAIR_OFF_NO_WP -1 /* Turn off without window probes */
+
+struct tcp_repair_opt {
+ __u32 opt_code;
+ __u32 opt_val;
+};
+
+struct tcp_repair_window {
+ __u32 snd_wl1;
+ __u32 snd_wnd;
+ __u32 max_window;
+
+ __u32 rcv_wnd;
+ __u32 rcv_wup;
+};
+
+enum {
+ TCP_NO_QUEUE,
+ TCP_RECV_QUEUE,
+ TCP_SEND_QUEUE,
+ TCP_QUEUES_NR,
+};
+
+/* why fastopen failed from client perspective */
+enum tcp_fastopen_client_fail {
+ TFO_STATUS_UNSPEC, /* catch-all */
+ TFO_COOKIE_UNAVAILABLE, /* if not in TFO_CLIENT_NO_COOKIE mode */
+ TFO_DATA_NOT_ACKED, /* SYN-ACK did not ack SYN data */
+ TFO_SYN_RETRANSMITTED, /* SYN-ACK did not ack SYN data after timeout */
+};
+
+/* for TCP_INFO socket option */
+#define TCPI_OPT_TIMESTAMPS 1
+#define TCPI_OPT_SACK 2
+#define TCPI_OPT_WSCALE 4
+#define TCPI_OPT_ECN 8 /* ECN was negociated at TCP session init */
+#define TCPI_OPT_ECN_SEEN 16 /* we received at least one packet with ECT */
+#define TCPI_OPT_SYN_DATA 32 /* SYN-ACK acked data in SYN sent or rcvd */
+
+/*
+ * Sender's congestion state indicating normal or abnormal situations
+ * in the last round of packets sent. The state is driven by the ACK
+ * information and timer events.
+ */
+enum tcp_ca_state {
+ /*
+ * Nothing bad has been observed recently.
+ * No apparent reordering, packet loss, or ECN marks.
+ */
+ TCP_CA_Open = 0,
+#define TCPF_CA_Open (1<<TCP_CA_Open)
+ /*
+ * The sender enters disordered state when it has received DUPACKs or
+ * SACKs in the last round of packets sent. This could be due to packet
+ * loss or reordering but needs further information to confirm packets
+ * have been lost.
+ */
+ TCP_CA_Disorder = 1,
+#define TCPF_CA_Disorder (1<<TCP_CA_Disorder)
+ /*
+ * The sender enters Congestion Window Reduction (CWR) state when it
+ * has received ACKs with ECN-ECE marks, or has experienced congestion
+ * or packet discard on the sender host (e.g. qdisc).
+ */
+ TCP_CA_CWR = 2,
+#define TCPF_CA_CWR (1<<TCP_CA_CWR)
+ /*
+ * The sender is in fast recovery and retransmitting lost packets,
+ * typically triggered by ACK events.
+ */
+ TCP_CA_Recovery = 3,
+#define TCPF_CA_Recovery (1<<TCP_CA_Recovery)
+ /*
+ * The sender is in loss recovery triggered by retransmission timeout.
+ */
+ TCP_CA_Loss = 4
+#define TCPF_CA_Loss (1<<TCP_CA_Loss)
+};
+
+struct tcp_info {
+ __u8 tcpi_state;
+ __u8 tcpi_ca_state;
+ __u8 tcpi_retransmits;
+ __u8 tcpi_probes;
+ __u8 tcpi_backoff;
+ __u8 tcpi_options;
+ __u8 tcpi_snd_wscale : 4, tcpi_rcv_wscale : 4;
+ __u8 tcpi_delivery_rate_app_limited:1, tcpi_fastopen_client_fail:2;
+
+ __u32 tcpi_rto;
+ __u32 tcpi_ato;
+ __u32 tcpi_snd_mss;
+ __u32 tcpi_rcv_mss;
+
+ __u32 tcpi_unacked;
+ __u32 tcpi_sacked;
+ __u32 tcpi_lost;
+ __u32 tcpi_retrans;
+ __u32 tcpi_fackets;
+
+ /* Times. */
+ __u32 tcpi_last_data_sent;
+ __u32 tcpi_last_ack_sent; /* Not remembered, sorry. */
+ __u32 tcpi_last_data_recv;
+ __u32 tcpi_last_ack_recv;
+
+ /* Metrics. */
+ __u32 tcpi_pmtu;
+ __u32 tcpi_rcv_ssthresh;
+ __u32 tcpi_rtt;
+ __u32 tcpi_rttvar;
+ __u32 tcpi_snd_ssthresh;
+ __u32 tcpi_snd_cwnd;
+ __u32 tcpi_advmss;
+ __u32 tcpi_reordering;
+
+ __u32 tcpi_rcv_rtt;
+ __u32 tcpi_rcv_space;
+
+ __u32 tcpi_total_retrans;
+
+ __u64 tcpi_pacing_rate;
+ __u64 tcpi_max_pacing_rate;
+ __u64 tcpi_bytes_acked; /* RFC4898 tcpEStatsAppHCThruOctetsAcked */
+ __u64 tcpi_bytes_received; /* RFC4898 tcpEStatsAppHCThruOctetsReceived */
+ __u32 tcpi_segs_out; /* RFC4898 tcpEStatsPerfSegsOut */
+ __u32 tcpi_segs_in; /* RFC4898 tcpEStatsPerfSegsIn */
+
+ __u32 tcpi_notsent_bytes;
+ __u32 tcpi_min_rtt;
+ __u32 tcpi_data_segs_in; /* RFC4898 tcpEStatsDataSegsIn */
+ __u32 tcpi_data_segs_out; /* RFC4898 tcpEStatsDataSegsOut */
+
+ __u64 tcpi_delivery_rate;
+
+ __u64 tcpi_busy_time; /* Time (usec) busy sending data */
+ __u64 tcpi_rwnd_limited; /* Time (usec) limited by receive window */
+ __u64 tcpi_sndbuf_limited; /* Time (usec) limited by send buffer */
+
+ __u32 tcpi_delivered;
+ __u32 tcpi_delivered_ce;
+
+ __u64 tcpi_bytes_sent; /* RFC4898 tcpEStatsPerfHCDataOctetsOut */
+ __u64 tcpi_bytes_retrans; /* RFC4898 tcpEStatsPerfOctetsRetrans */
+ __u32 tcpi_dsack_dups; /* RFC4898 tcpEStatsStackDSACKDups */
+ __u32 tcpi_reord_seen; /* reordering events seen */
+
+ __u32 tcpi_rcv_ooopack; /* Out-of-order packets received */
+
+ __u32 tcpi_snd_wnd; /* peer's advertised receive window after
+ * scaling (bytes)
+ */
+};
+
+/* netlink attributes types for SCM_TIMESTAMPING_OPT_STATS */
+enum {
+ TCP_NLA_PAD,
+ TCP_NLA_BUSY, /* Time (usec) busy sending data */
+ TCP_NLA_RWND_LIMITED, /* Time (usec) limited by receive window */
+ TCP_NLA_SNDBUF_LIMITED, /* Time (usec) limited by send buffer */
+ TCP_NLA_DATA_SEGS_OUT, /* Data pkts sent including retransmission */
+ TCP_NLA_TOTAL_RETRANS, /* Data pkts retransmitted */
+ TCP_NLA_PACING_RATE, /* Pacing rate in bytes per second */
+ TCP_NLA_DELIVERY_RATE, /* Delivery rate in bytes per second */
+ TCP_NLA_SND_CWND, /* Sending congestion window */
+ TCP_NLA_REORDERING, /* Reordering metric */
+ TCP_NLA_MIN_RTT, /* minimum RTT */
+ TCP_NLA_RECUR_RETRANS, /* Recurring retransmits for the current pkt */
+ TCP_NLA_DELIVERY_RATE_APP_LMT, /* delivery rate application limited ? */
+ TCP_NLA_SNDQ_SIZE, /* Data (bytes) pending in send queue */
+ TCP_NLA_CA_STATE, /* ca_state of socket */
+ TCP_NLA_SND_SSTHRESH, /* Slow start size threshold */
+ TCP_NLA_DELIVERED, /* Data pkts delivered incl. out-of-order */
+ TCP_NLA_DELIVERED_CE, /* Like above but only ones w/ CE marks */
+ TCP_NLA_BYTES_SENT, /* Data bytes sent including retransmission */
+ TCP_NLA_BYTES_RETRANS, /* Data bytes retransmitted */
+ TCP_NLA_DSACK_DUPS, /* DSACK blocks received */
+ TCP_NLA_REORD_SEEN, /* reordering events seen */
+ TCP_NLA_SRTT, /* smoothed RTT in usecs */
+ TCP_NLA_TIMEOUT_REHASH, /* Timeout-triggered rehash attempts */
+ TCP_NLA_BYTES_NOTSENT, /* Bytes in write queue not yet sent */
+ TCP_NLA_EDT, /* Earliest departure time (CLOCK_MONOTONIC) */
+};
+
+/* for TCP_MD5SIG socket option */
+#define TCP_MD5SIG_MAXKEYLEN 80
+
+/* tcp_md5sig extension flags for TCP_MD5SIG_EXT */
+#define TCP_MD5SIG_FLAG_PREFIX 0x1 /* address prefix length */
+#define TCP_MD5SIG_FLAG_IFINDEX 0x2 /* ifindex set */
+
+struct tcp_md5sig {
+ struct __kernel_sockaddr_storage tcpm_addr; /* address associated */
+ __u8 tcpm_flags; /* extension flags */
+ __u8 tcpm_prefixlen; /* address prefix */
+ __u16 tcpm_keylen; /* key length */
+ int tcpm_ifindex; /* device index for scope */
+ __u8 tcpm_key[TCP_MD5SIG_MAXKEYLEN]; /* key (binary) */
+};
+
+/* INET_DIAG_MD5SIG */
+struct tcp_diag_md5sig {
+ __u8 tcpm_family;
+ __u8 tcpm_prefixlen;
+ __u16 tcpm_keylen;
+ __be32 tcpm_addr[4];
+ __u8 tcpm_key[TCP_MD5SIG_MAXKEYLEN];
+};
+
+/* setsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE, ...) */
+
+#define TCP_RECEIVE_ZEROCOPY_FLAG_TLB_CLEAN_HINT 0x1
+struct tcp_zerocopy_receive {
+ __u64 address; /* in: address of mapping */
+ __u32 length; /* in/out: number of bytes to map/mapped */
+ __u32 recv_skip_hint; /* out: amount of bytes to skip */
+ __u32 inq; /* out: amount of bytes in read queue */
+ __s32 err; /* out: socket error */
+ __u64 copybuf_address; /* in: copybuf address (small reads) */
+ __s32 copybuf_len; /* in/out: copybuf bytes avail/used or error */
+ __u32 flags; /* in: flags */
+};
+#endif /* _UAPI_LINUX_TCP_H */