author		David S. Miller <davem@davemloft.net>	2021-02-17 00:14:06 +0300
committer	David S. Miller <davem@davemloft.net>	2021-02-17 00:14:06 +0300
commit		b8af417e4d93caeefb89bbfbd56ec95dedd8dab5 (patch)
tree		1c8d22e1aec330238830a43cc8aee0cf768ae1c7 /include/linux
parent		9ec5eea5b6acfae7279203097eeec5d02d01d9b7 (diff)
parent		45159b27637b0fef6d5ddb86fc7c46b13c77960f (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Daniel Borkmann says:

====================
pull-request: bpf-next 2021-02-16

The following pull-request contains BPF updates for your *net-next* tree.

There's a small merge conflict between 7eeba1706eba ("tcp: Add receive
timestamp support for receive zerocopy.") from the net-next tree and
9cacf81f8161 ("bpf: Remove extra lock_sock for TCP_ZEROCOPY_RECEIVE") from
the bpf-next tree. Resolve as follows:

  [...]
  lock_sock(sk);
  err = tcp_zerocopy_receive(sk, &zc, &tss);
  err = BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sk, level, optname,
                                            &zc, &len, err);
  release_sock(sk);
  [...]

We've added 116 non-merge commits during the last 27 day(s) which contain
a total of 156 files changed, 5662 insertions(+), 1489 deletions(-).

The main changes are:

1) Add support for pointers to types with known size among global function
   args, to overcome the limit on the max number of allowed args, from
   Dmitrii Banshchikov.

2) Add bpf_iter for task_vma, which can be used to generate information
   similar to /proc/pid/maps, from Song Liu.

3) Enable bpf_{g,s}etsockopt() from all sock_addr related program hooks.
   Allow rewriting bind user ports from the BPF side below the
   ip_unprivileged_port_start range, both from Stanislav Fomichev.

4) Prevent recursion on fentry/fexit & sleepable programs and allow
   map-in-map as well as per-cpu maps for the latter, from Alexei
   Starovoitov.

5) Add a selftest script to run BPF CI locally. Also enable the BPF ring
   buffer for sleepable programs, both from KP Singh.

6) Extend the verifier to enable variable-offset read/write access to the
   BPF program stack, from Andrei Matei.

7) Improve tc & XDP MTU handling and add a new bpf_check_mtu() helper to
   query device MTU from programs, from Jesper Dangaard Brouer.

8) Allow the bpf_get_socket_cookie() helper to also be called from
   [sleepable] BPF tracing programs, from Florent Revest.

9) Extend the x86 JIT to pad JMPs with NOPs to help the image converge
   when otherwise too many passes are required, from Gary Lin.

10) Verifier fixes on atomics with BPF_FETCH as well as
    function-by-function verification, both related to zero-extension
    handling, from Ilya Leoshkevich.

11) Better kernel build integration of the resolve_btfids tool, from
    Jiri Olsa.

12) Batch of AF_XDP selftest cleanups and a small performance improvement
    for libbpf's xsk map redirect for newer kernels, from Björn Töpel.

13) Follow-up BPF doc and verifier improvements around atomics with
    BPF_FETCH, from Brendan Jackman.

14) Permit zero-sized data sections, e.g. if an ELF .rodata section
    contains read-only data from local variables, from Yonghong Song.

15) veth driver skb bulk-allocation for ndo_xdp_xmit, from Lorenzo
    Bianconi.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
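Editor's illustration (not part of this merge): item 3 above lets a cgroup bind program ask the kernel to skip the CAP_NET_BIND_SERVICE check by returning an extra flag in the upper bits of its return code (see the BPF_CGROUP_RUN_PROG_INET_BIND_LOCK() hunk in bpf-cgroup.h below). A minimal sketch of such a program, assuming the bit-0-verdict / bit-1-flag return convention used by the selftests; the port number and program name are hypothetical:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

SEC("cgroup/bind4")
int bind_v4_sketch(struct bpf_sock_addr *ctx)
{
	/* Hypothetical policy: let this cgroup bind to privileged port 111
	 * without CAP_NET_BIND_SERVICE.
	 */
	if (ctx->user_port == bpf_htons(111))
		/* bit 0 = allow the bind, bit 1 = request the
		 * BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE bypass.
		 */
		return 1 | (1 << 1);

	return 1;	/* allow; normal capability checks apply */
}

char _license[] SEC("license") = "GPL";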
Diffstat (limited to 'include/linux')
-rw-r--r--	include/linux/bpf-cgroup.h		101
-rw-r--r--	include/linux/bpf.h			 74
-rw-r--r--	include/linux/bpf_verifier.h		  5
-rw-r--r--	include/linux/filter.h			 21
-rw-r--r--	include/linux/indirect_call_wrapper.h	  6
-rw-r--r--	include/linux/netdevice.h		 32
-rw-r--r--	include/linux/skmsg.h			  1
7 files changed, 171 insertions, 69 deletions
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 72e69a0e1e8c..c42e02b4d84b 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -23,8 +23,8 @@ struct ctl_table_header;
#ifdef CONFIG_CGROUP_BPF
-extern struct static_key_false cgroup_bpf_enabled_key;
-#define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key)
+extern struct static_key_false cgroup_bpf_enabled_key[MAX_BPF_ATTACH_TYPE];
+#define cgroup_bpf_enabled(type) static_branch_unlikely(&cgroup_bpf_enabled_key[type])
DECLARE_PER_CPU(struct bpf_cgroup_storage*,
bpf_cgroup_storage[MAX_BPF_CGROUP_STORAGE_TYPE]);
@@ -125,7 +125,8 @@ int __cgroup_bpf_run_filter_sk(struct sock *sk,
int __cgroup_bpf_run_filter_sock_addr(struct sock *sk,
struct sockaddr *uaddr,
enum bpf_attach_type type,
- void *t_ctx);
+ void *t_ctx,
+ u32 *flags);
int __cgroup_bpf_run_filter_sock_ops(struct sock *sk,
struct bpf_sock_ops_kern *sock_ops,
@@ -147,6 +148,10 @@ int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
int __user *optlen, int max_optlen,
int retval);
+int __cgroup_bpf_run_filter_getsockopt_kern(struct sock *sk, int level,
+ int optname, void *optval,
+ int *optlen, int retval);
+
static inline enum bpf_cgroup_storage_type cgroup_storage_type(
struct bpf_map *map)
{
@@ -185,7 +190,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled) \
+ if (cgroup_bpf_enabled(BPF_CGROUP_INET_INGRESS)) \
__ret = __cgroup_bpf_run_filter_skb(sk, skb, \
BPF_CGROUP_INET_INGRESS); \
\
@@ -195,7 +200,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled && sk && sk == skb->sk) { \
+ if (cgroup_bpf_enabled(BPF_CGROUP_INET_EGRESS) && sk && sk == skb->sk) { \
typeof(sk) __sk = sk_to_full_sk(sk); \
if (sk_fullsock(__sk)) \
__ret = __cgroup_bpf_run_filter_skb(__sk, skb, \
@@ -207,7 +212,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
#define BPF_CGROUP_RUN_SK_PROG(sk, type) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled) { \
+ if (cgroup_bpf_enabled(type)) { \
__ret = __cgroup_bpf_run_filter_sk(sk, type); \
} \
__ret; \
@@ -227,33 +232,53 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
#define BPF_CGROUP_RUN_SA_PROG(sk, uaddr, type) \
({ \
+ u32 __unused_flags; \
int __ret = 0; \
- if (cgroup_bpf_enabled) \
+ if (cgroup_bpf_enabled(type)) \
__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \
- NULL); \
+ NULL, \
+ &__unused_flags); \
__ret; \
})
#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type, t_ctx) \
({ \
+ u32 __unused_flags; \
int __ret = 0; \
- if (cgroup_bpf_enabled) { \
+ if (cgroup_bpf_enabled(type)) { \
lock_sock(sk); \
__ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \
- t_ctx); \
+ t_ctx, \
+ &__unused_flags); \
release_sock(sk); \
} \
__ret; \
})
-#define BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr) \
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET4_BIND, NULL)
-
-#define BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr) \
- BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, BPF_CGROUP_INET6_BIND, NULL)
+/* BPF_CGROUP_INET4_BIND and BPF_CGROUP_INET6_BIND can return extra flags
+ * via upper bits of return code. The only flag that is supported
+ * (at bit position 0) is to indicate CAP_NET_BIND_SERVICE capability check
+ * should be bypassed (BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE).
+ */
+#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, type, bind_flags) \
+({ \
+ u32 __flags = 0; \
+ int __ret = 0; \
+ if (cgroup_bpf_enabled(type)) { \
+ lock_sock(sk); \
+ __ret = __cgroup_bpf_run_filter_sock_addr(sk, uaddr, type, \
+ NULL, &__flags); \
+ release_sock(sk); \
+ if (__flags & BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE) \
+ *bind_flags |= BIND_NO_CAP_NET_BIND_SERVICE; \
+ } \
+ __ret; \
+})
-#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (cgroup_bpf_enabled && \
- sk->sk_prot->pre_connect)
+#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) \
+ ((cgroup_bpf_enabled(BPF_CGROUP_INET4_CONNECT) || \
+ cgroup_bpf_enabled(BPF_CGROUP_INET6_CONNECT)) && \
+ (sk)->sk_prot->pre_connect)
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) \
BPF_CGROUP_RUN_SA_PROG(sk, uaddr, BPF_CGROUP_INET4_CONNECT)
@@ -297,7 +322,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
#define BPF_CGROUP_RUN_PROG_SOCK_OPS_SK(sock_ops, sk) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled) \
+ if (cgroup_bpf_enabled(BPF_CGROUP_SOCK_OPS)) \
__ret = __cgroup_bpf_run_filter_sock_ops(sk, \
sock_ops, \
BPF_CGROUP_SOCK_OPS); \
@@ -307,7 +332,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
#define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled && (sock_ops)->sk) { \
+ if (cgroup_bpf_enabled(BPF_CGROUP_SOCK_OPS) && (sock_ops)->sk) { \
typeof(sk) __sk = sk_to_full_sk((sock_ops)->sk); \
if (__sk && sk_fullsock(__sk)) \
__ret = __cgroup_bpf_run_filter_sock_ops(__sk, \
@@ -320,7 +345,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled) \
+ if (cgroup_bpf_enabled(BPF_CGROUP_DEVICE)) \
__ret = __cgroup_bpf_check_dev_permission(type, major, minor, \
access, \
BPF_CGROUP_DEVICE); \
@@ -332,7 +357,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, count, pos) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled) \
+ if (cgroup_bpf_enabled(BPF_CGROUP_SYSCTL)) \
__ret = __cgroup_bpf_run_filter_sysctl(head, table, write, \
buf, count, pos, \
BPF_CGROUP_SYSCTL); \
@@ -343,7 +368,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
kernel_optval) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled) \
+ if (cgroup_bpf_enabled(BPF_CGROUP_SETSOCKOPT)) \
__ret = __cgroup_bpf_run_filter_setsockopt(sock, level, \
optname, optval, \
optlen, \
@@ -354,7 +379,7 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) \
({ \
int __ret = 0; \
- if (cgroup_bpf_enabled) \
+ if (cgroup_bpf_enabled(BPF_CGROUP_GETSOCKOPT)) \
get_user(__ret, optlen); \
__ret; \
})
@@ -363,11 +388,24 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
max_optlen, retval) \
({ \
int __ret = retval; \
- if (cgroup_bpf_enabled) \
- __ret = __cgroup_bpf_run_filter_getsockopt(sock, level, \
- optname, optval, \
- optlen, max_optlen, \
- retval); \
+ if (cgroup_bpf_enabled(BPF_CGROUP_GETSOCKOPT)) \
+ if (!(sock)->sk_prot->bpf_bypass_getsockopt || \
+ !INDIRECT_CALL_INET_1((sock)->sk_prot->bpf_bypass_getsockopt, \
+ tcp_bpf_bypass_getsockopt, \
+ level, optname)) \
+ __ret = __cgroup_bpf_run_filter_getsockopt( \
+ sock, level, optname, optval, optlen, \
+ max_optlen, retval); \
+ __ret; \
+})
+
+#define BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sock, level, optname, optval, \
+ optlen, retval) \
+({ \
+ int __ret = retval; \
+ if (cgroup_bpf_enabled(BPF_CGROUP_GETSOCKOPT)) \
+ __ret = __cgroup_bpf_run_filter_getsockopt_kern( \
+ sock, level, optname, optval, optlen, retval); \
__ret; \
})
@@ -427,15 +465,14 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
return 0;
}
-#define cgroup_bpf_enabled (0)
+#define cgroup_bpf_enabled(type) (0)
#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type, t_ctx) ({ 0; })
#define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
#define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET_SOCK_RELEASE(sk) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET4_BIND_LOCK(sk, uaddr) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_INET6_BIND_LOCK(sk, uaddr) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr, type, flags) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk) ({ 0; })
#define BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr) ({ 0; })
@@ -452,6 +489,8 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
#define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) ({ 0; })
#define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, \
optlen, max_optlen, retval) ({ retval; })
+#define BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN(sock, level, optname, optval, \
+ optlen, retval) ({ retval; })
#define BPF_CGROUP_RUN_PROG_SETSOCKOPT(sock, level, optname, optval, optlen, \
kernel_optval) ({ 0; })
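For context on the BPF_CGROUP_RUN_PROG_GETSOCKOPT() change above: it now consults the new sk_prot->bpf_bypass_getsockopt callback (devirtualized to tcp_bpf_bypass_getsockopt via INDIRECT_CALL_INET_1) before running cgroup programs. A rough sketch of what such a callback does, assuming the TCP_ZEROCOPY_RECEIVE fast path described in the commit message; the real tcp_bpf_bypass_getsockopt() lives in net/ipv4/tcp.c and may differ:

#include <linux/types.h>
#include <linux/socket.h>	/* SOL_TCP */
#include <linux/tcp.h>		/* TCP_ZEROCOPY_RECEIVE */

static bool tcp_bpf_bypass_getsockopt_sketch(int level, int optname)
{
	/* TCP_ZEROCOPY_RECEIVE is handled under the socket lock via
	 * BPF_CGROUP_RUN_PROG_GETSOCKOPT_KERN() instead, so the generic
	 * getsockopt hook can be skipped for it.
	 */
	return level == SOL_TCP && optname == TCP_ZEROCOPY_RECEIVE;
}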
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 1aac2af12fed..cccaef1088ea 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -14,7 +14,6 @@
#include <linux/numa.h>
#include <linux/mm_types.h>
#include <linux/wait.h>
-#include <linux/u64_stats_sync.h>
#include <linux/refcount.h>
#include <linux/mutex.h>
#include <linux/module.h>
@@ -507,12 +506,6 @@ enum bpf_cgroup_storage_type {
*/
#define MAX_BPF_FUNC_ARGS 12
-struct bpf_prog_stats {
- u64 cnt;
- u64 nsecs;
- struct u64_stats_sync syncp;
-} __aligned(2 * sizeof(u64));
-
struct btf_func_model {
u8 ret_size;
u8 nr_args;
@@ -536,7 +529,7 @@ struct btf_func_model {
/* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50
* bytes on x86. Pick a number to fit into BPF_IMAGE_SIZE / 2
*/
-#define BPF_MAX_TRAMP_PROGS 40
+#define BPF_MAX_TRAMP_PROGS 38
struct bpf_tramp_progs {
struct bpf_prog *progs[BPF_MAX_TRAMP_PROGS];
@@ -568,10 +561,10 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end,
struct bpf_tramp_progs *tprogs,
void *orig_call);
/* these two functions are called from generated trampoline */
-u64 notrace __bpf_prog_enter(void);
+u64 notrace __bpf_prog_enter(struct bpf_prog *prog);
void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start);
-void notrace __bpf_prog_enter_sleepable(void);
-void notrace __bpf_prog_exit_sleepable(void);
+u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog);
+void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start);
struct bpf_ksym {
unsigned long start;
@@ -845,7 +838,6 @@ struct bpf_prog_aux {
u32 linfo_idx;
u32 num_exentries;
struct exception_table_entry *extable;
- struct bpf_prog_stats __percpu *stats;
union {
struct work_struct work;
struct rcu_head rcu;
@@ -1073,6 +1065,34 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
struct bpf_prog *include_prog,
struct bpf_prog_array **new_array);
+/* BPF program asks to bypass CAP_NET_BIND_SERVICE in bind. */
+#define BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE (1 << 0)
+/* BPF program asks to set CN on the packet. */
+#define BPF_RET_SET_CN (1 << 0)
+
+#define BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, ret_flags) \
+ ({ \
+ struct bpf_prog_array_item *_item; \
+ struct bpf_prog *_prog; \
+ struct bpf_prog_array *_array; \
+ u32 _ret = 1; \
+ u32 func_ret; \
+ migrate_disable(); \
+ rcu_read_lock(); \
+ _array = rcu_dereference(array); \
+ _item = &_array->items[0]; \
+ while ((_prog = READ_ONCE(_item->prog))) { \
+ bpf_cgroup_storage_set(_item->cgroup_storage); \
+ func_ret = func(_prog, ctx); \
+ _ret &= (func_ret & 1); \
+ *(ret_flags) |= (func_ret >> 1); \
+ _item++; \
+ } \
+ rcu_read_unlock(); \
+ migrate_enable(); \
+ _ret; \
+ })
+
#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null) \
({ \
struct bpf_prog_array_item *_item; \
@@ -1120,25 +1140,11 @@ _out: \
*/
#define BPF_PROG_CGROUP_INET_EGRESS_RUN_ARRAY(array, ctx, func) \
({ \
- struct bpf_prog_array_item *_item; \
- struct bpf_prog *_prog; \
- struct bpf_prog_array *_array; \
- u32 ret; \
- u32 _ret = 1; \
- u32 _cn = 0; \
- migrate_disable(); \
- rcu_read_lock(); \
- _array = rcu_dereference(array); \
- _item = &_array->items[0]; \
- while ((_prog = READ_ONCE(_item->prog))) { \
- bpf_cgroup_storage_set(_item->cgroup_storage); \
- ret = func(_prog, ctx); \
- _ret &= (ret & 1); \
- _cn |= (ret & 2); \
- _item++; \
- } \
- rcu_read_unlock(); \
- migrate_enable(); \
+ u32 _flags = 0; \
+ bool _cn; \
+ u32 _ret; \
+ _ret = BPF_PROG_RUN_ARRAY_FLAGS(array, ctx, func, &_flags); \
+ _cn = _flags & BPF_RET_SET_CN; \
if (_ret) \
_ret = (_cn ? NET_XMIT_CN : NET_XMIT_SUCCESS); \
else \
@@ -1276,6 +1282,11 @@ static inline bool bpf_allow_ptr_leaks(void)
return perfmon_capable();
}
+static inline bool bpf_allow_uninit_stack(void)
+{
+ return perfmon_capable();
+}
+
static inline bool bpf_allow_ptr_to_map_access(void)
{
return perfmon_capable();
@@ -1874,6 +1885,7 @@ extern const struct bpf_func_proto bpf_per_cpu_ptr_proto;
extern const struct bpf_func_proto bpf_this_cpu_ptr_proto;
extern const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto;
extern const struct bpf_func_proto bpf_sock_from_file_proto;
+extern const struct bpf_func_proto bpf_get_socket_ptr_cookie_proto;
const struct bpf_func_proto *bpf_tracing_func_proto(
enum bpf_func_id func_id, const struct bpf_prog *prog);
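Editor's note on the new BPF_PROG_RUN_ARRAY_FLAGS() above: each program's return value carries a 1-bit verdict plus optional flags. The per-program folding step, written out as a plain helper for readability (illustration only, not part of the diff):

#include <linux/types.h>

/* Fold one program's return value into the running verdict and flags,
 * exactly as the macro's loop body does: bit 0 is ANDed into the overall
 * verdict, the remaining bits are ORed into the flags word (e.g.
 * BPF_RET_SET_CN for egress, BPF_RET_BIND_NO_CAP_NET_BIND_SERVICE for bind).
 */
static inline void bpf_prog_ret_fold(u32 func_ret, u32 *verdict, u32 *ret_flags)
{
	*verdict &= func_ret & 1;
	*ret_flags |= func_ret >> 1;
}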
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index dfe6f85d97dd..971b33aca13d 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -195,7 +195,7 @@ struct bpf_func_state {
* 0 = main function, 1 = first callee.
*/
u32 frameno;
- /* subprog number == index within subprog_stack_depth
+ /* subprog number == index within subprog_info
* zero == main subprog
*/
u32 subprogno;
@@ -404,6 +404,7 @@ struct bpf_verifier_env {
u32 used_btf_cnt; /* number of used BTF objects */
u32 id_gen; /* used to generate unique reg IDs */
bool allow_ptr_leaks;
+ bool allow_uninit_stack;
bool allow_ptr_to_map_access;
bool bpf_capable;
bool bypass_spec_v1;
@@ -470,6 +471,8 @@ bpf_prog_offload_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt);
int check_ctx_reg(struct bpf_verifier_env *env,
const struct bpf_reg_state *reg, int regno);
+int check_mem_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
+ u32 regno, u32 mem_size);
/* this lives here instead of in bpf.h because it needs to dereference tgt_prog */
static inline u64 bpf_trampoline_compute_key(const struct bpf_prog *tgt_prog,
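The new allow_uninit_stack flag and the exported check_mem_reg() back the variable-offset stack access work mentioned in item 6 of the changelog. A hedged sketch of the access pattern this is meant to accept for privileged (CAP_PERFMON) loaders; the section name, bounds, and program are hypothetical:

#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>

SEC("socket")
int var_off_stack_sketch(struct __sk_buff *skb)
{
	char buf[16] = {};
	/* idx is only known to the verifier as a range [0, 15]; accessing
	 * buf at such an index is a variable-offset stack access, which
	 * previously required a constant offset.
	 */
	__u32 idx = skb->len & 0xf;

	buf[skb->protocol & 0xf] = 1;	/* variable-offset stack write */
	return buf[idx];		/* variable-offset stack read  */
}

char _license[] SEC("license") = "GPL";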
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 7fdce5407214..3b00fc906ccd 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -22,6 +22,7 @@
#include <linux/vmalloc.h>
#include <linux/sockptr.h>
#include <crypto/sha1.h>
+#include <linux/u64_stats_sync.h>
#include <net/sch_generic.h>
@@ -539,6 +540,13 @@ struct bpf_binary_header {
u8 image[] __aligned(BPF_IMAGE_ALIGNMENT);
};
+struct bpf_prog_stats {
+ u64 cnt;
+ u64 nsecs;
+ u64 misses;
+ struct u64_stats_sync syncp;
+} __aligned(2 * sizeof(u64));
+
struct bpf_prog {
u16 pages; /* Number of allocated pages */
u16 jited:1, /* Is our filter JIT'ed? */
@@ -557,10 +565,12 @@ struct bpf_prog {
u32 len; /* Number of filter blocks */
u32 jited_len; /* Size of jited insns in bytes */
u8 tag[BPF_TAG_SIZE];
- struct bpf_prog_aux *aux; /* Auxiliary fields */
- struct sock_fprog_kern *orig_prog; /* Original BPF program */
+ struct bpf_prog_stats __percpu *stats;
+ int __percpu *active;
unsigned int (*bpf_func)(const void *ctx,
const struct bpf_insn *insn);
+ struct bpf_prog_aux *aux; /* Auxiliary fields */
+ struct sock_fprog_kern *orig_prog; /* Original BPF program */
/* Instructions for interpreter */
struct sock_filter insns[0];
struct bpf_insn insnsi[];
@@ -581,7 +591,7 @@ DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
struct bpf_prog_stats *__stats; \
u64 __start = sched_clock(); \
__ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func); \
- __stats = this_cpu_ptr(prog->aux->stats); \
+ __stats = this_cpu_ptr(prog->stats); \
u64_stats_update_begin(&__stats->syncp); \
__stats->cnt++; \
__stats->nsecs += sched_clock() - __start; \
@@ -1298,6 +1308,11 @@ struct bpf_sysctl_kern {
u64 tmp_reg;
};
+#define BPF_SOCKOPT_KERN_BUF_SIZE 32
+struct bpf_sockopt_buf {
+ u8 data[BPF_SOCKOPT_KERN_BUF_SIZE];
+};
+
struct bpf_sockopt_kern {
struct sock *sk;
u8 *optval;
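bpf_prog_stats moves from bpf_prog_aux into bpf_prog itself and grows a misses counter, incremented when the new recursion protection skips a run. From userspace these counters surface through bpf_prog_info; a hypothetical sketch, assuming a libbpf and UAPI new enough to carry the recursion_misses field and that runtime stats were enabled (e.g. via BPF_ENABLE_STATS):

#include <stdio.h>
#include <bpf/bpf.h>

static void print_prog_stats(int prog_fd)
{
	struct bpf_prog_info info = {};
	__u32 len = sizeof(info);

	if (bpf_obj_get_info_by_fd(prog_fd, &info, &len))
		return;

	printf("run_cnt=%llu run_time_ns=%llu recursion_misses=%llu\n",
	       (unsigned long long)info.run_cnt,
	       (unsigned long long)info.run_time_ns,
	       (unsigned long long)info.recursion_misses);
}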
diff --git a/include/linux/indirect_call_wrapper.h b/include/linux/indirect_call_wrapper.h
index a8345c8a613d..c1c76a70a6ce 100644
--- a/include/linux/indirect_call_wrapper.h
+++ b/include/linux/indirect_call_wrapper.h
@@ -62,4 +62,10 @@
#define INDIRECT_CALL_INET(f, f2, f1, ...) f(__VA_ARGS__)
#endif
+#if IS_ENABLED(CONFIG_INET)
+#define INDIRECT_CALL_INET_1(f, f1, ...) INDIRECT_CALL_1(f, f1, __VA_ARGS__)
+#else
+#define INDIRECT_CALL_INET_1(f, f1, ...) f(__VA_ARGS__)
+#endif
+
#endif
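INDIRECT_CALL_INET_1() mirrors the existing INDIRECT_CALL_INET() pattern for a single expected target. When retpolines are enabled, INDIRECT_CALL_1() devirtualizes the call roughly as sketched below (illustration; see the header's actual definition), which is what lets the getsockopt path earlier in this series call tcp_bpf_bypass_getsockopt() directly on TCP sockets:

/* Roughly what INDIRECT_CALL_1(f, f1, ...) boils down to with
 * CONFIG_RETPOLINE=y: compare against the expected target and take a
 * direct call on match, falling back to the indirect call otherwise.
 */
#define INDIRECT_CALL_1_SKETCH(f, f1, ...)				\
	({								\
		likely(f == f1) ? f1(__VA_ARGS__) : f(__VA_ARGS__);	\
	})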
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index bfadf3b82f9c..ddf4cfc12615 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3931,14 +3931,42 @@ int xdp_umem_query(struct net_device *dev, u16 queue_id);
int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
+int dev_forward_skb_nomtu(struct net_device *dev, struct sk_buff *skb);
bool is_skb_forwardable(const struct net_device *dev,
const struct sk_buff *skb);
+static __always_inline bool __is_skb_forwardable(const struct net_device *dev,
+ const struct sk_buff *skb,
+ const bool check_mtu)
+{
+ const u32 vlan_hdr_len = 4; /* VLAN_HLEN */
+ unsigned int len;
+
+ if (!(dev->flags & IFF_UP))
+ return false;
+
+ if (!check_mtu)
+ return true;
+
+ len = dev->mtu + dev->hard_header_len + vlan_hdr_len;
+ if (skb->len <= len)
+ return true;
+
+ /* if TSO is enabled, we don't care about the length as the packet
+ * could be forwarded without being segmented before
+ */
+ if (skb_is_gso(skb))
+ return true;
+
+ return false;
+}
+
static __always_inline int ____dev_forward_skb(struct net_device *dev,
- struct sk_buff *skb)
+ struct sk_buff *skb,
+ const bool check_mtu)
{
if (skb_orphan_frags(skb, GFP_ATOMIC) ||
- unlikely(!is_skb_forwardable(dev, skb))) {
+ unlikely(!__is_skb_forwardable(dev, skb, check_mtu))) {
atomic_long_inc(&dev->rx_dropped);
kfree_skb(skb);
return NET_RX_DROP;
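A worked example of the length bound __is_skb_forwardable() applies when check_mtu is true, using typical Ethernet values (hypothetical numbers, not taken from the diff):

#include <linux/if_ether.h>	/* ETH_HLEN = 14 */
#include <linux/if_vlan.h>	/* VLAN_HLEN = 4 */

/* With dev->mtu = 1500 and dev->hard_header_len = ETH_HLEN, the bound is
 *   len = 1500 + ETH_HLEN + VLAN_HLEN = 1518,
 * so skb->len <= 1518 is forwardable; anything larger passes only when
 * skb_is_gso(skb) is true.
 */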
diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index fec0c5ac1c4f..8edbbf5f2f93 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -390,7 +390,6 @@ static inline struct sk_psock *sk_psock_get(struct sock *sk)
}
void sk_psock_stop(struct sock *sk, struct sk_psock *psock);
-void sk_psock_destroy(struct rcu_head *rcu);
void sk_psock_drop(struct sock *sk, struct sk_psock *psock);
static inline void sk_psock_put(struct sock *sk, struct sk_psock *psock)