summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/bpf/test_run.c72
-rw-r--r--net/core/Makefile3
-rw-r--r--net/core/dev.c1
-rw-r--r--net/core/dev.h2
-rw-r--r--net/core/filter.c17
-rw-r--r--net/core/net-sysfs.c79
-rw-r--r--net/core/netdev-genl-gen.c48
-rw-r--r--net/core/netdev-genl-gen.h23
-rw-r--r--net/core/netdev-genl.c179
-rw-r--r--net/core/skbuff.c31
-rw-r--r--net/core/sysctl_net_core.c81
-rw-r--r--net/core/xdp.c28
-rw-r--r--net/ipv4/tcp_bbr.c16
-rw-r--r--net/ipv4/tcp_cong.c10
-rw-r--r--net/ipv4/tcp_cubic.c12
-rw-r--r--net/ipv4/tcp_dctcp.c12
-rw-r--r--net/netfilter/Kconfig3
-rw-r--r--net/netfilter/Makefile1
-rw-r--r--net/netfilter/nf_conntrack_bpf.c20
-rw-r--r--net/netfilter/nf_conntrack_helper.c98
-rw-r--r--net/netfilter/nf_conntrack_ovs.c178
-rw-r--r--net/netfilter/nf_nat_bpf.c6
-rw-r--r--net/netlink/genetlink.c4
-rw-r--r--net/openvswitch/Kconfig1
-rw-r--r--net/openvswitch/conntrack.c85
-rw-r--r--net/rxrpc/call_object.c6
-rw-r--r--net/rxrpc/conn_event.c2
-rw-r--r--net/rxrpc/output.c10
-rw-r--r--net/rxrpc/recvmsg.c2
-rw-r--r--net/rxrpc/skbuff.c4
-rw-r--r--net/sched/Kconfig1
-rw-r--r--net/sched/act_ct.c76
-rw-r--r--net/tipc/netlink_compat.c16
-rw-r--r--net/xdp/xsk_buff_pool.c7
-rw-r--r--net/xfrm/xfrm_interface_bpf.c7
35 files changed, 731 insertions, 410 deletions
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 8da0d73b368e..b766a84c8536 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -234,7 +234,7 @@ static int xdp_recv_frames(struct xdp_frame **frames, int nframes,
int i, n;
LIST_HEAD(list);
- n = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, nframes, (void **)skbs);
+ n = kmem_cache_alloc_bulk(skbuff_cache, gfp, nframes, (void **)skbs);
if (unlikely(n == 0)) {
for (i = 0; i < nframes; i++)
xdp_return_frame(frames[i]);
@@ -484,7 +484,7 @@ out:
__diag_push();
__diag_ignore_all("-Wmissing-prototypes",
"Global functions as their definitions will be in vmlinux BTF");
-int noinline bpf_fentry_test1(int a)
+__bpf_kfunc int bpf_fentry_test1(int a)
{
return a + 1;
}
@@ -529,27 +529,35 @@ int noinline bpf_fentry_test8(struct bpf_fentry_test_t *arg)
return (long)arg->a;
}
-int noinline bpf_modify_return_test(int a, int *b)
+__bpf_kfunc int bpf_modify_return_test(int a, int *b)
{
*b += 1;
return a + *b;
}
-u64 noinline bpf_kfunc_call_test1(struct sock *sk, u32 a, u64 b, u32 c, u64 d)
+__bpf_kfunc u64 bpf_kfunc_call_test1(struct sock *sk, u32 a, u64 b, u32 c, u64 d)
{
return a + b + c + d;
}
-int noinline bpf_kfunc_call_test2(struct sock *sk, u32 a, u32 b)
+__bpf_kfunc int bpf_kfunc_call_test2(struct sock *sk, u32 a, u32 b)
{
return a + b;
}
-struct sock * noinline bpf_kfunc_call_test3(struct sock *sk)
+__bpf_kfunc struct sock *bpf_kfunc_call_test3(struct sock *sk)
{
return sk;
}
+long noinline bpf_kfunc_call_test4(signed char a, short b, int c, long d)
+{
+ /* Provoke the compiler to assume that the caller has sign-extended a,
+ * b and c on platforms where this is required (e.g. s390x).
+ */
+ return (long)a + (long)b + (long)c + d;
+}
+
struct prog_test_member1 {
int a;
};
@@ -574,21 +582,21 @@ static struct prog_test_ref_kfunc prog_test_struct = {
.cnt = REFCOUNT_INIT(1),
};
-noinline struct prog_test_ref_kfunc *
+__bpf_kfunc struct prog_test_ref_kfunc *
bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr)
{
refcount_inc(&prog_test_struct.cnt);
return &prog_test_struct;
}
-noinline struct prog_test_member *
+__bpf_kfunc struct prog_test_member *
bpf_kfunc_call_memb_acquire(void)
{
WARN_ON_ONCE(1);
return NULL;
}
-noinline void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p)
+__bpf_kfunc void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p)
{
if (!p)
return;
@@ -596,11 +604,11 @@ noinline void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p)
refcount_dec(&p->cnt);
}
-noinline void bpf_kfunc_call_memb_release(struct prog_test_member *p)
+__bpf_kfunc void bpf_kfunc_call_memb_release(struct prog_test_member *p)
{
}
-noinline void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p)
+__bpf_kfunc void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p)
{
WARN_ON_ONCE(1);
}
@@ -613,12 +621,14 @@ static int *__bpf_kfunc_call_test_get_mem(struct prog_test_ref_kfunc *p, const i
return (int *)p;
}
-noinline int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size)
+__bpf_kfunc int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p,
+ const int rdwr_buf_size)
{
return __bpf_kfunc_call_test_get_mem(p, rdwr_buf_size);
}
-noinline int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size)
+__bpf_kfunc int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p,
+ const int rdonly_buf_size)
{
return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size);
}
@@ -628,16 +638,17 @@ noinline int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p,
* Acquire functions must return struct pointers, so these ones are
* failing.
*/
-noinline int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size)
+__bpf_kfunc int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p,
+ const int rdonly_buf_size)
{
return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size);
}
-noinline void bpf_kfunc_call_int_mem_release(int *p)
+__bpf_kfunc void bpf_kfunc_call_int_mem_release(int *p)
{
}
-noinline struct prog_test_ref_kfunc *
+__bpf_kfunc struct prog_test_ref_kfunc *
bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **pp, int a, int b)
{
struct prog_test_ref_kfunc *p = READ_ONCE(*pp);
@@ -686,48 +697,53 @@ struct prog_test_fail3 {
char arr2[];
};
-noinline void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb)
+__bpf_kfunc void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p)
{
}
-noinline void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p)
+__bpf_kfunc void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p)
{
}
-noinline void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p)
+__bpf_kfunc void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p)
{
}
-noinline void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p)
+__bpf_kfunc void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p)
{
}
-noinline void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p)
+__bpf_kfunc void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p)
{
}
-noinline void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p)
+__bpf_kfunc void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz)
{
}
-noinline void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz)
+__bpf_kfunc void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len)
{
}
-noinline void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len)
+__bpf_kfunc void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len)
{
}
-noinline void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len)
+__bpf_kfunc void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p)
{
}
-noinline void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p)
+__bpf_kfunc void bpf_kfunc_call_test_destructive(void)
{
}
-noinline void bpf_kfunc_call_test_destructive(void)
+__bpf_kfunc static u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused)
{
+ return arg;
}
__diag_pop();
@@ -746,6 +762,7 @@ BTF_SET8_START(test_sk_check_kfunc_ids)
BTF_ID_FLAGS(func, bpf_kfunc_call_test1)
BTF_ID_FLAGS(func, bpf_kfunc_call_test2)
BTF_ID_FLAGS(func, bpf_kfunc_call_test3)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test4)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_acquire, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_kfunc_call_memb_acquire, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_release, KF_RELEASE)
@@ -767,6 +784,7 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail1)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg)
BTF_SET8_END(test_sk_check_kfunc_ids)
static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size,
diff --git a/net/core/Makefile b/net/core/Makefile
index 10edd66a8a37..8f367813bc68 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -12,7 +12,8 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
obj-y += dev.o dev_addr_lists.o dst.o netevent.o \
neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \
- fib_notifier.o xdp.o flow_offload.o gro.o
+ fib_notifier.o xdp.o flow_offload.o gro.o \
+ netdev-genl.o netdev-genl-gen.o
obj-$(CONFIG_NETDEV_ADDR_LIST_TEST) += dev_addr_lists_test.o
diff --git a/net/core/dev.c b/net/core/dev.c
index bb42150a38ec..7307a0c15c9f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1614,6 +1614,7 @@ const char *netdev_cmd_to_name(enum netdev_cmd cmd)
N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO)
N(PRE_CHANGEADDR) N(OFFLOAD_XSTATS_ENABLE) N(OFFLOAD_XSTATS_DISABLE)
N(OFFLOAD_XSTATS_REPORT_USED) N(OFFLOAD_XSTATS_REPORT_DELTA)
+ N(XDP_FEAT_CHANGE)
}
#undef N
return "UNKNOWN_NETDEV_EVENT";
diff --git a/net/core/dev.h b/net/core/dev.h
index a065b7571441..e075e198092c 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -9,6 +9,7 @@ struct net_device;
struct netdev_bpf;
struct netdev_phys_item_id;
struct netlink_ext_ack;
+struct cpumask;
/* Random bits of netdevice that don't need to be exposed */
#define FLOW_LIMIT_HISTORY (1 << 7) /* must be ^2 and !overflow buckets */
@@ -134,4 +135,5 @@ static inline void netif_set_gro_ipv4_max_size(struct net_device *dev,
WRITE_ONCE(dev->gro_ipv4_max_size, size);
}
+int rps_cpumask_housekeeping(struct cpumask *mask);
#endif
diff --git a/net/core/filter.c b/net/core/filter.c
index d8f9b53f3db6..2ce06a72a5ba 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4318,16 +4318,13 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
enum bpf_map_type map_type = ri->map_type;
- /* XDP_REDIRECT is not fully supported yet for xdp frags since
- * not all XDP capable drivers can map non-linear xdp_frame in
- * ndo_xdp_xmit.
- */
- if (unlikely(xdp_buff_has_frags(xdp) &&
- map_type != BPF_MAP_TYPE_CPUMAP))
- return -EOPNOTSUPP;
+ if (map_type == BPF_MAP_TYPE_XSKMAP) {
+ /* XDP_REDIRECT is not supported AF_XDP yet. */
+ if (unlikely(xdp_buff_has_frags(xdp)))
+ return -EOPNOTSUPP;
- if (map_type == BPF_MAP_TYPE_XSKMAP)
return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog);
+ }
return __xdp_do_redirect_frame(ri, dev, xdp_convert_buff_to_frame(xdp),
xdp_prog);
@@ -7536,7 +7533,7 @@ static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv4_proto = {
.arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
.arg1_size = sizeof(struct iphdr),
.arg2_type = ARG_PTR_TO_MEM,
- .arg3_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
};
BPF_CALL_3(bpf_tcp_raw_gen_syncookie_ipv6, struct ipv6hdr *, iph,
@@ -7568,7 +7565,7 @@ static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv6_proto = {
.arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
.arg1_size = sizeof(struct ipv6hdr),
.arg2_type = ARG_PTR_TO_MEM,
- .arg3_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
};
BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv4, struct iphdr *, iph,
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index ca55dd747d6c..4b361ac6a252 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -831,42 +831,18 @@ static ssize_t show_rps_map(struct netdev_rx_queue *queue, char *buf)
return len < PAGE_SIZE ? len : -EINVAL;
}
-static ssize_t store_rps_map(struct netdev_rx_queue *queue,
- const char *buf, size_t len)
+static int netdev_rx_queue_set_rps_mask(struct netdev_rx_queue *queue,
+ cpumask_var_t mask)
{
- struct rps_map *old_map, *map;
- cpumask_var_t mask;
- int err, cpu, i;
static DEFINE_MUTEX(rps_map_mutex);
-
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
-
- if (!alloc_cpumask_var(&mask, GFP_KERNEL))
- return -ENOMEM;
-
- err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits);
- if (err) {
- free_cpumask_var(mask);
- return err;
- }
-
- if (!cpumask_empty(mask)) {
- cpumask_and(mask, mask, housekeeping_cpumask(HK_TYPE_DOMAIN));
- cpumask_and(mask, mask, housekeeping_cpumask(HK_TYPE_WQ));
- if (cpumask_empty(mask)) {
- free_cpumask_var(mask);
- return -EINVAL;
- }
- }
+ struct rps_map *old_map, *map;
+ int cpu, i;
map = kzalloc(max_t(unsigned int,
RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES),
GFP_KERNEL);
- if (!map) {
- free_cpumask_var(mask);
+ if (!map)
return -ENOMEM;
- }
i = 0;
for_each_cpu_and(cpu, mask, cpu_online_mask)
@@ -893,9 +869,45 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
if (old_map)
kfree_rcu(old_map, rcu);
+ return 0;
+}
+int rps_cpumask_housekeeping(struct cpumask *mask)
+{
+ if (!cpumask_empty(mask)) {
+ cpumask_and(mask, mask, housekeeping_cpumask(HK_TYPE_DOMAIN));
+ cpumask_and(mask, mask, housekeeping_cpumask(HK_TYPE_WQ));
+ if (cpumask_empty(mask))
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static ssize_t store_rps_map(struct netdev_rx_queue *queue,
+ const char *buf, size_t len)
+{
+ cpumask_var_t mask;
+ int err;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+ return -ENOMEM;
+
+ err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits);
+ if (err)
+ goto out;
+
+ err = rps_cpumask_housekeeping(mask);
+ if (err)
+ goto out;
+
+ err = netdev_rx_queue_set_rps_mask(queue, mask);
+
+out:
free_cpumask_var(mask);
- return len;
+ return err ? : len;
}
static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
@@ -1071,6 +1083,13 @@ static int rx_queue_add_kobject(struct net_device *dev, int index)
goto err;
}
+#if IS_ENABLED(CONFIG_RPS) && IS_ENABLED(CONFIG_SYSCTL)
+ if (!cpumask_empty(&rps_default_mask)) {
+ error = netdev_rx_queue_set_rps_mask(queue, &rps_default_mask);
+ if (error)
+ goto err;
+ }
+#endif
kobject_uevent(kobj, KOBJ_ADD);
return error;
diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c
new file mode 100644
index 000000000000..48812ec843f5
--- /dev/null
+++ b/net/core/netdev-genl-gen.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/netdev.yaml */
+/* YNL-GEN kernel source */
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include "netdev-genl-gen.h"
+
+#include <linux/netdev.h>
+
+/* NETDEV_CMD_DEV_GET - do */
+static const struct nla_policy netdev_dev_get_nl_policy[NETDEV_A_DEV_IFINDEX + 1] = {
+ [NETDEV_A_DEV_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1),
+};
+
+/* Ops table for netdev */
+static const struct genl_split_ops netdev_nl_ops[2] = {
+ {
+ .cmd = NETDEV_CMD_DEV_GET,
+ .doit = netdev_nl_dev_get_doit,
+ .policy = netdev_dev_get_nl_policy,
+ .maxattr = NETDEV_A_DEV_IFINDEX,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NETDEV_CMD_DEV_GET,
+ .dumpit = netdev_nl_dev_get_dumpit,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+};
+
+static const struct genl_multicast_group netdev_nl_mcgrps[] = {
+ [NETDEV_NLGRP_MGMT] = { "mgmt", },
+};
+
+struct genl_family netdev_nl_family __ro_after_init = {
+ .name = NETDEV_FAMILY_NAME,
+ .version = NETDEV_FAMILY_VERSION,
+ .netnsok = true,
+ .parallel_ops = true,
+ .module = THIS_MODULE,
+ .split_ops = netdev_nl_ops,
+ .n_split_ops = ARRAY_SIZE(netdev_nl_ops),
+ .mcgrps = netdev_nl_mcgrps,
+ .n_mcgrps = ARRAY_SIZE(netdev_nl_mcgrps),
+};
diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h
new file mode 100644
index 000000000000..b16dc7e026bb
--- /dev/null
+++ b/net/core/netdev-genl-gen.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/netdev.yaml */
+/* YNL-GEN kernel header */
+
+#ifndef _LINUX_NETDEV_GEN_H
+#define _LINUX_NETDEV_GEN_H
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include <linux/netdev.h>
+
+int netdev_nl_dev_get_doit(struct sk_buff *skb, struct genl_info *info);
+int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+
+enum {
+ NETDEV_NLGRP_MGMT,
+};
+
+extern struct genl_family netdev_nl_family;
+
+#endif /* _LINUX_NETDEV_GEN_H */
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
new file mode 100644
index 000000000000..a4270fafdf11
--- /dev/null
+++ b/net/core/netdev-genl.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/netdevice.h>
+#include <linux/notifier.h>
+#include <linux/rtnetlink.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+
+#include "netdev-genl-gen.h"
+
+static int
+netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp,
+ u32 portid, u32 seq, int flags, u32 cmd)
+{
+ void *hdr;
+
+ hdr = genlmsg_put(rsp, portid, seq, &netdev_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(rsp, NETDEV_A_DEV_IFINDEX, netdev->ifindex) ||
+ nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_FEATURES,
+ netdev->xdp_features, NETDEV_A_DEV_PAD)) {
+ genlmsg_cancel(rsp, hdr);
+ return -EINVAL;
+ }
+
+ genlmsg_end(rsp, hdr);
+
+ return 0;
+}
+
+static void
+netdev_genl_dev_notify(struct net_device *netdev, int cmd)
+{
+ struct sk_buff *ntf;
+
+ if (!genl_has_listeners(&netdev_nl_family, dev_net(netdev),
+ NETDEV_NLGRP_MGMT))
+ return;
+
+ ntf = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!ntf)
+ return;
+
+ if (netdev_nl_dev_fill(netdev, ntf, 0, 0, 0, cmd)) {
+ nlmsg_free(ntf);
+ return;
+ }
+
+ genlmsg_multicast_netns(&netdev_nl_family, dev_net(netdev), ntf,
+ 0, NETDEV_NLGRP_MGMT, GFP_KERNEL);
+}
+
+int netdev_nl_dev_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net_device *netdev;
+ struct sk_buff *rsp;
+ u32 ifindex;
+ int err;
+
+ if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX))
+ return -EINVAL;
+
+ ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]);
+
+ rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!rsp)
+ return -ENOMEM;
+
+ rtnl_lock();
+
+ netdev = __dev_get_by_index(genl_info_net(info), ifindex);
+ if (netdev)
+ err = netdev_nl_dev_fill(netdev, rsp, info->snd_portid,
+ info->snd_seq, 0, info->genlhdr->cmd);
+ else
+ err = -ENODEV;
+
+ rtnl_unlock();
+
+ if (err)
+ goto err_free_msg;
+
+ return genlmsg_reply(rsp, info);
+
+err_free_msg:
+ nlmsg_free(rsp);
+ return err;
+}
+
+int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ struct net_device *netdev;
+ int idx = 0, s_idx;
+ int h, s_h;
+ int err;
+
+ s_h = cb->args[0];
+ s_idx = cb->args[1];
+
+ rtnl_lock();
+
+ for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
+ struct hlist_head *head;
+
+ idx = 0;
+ head = &net->dev_index_head[h];
+ hlist_for_each_entry(netdev, head, index_hlist) {
+ if (idx < s_idx)
+ goto cont;
+ err = netdev_nl_dev_fill(netdev, skb,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, 0,
+ NETDEV_CMD_DEV_GET);
+ if (err < 0)
+ break;
+cont:
+ idx++;
+ }
+ }
+
+ rtnl_unlock();
+
+ if (err != -EMSGSIZE)
+ return err;
+
+ cb->args[1] = idx;
+ cb->args[0] = h;
+ cb->seq = net->dev_base_seq;
+
+ return skb->len;
+}
+
+static int netdev_genl_netdevice_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+
+ switch (event) {
+ case NETDEV_REGISTER:
+ netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_ADD_NTF);
+ break;
+ case NETDEV_UNREGISTER:
+ netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_DEL_NTF);
+ break;
+ case NETDEV_XDP_FEAT_CHANGE:
+ netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_CHANGE_NTF);
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block netdev_genl_nb = {
+ .notifier_call = netdev_genl_netdevice_event,
+};
+
+static int __init netdev_genl_init(void)
+{
+ int err;
+
+ err = register_netdevice_notifier(&netdev_genl_nb);
+ if (err)
+ return err;
+
+ err = genl_register_family(&netdev_nl_family);
+ if (err)
+ goto err_unreg_ntf;
+
+ return 0;
+
+err_unreg_ntf:
+ unregister_netdevice_notifier(&netdev_genl_nb);
+ return err;
+}
+
+subsys_initcall(netdev_genl_init);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 70a6088e8326..13ea10cf8544 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -84,7 +84,7 @@
#include "dev.h"
#include "sock_destructor.h"
-struct kmem_cache *skbuff_head_cache __ro_after_init;
+struct kmem_cache *skbuff_cache __ro_after_init;
static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
#ifdef CONFIG_SKB_EXTENSIONS
static struct kmem_cache *skbuff_ext_cache __ro_after_init;
@@ -285,7 +285,7 @@ static struct sk_buff *napi_skb_cache_get(void)
struct sk_buff *skb;
if (unlikely(!nc->skb_count)) {
- nc->skb_count = kmem_cache_alloc_bulk(skbuff_head_cache,
+ nc->skb_count = kmem_cache_alloc_bulk(skbuff_cache,
GFP_ATOMIC,
NAPI_SKB_CACHE_BULK,
nc->skb_cache);
@@ -294,7 +294,7 @@ static struct sk_buff *napi_skb_cache_get(void)
}
skb = nc->skb_cache[--nc->skb_count];
- kasan_unpoison_object_data(skbuff_head_cache, skb);
+ kasan_unpoison_object_data(skbuff_cache, skb);
return skb;
}
@@ -352,7 +352,7 @@ struct sk_buff *slab_build_skb(void *data)
struct sk_buff *skb;
unsigned int size;
- skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
+ skb = kmem_cache_alloc(skbuff_cache, GFP_ATOMIC);
if (unlikely(!skb))
return NULL;
@@ -403,7 +403,7 @@ struct sk_buff *__build_skb(void *data, unsigned int frag_size)
{
struct sk_buff *skb;
- skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
+ skb = kmem_cache_alloc(skbuff_cache, GFP_ATOMIC);
if (unlikely(!skb))
return NULL;
@@ -585,7 +585,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
u8 *data;
cache = (flags & SKB_ALLOC_FCLONE)
- ? skbuff_fclone_cache : skbuff_head_cache;
+ ? skbuff_fclone_cache : skbuff_cache;
if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX))
gfp_mask |= __GFP_MEMALLOC;
@@ -921,7 +921,7 @@ static void kfree_skbmem(struct sk_buff *skb)
switch (skb->fclone) {
case SKB_FCLONE_UNAVAILABLE:
- kmem_cache_free(skbuff_head_cache, skb);
+ kmem_cache_free(skbuff_cache, skb);
return;
case SKB_FCLONE_ORIG:
@@ -1035,7 +1035,7 @@ static void kfree_skb_add_bulk(struct sk_buff *skb,
sa->skb_array[sa->skb_count++] = skb;
if (unlikely(sa->skb_count == KFREE_SKB_BULK_SIZE)) {
- kmem_cache_free_bulk(skbuff_head_cache, KFREE_SKB_BULK_SIZE,
+ kmem_cache_free_bulk(skbuff_cache, KFREE_SKB_BULK_SIZE,
sa->skb_array);
sa->skb_count = 0;
}
@@ -1060,8 +1060,7 @@ kfree_skb_list_reason(struct sk_buff *segs, enum skb_drop_reason reason)
}
if (sa.skb_count)
- kmem_cache_free_bulk(skbuff_head_cache, sa.skb_count,
- sa.skb_array);
+ kmem_cache_free_bulk(skbuff_cache, sa.skb_count, sa.skb_array);
}
EXPORT_SYMBOL(kfree_skb_list_reason);
@@ -1215,15 +1214,15 @@ static void napi_skb_cache_put(struct sk_buff *skb)
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
u32 i;
- kasan_poison_object_data(skbuff_head_cache, skb);
+ kasan_poison_object_data(skbuff_cache, skb);
nc->skb_cache[nc->skb_count++] = skb;
if (unlikely(nc->skb_count == NAPI_SKB_CACHE_SIZE)) {
for (i = NAPI_SKB_CACHE_HALF; i < NAPI_SKB_CACHE_SIZE; i++)
- kasan_unpoison_object_data(skbuff_head_cache,
+ kasan_unpoison_object_data(skbuff_cache,
nc->skb_cache[i]);
- kmem_cache_free_bulk(skbuff_head_cache, NAPI_SKB_CACHE_HALF,
+ kmem_cache_free_bulk(skbuff_cache, NAPI_SKB_CACHE_HALF,
nc->skb_cache + NAPI_SKB_CACHE_HALF);
nc->skb_count = NAPI_SKB_CACHE_HALF;
}
@@ -1807,7 +1806,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
if (skb_pfmemalloc(skb))
gfp_mask |= __GFP_MEMALLOC;
- n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
+ n = kmem_cache_alloc(skbuff_cache, gfp_mask);
if (!n)
return NULL;
@@ -4677,7 +4676,7 @@ static void skb_extensions_init(void) {}
void __init skb_init(void)
{
- skbuff_head_cache = kmem_cache_create_usercopy("skbuff_head_cache",
+ skbuff_cache = kmem_cache_create_usercopy("skbuff_head_cache",
sizeof(struct sk_buff),
0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC,
@@ -5556,7 +5555,7 @@ void kfree_skb_partial(struct sk_buff *skb, bool head_stolen)
{
if (head_stolen) {
skb_release_head_state(skb);
- kmem_cache_free(skbuff_head_cache, skb);
+ kmem_cache_free(skbuff_cache, skb);
} else {
__kfree_skb(skb);
}
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index e7b98162c632..7130e6d9e263 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -16,6 +16,7 @@
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/slab.h>
+#include <linux/sched/isolation.h>
#include <net/ip.h>
#include <net/sock.h>
@@ -45,7 +46,59 @@ EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net);
int sysctl_devconf_inherit_init_net __read_mostly;
EXPORT_SYMBOL(sysctl_devconf_inherit_init_net);
+#if IS_ENABLED(CONFIG_NET_FLOW_LIMIT) || IS_ENABLED(CONFIG_RPS)
+static void dump_cpumask(void *buffer, size_t *lenp, loff_t *ppos,
+ struct cpumask *mask)
+{
+ char kbuf[128];
+ int len;
+
+ if (*ppos || !*lenp) {
+ *lenp = 0;
+ return;
+ }
+
+ len = min(sizeof(kbuf) - 1, *lenp);
+ len = scnprintf(kbuf, len, "%*pb", cpumask_pr_args(mask));
+ if (!len) {
+ *lenp = 0;
+ return;
+ }
+
+ if (len < *lenp)
+ kbuf[len++] = '\n';
+ memcpy(buffer, kbuf, len);
+ *lenp = len;
+ *ppos += len;
+}
+#endif
+
#ifdef CONFIG_RPS
+struct cpumask rps_default_mask;
+
+static int rps_default_mask_sysctl(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ int err = 0;
+
+ rtnl_lock();
+ if (write) {
+ err = cpumask_parse(buffer, &rps_default_mask);
+ if (err)
+ goto done;
+
+ err = rps_cpumask_housekeeping(&rps_default_mask);
+ if (err)
+ goto done;
+ } else {
+ dump_cpumask(buffer, lenp, ppos, &rps_default_mask);
+ }
+
+done:
+ rtnl_unlock();
+ return err;
+}
+
static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
@@ -155,13 +208,6 @@ static int flow_limit_cpu_sysctl(struct ctl_table *table, int write,
write_unlock:
mutex_unlock(&flow_limit_update_mutex);
} else {
- char kbuf[128];
-
- if (*ppos || !*lenp) {
- *lenp = 0;
- goto done;
- }
-
cpumask_clear(mask);
rcu_read_lock();
for_each_possible_cpu(i) {
@@ -171,17 +217,7 @@ write_unlock:
}
rcu_read_unlock();
- len = min(sizeof(kbuf) - 1, *lenp);
- len = scnprintf(kbuf, len, "%*pb", cpumask_pr_args(mask));
- if (!len) {
- *lenp = 0;
- goto done;
- }
- if (len < *lenp)
- kbuf[len++] = '\n';
- memcpy(buffer, kbuf, len);
- *lenp = len;
- *ppos += len;
+ dump_cpumask(buffer, lenp, ppos, mask);
}
done:
@@ -472,6 +508,11 @@ static struct ctl_table net_core_table[] = {
.mode = 0644,
.proc_handler = rps_sock_flow_sysctl
},
+ {
+ .procname = "rps_default_mask",
+ .mode = 0644,
+ .proc_handler = rps_default_mask_sysctl
+ },
#endif
#ifdef CONFIG_NET_FLOW_LIMIT
{
@@ -675,6 +716,10 @@ static __net_initdata struct pernet_operations sysctl_core_ops = {
static __init int sysctl_core_init(void)
{
+#if IS_ENABLED(CONFIG_RPS)
+ cpumask_copy(&rps_default_mask, cpu_none_mask);
+#endif
+
register_net_sysctl(&init_net, "net/core", net_core_table);
return register_pernet_subsys(&sysctl_core_ops);
}
diff --git a/net/core/xdp.c b/net/core/xdp.c
index a5a7ecf6391c..8c92fc553317 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -4,6 +4,7 @@
* Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
*/
#include <linux/bpf.h>
+#include <linux/btf.h>
#include <linux/btf_ids.h>
#include <linux/filter.h>
#include <linux/types.h>
@@ -603,8 +604,7 @@ EXPORT_SYMBOL_GPL(xdp_warn);
int xdp_alloc_skb_bulk(void **skbs, int n_skb, gfp_t gfp)
{
- n_skb = kmem_cache_alloc_bulk(skbuff_head_cache, gfp,
- n_skb, skbs);
+ n_skb = kmem_cache_alloc_bulk(skbuff_cache, gfp, n_skb, skbs);
if (unlikely(!n_skb))
return -ENOMEM;
@@ -673,7 +673,7 @@ struct sk_buff *xdp_build_skb_from_frame(struct xdp_frame *xdpf,
{
struct sk_buff *skb;
- skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
+ skb = kmem_cache_alloc(skbuff_cache, GFP_ATOMIC);
if (unlikely(!skb))
return NULL;
@@ -722,7 +722,7 @@ __diag_ignore_all("-Wmissing-prototypes",
*
* Returns 0 on success or ``-errno`` on error.
*/
-int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
+__bpf_kfunc int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
{
return -EOPNOTSUPP;
}
@@ -734,7 +734,7 @@ int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
*
* Returns 0 on success or ``-errno`` on error.
*/
-int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash)
+__bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash)
{
return -EOPNOTSUPP;
}
@@ -773,3 +773,21 @@ static int __init xdp_metadata_init(void)
return register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &xdp_metadata_kfunc_set);
}
late_initcall(xdp_metadata_init);
+
+void xdp_features_set_redirect_target(struct net_device *dev, bool support_sg)
+{
+ dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT;
+ if (support_sg)
+ dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT_SG;
+
+ call_netdevice_notifiers(NETDEV_XDP_FEAT_CHANGE, dev);
+}
+EXPORT_SYMBOL_GPL(xdp_features_set_redirect_target);
+
+void xdp_features_clear_redirect_target(struct net_device *dev)
+{
+ dev->xdp_features &= ~(NETDEV_XDP_ACT_NDO_XMIT |
+ NETDEV_XDP_ACT_NDO_XMIT_SG);
+ call_netdevice_notifiers(NETDEV_XDP_FEAT_CHANGE, dev);
+}
+EXPORT_SYMBOL_GPL(xdp_features_clear_redirect_target);
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index d2c470524e58..146792cd26fe 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -295,7 +295,7 @@ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
}
/* override sysctl_tcp_min_tso_segs */
-static u32 bbr_min_tso_segs(struct sock *sk)
+__bpf_kfunc static u32 bbr_min_tso_segs(struct sock *sk)
{
return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
}
@@ -328,7 +328,7 @@ static void bbr_save_cwnd(struct sock *sk)
bbr->prior_cwnd = max(bbr->prior_cwnd, tcp_snd_cwnd(tp));
}
-static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event)
+__bpf_kfunc static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
@@ -1023,7 +1023,7 @@ static void bbr_update_model(struct sock *sk, const struct rate_sample *rs)
bbr_update_gains(sk);
}
-static void bbr_main(struct sock *sk, const struct rate_sample *rs)
+__bpf_kfunc static void bbr_main(struct sock *sk, const struct rate_sample *rs)
{
struct bbr *bbr = inet_csk_ca(sk);
u32 bw;
@@ -1035,7 +1035,7 @@ static void bbr_main(struct sock *sk, const struct rate_sample *rs)
bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain);
}
-static void bbr_init(struct sock *sk)
+__bpf_kfunc static void bbr_init(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
@@ -1077,7 +1077,7 @@ static void bbr_init(struct sock *sk)
cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED);
}
-static u32 bbr_sndbuf_expand(struct sock *sk)
+__bpf_kfunc static u32 bbr_sndbuf_expand(struct sock *sk)
{
/* Provision 3 * cwnd since BBR may slow-start even during recovery. */
return 3;
@@ -1086,7 +1086,7 @@ static u32 bbr_sndbuf_expand(struct sock *sk)
/* In theory BBR does not need to undo the cwnd since it does not
* always reduce cwnd on losses (see bbr_main()). Keep it for now.
*/
-static u32 bbr_undo_cwnd(struct sock *sk)
+__bpf_kfunc static u32 bbr_undo_cwnd(struct sock *sk)
{
struct bbr *bbr = inet_csk_ca(sk);
@@ -1097,7 +1097,7 @@ static u32 bbr_undo_cwnd(struct sock *sk)
}
/* Entering loss recovery, so save cwnd for when we exit or undo recovery. */
-static u32 bbr_ssthresh(struct sock *sk)
+__bpf_kfunc static u32 bbr_ssthresh(struct sock *sk)
{
bbr_save_cwnd(sk);
return tcp_sk(sk)->snd_ssthresh;
@@ -1125,7 +1125,7 @@ static size_t bbr_get_info(struct sock *sk, u32 ext, int *attr,
return 0;
}
-static void bbr_set_state(struct sock *sk, u8 new_state)
+__bpf_kfunc static void bbr_set_state(struct sock *sk, u8 new_state)
{
struct bbr *bbr = inet_csk_ca(sk);
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index d3cae40749e8..db8b4b488c31 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -403,7 +403,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load,
* ABC caps N to 2. Slow start exits when cwnd grows over ssthresh and
* returns the leftover acks to adjust cwnd in congestion avoidance mode.
*/
-u32 tcp_slow_start(struct tcp_sock *tp, u32 acked)
+__bpf_kfunc u32 tcp_slow_start(struct tcp_sock *tp, u32 acked)
{
u32 cwnd = min(tcp_snd_cwnd(tp) + acked, tp->snd_ssthresh);
@@ -417,7 +417,7 @@ EXPORT_SYMBOL_GPL(tcp_slow_start);
/* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd (or alternative w),
* for every packet that was ACKed.
*/
-void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked)
+__bpf_kfunc void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked)
{
/* If credits accumulated at a higher w, apply them gently now. */
if (tp->snd_cwnd_cnt >= w) {
@@ -443,7 +443,7 @@ EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai);
/* This is Jacobson's slow start and congestion avoidance.
* SIGCOMM '88, p. 328.
*/
-void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
+__bpf_kfunc void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -462,7 +462,7 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
/* Slow start threshold is half the congestion window (min 2) */
-u32 tcp_reno_ssthresh(struct sock *sk)
+__bpf_kfunc u32 tcp_reno_ssthresh(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
@@ -470,7 +470,7 @@ u32 tcp_reno_ssthresh(struct sock *sk)
}
EXPORT_SYMBOL_GPL(tcp_reno_ssthresh);
-u32 tcp_reno_undo_cwnd(struct sock *sk)
+__bpf_kfunc u32 tcp_reno_undo_cwnd(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 768c10c1f649..0fd78ecb67e7 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -126,7 +126,7 @@ static inline void bictcp_hystart_reset(struct sock *sk)
ca->sample_cnt = 0;
}
-static void cubictcp_init(struct sock *sk)
+__bpf_kfunc static void cubictcp_init(struct sock *sk)
{
struct bictcp *ca = inet_csk_ca(sk);
@@ -139,7 +139,7 @@ static void cubictcp_init(struct sock *sk)
tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
}
-static void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event)
+__bpf_kfunc static void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event)
{
if (event == CA_EVENT_TX_START) {
struct bictcp *ca = inet_csk_ca(sk);
@@ -321,7 +321,7 @@ tcp_friendliness:
ca->cnt = max(ca->cnt, 2U);
}
-static void cubictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
+__bpf_kfunc static void cubictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
@@ -338,7 +338,7 @@ static void cubictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
tcp_cong_avoid_ai(tp, ca->cnt, acked);
}
-static u32 cubictcp_recalc_ssthresh(struct sock *sk)
+__bpf_kfunc static u32 cubictcp_recalc_ssthresh(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
@@ -355,7 +355,7 @@ static u32 cubictcp_recalc_ssthresh(struct sock *sk)
return max((tcp_snd_cwnd(tp) * beta) / BICTCP_BETA_SCALE, 2U);
}
-static void cubictcp_state(struct sock *sk, u8 new_state)
+__bpf_kfunc static void cubictcp_state(struct sock *sk, u8 new_state)
{
if (new_state == TCP_CA_Loss) {
bictcp_reset(inet_csk_ca(sk));
@@ -445,7 +445,7 @@ static void hystart_update(struct sock *sk, u32 delay)
}
}
-static void cubictcp_acked(struct sock *sk, const struct ack_sample *sample)
+__bpf_kfunc static void cubictcp_acked(struct sock *sk, const struct ack_sample *sample)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index e0a2ca7456ff..bb23bb5b387a 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -75,7 +75,7 @@ static void dctcp_reset(const struct tcp_sock *tp, struct dctcp *ca)
ca->old_delivered_ce = tp->delivered_ce;
}
-static void dctcp_init(struct sock *sk)
+__bpf_kfunc static void dctcp_init(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
@@ -104,7 +104,7 @@ static void dctcp_init(struct sock *sk)
INET_ECN_dontxmit(sk);
}
-static u32 dctcp_ssthresh(struct sock *sk)
+__bpf_kfunc static u32 dctcp_ssthresh(struct sock *sk)
{
struct dctcp *ca = inet_csk_ca(sk);
struct tcp_sock *tp = tcp_sk(sk);
@@ -113,7 +113,7 @@ static u32 dctcp_ssthresh(struct sock *sk)
return max(tcp_snd_cwnd(tp) - ((tcp_snd_cwnd(tp) * ca->dctcp_alpha) >> 11U), 2U);
}
-static void dctcp_update_alpha(struct sock *sk, u32 flags)
+__bpf_kfunc static void dctcp_update_alpha(struct sock *sk, u32 flags)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct dctcp *ca = inet_csk_ca(sk);
@@ -169,7 +169,7 @@ static void dctcp_react_to_loss(struct sock *sk)
tp->snd_ssthresh = max(tcp_snd_cwnd(tp) >> 1U, 2U);
}
-static void dctcp_state(struct sock *sk, u8 new_state)
+__bpf_kfunc static void dctcp_state(struct sock *sk, u8 new_state)
{
if (new_state == TCP_CA_Recovery &&
new_state != inet_csk(sk)->icsk_ca_state)
@@ -179,7 +179,7 @@ static void dctcp_state(struct sock *sk, u8 new_state)
*/
}
-static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
+__bpf_kfunc static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
{
struct dctcp *ca = inet_csk_ca(sk);
@@ -229,7 +229,7 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr,
return 0;
}
-static u32 dctcp_cwnd_undo(struct sock *sk)
+__bpf_kfunc static u32 dctcp_cwnd_undo(struct sock *sk)
{
const struct dctcp *ca = inet_csk_ca(sk);
struct tcp_sock *tp = tcp_sk(sk);
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index f71b41c7ce2f..4d6737160857 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -189,6 +189,9 @@ config NF_CONNTRACK_LABELS
to connection tracking entries. It can be used with xtables connlabel
match and the nftables ct expression.
+config NF_CONNTRACK_OVS
+ bool
+
config NF_CT_PROTO_DCCP
bool 'DCCP protocol connection tracking support'
depends on NETFILTER_ADVANCED
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index ba2a6b5e93d9..5ffef1cd6143 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -11,6 +11,7 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o
+nf_conntrack-$(CONFIG_NF_CONNTRACK_OVS) += nf_conntrack_ovs.o
nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o
nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
nf_conntrack-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o
diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
index 24002bc61e07..34913521c385 100644
--- a/net/netfilter/nf_conntrack_bpf.c
+++ b/net/netfilter/nf_conntrack_bpf.c
@@ -249,7 +249,7 @@ __diag_ignore_all("-Wmissing-prototypes",
* @opts__sz - Length of the bpf_ct_opts structure
* Must be NF_BPF_CT_OPTS_SZ (12)
*/
-struct nf_conn___init *
+__bpf_kfunc struct nf_conn___init *
bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
{
@@ -283,7 +283,7 @@ bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
* @opts__sz - Length of the bpf_ct_opts structure
* Must be NF_BPF_CT_OPTS_SZ (12)
*/
-struct nf_conn *
+__bpf_kfunc struct nf_conn *
bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
{
@@ -316,7 +316,7 @@ bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
* @opts__sz - Length of the bpf_ct_opts structure
* Must be NF_BPF_CT_OPTS_SZ (12)
*/
-struct nf_conn___init *
+__bpf_kfunc struct nf_conn___init *
bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
{
@@ -351,7 +351,7 @@ bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
* @opts__sz - Length of the bpf_ct_opts structure
* Must be NF_BPF_CT_OPTS_SZ (12)
*/
-struct nf_conn *
+__bpf_kfunc struct nf_conn *
bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
{
@@ -376,7 +376,7 @@ bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
* @nfct - Pointer to referenced nf_conn___init object, obtained
* using bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
*/
-struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i)
+__bpf_kfunc struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i)
{
struct nf_conn *nfct = (struct nf_conn *)nfct_i;
int err;
@@ -400,7 +400,7 @@ struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i)
* @nf_conn - Pointer to referenced nf_conn object, obtained using
* bpf_xdp_ct_lookup or bpf_skb_ct_lookup.
*/
-void bpf_ct_release(struct nf_conn *nfct)
+__bpf_kfunc void bpf_ct_release(struct nf_conn *nfct)
{
if (!nfct)
return;
@@ -417,7 +417,7 @@ void bpf_ct_release(struct nf_conn *nfct)
* bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
* @timeout - Timeout in msecs.
*/
-void bpf_ct_set_timeout(struct nf_conn___init *nfct, u32 timeout)
+__bpf_kfunc void bpf_ct_set_timeout(struct nf_conn___init *nfct, u32 timeout)
{
__nf_ct_set_timeout((struct nf_conn *)nfct, msecs_to_jiffies(timeout));
}
@@ -432,7 +432,7 @@ void bpf_ct_set_timeout(struct nf_conn___init *nfct, u32 timeout)
* bpf_ct_insert_entry, bpf_xdp_ct_lookup, or bpf_skb_ct_lookup.
* @timeout - New timeout in msecs.
*/
-int bpf_ct_change_timeout(struct nf_conn *nfct, u32 timeout)
+__bpf_kfunc int bpf_ct_change_timeout(struct nf_conn *nfct, u32 timeout)
{
return __nf_ct_change_timeout(nfct, msecs_to_jiffies(timeout));
}
@@ -447,7 +447,7 @@ int bpf_ct_change_timeout(struct nf_conn *nfct, u32 timeout)
* bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
* @status - New status value.
*/
-int bpf_ct_set_status(const struct nf_conn___init *nfct, u32 status)
+__bpf_kfunc int bpf_ct_set_status(const struct nf_conn___init *nfct, u32 status)
{
return nf_ct_change_status_common((struct nf_conn *)nfct, status);
}
@@ -462,7 +462,7 @@ int bpf_ct_set_status(const struct nf_conn___init *nfct, u32 status)
* bpf_ct_insert_entry, bpf_xdp_ct_lookup or bpf_skb_ct_lookup.
* @status - New status value.
*/
-int bpf_ct_change_status(struct nf_conn *nfct, u32 status)
+__bpf_kfunc int bpf_ct_change_status(struct nf_conn *nfct, u32 status)
{
return nf_ct_change_status_common(nfct, status);
}
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 48ea6d0264b5..0c4db2f2ac43 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -242,104 +242,6 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
}
EXPORT_SYMBOL_GPL(__nf_ct_try_assign_helper);
-/* 'skb' should already be pulled to nh_ofs. */
-int nf_ct_helper(struct sk_buff *skb, struct nf_conn *ct,
- enum ip_conntrack_info ctinfo, u16 proto)
-{
- const struct nf_conntrack_helper *helper;
- const struct nf_conn_help *help;
- unsigned int protoff;
- int err;
-
- if (ctinfo == IP_CT_RELATED_REPLY)
- return NF_ACCEPT;
-
- help = nfct_help(ct);
- if (!help)
- return NF_ACCEPT;
-
- helper = rcu_dereference(help->helper);
- if (!helper)
- return NF_ACCEPT;
-
- if (helper->tuple.src.l3num != NFPROTO_UNSPEC &&
- helper->tuple.src.l3num != proto)
- return NF_ACCEPT;
-
- switch (proto) {
- case NFPROTO_IPV4:
- protoff = ip_hdrlen(skb);
- proto = ip_hdr(skb)->protocol;
- break;
- case NFPROTO_IPV6: {
- u8 nexthdr = ipv6_hdr(skb)->nexthdr;
- __be16 frag_off;
- int ofs;
-
- ofs = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
- &frag_off);
- if (ofs < 0 || (frag_off & htons(~0x7)) != 0) {
- pr_debug("proto header not found\n");
- return NF_ACCEPT;
- }
- protoff = ofs;
- proto = nexthdr;
- break;
- }
- default:
- WARN_ONCE(1, "helper invoked on non-IP family!");
- return NF_DROP;
- }
-
- if (helper->tuple.dst.protonum != proto)
- return NF_ACCEPT;
-
- err = helper->help(skb, protoff, ct, ctinfo);
- if (err != NF_ACCEPT)
- return err;
-
- /* Adjust seqs after helper. This is needed due to some helpers (e.g.,
- * FTP with NAT) adusting the TCP payload size when mangling IP
- * addresses and/or port numbers in the text-based control connection.
- */
- if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
- !nf_ct_seq_adjust(skb, ct, ctinfo, protoff))
- return NF_DROP;
- return NF_ACCEPT;
-}
-EXPORT_SYMBOL_GPL(nf_ct_helper);
-
-int nf_ct_add_helper(struct nf_conn *ct, const char *name, u8 family,
- u8 proto, bool nat, struct nf_conntrack_helper **hp)
-{
- struct nf_conntrack_helper *helper;
- struct nf_conn_help *help;
- int ret = 0;
-
- helper = nf_conntrack_helper_try_module_get(name, family, proto);
- if (!helper)
- return -EINVAL;
-
- help = nf_ct_helper_ext_add(ct, GFP_KERNEL);
- if (!help) {
- nf_conntrack_helper_put(helper);
- return -ENOMEM;
- }
-#if IS_ENABLED(CONFIG_NF_NAT)
- if (nat) {
- ret = nf_nat_helper_try_module_get(name, family, proto);
- if (ret) {
- nf_conntrack_helper_put(helper);
- return ret;
- }
- }
-#endif
- rcu_assign_pointer(help->helper, helper);
- *hp = helper;
- return ret;
-}
-EXPORT_SYMBOL_GPL(nf_ct_add_helper);
-
/* appropriate ct lock protecting must be taken by caller */
static int unhelp(struct nf_conn *ct, void *me)
{
diff --git a/net/netfilter/nf_conntrack_ovs.c b/net/netfilter/nf_conntrack_ovs.c
new file mode 100644
index 000000000000..52b776bdf526
--- /dev/null
+++ b/net/netfilter/nf_conntrack_ovs.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Support ct functions for openvswitch and used by OVS and TC conntrack. */
+
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+#include <net/ipv6_frag.h>
+#include <net/ip.h>
+
+/* 'skb' should already be pulled to nh_ofs. */
+int nf_ct_helper(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo, u16 proto)
+{
+ const struct nf_conntrack_helper *helper;
+ const struct nf_conn_help *help;
+ unsigned int protoff;
+ int err;
+
+ if (ctinfo == IP_CT_RELATED_REPLY)
+ return NF_ACCEPT;
+
+ help = nfct_help(ct);
+ if (!help)
+ return NF_ACCEPT;
+
+ helper = rcu_dereference(help->helper);
+ if (!helper)
+ return NF_ACCEPT;
+
+ if (helper->tuple.src.l3num != NFPROTO_UNSPEC &&
+ helper->tuple.src.l3num != proto)
+ return NF_ACCEPT;
+
+ switch (proto) {
+ case NFPROTO_IPV4:
+ protoff = ip_hdrlen(skb);
+ proto = ip_hdr(skb)->protocol;
+ break;
+ case NFPROTO_IPV6: {
+ u8 nexthdr = ipv6_hdr(skb)->nexthdr;
+ __be16 frag_off;
+ int ofs;
+
+ ofs = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
+ &frag_off);
+ if (ofs < 0 || (frag_off & htons(~0x7)) != 0) {
+ pr_debug("proto header not found\n");
+ return NF_ACCEPT;
+ }
+ protoff = ofs;
+ proto = nexthdr;
+ break;
+ }
+ default:
+ WARN_ONCE(1, "helper invoked on non-IP family!");
+ return NF_DROP;
+ }
+
+ if (helper->tuple.dst.protonum != proto)
+ return NF_ACCEPT;
+
+ err = helper->help(skb, protoff, ct, ctinfo);
+ if (err != NF_ACCEPT)
+ return err;
+
+ /* Adjust seqs after helper. This is needed due to some helpers (e.g.,
+ * FTP with NAT) adusting the TCP payload size when mangling IP
+ * addresses and/or port numbers in the text-based control connection.
+ */
+ if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
+ !nf_ct_seq_adjust(skb, ct, ctinfo, protoff))
+ return NF_DROP;
+ return NF_ACCEPT;
+}
+EXPORT_SYMBOL_GPL(nf_ct_helper);
+
+int nf_ct_add_helper(struct nf_conn *ct, const char *name, u8 family,
+ u8 proto, bool nat, struct nf_conntrack_helper **hp)
+{
+ struct nf_conntrack_helper *helper;
+ struct nf_conn_help *help;
+ int ret = 0;
+
+ helper = nf_conntrack_helper_try_module_get(name, family, proto);
+ if (!helper)
+ return -EINVAL;
+
+ help = nf_ct_helper_ext_add(ct, GFP_KERNEL);
+ if (!help) {
+ nf_conntrack_helper_put(helper);
+ return -ENOMEM;
+ }
+#if IS_ENABLED(CONFIG_NF_NAT)
+ if (nat) {
+ ret = nf_nat_helper_try_module_get(name, family, proto);
+ if (ret) {
+ nf_conntrack_helper_put(helper);
+ return ret;
+ }
+ }
+#endif
+ rcu_assign_pointer(help->helper, helper);
+ *hp = helper;
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_ct_add_helper);
+
+/* Trim the skb to the length specified by the IP/IPv6 header,
+ * removing any trailing lower-layer padding. This prepares the skb
+ * for higher-layer processing that assumes skb->len excludes padding
+ * (such as nf_ip_checksum). The caller needs to pull the skb to the
+ * network header, and ensure ip_hdr/ipv6_hdr points to valid data.
+ */
+int nf_ct_skb_network_trim(struct sk_buff *skb, int family)
+{
+ unsigned int len;
+
+ switch (family) {
+ case NFPROTO_IPV4:
+ len = skb_ip_totlen(skb);
+ break;
+ case NFPROTO_IPV6:
+ len = sizeof(struct ipv6hdr)
+ + ntohs(ipv6_hdr(skb)->payload_len);
+ break;
+ default:
+ len = skb->len;
+ }
+
+ return pskb_trim_rcsum(skb, len);
+}
+EXPORT_SYMBOL_GPL(nf_ct_skb_network_trim);
+
+/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
+ * value if 'skb' is freed.
+ */
+int nf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
+ u16 zone, u8 family, u8 *proto, u16 *mru)
+{
+ int err;
+
+ if (family == NFPROTO_IPV4) {
+ enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;
+
+ memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
+ local_bh_disable();
+ err = ip_defrag(net, skb, user);
+ local_bh_enable();
+ if (err)
+ return err;
+
+ *mru = IPCB(skb)->frag_max_size;
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
+ } else if (family == NFPROTO_IPV6) {
+ enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
+
+ memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
+ err = nf_ct_frag6_gather(net, skb, user);
+ if (err) {
+ if (err != -EINPROGRESS)
+ kfree_skb(skb);
+ return err;
+ }
+
+ *proto = ipv6_hdr(skb)->nexthdr;
+ *mru = IP6CB(skb)->frag_max_size;
+#endif
+ } else {
+ kfree_skb(skb);
+ return -EPFNOSUPPORT;
+ }
+
+ skb_clear_hash(skb);
+ skb->ignore_df = 1;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nf_ct_handle_fragments);
diff --git a/net/netfilter/nf_nat_bpf.c b/net/netfilter/nf_nat_bpf.c
index 0fa5a0bbb0ff..141ee7783223 100644
--- a/net/netfilter/nf_nat_bpf.c
+++ b/net/netfilter/nf_nat_bpf.c
@@ -30,9 +30,9 @@ __diag_ignore_all("-Wmissing-prototypes",
* interpreted as select a random port.
* @manip - NF_NAT_MANIP_SRC or NF_NAT_MANIP_DST
*/
-int bpf_ct_set_nat_info(struct nf_conn___init *nfct,
- union nf_inet_addr *addr, int port,
- enum nf_nat_manip_type manip)
+__bpf_kfunc int bpf_ct_set_nat_info(struct nf_conn___init *nfct,
+ union nf_inet_addr *addr, int port,
+ enum nf_nat_manip_type manip)
{
struct nf_conn *ct = (struct nf_conn *)nfct;
u16 proto = nf_ct_l3num(ct);
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 600993c80050..04c4036bf406 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -13,7 +13,7 @@
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
-#include <linux/string.h>
+#include <linux/string_helpers.h>
#include <linux/skbuff.h>
#include <linux/mutex.h>
#include <linux/bitmap.h>
@@ -457,7 +457,7 @@ static int genl_validate_assign_mc_groups(struct genl_family *family)
if (WARN_ON(grp->name[0] == '\0'))
return -EINVAL;
- if (WARN_ON(memchr(grp->name, '\0', GENL_NAMSIZ) == NULL))
+ if (WARN_ON(!string_is_terminated(grp->name, GENL_NAMSIZ)))
return -EINVAL;
}
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index 747d537a3f06..29a7081858cd 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -15,6 +15,7 @@ config OPENVSWITCH
select NET_MPLS_GSO
select DST_CACHE
select NET_NSH
+ select NF_CONNTRACK_OVS if NF_CONNTRACK
select NF_NAT_OVS if NF_NAT
help
Open vSwitch is a multilayer Ethernet switch targeted at virtualized
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 2172930b1f17..331730fd3580 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -9,6 +9,7 @@
#include <linux/udp.h>
#include <linux/sctp.h>
#include <linux/static_key.h>
+#include <linux/string_helpers.h>
#include <net/ip.h>
#include <net/genetlink.h>
#include <net/netfilter/nf_conntrack_core.h>
@@ -434,52 +435,21 @@ static int ovs_ct_set_labels(struct nf_conn *ct, struct sw_flow_key *key,
return 0;
}
-/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
- * value if 'skb' is freed.
- */
-static int handle_fragments(struct net *net, struct sw_flow_key *key,
- u16 zone, struct sk_buff *skb)
+static int ovs_ct_handle_fragments(struct net *net, struct sw_flow_key *key,
+ u16 zone, int family, struct sk_buff *skb)
{
struct ovs_skb_cb ovs_cb = *OVS_CB(skb);
int err;
- if (key->eth.type == htons(ETH_P_IP)) {
- enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;
-
- memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
- err = ip_defrag(net, skb, user);
- if (err)
- return err;
-
- ovs_cb.mru = IPCB(skb)->frag_max_size;
-#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
- } else if (key->eth.type == htons(ETH_P_IPV6)) {
- enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
-
- memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
- err = nf_ct_frag6_gather(net, skb, user);
- if (err) {
- if (err != -EINPROGRESS)
- kfree_skb(skb);
- return err;
- }
-
- key->ip.proto = ipv6_hdr(skb)->nexthdr;
- ovs_cb.mru = IP6CB(skb)->frag_max_size;
-#endif
- } else {
- kfree_skb(skb);
- return -EPFNOSUPPORT;
- }
+ err = nf_ct_handle_fragments(net, skb, zone, family, &key->ip.proto, &ovs_cb.mru);
+ if (err)
+ return err;
/* The key extracted from the fragment that completed this datagram
* likely didn't have an L4 header, so regenerate it.
*/
ovs_flow_key_update_l3l4(skb, key);
-
key->ip.frag = OVS_FRAG_TYPE_NONE;
- skb_clear_hash(skb);
- skb->ignore_df = 1;
*OVS_CB(skb) = ovs_cb;
return 0;
@@ -1090,36 +1060,6 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
return 0;
}
-/* Trim the skb to the length specified by the IP/IPv6 header,
- * removing any trailing lower-layer padding. This prepares the skb
- * for higher-layer processing that assumes skb->len excludes padding
- * (such as nf_ip_checksum). The caller needs to pull the skb to the
- * network header, and ensure ip_hdr/ipv6_hdr points to valid data.
- */
-static int ovs_skb_network_trim(struct sk_buff *skb)
-{
- unsigned int len;
- int err;
-
- switch (skb->protocol) {
- case htons(ETH_P_IP):
- len = skb_ip_totlen(skb);
- break;
- case htons(ETH_P_IPV6):
- len = sizeof(struct ipv6hdr)
- + ntohs(ipv6_hdr(skb)->payload_len);
- break;
- default:
- len = skb->len;
- }
-
- err = pskb_trim_rcsum(skb, len);
- if (err)
- kfree_skb(skb);
-
- return err;
-}
-
/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
* value if 'skb' is freed.
*/
@@ -1134,12 +1074,15 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
nh_ofs = skb_network_offset(skb);
skb_pull_rcsum(skb, nh_ofs);
- err = ovs_skb_network_trim(skb);
- if (err)
+ err = nf_ct_skb_network_trim(skb, info->family);
+ if (err) {
+ kfree_skb(skb);
return err;
+ }
if (key->ip.frag != OVS_FRAG_TYPE_NONE) {
- err = handle_fragments(net, key, info->zone.id, skb);
+ err = ovs_ct_handle_fragments(net, key, info->zone.id,
+ info->family, skb);
if (err)
return err;
}
@@ -1383,7 +1326,7 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
#endif
case OVS_CT_ATTR_HELPER:
*helper = nla_data(a);
- if (!memchr(*helper, '\0', nla_len(a))) {
+ if (!string_is_terminated(*helper, nla_len(a))) {
OVS_NLERR(log, "Invalid conntrack helper");
return -EINVAL;
}
@@ -1404,7 +1347,7 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
case OVS_CT_ATTR_TIMEOUT:
memcpy(info->timeout, nla_data(a), nla_len(a));
- if (!memchr(info->timeout, '\0', nla_len(a))) {
+ if (!string_is_terminated(info->timeout, nla_len(a))) {
OVS_NLERR(log, "Invalid conntrack timeout");
return -EINVAL;
}
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 6eaffb0d8fdc..e9f1f49d18c2 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -54,12 +54,14 @@ void rxrpc_poke_call(struct rxrpc_call *call, enum rxrpc_call_poke_trace what)
spin_lock_bh(&local->lock);
busy = !list_empty(&call->attend_link);
trace_rxrpc_poke_call(call, busy, what);
+ if (!busy && !rxrpc_try_get_call(call, rxrpc_call_get_poke))
+ busy = true;
if (!busy) {
- rxrpc_get_call(call, rxrpc_call_get_poke);
list_add_tail(&call->attend_link, &local->call_attend_q);
}
spin_unlock_bh(&local->lock);
- rxrpc_wake_up_io_thread(local);
+ if (!busy)
+ rxrpc_wake_up_io_thread(local);
}
}
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 44414e724415..95f4bc206b3d 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -163,7 +163,7 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
trace_rxrpc_tx_ack(chan->call_debug_id, serial,
ntohl(pkt.ack.firstPacket),
ntohl(pkt.ack.serial),
- pkt.ack.reason, 0);
+ pkt.ack.reason, 0, rxrpc_rx_window_size);
break;
default:
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 6b2022240076..5e53429c6922 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -80,7 +80,8 @@ static void rxrpc_set_keepalive(struct rxrpc_call *call)
*/
static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn,
struct rxrpc_call *call,
- struct rxrpc_txbuf *txb)
+ struct rxrpc_txbuf *txb,
+ u16 *_rwind)
{
struct rxrpc_ackinfo ackinfo;
unsigned int qsize, sack, wrap, to;
@@ -124,6 +125,7 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn,
jmax = rxrpc_rx_jumbo_max;
qsize = (window - 1) - call->rx_consumed;
rsize = max_t(int, call->rx_winsize - qsize, 0);
+ *_rwind = rsize;
ackinfo.rxMTU = htonl(rxrpc_rx_mtu);
ackinfo.maxMTU = htonl(mtu);
ackinfo.rwind = htonl(rsize);
@@ -190,6 +192,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
rxrpc_serial_t serial;
size_t len, n;
int ret, rtt_slot = -1;
+ u16 rwind;
if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
return -ECONNRESET;
@@ -205,7 +208,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
if (txb->ack.reason == RXRPC_ACK_PING)
txb->wire.flags |= RXRPC_REQUEST_ACK;
- n = rxrpc_fill_out_ack(conn, call, txb);
+ n = rxrpc_fill_out_ack(conn, call, txb, &rwind);
if (n == 0)
return 0;
@@ -217,7 +220,8 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
txb->wire.serial = htonl(serial);
trace_rxrpc_tx_ack(call->debug_id, serial,
ntohl(txb->ack.firstPacket),
- ntohl(txb->ack.serial), txb->ack.reason, txb->ack.nAcks);
+ ntohl(txb->ack.serial), txb->ack.reason, txb->ack.nAcks,
+ rwind);
if (txb->ack.reason == RXRPC_ACK_PING)
rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_ping);
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index 50d263a6359d..76eb2b9cd936 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -137,7 +137,7 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call)
/* Check to see if there's an ACK that needs sending. */
acked = atomic_add_return(call->rx_consumed - old_consumed,
&call->ackr_nr_consumed);
- if (acked > 2 &&
+ if (acked > 8 &&
!test_and_set_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags))
rxrpc_poke_call(call, rxrpc_call_poke_idle);
}
diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c
index 944320e65ea8..3bcd6ee80396 100644
--- a/net/rxrpc/skbuff.c
+++ b/net/rxrpc/skbuff.c
@@ -63,7 +63,7 @@ void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace why)
if (skb) {
int n = atomic_dec_return(select_skb_count(skb));
trace_rxrpc_skb(skb, refcount_read(&skb->users), n, why);
- kfree_skb_reason(skb, SKB_CONSUMED);
+ consume_skb(skb);
}
}
@@ -78,6 +78,6 @@ void rxrpc_purge_queue(struct sk_buff_head *list)
int n = atomic_dec_return(select_skb_count(skb));
trace_rxrpc_skb(skb, refcount_read(&skb->users), n,
rxrpc_skb_put_purge);
- kfree_skb_reason(skb, SKB_CONSUMED);
+ consume_skb(skb);
}
}
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index f5acb535413d..4f7b52f5a11c 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -984,6 +984,7 @@ config NET_ACT_TUNNEL_KEY
config NET_ACT_CT
tristate "connection tracking tc action"
depends on NET_CLS_ACT && NF_CONNTRACK && (!NF_NAT || NF_NAT) && NF_FLOW_TABLE
+ select NF_CONNTRACK_OVS
select NF_NAT_OVS if NF_NAT
help
Say Y here to allow sending the packets to conntrack module.
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index b126f03c1bb6..9cc0bc7c71ed 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -726,31 +726,6 @@ drop_ct:
return false;
}
-/* Trim the skb to the length specified by the IP/IPv6 header,
- * removing any trailing lower-layer padding. This prepares the skb
- * for higher-layer processing that assumes skb->len excludes padding
- * (such as nf_ip_checksum). The caller needs to pull the skb to the
- * network header, and ensure ip_hdr/ipv6_hdr points to valid data.
- */
-static int tcf_ct_skb_network_trim(struct sk_buff *skb, int family)
-{
- unsigned int len;
-
- switch (family) {
- case NFPROTO_IPV4:
- len = skb_ip_totlen(skb);
- break;
- case NFPROTO_IPV6:
- len = sizeof(struct ipv6hdr)
- + ntohs(ipv6_hdr(skb)->payload_len);
- break;
- default:
- len = skb->len;
- }
-
- return pskb_trim_rcsum(skb, len);
-}
-
static u8 tcf_ct_skb_nf_family(struct sk_buff *skb)
{
u8 family = NFPROTO_UNSPEC;
@@ -810,6 +785,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
struct nf_conn *ct;
int err = 0;
bool frag;
+ u8 proto;
u16 mru;
/* Previously seen (loopback)? Ignore. */
@@ -825,50 +801,14 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
return err;
skb_get(skb);
- mru = tc_skb_cb(skb)->mru;
-
- if (family == NFPROTO_IPV4) {
- enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;
-
- memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
- local_bh_disable();
- err = ip_defrag(net, skb, user);
- local_bh_enable();
- if (err && err != -EINPROGRESS)
- return err;
-
- if (!err) {
- *defrag = true;
- mru = IPCB(skb)->frag_max_size;
- }
- } else { /* NFPROTO_IPV6 */
-#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
- enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
-
- memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
- err = nf_ct_frag6_gather(net, skb, user);
- if (err && err != -EINPROGRESS)
- goto out_free;
-
- if (!err) {
- *defrag = true;
- mru = IP6CB(skb)->frag_max_size;
- }
-#else
- err = -EOPNOTSUPP;
- goto out_free;
-#endif
- }
+ err = nf_ct_handle_fragments(net, skb, zone, family, &proto, &mru);
+ if (err)
+ return err;
- if (err != -EINPROGRESS)
- tc_skb_cb(skb)->mru = mru;
- skb_clear_hash(skb);
- skb->ignore_df = 1;
- return err;
+ *defrag = true;
+ tc_skb_cb(skb)->mru = mru;
-out_free:
- kfree_skb(skb);
- return err;
+ return 0;
}
static void tcf_ct_params_free(struct tcf_ct_params *params)
@@ -1011,7 +951,7 @@ TC_INDIRECT_SCOPE int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
if (err)
goto drop;
- err = tcf_ct_skb_network_trim(skb, family);
+ err = nf_ct_skb_network_trim(skb, family);
if (err)
goto drop;
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index dfea27a906f2..9b47c8409231 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -39,6 +39,7 @@
#include "node.h"
#include "net.h"
#include <net/genetlink.h>
+#include <linux/string_helpers.h>
#include <linux/tipc_config.h>
/* The legacy API had an artificial message length limit called
@@ -173,11 +174,6 @@ static struct sk_buff *tipc_get_err_tlv(char *str)
return buf;
}
-static inline bool string_is_valid(char *s, int len)
-{
- return memchr(s, '\0', len) ? true : false;
-}
-
static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
struct tipc_nl_compat_msg *msg,
struct sk_buff *arg)
@@ -445,7 +441,7 @@ static int tipc_nl_compat_bearer_enable(struct tipc_nl_compat_cmd_doit *cmd,
return -EINVAL;
len = min_t(int, len, TIPC_MAX_BEARER_NAME);
- if (!string_is_valid(b->name, len))
+ if (!string_is_terminated(b->name, len))
return -EINVAL;
if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, b->name))
@@ -486,7 +482,7 @@ static int tipc_nl_compat_bearer_disable(struct tipc_nl_compat_cmd_doit *cmd,
return -EINVAL;
len = min_t(int, len, TIPC_MAX_BEARER_NAME);
- if (!string_is_valid(name, len))
+ if (!string_is_terminated(name, len))
return -EINVAL;
if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, name))
@@ -584,7 +580,7 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg,
return -EINVAL;
len = min_t(int, len, TIPC_MAX_LINK_NAME);
- if (!string_is_valid(name, len))
+ if (!string_is_terminated(name, len))
return -EINVAL;
if (strcmp(name, nla_data(link[TIPC_NLA_LINK_NAME])) != 0)
@@ -819,7 +815,7 @@ static int tipc_nl_compat_link_set(struct tipc_nl_compat_cmd_doit *cmd,
return -EINVAL;
len = min_t(int, len, TIPC_MAX_LINK_NAME);
- if (!string_is_valid(lc->name, len))
+ if (!string_is_terminated(lc->name, len))
return -EINVAL;
media = tipc_media_find(lc->name);
@@ -856,7 +852,7 @@ static int tipc_nl_compat_link_reset_stats(struct tipc_nl_compat_cmd_doit *cmd,
return -EINVAL;
len = min_t(int, len, TIPC_MAX_LINK_NAME);
- if (!string_is_valid(name, len))
+ if (!string_is_terminated(name, len))
return -EINVAL;
if (nla_put_string(skb, TIPC_NLA_LINK_NAME, name))
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index ed6c71826d31..b2df1e0f8153 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -140,6 +140,10 @@ static void xp_disable_drv_zc(struct xsk_buff_pool *pool)
}
}
+#define NETDEV_XDP_ACT_ZC (NETDEV_XDP_ACT_BASIC | \
+ NETDEV_XDP_ACT_REDIRECT | \
+ NETDEV_XDP_ACT_XSK_ZEROCOPY)
+
int xp_assign_dev(struct xsk_buff_pool *pool,
struct net_device *netdev, u16 queue_id, u16 flags)
{
@@ -178,8 +182,7 @@ int xp_assign_dev(struct xsk_buff_pool *pool,
/* For copy-mode, we are done. */
return 0;
- if (!netdev->netdev_ops->ndo_bpf ||
- !netdev->netdev_ops->ndo_xsk_wakeup) {
+ if ((netdev->xdp_features & NETDEV_XDP_ACT_ZC) != NETDEV_XDP_ACT_ZC) {
err = -EOPNOTSUPP;
goto err_unreg_pool;
}
diff --git a/net/xfrm/xfrm_interface_bpf.c b/net/xfrm/xfrm_interface_bpf.c
index 1ef2162cebcf..d74f3fd20f2b 100644
--- a/net/xfrm/xfrm_interface_bpf.c
+++ b/net/xfrm/xfrm_interface_bpf.c
@@ -39,8 +39,7 @@ __diag_ignore_all("-Wmissing-prototypes",
* @to - Pointer to memory to which the metadata will be copied
* Cannot be NULL
*/
-__used noinline
-int bpf_skb_get_xfrm_info(struct __sk_buff *skb_ctx, struct bpf_xfrm_info *to)
+__bpf_kfunc int bpf_skb_get_xfrm_info(struct __sk_buff *skb_ctx, struct bpf_xfrm_info *to)
{
struct sk_buff *skb = (struct sk_buff *)skb_ctx;
struct xfrm_md_info *info;
@@ -62,9 +61,7 @@ int bpf_skb_get_xfrm_info(struct __sk_buff *skb_ctx, struct bpf_xfrm_info *to)
* @from - Pointer to memory from which the metadata will be copied
* Cannot be NULL
*/
-__used noinline
-int bpf_skb_set_xfrm_info(struct __sk_buff *skb_ctx,
- const struct bpf_xfrm_info *from)
+__bpf_kfunc int bpf_skb_set_xfrm_info(struct __sk_buff *skb_ctx, const struct bpf_xfrm_info *from)
{
struct sk_buff *skb = (struct sk_buff *)skb_ctx;
struct metadata_dst *md_dst;