summaryrefslogtreecommitdiff
path: root/net/ipv6
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv6')
-rw-r--r--net/ipv6/Kconfig11
-rw-r--r--net/ipv6/Makefile3
-rw-r--r--net/ipv6/addrconf.c65
-rw-r--r--net/ipv6/af_inet6.c16
-rw-r--r--net/ipv6/exthdrs.c158
-rw-r--r--net/ipv6/ioam6.c910
-rw-r--r--net/ipv6/ioam6_iptunnel.c274
-rw-r--r--net/ipv6/ip6_fib.c4
-rw-r--r--net/ipv6/ip6_gre.c17
-rw-r--r--net/ipv6/ip6_output.c80
-rw-r--r--net/ipv6/ip6_tunnel.c21
-rw-r--r--net/ipv6/ip6_vti.c21
-rw-r--r--net/ipv6/ip6mr.c3
-rw-r--r--net/ipv6/ipv6_sockglue.c18
-rw-r--r--net/ipv6/mcast.c20
-rw-r--r--net/ipv6/ndisc.c17
-rw-r--r--net/ipv6/netfilter/ip6table_filter.c23
-rw-r--r--net/ipv6/netfilter/ip6table_mangle.c22
-rw-r--r--net/ipv6/netfilter/ip6table_nat.c16
-rw-r--r--net/ipv6/netfilter/ip6table_raw.c24
-rw-r--r--net/ipv6/netfilter/ip6table_security.c22
-rw-r--r--net/ipv6/route.c30
-rw-r--r--net/ipv6/seg6_iptunnel.c74
-rw-r--r--net/ipv6/seg6_local.c110
-rw-r--r--net/ipv6/sit.c40
-rw-r--r--net/ipv6/sysctl_net_ipv6.c19
-rw-r--r--net/ipv6/udp.c2
27 files changed, 1714 insertions, 306 deletions
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 747f56e0c636..e504204bca92 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -328,4 +328,15 @@ config IPV6_RPL_LWTUNNEL
If unsure, say N.
+config IPV6_IOAM6_LWTUNNEL
+ bool "IPv6: IOAM Pre-allocated Trace insertion support"
+ depends on IPV6
+ select LWTUNNEL
+ help
+ Support for the inline insertion of IOAM Pre-allocated
+ Trace Header (only on locally generated packets), using
+ the lightweight tunnels mechanism.
+
+ If unsure, say N.
+
endif # IPV6
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile
index cf7b47bdb9b3..1bc7e143217b 100644
--- a/net/ipv6/Makefile
+++ b/net/ipv6/Makefile
@@ -10,7 +10,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \
exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \
- udp_offload.o seg6.o fib6_notifier.o rpl.o
+ udp_offload.o seg6.o fib6_notifier.o rpl.o ioam6.o
ipv6-offload := ip6_offload.o tcpv6_offload.o exthdrs_offload.o
@@ -27,6 +27,7 @@ ipv6-$(CONFIG_NETLABEL) += calipso.o
ipv6-$(CONFIG_IPV6_SEG6_LWTUNNEL) += seg6_iptunnel.o seg6_local.o
ipv6-$(CONFIG_IPV6_SEG6_HMAC) += seg6_hmac.o
ipv6-$(CONFIG_IPV6_RPL_LWTUNNEL) += rpl_iptunnel.o
+ipv6-$(CONFIG_IPV6_IOAM6_LWTUNNEL) += ioam6_iptunnel.o
ipv6-objs += $(ipv6-y)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 3bf685fe64b9..17756f3ed33b 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -89,6 +89,7 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/export.h>
+#include <linux/ioam6.h>
#define INFINITY_LIFE_TIME 0xFFFFFFFF
@@ -237,6 +238,9 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {
.addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64,
.disable_policy = 0,
.rpl_seg_enabled = 0,
+ .ioam6_enabled = 0,
+ .ioam6_id = IOAM6_DEFAULT_IF_ID,
+ .ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE,
};
static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -293,6 +297,9 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
.addr_gen_mode = IN6_ADDR_GEN_MODE_EUI64,
.disable_policy = 0,
.rpl_seg_enabled = 0,
+ .ioam6_enabled = 0,
+ .ioam6_id = IOAM6_DEFAULT_IF_ID,
+ .ioam6_id_wide = IOAM6_DEFAULT_IF_ID_WIDE,
};
/* Check if link is ready: is it up and is a valid qdisc available */
@@ -387,6 +394,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
ndev->cnf.addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY;
ndev->cnf.mtu6 = dev->mtu;
+ ndev->ra_mtu = 0;
ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl);
if (!ndev->nd_parms) {
kfree(ndev);
@@ -694,8 +702,7 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb,
errout:
if (in6_dev)
in6_dev_put(in6_dev);
- if (dev)
- dev_put(dev);
+ dev_put(dev);
return err;
}
@@ -1080,7 +1087,7 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg,
goto out;
}
- ifa = kzalloc(sizeof(*ifa), gfp_flags);
+ ifa = kzalloc(sizeof(*ifa), gfp_flags | __GFP_ACCOUNT);
if (!ifa) {
err = -ENOBUFS;
goto out;
@@ -3843,6 +3850,7 @@ restart:
}
idev->tstamp = jiffies;
+ idev->ra_mtu = 0;
/* Last: Shot the device (if unregistered) */
if (unregister) {
@@ -5211,8 +5219,7 @@ static int inet6_dump_addr(struct sk_buff *skb, struct netlink_callback *cb,
.netnsid = -1,
.type = type,
};
- struct net *net = sock_net(skb->sk);
- struct net *tgt_net = net;
+ struct net *tgt_net = sock_net(skb->sk);
int idx, s_idx, s_ip_idx;
int h, s_h;
struct net_device *dev;
@@ -5351,7 +5358,7 @@ static int inet6_rtm_valid_getaddr_req(struct sk_buff *skb,
static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
- struct net *net = sock_net(in_skb->sk);
+ struct net *tgt_net = sock_net(in_skb->sk);
struct inet6_fill_args fillargs = {
.portid = NETLINK_CB(in_skb).portid,
.seq = nlh->nlmsg_seq,
@@ -5359,7 +5366,6 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
.flags = 0,
.netnsid = -1,
};
- struct net *tgt_net = net;
struct ifaddrmsg *ifm;
struct nlattr *tb[IFA_MAX+1];
struct in6_addr *addr = NULL, *peer;
@@ -5412,8 +5418,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh,
errout_ifa:
in6_ifa_put(ifa);
errout:
- if (dev)
- dev_put(dev);
+ dev_put(dev);
if (fillargs.netnsid >= 0)
put_net(tgt_net);
@@ -5526,6 +5531,9 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
array[DEVCONF_DISABLE_POLICY] = cnf->disable_policy;
array[DEVCONF_NDISC_TCLASS] = cnf->ndisc_tclass;
array[DEVCONF_RPL_SEG_ENABLED] = cnf->rpl_seg_enabled;
+ array[DEVCONF_IOAM6_ENABLED] = cnf->ioam6_enabled;
+ array[DEVCONF_IOAM6_ID] = cnf->ioam6_id;
+ array[DEVCONF_IOAM6_ID_WIDE] = cnf->ioam6_id_wide;
}
static inline size_t inet6_ifla6_size(void)
@@ -5537,6 +5545,7 @@ static inline size_t inet6_ifla6_size(void)
+ nla_total_size(ICMP6_MIB_MAX * 8) /* IFLA_INET6_ICMP6STATS */
+ nla_total_size(sizeof(struct in6_addr)) /* IFLA_INET6_TOKEN */
+ nla_total_size(1) /* IFLA_INET6_ADDR_GEN_MODE */
+ + nla_total_size(4) /* IFLA_INET6_RA_MTU */
+ 0;
}
@@ -5645,6 +5654,10 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev,
if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->cnf.addr_gen_mode))
goto nla_put_failure;
+ if (idev->ra_mtu &&
+ nla_put_u32(skb, IFLA_INET6_RA_MTU, idev->ra_mtu))
+ goto nla_put_failure;
+
return 0;
nla_put_failure:
@@ -5761,6 +5774,9 @@ update_lft:
static const struct nla_policy inet6_af_policy[IFLA_INET6_MAX + 1] = {
[IFLA_INET6_ADDR_GEN_MODE] = { .type = NLA_U8 },
[IFLA_INET6_TOKEN] = { .len = sizeof(struct in6_addr) },
+ [IFLA_INET6_RA_MTU] = { .type = NLA_REJECT,
+ .reject_message =
+ "IFLA_INET6_RA_MTU can not be set" },
};
static int check_addr_gen_mode(int mode)
@@ -5784,7 +5800,8 @@ static int check_stable_privacy(struct inet6_dev *idev, struct net *net,
}
static int inet6_validate_link_af(const struct net_device *dev,
- const struct nlattr *nla)
+ const struct nlattr *nla,
+ struct netlink_ext_ack *extack)
{
struct nlattr *tb[IFLA_INET6_MAX + 1];
struct inet6_dev *idev = NULL;
@@ -5797,7 +5814,7 @@ static int inet6_validate_link_af(const struct net_device *dev,
}
err = nla_parse_nested_deprecated(tb, IFLA_INET6_MAX, nla,
- inet6_af_policy, NULL);
+ inet6_af_policy, extack);
if (err)
return err;
@@ -6540,6 +6557,7 @@ static int addrconf_sysctl_disable_policy(struct ctl_table *ctl, int write,
static int minus_one = -1;
static const int two_five_five = 255;
+static u32 ioam6_if_id_max = U16_MAX;
static const struct ctl_table addrconf_sysctl[] = {
{
@@ -6933,6 +6951,31 @@ static const struct ctl_table addrconf_sysctl[] = {
.proc_handler = proc_dointvec,
},
{
+ .procname = "ioam6_enabled",
+ .data = &ipv6_devconf.ioam6_enabled,
+ .maxlen = sizeof(u8),
+ .mode = 0644,
+ .proc_handler = proc_dou8vec_minmax,
+ .extra1 = (void *)SYSCTL_ZERO,
+ .extra2 = (void *)SYSCTL_ONE,
+ },
+ {
+ .procname = "ioam6_id",
+ .data = &ipv6_devconf.ioam6_id,
+ .maxlen = sizeof(u32),
+ .mode = 0644,
+ .proc_handler = proc_douintvec_minmax,
+ .extra1 = (void *)SYSCTL_ZERO,
+ .extra2 = (void *)&ioam6_if_id_max,
+ },
+ {
+ .procname = "ioam6_id_wide",
+ .data = &ipv6_devconf.ioam6_id_wide,
+ .maxlen = sizeof(u32),
+ .mode = 0644,
+ .proc_handler = proc_douintvec,
+ },
+ {
/* sentinel */
}
};
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 2389ff702f51..b5878bb8e419 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -62,6 +62,7 @@
#include <net/rpl.h>
#include <net/compat.h>
#include <net/xfrm.h>
+#include <net/ioam6.h>
#include <linux/uaccess.h>
#include <linux/mroute6.h>
@@ -454,7 +455,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
* changes context in a wrong way it will be caught.
*/
err = BPF_CGROUP_RUN_PROG_INET_BIND_LOCK(sk, uaddr,
- BPF_CGROUP_INET6_BIND, &flags);
+ CGROUP_INET6_BIND, &flags);
if (err)
return err;
@@ -531,7 +532,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
if (np->sndflow)
sin->sin6_flowinfo = np->flow_label;
BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
- BPF_CGROUP_INET6_GETPEERNAME,
+ CGROUP_INET6_GETPEERNAME,
NULL);
} else {
if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))
@@ -540,7 +541,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
sin->sin6_addr = sk->sk_v6_rcv_saddr;
sin->sin6_port = inet->inet_sport;
BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
- BPF_CGROUP_INET6_GETSOCKNAME,
+ CGROUP_INET6_GETSOCKNAME,
NULL);
}
sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr,
@@ -961,6 +962,9 @@ static int __net_init inet6_net_init(struct net *net)
net->ipv6.sysctl.fib_notify_on_flag_change = 0;
atomic_set(&net->ipv6.fib6_sernum, 1);
+ net->ipv6.sysctl.ioam6_id = IOAM6_DEFAULT_ID;
+ net->ipv6.sysctl.ioam6_id_wide = IOAM6_DEFAULT_ID_WIDE;
+
err = ipv6_init_mibs(net);
if (err)
return err;
@@ -1191,6 +1195,10 @@ static int __init inet6_init(void)
if (err)
goto rpl_fail;
+ err = ioam6_init();
+ if (err)
+ goto ioam6_fail;
+
err = igmp6_late_init();
if (err)
goto igmp6_late_err;
@@ -1213,6 +1221,8 @@ sysctl_fail:
igmp6_late_cleanup();
#endif
igmp6_late_err:
+ ioam6_exit();
+ioam6_fail:
rpl_exit();
rpl_fail:
seg6_exit();
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 26882e165c9e..3a871a09f962 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -49,22 +49,12 @@
#include <net/seg6_hmac.h>
#endif
#include <net/rpl.h>
+#include <linux/ioam6.h>
+#include <net/ioam6.h>
+#include <net/dst_metadata.h>
#include <linux/uaccess.h>
-/*
- * Parsing tlv encoded headers.
- *
- * Parsing function "func" returns true, if parsing succeed
- * and false, if it failed.
- * It MUST NOT touch skb->h.
- */
-
-struct tlvtype_proc {
- int type;
- bool (*func)(struct sk_buff *skb, int offset);
-};
-
/*********************
Generic functions
*********************/
@@ -109,16 +99,23 @@ drop:
return false;
}
+static bool ipv6_hop_ra(struct sk_buff *skb, int optoff);
+static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff);
+static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff);
+static bool ipv6_hop_calipso(struct sk_buff *skb, int optoff);
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+static bool ipv6_dest_hao(struct sk_buff *skb, int optoff);
+#endif
+
/* Parse tlv encoded option header (hop-by-hop or destination) */
-static bool ip6_parse_tlv(const struct tlvtype_proc *procs,
+static bool ip6_parse_tlv(bool hopbyhop,
struct sk_buff *skb,
int max_count)
{
int len = (skb_transport_header(skb)[1] + 1) << 3;
const unsigned char *nh = skb_network_header(skb);
int off = skb_network_header_len(skb);
- const struct tlvtype_proc *curr;
bool disallow_unknowns = false;
int tlv_count = 0;
int padlen = 0;
@@ -173,20 +170,45 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs,
if (tlv_count > max_count)
goto bad;
- for (curr = procs; curr->type >= 0; curr++) {
- if (curr->type == nh[off]) {
- /* type specific length/alignment
- checks will be performed in the
- func(). */
- if (curr->func(skb, off) == false)
+ if (hopbyhop) {
+ switch (nh[off]) {
+ case IPV6_TLV_ROUTERALERT:
+ if (!ipv6_hop_ra(skb, off))
+ return false;
+ break;
+ case IPV6_TLV_IOAM:
+ if (!ipv6_hop_ioam(skb, off))
+ return false;
+ break;
+ case IPV6_TLV_JUMBO:
+ if (!ipv6_hop_jumbo(skb, off))
+ return false;
+ break;
+ case IPV6_TLV_CALIPSO:
+ if (!ipv6_hop_calipso(skb, off))
+ return false;
+ break;
+ default:
+ if (!ip6_tlvopt_unknown(skb, off,
+ disallow_unknowns))
+ return false;
+ break;
+ }
+ } else {
+ switch (nh[off]) {
+#if IS_ENABLED(CONFIG_IPV6_MIP6)
+ case IPV6_TLV_HAO:
+ if (!ipv6_dest_hao(skb, off))
+ return false;
+ break;
+#endif
+ default:
+ if (!ip6_tlvopt_unknown(skb, off,
+ disallow_unknowns))
return false;
break;
}
}
- if (curr->type < 0 &&
- !ip6_tlvopt_unknown(skb, off, disallow_unknowns))
- return false;
-
padlen = 0;
}
off += optlen;
@@ -264,16 +286,6 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff)
}
#endif
-static const struct tlvtype_proc tlvprocdestopt_lst[] = {
-#if IS_ENABLED(CONFIG_IPV6_MIP6)
- {
- .type = IPV6_TLV_HAO,
- .func = ipv6_dest_hao,
- },
-#endif
- {-1, NULL}
-};
-
static int ipv6_destopt_rcv(struct sk_buff *skb)
{
struct inet6_dev *idev = __in6_dev_get(skb->dev);
@@ -304,8 +316,7 @@ fail_and_free:
dstbuf = opt->dst1;
#endif
- if (ip6_parse_tlv(tlvprocdestopt_lst, skb,
- net->ipv6.sysctl.max_dst_opts_cnt)) {
+ if (ip6_parse_tlv(false, skb, net->ipv6.sysctl.max_dst_opts_cnt)) {
skb->transport_header += extlen;
opt = IP6CB(skb);
#if IS_ENABLED(CONFIG_IPV6_MIP6)
@@ -928,6 +939,60 @@ static bool ipv6_hop_ra(struct sk_buff *skb, int optoff)
return false;
}
+/* IOAM */
+
+static bool ipv6_hop_ioam(struct sk_buff *skb, int optoff)
+{
+ struct ioam6_trace_hdr *trace;
+ struct ioam6_namespace *ns;
+ struct ioam6_hdr *hdr;
+
+ /* Bad alignment (must be 4n-aligned) */
+ if (optoff & 3)
+ goto drop;
+
+ /* Ignore if IOAM is not enabled on ingress */
+ if (!__in6_dev_get(skb->dev)->cnf.ioam6_enabled)
+ goto ignore;
+
+ /* Truncated Option header */
+ hdr = (struct ioam6_hdr *)(skb_network_header(skb) + optoff);
+ if (hdr->opt_len < 2)
+ goto drop;
+
+ switch (hdr->type) {
+ case IOAM6_TYPE_PREALLOC:
+ /* Truncated Pre-allocated Trace header */
+ if (hdr->opt_len < 2 + sizeof(*trace))
+ goto drop;
+
+ /* Malformed Pre-allocated Trace header */
+ trace = (struct ioam6_trace_hdr *)((u8 *)hdr + sizeof(*hdr));
+ if (hdr->opt_len < 2 + sizeof(*trace) + trace->remlen * 4)
+ goto drop;
+
+ /* Ignore if the IOAM namespace is unknown */
+ ns = ioam6_namespace(ipv6_skb_net(skb), trace->namespace_id);
+ if (!ns)
+ goto ignore;
+
+ if (!skb_valid_dst(skb))
+ ip6_route_input(skb);
+
+ ioam6_fill_trace_data(skb, ns, trace);
+ break;
+ default:
+ break;
+ }
+
+ignore:
+ return true;
+
+drop:
+ kfree_skb(skb);
+ return false;
+}
+
/* Jumbo payload */
static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
@@ -994,22 +1059,6 @@ drop:
return false;
}
-static const struct tlvtype_proc tlvprochopopt_lst[] = {
- {
- .type = IPV6_TLV_ROUTERALERT,
- .func = ipv6_hop_ra,
- },
- {
- .type = IPV6_TLV_JUMBO,
- .func = ipv6_hop_jumbo,
- },
- {
- .type = IPV6_TLV_CALIPSO,
- .func = ipv6_hop_calipso,
- },
- { -1, }
-};
-
int ipv6_parse_hopopts(struct sk_buff *skb)
{
struct inet6_skb_parm *opt = IP6CB(skb);
@@ -1035,8 +1084,7 @@ fail_and_free:
goto fail_and_free;
opt->flags |= IP6SKB_HOPBYHOP;
- if (ip6_parse_tlv(tlvprochopopt_lst, skb,
- net->ipv6.sysctl.max_hbh_opts_cnt)) {
+ if (ip6_parse_tlv(true, skb, net->ipv6.sysctl.max_hbh_opts_cnt)) {
skb->transport_header += extlen;
opt = IP6CB(skb);
opt->nhoff = sizeof(struct ipv6hdr);
diff --git a/net/ipv6/ioam6.c b/net/ipv6/ioam6.c
new file mode 100644
index 000000000000..5e8961004832
--- /dev/null
+++ b/net/ipv6/ioam6.c
@@ -0,0 +1,910 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * IPv6 IOAM implementation
+ *
+ * Author:
+ * Justin Iurman <justin.iurman@uliege.be>
+ */
+
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/net.h>
+#include <linux/ioam6.h>
+#include <linux/ioam6_genl.h>
+#include <linux/rhashtable.h>
+
+#include <net/addrconf.h>
+#include <net/genetlink.h>
+#include <net/ioam6.h>
+
+static void ioam6_ns_release(struct ioam6_namespace *ns)
+{
+ kfree_rcu(ns, rcu);
+}
+
+static void ioam6_sc_release(struct ioam6_schema *sc)
+{
+ kfree_rcu(sc, rcu);
+}
+
+static void ioam6_free_ns(void *ptr, void *arg)
+{
+ struct ioam6_namespace *ns = (struct ioam6_namespace *)ptr;
+
+ if (ns)
+ ioam6_ns_release(ns);
+}
+
+static void ioam6_free_sc(void *ptr, void *arg)
+{
+ struct ioam6_schema *sc = (struct ioam6_schema *)ptr;
+
+ if (sc)
+ ioam6_sc_release(sc);
+}
+
+static int ioam6_ns_cmpfn(struct rhashtable_compare_arg *arg, const void *obj)
+{
+ const struct ioam6_namespace *ns = obj;
+
+ return (ns->id != *(__be16 *)arg->key);
+}
+
+static int ioam6_sc_cmpfn(struct rhashtable_compare_arg *arg, const void *obj)
+{
+ const struct ioam6_schema *sc = obj;
+
+ return (sc->id != *(u32 *)arg->key);
+}
+
+static const struct rhashtable_params rht_ns_params = {
+ .key_len = sizeof(__be16),
+ .key_offset = offsetof(struct ioam6_namespace, id),
+ .head_offset = offsetof(struct ioam6_namespace, head),
+ .automatic_shrinking = true,
+ .obj_cmpfn = ioam6_ns_cmpfn,
+};
+
+static const struct rhashtable_params rht_sc_params = {
+ .key_len = sizeof(u32),
+ .key_offset = offsetof(struct ioam6_schema, id),
+ .head_offset = offsetof(struct ioam6_schema, head),
+ .automatic_shrinking = true,
+ .obj_cmpfn = ioam6_sc_cmpfn,
+};
+
+static struct genl_family ioam6_genl_family;
+
+static const struct nla_policy ioam6_genl_policy_addns[] = {
+ [IOAM6_ATTR_NS_ID] = { .type = NLA_U16 },
+ [IOAM6_ATTR_NS_DATA] = { .type = NLA_U32 },
+ [IOAM6_ATTR_NS_DATA_WIDE] = { .type = NLA_U64 },
+};
+
+static const struct nla_policy ioam6_genl_policy_delns[] = {
+ [IOAM6_ATTR_NS_ID] = { .type = NLA_U16 },
+};
+
+static const struct nla_policy ioam6_genl_policy_addsc[] = {
+ [IOAM6_ATTR_SC_ID] = { .type = NLA_U32 },
+ [IOAM6_ATTR_SC_DATA] = { .type = NLA_BINARY,
+ .len = IOAM6_MAX_SCHEMA_DATA_LEN },
+};
+
+static const struct nla_policy ioam6_genl_policy_delsc[] = {
+ [IOAM6_ATTR_SC_ID] = { .type = NLA_U32 },
+};
+
+static const struct nla_policy ioam6_genl_policy_ns_sc[] = {
+ [IOAM6_ATTR_NS_ID] = { .type = NLA_U16 },
+ [IOAM6_ATTR_SC_ID] = { .type = NLA_U32 },
+ [IOAM6_ATTR_SC_NONE] = { .type = NLA_FLAG },
+};
+
+static int ioam6_genl_addns(struct sk_buff *skb, struct genl_info *info)
+{
+ struct ioam6_pernet_data *nsdata;
+ struct ioam6_namespace *ns;
+ u64 data64;
+ u32 data32;
+ __be16 id;
+ int err;
+
+ if (!info->attrs[IOAM6_ATTR_NS_ID])
+ return -EINVAL;
+
+ id = cpu_to_be16(nla_get_u16(info->attrs[IOAM6_ATTR_NS_ID]));
+ nsdata = ioam6_pernet(genl_info_net(info));
+
+ mutex_lock(&nsdata->lock);
+
+ ns = rhashtable_lookup_fast(&nsdata->namespaces, &id, rht_ns_params);
+ if (ns) {
+ err = -EEXIST;
+ goto out_unlock;
+ }
+
+ ns = kzalloc(sizeof(*ns), GFP_KERNEL);
+ if (!ns) {
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+
+ ns->id = id;
+
+ if (!info->attrs[IOAM6_ATTR_NS_DATA])
+ data32 = IOAM6_U32_UNAVAILABLE;
+ else
+ data32 = nla_get_u32(info->attrs[IOAM6_ATTR_NS_DATA]);
+
+ if (!info->attrs[IOAM6_ATTR_NS_DATA_WIDE])
+ data64 = IOAM6_U64_UNAVAILABLE;
+ else
+ data64 = nla_get_u64(info->attrs[IOAM6_ATTR_NS_DATA_WIDE]);
+
+ ns->data = cpu_to_be32(data32);
+ ns->data_wide = cpu_to_be64(data64);
+
+ err = rhashtable_lookup_insert_fast(&nsdata->namespaces, &ns->head,
+ rht_ns_params);
+ if (err)
+ kfree(ns);
+
+out_unlock:
+ mutex_unlock(&nsdata->lock);
+ return err;
+}
+
+static int ioam6_genl_delns(struct sk_buff *skb, struct genl_info *info)
+{
+ struct ioam6_pernet_data *nsdata;
+ struct ioam6_namespace *ns;
+ struct ioam6_schema *sc;
+ __be16 id;
+ int err;
+
+ if (!info->attrs[IOAM6_ATTR_NS_ID])
+ return -EINVAL;
+
+ id = cpu_to_be16(nla_get_u16(info->attrs[IOAM6_ATTR_NS_ID]));
+ nsdata = ioam6_pernet(genl_info_net(info));
+
+ mutex_lock(&nsdata->lock);
+
+ ns = rhashtable_lookup_fast(&nsdata->namespaces, &id, rht_ns_params);
+ if (!ns) {
+ err = -ENOENT;
+ goto out_unlock;
+ }
+
+ sc = rcu_dereference_protected(ns->schema,
+ lockdep_is_held(&nsdata->lock));
+
+ err = rhashtable_remove_fast(&nsdata->namespaces, &ns->head,
+ rht_ns_params);
+ if (err)
+ goto out_unlock;
+
+ if (sc)
+ rcu_assign_pointer(sc->ns, NULL);
+
+ ioam6_ns_release(ns);
+
+out_unlock:
+ mutex_unlock(&nsdata->lock);
+ return err;
+}
+
+static int __ioam6_genl_dumpns_element(struct ioam6_namespace *ns,
+ u32 portid,
+ u32 seq,
+ u32 flags,
+ struct sk_buff *skb,
+ u8 cmd)
+{
+ struct ioam6_schema *sc;
+ u64 data64;
+ u32 data32;
+ void *hdr;
+
+ hdr = genlmsg_put(skb, portid, seq, &ioam6_genl_family, flags, cmd);
+ if (!hdr)
+ return -ENOMEM;
+
+ data32 = be32_to_cpu(ns->data);
+ data64 = be64_to_cpu(ns->data_wide);
+
+ if (nla_put_u16(skb, IOAM6_ATTR_NS_ID, be16_to_cpu(ns->id)) ||
+ (data32 != IOAM6_U32_UNAVAILABLE &&
+ nla_put_u32(skb, IOAM6_ATTR_NS_DATA, data32)) ||
+ (data64 != IOAM6_U64_UNAVAILABLE &&
+ nla_put_u64_64bit(skb, IOAM6_ATTR_NS_DATA_WIDE,
+ data64, IOAM6_ATTR_PAD)))
+ goto nla_put_failure;
+
+ rcu_read_lock();
+
+ sc = rcu_dereference(ns->schema);
+ if (sc && nla_put_u32(skb, IOAM6_ATTR_SC_ID, sc->id)) {
+ rcu_read_unlock();
+ goto nla_put_failure;
+ }
+
+ rcu_read_unlock();
+
+ genlmsg_end(skb, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(skb, hdr);
+ return -EMSGSIZE;
+}
+
+static int ioam6_genl_dumpns_start(struct netlink_callback *cb)
+{
+ struct ioam6_pernet_data *nsdata = ioam6_pernet(sock_net(cb->skb->sk));
+ struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0];
+
+ if (!iter) {
+ iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+ if (!iter)
+ return -ENOMEM;
+
+ cb->args[0] = (long)iter;
+ }
+
+ rhashtable_walk_enter(&nsdata->namespaces, iter);
+
+ return 0;
+}
+
+static int ioam6_genl_dumpns_done(struct netlink_callback *cb)
+{
+ struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0];
+
+ rhashtable_walk_exit(iter);
+ kfree(iter);
+
+ return 0;
+}
+
+static int ioam6_genl_dumpns(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct rhashtable_iter *iter;
+ struct ioam6_namespace *ns;
+ int err;
+
+ iter = (struct rhashtable_iter *)cb->args[0];
+ rhashtable_walk_start(iter);
+
+ for (;;) {
+ ns = rhashtable_walk_next(iter);
+
+ if (IS_ERR(ns)) {
+ if (PTR_ERR(ns) == -EAGAIN)
+ continue;
+ err = PTR_ERR(ns);
+ goto done;
+ } else if (!ns) {
+ break;
+ }
+
+ err = __ioam6_genl_dumpns_element(ns,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI,
+ skb,
+ IOAM6_CMD_DUMP_NAMESPACES);
+ if (err)
+ goto done;
+ }
+
+ err = skb->len;
+
+done:
+ rhashtable_walk_stop(iter);
+ return err;
+}
+
+static int ioam6_genl_addsc(struct sk_buff *skb, struct genl_info *info)
+{
+ struct ioam6_pernet_data *nsdata;
+ int len, len_aligned, err;
+ struct ioam6_schema *sc;
+ u32 id;
+
+ if (!info->attrs[IOAM6_ATTR_SC_ID] || !info->attrs[IOAM6_ATTR_SC_DATA])
+ return -EINVAL;
+
+ id = nla_get_u32(info->attrs[IOAM6_ATTR_SC_ID]);
+ nsdata = ioam6_pernet(genl_info_net(info));
+
+ mutex_lock(&nsdata->lock);
+
+ sc = rhashtable_lookup_fast(&nsdata->schemas, &id, rht_sc_params);
+ if (sc) {
+ err = -EEXIST;
+ goto out_unlock;
+ }
+
+ len = nla_len(info->attrs[IOAM6_ATTR_SC_DATA]);
+ len_aligned = ALIGN(len, 4);
+
+ sc = kzalloc(sizeof(*sc) + len_aligned, GFP_KERNEL);
+ if (!sc) {
+ err = -ENOMEM;
+ goto out_unlock;
+ }
+
+ sc->id = id;
+ sc->len = len_aligned;
+ sc->hdr = cpu_to_be32(sc->id | ((u8)(sc->len / 4) << 24));
+ nla_memcpy(sc->data, info->attrs[IOAM6_ATTR_SC_DATA], len);
+
+ err = rhashtable_lookup_insert_fast(&nsdata->schemas, &sc->head,
+ rht_sc_params);
+ if (err)
+ goto free_sc;
+
+out_unlock:
+ mutex_unlock(&nsdata->lock);
+ return err;
+free_sc:
+ kfree(sc);
+ goto out_unlock;
+}
+
+static int ioam6_genl_delsc(struct sk_buff *skb, struct genl_info *info)
+{
+ struct ioam6_pernet_data *nsdata;
+ struct ioam6_namespace *ns;
+ struct ioam6_schema *sc;
+ int err;
+ u32 id;
+
+ if (!info->attrs[IOAM6_ATTR_SC_ID])
+ return -EINVAL;
+
+ id = nla_get_u32(info->attrs[IOAM6_ATTR_SC_ID]);
+ nsdata = ioam6_pernet(genl_info_net(info));
+
+ mutex_lock(&nsdata->lock);
+
+ sc = rhashtable_lookup_fast(&nsdata->schemas, &id, rht_sc_params);
+ if (!sc) {
+ err = -ENOENT;
+ goto out_unlock;
+ }
+
+ ns = rcu_dereference_protected(sc->ns, lockdep_is_held(&nsdata->lock));
+
+ err = rhashtable_remove_fast(&nsdata->schemas, &sc->head,
+ rht_sc_params);
+ if (err)
+ goto out_unlock;
+
+ if (ns)
+ rcu_assign_pointer(ns->schema, NULL);
+
+ ioam6_sc_release(sc);
+
+out_unlock:
+ mutex_unlock(&nsdata->lock);
+ return err;
+}
+
+static int __ioam6_genl_dumpsc_element(struct ioam6_schema *sc,
+ u32 portid, u32 seq, u32 flags,
+ struct sk_buff *skb, u8 cmd)
+{
+ struct ioam6_namespace *ns;
+ void *hdr;
+
+ hdr = genlmsg_put(skb, portid, seq, &ioam6_genl_family, flags, cmd);
+ if (!hdr)
+ return -ENOMEM;
+
+ if (nla_put_u32(skb, IOAM6_ATTR_SC_ID, sc->id) ||
+ nla_put(skb, IOAM6_ATTR_SC_DATA, sc->len, sc->data))
+ goto nla_put_failure;
+
+ rcu_read_lock();
+
+ ns = rcu_dereference(sc->ns);
+ if (ns && nla_put_u16(skb, IOAM6_ATTR_NS_ID, be16_to_cpu(ns->id))) {
+ rcu_read_unlock();
+ goto nla_put_failure;
+ }
+
+ rcu_read_unlock();
+
+ genlmsg_end(skb, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(skb, hdr);
+ return -EMSGSIZE;
+}
+
+static int ioam6_genl_dumpsc_start(struct netlink_callback *cb)
+{
+ struct ioam6_pernet_data *nsdata = ioam6_pernet(sock_net(cb->skb->sk));
+ struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0];
+
+ if (!iter) {
+ iter = kmalloc(sizeof(*iter), GFP_KERNEL);
+ if (!iter)
+ return -ENOMEM;
+
+ cb->args[0] = (long)iter;
+ }
+
+ rhashtable_walk_enter(&nsdata->schemas, iter);
+
+ return 0;
+}
+
+static int ioam6_genl_dumpsc_done(struct netlink_callback *cb)
+{
+ struct rhashtable_iter *iter = (struct rhashtable_iter *)cb->args[0];
+
+ rhashtable_walk_exit(iter);
+ kfree(iter);
+
+ return 0;
+}
+
+static int ioam6_genl_dumpsc(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct rhashtable_iter *iter;
+ struct ioam6_schema *sc;
+ int err;
+
+ iter = (struct rhashtable_iter *)cb->args[0];
+ rhashtable_walk_start(iter);
+
+ for (;;) {
+ sc = rhashtable_walk_next(iter);
+
+ if (IS_ERR(sc)) {
+ if (PTR_ERR(sc) == -EAGAIN)
+ continue;
+ err = PTR_ERR(sc);
+ goto done;
+ } else if (!sc) {
+ break;
+ }
+
+ err = __ioam6_genl_dumpsc_element(sc,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI,
+ skb,
+ IOAM6_CMD_DUMP_SCHEMAS);
+ if (err)
+ goto done;
+ }
+
+ err = skb->len;
+
+done:
+ rhashtable_walk_stop(iter);
+ return err;
+}
+
+static int ioam6_genl_ns_set_schema(struct sk_buff *skb, struct genl_info *info)
+{
+ struct ioam6_namespace *ns, *ns_ref;
+ struct ioam6_schema *sc, *sc_ref;
+ struct ioam6_pernet_data *nsdata;
+ __be16 ns_id;
+ u32 sc_id;
+ int err;
+
+ if (!info->attrs[IOAM6_ATTR_NS_ID] ||
+ (!info->attrs[IOAM6_ATTR_SC_ID] &&
+ !info->attrs[IOAM6_ATTR_SC_NONE]))
+ return -EINVAL;
+
+ ns_id = cpu_to_be16(nla_get_u16(info->attrs[IOAM6_ATTR_NS_ID]));
+ nsdata = ioam6_pernet(genl_info_net(info));
+
+ mutex_lock(&nsdata->lock);
+
+ ns = rhashtable_lookup_fast(&nsdata->namespaces, &ns_id, rht_ns_params);
+ if (!ns) {
+ err = -ENOENT;
+ goto out_unlock;
+ }
+
+ if (info->attrs[IOAM6_ATTR_SC_NONE]) {
+ sc = NULL;
+ } else {
+ sc_id = nla_get_u32(info->attrs[IOAM6_ATTR_SC_ID]);
+ sc = rhashtable_lookup_fast(&nsdata->schemas, &sc_id,
+ rht_sc_params);
+ if (!sc) {
+ err = -ENOENT;
+ goto out_unlock;
+ }
+ }
+
+ sc_ref = rcu_dereference_protected(ns->schema,
+ lockdep_is_held(&nsdata->lock));
+ if (sc_ref)
+ rcu_assign_pointer(sc_ref->ns, NULL);
+ rcu_assign_pointer(ns->schema, sc);
+
+ if (sc) {
+ ns_ref = rcu_dereference_protected(sc->ns,
+ lockdep_is_held(&nsdata->lock));
+ if (ns_ref)
+ rcu_assign_pointer(ns_ref->schema, NULL);
+ rcu_assign_pointer(sc->ns, ns);
+ }
+
+ err = 0;
+
+out_unlock:
+ mutex_unlock(&nsdata->lock);
+ return err;
+}
+
+static const struct genl_ops ioam6_genl_ops[] = {
+ {
+ .cmd = IOAM6_CMD_ADD_NAMESPACE,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = ioam6_genl_addns,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ioam6_genl_policy_addns,
+ .maxattr = ARRAY_SIZE(ioam6_genl_policy_addns) - 1,
+ },
+ {
+ .cmd = IOAM6_CMD_DEL_NAMESPACE,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = ioam6_genl_delns,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ioam6_genl_policy_delns,
+ .maxattr = ARRAY_SIZE(ioam6_genl_policy_delns) - 1,
+ },
+ {
+ .cmd = IOAM6_CMD_DUMP_NAMESPACES,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .start = ioam6_genl_dumpns_start,
+ .dumpit = ioam6_genl_dumpns,
+ .done = ioam6_genl_dumpns_done,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = IOAM6_CMD_ADD_SCHEMA,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = ioam6_genl_addsc,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ioam6_genl_policy_addsc,
+ .maxattr = ARRAY_SIZE(ioam6_genl_policy_addsc) - 1,
+ },
+ {
+ .cmd = IOAM6_CMD_DEL_SCHEMA,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = ioam6_genl_delsc,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ioam6_genl_policy_delsc,
+ .maxattr = ARRAY_SIZE(ioam6_genl_policy_delsc) - 1,
+ },
+ {
+ .cmd = IOAM6_CMD_DUMP_SCHEMAS,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .start = ioam6_genl_dumpsc_start,
+ .dumpit = ioam6_genl_dumpsc,
+ .done = ioam6_genl_dumpsc_done,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = IOAM6_CMD_NS_SET_SCHEMA,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = ioam6_genl_ns_set_schema,
+ .flags = GENL_ADMIN_PERM,
+ .policy = ioam6_genl_policy_ns_sc,
+ .maxattr = ARRAY_SIZE(ioam6_genl_policy_ns_sc) - 1,
+ },
+};
+
+static struct genl_family ioam6_genl_family __ro_after_init = {
+ .name = IOAM6_GENL_NAME,
+ .version = IOAM6_GENL_VERSION,
+ .netnsok = true,
+ .parallel_ops = true,
+ .ops = ioam6_genl_ops,
+ .n_ops = ARRAY_SIZE(ioam6_genl_ops),
+ .module = THIS_MODULE,
+};
+
+struct ioam6_namespace *ioam6_namespace(struct net *net, __be16 id)
+{
+ struct ioam6_pernet_data *nsdata = ioam6_pernet(net);
+
+ return rhashtable_lookup_fast(&nsdata->namespaces, &id, rht_ns_params);
+}
+
+static void __ioam6_fill_trace_data(struct sk_buff *skb,
+ struct ioam6_namespace *ns,
+ struct ioam6_trace_hdr *trace,
+ struct ioam6_schema *sc,
+ u8 sclen)
+{
+ struct __kernel_sock_timeval ts;
+ u64 raw64;
+ u32 raw32;
+ u16 raw16;
+ u8 *data;
+ u8 byte;
+
+ data = trace->data + trace->remlen * 4 - trace->nodelen * 4 - sclen * 4;
+
+ /* hop_lim and node_id */
+ if (trace->type.bit0) {
+ byte = ipv6_hdr(skb)->hop_limit;
+ if (skb->dev)
+ byte--;
+
+ raw32 = dev_net(skb_dst(skb)->dev)->ipv6.sysctl.ioam6_id;
+
+ *(__be32 *)data = cpu_to_be32((byte << 24) | raw32);
+ data += sizeof(__be32);
+ }
+
+ /* ingress_if_id and egress_if_id */
+ if (trace->type.bit1) {
+ if (!skb->dev)
+ raw16 = IOAM6_U16_UNAVAILABLE;
+ else
+ raw16 = (__force u16)__in6_dev_get(skb->dev)->cnf.ioam6_id;
+
+ *(__be16 *)data = cpu_to_be16(raw16);
+ data += sizeof(__be16);
+
+ if (skb_dst(skb)->dev->flags & IFF_LOOPBACK)
+ raw16 = IOAM6_U16_UNAVAILABLE;
+ else
+ raw16 = (__force u16)__in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id;
+
+ *(__be16 *)data = cpu_to_be16(raw16);
+ data += sizeof(__be16);
+ }
+
+ /* timestamp seconds */
+ if (trace->type.bit2) {
+ if (!skb->dev) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ } else {
+ if (!skb->tstamp)
+ __net_timestamp(skb);
+
+ skb_get_new_timestamp(skb, &ts);
+ *(__be32 *)data = cpu_to_be32((u32)ts.tv_sec);
+ }
+ data += sizeof(__be32);
+ }
+
+ /* timestamp subseconds */
+ if (trace->type.bit3) {
+ if (!skb->dev) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ } else {
+ if (!skb->tstamp)
+ __net_timestamp(skb);
+
+ if (!trace->type.bit2)
+ skb_get_new_timestamp(skb, &ts);
+
+ *(__be32 *)data = cpu_to_be32((u32)ts.tv_usec);
+ }
+ data += sizeof(__be32);
+ }
+
+ /* transit delay */
+ if (trace->type.bit4) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* namespace data */
+ if (trace->type.bit5) {
+ *(__be32 *)data = ns->data;
+ data += sizeof(__be32);
+ }
+
+ /* queue depth */
+ if (trace->type.bit6) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* checksum complement */
+ if (trace->type.bit7) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* hop_lim and node_id (wide) */
+ if (trace->type.bit8) {
+ byte = ipv6_hdr(skb)->hop_limit;
+ if (skb->dev)
+ byte--;
+
+ raw64 = dev_net(skb_dst(skb)->dev)->ipv6.sysctl.ioam6_id_wide;
+
+ *(__be64 *)data = cpu_to_be64(((u64)byte << 56) | raw64);
+ data += sizeof(__be64);
+ }
+
+ /* ingress_if_id and egress_if_id (wide) */
+ if (trace->type.bit9) {
+ if (!skb->dev)
+ raw32 = IOAM6_U32_UNAVAILABLE;
+ else
+ raw32 = __in6_dev_get(skb->dev)->cnf.ioam6_id_wide;
+
+ *(__be32 *)data = cpu_to_be32(raw32);
+ data += sizeof(__be32);
+
+ if (skb_dst(skb)->dev->flags & IFF_LOOPBACK)
+ raw32 = IOAM6_U32_UNAVAILABLE;
+ else
+ raw32 = __in6_dev_get(skb_dst(skb)->dev)->cnf.ioam6_id_wide;
+
+ *(__be32 *)data = cpu_to_be32(raw32);
+ data += sizeof(__be32);
+ }
+
+ /* namespace data (wide) */
+ if (trace->type.bit10) {
+ *(__be64 *)data = ns->data_wide;
+ data += sizeof(__be64);
+ }
+
+ /* buffer occupancy */
+ if (trace->type.bit11) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE);
+ data += sizeof(__be32);
+ }
+
+ /* opaque state snapshot */
+ if (trace->type.bit22) {
+ if (!sc) {
+ *(__be32 *)data = cpu_to_be32(IOAM6_U32_UNAVAILABLE >> 8);
+ } else {
+ *(__be32 *)data = sc->hdr;
+ data += sizeof(__be32);
+
+ memcpy(data, sc->data, sc->len);
+ }
+ }
+}
+
+/* called with rcu_read_lock() */
+void ioam6_fill_trace_data(struct sk_buff *skb,
+ struct ioam6_namespace *ns,
+ struct ioam6_trace_hdr *trace)
+{
+ struct ioam6_schema *sc;
+ u8 sclen = 0;
+
+ /* Skip if Overflow flag is set OR
+ * if an unknown type (bit 12-21) is set
+ */
+ if (trace->overflow ||
+ trace->type.bit12 | trace->type.bit13 | trace->type.bit14 |
+ trace->type.bit15 | trace->type.bit16 | trace->type.bit17 |
+ trace->type.bit18 | trace->type.bit19 | trace->type.bit20 |
+ trace->type.bit21) {
+ return;
+ }
+
+ /* NodeLen does not include Opaque State Snapshot length. We need to
+ * take it into account if the corresponding bit is set (bit 22) and
+ * if the current IOAM namespace has an active schema attached to it
+ */
+ sc = rcu_dereference(ns->schema);
+ if (trace->type.bit22) {
+ sclen = sizeof_field(struct ioam6_schema, hdr) / 4;
+
+ if (sc)
+ sclen += sc->len / 4;
+ }
+
+ /* If there is no space remaining, we set the Overflow flag and we
+ * skip without filling the trace
+ */
+ if (!trace->remlen || trace->remlen < trace->nodelen + sclen) {
+ trace->overflow = 1;
+ return;
+ }
+
+ __ioam6_fill_trace_data(skb, ns, trace, sc, sclen);
+ trace->remlen -= trace->nodelen + sclen;
+}
+
+static int __net_init ioam6_net_init(struct net *net)
+{
+ struct ioam6_pernet_data *nsdata;
+ int err = -ENOMEM;
+
+ nsdata = kzalloc(sizeof(*nsdata), GFP_KERNEL);
+ if (!nsdata)
+ goto out;
+
+ mutex_init(&nsdata->lock);
+ net->ipv6.ioam6_data = nsdata;
+
+ err = rhashtable_init(&nsdata->namespaces, &rht_ns_params);
+ if (err)
+ goto free_nsdata;
+
+ err = rhashtable_init(&nsdata->schemas, &rht_sc_params);
+ if (err)
+ goto free_rht_ns;
+
+out:
+ return err;
+free_rht_ns:
+ rhashtable_destroy(&nsdata->namespaces);
+free_nsdata:
+ kfree(nsdata);
+ net->ipv6.ioam6_data = NULL;
+ goto out;
+}
+
+static void __net_exit ioam6_net_exit(struct net *net)
+{
+ struct ioam6_pernet_data *nsdata = ioam6_pernet(net);
+
+ rhashtable_free_and_destroy(&nsdata->namespaces, ioam6_free_ns, NULL);
+ rhashtable_free_and_destroy(&nsdata->schemas, ioam6_free_sc, NULL);
+
+ kfree(nsdata);
+}
+
+static struct pernet_operations ioam6_net_ops = {
+ .init = ioam6_net_init,
+ .exit = ioam6_net_exit,
+};
+
+int __init ioam6_init(void)
+{
+ int err = register_pernet_subsys(&ioam6_net_ops);
+ if (err)
+ goto out;
+
+ err = genl_register_family(&ioam6_genl_family);
+ if (err)
+ goto out_unregister_pernet_subsys;
+
+#ifdef CONFIG_IPV6_IOAM6_LWTUNNEL
+ err = ioam6_iptunnel_init();
+ if (err)
+ goto out_unregister_genl;
+#endif
+
+ pr_info("In-situ OAM (IOAM) with IPv6\n");
+
+out:
+ return err;
+#ifdef CONFIG_IPV6_IOAM6_LWTUNNEL
+out_unregister_genl:
+ genl_unregister_family(&ioam6_genl_family);
+#endif
+out_unregister_pernet_subsys:
+ unregister_pernet_subsys(&ioam6_net_ops);
+ goto out;
+}
+
+void ioam6_exit(void)
+{
+#ifdef CONFIG_IPV6_IOAM6_LWTUNNEL
+ ioam6_iptunnel_exit();
+#endif
+ genl_unregister_family(&ioam6_genl_family);
+ unregister_pernet_subsys(&ioam6_net_ops);
+}
diff --git a/net/ipv6/ioam6_iptunnel.c b/net/ipv6/ioam6_iptunnel.c
new file mode 100644
index 000000000000..f9ee04541c17
--- /dev/null
+++ b/net/ipv6/ioam6_iptunnel.c
@@ -0,0 +1,274 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * IPv6 IOAM Lightweight Tunnel implementation
+ *
+ * Author:
+ * Justin Iurman <justin.iurman@uliege.be>
+ */
+
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/net.h>
+#include <linux/netlink.h>
+#include <linux/in6.h>
+#include <linux/ioam6.h>
+#include <linux/ioam6_iptunnel.h>
+#include <net/dst.h>
+#include <net/sock.h>
+#include <net/lwtunnel.h>
+#include <net/ioam6.h>
+
+#define IOAM6_MASK_SHORT_FIELDS 0xff100000
+#define IOAM6_MASK_WIDE_FIELDS 0xe00000
+
+struct ioam6_lwt_encap {
+ struct ipv6_hopopt_hdr eh;
+ u8 pad[2]; /* 2-octet padding for 4n-alignment */
+ struct ioam6_hdr ioamh;
+ struct ioam6_trace_hdr traceh;
+} __packed;
+
+struct ioam6_lwt {
+ struct ioam6_lwt_encap tuninfo;
+};
+
+static struct ioam6_lwt *ioam6_lwt_state(struct lwtunnel_state *lwt)
+{
+ return (struct ioam6_lwt *)lwt->data;
+}
+
+static struct ioam6_lwt_encap *ioam6_lwt_info(struct lwtunnel_state *lwt)
+{
+ return &ioam6_lwt_state(lwt)->tuninfo;
+}
+
+static struct ioam6_trace_hdr *ioam6_trace(struct lwtunnel_state *lwt)
+{
+ return &(ioam6_lwt_state(lwt)->tuninfo.traceh);
+}
+
+static const struct nla_policy ioam6_iptunnel_policy[IOAM6_IPTUNNEL_MAX + 1] = {
+ [IOAM6_IPTUNNEL_TRACE] = NLA_POLICY_EXACT_LEN(sizeof(struct ioam6_trace_hdr)),
+};
+
+static int nla_put_ioam6_trace(struct sk_buff *skb, int attrtype,
+ struct ioam6_trace_hdr *trace)
+{
+ struct ioam6_trace_hdr *data;
+ struct nlattr *nla;
+ int len;
+
+ len = sizeof(*trace);
+
+ nla = nla_reserve(skb, attrtype, len);
+ if (!nla)
+ return -EMSGSIZE;
+
+ data = nla_data(nla);
+ memcpy(data, trace, len);
+
+ return 0;
+}
+
+static bool ioam6_validate_trace_hdr(struct ioam6_trace_hdr *trace)
+{
+ u32 fields;
+
+ if (!trace->type_be32 || !trace->remlen ||
+ trace->remlen > IOAM6_TRACE_DATA_SIZE_MAX / 4)
+ return false;
+
+ trace->nodelen = 0;
+ fields = be32_to_cpu(trace->type_be32);
+
+ trace->nodelen += hweight32(fields & IOAM6_MASK_SHORT_FIELDS)
+ * (sizeof(__be32) / 4);
+ trace->nodelen += hweight32(fields & IOAM6_MASK_WIDE_FIELDS)
+ * (sizeof(__be64) / 4);
+
+ return true;
+}
+
+static int ioam6_build_state(struct net *net, struct nlattr *nla,
+ unsigned int family, const void *cfg,
+ struct lwtunnel_state **ts,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *tb[IOAM6_IPTUNNEL_MAX + 1];
+ struct ioam6_lwt_encap *tuninfo;
+ struct ioam6_trace_hdr *trace;
+ struct lwtunnel_state *s;
+ int len_aligned;
+ int len, err;
+
+ if (family != AF_INET6)
+ return -EINVAL;
+
+ err = nla_parse_nested(tb, IOAM6_IPTUNNEL_MAX, nla,
+ ioam6_iptunnel_policy, extack);
+ if (err < 0)
+ return err;
+
+ if (!tb[IOAM6_IPTUNNEL_TRACE]) {
+ NL_SET_ERR_MSG(extack, "missing trace");
+ return -EINVAL;
+ }
+
+ trace = nla_data(tb[IOAM6_IPTUNNEL_TRACE]);
+ if (!ioam6_validate_trace_hdr(trace)) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[IOAM6_IPTUNNEL_TRACE],
+ "invalid trace validation");
+ return -EINVAL;
+ }
+
+ len = sizeof(*tuninfo) + trace->remlen * 4;
+ len_aligned = ALIGN(len, 8);
+
+ s = lwtunnel_state_alloc(len_aligned);
+ if (!s)
+ return -ENOMEM;
+
+ tuninfo = ioam6_lwt_info(s);
+ tuninfo->eh.hdrlen = (len_aligned >> 3) - 1;
+ tuninfo->pad[0] = IPV6_TLV_PADN;
+ tuninfo->ioamh.type = IOAM6_TYPE_PREALLOC;
+ tuninfo->ioamh.opt_type = IPV6_TLV_IOAM;
+ tuninfo->ioamh.opt_len = sizeof(tuninfo->ioamh) - 2 + sizeof(*trace)
+ + trace->remlen * 4;
+
+ memcpy(&tuninfo->traceh, trace, sizeof(*trace));
+
+ len = len_aligned - len;
+ if (len == 1) {
+ tuninfo->traceh.data[trace->remlen * 4] = IPV6_TLV_PAD1;
+ } else if (len > 0) {
+ tuninfo->traceh.data[trace->remlen * 4] = IPV6_TLV_PADN;
+ tuninfo->traceh.data[trace->remlen * 4 + 1] = len - 2;
+ }
+
+ s->type = LWTUNNEL_ENCAP_IOAM6;
+ s->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;
+
+ *ts = s;
+
+ return 0;
+}
+
+static int ioam6_do_inline(struct sk_buff *skb, struct ioam6_lwt_encap *tuninfo)
+{
+ struct ioam6_trace_hdr *trace;
+ struct ipv6hdr *oldhdr, *hdr;
+ struct ioam6_namespace *ns;
+ int hdrlen, err;
+
+ hdrlen = (tuninfo->eh.hdrlen + 1) << 3;
+
+ err = skb_cow_head(skb, hdrlen + skb->mac_len);
+ if (unlikely(err))
+ return err;
+
+ oldhdr = ipv6_hdr(skb);
+ skb_pull(skb, sizeof(*oldhdr));
+ skb_postpull_rcsum(skb, skb_network_header(skb), sizeof(*oldhdr));
+
+ skb_push(skb, sizeof(*oldhdr) + hdrlen);
+ skb_reset_network_header(skb);
+ skb_mac_header_rebuild(skb);
+
+ hdr = ipv6_hdr(skb);
+ memmove(hdr, oldhdr, sizeof(*oldhdr));
+ tuninfo->eh.nexthdr = hdr->nexthdr;
+
+ skb_set_transport_header(skb, sizeof(*hdr));
+ skb_postpush_rcsum(skb, hdr, sizeof(*hdr) + hdrlen);
+
+ memcpy(skb_transport_header(skb), (u8 *)tuninfo, hdrlen);
+
+ hdr->nexthdr = NEXTHDR_HOP;
+ hdr->payload_len = cpu_to_be16(skb->len - sizeof(*hdr));
+
+ trace = (struct ioam6_trace_hdr *)(skb_transport_header(skb)
+ + sizeof(struct ipv6_hopopt_hdr) + 2
+ + sizeof(struct ioam6_hdr));
+
+ ns = ioam6_namespace(dev_net(skb_dst(skb)->dev), trace->namespace_id);
+ if (ns)
+ ioam6_fill_trace_data(skb, ns, trace);
+
+ return 0;
+}
+
+static int ioam6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+ struct lwtunnel_state *lwt = skb_dst(skb)->lwtstate;
+ int err = -EINVAL;
+
+ if (skb->protocol != htons(ETH_P_IPV6))
+ goto drop;
+
+ /* Only for packets we send and
+ * that do not contain a Hop-by-Hop yet
+ */
+ if (skb->dev || ipv6_hdr(skb)->nexthdr == NEXTHDR_HOP)
+ goto out;
+
+ err = ioam6_do_inline(skb, ioam6_lwt_info(lwt));
+ if (unlikely(err))
+ goto drop;
+
+ err = skb_cow_head(skb, LL_RESERVED_SPACE(skb_dst(skb)->dev));
+ if (unlikely(err))
+ goto drop;
+
+out:
+ return lwt->orig_output(net, sk, skb);
+
+drop:
+ kfree_skb(skb);
+ return err;
+}
+
+static int ioam6_fill_encap_info(struct sk_buff *skb,
+ struct lwtunnel_state *lwtstate)
+{
+ struct ioam6_trace_hdr *trace = ioam6_trace(lwtstate);
+
+ if (nla_put_ioam6_trace(skb, IOAM6_IPTUNNEL_TRACE, trace))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int ioam6_encap_nlsize(struct lwtunnel_state *lwtstate)
+{
+ struct ioam6_trace_hdr *trace = ioam6_trace(lwtstate);
+
+ return nla_total_size(sizeof(*trace));
+}
+
+static int ioam6_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b)
+{
+ struct ioam6_trace_hdr *a_hdr = ioam6_trace(a);
+ struct ioam6_trace_hdr *b_hdr = ioam6_trace(b);
+
+ return (a_hdr->namespace_id != b_hdr->namespace_id);
+}
+
+static const struct lwtunnel_encap_ops ioam6_iptun_ops = {
+ .build_state = ioam6_build_state,
+ .output = ioam6_output,
+ .fill_encap = ioam6_fill_encap_info,
+ .get_encap_size = ioam6_encap_nlsize,
+ .cmp_encap = ioam6_encap_cmp,
+ .owner = THIS_MODULE,
+};
+
+int __init ioam6_iptunnel_init(void)
+{
+ return lwtunnel_encap_add_ops(&ioam6_iptun_ops, LWTUNNEL_ENCAP_IOAM6);
+}
+
+void ioam6_iptunnel_exit(void)
+{
+ lwtunnel_encap_del_ops(&ioam6_iptun_ops, LWTUNNEL_ENCAP_IOAM6);
+}
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index ef75c9b05f17..1bec5b22f80d 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -2449,8 +2449,8 @@ int __init fib6_init(void)
int ret = -ENOMEM;
fib6_node_kmem = kmem_cache_create("fib6_nodes",
- sizeof(struct fib6_node),
- 0, SLAB_HWCACHE_ALIGN,
+ sizeof(struct fib6_node), 0,
+ SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT,
NULL);
if (!fib6_node_kmem)
goto out;
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index 7a5e90e09363..7baf41d160f5 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -1246,8 +1246,9 @@ static void ip6gre_tnl_parm_to_user(struct ip6_tnl_parm2 *u,
memcpy(u->name, p->name, sizeof(u->name));
}
-static int ip6gre_tunnel_ioctl(struct net_device *dev,
- struct ifreq *ifr, int cmd)
+static int ip6gre_tunnel_siocdevprivate(struct net_device *dev,
+ struct ifreq *ifr, void __user *data,
+ int cmd)
{
int err = 0;
struct ip6_tnl_parm2 p;
@@ -1261,7 +1262,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,
switch (cmd) {
case SIOCGETTUNNEL:
if (dev == ign->fb_tunnel_dev) {
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
+ if (copy_from_user(&p, data, sizeof(p))) {
err = -EFAULT;
break;
}
@@ -1272,7 +1273,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,
}
memset(&p, 0, sizeof(p));
ip6gre_tnl_parm_to_user(&p, &t->parms);
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+ if (copy_to_user(data, &p, sizeof(p)))
err = -EFAULT;
break;
@@ -1283,7 +1284,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,
goto done;
err = -EFAULT;
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ if (copy_from_user(&p, data, sizeof(p)))
goto done;
err = -EINVAL;
@@ -1320,7 +1321,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,
memset(&p, 0, sizeof(p));
ip6gre_tnl_parm_to_user(&p, &t->parms);
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+ if (copy_to_user(data, &p, sizeof(p)))
err = -EFAULT;
} else
err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
@@ -1333,7 +1334,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,
if (dev == ign->fb_tunnel_dev) {
err = -EFAULT;
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ if (copy_from_user(&p, data, sizeof(p)))
goto done;
err = -ENOENT;
ip6gre_tnl_parm_from_user(&p1, &p);
@@ -1400,7 +1401,7 @@ static const struct net_device_ops ip6gre_netdev_ops = {
.ndo_init = ip6gre_tunnel_init,
.ndo_uninit = ip6gre_tunnel_uninit,
.ndo_start_xmit = ip6gre_tunnel_xmit,
- .ndo_do_ioctl = ip6gre_tunnel_ioctl,
+ .ndo_siocdevprivate = ip6gre_tunnel_siocdevprivate,
.ndo_change_mtu = ip6_tnl_change_mtu,
.ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip6_tnl_get_iflink,
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 8e6ca9ad6812..12f985f43bcc 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -60,46 +60,29 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
{
struct dst_entry *dst = skb_dst(skb);
struct net_device *dev = dst->dev;
+ struct inet6_dev *idev = ip6_dst_idev(dst);
unsigned int hh_len = LL_RESERVED_SPACE(dev);
- int delta = hh_len - skb_headroom(skb);
- const struct in6_addr *nexthop;
+ const struct in6_addr *daddr, *nexthop;
+ struct ipv6hdr *hdr;
struct neighbour *neigh;
int ret;
/* Be paranoid, rather than too clever. */
- if (unlikely(delta > 0) && dev->header_ops) {
- /* pskb_expand_head() might crash, if skb is shared */
- if (skb_shared(skb)) {
- struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
-
- if (likely(nskb)) {
- if (skb->sk)
- skb_set_owner_w(nskb, skb->sk);
- consume_skb(skb);
- } else {
- kfree_skb(skb);
- }
- skb = nskb;
- }
- if (skb &&
- pskb_expand_head(skb, SKB_DATA_ALIGN(delta), 0, GFP_ATOMIC)) {
- kfree_skb(skb);
- skb = NULL;
- }
+ if (unlikely(hh_len > skb_headroom(skb)) && dev->header_ops) {
+ skb = skb_expand_head(skb, hh_len);
if (!skb) {
- IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
return -ENOMEM;
}
}
- if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
- struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
-
+ hdr = ipv6_hdr(skb);
+ daddr = &hdr->daddr;
+ if (ipv6_addr_is_multicast(daddr)) {
if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
((mroute6_is_socket(net, skb) &&
!(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
- ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
- &ipv6_hdr(skb)->saddr))) {
+ ipv6_chk_mcast_addr(dev, daddr, &hdr->saddr))) {
struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
/* Do not check for IFF_ALLMULTI; multicast routing
@@ -110,7 +93,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
net, sk, newskb, NULL, newskb->dev,
dev_loopback_xmit);
- if (ipv6_hdr(skb)->hop_limit == 0) {
+ if (hdr->hop_limit == 0) {
IP6_INC_STATS(net, idev,
IPSTATS_MIB_OUTDISCARDS);
kfree_skb(skb);
@@ -119,9 +102,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
}
IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);
-
- if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
- IPV6_ADDR_SCOPE_NODELOCAL &&
+ if (IPV6_ADDR_MC_SCOPE(daddr) <= IPV6_ADDR_SCOPE_NODELOCAL &&
!(dev->flags & IFF_LOOPBACK)) {
kfree_skb(skb);
return 0;
@@ -136,10 +117,10 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
}
rcu_read_lock_bh();
- nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
- neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
+ nexthop = rt6_nexthop((struct rt6_info *)dst, daddr);
+ neigh = __ipv6_neigh_lookup_noref(dev, nexthop);
if (unlikely(!neigh))
- neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
+ neigh = __neigh_create(&nd_tbl, nexthop, dev, false);
if (!IS_ERR(neigh)) {
sock_confirm_neigh(skb, neigh);
ret = neigh_output(neigh, skb, false);
@@ -148,7 +129,7 @@ static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *
}
rcu_read_unlock_bh();
- IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTNOROUTES);
kfree_skb(skb);
return -EINVAL;
}
@@ -268,6 +249,8 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
const struct ipv6_pinfo *np = inet6_sk(sk);
struct in6_addr *first_hop = &fl6->daddr;
struct dst_entry *dst = skb_dst(skb);
+ struct net_device *dev = dst->dev;
+ struct inet6_dev *idev = ip6_dst_idev(dst);
unsigned int head_room;
struct ipv6hdr *hdr;
u8 proto = fl6->flowi6_proto;
@@ -275,22 +258,16 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
int hlimit = -1;
u32 mtu;
- head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
+ head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dev);
if (opt)
head_room += opt->opt_nflen + opt->opt_flen;
- if (unlikely(skb_headroom(skb) < head_room)) {
- struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
- if (!skb2) {
- IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_OUTDISCARDS);
- kfree_skb(skb);
+ if (unlikely(head_room > skb_headroom(skb))) {
+ skb = skb_expand_head(skb, head_room);
+ if (!skb) {
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
return -ENOBUFS;
}
- if (skb->sk)
- skb_set_owner_w(skb2, skb->sk);
- consume_skb(skb);
- skb = skb2;
}
if (opt) {
@@ -332,8 +309,7 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
mtu = dst_mtu(dst);
if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
- IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_OUT, skb->len);
+ IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len);
/* if egress device is enslaved to an L3 master device pass the
* skb to its handler for processing
@@ -346,17 +322,17 @@ int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
* we promote our socket to non const
*/
return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
- net, (struct sock *)sk, skb, NULL, dst->dev,
+ net, (struct sock *)sk, skb, NULL, dev,
dst_output);
}
- skb->dev = dst->dev;
+ skb->dev = dev;
/* ipv6_local_error() does not require socket lock,
* we promote our socket to non const
*/
ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);
- IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
+ IP6_INC_STATS(net, idev, IPSTATS_MIB_FRAGFAILS);
kfree_skb(skb);
return -EMSGSIZE;
}
@@ -608,7 +584,7 @@ int ip6_forward(struct sk_buff *skb)
}
}
- mtu = ip6_dst_mtu_forward(dst);
+ mtu = ip6_dst_mtu_maybe_forward(dst, true);
if (mtu < IPV6_MIN_MTU)
mtu = IPV6_MIN_MTU;
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 322698d9fcf4..20a67efda47f 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1581,9 +1581,10 @@ ip6_tnl_parm_to_user(struct ip6_tnl_parm *u, const struct __ip6_tnl_parm *p)
}
/**
- * ip6_tnl_ioctl - configure ipv6 tunnels from userspace
+ * ip6_tnl_siocdevprivate - configure ipv6 tunnels from userspace
* @dev: virtual device associated with tunnel
- * @ifr: parameters passed from userspace
+ * @ifr: unused
+ * @data: parameters passed from userspace
* @cmd: command to be performed
*
* Description:
@@ -1609,7 +1610,8 @@ ip6_tnl_parm_to_user(struct ip6_tnl_parm *u, const struct __ip6_tnl_parm *p)
**/
static int
-ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+ip6_tnl_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+ void __user *data, int cmd)
{
int err = 0;
struct ip6_tnl_parm p;
@@ -1623,7 +1625,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
switch (cmd) {
case SIOCGETTUNNEL:
if (dev == ip6n->fb_tnl_dev) {
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
+ if (copy_from_user(&p, data, sizeof(p))) {
err = -EFAULT;
break;
}
@@ -1635,9 +1637,8 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
memset(&p, 0, sizeof(p));
}
ip6_tnl_parm_to_user(&p, &t->parms);
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) {
+ if (copy_to_user(data, &p, sizeof(p)))
err = -EFAULT;
- }
break;
case SIOCADDTUNNEL:
case SIOCCHGTUNNEL:
@@ -1645,7 +1646,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
break;
err = -EFAULT;
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ if (copy_from_user(&p, data, sizeof(p)))
break;
err = -EINVAL;
if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP &&
@@ -1669,7 +1670,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (!IS_ERR(t)) {
err = 0;
ip6_tnl_parm_to_user(&p, &t->parms);
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+ if (copy_to_user(data, &p, sizeof(p)))
err = -EFAULT;
} else {
@@ -1683,7 +1684,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (dev == ip6n->fb_tnl_dev) {
err = -EFAULT;
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ if (copy_from_user(&p, data, sizeof(p)))
break;
err = -ENOENT;
ip6_tnl_parm_from_user(&p1, &p);
@@ -1802,7 +1803,7 @@ static const struct net_device_ops ip6_tnl_netdev_ops = {
.ndo_init = ip6_tnl_dev_init,
.ndo_uninit = ip6_tnl_dev_uninit,
.ndo_start_xmit = ip6_tnl_start_xmit,
- .ndo_do_ioctl = ip6_tnl_ioctl,
+ .ndo_siocdevprivate = ip6_tnl_siocdevprivate,
.ndo_change_mtu = ip6_tnl_change_mtu,
.ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip6_tnl_get_iflink,
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index 2d048e21abbb..1d8e3ffa225d 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -771,13 +771,14 @@ vti6_parm_to_user(struct ip6_tnl_parm2 *u, const struct __ip6_tnl_parm *p)
}
/**
- * vti6_ioctl - configure vti6 tunnels from userspace
+ * vti6_siocdevprivate - configure vti6 tunnels from userspace
* @dev: virtual device associated with tunnel
- * @ifr: parameters passed from userspace
+ * @ifr: unused
+ * @data: parameters passed from userspace
* @cmd: command to be performed
*
* Description:
- * vti6_ioctl() is used for managing vti6 tunnels
+ * vti6_siocdevprivate() is used for managing vti6 tunnels
* from userspace.
*
* The possible commands are the following:
@@ -798,7 +799,7 @@ vti6_parm_to_user(struct ip6_tnl_parm2 *u, const struct __ip6_tnl_parm *p)
* %-ENODEV if attempting to change or delete a nonexisting device
**/
static int
-vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+vti6_siocdevprivate(struct net_device *dev, struct ifreq *ifr, void __user *data, int cmd)
{
int err = 0;
struct ip6_tnl_parm2 p;
@@ -810,7 +811,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
switch (cmd) {
case SIOCGETTUNNEL:
if (dev == ip6n->fb_tnl_dev) {
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
+ if (copy_from_user(&p, data, sizeof(p))) {
err = -EFAULT;
break;
}
@@ -822,7 +823,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (!t)
t = netdev_priv(dev);
vti6_parm_to_user(&p, &t->parms);
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+ if (copy_to_user(data, &p, sizeof(p)))
err = -EFAULT;
break;
case SIOCADDTUNNEL:
@@ -831,7 +832,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
break;
err = -EFAULT;
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ if (copy_from_user(&p, data, sizeof(p)))
break;
err = -EINVAL;
if (p.proto != IPPROTO_IPV6 && p.proto != 0)
@@ -852,7 +853,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (t) {
err = 0;
vti6_parm_to_user(&p, &t->parms);
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+ if (copy_to_user(data, &p, sizeof(p)))
err = -EFAULT;
} else
@@ -865,7 +866,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (dev == ip6n->fb_tnl_dev) {
err = -EFAULT;
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ if (copy_from_user(&p, data, sizeof(p)))
break;
err = -ENOENT;
vti6_parm_from_user(&p1, &p);
@@ -890,7 +891,7 @@ static const struct net_device_ops vti6_netdev_ops = {
.ndo_init = vti6_dev_init,
.ndo_uninit = vti6_dev_uninit,
.ndo_start_xmit = vti6_tnl_xmit,
- .ndo_do_ioctl = vti6_ioctl,
+ .ndo_siocdevprivate = vti6_siocdevprivate,
.ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip6_tnl_get_iflink,
};
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 06b0d2c329b9..36ed9efb8825 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -559,8 +559,7 @@ static int pim6_rcv(struct sk_buff *skb)
read_lock(&mrt_lock);
if (reg_vif_num >= 0)
reg_dev = mrt->vif_table[reg_vif_num].dev;
- if (reg_dev)
- dev_hold(reg_dev);
+ dev_hold(reg_dev);
read_unlock(&mrt_lock);
if (!reg_dev)
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index a6804a7e34c1..e4bdb09c5586 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -225,7 +225,7 @@ static int ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
if (GROUP_FILTER_SIZE(gsf->gf_numsrc) > optlen)
goto out_free_gsf;
- ret = ip6_mc_msfilter(sk, gsf, gsf->gf_slist);
+ ret = ip6_mc_msfilter(sk, gsf, gsf->gf_slist_flex);
out_free_gsf:
kfree(gsf);
return ret;
@@ -234,7 +234,7 @@ out_free_gsf:
static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
int optlen)
{
- const int size0 = offsetof(struct compat_group_filter, gf_slist);
+ const int size0 = offsetof(struct compat_group_filter, gf_slist_flex);
struct compat_group_filter *gf32;
void *p;
int ret;
@@ -249,7 +249,7 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
if (!p)
return -ENOMEM;
- gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */
+ gf32 = p + 4; /* we want ->gf_group and ->gf_slist_flex aligned */
ret = -EFAULT;
if (copy_from_sockptr(gf32, optval, optlen))
goto out_free_p;
@@ -261,14 +261,14 @@ static int compat_ipv6_set_mcast_msfilter(struct sock *sk, sockptr_t optval,
goto out_free_p;
ret = -EINVAL;
- if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen)
+ if (offsetof(struct compat_group_filter, gf_slist_flex[n]) > optlen)
goto out_free_p;
ret = ip6_mc_msfilter(sk, &(struct group_filter){
.gf_interface = gf32->gf_interface,
.gf_group = gf32->gf_group,
.gf_fmode = gf32->gf_fmode,
- .gf_numsrc = gf32->gf_numsrc}, gf32->gf_slist);
+ .gf_numsrc = gf32->gf_numsrc}, gf32->gf_slist_flex);
out_free_p:
kfree(p);
@@ -1048,7 +1048,7 @@ static int ipv6_getsockopt_sticky(struct sock *sk, struct ipv6_txoptions *opt,
static int ipv6_get_msfilter(struct sock *sk, void __user *optval,
int __user *optlen, int len)
{
- const int size0 = offsetof(struct group_filter, gf_slist);
+ const int size0 = offsetof(struct group_filter, gf_slist_flex);
struct group_filter __user *p = optval;
struct group_filter gsf;
int num;
@@ -1062,7 +1062,7 @@ static int ipv6_get_msfilter(struct sock *sk, void __user *optval,
return -EADDRNOTAVAIL;
num = gsf.gf_numsrc;
lock_sock(sk);
- err = ip6_mc_msfget(sk, &gsf, p->gf_slist);
+ err = ip6_mc_msfget(sk, &gsf, p->gf_slist_flex);
if (!err) {
if (num > gsf.gf_numsrc)
num = gsf.gf_numsrc;
@@ -1077,7 +1077,7 @@ static int ipv6_get_msfilter(struct sock *sk, void __user *optval,
static int compat_ipv6_get_msfilter(struct sock *sk, void __user *optval,
int __user *optlen)
{
- const int size0 = offsetof(struct compat_group_filter, gf_slist);
+ const int size0 = offsetof(struct compat_group_filter, gf_slist_flex);
struct compat_group_filter __user *p = optval;
struct compat_group_filter gf32;
struct group_filter gf;
@@ -1100,7 +1100,7 @@ static int compat_ipv6_get_msfilter(struct sock *sk, void __user *optval,
return -EADDRNOTAVAIL;
lock_sock(sk);
- err = ip6_mc_msfget(sk, &gf, p->gf_slist);
+ err = ip6_mc_msfget(sk, &gf, p->gf_slist_flex);
release_sock(sk);
if (err)
return err;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 54ec163fbafa..cd951faa2fac 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -447,7 +447,8 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
if (psl)
count += psl->sl_max;
- newpsl = sock_kmalloc(sk, IP6_SFLSIZE(count), GFP_KERNEL);
+ newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr, count),
+ GFP_KERNEL);
if (!newpsl) {
err = -ENOBUFS;
goto done;
@@ -457,7 +458,8 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
if (psl) {
for (i = 0; i < psl->sl_count; i++)
newpsl->sl_addr[i] = psl->sl_addr[i];
- atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+ atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
+ &sk->sk_omem_alloc);
kfree_rcu(psl, rcu);
}
psl = newpsl;
@@ -525,8 +527,9 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
goto done;
}
if (gsf->gf_numsrc) {
- newpsl = sock_kmalloc(sk, IP6_SFLSIZE(gsf->gf_numsrc),
- GFP_KERNEL);
+ newpsl = sock_kmalloc(sk, struct_size(newpsl, sl_addr,
+ gsf->gf_numsrc),
+ GFP_KERNEL);
if (!newpsl) {
err = -ENOBUFS;
goto done;
@@ -543,7 +546,8 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
newpsl->sl_count, newpsl->sl_addr, 0);
if (err) {
mutex_unlock(&idev->mc_lock);
- sock_kfree_s(sk, newpsl, IP6_SFLSIZE(newpsl->sl_max));
+ sock_kfree_s(sk, newpsl, struct_size(newpsl, sl_addr,
+ newpsl->sl_max));
goto done;
}
mutex_unlock(&idev->mc_lock);
@@ -559,7 +563,8 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
if (psl) {
ip6_mc_del_src(idev, group, pmc->sfmode,
psl->sl_count, psl->sl_addr, 0);
- atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+ atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
+ &sk->sk_omem_alloc);
kfree_rcu(psl, rcu);
} else {
ip6_mc_del_src(idev, group, pmc->sfmode, 0, NULL, 0);
@@ -2607,7 +2612,8 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml,
err = ip6_mc_del_src(idev, &iml->addr, iml->sfmode,
psl->sl_count, psl->sl_addr, 0);
RCU_INIT_POINTER(iml->sflist, NULL);
- atomic_sub(IP6_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc);
+ atomic_sub(struct_size(psl, sl_addr, psl->sl_max),
+ &sk->sk_omem_alloc);
kfree_rcu(psl, rcu);
}
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index c467c6419893..4b098521a44c 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1391,12 +1391,6 @@ skip_defrtr:
}
}
- /*
- * Send a notify if RA changed managed/otherconf flags or timer settings
- */
- if (send_ifinfo_notify)
- inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
-
skip_linkparms:
/*
@@ -1496,6 +1490,11 @@ skip_routeinfo:
memcpy(&n, ((u8 *)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu));
mtu = ntohl(n);
+ if (in6_dev->ra_mtu != mtu) {
+ in6_dev->ra_mtu = mtu;
+ send_ifinfo_notify = true;
+ }
+
if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) {
ND_PRINTK(2, warn, "RA: invalid mtu: %d\n", mtu);
} else if (in6_dev->cnf.mtu6 != mtu) {
@@ -1519,6 +1518,12 @@ skip_routeinfo:
ND_PRINTK(2, warn, "RA: invalid RA options\n");
}
out:
+ /* Send a notify if RA changed managed/otherconf flags or
+ * timer settings or ra_mtu value
+ */
+ if (send_ifinfo_notify)
+ inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
+
fib6_info_release(rt);
if (neigh)
neigh_release(neigh);
diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c
index bb784ea7bbd3..727ee8097012 100644
--- a/net/ipv6/netfilter/ip6table_filter.c
+++ b/net/ipv6/netfilter/ip6table_filter.c
@@ -19,15 +19,12 @@ MODULE_DESCRIPTION("ip6tables filter table");
(1 << NF_INET_FORWARD) | \
(1 << NF_INET_LOCAL_OUT))
-static int __net_init ip6table_filter_table_init(struct net *net);
-
static const struct xt_table packet_filter = {
.name = "filter",
.valid_hooks = FILTER_VALID_HOOKS,
.me = THIS_MODULE,
.af = NFPROTO_IPV6,
.priority = NF_IP6_PRI_FILTER,
- .table_init = ip6table_filter_table_init,
};
/* The work comes in here from netfilter.c. */
@@ -44,7 +41,7 @@ static struct nf_hook_ops *filter_ops __read_mostly;
static bool forward = true;
module_param(forward, bool, 0000);
-static int __net_init ip6table_filter_table_init(struct net *net)
+static int ip6table_filter_table_init(struct net *net)
{
struct ip6t_replace *repl;
int err;
@@ -63,7 +60,7 @@ static int __net_init ip6table_filter_table_init(struct net *net)
static int __net_init ip6table_filter_net_init(struct net *net)
{
- if (net == &init_net || !forward)
+ if (!forward)
return ip6table_filter_table_init(net);
return 0;
@@ -87,15 +84,24 @@ static struct pernet_operations ip6table_filter_net_ops = {
static int __init ip6table_filter_init(void)
{
- int ret;
+ int ret = xt_register_template(&packet_filter,
+ ip6table_filter_table_init);
+
+ if (ret < 0)
+ return ret;
filter_ops = xt_hook_ops_alloc(&packet_filter, ip6table_filter_hook);
- if (IS_ERR(filter_ops))
+ if (IS_ERR(filter_ops)) {
+ xt_unregister_template(&packet_filter);
return PTR_ERR(filter_ops);
+ }
ret = register_pernet_subsys(&ip6table_filter_net_ops);
- if (ret < 0)
+ if (ret < 0) {
+ xt_unregister_template(&packet_filter);
kfree(filter_ops);
+ return ret;
+ }
return ret;
}
@@ -103,6 +109,7 @@ static int __init ip6table_filter_init(void)
static void __exit ip6table_filter_fini(void)
{
unregister_pernet_subsys(&ip6table_filter_net_ops);
+ xt_unregister_template(&packet_filter);
kfree(filter_ops);
}
diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c
index c76cffd63041..9b518ce37d6a 100644
--- a/net/ipv6/netfilter/ip6table_mangle.c
+++ b/net/ipv6/netfilter/ip6table_mangle.c
@@ -20,15 +20,12 @@ MODULE_DESCRIPTION("ip6tables mangle table");
(1 << NF_INET_LOCAL_OUT) | \
(1 << NF_INET_POST_ROUTING))
-static int __net_init ip6table_mangle_table_init(struct net *net);
-
static const struct xt_table packet_mangler = {
.name = "mangle",
.valid_hooks = MANGLE_VALID_HOOKS,
.me = THIS_MODULE,
.af = NFPROTO_IPV6,
.priority = NF_IP6_PRI_MANGLE,
- .table_init = ip6table_mangle_table_init,
};
static unsigned int
@@ -76,7 +73,7 @@ ip6table_mangle_hook(void *priv, struct sk_buff *skb,
}
static struct nf_hook_ops *mangle_ops __read_mostly;
-static int __net_init ip6table_mangle_table_init(struct net *net)
+static int ip6table_mangle_table_init(struct net *net)
{
struct ip6t_replace *repl;
int ret;
@@ -106,29 +103,32 @@ static struct pernet_operations ip6table_mangle_net_ops = {
static int __init ip6table_mangle_init(void)
{
- int ret;
+ int ret = xt_register_template(&packet_mangler,
+ ip6table_mangle_table_init);
+
+ if (ret < 0)
+ return ret;
mangle_ops = xt_hook_ops_alloc(&packet_mangler, ip6table_mangle_hook);
- if (IS_ERR(mangle_ops))
+ if (IS_ERR(mangle_ops)) {
+ xt_unregister_template(&packet_mangler);
return PTR_ERR(mangle_ops);
+ }
ret = register_pernet_subsys(&ip6table_mangle_net_ops);
if (ret < 0) {
+ xt_unregister_template(&packet_mangler);
kfree(mangle_ops);
return ret;
}
- ret = ip6table_mangle_table_init(&init_net);
- if (ret) {
- unregister_pernet_subsys(&ip6table_mangle_net_ops);
- kfree(mangle_ops);
- }
return ret;
}
static void __exit ip6table_mangle_fini(void)
{
unregister_pernet_subsys(&ip6table_mangle_net_ops);
+ xt_unregister_template(&packet_mangler);
kfree(mangle_ops);
}
diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c
index b0292251e655..921c1723a01e 100644
--- a/net/ipv6/netfilter/ip6table_nat.c
+++ b/net/ipv6/netfilter/ip6table_nat.c
@@ -19,8 +19,6 @@ struct ip6table_nat_pernet {
struct nf_hook_ops *nf_nat_ops;
};
-static int __net_init ip6table_nat_table_init(struct net *net);
-
static unsigned int ip6table_nat_net_id __read_mostly;
static const struct xt_table nf_nat_ipv6_table = {
@@ -31,7 +29,6 @@ static const struct xt_table nf_nat_ipv6_table = {
(1 << NF_INET_LOCAL_IN),
.me = THIS_MODULE,
.af = NFPROTO_IPV6,
- .table_init = ip6table_nat_table_init,
};
static unsigned int ip6table_nat_do_chain(void *priv,
@@ -115,7 +112,7 @@ static void ip6t_nat_unregister_lookups(struct net *net)
kfree(ops);
}
-static int __net_init ip6table_nat_table_init(struct net *net)
+static int ip6table_nat_table_init(struct net *net)
{
struct ip6t_replace *repl;
int ret;
@@ -157,20 +154,23 @@ static struct pernet_operations ip6table_nat_net_ops = {
static int __init ip6table_nat_init(void)
{
- int ret = register_pernet_subsys(&ip6table_nat_net_ops);
+ int ret = xt_register_template(&nf_nat_ipv6_table,
+ ip6table_nat_table_init);
- if (ret)
+ if (ret < 0)
return ret;
- ret = ip6table_nat_table_init(&init_net);
+ ret = register_pernet_subsys(&ip6table_nat_net_ops);
if (ret)
- unregister_pernet_subsys(&ip6table_nat_net_ops);
+ xt_unregister_template(&nf_nat_ipv6_table);
+
return ret;
}
static void __exit ip6table_nat_exit(void)
{
unregister_pernet_subsys(&ip6table_nat_net_ops);
+ xt_unregister_template(&nf_nat_ipv6_table);
}
module_init(ip6table_nat_init);
diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c
index f63c106c521e..4f2a04af71d3 100644
--- a/net/ipv6/netfilter/ip6table_raw.c
+++ b/net/ipv6/netfilter/ip6table_raw.c
@@ -11,8 +11,6 @@
#define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT))
-static int __net_init ip6table_raw_table_init(struct net *net);
-
static bool raw_before_defrag __read_mostly;
MODULE_PARM_DESC(raw_before_defrag, "Enable raw table before defrag");
module_param(raw_before_defrag, bool, 0000);
@@ -23,7 +21,6 @@ static const struct xt_table packet_raw = {
.me = THIS_MODULE,
.af = NFPROTO_IPV6,
.priority = NF_IP6_PRI_RAW,
- .table_init = ip6table_raw_table_init,
};
static const struct xt_table packet_raw_before_defrag = {
@@ -32,7 +29,6 @@ static const struct xt_table packet_raw_before_defrag = {
.me = THIS_MODULE,
.af = NFPROTO_IPV6,
.priority = NF_IP6_PRI_RAW_BEFORE_DEFRAG,
- .table_init = ip6table_raw_table_init,
};
/* The work comes in here from netfilter.c. */
@@ -45,7 +41,7 @@ ip6table_raw_hook(void *priv, struct sk_buff *skb,
static struct nf_hook_ops *rawtable_ops __read_mostly;
-static int __net_init ip6table_raw_table_init(struct net *net)
+static int ip6table_raw_table_init(struct net *net)
{
struct ip6t_replace *repl;
const struct xt_table *table = &packet_raw;
@@ -79,37 +75,39 @@ static struct pernet_operations ip6table_raw_net_ops = {
static int __init ip6table_raw_init(void)
{
- int ret;
const struct xt_table *table = &packet_raw;
+ int ret;
if (raw_before_defrag) {
table = &packet_raw_before_defrag;
-
pr_info("Enabling raw table before defrag\n");
}
+ ret = xt_register_template(table, ip6table_raw_table_init);
+ if (ret < 0)
+ return ret;
+
/* Register hooks */
rawtable_ops = xt_hook_ops_alloc(table, ip6table_raw_hook);
- if (IS_ERR(rawtable_ops))
+ if (IS_ERR(rawtable_ops)) {
+ xt_unregister_template(table);
return PTR_ERR(rawtable_ops);
+ }
ret = register_pernet_subsys(&ip6table_raw_net_ops);
if (ret < 0) {
kfree(rawtable_ops);
+ xt_unregister_template(table);
return ret;
}
- ret = ip6table_raw_table_init(&init_net);
- if (ret) {
- unregister_pernet_subsys(&ip6table_raw_net_ops);
- kfree(rawtable_ops);
- }
return ret;
}
static void __exit ip6table_raw_fini(void)
{
unregister_pernet_subsys(&ip6table_raw_net_ops);
+ xt_unregister_template(&packet_raw);
kfree(rawtable_ops);
}
diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c
index 8dc335cf450b..931674034d8b 100644
--- a/net/ipv6/netfilter/ip6table_security.c
+++ b/net/ipv6/netfilter/ip6table_security.c
@@ -24,15 +24,12 @@ MODULE_DESCRIPTION("ip6tables security table, for MAC rules");
(1 << NF_INET_FORWARD) | \
(1 << NF_INET_LOCAL_OUT)
-static int __net_init ip6table_security_table_init(struct net *net);
-
static const struct xt_table security_table = {
.name = "security",
.valid_hooks = SECURITY_VALID_HOOKS,
.me = THIS_MODULE,
.af = NFPROTO_IPV6,
.priority = NF_IP6_PRI_SECURITY,
- .table_init = ip6table_security_table_init,
};
static unsigned int
@@ -44,7 +41,7 @@ ip6table_security_hook(void *priv, struct sk_buff *skb,
static struct nf_hook_ops *sectbl_ops __read_mostly;
-static int __net_init ip6table_security_table_init(struct net *net)
+static int ip6table_security_table_init(struct net *net)
{
struct ip6t_replace *repl;
int ret;
@@ -74,29 +71,32 @@ static struct pernet_operations ip6table_security_net_ops = {
static int __init ip6table_security_init(void)
{
- int ret;
+ int ret = xt_register_template(&security_table,
+ ip6table_security_table_init);
+
+ if (ret < 0)
+ return ret;
sectbl_ops = xt_hook_ops_alloc(&security_table, ip6table_security_hook);
- if (IS_ERR(sectbl_ops))
+ if (IS_ERR(sectbl_ops)) {
+ xt_unregister_template(&security_table);
return PTR_ERR(sectbl_ops);
+ }
ret = register_pernet_subsys(&ip6table_security_net_ops);
if (ret < 0) {
kfree(sectbl_ops);
+ xt_unregister_template(&security_table);
return ret;
}
- ret = ip6table_security_table_init(&init_net);
- if (ret) {
- unregister_pernet_subsys(&ip6table_security_net_ops);
- kfree(sectbl_ops);
- }
return ret;
}
static void __exit ip6table_security_fini(void)
{
unregister_pernet_subsys(&ip6table_security_net_ops);
+ xt_unregister_template(&security_table);
kfree(sectbl_ops);
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index c5e8ecb96426..dbc224023977 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1657,6 +1657,7 @@ static int rt6_insert_exception(struct rt6_info *nrt,
struct in6_addr *src_key = NULL;
struct rt6_exception *rt6_ex;
struct fib6_nh *nh = res->nh;
+ int max_depth;
int err = 0;
spin_lock_bh(&rt6_exception_lock);
@@ -1711,7 +1712,9 @@ static int rt6_insert_exception(struct rt6_info *nrt,
bucket->depth++;
net->ipv6.rt6_stats->fib_rt_cache++;
- if (bucket->depth > FIB6_MAX_DEPTH)
+ /* Randomize max depth to avoid some side channels attacks. */
+ max_depth = FIB6_MAX_DEPTH + prandom_u32_max(FIB6_MAX_DEPTH);
+ while (bucket->depth > max_depth)
rt6_exception_remove_oldest(bucket);
out:
@@ -3209,25 +3212,7 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst)
INDIRECT_CALLABLE_SCOPE unsigned int ip6_mtu(const struct dst_entry *dst)
{
- struct inet6_dev *idev;
- unsigned int mtu;
-
- mtu = dst_metric_raw(dst, RTAX_MTU);
- if (mtu)
- goto out;
-
- mtu = IPV6_MIN_MTU;
-
- rcu_read_lock();
- idev = __in6_dev_get(dst->dev);
- if (idev)
- mtu = idev->cnf.mtu6;
- rcu_read_unlock();
-
-out:
- mtu = min_t(unsigned int, mtu, IP6_MAX_MTU);
-
- return mtu - lwtunnel_headroom(dst->lwtstate, mtu);
+ return ip6_dst_mtu_maybe_forward(dst, false);
}
EXPORT_INDIRECT_CALLABLE(ip6_mtu);
@@ -3652,8 +3637,7 @@ out:
if (err) {
lwtstate_put(fib6_nh->fib_nh_lws);
fib6_nh->fib_nh_lws = NULL;
- if (dev)
- dev_put(dev);
+ dev_put(dev);
}
return err;
@@ -6646,7 +6630,7 @@ int __init ip6_route_init(void)
ret = -ENOMEM;
ip6_dst_ops_template.kmem_cachep =
kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
- SLAB_HWCACHE_ALIGN, NULL);
+ SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
if (!ip6_dst_ops_template.kmem_cachep)
goto out;
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 897fa59c47de..1bf5f5ae75ac 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -26,6 +26,7 @@
#ifdef CONFIG_IPV6_SEG6_HMAC
#include <net/seg6_hmac.h>
#endif
+#include <linux/netfilter.h>
static size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo)
{
@@ -295,11 +296,19 @@ static int seg6_do_srh(struct sk_buff *skb)
ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+ nf_reset_ct(skb);
return 0;
}
-static int seg6_input(struct sk_buff *skb)
+static int seg6_input_finish(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{
+ return dst_input(skb);
+}
+
+static int seg6_input_core(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
struct dst_entry *dst = NULL;
@@ -337,10 +346,41 @@ static int seg6_input(struct sk_buff *skb)
if (unlikely(err))
return err;
- return dst_input(skb);
+ if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
+ dev_net(skb->dev), NULL, skb, NULL,
+ skb_dst(skb)->dev, seg6_input_finish);
+
+ return seg6_input_finish(dev_net(skb->dev), NULL, skb);
}
-static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+static int seg6_input_nf(struct sk_buff *skb)
+{
+ struct net_device *dev = skb_dst(skb)->dev;
+ struct net *net = dev_net(skb->dev);
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, NULL,
+ skb, NULL, dev, seg6_input_core);
+ case htons(ETH_P_IPV6):
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, NULL,
+ skb, NULL, dev, seg6_input_core);
+ }
+
+ return -EINVAL;
+}
+
+static int seg6_input(struct sk_buff *skb)
+{
+ if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
+ return seg6_input_nf(skb);
+
+ return seg6_input_core(dev_net(skb->dev), NULL, skb);
+}
+
+static int seg6_output_core(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
struct dst_entry *dst = NULL;
@@ -387,12 +427,40 @@ static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
if (unlikely(err))
goto drop;
+ if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, net, sk, skb,
+ NULL, skb_dst(skb)->dev, dst_output);
+
return dst_output(net, sk, skb);
drop:
kfree_skb(skb);
return err;
}
+static int seg6_output_nf(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+ struct net_device *dev = skb_dst(skb)->dev;
+
+ switch (skb->protocol) {
+ case htons(ETH_P_IP):
+ return NF_HOOK(NFPROTO_IPV4, NF_INET_POST_ROUTING, net, sk, skb,
+ NULL, dev, seg6_output_core);
+ case htons(ETH_P_IPV6):
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, net, sk, skb,
+ NULL, dev, seg6_output_core);
+ }
+
+ return -EINVAL;
+}
+
+static int seg6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
+{
+ if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
+ return seg6_output_nf(net, sk, skb);
+
+ return seg6_output_core(net, sk, skb);
+}
+
static int seg6_build_state(struct net *net, struct nlattr *nla,
unsigned int family, const void *cfg,
struct lwtunnel_state **ts,
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index 60bf3b877957..2dc40b3f373e 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -30,6 +30,7 @@
#include <net/seg6_local.h>
#include <linux/etherdevice.h>
#include <linux/bpf.h>
+#include <linux/netfilter.h>
#define SEG6_F_ATTR(i) BIT(i)
@@ -413,12 +414,33 @@ drop:
return -EINVAL;
}
+static int input_action_end_dx6_finish(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
+{
+ struct dst_entry *orig_dst = skb_dst(skb);
+ struct in6_addr *nhaddr = NULL;
+ struct seg6_local_lwt *slwt;
+
+ slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
+
+ /* The inner packet is not associated to any local interface,
+ * so we do not call netif_rx().
+ *
+ * If slwt->nh6 is set to ::, then lookup the nexthop for the
+ * inner packet's DA. Otherwise, use the specified nexthop.
+ */
+ if (!ipv6_addr_any(&slwt->nh6))
+ nhaddr = &slwt->nh6;
+
+ seg6_lookup_nexthop(skb, nhaddr, 0);
+
+ return dst_input(skb);
+}
+
/* decapsulate and forward to specified nexthop */
static int input_action_end_dx6(struct sk_buff *skb,
struct seg6_local_lwt *slwt)
{
- struct in6_addr *nhaddr = NULL;
-
/* this function accepts IPv6 encapsulated packets, with either
* an SRH with SL=0, or no SRH.
*/
@@ -429,40 +451,30 @@ static int input_action_end_dx6(struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto drop;
- /* The inner packet is not associated to any local interface,
- * so we do not call netif_rx().
- *
- * If slwt->nh6 is set to ::, then lookup the nexthop for the
- * inner packet's DA. Otherwise, use the specified nexthop.
- */
-
- if (!ipv6_addr_any(&slwt->nh6))
- nhaddr = &slwt->nh6;
-
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
+ nf_reset_ct(skb);
- seg6_lookup_nexthop(skb, nhaddr, 0);
+ if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
+ dev_net(skb->dev), NULL, skb, NULL,
+ skb_dst(skb)->dev, input_action_end_dx6_finish);
- return dst_input(skb);
+ return input_action_end_dx6_finish(dev_net(skb->dev), NULL, skb);
drop:
kfree_skb(skb);
return -EINVAL;
}
-static int input_action_end_dx4(struct sk_buff *skb,
- struct seg6_local_lwt *slwt)
+static int input_action_end_dx4_finish(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
{
+ struct dst_entry *orig_dst = skb_dst(skb);
+ struct seg6_local_lwt *slwt;
struct iphdr *iph;
__be32 nhaddr;
int err;
- if (!decap_and_validate(skb, IPPROTO_IPIP))
- goto drop;
-
- if (!pskb_may_pull(skb, sizeof(struct iphdr)))
- goto drop;
-
- skb->protocol = htons(ETH_P_IP);
+ slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
iph = ip_hdr(skb);
@@ -470,14 +482,34 @@ static int input_action_end_dx4(struct sk_buff *skb,
skb_dst_drop(skb);
- skb_set_transport_header(skb, sizeof(struct iphdr));
-
err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
- if (err)
- goto drop;
+ if (err) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
return dst_input(skb);
+}
+
+static int input_action_end_dx4(struct sk_buff *skb,
+ struct seg6_local_lwt *slwt)
+{
+ if (!decap_and_validate(skb, IPPROTO_IPIP))
+ goto drop;
+
+ if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+ goto drop;
+
+ skb->protocol = htons(ETH_P_IP);
+ skb_set_transport_header(skb, sizeof(struct iphdr));
+ nf_reset_ct(skb);
+
+ if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
+ return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING,
+ dev_net(skb->dev), NULL, skb, NULL,
+ skb_dst(skb)->dev, input_action_end_dx4_finish);
+ return input_action_end_dx4_finish(dev_net(skb->dev), NULL, skb);
drop:
kfree_skb(skb);
return -EINVAL;
@@ -645,6 +677,7 @@ static struct sk_buff *end_dt_vrf_core(struct sk_buff *skb,
skb_dst_drop(skb);
skb_set_transport_header(skb, hdrlen);
+ nf_reset_ct(skb);
return end_dt_vrf_rcv(skb, family, vrf);
@@ -1078,7 +1111,8 @@ static void seg6_local_update_counters(struct seg6_local_lwt *slwt,
u64_stats_update_end(&pcounters->syncp);
}
-static int seg6_local_input(struct sk_buff *skb)
+static int seg6_local_input_core(struct net *net, struct sock *sk,
+ struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
struct seg6_action_desc *desc;
@@ -1086,11 +1120,6 @@ static int seg6_local_input(struct sk_buff *skb)
unsigned int len = skb->len;
int rc;
- if (skb->protocol != htons(ETH_P_IPV6)) {
- kfree_skb(skb);
- return -EINVAL;
- }
-
slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
desc = slwt->desc;
@@ -1104,6 +1133,21 @@ static int seg6_local_input(struct sk_buff *skb)
return rc;
}
+static int seg6_local_input(struct sk_buff *skb)
+{
+ if (skb->protocol != htons(ETH_P_IPV6)) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
+ if (static_branch_unlikely(&nf_hooks_lwtunnel_enabled))
+ return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN,
+ dev_net(skb->dev), NULL, skb, skb->dev, NULL,
+ seg6_local_input_core);
+
+ return seg6_local_input_core(dev_net(skb->dev), NULL, skb);
+}
+
static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
[SEG6_LOCAL_ACTION] = { .type = NLA_U32 },
[SEG6_LOCAL_SRH] = { .type = NLA_BINARY },
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index df5bea818410..ef0c7a7c18e2 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -299,9 +299,8 @@ __ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr)
}
-static int ipip6_tunnel_get_prl(struct net_device *dev, struct ifreq *ifr)
+static int ipip6_tunnel_get_prl(struct net_device *dev, struct ip_tunnel_prl __user *a)
{
- struct ip_tunnel_prl __user *a = ifr->ifr_ifru.ifru_data;
struct ip_tunnel *t = netdev_priv(dev);
struct ip_tunnel_prl kprl, *kp;
struct ip_tunnel_prl_entry *prl;
@@ -321,7 +320,7 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ifreq *ifr)
* we try harder to allocate.
*/
kp = (cmax <= 1 || capable(CAP_NET_ADMIN)) ?
- kcalloc(cmax, sizeof(*kp), GFP_KERNEL | __GFP_NOWARN) :
+ kcalloc(cmax, sizeof(*kp), GFP_KERNEL_ACCOUNT | __GFP_NOWARN) :
NULL;
rcu_read_lock();
@@ -334,7 +333,8 @@ static int ipip6_tunnel_get_prl(struct net_device *dev, struct ifreq *ifr)
* For root users, retry allocating enough memory for
* the answer.
*/
- kp = kcalloc(ca, sizeof(*kp), GFP_ATOMIC);
+ kp = kcalloc(ca, sizeof(*kp), GFP_ATOMIC | __GFP_ACCOUNT |
+ __GFP_NOWARN);
if (!kp) {
ret = -ENOMEM;
goto out;
@@ -453,8 +453,8 @@ out:
return err;
}
-static int ipip6_tunnel_prl_ctl(struct net_device *dev, struct ifreq *ifr,
- int cmd)
+static int ipip6_tunnel_prl_ctl(struct net_device *dev,
+ struct ip_tunnel_prl __user *data, int cmd)
{
struct ip_tunnel *t = netdev_priv(dev);
struct ip_tunnel_prl prl;
@@ -465,7 +465,7 @@ static int ipip6_tunnel_prl_ctl(struct net_device *dev, struct ifreq *ifr,
if (dev == dev_to_sit_net(dev)->fb_tunnel_dev)
return -EINVAL;
- if (copy_from_user(&prl, ifr->ifr_ifru.ifru_data, sizeof(prl)))
+ if (copy_from_user(&prl, data, sizeof(prl)))
return -EFAULT;
switch (cmd) {
@@ -1197,14 +1197,14 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t,
}
static int
-ipip6_tunnel_get6rd(struct net_device *dev, struct ifreq *ifr)
+ipip6_tunnel_get6rd(struct net_device *dev, struct ip_tunnel_parm __user *data)
{
struct ip_tunnel *t = netdev_priv(dev);
struct ip_tunnel_6rd ip6rd;
struct ip_tunnel_parm p;
if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) {
- if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+ if (copy_from_user(&p, data, sizeof(p)))
return -EFAULT;
t = ipip6_tunnel_locate(t->net, &p, 0);
}
@@ -1215,13 +1215,14 @@ ipip6_tunnel_get6rd(struct net_device *dev, struct ifreq *ifr)
ip6rd.relay_prefix = t->ip6rd.relay_prefix;
ip6rd.prefixlen = t->ip6rd.prefixlen;
ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen;
- if (copy_to_user(ifr->ifr_ifru.ifru_data, &ip6rd, sizeof(ip6rd)))
+ if (copy_to_user(data, &ip6rd, sizeof(ip6rd)))
return -EFAULT;
return 0;
}
static int
-ipip6_tunnel_6rdctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+ipip6_tunnel_6rdctl(struct net_device *dev, struct ip_tunnel_6rd __user *data,
+ int cmd)
{
struct ip_tunnel *t = netdev_priv(dev);
struct ip_tunnel_6rd ip6rd;
@@ -1229,7 +1230,7 @@ ipip6_tunnel_6rdctl(struct net_device *dev, struct ifreq *ifr, int cmd)
if (!ns_capable(t->net->user_ns, CAP_NET_ADMIN))
return -EPERM;
- if (copy_from_user(&ip6rd, ifr->ifr_ifru.ifru_data, sizeof(ip6rd)))
+ if (copy_from_user(&ip6rd, data, sizeof(ip6rd)))
return -EFAULT;
if (cmd != SIOCDEL6RD) {
@@ -1368,27 +1369,28 @@ ipip6_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
}
static int
-ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+ipip6_tunnel_siocdevprivate(struct net_device *dev, struct ifreq *ifr,
+ void __user *data, int cmd)
{
switch (cmd) {
case SIOCGETTUNNEL:
case SIOCADDTUNNEL:
case SIOCCHGTUNNEL:
case SIOCDELTUNNEL:
- return ip_tunnel_ioctl(dev, ifr, cmd);
+ return ip_tunnel_siocdevprivate(dev, ifr, data, cmd);
case SIOCGETPRL:
- return ipip6_tunnel_get_prl(dev, ifr);
+ return ipip6_tunnel_get_prl(dev, data);
case SIOCADDPRL:
case SIOCDELPRL:
case SIOCCHGPRL:
- return ipip6_tunnel_prl_ctl(dev, ifr, cmd);
+ return ipip6_tunnel_prl_ctl(dev, data, cmd);
#ifdef CONFIG_IPV6_SIT_6RD
case SIOCGET6RD:
- return ipip6_tunnel_get6rd(dev, ifr);
+ return ipip6_tunnel_get6rd(dev, data);
case SIOCADD6RD:
case SIOCCHG6RD:
case SIOCDEL6RD:
- return ipip6_tunnel_6rdctl(dev, ifr, cmd);
+ return ipip6_tunnel_6rdctl(dev, data, cmd);
#endif
default:
return -EINVAL;
@@ -1399,7 +1401,7 @@ static const struct net_device_ops ipip6_netdev_ops = {
.ndo_init = ipip6_tunnel_init,
.ndo_uninit = ipip6_tunnel_uninit,
.ndo_start_xmit = sit_tunnel_xmit,
- .ndo_do_ioctl = ipip6_tunnel_ioctl,
+ .ndo_siocdevprivate = ipip6_tunnel_siocdevprivate,
.ndo_get_stats64 = dev_get_tstats64,
.ndo_get_iflink = ip_tunnel_get_iflink,
.ndo_tunnel_ctl = ipip6_tunnel_ctl,
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index d7cf26f730d7..d53dd142bf87 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -21,6 +21,7 @@
#ifdef CONFIG_NETLABEL
#include <net/calipso.h>
#endif
+#include <linux/ioam6.h>
static int two = 2;
static int three = 3;
@@ -28,6 +29,8 @@ static int flowlabel_reflect_max = 0x7;
static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX;
static u32 rt6_multipath_hash_fields_all_mask =
FIB_MULTIPATH_HASH_FIELD_ALL_MASK;
+static u32 ioam6_id_max = IOAM6_DEFAULT_ID;
+static u64 ioam6_id_wide_max = IOAM6_DEFAULT_ID_WIDE;
static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
@@ -196,6 +199,22 @@ static struct ctl_table ipv6_table_template[] = {
.extra1 = SYSCTL_ZERO,
.extra2 = &two,
},
+ {
+ .procname = "ioam6_id",
+ .data = &init_net.ipv6.sysctl.ioam6_id,
+ .maxlen = sizeof(u32),
+ .mode = 0644,
+ .proc_handler = proc_douintvec_minmax,
+ .extra2 = &ioam6_id_max,
+ },
+ {
+ .procname = "ioam6_id_wide",
+ .data = &init_net.ipv6.sysctl.ioam6_id_wide,
+ .maxlen = sizeof(u64),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ .extra2 = &ioam6_id_wide_max,
+ },
{ }
};
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index c5e15e94bb00..ea53847b5b7e 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -1475,7 +1475,7 @@ do_udp_sendmsg:
fl6.saddr = np->saddr;
fl6.fl6_sport = inet->inet_sport;
- if (cgroup_bpf_enabled(BPF_CGROUP_UDP6_SENDMSG) && !connected) {
+ if (cgroup_bpf_enabled(CGROUP_UDP6_SENDMSG) && !connected) {
err = BPF_CGROUP_RUN_PROG_UDP6_SENDMSG_LOCK(sk,
(struct sockaddr *)sin6, &fl6.saddr);
if (err)