summaryrefslogtreecommitdiff
path: root/drivers/net/vxlan.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-03-19 20:05:34 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2016-03-19 20:05:34 +0300
commit1200b6809dfd9d73bc4c7db76d288c35fa4b2ebe (patch)
tree552e03de245cdbd0780ca1215914edc4a26540f7 /drivers/net/vxlan.c
parent6b5f04b6cf8ebab9a65d9c0026c650bb2538fd0f (diff)
parentfe30937b65354c7fec244caebbdaae68e28ca797 (diff)
downloadlinux-1200b6809dfd9d73bc4c7db76d288c35fa4b2ebe.tar.xz
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking updates from David Miller: "Highlights: 1) Support more Realtek wireless chips, from Jes Sorenson. 2) New BPF types for per-cpu hash and arrap maps, from Alexei Starovoitov. 3) Make several TCP sysctls per-namespace, from Nikolay Borisov. 4) Allow the use of SO_REUSEPORT in order to do per-thread processing of incoming TCP/UDP connections. The muxing can be done using a BPF program which hashes the incoming packet. From Craig Gallek. 5) Add a multiplexer for TCP streams, to provide a messaged based interface. BPF programs can be used to determine the message boundaries. From Tom Herbert. 6) Add 802.1AE MACSEC support, from Sabrina Dubroca. 7) Avoid factorial complexity when taking down an inetdev interface with lots of configured addresses. We were doing things like traversing the entire address less for each address removed, and flushing the entire netfilter conntrack table for every address as well. 8) Add and use SKB bulk free infrastructure, from Jesper Brouer. 9) Allow offloading u32 classifiers to hardware, and implement for ixgbe, from John Fastabend. 10) Allow configuring IRQ coalescing parameters on a per-queue basis, from Kan Liang. 11) Extend ethtool so that larger link mode masks can be supported. From David Decotigny. 12) Introduce devlink, which can be used to configure port link types (ethernet vs Infiniband, etc.), port splitting, and switch device level attributes as a whole. From Jiri Pirko. 13) Hardware offload support for flower classifiers, from Amir Vadai. 14) Add "Local Checksum Offload". Basically, for a tunneled packet the checksum of the outer header is 'constant' (because with the checksum field filled into the inner protocol header, the payload of the outer frame checksums to 'zero'), and we can take advantage of that in various ways. From Edward Cree" * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1548 commits) bonding: fix bond_get_stats() net: bcmgenet: fix dma api length mismatch net/mlx4_core: Fix backward compatibility on VFs phy: mdio-thunder: Fix some Kconfig typos lan78xx: add ndo_get_stats64 lan78xx: handle statistics counter rollover RDS: TCP: Remove unused constant RDS: TCP: Add sysctl tunables for sndbuf/rcvbuf on rds-tcp socket net: smc911x: convert pxa dma to dmaengine team: remove duplicate set of flag IFF_MULTICAST bonding: remove duplicate set of flag IFF_MULTICAST net: fix a comment typo ethernet: micrel: fix some error codes ip_tunnels, bpf: define IP_TUNNEL_OPTS_MAX and use it bpf, dst: add and use dst_tclassid helper bpf: make skb->tc_classid also readable net: mvneta: bm: clarify dependencies cls_bpf: reset class and reuse major in da ldmvsw: Checkpatch sunvnet.c and sunvnet_common.c ldmvsw: Add ldmvsw.c driver code ...
Diffstat (limited to 'drivers/net/vxlan.c')
-rw-r--r--drivers/net/vxlan.c689
1 files changed, 319 insertions, 370 deletions
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 1c32bd104797..800106a7246c 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -42,7 +42,7 @@
#include <net/netns/generic.h>
#include <net/vxlan.h>
#include <net/protocol.h>
-#include <net/udp_tunnel.h>
+
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6.h>
#include <net/addrconf.h>
@@ -197,9 +197,9 @@ static int vxlan_nla_put_addr(struct sk_buff *skb, int attr,
#endif
/* Virtual Network hash table head */
-static inline struct hlist_head *vni_head(struct vxlan_sock *vs, u32 id)
+static inline struct hlist_head *vni_head(struct vxlan_sock *vs, __be32 vni)
{
- return &vs->vni_list[hash_32(id, VNI_HASH_BITS)];
+ return &vs->vni_list[hash_32((__force u32)vni, VNI_HASH_BITS)];
}
/* Socket hash table head */
@@ -242,12 +242,16 @@ static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family,
return NULL;
}
-static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id)
+static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, __be32 vni)
{
struct vxlan_dev *vxlan;
- hlist_for_each_entry_rcu(vxlan, vni_head(vs, id), hlist) {
- if (vxlan->default_dst.remote_vni == id)
+ /* For flow based devices, map all packets to VNI 0 */
+ if (vs->flags & VXLAN_F_COLLECT_METADATA)
+ vni = 0;
+
+ hlist_for_each_entry_rcu(vxlan, vni_head(vs, vni), hlist) {
+ if (vxlan->default_dst.remote_vni == vni)
return vxlan;
}
@@ -255,7 +259,7 @@ static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id)
}
/* Look up VNI in a per net namespace table */
-static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id,
+static struct vxlan_dev *vxlan_find_vni(struct net *net, __be32 vni,
sa_family_t family, __be16 port,
u32 flags)
{
@@ -265,7 +269,7 @@ static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id,
if (!vs)
return NULL;
- return vxlan_vs_find_vni(vs, id);
+ return vxlan_vs_find_vni(vs, vni);
}
/* Fill in neighbour message in skbuff. */
@@ -315,7 +319,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
nla_put_be16(skb, NDA_PORT, rdst->remote_port))
goto nla_put_failure;
if (rdst->remote_vni != vxlan->default_dst.remote_vni &&
- nla_put_u32(skb, NDA_VNI, rdst->remote_vni))
+ nla_put_u32(skb, NDA_VNI, be32_to_cpu(rdst->remote_vni)))
goto nla_put_failure;
if (rdst->remote_ifindex &&
nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex))
@@ -383,7 +387,7 @@ static void vxlan_ip_miss(struct net_device *dev, union vxlan_addr *ipa)
};
struct vxlan_rdst remote = {
.remote_ip = *ipa, /* goes to NDA_DST */
- .remote_vni = VXLAN_N_VID,
+ .remote_vni = cpu_to_be32(VXLAN_N_VID),
};
vxlan_fdb_notify(vxlan, &f, &remote, RTM_GETNEIGH);
@@ -452,7 +456,7 @@ static struct vxlan_fdb *vxlan_find_mac(struct vxlan_dev *vxlan,
/* caller should hold vxlan->hash_lock */
static struct vxlan_rdst *vxlan_fdb_find_rdst(struct vxlan_fdb *f,
union vxlan_addr *ip, __be16 port,
- __u32 vni, __u32 ifindex)
+ __be32 vni, __u32 ifindex)
{
struct vxlan_rdst *rd;
@@ -469,7 +473,8 @@ static struct vxlan_rdst *vxlan_fdb_find_rdst(struct vxlan_fdb *f,
/* Replace destination of unicast mac */
static int vxlan_fdb_replace(struct vxlan_fdb *f,
- union vxlan_addr *ip, __be16 port, __u32 vni, __u32 ifindex)
+ union vxlan_addr *ip, __be16 port, __be32 vni,
+ __u32 ifindex)
{
struct vxlan_rdst *rd;
@@ -480,6 +485,8 @@ static int vxlan_fdb_replace(struct vxlan_fdb *f,
rd = list_first_entry_or_null(&f->remotes, struct vxlan_rdst, list);
if (!rd)
return 0;
+
+ dst_cache_reset(&rd->dst_cache);
rd->remote_ip = *ip;
rd->remote_port = port;
rd->remote_vni = vni;
@@ -489,7 +496,7 @@ static int vxlan_fdb_replace(struct vxlan_fdb *f,
/* Add/update destinations for multicast */
static int vxlan_fdb_append(struct vxlan_fdb *f,
- union vxlan_addr *ip, __be16 port, __u32 vni,
+ union vxlan_addr *ip, __be16 port, __be32 vni,
__u32 ifindex, struct vxlan_rdst **rdp)
{
struct vxlan_rdst *rd;
@@ -501,6 +508,12 @@ static int vxlan_fdb_append(struct vxlan_fdb *f,
rd = kmalloc(sizeof(*rd), GFP_ATOMIC);
if (rd == NULL)
return -ENOBUFS;
+
+ if (dst_cache_init(&rd->dst_cache, GFP_ATOMIC)) {
+ kfree(rd);
+ return -ENOBUFS;
+ }
+
rd->remote_ip = *ip;
rd->remote_port = port;
rd->remote_vni = vni;
@@ -515,7 +528,8 @@ static int vxlan_fdb_append(struct vxlan_fdb *f,
static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb,
unsigned int off,
struct vxlanhdr *vh, size_t hdrlen,
- u32 data, struct gro_remcsum *grc,
+ __be32 vni_field,
+ struct gro_remcsum *grc,
bool nopartial)
{
size_t start, offset;
@@ -526,10 +540,8 @@ static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb,
if (!NAPI_GRO_CB(skb)->csum_valid)
return NULL;
- start = (data & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT;
- offset = start + ((data & VXLAN_RCO_UDP) ?
- offsetof(struct udphdr, check) :
- offsetof(struct tcphdr, check));
+ start = vxlan_rco_start(vni_field);
+ offset = start + vxlan_rco_offset(vni_field);
vh = skb_gro_remcsum_process(skb, (void *)vh, off, hdrlen,
start, offset, grc, nopartial);
@@ -549,7 +561,7 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head,
int flush = 1;
struct vxlan_sock *vs = container_of(uoff, struct vxlan_sock,
udp_offloads);
- u32 flags;
+ __be32 flags;
struct gro_remcsum grc;
skb_gro_remcsum_init(&grc);
@@ -565,11 +577,11 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head,
skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr));
- flags = ntohl(vh->vx_flags);
+ flags = vh->vx_flags;
if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) {
vh = vxlan_gro_remcsum(skb, off_vx, vh, sizeof(struct vxlanhdr),
- ntohl(vh->vx_vni), &grc,
+ vh->vx_vni, &grc,
!!(vs->flags &
VXLAN_F_REMCSUM_NOPARTIAL));
@@ -579,8 +591,6 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head,
skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */
- flush = 0;
-
for (p = *head; p; p = p->next) {
if (!NAPI_GRO_CB(p)->same_flow)
continue;
@@ -594,6 +604,7 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head,
}
pp = eth_gro_receive(head, skb);
+ flush = 0;
out:
skb_gro_remcsum_cleanup(skb, &grc);
@@ -660,7 +671,7 @@ static void vxlan_notify_del_rx_port(struct vxlan_sock *vs)
static int vxlan_fdb_create(struct vxlan_dev *vxlan,
const u8 *mac, union vxlan_addr *ip,
__u16 state, __u16 flags,
- __be16 port, __u32 vni, __u32 ifindex,
+ __be16 port, __be32 vni, __u32 ifindex,
__u8 ndm_flags)
{
struct vxlan_rdst *rd = NULL;
@@ -749,8 +760,10 @@ static void vxlan_fdb_free(struct rcu_head *head)
struct vxlan_fdb *f = container_of(head, struct vxlan_fdb, rcu);
struct vxlan_rdst *rd, *nd;
- list_for_each_entry_safe(rd, nd, &f->remotes, list)
+ list_for_each_entry_safe(rd, nd, &f->remotes, list) {
+ dst_cache_destroy(&rd->dst_cache);
kfree(rd);
+ }
kfree(f);
}
@@ -767,7 +780,8 @@ static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f)
}
static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan,
- union vxlan_addr *ip, __be16 *port, u32 *vni, u32 *ifindex)
+ union vxlan_addr *ip, __be16 *port, __be32 *vni,
+ u32 *ifindex)
{
struct net *net = dev_net(vxlan->dev);
int err;
@@ -800,7 +814,7 @@ static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan,
if (tb[NDA_VNI]) {
if (nla_len(tb[NDA_VNI]) != sizeof(u32))
return -EINVAL;
- *vni = nla_get_u32(tb[NDA_VNI]);
+ *vni = cpu_to_be32(nla_get_u32(tb[NDA_VNI]));
} else {
*vni = vxlan->default_dst.remote_vni;
}
@@ -830,7 +844,8 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
/* struct net *net = dev_net(vxlan->dev); */
union vxlan_addr ip;
__be16 port;
- u32 vni, ifindex;
+ __be32 vni;
+ u32 ifindex;
int err;
if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_REACHABLE))) {
@@ -867,7 +882,8 @@ static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
struct vxlan_rdst *rd = NULL;
union vxlan_addr ip;
__be16 port;
- u32 vni, ifindex;
+ __be32 vni;
+ u32 ifindex;
int err;
err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &vni, &ifindex);
@@ -1124,177 +1140,168 @@ static int vxlan_igmp_leave(struct vxlan_dev *vxlan)
return ret;
}
-static struct vxlanhdr *vxlan_remcsum(struct sk_buff *skb, struct vxlanhdr *vh,
- size_t hdrlen, u32 data, bool nopartial)
+static bool vxlan_remcsum(struct vxlanhdr *unparsed,
+ struct sk_buff *skb, u32 vxflags)
{
size_t start, offset, plen;
- if (skb->remcsum_offload)
- return vh;
+ if (!(unparsed->vx_flags & VXLAN_HF_RCO) || skb->remcsum_offload)
+ goto out;
- start = (data & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT;
- offset = start + ((data & VXLAN_RCO_UDP) ?
- offsetof(struct udphdr, check) :
- offsetof(struct tcphdr, check));
+ start = vxlan_rco_start(unparsed->vx_vni);
+ offset = start + vxlan_rco_offset(unparsed->vx_vni);
- plen = hdrlen + offset + sizeof(u16);
+ plen = sizeof(struct vxlanhdr) + offset + sizeof(u16);
if (!pskb_may_pull(skb, plen))
- return NULL;
+ return false;
+
+ skb_remcsum_process(skb, (void *)(vxlan_hdr(skb) + 1), start, offset,
+ !!(vxflags & VXLAN_F_REMCSUM_NOPARTIAL));
+out:
+ unparsed->vx_flags &= ~VXLAN_HF_RCO;
+ unparsed->vx_vni &= VXLAN_VNI_MASK;
+ return true;
+}
- vh = (struct vxlanhdr *)(udp_hdr(skb) + 1);
+static void vxlan_parse_gbp_hdr(struct vxlanhdr *unparsed,
+ struct sk_buff *skb, u32 vxflags,
+ struct vxlan_metadata *md)
+{
+ struct vxlanhdr_gbp *gbp = (struct vxlanhdr_gbp *)unparsed;
+ struct metadata_dst *tun_dst;
- skb_remcsum_process(skb, (void *)vh + hdrlen, start, offset,
- nopartial);
+ if (!(unparsed->vx_flags & VXLAN_HF_GBP))
+ goto out;
- return vh;
+ md->gbp = ntohs(gbp->policy_id);
+
+ tun_dst = (struct metadata_dst *)skb_dst(skb);
+ if (tun_dst) {
+ tun_dst->u.tun_info.key.tun_flags |= TUNNEL_VXLAN_OPT;
+ tun_dst->u.tun_info.options_len = sizeof(*md);
+ }
+ if (gbp->dont_learn)
+ md->gbp |= VXLAN_GBP_DONT_LEARN;
+
+ if (gbp->policy_applied)
+ md->gbp |= VXLAN_GBP_POLICY_APPLIED;
+
+ /* In flow-based mode, GBP is carried in dst_metadata */
+ if (!(vxflags & VXLAN_F_COLLECT_METADATA))
+ skb->mark = md->gbp;
+out:
+ unparsed->vx_flags &= ~VXLAN_GBP_USED_BITS;
}
-static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb,
- struct vxlan_metadata *md, u32 vni,
- struct metadata_dst *tun_dst)
+static bool vxlan_set_mac(struct vxlan_dev *vxlan,
+ struct vxlan_sock *vs,
+ struct sk_buff *skb)
{
- struct iphdr *oip = NULL;
- struct ipv6hdr *oip6 = NULL;
- struct vxlan_dev *vxlan;
- struct pcpu_sw_netstats *stats;
union vxlan_addr saddr;
- int err = 0;
-
- /* For flow based devices, map all packets to VNI 0 */
- if (vs->flags & VXLAN_F_COLLECT_METADATA)
- vni = 0;
-
- /* Is this VNI defined? */
- vxlan = vxlan_vs_find_vni(vs, vni);
- if (!vxlan)
- goto drop;
skb_reset_mac_header(skb);
- skb_scrub_packet(skb, !net_eq(vxlan->net, dev_net(vxlan->dev)));
skb->protocol = eth_type_trans(skb, vxlan->dev);
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
/* Ignore packet loops (and multicast echo) */
if (ether_addr_equal(eth_hdr(skb)->h_source, vxlan->dev->dev_addr))
- goto drop;
+ return false;
- /* Get data from the outer IP header */
+ /* Get address from the outer IP header */
if (vxlan_get_sk_family(vs) == AF_INET) {
- oip = ip_hdr(skb);
- saddr.sin.sin_addr.s_addr = oip->saddr;
+ saddr.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
saddr.sa.sa_family = AF_INET;
#if IS_ENABLED(CONFIG_IPV6)
} else {
- oip6 = ipv6_hdr(skb);
- saddr.sin6.sin6_addr = oip6->saddr;
+ saddr.sin6.sin6_addr = ipv6_hdr(skb)->saddr;
saddr.sa.sa_family = AF_INET6;
#endif
}
- if (tun_dst) {
- skb_dst_set(skb, (struct dst_entry *)tun_dst);
- tun_dst = NULL;
- }
-
if ((vxlan->flags & VXLAN_F_LEARN) &&
vxlan_snoop(skb->dev, &saddr, eth_hdr(skb)->h_source))
- goto drop;
-
- skb_reset_network_header(skb);
- /* In flow-based mode, GBP is carried in dst_metadata */
- if (!(vs->flags & VXLAN_F_COLLECT_METADATA))
- skb->mark = md->gbp;
-
- if (oip6)
- err = IP6_ECN_decapsulate(oip6, skb);
- if (oip)
- err = IP_ECN_decapsulate(oip, skb);
-
- if (unlikely(err)) {
- if (log_ecn_error) {
- if (oip6)
- net_info_ratelimited("non-ECT from %pI6\n",
- &oip6->saddr);
- if (oip)
- net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
- &oip->saddr, oip->tos);
- }
- if (err > 1) {
- ++vxlan->dev->stats.rx_frame_errors;
- ++vxlan->dev->stats.rx_errors;
- goto drop;
- }
- }
+ return false;
- stats = this_cpu_ptr(vxlan->dev->tstats);
- u64_stats_update_begin(&stats->syncp);
- stats->rx_packets++;
- stats->rx_bytes += skb->len;
- u64_stats_update_end(&stats->syncp);
+ return true;
+}
- gro_cells_receive(&vxlan->gro_cells, skb);
+static bool vxlan_ecn_decapsulate(struct vxlan_sock *vs, void *oiph,
+ struct sk_buff *skb)
+{
+ int err = 0;
- return;
-drop:
- if (tun_dst)
- dst_release((struct dst_entry *)tun_dst);
+ if (vxlan_get_sk_family(vs) == AF_INET)
+ err = IP_ECN_decapsulate(oiph, skb);
+#if IS_ENABLED(CONFIG_IPV6)
+ else
+ err = IP6_ECN_decapsulate(oiph, skb);
+#endif
- /* Consume bad packet */
- kfree_skb(skb);
+ if (unlikely(err) && log_ecn_error) {
+ if (vxlan_get_sk_family(vs) == AF_INET)
+ net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
+ &((struct iphdr *)oiph)->saddr,
+ ((struct iphdr *)oiph)->tos);
+ else
+ net_info_ratelimited("non-ECT from %pI6\n",
+ &((struct ipv6hdr *)oiph)->saddr);
+ }
+ return err <= 1;
}
/* Callback from net/ipv4/udp.c to receive packets */
-static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
+static int vxlan_rcv(struct sock *sk, struct sk_buff *skb)
{
- struct metadata_dst *tun_dst = NULL;
+ struct pcpu_sw_netstats *stats;
+ struct vxlan_dev *vxlan;
struct vxlan_sock *vs;
- struct vxlanhdr *vxh;
- u32 flags, vni;
+ struct vxlanhdr unparsed;
struct vxlan_metadata _md;
struct vxlan_metadata *md = &_md;
+ void *oiph;
/* Need Vxlan and inner Ethernet header to be present */
if (!pskb_may_pull(skb, VXLAN_HLEN))
- goto error;
-
- vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1);
- flags = ntohl(vxh->vx_flags);
- vni = ntohl(vxh->vx_vni);
+ return 1;
- if (flags & VXLAN_HF_VNI) {
- flags &= ~VXLAN_HF_VNI;
- } else {
- /* VNI flag always required to be set */
- goto bad_flags;
+ unparsed = *vxlan_hdr(skb);
+ /* VNI flag always required to be set */
+ if (!(unparsed.vx_flags & VXLAN_HF_VNI)) {
+ netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#x\n",
+ ntohl(vxlan_hdr(skb)->vx_flags),
+ ntohl(vxlan_hdr(skb)->vx_vni));
+ /* Return non vxlan pkt */
+ return 1;
}
-
- if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB)))
- goto drop;
- vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1);
+ unparsed.vx_flags &= ~VXLAN_HF_VNI;
+ unparsed.vx_vni &= ~VXLAN_VNI_MASK;
vs = rcu_dereference_sk_user_data(sk);
if (!vs)
goto drop;
- if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) {
- vxh = vxlan_remcsum(skb, vxh, sizeof(struct vxlanhdr), vni,
- !!(vs->flags & VXLAN_F_REMCSUM_NOPARTIAL));
- if (!vxh)
- goto drop;
+ vxlan = vxlan_vs_find_vni(vs, vxlan_vni(vxlan_hdr(skb)->vx_vni));
+ if (!vxlan)
+ goto drop;
- flags &= ~VXLAN_HF_RCO;
- vni &= VXLAN_VNI_MASK;
- }
+ if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB),
+ !net_eq(vxlan->net, dev_net(vxlan->dev))))
+ goto drop;
if (vxlan_collect_metadata(vs)) {
+ __be32 vni = vxlan_vni(vxlan_hdr(skb)->vx_vni);
+ struct metadata_dst *tun_dst;
+
tun_dst = udp_tun_rx_dst(skb, vxlan_get_sk_family(vs), TUNNEL_KEY,
- cpu_to_be64(vni >> 8), sizeof(*md));
+ vxlan_vni_to_tun_id(vni), sizeof(*md));
if (!tun_dst)
goto drop;
md = ip_tunnel_info_opts(&tun_dst->u.tun_info);
+
+ skb_dst_set(skb, (struct dst_entry *)tun_dst);
} else {
memset(md, 0, sizeof(*md));
}
@@ -1302,27 +1309,13 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
/* For backwards compatibility, only allow reserved fields to be
* used by VXLAN extensions if explicitly requested.
*/
- if ((flags & VXLAN_HF_GBP) && (vs->flags & VXLAN_F_GBP)) {
- struct vxlanhdr_gbp *gbp;
-
- gbp = (struct vxlanhdr_gbp *)vxh;
- md->gbp = ntohs(gbp->policy_id);
-
- if (tun_dst) {
- tun_dst->u.tun_info.key.tun_flags |= TUNNEL_VXLAN_OPT;
- tun_dst->u.tun_info.options_len = sizeof(*md);
- }
-
- if (gbp->dont_learn)
- md->gbp |= VXLAN_GBP_DONT_LEARN;
-
- if (gbp->policy_applied)
- md->gbp |= VXLAN_GBP_POLICY_APPLIED;
-
- flags &= ~VXLAN_GBP_USED_BITS;
- }
+ if (vs->flags & VXLAN_F_REMCSUM_RX)
+ if (!vxlan_remcsum(&unparsed, skb, vs->flags))
+ goto drop;
+ if (vs->flags & VXLAN_F_GBP)
+ vxlan_parse_gbp_hdr(&unparsed, skb, vs->flags, md);
- if (flags || vni & ~VXLAN_VNI_MASK) {
+ if (unparsed.vx_flags || unparsed.vx_vni) {
/* If there are any unprocessed flags remaining treat
* this as a malformed packet. This behavior diverges from
* VXLAN RFC (RFC7348) which stipulates that bits in reserved
@@ -1331,28 +1324,34 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
* is more robust and provides a little more security in
* adding extensions to VXLAN.
*/
+ goto drop;
+ }
- goto bad_flags;
+ if (!vxlan_set_mac(vxlan, vs, skb))
+ goto drop;
+
+ oiph = skb_network_header(skb);
+ skb_reset_network_header(skb);
+
+ if (!vxlan_ecn_decapsulate(vs, oiph, skb)) {
+ ++vxlan->dev->stats.rx_frame_errors;
+ ++vxlan->dev->stats.rx_errors;
+ goto drop;
}
- vxlan_rcv(vs, skb, md, vni >> 8, tun_dst);
+ stats = this_cpu_ptr(vxlan->dev->tstats);
+ u64_stats_update_begin(&stats->syncp);
+ stats->rx_packets++;
+ stats->rx_bytes += skb->len;
+ u64_stats_update_end(&stats->syncp);
+
+ gro_cells_receive(&vxlan->gro_cells, skb);
return 0;
drop:
/* Consume bad packet */
kfree_skb(skb);
return 0;
-
-bad_flags:
- netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#x\n",
- ntohl(vxh->vx_flags), ntohl(vxh->vx_vni));
-
-error:
- if (tun_dst)
- dst_release((struct dst_entry *)tun_dst);
-
- /* Return non vxlan pkt */
- return 1;
}
static int arp_reduce(struct net_device *dev, struct sk_buff *skb)
@@ -1463,7 +1462,7 @@ static struct sk_buff *vxlan_na_create(struct sk_buff *request,
reply->dev = dev;
skb_reserve(reply, LL_RESERVED_SPACE(request->dev));
skb_push(reply, sizeof(struct ethhdr));
- skb_set_mac_header(reply, 0);
+ skb_reset_mac_header(reply);
ns = (struct nd_msg *)skb_transport_header(request);
@@ -1483,7 +1482,7 @@ static struct sk_buff *vxlan_na_create(struct sk_buff *request,
reply->protocol = htons(ETH_P_IPV6);
skb_pull(reply, sizeof(struct ethhdr));
- skb_set_network_header(reply, 0);
+ skb_reset_network_header(reply);
skb_put(reply, sizeof(struct ipv6hdr));
/* IPv6 header */
@@ -1498,7 +1497,7 @@ static struct sk_buff *vxlan_na_create(struct sk_buff *request,
pip6->saddr = *(struct in6_addr *)n->primary_key;
skb_pull(reply, sizeof(struct ipv6hdr));
- skb_set_transport_header(reply, 0);
+ skb_reset_transport_header(reply);
na = (struct nd_msg *)skb_put(reply, sizeof(*na) + na_olen);
@@ -1677,7 +1676,7 @@ static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, u32 vxflags,
return;
gbp = (struct vxlanhdr_gbp *)vxh;
- vxh->vx_flags |= htonl(VXLAN_HF_GBP);
+ vxh->vx_flags |= VXLAN_HF_GBP;
if (md->gbp & VXLAN_GBP_DONT_LEARN)
gbp->dont_learn = 1;
@@ -1688,20 +1687,15 @@ static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, u32 vxflags,
gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK);
}
-#if IS_ENABLED(CONFIG_IPV6)
-static int vxlan6_xmit_skb(struct dst_entry *dst, struct sock *sk,
- struct sk_buff *skb,
- struct net_device *dev, struct in6_addr *saddr,
- struct in6_addr *daddr, __u8 prio, __u8 ttl,
- __be16 src_port, __be16 dst_port, __be32 vni,
- struct vxlan_metadata *md, bool xnet, u32 vxflags)
+static int vxlan_build_skb(struct sk_buff *skb, struct dst_entry *dst,
+ int iphdr_len, __be32 vni,
+ struct vxlan_metadata *md, u32 vxflags,
+ bool udp_sum)
{
struct vxlanhdr *vxh;
int min_headroom;
int err;
- bool udp_sum = !(vxflags & VXLAN_F_UDP_ZERO_CSUM6_TX);
int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
- u16 hdrlen = sizeof(struct vxlanhdr);
if ((vxflags & VXLAN_F_REMCSUM_TX) &&
skb->ip_summed == CHECKSUM_PARTIAL) {
@@ -1710,50 +1704,39 @@ static int vxlan6_xmit_skb(struct dst_entry *dst, struct sock *sk,
if (csum_start <= VXLAN_MAX_REMCSUM_START &&
!(csum_start & VXLAN_RCO_SHIFT_MASK) &&
(skb->csum_offset == offsetof(struct udphdr, check) ||
- skb->csum_offset == offsetof(struct tcphdr, check))) {
- udp_sum = false;
+ skb->csum_offset == offsetof(struct tcphdr, check)))
type |= SKB_GSO_TUNNEL_REMCSUM;
- }
}
- skb_scrub_packet(skb, xnet);
-
min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len
- + VXLAN_HLEN + sizeof(struct ipv6hdr)
+ + VXLAN_HLEN + iphdr_len
+ (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
/* Need space for new headers (invalidates iph ptr) */
err = skb_cow_head(skb, min_headroom);
if (unlikely(err)) {
kfree_skb(skb);
- goto err;
+ return err;
}
skb = vlan_hwaccel_push_inside(skb);
- if (WARN_ON(!skb)) {
- err = -ENOMEM;
- goto err;
- }
+ if (WARN_ON(!skb))
+ return -ENOMEM;
- skb = iptunnel_handle_offloads(skb, udp_sum, type);
- if (IS_ERR(skb)) {
- err = -EINVAL;
- goto err;
- }
+ skb = iptunnel_handle_offloads(skb, type);
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
- vxh->vx_flags = htonl(VXLAN_HF_VNI);
- vxh->vx_vni = vni;
+ vxh->vx_flags = VXLAN_HF_VNI;
+ vxh->vx_vni = vxlan_vni_field(vni);
if (type & SKB_GSO_TUNNEL_REMCSUM) {
- u32 data = (skb_checksum_start_offset(skb) - hdrlen) >>
- VXLAN_RCO_SHIFT;
+ unsigned int start;
- if (skb->csum_offset == offsetof(struct udphdr, check))
- data |= VXLAN_RCO_UDP;
-
- vxh->vx_vni |= htonl(data);
- vxh->vx_flags |= htonl(VXLAN_HF_RCO);
+ start = skb_checksum_start_offset(skb) - sizeof(struct vxlanhdr);
+ vxh->vx_vni |= vxlan_compute_rco(start, skb->csum_offset);
+ vxh->vx_flags |= VXLAN_HF_RCO;
if (!skb_is_gso(skb)) {
skb->ip_summed = CHECKSUM_NONE;
@@ -1765,106 +1748,72 @@ static int vxlan6_xmit_skb(struct dst_entry *dst, struct sock *sk,
vxlan_build_gbp_hdr(vxh, vxflags, md);
skb_set_inner_protocol(skb, htons(ETH_P_TEB));
-
- udp_tunnel6_xmit_skb(dst, sk, skb, dev, saddr, daddr, prio,
- ttl, src_port, dst_port,
- !!(vxflags & VXLAN_F_UDP_ZERO_CSUM6_TX));
return 0;
-err:
- dst_release(dst);
- return err;
}
-#endif
-static int vxlan_xmit_skb(struct rtable *rt, struct sock *sk, struct sk_buff *skb,
- __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df,
- __be16 src_port, __be16 dst_port, __be32 vni,
- struct vxlan_metadata *md, bool xnet, u32 vxflags)
+static struct rtable *vxlan_get_route(struct vxlan_dev *vxlan,
+ struct sk_buff *skb, int oif, u8 tos,
+ __be32 daddr, __be32 *saddr,
+ struct dst_cache *dst_cache,
+ const struct ip_tunnel_info *info)
{
- struct vxlanhdr *vxh;
- int min_headroom;
- int err;
- bool udp_sum = !!(vxflags & VXLAN_F_UDP_CSUM);
- int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL;
- u16 hdrlen = sizeof(struct vxlanhdr);
-
- if ((vxflags & VXLAN_F_REMCSUM_TX) &&
- skb->ip_summed == CHECKSUM_PARTIAL) {
- int csum_start = skb_checksum_start_offset(skb);
+ bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
+ struct rtable *rt = NULL;
+ struct flowi4 fl4;
- if (csum_start <= VXLAN_MAX_REMCSUM_START &&
- !(csum_start & VXLAN_RCO_SHIFT_MASK) &&
- (skb->csum_offset == offsetof(struct udphdr, check) ||
- skb->csum_offset == offsetof(struct tcphdr, check))) {
- udp_sum = false;
- type |= SKB_GSO_TUNNEL_REMCSUM;
- }
+ if (tos && !info)
+ use_cache = false;
+ if (use_cache) {
+ rt = dst_cache_get_ip4(dst_cache, saddr);
+ if (rt)
+ return rt;
}
- min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len
- + VXLAN_HLEN + sizeof(struct iphdr)
- + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
-
- /* Need space for new headers (invalidates iph ptr) */
- err = skb_cow_head(skb, min_headroom);
- if (unlikely(err)) {
- kfree_skb(skb);
- return err;
- }
-
- skb = vlan_hwaccel_push_inside(skb);
- if (WARN_ON(!skb))
- return -ENOMEM;
-
- skb = iptunnel_handle_offloads(skb, udp_sum, type);
- if (IS_ERR(skb))
- return PTR_ERR(skb);
-
- vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh));
- vxh->vx_flags = htonl(VXLAN_HF_VNI);
- vxh->vx_vni = vni;
-
- if (type & SKB_GSO_TUNNEL_REMCSUM) {
- u32 data = (skb_checksum_start_offset(skb) - hdrlen) >>
- VXLAN_RCO_SHIFT;
-
- if (skb->csum_offset == offsetof(struct udphdr, check))
- data |= VXLAN_RCO_UDP;
-
- vxh->vx_vni |= htonl(data);
- vxh->vx_flags |= htonl(VXLAN_HF_RCO);
+ memset(&fl4, 0, sizeof(fl4));
+ fl4.flowi4_oif = oif;
+ fl4.flowi4_tos = RT_TOS(tos);
+ fl4.flowi4_mark = skb->mark;
+ fl4.flowi4_proto = IPPROTO_UDP;
+ fl4.daddr = daddr;
+ fl4.saddr = vxlan->cfg.saddr.sin.sin_addr.s_addr;
- if (!skb_is_gso(skb)) {
- skb->ip_summed = CHECKSUM_NONE;
- skb->encapsulation = 0;
- }
+ rt = ip_route_output_key(vxlan->net, &fl4);
+ if (!IS_ERR(rt)) {
+ *saddr = fl4.saddr;
+ if (use_cache)
+ dst_cache_set_ip4(dst_cache, &rt->dst, fl4.saddr);
}
-
- if (vxflags & VXLAN_F_GBP)
- vxlan_build_gbp_hdr(vxh, vxflags, md);
-
- skb_set_inner_protocol(skb, htons(ETH_P_TEB));
-
- udp_tunnel_xmit_skb(rt, sk, skb, src, dst, tos, ttl, df,
- src_port, dst_port, xnet,
- !(vxflags & VXLAN_F_UDP_CSUM));
- return 0;
+ return rt;
}
#if IS_ENABLED(CONFIG_IPV6)
static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
- struct sk_buff *skb, int oif,
+ struct sk_buff *skb, int oif, u8 tos,
+ __be32 label,
const struct in6_addr *daddr,
- struct in6_addr *saddr)
+ struct in6_addr *saddr,
+ struct dst_cache *dst_cache,
+ const struct ip_tunnel_info *info)
{
+ bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
struct dst_entry *ndst;
struct flowi6 fl6;
int err;
+ if (tos && !info)
+ use_cache = false;
+ if (use_cache) {
+ ndst = dst_cache_get_ip6(dst_cache, saddr);
+ if (ndst)
+ return ndst;
+ }
+
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_oif = oif;
+ fl6.flowi6_tos = RT_TOS(tos);
fl6.daddr = *daddr;
fl6.saddr = vxlan->cfg.saddr.sin6.sin6_addr;
+ fl6.flowlabel = label;
fl6.flowi6_mark = skb->mark;
fl6.flowi6_proto = IPPROTO_UDP;
@@ -1875,6 +1824,8 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
return ERR_PTR(err);
*saddr = fl6.saddr;
+ if (use_cache)
+ dst_cache_set_ip6(dst_cache, ndst, saddr);
return ndst;
}
#endif
@@ -1927,22 +1878,24 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
struct vxlan_rdst *rdst, bool did_rsc)
{
+ struct dst_cache *dst_cache;
struct ip_tunnel_info *info;
struct vxlan_dev *vxlan = netdev_priv(dev);
struct sock *sk;
struct rtable *rt = NULL;
const struct iphdr *old_iph;
- struct flowi4 fl4;
union vxlan_addr *dst;
union vxlan_addr remote_ip;
struct vxlan_metadata _md;
struct vxlan_metadata *md = &_md;
__be16 src_port = 0, dst_port;
- u32 vni;
+ __be32 vni, label;
__be16 df = 0;
__u8 tos, ttl;
int err;
u32 flags = vxlan->flags;
+ bool udp_sum = false;
+ bool xnet = !net_eq(vxlan->net, dev_net(vxlan->dev));
info = skb_tunnel_info(skb);
@@ -1950,6 +1903,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
dst_port = rdst->remote_port ? rdst->remote_port : vxlan->cfg.dst_port;
vni = rdst->remote_vni;
dst = &rdst->remote_ip;
+ dst_cache = &rdst->dst_cache;
} else {
if (!info) {
WARN_ONCE(1, "%s: Missing encapsulation instructions\n",
@@ -1957,13 +1911,14 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
goto drop;
}
dst_port = info->key.tp_dst ? : vxlan->cfg.dst_port;
- vni = be64_to_cpu(info->key.tun_id);
+ vni = vxlan_tun_id_to_vni(info->key.tun_id);
remote_ip.sa.sa_family = ip_tunnel_info_af(info);
if (remote_ip.sa.sa_family == AF_INET)
remote_ip.sin.sin_addr.s_addr = info->key.u.ipv4.dst;
else
remote_ip.sin6.sin6_addr = info->key.u.ipv6.dst;
dst = &remote_ip;
+ dst_cache = &info->dst_cache;
}
if (vxlan_addr_any(dst)) {
@@ -1985,12 +1940,15 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
if (tos == 1)
tos = ip_tunnel_get_dsfield(old_iph, skb);
+ label = vxlan->cfg.label;
src_port = udp_flow_src_port(dev_net(dev), skb, vxlan->cfg.port_min,
vxlan->cfg.port_max, true);
if (info) {
ttl = info->key.ttl;
tos = info->key.tos;
+ label = info->key.label;
+ udp_sum = !!(info->key.tun_flags & TUNNEL_CSUM);
if (info->options_len)
md = ip_tunnel_info_opts(info);
@@ -1999,29 +1957,16 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
}
if (dst->sa.sa_family == AF_INET) {
+ __be32 saddr;
+
if (!vxlan->vn4_sock)
goto drop;
sk = vxlan->vn4_sock->sock->sk;
- if (info) {
- if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT)
- df = htons(IP_DF);
-
- if (info->key.tun_flags & TUNNEL_CSUM)
- flags |= VXLAN_F_UDP_CSUM;
- else
- flags &= ~VXLAN_F_UDP_CSUM;
- }
-
- memset(&fl4, 0, sizeof(fl4));
- fl4.flowi4_oif = rdst ? rdst->remote_ifindex : 0;
- fl4.flowi4_tos = RT_TOS(tos);
- fl4.flowi4_mark = skb->mark;
- fl4.flowi4_proto = IPPROTO_UDP;
- fl4.daddr = dst->sin.sin_addr.s_addr;
- fl4.saddr = vxlan->cfg.saddr.sin.sin_addr.s_addr;
-
- rt = ip_route_output_key(vxlan->net, &fl4);
+ rt = vxlan_get_route(vxlan, skb,
+ rdst ? rdst->remote_ifindex : 0, tos,
+ dst->sin.sin_addr.s_addr, &saddr,
+ dst_cache, info);
if (IS_ERR(rt)) {
netdev_dbg(dev, "no route to %pI4\n",
&dst->sin.sin_addr.s_addr);
@@ -2051,18 +1996,21 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
return;
}
+ if (!info)
+ udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM_TX);
+ else if (info->key.tun_flags & TUNNEL_DONT_FRAGMENT)
+ df = htons(IP_DF);
+
tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
- err = vxlan_xmit_skb(rt, sk, skb, fl4.saddr,
- dst->sin.sin_addr.s_addr, tos, ttl, df,
- src_port, dst_port, htonl(vni << 8), md,
- !net_eq(vxlan->net, dev_net(vxlan->dev)),
- flags);
- if (err < 0) {
- /* skb is already freed. */
- skb = NULL;
- goto rt_tx_error;
- }
+ err = vxlan_build_skb(skb, &rt->dst, sizeof(struct iphdr),
+ vni, md, flags, udp_sum);
+ if (err < 0)
+ goto xmit_tx_error;
+
+ udp_tunnel_xmit_skb(rt, sk, skb, saddr,
+ dst->sin.sin_addr.s_addr, tos, ttl, df,
+ src_port, dst_port, xnet, !udp_sum);
#if IS_ENABLED(CONFIG_IPV6)
} else {
struct dst_entry *ndst;
@@ -2074,8 +2022,9 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
sk = vxlan->vn6_sock->sock->sk;
ndst = vxlan6_get_route(vxlan, skb,
- rdst ? rdst->remote_ifindex : 0,
- &dst->sin6.sin6_addr, &saddr);
+ rdst ? rdst->remote_ifindex : 0, tos,
+ label, &dst->sin6.sin6_addr, &saddr,
+ dst_cache, info);
if (IS_ERR(ndst)) {
netdev_dbg(dev, "no route to %pI6\n",
&dst->sin6.sin6_addr);
@@ -2107,18 +2056,21 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
return;
}
- if (info) {
- if (info->key.tun_flags & TUNNEL_CSUM)
- flags &= ~VXLAN_F_UDP_ZERO_CSUM6_TX;
- else
- flags |= VXLAN_F_UDP_ZERO_CSUM6_TX;
- }
+ if (!info)
+ udp_sum = !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX);
+ tos = ip_tunnel_ecn_encap(tos, old_iph, skb);
ttl = ttl ? : ip6_dst_hoplimit(ndst);
- err = vxlan6_xmit_skb(ndst, sk, skb, dev, &saddr, &dst->sin6.sin6_addr,
- 0, ttl, src_port, dst_port, htonl(vni << 8), md,
- !net_eq(vxlan->net, dev_net(vxlan->dev)),
- flags);
+ skb_scrub_packet(skb, xnet);
+ err = vxlan_build_skb(skb, ndst, sizeof(struct ipv6hdr),
+ vni, md, flags, udp_sum);
+ if (err < 0) {
+ dst_release(ndst);
+ return;
+ }
+ udp_tunnel6_xmit_skb(ndst, sk, skb, dev,
+ &saddr, &dst->sin6.sin6_addr, tos, ttl,
+ label, src_port, dst_port, !udp_sum);
#endif
}
@@ -2128,6 +2080,9 @@ drop:
dev->stats.tx_dropped++;
goto tx_free;
+xmit_tx_error:
+ /* skb is already freed. */
+ skb = NULL;
rt_tx_error:
ip_rt_put(rt);
tx_error:
@@ -2267,7 +2222,7 @@ static void vxlan_cleanup(unsigned long arg)
static void vxlan_vs_add_dev(struct vxlan_sock *vs, struct vxlan_dev *vxlan)
{
struct vxlan_net *vn = net_generic(vxlan->net, vxlan_net_id);
- __u32 vni = vxlan->default_dst.remote_vni;
+ __be32 vni = vxlan->default_dst.remote_vni;
spin_lock(&vn->sock_lock);
hlist_add_head_rcu(&vxlan->hlist, vni_head(vs, vni));
@@ -2410,31 +2365,6 @@ static int vxlan_change_mtu(struct net_device *dev, int new_mtu)
return __vxlan_change_mtu(dev, lowerdev, dst, new_mtu, true);
}
-static int egress_ipv4_tun_info(struct net_device *dev, struct sk_buff *skb,
- struct ip_tunnel_info *info,
- __be16 sport, __be16 dport)
-{
- struct vxlan_dev *vxlan = netdev_priv(dev);
- struct rtable *rt;
- struct flowi4 fl4;
-
- memset(&fl4, 0, sizeof(fl4));
- fl4.flowi4_tos = RT_TOS(info->key.tos);
- fl4.flowi4_mark = skb->mark;
- fl4.flowi4_proto = IPPROTO_UDP;
- fl4.daddr = info->key.u.ipv4.dst;
-
- rt = ip_route_output_key(vxlan->net, &fl4);
- if (IS_ERR(rt))
- return PTR_ERR(rt);
- ip_rt_put(rt);
-
- info->key.u.ipv4.src = fl4.saddr;
- info->key.tp_src = sport;
- info->key.tp_dst = dport;
- return 0;
-}
-
static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
{
struct vxlan_dev *vxlan = netdev_priv(dev);
@@ -2446,28 +2376,34 @@ static int vxlan_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb)
dport = info->key.tp_dst ? : vxlan->cfg.dst_port;
if (ip_tunnel_info_af(info) == AF_INET) {
+ struct rtable *rt;
+
if (!vxlan->vn4_sock)
return -EINVAL;
- return egress_ipv4_tun_info(dev, skb, info, sport, dport);
+ rt = vxlan_get_route(vxlan, skb, 0, info->key.tos,
+ info->key.u.ipv4.dst,
+ &info->key.u.ipv4.src, NULL, info);
+ if (IS_ERR(rt))
+ return PTR_ERR(rt);
+ ip_rt_put(rt);
} else {
#if IS_ENABLED(CONFIG_IPV6)
struct dst_entry *ndst;
if (!vxlan->vn6_sock)
return -EINVAL;
- ndst = vxlan6_get_route(vxlan, skb, 0,
- &info->key.u.ipv6.dst,
- &info->key.u.ipv6.src);
+ ndst = vxlan6_get_route(vxlan, skb, 0, info->key.tos,
+ info->key.label, &info->key.u.ipv6.dst,
+ &info->key.u.ipv6.src, NULL, info);
if (IS_ERR(ndst))
return PTR_ERR(ndst);
dst_release(ndst);
-
- info->key.tp_src = sport;
- info->key.tp_dst = dport;
#else /* !CONFIG_IPV6 */
return -EPFNOSUPPORT;
#endif
}
+ info->key.tp_src = sport;
+ info->key.tp_dst = dport;
return 0;
}
@@ -2572,6 +2508,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = {
[IFLA_VXLAN_LOCAL6] = { .len = sizeof(struct in6_addr) },
[IFLA_VXLAN_TOS] = { .type = NLA_U8 },
[IFLA_VXLAN_TTL] = { .type = NLA_U8 },
+ [IFLA_VXLAN_LABEL] = { .type = NLA_U32 },
[IFLA_VXLAN_LEARNING] = { .type = NLA_U8 },
[IFLA_VXLAN_AGEING] = { .type = NLA_U32 },
[IFLA_VXLAN_LIMIT] = { .type = NLA_U32 },
@@ -2719,7 +2656,7 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, bool ipv6,
/* Mark socket as an encapsulation socket. */
tunnel_cfg.sk_user_data = vs;
tunnel_cfg.encap_type = 1;
- tunnel_cfg.encap_rcv = vxlan_udp_encap_recv;
+ tunnel_cfg.encap_rcv = vxlan_rcv;
tunnel_cfg.encap_destroy = NULL;
setup_udp_tunnel_sock(net, sock, &tunnel_cfg);
@@ -2806,6 +2743,11 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev,
vxlan->flags |= VXLAN_F_IPV6;
}
+ if (conf->label && !use_ipv6) {
+ pr_info("label only supported in use with IPv6\n");
+ return -EINVAL;
+ }
+
if (conf->remote_ifindex) {
lowerdev = __dev_get_by_index(src_net, conf->remote_ifindex);
dst->remote_ifindex = conf->remote_ifindex;
@@ -2921,7 +2863,7 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
memset(&conf, 0, sizeof(conf));
if (data[IFLA_VXLAN_ID])
- conf.vni = nla_get_u32(data[IFLA_VXLAN_ID]);
+ conf.vni = cpu_to_be32(nla_get_u32(data[IFLA_VXLAN_ID]));
if (data[IFLA_VXLAN_GROUP]) {
conf.remote_ip.sin.sin_addr.s_addr = nla_get_in_addr(data[IFLA_VXLAN_GROUP]);
@@ -2954,6 +2896,10 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
if (data[IFLA_VXLAN_TTL])
conf.ttl = nla_get_u8(data[IFLA_VXLAN_TTL]);
+ if (data[IFLA_VXLAN_LABEL])
+ conf.label = nla_get_be32(data[IFLA_VXLAN_LABEL]) &
+ IPV6_FLOWLABEL_MASK;
+
if (!data[IFLA_VXLAN_LEARNING] || nla_get_u8(data[IFLA_VXLAN_LEARNING]))
conf.flags |= VXLAN_F_LEARN;
@@ -2989,8 +2935,9 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
if (data[IFLA_VXLAN_PORT])
conf.dst_port = nla_get_be16(data[IFLA_VXLAN_PORT]);
- if (data[IFLA_VXLAN_UDP_CSUM] && nla_get_u8(data[IFLA_VXLAN_UDP_CSUM]))
- conf.flags |= VXLAN_F_UDP_CSUM;
+ if (data[IFLA_VXLAN_UDP_CSUM] &&
+ !nla_get_u8(data[IFLA_VXLAN_UDP_CSUM]))
+ conf.flags |= VXLAN_F_UDP_ZERO_CSUM_TX;
if (data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX] &&
nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_TX]))
@@ -3025,7 +2972,7 @@ static int vxlan_newlink(struct net *src_net, struct net_device *dev,
break;
case -EEXIST:
- pr_info("duplicate VNI %u\n", conf.vni);
+ pr_info("duplicate VNI %u\n", be32_to_cpu(conf.vni));
break;
}
@@ -3056,6 +3003,7 @@ static size_t vxlan_get_size(const struct net_device *dev)
nla_total_size(sizeof(struct in6_addr)) + /* IFLA_VXLAN_LOCAL{6} */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TTL */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_TOS */
+ nla_total_size(sizeof(__be32)) + /* IFLA_VXLAN_LABEL */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_LEARNING */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_PROXY */
nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_RSC */
@@ -3083,7 +3031,7 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
.high = htons(vxlan->cfg.port_max),
};
- if (nla_put_u32(skb, IFLA_VXLAN_ID, dst->remote_vni))
+ if (nla_put_u32(skb, IFLA_VXLAN_ID, be32_to_cpu(dst->remote_vni)))
goto nla_put_failure;
if (!vxlan_addr_any(&dst->remote_ip)) {
@@ -3119,6 +3067,7 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
if (nla_put_u8(skb, IFLA_VXLAN_TTL, vxlan->cfg.ttl) ||
nla_put_u8(skb, IFLA_VXLAN_TOS, vxlan->cfg.tos) ||
+ nla_put_be32(skb, IFLA_VXLAN_LABEL, vxlan->cfg.label) ||
nla_put_u8(skb, IFLA_VXLAN_LEARNING,
!!(vxlan->flags & VXLAN_F_LEARN)) ||
nla_put_u8(skb, IFLA_VXLAN_PROXY,
@@ -3134,7 +3083,7 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev)
nla_put_u32(skb, IFLA_VXLAN_LIMIT, vxlan->cfg.addrmax) ||
nla_put_be16(skb, IFLA_VXLAN_PORT, vxlan->cfg.dst_port) ||
nla_put_u8(skb, IFLA_VXLAN_UDP_CSUM,
- !!(vxlan->flags & VXLAN_F_UDP_CSUM)) ||
+ !(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM_TX)) ||
nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_TX,
!!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_TX)) ||
nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_RX,