summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--drivers/net/tun.c17
-rw-r--r--drivers/net/veth.c26
-rw-r--r--include/linux/netdevice.h31
-rw-r--r--net/bridge/br_if.c37
-rw-r--r--net/openvswitch/datapath.c40
-rw-r--r--net/openvswitch/datapath.h4
-rw-r--r--net/openvswitch/vport-internal_dev.c10
7 files changed, 161 insertions, 4 deletions
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 88bb8cc3555b..afdf950617c3 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -187,6 +187,7 @@ struct tun_struct {
#define TUN_USER_FEATURES (NETIF_F_HW_CSUM|NETIF_F_TSO_ECN|NETIF_F_TSO| \
NETIF_F_TSO6|NETIF_F_UFO)
+ int align;
int vnet_hdr_sz;
int sndbuf;
struct tap_filter txflt;
@@ -934,6 +935,17 @@ static void tun_poll_controller(struct net_device *dev)
return;
}
#endif
+
+static void tun_set_headroom(struct net_device *dev, int new_hr)
+{
+ struct tun_struct *tun = netdev_priv(dev);
+
+ if (new_hr < NET_SKB_PAD)
+ new_hr = NET_SKB_PAD;
+
+ tun->align = new_hr;
+}
+
static const struct net_device_ops tun_netdev_ops = {
.ndo_uninit = tun_net_uninit,
.ndo_open = tun_net_open,
@@ -945,6 +957,7 @@ static const struct net_device_ops tun_netdev_ops = {
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = tun_poll_controller,
#endif
+ .ndo_set_rx_headroom = tun_set_headroom,
};
static const struct net_device_ops tap_netdev_ops = {
@@ -962,6 +975,7 @@ static const struct net_device_ops tap_netdev_ops = {
.ndo_poll_controller = tun_poll_controller,
#endif
.ndo_features_check = passthru_features_check,
+ .ndo_set_rx_headroom = tun_set_headroom,
};
static void tun_flow_init(struct tun_struct *tun)
@@ -1086,7 +1100,7 @@ static ssize_t tun_get_user(struct tun_struct *tun, struct tun_file *tfile,
struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) };
struct sk_buff *skb;
size_t total_len = iov_iter_count(from);
- size_t len = total_len, align = NET_SKB_PAD, linear;
+ size_t len = total_len, align = tun->align, linear;
struct virtio_net_hdr gso = { 0 };
int good_linear;
int copylen;
@@ -1694,6 +1708,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
tun->txflt.count = 0;
tun->vnet_hdr_sz = sizeof(struct virtio_net_hdr);
+ tun->align = NET_SKB_PAD;
tun->filter_attached = false;
tun->sndbuf = tfile->socket.sk->sk_sndbuf;
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index ba21d072be31..4f30a6ae50d0 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -35,6 +35,7 @@ struct pcpu_vstats {
struct veth_priv {
struct net_device __rcu *peer;
atomic64_t dropped;
+ unsigned requested_headroom;
};
/*
@@ -271,6 +272,29 @@ static int veth_get_iflink(const struct net_device *dev)
return iflink;
}
+static void veth_set_rx_headroom(struct net_device *dev, int new_hr)
+{
+ struct veth_priv *peer_priv, *priv = netdev_priv(dev);
+ struct net_device *peer;
+
+ if (new_hr < 0)
+ new_hr = 0;
+
+ rcu_read_lock();
+ peer = rcu_dereference(priv->peer);
+ if (unlikely(!peer))
+ goto out;
+
+ peer_priv = netdev_priv(peer);
+ priv->requested_headroom = new_hr;
+ new_hr = max(priv->requested_headroom, peer_priv->requested_headroom);
+ dev->needed_headroom = new_hr;
+ peer->needed_headroom = new_hr;
+
+out:
+ rcu_read_unlock();
+}
+
static const struct net_device_ops veth_netdev_ops = {
.ndo_init = veth_dev_init,
.ndo_open = veth_open,
@@ -285,6 +309,7 @@ static const struct net_device_ops veth_netdev_ops = {
#endif
.ndo_get_iflink = veth_get_iflink,
.ndo_features_check = passthru_features_check,
+ .ndo_set_rx_headroom = veth_set_rx_headroom,
};
#define VETH_FEATURES (NETIF_F_SG | NETIF_F_FRAGLIST | NETIF_F_ALL_TSO | \
@@ -301,6 +326,7 @@ static void veth_setup(struct net_device *dev)
dev->priv_flags &= ~IFF_TX_SKB_SHARING;
dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
dev->priv_flags |= IFF_NO_QUEUE;
+ dev->priv_flags |= IFF_PHONY_HEADROOM;
dev->netdev_ops = &veth_netdev_ops;
dev->ethtool_ops = &veth_ethtool_ops;
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index e52077ffe5ed..efe7cec111fa 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1093,6 +1093,12 @@ struct tc_to_netdev {
* This function is used to get egress tunnel information for given skb.
* This is useful for retrieving outer tunnel header parameters while
* sampling packet.
+ * void (*ndo_set_rx_headroom)(struct net_device *dev, int needed_headroom);
+ * This function is used to specify the headroom that the skb must
+ * consider when allocation skb during packet reception. Setting
+ * appropriate rx headroom value allows avoiding skb head copy on
+ * forward. Setting a negative value reset the rx headroom to the
+ * default value.
*
*/
struct net_device_ops {
@@ -1278,6 +1284,8 @@ struct net_device_ops {
bool proto_down);
int (*ndo_fill_metadata_dst)(struct net_device *dev,
struct sk_buff *skb);
+ void (*ndo_set_rx_headroom)(struct net_device *dev,
+ int needed_headroom);
};
/**
@@ -1315,6 +1323,8 @@ struct net_device_ops {
* @IFF_L3MDEV_SLAVE: device is enslaved to an L3 master device
* @IFF_TEAM: device is a team device
* @IFF_RXFH_CONFIGURED: device has had Rx Flow indirection table configured
+ * @IFF_PHONY_HEADROOM: the headroom value is controlled by an external
+ * entity (i.e. the master device for bridged veth)
*/
enum netdev_priv_flags {
IFF_802_1Q_VLAN = 1<<0,
@@ -1343,6 +1353,7 @@ enum netdev_priv_flags {
IFF_L3MDEV_SLAVE = 1<<23,
IFF_TEAM = 1<<24,
IFF_RXFH_CONFIGURED = 1<<25,
+ IFF_PHONY_HEADROOM = 1<<26,
};
#define IFF_802_1Q_VLAN IFF_802_1Q_VLAN
@@ -1937,6 +1948,26 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev,
struct sk_buff *skb,
void *accel_priv);
+/* returns the headroom that the master device needs to take in account
+ * when forwarding to this dev
+ */
+static inline unsigned netdev_get_fwd_headroom(struct net_device *dev)
+{
+ return dev->priv_flags & IFF_PHONY_HEADROOM ? 0 : dev->needed_headroom;
+}
+
+static inline void netdev_set_rx_headroom(struct net_device *dev, int new_hr)
+{
+ if (dev->netdev_ops->ndo_set_rx_headroom)
+ dev->netdev_ops->ndo_set_rx_headroom(dev, new_hr);
+}
+
+/* set the device rx headroom to the dev's default */
+static inline void netdev_reset_rx_headroom(struct net_device *dev)
+{
+ netdev_set_rx_headroom(dev, -1);
+}
+
/*
* Net namespace inlines
*/
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index b37a1cc97d98..a73df3315df9 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -223,6 +223,31 @@ static void destroy_nbp_rcu(struct rcu_head *head)
destroy_nbp(p);
}
+static unsigned get_max_headroom(struct net_bridge *br)
+{
+ unsigned max_headroom = 0;
+ struct net_bridge_port *p;
+
+ list_for_each_entry(p, &br->port_list, list) {
+ unsigned dev_headroom = netdev_get_fwd_headroom(p->dev);
+
+ if (dev_headroom > max_headroom)
+ max_headroom = dev_headroom;
+ }
+
+ return max_headroom;
+}
+
+static void update_headroom(struct net_bridge *br, int new_hr)
+{
+ struct net_bridge_port *p;
+
+ list_for_each_entry(p, &br->port_list, list)
+ netdev_set_rx_headroom(p->dev, new_hr);
+
+ br->dev->needed_headroom = new_hr;
+}
+
/* Delete port(interface) from bridge is done in two steps.
* via RCU. First step, marks device as down. That deletes
* all the timers and stops new packets from flowing through.
@@ -248,6 +273,9 @@ static void del_nbp(struct net_bridge_port *p)
br_ifinfo_notify(RTM_DELLINK, p);
list_del_rcu(&p->list);
+ if (netdev_get_fwd_headroom(dev) == br->dev->needed_headroom)
+ update_headroom(br, get_max_headroom(br));
+ netdev_reset_rx_headroom(dev);
nbp_vlan_flush(p);
br_fdb_delete_by_port(br, p, 0, 1);
@@ -438,6 +466,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
{
struct net_bridge_port *p;
int err = 0;
+ unsigned br_hr, dev_hr;
bool changed_addr;
/* Don't allow bridging non-ethernet like devices, or DSA-enabled
@@ -505,8 +534,12 @@ int br_add_if(struct net_bridge *br, struct net_device *dev)
netdev_update_features(br->dev);
- if (br->dev->needed_headroom < dev->needed_headroom)
- br->dev->needed_headroom = dev->needed_headroom;
+ br_hr = br->dev->needed_headroom;
+ dev_hr = netdev_get_fwd_headroom(dev);
+ if (br_hr < dev_hr)
+ update_headroom(br, dev_hr);
+ else
+ netdev_set_rx_headroom(dev, br_hr);
if (br_fdb_insert(br, p, dev->dev_addr, 0))
netdev_err(dev, "failed insert local address bridge forwarding table\n");
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index c4e8455d5d56..e6a7d494df24 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1908,6 +1908,29 @@ static struct vport *lookup_vport(struct net *net,
return ERR_PTR(-EINVAL);
}
+/* Called with ovs_mutex */
+static void update_headroom(struct datapath *dp)
+{
+ unsigned dev_headroom, max_headroom = 0;
+ struct net_device *dev;
+ struct vport *vport;
+ int i;
+
+ for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
+ hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
+ dev = vport->dev;
+ dev_headroom = netdev_get_fwd_headroom(dev);
+ if (dev_headroom > max_headroom)
+ max_headroom = dev_headroom;
+ }
+ }
+
+ dp->max_headroom = max_headroom;
+ for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
+ hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node)
+ netdev_set_rx_headroom(vport->dev, max_headroom);
+}
+
static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
struct nlattr **a = info->attrs;
@@ -1973,6 +1996,12 @@ restart:
err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
info->snd_seq, 0, OVS_VPORT_CMD_NEW);
+
+ if (netdev_get_fwd_headroom(vport->dev) > dp->max_headroom)
+ update_headroom(dp);
+ else
+ netdev_set_rx_headroom(vport->dev, dp->max_headroom);
+
BUG_ON(err < 0);
ovs_unlock();
@@ -2039,8 +2068,10 @@ exit_unlock_free:
static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
+ bool must_update_headroom = false;
struct nlattr **a = info->attrs;
struct sk_buff *reply;
+ struct datapath *dp;
struct vport *vport;
int err;
@@ -2062,7 +2093,16 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid,
info->snd_seq, 0, OVS_VPORT_CMD_DEL);
BUG_ON(err < 0);
+
+ /* the vport deletion may trigger dp headroom update */
+ dp = vport->dp;
+ if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom)
+ must_update_headroom = true;
+ netdev_reset_rx_headroom(vport->dev);
ovs_dp_detach_port(vport);
+
+ if (must_update_headroom)
+ update_headroom(dp);
ovs_unlock();
ovs_notify(&dp_vport_genl_family, reply, info);
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 67bdecd9fdc1..427e39a045cf 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -68,6 +68,8 @@ struct dp_stats_percpu {
* ovs_mutex and RCU.
* @stats_percpu: Per-CPU datapath statistics.
* @net: Reference to net namespace.
+ * @max_headroom: the maximum headroom of all vports in this datapath; it will
+ * be used by all the internal vports in this dp.
*
* Context: See the comment on locking at the top of datapath.c for additional
* locking information.
@@ -89,6 +91,8 @@ struct datapath {
possible_net_t net;
u32 user_features;
+
+ u32 max_headroom;
};
/**
diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c
index ec76398a792f..83a5534abd31 100644
--- a/net/openvswitch/vport-internal_dev.c
+++ b/net/openvswitch/vport-internal_dev.c
@@ -138,6 +138,11 @@ internal_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
return stats;
}
+void internal_set_rx_headroom(struct net_device *dev, int new_hr)
+{
+ dev->needed_headroom = new_hr;
+}
+
static const struct net_device_ops internal_dev_netdev_ops = {
.ndo_open = internal_dev_open,
.ndo_stop = internal_dev_stop,
@@ -145,6 +150,7 @@ static const struct net_device_ops internal_dev_netdev_ops = {
.ndo_set_mac_address = eth_mac_addr,
.ndo_change_mtu = internal_dev_change_mtu,
.ndo_get_stats64 = internal_get_stats,
+ .ndo_set_rx_headroom = internal_set_rx_headroom,
};
static struct rtnl_link_ops internal_dev_link_ops __read_mostly = {
@@ -158,7 +164,8 @@ static void do_setup(struct net_device *netdev)
netdev->netdev_ops = &internal_dev_netdev_ops;
netdev->priv_flags &= ~IFF_TX_SKB_SHARING;
- netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH;
+ netdev->priv_flags |= IFF_LIVE_ADDR_CHANGE | IFF_OPENVSWITCH |
+ IFF_PHONY_HEADROOM;
netdev->destructor = internal_dev_destructor;
netdev->ethtool_ops = &internal_dev_ethtool_ops;
netdev->rtnl_link_ops = &internal_dev_link_ops;
@@ -199,6 +206,7 @@ static struct vport *internal_dev_create(const struct vport_parms *parms)
err = -ENOMEM;
goto error_free_netdev;
}
+ vport->dev->needed_headroom = vport->dp->max_headroom;
dev_net_set(vport->dev, ovs_dp_get_net(vport->dp));
internal_dev = internal_dev_priv(vport->dev);