summaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/datagram.c2
-rw-r--r--net/core/datagram.h15
-rw-r--r--net/core/dev.c174
-rw-r--r--net/core/dev.h23
-rw-r--r--net/core/devlink.c303
-rw-r--r--net/core/drop_monitor.c2
-rw-r--r--net/core/gro.c8
-rw-r--r--net/core/lwt_bpf.c7
-rw-r--r--net/core/neighbour.c2
-rw-r--r--net/core/net-sysfs.c21
-rw-r--r--net/core/page_pool.c5
-rw-r--r--net/core/rtnetlink.c250
-rw-r--r--net/core/secure_seq.c16
-rw-r--r--net/core/skbuff.c46
-rw-r--r--net/core/sock.c88
-rw-r--r--net/core/sysctl_net_core.c14
16 files changed, 412 insertions, 564 deletions
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 70126d15ca6e..50f4faeea76c 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -62,8 +62,6 @@
#include <trace/events/skb.h>
#include <net/busy_poll.h>
-#include "datagram.h"
-
/*
* Is a socket 'connection oriented' ?
*/
diff --git a/net/core/datagram.h b/net/core/datagram.h
deleted file mode 100644
index bcfb75bfa3b2..000000000000
--- a/net/core/datagram.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-#ifndef _NET_CORE_DATAGRAM_H_
-#define _NET_CORE_DATAGRAM_H_
-
-#include <linux/types.h>
-
-struct sock;
-struct sk_buff;
-struct iov_iter;
-
-int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb,
- struct iov_iter *from, size_t length);
-
-#endif /* _NET_CORE_DATAGRAM_H_ */
diff --git a/net/core/dev.c b/net/core/dev.c
index 611bd7197064..721ba9c26554 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -682,11 +682,11 @@ int dev_fill_forward_path(const struct net_device *dev, const u8 *daddr,
const struct net_device *last_dev;
struct net_device_path_ctx ctx = {
.dev = dev,
- .daddr = daddr,
};
struct net_device_path *path;
int ret = 0;
+ memcpy(ctx.daddr, daddr, sizeof(ctx.daddr));
stack->num_paths = 0;
while (ctx.dev && ctx.dev->netdev_ops->ndo_fill_forward_path) {
last_dev = ctx.dev;
@@ -2993,6 +2993,52 @@ undo_rx:
EXPORT_SYMBOL(netif_set_real_num_queues);
/**
+ * netif_set_tso_max_size() - set the max size of TSO frames supported
+ * @dev: netdev to update
+ * @size: max skb->len of a TSO frame
+ *
+ * Set the limit on the size of TSO super-frames the device can handle.
+ * Unless explicitly set the stack will assume the value of
+ * %GSO_LEGACY_MAX_SIZE.
+ */
+void netif_set_tso_max_size(struct net_device *dev, unsigned int size)
+{
+ dev->tso_max_size = min(GSO_MAX_SIZE, size);
+ if (size < READ_ONCE(dev->gso_max_size))
+ netif_set_gso_max_size(dev, size);
+}
+EXPORT_SYMBOL(netif_set_tso_max_size);
+
+/**
+ * netif_set_tso_max_segs() - set the max number of segs supported for TSO
+ * @dev: netdev to update
+ * @segs: max number of TCP segments
+ *
+ * Set the limit on the number of TCP segments the device can generate from
+ * a single TSO super-frame.
+ * Unless explicitly set the stack will assume the value of %GSO_MAX_SEGS.
+ */
+void netif_set_tso_max_segs(struct net_device *dev, unsigned int segs)
+{
+ dev->tso_max_segs = segs;
+ if (segs < READ_ONCE(dev->gso_max_segs))
+ netif_set_gso_max_segs(dev, segs);
+}
+EXPORT_SYMBOL(netif_set_tso_max_segs);
+
+/**
+ * netif_inherit_tso_max() - copy all TSO limits from a lower device to an upper
+ * @to: netdev to update
+ * @from: netdev from which to copy the limits
+ */
+void netif_inherit_tso_max(struct net_device *to, const struct net_device *from)
+{
+ netif_set_tso_max_size(to, from->tso_max_size);
+ netif_set_tso_max_segs(to, from->tso_max_segs);
+}
+EXPORT_SYMBOL(netif_inherit_tso_max);
+
+/**
* netif_get_num_default_rss_queues - default number of RSS queues
*
* Default value is the number of physical cores if there are only 1 or 2, or
@@ -3220,12 +3266,18 @@ int skb_checksum_help(struct sk_buff *skb)
}
offset = skb_checksum_start_offset(skb);
- BUG_ON(offset >= skb_headlen(skb));
+ ret = -EINVAL;
+ if (WARN_ON_ONCE(offset >= skb_headlen(skb))) {
+ DO_ONCE_LITE(skb_dump, KERN_ERR, skb, false);
+ goto out;
+ }
csum = skb_checksum(skb, offset, skb->len - offset, 0);
offset += skb->csum_offset;
- BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
-
+ if (WARN_ON_ONCE(offset + sizeof(__sum16) > skb_headlen(skb))) {
+ DO_ONCE_LITE(skb_dump, KERN_ERR, skb, false);
+ goto out;
+ }
ret = skb_ensure_writable(skb, offset + sizeof(__sum16));
if (ret)
goto out;
@@ -4086,32 +4138,27 @@ struct netdev_queue *netdev_core_pick_tx(struct net_device *dev,
}
/**
- * __dev_queue_xmit - transmit a buffer
- * @skb: buffer to transmit
- * @sb_dev: suboordinate device used for L2 forwarding offload
+ * __dev_queue_xmit() - transmit a buffer
+ * @skb: buffer to transmit
+ * @sb_dev: suboordinate device used for L2 forwarding offload
*
- * Queue a buffer for transmission to a network device. The caller must
- * have set the device and priority and built the buffer before calling
- * this function. The function can be called from an interrupt.
+ * Queue a buffer for transmission to a network device. The caller must
+ * have set the device and priority and built the buffer before calling
+ * this function. The function can be called from an interrupt.
*
- * A negative errno code is returned on a failure. A success does not
- * guarantee the frame will be transmitted as it may be dropped due
- * to congestion or traffic shaping.
+ * When calling this method, interrupts MUST be enabled. This is because
+ * the BH enable code must have IRQs enabled so that it will not deadlock.
*
- * -----------------------------------------------------------------------------------
- * I notice this method can also return errors from the queue disciplines,
- * including NET_XMIT_DROP, which is a positive value. So, errors can also
- * be positive.
+ * Regardless of the return value, the skb is consumed, so it is currently
+ * difficult to retry a send to this method. (You can bump the ref count
+ * before sending to hold a reference for retry if you are careful.)
*
- * Regardless of the return value, the skb is consumed, so it is currently
- * difficult to retry a send to this method. (You can bump the ref count
- * before sending to hold a reference for retry if you are careful.)
- *
- * When calling this method, interrupts MUST be enabled. This is because
- * the BH enable code must have IRQs enabled so that it will not deadlock.
- * --BLG
+ * Return:
+ * * 0 - buffer successfully transmitted
+ * * positive qdisc return code - NET_XMIT_DROP etc.
+ * * negative errno - other errors
*/
-static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
+int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
{
struct net_device *dev = skb->dev;
struct netdev_queue *txq = NULL;
@@ -4235,18 +4282,7 @@ out:
rcu_read_unlock_bh();
return rc;
}
-
-int dev_queue_xmit(struct sk_buff *skb)
-{
- return __dev_queue_xmit(skb, NULL);
-}
-EXPORT_SYMBOL(dev_queue_xmit);
-
-int dev_queue_xmit_accel(struct sk_buff *skb, struct net_device *sb_dev)
-{
- return __dev_queue_xmit(skb, sb_dev);
-}
-EXPORT_SYMBOL(dev_queue_xmit_accel);
+EXPORT_SYMBOL(__dev_queue_xmit);
int __dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
{
@@ -4294,6 +4330,7 @@ int netdev_max_backlog __read_mostly = 1000;
EXPORT_SYMBOL(netdev_max_backlog);
int netdev_tstamp_prequeue __read_mostly = 1;
+unsigned int sysctl_skb_defer_max __read_mostly = 64;
int netdev_budget __read_mostly = 300;
/* Must be at least 2 jiffes to guarantee 1 jiffy timeout */
unsigned int __read_mostly netdev_budget_usecs = 2 * USEC_PER_SEC / HZ;
@@ -4546,9 +4583,12 @@ static void rps_trigger_softirq(void *data)
#endif /* CONFIG_RPS */
/* Called from hardirq (IPI) context */
-static void trigger_rx_softirq(void *data __always_unused)
+static void trigger_rx_softirq(void *data)
{
+ struct softnet_data *sd = data;
+
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+ smp_store_release(&sd->defer_ipi_scheduled, 0);
}
/*
@@ -6314,8 +6354,8 @@ int dev_set_threaded(struct net_device *dev, bool threaded)
}
EXPORT_SYMBOL(dev_set_threaded);
-void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
- int (*poll)(struct napi_struct *, int), int weight)
+void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
+ int (*poll)(struct napi_struct *, int), int weight)
{
if (WARN_ON(test_and_set_bit(NAPI_STATE_LISTED, &napi->state)))
return;
@@ -6348,7 +6388,7 @@ void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
if (dev->threaded && napi_kthread_create(napi))
dev->threaded = 0;
}
-EXPORT_SYMBOL(netif_napi_add);
+EXPORT_SYMBOL(netif_napi_add_weight);
void napi_disable(struct napi_struct *n)
{
@@ -6594,7 +6634,7 @@ static void skb_defer_free_flush(struct softnet_data *sd)
while (skb != NULL) {
next = skb->next;
- __kfree_skb(skb);
+ napi_consume_skb(skb, 1);
skb = next;
}
}
@@ -6615,9 +6655,11 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
for (;;) {
struct napi_struct *n;
+ skb_defer_free_flush(sd);
+
if (list_empty(&list)) {
if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))
- return;
+ goto end;
break;
}
@@ -6644,7 +6686,7 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
__raise_softirq_irqoff(NET_RX_SOFTIRQ);
net_rps_action_and_irq_enable(sd);
- skb_defer_free_flush(sd);
+end:;
}
struct netdev_adjacent {
@@ -9891,22 +9933,14 @@ void netif_tx_stop_all_queues(struct net_device *dev)
EXPORT_SYMBOL(netif_tx_stop_all_queues);
/**
- * register_netdevice - register a network device
- * @dev: device to register
+ * register_netdevice() - register a network device
+ * @dev: device to register
*
- * Take a completed network device structure and add it to the kernel
- * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
- * chain. 0 is returned on success. A negative errno code is returned
- * on a failure to set up the device, or if the name is a duplicate.
- *
- * Callers must hold the rtnl semaphore. You may want
- * register_netdev() instead of this.
- *
- * BUGS:
- * The locking appears insufficient to guarantee two parallel registers
- * will not get the same name.
+ * Take a prepared network device structure and make it externally accessible.
+ * A %NETDEV_REGISTER message is sent to the netdev notifier chain.
+ * Callers must hold the rtnl lock - you may want register_netdev()
+ * instead of this.
*/
-
int register_netdevice(struct net_device *dev)
{
int ret;
@@ -10357,7 +10391,7 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
}
EXPORT_SYMBOL(netdev_stats_to_stats64);
-struct net_device_core_stats *netdev_core_stats_alloc(struct net_device *dev)
+struct net_device_core_stats __percpu *netdev_core_stats_alloc(struct net_device *dev)
{
struct net_device_core_stats __percpu *p;
@@ -10368,11 +10402,7 @@ struct net_device_core_stats *netdev_core_stats_alloc(struct net_device *dev)
free_percpu(p);
/* This READ_ONCE() pairs with the cmpxchg() above */
- p = READ_ONCE(dev->core_stats);
- if (!p)
- return NULL;
-
- return this_cpu_ptr(p);
+ return READ_ONCE(dev->core_stats);
}
EXPORT_SYMBOL(netdev_core_stats_alloc);
@@ -10409,10 +10439,10 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
for_each_possible_cpu(i) {
core_stats = per_cpu_ptr(p, i);
- storage->rx_dropped += local_read(&core_stats->rx_dropped);
- storage->tx_dropped += local_read(&core_stats->tx_dropped);
- storage->rx_nohandler += local_read(&core_stats->rx_nohandler);
- storage->rx_otherhost_dropped += local_read(&core_stats->rx_otherhost_dropped);
+ storage->rx_dropped += READ_ONCE(core_stats->rx_dropped);
+ storage->tx_dropped += READ_ONCE(core_stats->tx_dropped);
+ storage->rx_nohandler += READ_ONCE(core_stats->rx_nohandler);
+ storage->rx_otherhost_dropped += READ_ONCE(core_stats->rx_otherhost_dropped);
}
}
return storage;
@@ -10571,9 +10601,11 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev_net_set(dev, &init_net);
- dev->gso_max_size = GSO_MAX_SIZE;
+ dev->gso_max_size = GSO_LEGACY_MAX_SIZE;
dev->gso_max_segs = GSO_MAX_SEGS;
- dev->gro_max_size = GRO_MAX_SIZE;
+ dev->gro_max_size = GRO_LEGACY_MAX_SIZE;
+ dev->tso_max_size = TSO_LEGACY_MAX_SIZE;
+ dev->tso_max_segs = TSO_MAX_SEGS;
dev->upper_level = 1;
dev->lower_level = 1;
#ifdef CONFIG_LOCKDEP
@@ -11355,7 +11387,7 @@ static int __init net_dev_init(void)
INIT_CSD(&sd->csd, rps_trigger_softirq, sd);
sd->cpu = i;
#endif
- INIT_CSD(&sd->defer_csd, trigger_rx_softirq, NULL);
+ INIT_CSD(&sd->defer_csd, trigger_rx_softirq, sd);
spin_lock_init(&sd->defer_lock);
init_gro_hash(&sd->backlog);
diff --git a/net/core/dev.h b/net/core/dev.h
index 27923df00637..cbb8a925175a 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -39,7 +39,7 @@ void dev_addr_check(struct net_device *dev);
/* sysctls not referred to from outside net/core/ */
extern int netdev_budget;
extern unsigned int netdev_budget_usecs;
-
+extern unsigned int sysctl_skb_defer_max;
extern int netdev_tstamp_prequeue;
extern int netdev_unregister_timeout_secs;
extern int weight_p;
@@ -88,4 +88,25 @@ int dev_change_carrier(struct net_device *dev, bool new_carrier);
void __dev_set_rx_mode(struct net_device *dev);
+static inline void netif_set_gso_max_size(struct net_device *dev,
+ unsigned int size)
+{
+ /* dev->gso_max_size is read locklessly from sk_setup_caps() */
+ WRITE_ONCE(dev->gso_max_size, size);
+}
+
+static inline void netif_set_gso_max_segs(struct net_device *dev,
+ unsigned int segs)
+{
+ /* dev->gso_max_segs is read locklessly from sk_setup_caps() */
+ WRITE_ONCE(dev->gso_max_segs, segs);
+}
+
+static inline void netif_set_gro_max_size(struct net_device *dev,
+ unsigned int size)
+{
+ /* This pairs with the READ_ONCE() in skb_gro_receive() */
+ WRITE_ONCE(dev->gro_max_size, size);
+}
+
#endif
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 5f441a0e34f4..5cc88490f18f 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -83,11 +83,10 @@ struct devlink_linecard {
const struct devlink_linecard_ops *ops;
void *priv;
enum devlink_linecard_state state;
- struct mutex state_lock; /* Protects state and device_list */
+ struct mutex state_lock; /* Protects state */
const char *type;
struct devlink_linecard_type *types;
unsigned int types_count;
- struct list_head device_list;
};
/**
@@ -2059,56 +2058,6 @@ struct devlink_linecard_type {
const void *priv;
};
-struct devlink_linecard_device {
- struct list_head list;
- unsigned int index;
- void *priv;
-};
-
-static int
-devlink_nl_linecard_device_fill(struct sk_buff *msg,
- struct devlink_linecard_device *linecard_device)
-{
- struct nlattr *attr;
-
- attr = nla_nest_start(msg, DEVLINK_ATTR_LINECARD_DEVICE);
- if (!attr)
- return -EMSGSIZE;
- if (nla_put_u32(msg, DEVLINK_ATTR_LINECARD_DEVICE_INDEX,
- linecard_device->index)) {
- nla_nest_cancel(msg, attr);
- return -EMSGSIZE;
- }
- nla_nest_end(msg, attr);
-
- return 0;
-}
-
-static int devlink_nl_linecard_devices_fill(struct sk_buff *msg,
- struct devlink_linecard *linecard)
-{
- struct devlink_linecard_device *linecard_device;
- struct nlattr *attr;
- int err;
-
- if (list_empty(&linecard->device_list))
- return 0;
-
- attr = nla_nest_start(msg, DEVLINK_ATTR_LINECARD_DEVICE_LIST);
- if (!attr)
- return -EMSGSIZE;
- list_for_each_entry(linecard_device, &linecard->device_list, list) {
- err = devlink_nl_linecard_device_fill(msg, linecard_device);
- if (err) {
- nla_nest_cancel(msg, attr);
- return err;
- }
- }
- nla_nest_end(msg, attr);
-
- return 0;
-}
-
static int devlink_nl_linecard_fill(struct sk_buff *msg,
struct devlink *devlink,
struct devlink_linecard *linecard,
@@ -2119,7 +2068,6 @@ static int devlink_nl_linecard_fill(struct sk_buff *msg,
struct devlink_linecard_type *linecard_type;
struct nlattr *attr;
void *hdr;
- int err;
int i;
hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
@@ -2152,10 +2100,6 @@ static int devlink_nl_linecard_fill(struct sk_buff *msg,
nla_nest_end(msg, attr);
}
- err = devlink_nl_linecard_devices_fill(msg, linecard);
- if (err)
- goto nla_put_failure;
-
genlmsg_end(msg, hdr);
return 0;
@@ -2425,191 +2369,6 @@ static int devlink_nl_cmd_linecard_set_doit(struct sk_buff *skb,
return 0;
}
-struct devlink_info_req {
- struct sk_buff *msg;
-};
-
-static int
-devlink_nl_linecard_device_info_fill(struct sk_buff *msg,
- struct devlink_linecard *linecard,
- struct devlink_linecard_device *linecard_device,
- struct netlink_ext_ack *extack)
-{
- struct nlattr *attr;
-
- attr = nla_nest_start(msg, DEVLINK_ATTR_LINECARD_DEVICE);
- if (!attr)
- return -EMSGSIZE;
- if (nla_put_u32(msg, DEVLINK_ATTR_LINECARD_DEVICE_INDEX,
- linecard_device->index)) {
- nla_nest_cancel(msg, attr);
- return -EMSGSIZE;
- }
- if (linecard->ops->device_info_get) {
- struct devlink_info_req req;
- int err;
-
- req.msg = msg;
- err = linecard->ops->device_info_get(linecard_device,
- linecard_device->priv,
- &req, extack);
- if (err) {
- nla_nest_cancel(msg, attr);
- return err;
- }
- }
- nla_nest_end(msg, attr);
-
- return 0;
-}
-
-static int devlink_nl_linecard_devices_info_fill(struct sk_buff *msg,
- struct devlink_linecard *linecard,
- struct netlink_ext_ack *extack)
-{
- struct devlink_linecard_device *linecard_device;
- struct nlattr *attr;
- int err;
-
- if (list_empty(&linecard->device_list))
- return 0;
-
- attr = nla_nest_start(msg, DEVLINK_ATTR_LINECARD_DEVICE_LIST);
- if (!attr)
- return -EMSGSIZE;
- list_for_each_entry(linecard_device, &linecard->device_list, list) {
- err = devlink_nl_linecard_device_info_fill(msg, linecard,
- linecard_device,
- extack);
- if (err) {
- nla_nest_cancel(msg, attr);
- return err;
- }
- }
- nla_nest_end(msg, attr);
-
- return 0;
-}
-
-static int
-devlink_nl_linecard_info_fill(struct sk_buff *msg, struct devlink *devlink,
- struct devlink_linecard *linecard,
- enum devlink_command cmd, u32 portid,
- u32 seq, int flags, struct netlink_ext_ack *extack)
-{
- struct devlink_info_req req;
- void *hdr;
- int err;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- err = -EMSGSIZE;
- if (devlink_nl_put_handle(msg, devlink))
- goto nla_put_failure;
- if (nla_put_u32(msg, DEVLINK_ATTR_LINECARD_INDEX, linecard->index))
- goto nla_put_failure;
-
- req.msg = msg;
- err = linecard->ops->info_get(linecard, linecard->priv, &req, extack);
- if (err)
- goto nla_put_failure;
-
- err = devlink_nl_linecard_devices_info_fill(msg, linecard, extack);
- if (err)
- goto nla_put_failure;
-
- genlmsg_end(msg, hdr);
- return 0;
-
-nla_put_failure:
- genlmsg_cancel(msg, hdr);
- return err;
-}
-
-static int devlink_nl_cmd_linecard_info_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink_linecard *linecard = info->user_ptr[1];
- struct devlink *devlink = linecard->devlink;
- struct sk_buff *msg;
- int err;
-
- if (!linecard->ops->info_get)
- return -EOPNOTSUPP;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- mutex_lock(&linecard->state_lock);
- err = devlink_nl_linecard_info_fill(msg, devlink, linecard,
- DEVLINK_CMD_LINECARD_INFO_GET,
- info->snd_portid, info->snd_seq, 0,
- info->extack);
- mutex_unlock(&linecard->state_lock);
- if (err) {
- nlmsg_free(msg);
- return err;
- }
-
- return genlmsg_reply(msg, info);
-}
-
-static int devlink_nl_cmd_linecard_info_get_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
-{
- struct devlink_linecard *linecard;
- struct devlink *devlink;
- int start = cb->args[0];
- unsigned long index;
- int idx = 0;
- int err = 0;
-
- mutex_lock(&devlink_mutex);
- xa_for_each_marked(&devlinks, index, devlink, DEVLINK_REGISTERED) {
- if (!devlink_try_get(devlink))
- continue;
-
- if (!net_eq(devlink_net(devlink), sock_net(msg->sk)))
- goto retry;
-
- mutex_lock(&devlink->linecards_lock);
- list_for_each_entry(linecard, &devlink->linecard_list, list) {
- if (idx < start || !linecard->ops->info_get) {
- idx++;
- continue;
- }
- mutex_lock(&linecard->state_lock);
- err = devlink_nl_linecard_info_fill(msg, devlink, linecard,
- DEVLINK_CMD_LINECARD_INFO_GET,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI,
- cb->extack);
- mutex_unlock(&linecard->state_lock);
- if (err) {
- mutex_unlock(&devlink->linecards_lock);
- devlink_put(devlink);
- goto out;
- }
- idx++;
- }
- mutex_unlock(&devlink->linecards_lock);
-retry:
- devlink_put(devlink);
- }
-out:
- mutex_unlock(&devlink_mutex);
-
- if (err != -EMSGSIZE)
- return err;
-
- cb->args[0] = idx;
- return msg->len;
-}
-
static int devlink_nl_sb_fill(struct sk_buff *msg, struct devlink *devlink,
struct devlink_sb *devlink_sb,
enum devlink_command cmd, u32 portid,
@@ -6602,6 +6361,10 @@ out_dev:
return err;
}
+struct devlink_info_req {
+ struct sk_buff *msg;
+};
+
int devlink_info_driver_name_put(struct devlink_info_req *req, const char *name)
{
return nla_put_string(req->msg, DEVLINK_ATTR_INFO_DRIVER_NAME, name);
@@ -9322,13 +9085,6 @@ static const struct genl_small_ops devlink_nl_ops[] = {
.internal_flags = DEVLINK_NL_FLAG_NEED_LINECARD,
},
{
- .cmd = DEVLINK_CMD_LINECARD_INFO_GET,
- .doit = devlink_nl_cmd_linecard_info_get_doit,
- .dumpit = devlink_nl_cmd_linecard_info_get_dumpit,
- .internal_flags = DEVLINK_NL_FLAG_NEED_LINECARD,
- /* can be retrieved by unprivileged users */
- },
- {
.cmd = DEVLINK_CMD_SB_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_get_doit,
@@ -10508,7 +10264,6 @@ devlink_linecard_create(struct devlink *devlink, unsigned int linecard_index,
linecard->priv = priv;
linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
mutex_init(&linecard->state_lock);
- INIT_LIST_HEAD(&linecard->device_list);
err = devlink_linecard_types_init(linecard);
if (err) {
@@ -10536,7 +10291,6 @@ void devlink_linecard_destroy(struct devlink_linecard *linecard)
struct devlink *devlink = linecard->devlink;
devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_DEL);
- WARN_ON(!list_empty(&linecard->device_list));
mutex_lock(&devlink->linecards_lock);
list_del(&linecard->list);
devlink_linecard_types_fini(linecard);
@@ -10546,52 +10300,6 @@ void devlink_linecard_destroy(struct devlink_linecard *linecard)
EXPORT_SYMBOL_GPL(devlink_linecard_destroy);
/**
- * devlink_linecard_device_create - Create a device on linecard
- *
- * @linecard: devlink linecard
- * @device_index: index of the linecard device
- * @priv: user priv pointer
- *
- * Return: Line card device structure or an ERR_PTR() encoded error code.
- */
-struct devlink_linecard_device *
-devlink_linecard_device_create(struct devlink_linecard *linecard,
- unsigned int device_index, void *priv)
-{
- struct devlink_linecard_device *linecard_device;
-
- linecard_device = kzalloc(sizeof(*linecard_device), GFP_KERNEL);
- if (!linecard_device)
- return ERR_PTR(-ENOMEM);
- linecard_device->index = device_index;
- linecard_device->priv = priv;
- mutex_lock(&linecard->state_lock);
- list_add_tail(&linecard_device->list, &linecard->device_list);
- devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
- mutex_unlock(&linecard->state_lock);
- return linecard_device;
-}
-EXPORT_SYMBOL_GPL(devlink_linecard_device_create);
-
-/**
- * devlink_linecard_device_destroy - Destroy device on linecard
- *
- * @linecard: devlink linecard
- * @linecard_device: devlink linecard device
- */
-void
-devlink_linecard_device_destroy(struct devlink_linecard *linecard,
- struct devlink_linecard_device *linecard_device)
-{
- mutex_lock(&linecard->state_lock);
- list_del(&linecard_device->list);
- devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
- mutex_unlock(&linecard->state_lock);
- kfree(linecard_device);
-}
-EXPORT_SYMBOL_GPL(devlink_linecard_device_destroy);
-
-/**
* devlink_linecard_provision_set - Set provisioning on linecard
*
* @linecard: devlink linecard
@@ -10623,7 +10331,6 @@ EXPORT_SYMBOL_GPL(devlink_linecard_provision_set);
void devlink_linecard_provision_clear(struct devlink_linecard *linecard)
{
mutex_lock(&linecard->state_lock);
- WARN_ON(!list_empty(&linecard->device_list));
linecard->state = DEVLINK_LINECARD_STATE_UNPROVISIONED;
linecard->type = NULL;
devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c
index b89e3e95bffc..41cac0e4834e 100644
--- a/net/core/drop_monitor.c
+++ b/net/core/drop_monitor.c
@@ -517,7 +517,7 @@ static void net_dm_packet_trace_kfree_skb_hit(void *ignore,
if (!nskb)
return;
- if ((unsigned int)reason >= SKB_DROP_REASON_MAX)
+ if (unlikely(reason >= SKB_DROP_REASON_MAX || reason <= 0))
reason = SKB_DROP_REASON_NOT_SPECIFIED;
cb = NET_DM_SKB_CB(nskb);
cb->reason = reason;
diff --git a/net/core/gro.c b/net/core/gro.c
index 78110edf5d4b..b4190eb08467 100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -167,6 +167,14 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
if (unlikely(p->len + len >= gro_max_size || NAPI_GRO_CB(skb)->flush))
return -E2BIG;
+ if (unlikely(p->len + len >= GRO_LEGACY_MAX_SIZE)) {
+ if (p->protocol != htons(ETH_P_IPV6) ||
+ skb_headroom(p) < sizeof(struct hop_jumbo_hdr) ||
+ ipv6_hdr(p)->nexthdr != IPPROTO_TCP ||
+ p->encapsulation)
+ return -E2BIG;
+ }
+
lp = NAPI_GRO_CB(p)->last;
pinfo = skb_shinfo(lp);
diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c
index 349480ef68a5..8b6b5e72b217 100644
--- a/net/core/lwt_bpf.c
+++ b/net/core/lwt_bpf.c
@@ -159,10 +159,8 @@ static int bpf_output(struct net *net, struct sock *sk, struct sk_buff *skb)
return dst->lwtstate->orig_output(net, sk, skb);
}
-static int xmit_check_hhlen(struct sk_buff *skb)
+static int xmit_check_hhlen(struct sk_buff *skb, int hh_len)
{
- int hh_len = skb_dst(skb)->dev->hard_header_len;
-
if (skb_headroom(skb) < hh_len) {
int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb));
@@ -274,6 +272,7 @@ static int bpf_xmit(struct sk_buff *skb)
bpf = bpf_lwt_lwtunnel(dst->lwtstate);
if (bpf->xmit.prog) {
+ int hh_len = dst->dev->hard_header_len;
__be16 proto = skb->protocol;
int ret;
@@ -291,7 +290,7 @@ static int bpf_xmit(struct sk_buff *skb)
/* If the header was expanded, headroom might be too
* small for L2 header to come, expand as needed.
*/
- ret = xmit_check_hhlen(skb);
+ ret = xmit_check_hhlen(skb, hh_len);
if (unlikely(ret))
return ret;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index f64ebd050f6c..47b6c1f0fdbb 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -3728,7 +3728,7 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
char *p_name;
- t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
+ t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL_ACCOUNT);
if (!t)
goto err;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 4980c3a50475..e319e242dddf 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -746,7 +746,6 @@ static const struct attribute_group netstat_group = {
.attrs = netstat_attrs,
};
-#if IS_ENABLED(CONFIG_WIRELESS_EXT) || IS_ENABLED(CONFIG_CFG80211)
static struct attribute *wireless_attrs[] = {
NULL
};
@@ -755,7 +754,19 @@ static const struct attribute_group wireless_group = {
.name = "wireless",
.attrs = wireless_attrs,
};
+
+static bool wireless_group_needed(struct net_device *ndev)
+{
+#if IS_ENABLED(CONFIG_CFG80211)
+ if (ndev->ieee80211_ptr)
+ return true;
#endif
+#if IS_ENABLED(CONFIG_WIRELESS_EXT)
+ if (ndev->wireless_handlers)
+ return true;
+#endif
+ return false;
+}
#else /* CONFIG_SYSFS */
#define net_class_groups NULL
@@ -1996,14 +2007,8 @@ int netdev_register_kobject(struct net_device *ndev)
*groups++ = &netstat_group;
-#if IS_ENABLED(CONFIG_WIRELESS_EXT) || IS_ENABLED(CONFIG_CFG80211)
- if (ndev->ieee80211_ptr)
- *groups++ = &wireless_group;
-#if IS_ENABLED(CONFIG_WIRELESS_EXT)
- else if (ndev->wireless_handlers)
+ if (wireless_group_needed(ndev))
*groups++ = &wireless_group;
-#endif
-#endif
#endif /* CONFIG_SYSFS */
error = device_add(dev);
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index bdbadfaee867..f18e6e771993 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -704,8 +704,10 @@ struct page *page_pool_alloc_frag(struct page_pool *pool,
if (page && *offset + size > max_size) {
page = page_pool_drain_frag(pool, page);
- if (page)
+ if (page) {
+ alloc_stat_inc(pool, fast);
goto frag_reset;
+ }
}
if (!page) {
@@ -727,6 +729,7 @@ frag_reset:
pool->frag_users++;
pool->frag_offset = *offset + size;
+ alloc_stat_inc(pool, fast);
return page;
}
EXPORT_SYMBOL(page_pool_alloc_frag);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 73f2cbc440c9..ac45328607f7 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1064,6 +1064,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(4) /* IFLA_GSO_MAX_SEGS */
+ nla_total_size(4) /* IFLA_GSO_MAX_SIZE */
+ nla_total_size(4) /* IFLA_GRO_MAX_SIZE */
+ + nla_total_size(4) /* IFLA_TSO_MAX_SIZE */
+ + nla_total_size(4) /* IFLA_TSO_MAX_SEGS */
+ nla_total_size(1) /* IFLA_OPERSTATE */
+ nla_total_size(1) /* IFLA_LINKMODE */
+ nla_total_size(4) /* IFLA_CARRIER_CHANGES */
@@ -1769,6 +1771,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
nla_put_u32(skb, IFLA_GSO_MAX_SEGS, dev->gso_max_segs) ||
nla_put_u32(skb, IFLA_GSO_MAX_SIZE, dev->gso_max_size) ||
nla_put_u32(skb, IFLA_GRO_MAX_SIZE, dev->gro_max_size) ||
+ nla_put_u32(skb, IFLA_TSO_MAX_SIZE, dev->tso_max_size) ||
+ nla_put_u32(skb, IFLA_TSO_MAX_SEGS, dev->tso_max_segs) ||
#ifdef CONFIG_RPS
nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) ||
#endif
@@ -1922,6 +1926,8 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_NEW_IFINDEX] = NLA_POLICY_MIN(NLA_S32, 1),
[IFLA_PARENT_DEV_NAME] = { .type = NLA_NUL_STRING },
[IFLA_GRO_MAX_SIZE] = { .type = NLA_U32 },
+ [IFLA_TSO_MAX_SIZE] = { .type = NLA_REJECT },
+ [IFLA_TSO_MAX_SEGS] = { .type = NLA_REJECT },
};
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -2306,6 +2312,19 @@ invalid_attr:
return -EINVAL;
}
+static int rtnl_set_vf_rate(struct net_device *dev, int vf, int min_tx_rate,
+ int max_tx_rate)
+{
+ const struct net_device_ops *ops = dev->netdev_ops;
+
+ if (!ops->ndo_set_vf_rate)
+ return -EOPNOTSUPP;
+ if (max_tx_rate && max_tx_rate < min_tx_rate)
+ return -EINVAL;
+
+ return ops->ndo_set_vf_rate(dev, vf, min_tx_rate, max_tx_rate);
+}
+
static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[],
struct netlink_ext_ack *extack)
{
@@ -2341,14 +2360,6 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[],
}
}
- if (tb[IFLA_GRO_MAX_SIZE]) {
- u32 gro_max_size = nla_get_u32(tb[IFLA_GRO_MAX_SIZE]);
-
- if (gro_max_size > GRO_MAX_SIZE) {
- NL_SET_ERR_MSG(extack, "too big gro_max_size");
- return -EINVAL;
- }
- }
return 0;
}
@@ -2443,11 +2454,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
if (err < 0)
return err;
- err = -EOPNOTSUPP;
- if (ops->ndo_set_vf_rate)
- err = ops->ndo_set_vf_rate(dev, ivt->vf,
- ivf.min_tx_rate,
- ivt->rate);
+ err = rtnl_set_vf_rate(dev, ivt->vf,
+ ivf.min_tx_rate, ivt->rate);
if (err < 0)
return err;
}
@@ -2457,11 +2465,9 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb)
if (ivt->vf >= INT_MAX)
return -EINVAL;
- err = -EOPNOTSUPP;
- if (ops->ndo_set_vf_rate)
- err = ops->ndo_set_vf_rate(dev, ivt->vf,
- ivt->min_tx_rate,
- ivt->max_tx_rate);
+
+ err = rtnl_set_vf_rate(dev, ivt->vf,
+ ivt->min_tx_rate, ivt->max_tx_rate);
if (err < 0)
return err;
}
@@ -2803,7 +2809,7 @@ static int do_setlink(const struct sk_buff *skb,
if (tb[IFLA_GSO_MAX_SIZE]) {
u32 max_size = nla_get_u32(tb[IFLA_GSO_MAX_SIZE]);
- if (max_size > GSO_MAX_SIZE) {
+ if (max_size > dev->tso_max_size) {
err = -EINVAL;
goto errout;
}
@@ -2817,7 +2823,7 @@ static int do_setlink(const struct sk_buff *skb,
if (tb[IFLA_GSO_MAX_SEGS]) {
u32 max_segs = nla_get_u32(tb[IFLA_GSO_MAX_SEGS]);
- if (max_segs > GSO_MAX_SEGS) {
+ if (max_segs > GSO_MAX_SEGS || max_segs > dev->tso_max_segs) {
err = -EINVAL;
goto errout;
}
@@ -3302,23 +3308,116 @@ static int rtnl_group_changelink(const struct sk_buff *skb,
return 0;
}
-static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
- struct nlattr **attr, struct netlink_ext_ack *extack)
+static int rtnl_newlink_create(struct sk_buff *skb, struct ifinfomsg *ifm,
+ const struct rtnl_link_ops *ops,
+ struct nlattr **tb, struct nlattr **data,
+ struct netlink_ext_ack *extack)
{
- struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1];
unsigned char name_assign_type = NET_NAME_USER;
+ struct net *net = sock_net(skb->sk);
+ struct net *dest_net, *link_net;
+ struct net_device *dev;
+ char ifname[IFNAMSIZ];
+ int err;
+
+ if (!ops->alloc && !ops->setup)
+ return -EOPNOTSUPP;
+
+ if (tb[IFLA_IFNAME]) {
+ nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
+ } else {
+ snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind);
+ name_assign_type = NET_NAME_ENUM;
+ }
+
+ dest_net = rtnl_link_get_net_capable(skb, net, tb, CAP_NET_ADMIN);
+ if (IS_ERR(dest_net))
+ return PTR_ERR(dest_net);
+
+ if (tb[IFLA_LINK_NETNSID]) {
+ int id = nla_get_s32(tb[IFLA_LINK_NETNSID]);
+
+ link_net = get_net_ns_by_id(dest_net, id);
+ if (!link_net) {
+ NL_SET_ERR_MSG(extack, "Unknown network namespace id");
+ err = -EINVAL;
+ goto out;
+ }
+ err = -EPERM;
+ if (!netlink_ns_capable(skb, link_net->user_ns, CAP_NET_ADMIN))
+ goto out;
+ } else {
+ link_net = NULL;
+ }
+
+ dev = rtnl_create_link(link_net ? : dest_net, ifname,
+ name_assign_type, ops, tb, extack);
+ if (IS_ERR(dev)) {
+ err = PTR_ERR(dev);
+ goto out;
+ }
+
+ dev->ifindex = ifm->ifi_index;
+
+ if (ops->newlink)
+ err = ops->newlink(link_net ? : net, dev, tb, data, extack);
+ else
+ err = register_netdevice(dev);
+ if (err < 0) {
+ free_netdev(dev);
+ goto out;
+ }
+
+ err = rtnl_configure_link(dev, ifm);
+ if (err < 0)
+ goto out_unregister;
+ if (link_net) {
+ err = dev_change_net_namespace(dev, dest_net, ifname);
+ if (err < 0)
+ goto out_unregister;
+ }
+ if (tb[IFLA_MASTER]) {
+ err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack);
+ if (err)
+ goto out_unregister;
+ }
+out:
+ if (link_net)
+ put_net(link_net);
+ put_net(dest_net);
+ return err;
+out_unregister:
+ if (ops->newlink) {
+ LIST_HEAD(list_kill);
+
+ ops->dellink(dev, &list_kill);
+ unregister_netdevice_many(&list_kill);
+ } else {
+ unregister_netdevice(dev);
+ }
+ goto out;
+}
+
+struct rtnl_newlink_tbs {
+ struct nlattr *tb[IFLA_MAX + 1];
+ struct nlattr *attr[RTNL_MAX_TYPE + 1];
+ struct nlattr *slave_attr[RTNL_SLAVE_MAX_TYPE + 1];
+};
+
+static int __rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
+ struct rtnl_newlink_tbs *tbs,
+ struct netlink_ext_ack *extack)
+{
struct nlattr *linkinfo[IFLA_INFO_MAX + 1];
+ struct nlattr ** const tb = tbs->tb;
const struct rtnl_link_ops *m_ops;
struct net_device *master_dev;
struct net *net = sock_net(skb->sk);
const struct rtnl_link_ops *ops;
- struct nlattr *tb[IFLA_MAX + 1];
- struct net *dest_net, *link_net;
struct nlattr **slave_data;
char kind[MODULE_NAME_LEN];
struct net_device *dev;
struct ifinfomsg *ifm;
- char ifname[IFNAMSIZ];
struct nlattr **data;
bool link_specified;
int err;
@@ -3382,12 +3481,12 @@ replay:
return -EINVAL;
if (ops->maxtype && linkinfo[IFLA_INFO_DATA]) {
- err = nla_parse_nested_deprecated(attr, ops->maxtype,
+ err = nla_parse_nested_deprecated(tbs->attr, ops->maxtype,
linkinfo[IFLA_INFO_DATA],
ops->policy, extack);
if (err < 0)
return err;
- data = attr;
+ data = tbs->attr;
}
if (ops->validate) {
err = ops->validate(tb, data, extack);
@@ -3403,14 +3502,14 @@ replay:
if (m_ops->slave_maxtype &&
linkinfo[IFLA_INFO_SLAVE_DATA]) {
- err = nla_parse_nested_deprecated(slave_attr,
+ err = nla_parse_nested_deprecated(tbs->slave_attr,
m_ops->slave_maxtype,
linkinfo[IFLA_INFO_SLAVE_DATA],
m_ops->slave_policy,
extack);
if (err < 0)
return err;
- slave_data = slave_attr;
+ slave_data = tbs->slave_attr;
}
}
@@ -3478,96 +3577,21 @@ replay:
return -EOPNOTSUPP;
}
- if (!ops->alloc && !ops->setup)
- return -EOPNOTSUPP;
-
- if (tb[IFLA_IFNAME]) {
- nla_strscpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ);
- } else {
- snprintf(ifname, IFNAMSIZ, "%s%%d", ops->kind);
- name_assign_type = NET_NAME_ENUM;
- }
-
- dest_net = rtnl_link_get_net_capable(skb, net, tb, CAP_NET_ADMIN);
- if (IS_ERR(dest_net))
- return PTR_ERR(dest_net);
-
- if (tb[IFLA_LINK_NETNSID]) {
- int id = nla_get_s32(tb[IFLA_LINK_NETNSID]);
-
- link_net = get_net_ns_by_id(dest_net, id);
- if (!link_net) {
- NL_SET_ERR_MSG(extack, "Unknown network namespace id");
- err = -EINVAL;
- goto out;
- }
- err = -EPERM;
- if (!netlink_ns_capable(skb, link_net->user_ns, CAP_NET_ADMIN))
- goto out;
- } else {
- link_net = NULL;
- }
-
- dev = rtnl_create_link(link_net ? : dest_net, ifname,
- name_assign_type, ops, tb, extack);
- if (IS_ERR(dev)) {
- err = PTR_ERR(dev);
- goto out;
- }
-
- dev->ifindex = ifm->ifi_index;
-
- if (ops->newlink)
- err = ops->newlink(link_net ? : net, dev, tb, data, extack);
- else
- err = register_netdevice(dev);
- if (err < 0) {
- free_netdev(dev);
- goto out;
- }
-
- err = rtnl_configure_link(dev, ifm);
- if (err < 0)
- goto out_unregister;
- if (link_net) {
- err = dev_change_net_namespace(dev, dest_net, ifname);
- if (err < 0)
- goto out_unregister;
- }
- if (tb[IFLA_MASTER]) {
- err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack);
- if (err)
- goto out_unregister;
- }
-out:
- if (link_net)
- put_net(link_net);
- put_net(dest_net);
- return err;
-out_unregister:
- if (ops->newlink) {
- LIST_HEAD(list_kill);
-
- ops->dellink(dev, &list_kill);
- unregister_netdevice_many(&list_kill);
- } else {
- unregister_netdevice(dev);
- }
- goto out;
+ return rtnl_newlink_create(skb, ifm, ops, tb, data, extack);
}
static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
struct netlink_ext_ack *extack)
{
- struct nlattr **attr;
+ struct rtnl_newlink_tbs *tbs;
int ret;
- attr = kmalloc_array(RTNL_MAX_TYPE + 1, sizeof(*attr), GFP_KERNEL);
- if (!attr)
+ tbs = kmalloc(sizeof(*tbs), GFP_KERNEL);
+ if (!tbs)
return -ENOMEM;
- ret = __rtnl_newlink(skb, nlh, attr, extack);
- kfree(attr);
+ ret = __rtnl_newlink(skb, nlh, tbs, extack);
+ kfree(tbs);
return ret;
}
@@ -4240,7 +4264,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
ops = br_dev->netdev_ops;
if (!del_bulk) {
if (ops->ndo_fdb_del)
- err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid);
+ err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid, extack);
} else {
if (ops->ndo_fdb_del_bulk)
err = ops->ndo_fdb_del_bulk(ndm, tb, dev, vid,
@@ -4258,7 +4282,7 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
ops = dev->netdev_ops;
if (!del_bulk) {
if (ops->ndo_fdb_del)
- err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid);
+ err = ops->ndo_fdb_del(ndm, tb, dev, addr, vid, extack);
else
err = ndo_dflt_fdb_del(ndm, tb, dev, addr, vid);
} else {
diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c
index 9b8443774449..5f85e01d4093 100644
--- a/net/core/secure_seq.c
+++ b/net/core/secure_seq.c
@@ -22,6 +22,8 @@
static siphash_aligned_key_t net_secret;
static siphash_aligned_key_t ts_secret;
+#define EPHEMERAL_PORT_SHUFFLE_PERIOD (10 * HZ)
+
static __always_inline void net_secret_init(void)
{
net_get_random_once(&net_secret, sizeof(net_secret));
@@ -94,17 +96,19 @@ u32 secure_tcpv6_seq(const __be32 *saddr, const __be32 *daddr,
}
EXPORT_SYMBOL(secure_tcpv6_seq);
-u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
+u64 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr,
__be16 dport)
{
const struct {
struct in6_addr saddr;
struct in6_addr daddr;
+ unsigned int timeseed;
__be16 dport;
} __aligned(SIPHASH_ALIGNMENT) combined = {
.saddr = *(struct in6_addr *)saddr,
.daddr = *(struct in6_addr *)daddr,
- .dport = dport
+ .timeseed = jiffies / EPHEMERAL_PORT_SHUFFLE_PERIOD,
+ .dport = dport,
};
net_secret_init();
return siphash(&combined, offsetofend(typeof(combined), dport),
@@ -142,11 +146,13 @@ u32 secure_tcp_seq(__be32 saddr, __be32 daddr,
}
EXPORT_SYMBOL_GPL(secure_tcp_seq);
-u32 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
+u64 secure_ipv4_port_ephemeral(__be32 saddr, __be32 daddr, __be16 dport)
{
net_secret_init();
- return siphash_3u32((__force u32)saddr, (__force u32)daddr,
- (__force u16)dport, &net_secret);
+ return siphash_4u32((__force u32)saddr, (__force u32)daddr,
+ (__force u16)dport,
+ jiffies / EPHEMERAL_PORT_SHUFFLE_PERIOD,
+ &net_secret);
}
EXPORT_SYMBOL_GPL(secure_ipv4_port_ephemeral);
#endif
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 00980e62415b..5b3559cb1d82 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -80,7 +80,7 @@
#include <linux/user_namespace.h>
#include <linux/indirect_call_wrapper.h>
-#include "datagram.h"
+#include "dev.h"
#include "sock_destructor.h"
struct kmem_cache *skbuff_head_cache __ro_after_init;
@@ -772,6 +772,8 @@ void kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason)
if (!skb_unref(skb))
return;
+ DEBUG_NET_WARN_ON_ONCE(reason <= 0 || reason >= SKB_DROP_REASON_MAX);
+
trace_kfree_skb(skb, __builtin_return_address(0), reason);
__kfree_skb(skb);
}
@@ -1166,7 +1168,7 @@ void mm_unaccount_pinned_pages(struct mmpin *mmp)
}
EXPORT_SYMBOL_GPL(mm_unaccount_pinned_pages);
-struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size)
+static struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size)
{
struct ubuf_info *uarg;
struct sk_buff *skb;
@@ -1197,7 +1199,6 @@ struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size)
return uarg;
}
-EXPORT_SYMBOL_GPL(msg_zerocopy_alloc);
static inline struct sk_buff *skb_from_uarg(struct ubuf_info *uarg)
{
@@ -1340,18 +1341,11 @@ void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref)
}
EXPORT_SYMBOL_GPL(msg_zerocopy_put_abort);
-int skb_zerocopy_iter_dgram(struct sk_buff *skb, struct msghdr *msg, int len)
-{
- return __zerocopy_sg_from_iter(skb->sk, skb, &msg->msg_iter, len);
-}
-EXPORT_SYMBOL_GPL(skb_zerocopy_iter_dgram);
-
int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
struct msghdr *msg, int len,
struct ubuf_info *uarg)
{
struct ubuf_info *orig_uarg = skb_zcopy(skb);
- struct iov_iter orig_iter = msg->msg_iter;
int err, orig_len = skb->len;
/* An skb can only point to one uarg. This edge case happens when
@@ -1365,7 +1359,7 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb,
struct sock *save_sk = skb->sk;
/* Streams do not free skb on error. Reset to prev state. */
- msg->msg_iter = orig_iter;
+ iov_iter_revert(&msg->msg_iter, skb->len - orig_len);
skb->sk = sk;
___pskb_trim(skb, orig_len);
skb->sk = save_sk;
@@ -3898,7 +3892,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
unsigned int delta_len = 0;
struct sk_buff *tail = NULL;
struct sk_buff *nskb, *tmp;
- int err;
+ int len_diff, err;
skb_push(skb, -skb_network_offset(skb) + offset);
@@ -3938,9 +3932,11 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
skb_push(nskb, -skb_network_offset(nskb) + offset);
skb_release_head_state(nskb);
+ len_diff = skb_network_header_len(nskb) - skb_network_header_len(skb);
__copy_skb_header(nskb, skb);
skb_headers_offset_update(nskb, skb_headroom(nskb) - skb_headroom(skb));
+ nskb->transport_header += len_diff;
skb_copy_from_linear_data_offset(skb, -tnl_hlen,
nskb->data - tnl_hlen,
offset + tnl_hlen);
@@ -6501,37 +6497,35 @@ void skb_attempt_defer_free(struct sk_buff *skb)
int cpu = skb->alloc_cpu;
struct softnet_data *sd;
unsigned long flags;
+ unsigned int defer_max;
bool kick;
if (WARN_ON_ONCE(cpu >= nr_cpu_ids) ||
!cpu_online(cpu) ||
cpu == raw_smp_processor_id()) {
- __kfree_skb(skb);
+nodefer: __kfree_skb(skb);
return;
}
sd = &per_cpu(softnet_data, cpu);
- /* We do not send an IPI or any signal.
- * Remote cpu will eventually call skb_defer_free_flush()
- */
+ defer_max = READ_ONCE(sysctl_skb_defer_max);
+ if (READ_ONCE(sd->defer_count) >= defer_max)
+ goto nodefer;
+
spin_lock_irqsave(&sd->defer_lock, flags);
+ /* Send an IPI every time queue reaches half capacity. */
+ kick = sd->defer_count == (defer_max >> 1);
+ /* Paired with the READ_ONCE() few lines above */
+ WRITE_ONCE(sd->defer_count, sd->defer_count + 1);
+
skb->next = sd->defer_list;
/* Paired with READ_ONCE() in skb_defer_free_flush() */
WRITE_ONCE(sd->defer_list, skb);
- sd->defer_count++;
-
- /* kick every time queue length reaches 128.
- * This should avoid blocking in smp_call_function_single_async().
- * This condition should hardly be bit under normal conditions,
- * unless cpu suddenly stopped to receive NIC interrupts.
- */
- kick = sd->defer_count == 128;
-
spin_unlock_irqrestore(&sd->defer_lock, flags);
/* Make sure to trigger NET_RX_SOFTIRQ on the remote CPU
* if we are unlucky enough (this seems very unlikely).
*/
- if (unlikely(kick))
+ if (unlikely(kick) && !cmpxchg(&sd->defer_ipi_scheduled, 0, 1))
smp_call_function_single_async(cpu, &sd->defer_csd);
}
diff --git a/net/core/sock.c b/net/core/sock.c
index a0f3989de3d6..2ff40dd0a7a6 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -146,6 +146,9 @@
static DEFINE_MUTEX(proto_list_mutex);
static LIST_HEAD(proto_list);
+static void sock_def_write_space_wfree(struct sock *sk);
+static void sock_def_write_space(struct sock *sk);
+
/**
* sk_ns_capable - General socket capability test
* @sk: Socket to use a capability on or through
@@ -632,7 +635,9 @@ static int sock_bindtoindex_locked(struct sock *sk, int ifindex)
if (ifindex < 0)
goto out;
- sk->sk_bound_dev_if = ifindex;
+ /* Paired with all READ_ONCE() done locklessly. */
+ WRITE_ONCE(sk->sk_bound_dev_if, ifindex);
+
if (sk->sk_prot->rehash)
sk->sk_prot->rehash(sk);
sk_dst_reset(sk);
@@ -710,10 +715,11 @@ static int sock_getbindtodevice(struct sock *sk, char __user *optval,
{
int ret = -ENOPROTOOPT;
#ifdef CONFIG_NETDEVICES
+ int bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
struct net *net = sock_net(sk);
char devname[IFNAMSIZ];
- if (sk->sk_bound_dev_if == 0) {
+ if (bound_dev_if == 0) {
len = 0;
goto zero;
}
@@ -722,7 +728,7 @@ static int sock_getbindtodevice(struct sock *sk, char __user *optval,
if (len < IFNAMSIZ)
goto out;
- ret = netdev_get_name(net, devname, sk->sk_bound_dev_if);
+ ret = netdev_get_name(net, devname, bound_dev_if);
if (ret)
goto out;
@@ -1311,6 +1317,15 @@ set_sndbuf:
__sock_set_mark(sk, val);
break;
+ case SO_RCVMARK:
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_RAW) &&
+ !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
+ ret = -EPERM;
+ break;
+ }
+
+ sock_valbool_flag(sk, SOCK_RCVMARK, valbool);
+ break;
case SO_RXQ_OVFL:
sock_valbool_flag(sk, SOCK_RXQ_OVFL, valbool);
@@ -1737,6 +1752,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
v.val = sk->sk_mark;
break;
+ case SO_RCVMARK:
+ v.val = sock_flag(sk, SOCK_RCVMARK);
+ break;
+
case SO_RXQ_OVFL:
v.val = sock_flag(sk, SOCK_RXQ_OVFL);
break;
@@ -1845,7 +1864,7 @@ int sock_getsockopt(struct socket *sock, int level, int optname,
break;
case SO_BINDTOIFINDEX:
- v.val = sk->sk_bound_dev_if;
+ v.val = READ_ONCE(sk->sk_bound_dev_if);
break;
case SO_NETNS_COOKIE:
@@ -2277,6 +2296,19 @@ void sk_free_unlock_clone(struct sock *sk)
}
EXPORT_SYMBOL_GPL(sk_free_unlock_clone);
+static void sk_trim_gso_size(struct sock *sk)
+{
+ if (sk->sk_gso_max_size <= GSO_LEGACY_MAX_SIZE)
+ return;
+#if IS_ENABLED(CONFIG_IPV6)
+ if (sk->sk_family == AF_INET6 &&
+ sk_is_tcp(sk) &&
+ !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
+ return;
+#endif
+ sk->sk_gso_max_size = GSO_LEGACY_MAX_SIZE;
+}
+
void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
{
u32 max_segs = 1;
@@ -2296,6 +2328,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
/* pairs with the WRITE_ONCE() in netif_set_gso_max_size() */
sk->sk_gso_max_size = READ_ONCE(dst->dev->gso_max_size);
+ sk_trim_gso_size(sk);
sk->sk_gso_max_size -= (MAX_TCP_HEADER + 1);
/* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */
max_segs = max_t(u32, READ_ONCE(dst->dev->gso_max_segs), 1);
@@ -2317,8 +2350,20 @@ void sock_wfree(struct sk_buff *skb)
{
struct sock *sk = skb->sk;
unsigned int len = skb->truesize;
+ bool free;
if (!sock_flag(sk, SOCK_USE_WRITE_QUEUE)) {
+ if (sock_flag(sk, SOCK_RCU_FREE) &&
+ sk->sk_write_space == sock_def_write_space) {
+ rcu_read_lock();
+ free = refcount_sub_and_test(len, &sk->sk_wmem_alloc);
+ sock_def_write_space_wfree(sk);
+ rcu_read_unlock();
+ if (unlikely(free))
+ __sk_free(sk);
+ return;
+ }
+
/*
* Keep a reference on sk_wmem_alloc, this will be released
* after sk_write_space() call
@@ -2628,13 +2673,6 @@ failure:
}
EXPORT_SYMBOL(sock_alloc_send_pskb);
-struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size,
- int noblock, int *errcode)
-{
- return sock_alloc_send_pskb(sk, size, 0, noblock, errcode, 0);
-}
-EXPORT_SYMBOL(sock_alloc_send_skb);
-
int __sock_cmsg_send(struct sock *sk, struct msghdr *msg, struct cmsghdr *cmsg,
struct sockcm_cookie *sockc)
{
@@ -3191,20 +3229,42 @@ static void sock_def_write_space(struct sock *sk)
/* Do not wake up a writer until he can make "significant"
* progress. --DaveM
*/
- if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= READ_ONCE(sk->sk_sndbuf)) {
+ if (sock_writeable(sk)) {
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
EPOLLWRNORM | EPOLLWRBAND);
/* Should agree with poll, otherwise some programs break */
- if (sock_writeable(sk))
- sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
+ sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
}
rcu_read_unlock();
}
+/* An optimised version of sock_def_write_space(), should only be called
+ * for SOCK_RCU_FREE sockets under RCU read section and after putting
+ * ->sk_wmem_alloc.
+ */
+static void sock_def_write_space_wfree(struct sock *sk)
+{
+ /* Do not wake up a writer until he can make "significant"
+ * progress. --DaveM
+ */
+ if (sock_writeable(sk)) {
+ struct socket_wq *wq = rcu_dereference(sk->sk_wq);
+
+ /* rely on refcount_sub from sock_wfree() */
+ smp_mb__after_atomic();
+ if (wq && waitqueue_active(&wq->wait))
+ wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT |
+ EPOLLWRNORM | EPOLLWRBAND);
+
+ /* Should agree with poll, otherwise some programs break */
+ sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
+ }
+}
+
static void sock_def_destruct(struct sock *sk)
{
}
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index ca4527fcbc75..71a13596ea2b 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -25,12 +25,10 @@
#include "dev.h"
-static int three = 3;
static int int_3600 = 3600;
static int min_sndbuf = SOCK_MIN_SNDBUF;
static int min_rcvbuf = SOCK_MIN_RCVBUF;
static int max_skb_frags = MAX_SKB_FRAGS;
-static long long_one __maybe_unused = 1;
static long long_max __maybe_unused = LONG_MAX;
static int net_msg_warn; /* Unused, but still a sysctl */
@@ -424,7 +422,7 @@ static struct ctl_table net_core_table[] = {
.maxlen = sizeof(long),
.mode = 0600,
.proc_handler = proc_dolongvec_minmax_bpf_restricted,
- .extra1 = &long_one,
+ .extra1 = SYSCTL_LONG_ONE,
.extra2 = &bpf_jit_limit_max,
},
#endif
@@ -560,7 +558,7 @@ static struct ctl_table net_core_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
- .extra2 = &three,
+ .extra2 = SYSCTL_THREE,
},
{
.procname = "high_order_alloc_disable",
@@ -586,6 +584,14 @@ static struct ctl_table net_core_table[] = {
.extra1 = SYSCTL_ONE,
.extra2 = &int_3600,
},
+ {
+ .procname = "skb_defer_max",
+ .data = &sysctl_skb_defer_max,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ },
{ }
};