summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/admin-guide/sysctl/net.rst8
-rw-r--r--net/core/dev.c1
-rw-r--r--net/core/dev.h2
-rw-r--r--net/core/skbuff.c15
-rw-r--r--net/core/sysctl_net_core.c8
5 files changed, 27 insertions, 7 deletions
diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst
index f86b5e1623c6..fa4dcdb283cf 100644
--- a/Documentation/admin-guide/sysctl/net.rst
+++ b/Documentation/admin-guide/sysctl/net.rst
@@ -322,6 +322,14 @@ a leaked reference faster. A larger value may be useful to prevent false
warnings on slow/loaded systems.
Default value is 10, minimum 1, maximum 3600.
+skb_defer_max
+-------------
+
+Max size (in skbs) of the per-cpu list of skbs being freed
+by the cpu which allocated them. Used by TCP stack so far.
+
+Default: 64
+
optmem_max
----------
diff --git a/net/core/dev.c b/net/core/dev.c
index d0b34bc50706..6359f8953269 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4330,6 +4330,7 @@ int netdev_max_backlog __read_mostly = 1000;
EXPORT_SYMBOL(netdev_max_backlog);
int netdev_tstamp_prequeue __read_mostly = 1;
+unsigned int sysctl_skb_defer_max __read_mostly = 64;
int netdev_budget __read_mostly = 300;
/* Must be at least 2 jiffes to guarantee 1 jiffy timeout */
unsigned int __read_mostly netdev_budget_usecs = 2 * USEC_PER_SEC / HZ;
diff --git a/net/core/dev.h b/net/core/dev.h
index 328b37af90ba..cbb8a925175a 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -39,7 +39,7 @@ void dev_addr_check(struct net_device *dev);
/* sysctls not referred to from outside net/core/ */
extern int netdev_budget;
extern unsigned int netdev_budget_usecs;
-
+extern unsigned int sysctl_skb_defer_max;
extern int netdev_tstamp_prequeue;
extern int netdev_unregister_timeout_secs;
extern int weight_p;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index b40c8cdf4785..1d10bb4adec1 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -80,6 +80,7 @@
#include <linux/user_namespace.h>
#include <linux/indirect_call_wrapper.h>
+#include "dev.h"
#include "sock_destructor.h"
struct kmem_cache *skbuff_head_cache __ro_after_init;
@@ -6496,16 +6497,21 @@ void skb_attempt_defer_free(struct sk_buff *skb)
int cpu = skb->alloc_cpu;
struct softnet_data *sd;
unsigned long flags;
+ unsigned int defer_max;
bool kick;
if (WARN_ON_ONCE(cpu >= nr_cpu_ids) ||
!cpu_online(cpu) ||
cpu == raw_smp_processor_id()) {
- __kfree_skb(skb);
+nodefer: __kfree_skb(skb);
return;
}
sd = &per_cpu(softnet_data, cpu);
+ defer_max = READ_ONCE(sysctl_skb_defer_max);
+ if (READ_ONCE(sd->defer_count) >= defer_max)
+ goto nodefer;
+
/* We do not send an IPI or any signal.
* Remote cpu will eventually call skb_defer_free_flush()
*/
@@ -6515,11 +6521,8 @@ void skb_attempt_defer_free(struct sk_buff *skb)
WRITE_ONCE(sd->defer_list, skb);
sd->defer_count++;
- /* kick every time queue length reaches 128.
- * This condition should hardly be hit under normal conditions,
- * unless cpu suddenly stopped to receive NIC interrupts.
- */
- kick = sd->defer_count == 128;
+ /* Send an IPI every time queue reaches half capacity. */
+ kick = sd->defer_count == (defer_max >> 1);
spin_unlock_irqrestore(&sd->defer_lock, flags);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 195ca5c28771..ca8d38325e1e 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -578,6 +578,14 @@ static struct ctl_table net_core_table[] = {
.extra1 = SYSCTL_ONE,
.extra2 = &int_3600,
},
+ {
+ .procname = "skb_defer_max",
+ .data = &sysctl_skb_defer_max,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ },
{ }
};