author     Paolo Abeni <pabeni@redhat.com>  2023-03-30 14:40:04 +0300
committer  Paolo Abeni <pabeni@redhat.com>  2023-03-30 14:40:04 +0300
commit     4ddd6375c3ef6756d492ea5466408cace097121b (patch)
tree       955e0aa2919e1b114b1727e7fddd538c73b3c205
parent     7079d5e61aaa14cd04fd2fe7a8a2b6eca7833fdb (diff)
parent     8b43fd3d1d7d88293eb15e92090826e6b7cc13e4 (diff)
Merge branch 'net-rps-rfs-improvements'
Eric Dumazet says:

====================
net: rps/rfs improvements

Jason Xing attempted to optimize napi_schedule_rps() by avoiding
unneeded NET_RX_SOFTIRQ raises: [1], [2]

This is quite complex to implement properly. I chose to implement
the idea, and added a similar optimization in ____napi_schedule()

Overall, in an intensive RPC workload, with 32 TX/RX queues with RFS,
I was able to observe a ~10% reduction of NET_RX_SOFTIRQ invocations.

While this had no impact on throughput or cpu costs on this synthetic
benchmark, we know that firing NET_RX_SOFTIRQ from softirq handler can
force __do_softirq() to wakeup ksoftirqd when need_resched() is true.
This can have a latency impact on stressed hosts.

[1] https://lore.kernel.org/lkml/20230325152417.5403-1-kerneljasonxing@gmail.com/
[2] https://lore.kernel.org/netdev/20230328142112.12493-1-kerneljasonxing@gmail.com/
====================

Link: https://lore.kernel.org/r/20230328235021.1048163-1-edumazet@google.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
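As a rough mental model of what the series does, here is an illustrative
userspace sketch under simplified assumptions (not kernel code; names such
as softnet_sim and schedule_rx_work are invented for the example): a per-cpu
in_net_rx_action flag lets the schedulers skip raising NET_RX_SOFTIRQ while
the poll loop is already running, and the poll loop re-checks its queue after
clearing the flag so work queued in the meantime is not stranded.

#include <stdbool.h>
#include <stdio.h>

/* Userspace stand-ins for the per-cpu softnet_data fields involved here. */
struct softnet_sim {
	bool in_net_rx_action;	/* is the poll loop running on this "cpu"? */
	int  queued;		/* stand-in for entries on sd->poll_list */
	int  late_work;		/* batches of work arriving mid rx_action */
	int  softirq_raises;	/* how often NET_RX_SOFTIRQ was "raised" */
};

static void raise_rx_softirq(struct softnet_sim *sd)
{
	sd->softirq_raises++;
}

/* Models the ____napi_schedule()/napi_schedule_rps() change: queue work,
 * but only raise the softirq when the poll loop is not already active.
 */
static void schedule_rx_work(struct softnet_sim *sd)
{
	sd->queued++;
	if (!sd->in_net_rx_action)
		raise_rx_softirq(sd);
}

/* Models the net_rx_action() change: set the flag, drain the queue,
 * clear the flag, then re-check for work queued while the flag was set.
 */
static void rx_action(struct softnet_sim *sd)
{
start:
	sd->in_net_rx_action = true;
	while (sd->queued > 0)
		sd->queued--;			/* "poll" one queued entry */

	if (sd->late_work > 0) {		/* e.g. an IRQ during the loop */
		sd->late_work--;
		schedule_rx_work(sd);		/* flag is set: no extra raise */
	}

	sd->in_net_rx_action = false;
	/* Anything queued while the flag was set would otherwise be stranded,
	 * so check again and loop, mirroring the goto start in the patch.
	 */
	if (sd->queued > 0)
		goto start;
}

int main(void)
{
	struct softnet_sim sd = { 0 };

	sd.late_work = 1;
	schedule_rx_work(&sd);	/* loop not running yet: raises the softirq */
	rx_action(&sd);
	/* Two batches of work were processed with a single softirq raise. */
	printf("raises=%d queued=%d\n", sd.softirq_raises, sd.queued);
	return 0;
}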
-rw-r--r--  include/linux/netdevice.h |  1
-rw-r--r--  net/core/dev.c            | 46
2 files changed, 37 insertions(+), 10 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 18a5be6ddd0f..c8c634091a65 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3188,6 +3188,7 @@ struct softnet_data {
 #ifdef CONFIG_RPS
 	struct softnet_data	*rps_ipi_list;
 #endif
+	bool			in_net_rx_action;
 #ifdef CONFIG_NET_FLOW_LIMIT
 	struct sd_flow_limit __rcu *flow_limit;
 #endif
diff --git a/net/core/dev.c b/net/core/dev.c
index 7172334a418f..0c4b21291348 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4360,7 +4360,11 @@ static inline void ____napi_schedule(struct softnet_data *sd,
 	}
 
 	list_add_tail(&napi->poll_list, &sd->poll_list);
-	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+	/* If not called from net_rx_action()
+	 * we have to raise NET_RX_SOFTIRQ.
+	 */
+	if (!sd->in_net_rx_action)
+		__raise_softirq_irqoff(NET_RX_SOFTIRQ);
 }
 
 #ifdef CONFIG_RPS
@@ -4582,11 +4586,16 @@ static void trigger_rx_softirq(void *data)
 }
 
 /*
- * Check if this softnet_data structure is another cpu one
- * If yes, queue it to our IPI list and return 1
- * If no, return 0
+ * After we queued a packet into sd->input_pkt_queue,
+ * we need to make sure this queue is serviced soon.
+ *
+ * - If this is another cpu queue, link it to our rps_ipi_list,
+ *   and make sure we will process rps_ipi_list from net_rx_action().
+ *
+ * - If this is our own queue, NAPI schedule our backlog.
+ *   Note that this also raises NET_RX_SOFTIRQ.
  */
-static int napi_schedule_rps(struct softnet_data *sd)
+static void napi_schedule_rps(struct softnet_data *sd)
 {
 	struct softnet_data *mysd = this_cpu_ptr(&softnet_data);
 
@@ -4595,12 +4604,15 @@ static int napi_schedule_rps(struct softnet_data *sd)
 		sd->rps_ipi_next = mysd->rps_ipi_list;
 		mysd->rps_ipi_list = sd;
 
-		__raise_softirq_irqoff(NET_RX_SOFTIRQ);
-		return 1;
+		/* If not called from net_rx_action()
+		 * we have to raise NET_RX_SOFTIRQ.
+		 */
+		if (!mysd->in_net_rx_action)
+			__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+		return;
 	}
 #endif /* CONFIG_RPS */
 	__napi_schedule_irqoff(&mysd->backlog);
-	return 0;
 }
 
 #ifdef CONFIG_NET_FLOW_LIMIT
@@ -6640,6 +6652,8 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
 	LIST_HEAD(list);
 	LIST_HEAD(repoll);
 
+start:
+	sd->in_net_rx_action = true;
 	local_irq_disable();
 	list_splice_init(&sd->poll_list, &list);
 	local_irq_enable();
@@ -6650,8 +6664,18 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
 		skb_defer_free_flush(sd);
 
 		if (list_empty(&list)) {
-			if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))
-				goto end;
+			if (list_empty(&repoll)) {
+				sd->in_net_rx_action = false;
+				barrier();
+				/* We need to check if ____napi_schedule()
+				 * had refilled poll_list while
+				 * sd->in_net_rx_action was true.
+				 */
+				if (!list_empty(&sd->poll_list))
+					goto start;
+				if (!sd_has_rps_ipi_waiting(sd))
+					goto end;
+			}
 			break;
 		}
 
@@ -6676,6 +6700,8 @@ static __latent_entropy void net_rx_action(struct softirq_action *h)
 	list_splice(&list, &sd->poll_list);
 	if (!list_empty(&sd->poll_list))
 		__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+	else
+		sd->in_net_rx_action = false;
 
 	net_rps_action_and_irq_enable(sd);
 end:;
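
A note on the design choice, as the hunks above suggest: in_net_rx_action
lives in per-cpu softnet_data and is written only by net_rx_action() and
read by schedulers running on the same cpu, so no SMP memory barrier is
needed; barrier() is a compiler barrier keeping the store that clears the
flag from being reordered against the re-read of sd->poll_list, and the
goto start path picks up any napi that ____napi_schedule() queued without
raising NET_RX_SOFTIRQ while the flag was still set.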