summaryrefslogtreecommitdiff
path: root/net/core/dev.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/core/dev.c')
-rw-r--r--net/core/dev.c277
1 files changed, 167 insertions, 110 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 69a3e544676c..e7ffcfa037f7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -107,6 +107,7 @@
#include <net/pkt_cls.h>
#include <net/checksum.h>
#include <net/xfrm.h>
+#include <net/tcx.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/module.h>
@@ -154,7 +155,6 @@
#include "dev.h"
#include "net-sysfs.h"
-
static DEFINE_SPINLOCK(ptype_lock);
struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
struct list_head ptype_all __read_mostly; /* Taps */
@@ -2384,8 +2384,7 @@ static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
struct xps_map *map = NULL;
int pos;
- if (dev_maps)
- map = xmap_dereference(dev_maps->attr_map[tci]);
+ map = xmap_dereference(dev_maps->attr_map[tci]);
if (!map)
return false;
@@ -3882,69 +3881,198 @@ int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *skb)
EXPORT_SYMBOL(dev_loopback_xmit);
#ifdef CONFIG_NET_EGRESS
-static struct sk_buff *
-sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
+static struct netdev_queue *
+netdev_tx_queue_mapping(struct net_device *dev, struct sk_buff *skb)
+{
+ int qm = skb_get_queue_mapping(skb);
+
+ return netdev_get_tx_queue(dev, netdev_cap_txqueue(dev, qm));
+}
+
+static bool netdev_xmit_txqueue_skipped(void)
+{
+ return __this_cpu_read(softnet_data.xmit.skip_txqueue);
+}
+
+void netdev_xmit_skip_txqueue(bool skip)
+{
+ __this_cpu_write(softnet_data.xmit.skip_txqueue, skip);
+}
+EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue);
+#endif /* CONFIG_NET_EGRESS */
+
+#ifdef CONFIG_NET_XGRESS
+static int tc_run(struct tcx_entry *entry, struct sk_buff *skb)
{
+ int ret = TC_ACT_UNSPEC;
#ifdef CONFIG_NET_CLS_ACT
- struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress);
- struct tcf_result cl_res;
+ struct mini_Qdisc *miniq = rcu_dereference_bh(entry->miniq);
+ struct tcf_result res;
if (!miniq)
- return skb;
+ return ret;
- /* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
tc_skb_cb(skb)->mru = 0;
tc_skb_cb(skb)->post_ct = false;
- mini_qdisc_bstats_cpu_update(miniq, skb);
- switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, false)) {
+ mini_qdisc_bstats_cpu_update(miniq, skb);
+ ret = tcf_classify(skb, miniq->block, miniq->filter_list, &res, false);
+ /* Only tcf related quirks below. */
+ switch (ret) {
+ case TC_ACT_SHOT:
+ mini_qdisc_qstats_cpu_drop(miniq);
+ break;
case TC_ACT_OK:
case TC_ACT_RECLASSIFY:
- skb->tc_index = TC_H_MIN(cl_res.classid);
+ skb->tc_index = TC_H_MIN(res.classid);
break;
+ }
+#endif /* CONFIG_NET_CLS_ACT */
+ return ret;
+}
+
+static DEFINE_STATIC_KEY_FALSE(tcx_needed_key);
+
+void tcx_inc(void)
+{
+ static_branch_inc(&tcx_needed_key);
+}
+
+void tcx_dec(void)
+{
+ static_branch_dec(&tcx_needed_key);
+}
+
+static __always_inline enum tcx_action_base
+tcx_run(const struct bpf_mprog_entry *entry, struct sk_buff *skb,
+ const bool needs_mac)
+{
+ const struct bpf_mprog_fp *fp;
+ const struct bpf_prog *prog;
+ int ret = TCX_NEXT;
+
+ if (needs_mac)
+ __skb_push(skb, skb->mac_len);
+ bpf_mprog_foreach_prog(entry, fp, prog) {
+ bpf_compute_data_pointers(skb);
+ ret = bpf_prog_run(prog, skb);
+ if (ret != TCX_NEXT)
+ break;
+ }
+ if (needs_mac)
+ __skb_pull(skb, skb->mac_len);
+ return tcx_action_code(skb, ret);
+}
+
+static __always_inline struct sk_buff *
+sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
+ struct net_device *orig_dev, bool *another)
+{
+ struct bpf_mprog_entry *entry = rcu_dereference_bh(skb->dev->tcx_ingress);
+ int sch_ret;
+
+ if (!entry)
+ return skb;
+ if (*pt_prev) {
+ *ret = deliver_skb(skb, *pt_prev, orig_dev);
+ *pt_prev = NULL;
+ }
+
+ qdisc_skb_cb(skb)->pkt_len = skb->len;
+ tcx_set_ingress(skb, true);
+
+ if (static_branch_unlikely(&tcx_needed_key)) {
+ sch_ret = tcx_run(entry, skb, true);
+ if (sch_ret != TC_ACT_UNSPEC)
+ goto ingress_verdict;
+ }
+ sch_ret = tc_run(tcx_entry(entry), skb);
+ingress_verdict:
+ switch (sch_ret) {
+ case TC_ACT_REDIRECT:
+ /* skb_mac_header check was done by BPF, so we can safely
+ * push the L2 header back before redirecting to another
+ * netdev.
+ */
+ __skb_push(skb, skb->mac_len);
+ if (skb_do_redirect(skb) == -EAGAIN) {
+ __skb_pull(skb, skb->mac_len);
+ *another = true;
+ break;
+ }
+ *ret = NET_RX_SUCCESS;
+ return NULL;
case TC_ACT_SHOT:
- mini_qdisc_qstats_cpu_drop(miniq);
- *ret = NET_XMIT_DROP;
- kfree_skb_reason(skb, SKB_DROP_REASON_TC_EGRESS);
+ kfree_skb_reason(skb, SKB_DROP_REASON_TC_INGRESS);
+ *ret = NET_RX_DROP;
return NULL;
+ /* used by tc_run */
case TC_ACT_STOLEN:
case TC_ACT_QUEUED:
case TC_ACT_TRAP:
- *ret = NET_XMIT_SUCCESS;
consume_skb(skb);
+ fallthrough;
+ case TC_ACT_CONSUMED:
+ *ret = NET_RX_SUCCESS;
return NULL;
+ }
+
+ return skb;
+}
+
+static __always_inline struct sk_buff *
+sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
+{
+ struct bpf_mprog_entry *entry = rcu_dereference_bh(dev->tcx_egress);
+ int sch_ret;
+
+ if (!entry)
+ return skb;
+
+ /* qdisc_skb_cb(skb)->pkt_len & tcx_set_ingress() was
+ * already set by the caller.
+ */
+ if (static_branch_unlikely(&tcx_needed_key)) {
+ sch_ret = tcx_run(entry, skb, false);
+ if (sch_ret != TC_ACT_UNSPEC)
+ goto egress_verdict;
+ }
+ sch_ret = tc_run(tcx_entry(entry), skb);
+egress_verdict:
+ switch (sch_ret) {
case TC_ACT_REDIRECT:
/* No need to push/pop skb's mac_header here on egress! */
skb_do_redirect(skb);
*ret = NET_XMIT_SUCCESS;
return NULL;
- default:
- break;
+ case TC_ACT_SHOT:
+ kfree_skb_reason(skb, SKB_DROP_REASON_TC_EGRESS);
+ *ret = NET_XMIT_DROP;
+ return NULL;
+ /* used by tc_run */
+ case TC_ACT_STOLEN:
+ case TC_ACT_QUEUED:
+ case TC_ACT_TRAP:
+ *ret = NET_XMIT_SUCCESS;
+ return NULL;
}
-#endif /* CONFIG_NET_CLS_ACT */
return skb;
}
-
-static struct netdev_queue *
-netdev_tx_queue_mapping(struct net_device *dev, struct sk_buff *skb)
-{
- int qm = skb_get_queue_mapping(skb);
-
- return netdev_get_tx_queue(dev, netdev_cap_txqueue(dev, qm));
-}
-
-static bool netdev_xmit_txqueue_skipped(void)
+#else
+static __always_inline struct sk_buff *
+sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
+ struct net_device *orig_dev, bool *another)
{
- return __this_cpu_read(softnet_data.xmit.skip_txqueue);
+ return skb;
}
-void netdev_xmit_skip_txqueue(bool skip)
+static __always_inline struct sk_buff *
+sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev)
{
- __this_cpu_write(softnet_data.xmit.skip_txqueue, skip);
+ return skb;
}
-EXPORT_SYMBOL_GPL(netdev_xmit_skip_txqueue);
-#endif /* CONFIG_NET_EGRESS */
+#endif /* CONFIG_NET_XGRESS */
#ifdef CONFIG_XPS
static int __get_xps_queue_idx(struct net_device *dev, struct sk_buff *skb,
@@ -4128,9 +4256,7 @@ int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
skb_update_prio(skb);
qdisc_pkt_len_init(skb);
-#ifdef CONFIG_NET_CLS_ACT
- skb->tc_at_ingress = 0;
-#endif
+ tcx_set_ingress(skb, false);
#ifdef CONFIG_NET_EGRESS
if (static_branch_unlikely(&egress_needed_key)) {
if (nf_hook_egress_active()) {
@@ -5064,72 +5190,6 @@ int (*br_fdb_test_addr_hook)(struct net_device *dev,
EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
#endif
-static inline struct sk_buff *
-sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret,
- struct net_device *orig_dev, bool *another)
-{
-#ifdef CONFIG_NET_CLS_ACT
- struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress);
- struct tcf_result cl_res;
-
- /* If there's at least one ingress present somewhere (so
- * we get here via enabled static key), remaining devices
- * that are not configured with an ingress qdisc will bail
- * out here.
- */
- if (!miniq)
- return skb;
-
- if (*pt_prev) {
- *ret = deliver_skb(skb, *pt_prev, orig_dev);
- *pt_prev = NULL;
- }
-
- qdisc_skb_cb(skb)->pkt_len = skb->len;
- tc_skb_cb(skb)->mru = 0;
- tc_skb_cb(skb)->post_ct = false;
- skb->tc_at_ingress = 1;
- mini_qdisc_bstats_cpu_update(miniq, skb);
-
- switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, false)) {
- case TC_ACT_OK:
- case TC_ACT_RECLASSIFY:
- skb->tc_index = TC_H_MIN(cl_res.classid);
- break;
- case TC_ACT_SHOT:
- mini_qdisc_qstats_cpu_drop(miniq);
- kfree_skb_reason(skb, SKB_DROP_REASON_TC_INGRESS);
- *ret = NET_RX_DROP;
- return NULL;
- case TC_ACT_STOLEN:
- case TC_ACT_QUEUED:
- case TC_ACT_TRAP:
- consume_skb(skb);
- *ret = NET_RX_SUCCESS;
- return NULL;
- case TC_ACT_REDIRECT:
- /* skb_mac_header check was done by cls/act_bpf, so
- * we can safely push the L2 header back before
- * redirecting to another netdev
- */
- __skb_push(skb, skb->mac_len);
- if (skb_do_redirect(skb) == -EAGAIN) {
- __skb_pull(skb, skb->mac_len);
- *another = true;
- break;
- }
- *ret = NET_RX_SUCCESS;
- return NULL;
- case TC_ACT_CONSUMED:
- *ret = NET_RX_SUCCESS;
- return NULL;
- default:
- break;
- }
-#endif /* CONFIG_NET_CLS_ACT */
- return skb;
-}
-
/**
* netdev_is_rx_handler_busy - check if receive handler is registered
* @dev: device to check
@@ -6316,12 +6376,8 @@ int dev_set_threaded(struct net_device *dev, bool threaded)
* softirq mode will happen in the next round of napi_schedule().
* This should not cause hiccups/stalls to the live traffic.
*/
- list_for_each_entry(napi, &dev->napi_list, dev_list) {
- if (threaded)
- set_bit(NAPI_STATE_THREADED, &napi->state);
- else
- clear_bit(NAPI_STATE_THREADED, &napi->state);
- }
+ list_for_each_entry(napi, &dev->napi_list, dev_list)
+ assign_bit(NAPI_STATE_THREADED, &napi->state, threaded);
return err;
}
@@ -10617,6 +10673,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev_net_set(dev, &init_net);
dev->gso_max_size = GSO_LEGACY_MAX_SIZE;
+ dev->xdp_zc_max_segs = 1;
dev->gso_max_segs = GSO_MAX_SEGS;
dev->gro_max_size = GRO_LEGACY_MAX_SIZE;
dev->gso_ipv4_max_size = GSO_LEGACY_MAX_SIZE;
@@ -10838,7 +10895,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
/* Shutdown queueing discipline. */
dev_shutdown(dev);
-
+ dev_tcx_uninstall(dev);
dev_xdp_uninstall(dev);
bpf_dev_bound_netdev_unregister(dev);