summaryrefslogtreecommitdiff
path: root/net/bridge/br_switchdev.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-09-01 02:43:06 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2021-09-01 02:43:06 +0300
commit9e9fb7655ed585da8f468e29221f0ba194a5f613 (patch)
treed2c51887389b8297635a5b90d5766897f00fe928 /net/bridge/br_switchdev.c
parent86ac54e79fe09b34c52691a780a6e31d12fa57f4 (diff)
parent29ce8f9701072fc221d9c38ad952de1a9578f95c (diff)
downloadlinux-9e9fb7655ed585da8f468e29221f0ba194a5f613.tar.xz
Merge tag 'net-next-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski: "Core: - Enable memcg accounting for various networking objects. BPF: - Introduce bpf timers. - Add perf link and opaque bpf_cookie which the program can read out again, to be used in libbpf-based USDT library. - Add bpf_task_pt_regs() helper to access user space pt_regs in kprobes, to help user space stack unwinding. - Add support for UNIX sockets for BPF sockmap. - Extend BPF iterator support for UNIX domain sockets. - Allow BPF TCP congestion control progs and bpf iterators to call bpf_setsockopt(), e.g. to switch to another congestion control algorithm. Protocols: - Support IOAM Pre-allocated Trace with IPv6. - Support Management Component Transport Protocol. - bridge: multicast: add vlan support. - netfilter: add hooks for the SRv6 lightweight tunnel driver. - tcp: - enable mid-stream window clamping (by user space or BPF) - allow data-less, empty-cookie SYN with TFO_SERVER_COOKIE_NOT_REQD - more accurate DSACK processing for RACK-TLP - mptcp: - add full mesh path manager option - add partial support for MP_FAIL - improve use of backup subflows - optimize option processing - af_unix: add OOB notification support. - ipv6: add IFLA_INET6_RA_MTU to expose MTU value advertised by the router. - mac80211: Target Wake Time support in AP mode. - can: j1939: extend UAPI to notify about RX status. Driver APIs: - Add page frag support in page pool API. - Many improvements to the DSA (distributed switch) APIs. - ethtool: extend IRQ coalesce uAPI with timer reset modes. - devlink: control which auxiliary devices are created. - Support CAN PHYs via the generic PHY subsystem. - Proper cross-chip support for tag_8021q. - Allow TX forwarding for the software bridge data path to be offloaded to capable devices. Drivers: - veth: more flexible channels number configuration. - openvswitch: introduce per-cpu upcall dispatch. - Add internet mix (IMIX) mode to pktgen. - Transparently handle XDP operations in the bonding driver. - Add LiteETH network driver. - Renesas (ravb): - support Gigabit Ethernet IP - NXP Ethernet switch (sja1105): - fast aging support - support for "H" switch topologies - traffic termination for ports under VLAN-aware bridge - Intel 1G Ethernet - support getcrosststamp() with PCIe PTM (Precision Time Measurement) for better time sync - support Credit-Based Shaper (CBS) offload, enabling HW traffic prioritization and bandwidth reservation - Broadcom Ethernet (bnxt) - support pulse-per-second output - support larger Rx rings - Mellanox Ethernet (mlx5) - support ethtool RSS contexts and MQPRIO channel mode - support LAG offload with bridging - support devlink rate limit API - support packet sampling on tunnels - Huawei Ethernet (hns3): - basic devlink support - add extended IRQ coalescing support - report extended link state - Netronome Ethernet (nfp): - add conntrack offload support - Broadcom WiFi (brcmfmac): - add WPA3 Personal with FT to supported cipher suites - support 43752 SDIO device - Intel WiFi (iwlwifi): - support scanning hidden 6GHz networks - support for a new hardware family (Bz) - Xen pv driver: - harden netfront against malicious backends - Qualcomm mobile - ipa: refactor power management and enable automatic suspend - mhi: move MBIM to WWAN subsystem interfaces Refactor: - Ambient BPF run context and cgroup storage cleanup. - Compat rework for ndo_ioctl. Old code removal: - prism54 remove the obsoleted driver, deprecated by the p54 driver. - wan: remove sbni/granch driver" * tag 'net-next-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1715 commits) net: Add depends on OF_NET for LiteX's LiteETH ipv6: seg6: remove duplicated include net: hns3: remove unnecessary spaces net: hns3: add some required spaces net: hns3: clean up a type mismatch warning net: hns3: refine function hns3_set_default_feature() ipv6: remove duplicated 'net/lwtunnel.h' include net: w5100: check return value after calling platform_get_resource() net/mlxbf_gige: Make use of devm_platform_ioremap_resourcexxx() net: mdio: mscc-miim: Make use of the helper function devm_platform_ioremap_resource() net: mdio-ipq4019: Make use of devm_platform_ioremap_resource() fou: remove sparse errors ipv4: fix endianness issue in inet_rtm_getroute_build_skb() octeontx2-af: Set proper errorcode for IPv4 checksum errors octeontx2-af: Fix static code analyzer reported issues octeontx2-af: Fix mailbox errors in nix_rss_flowkey_cfg octeontx2-af: Fix loop in free and unmap counter af_unix: fix potential NULL deref in unix_dgram_connect() dpaa2-eth: Replace strlcpy with strscpy octeontx2-af: Use NDC TX for transmit packet data ...
Diffstat (limited to 'net/bridge/br_switchdev.c')
-rw-r--r--net/bridge/br_switchdev.c246
1 files changed, 220 insertions, 26 deletions
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index d3adee0f91f9..6bf518d78f02 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -8,50 +8,65 @@
#include "br_private.h"
-static int br_switchdev_mark_get(struct net_bridge *br, struct net_device *dev)
-{
- struct net_bridge_port *p;
+static struct static_key_false br_switchdev_tx_fwd_offload;
- /* dev is yet to be added to the port list. */
- list_for_each_entry(p, &br->port_list, list) {
- if (netdev_port_same_parent_id(dev, p->dev))
- return p->offload_fwd_mark;
- }
+static bool nbp_switchdev_can_offload_tx_fwd(const struct net_bridge_port *p,
+ const struct sk_buff *skb)
+{
+ if (!static_branch_unlikely(&br_switchdev_tx_fwd_offload))
+ return false;
- return ++br->offload_fwd_mark;
+ return (p->flags & BR_TX_FWD_OFFLOAD) &&
+ (p->hwdom != BR_INPUT_SKB_CB(skb)->src_hwdom);
}
-int nbp_switchdev_mark_set(struct net_bridge_port *p)
+bool br_switchdev_frame_uses_tx_fwd_offload(struct sk_buff *skb)
{
- struct netdev_phys_item_id ppid = { };
- int err;
+ if (!static_branch_unlikely(&br_switchdev_tx_fwd_offload))
+ return false;
- ASSERT_RTNL();
+ return BR_INPUT_SKB_CB(skb)->tx_fwd_offload;
+}
- err = dev_get_port_parent_id(p->dev, &ppid, true);
- if (err) {
- if (err == -EOPNOTSUPP)
- return 0;
- return err;
- }
+void br_switchdev_frame_set_offload_fwd_mark(struct sk_buff *skb)
+{
+ skb->offload_fwd_mark = br_switchdev_frame_uses_tx_fwd_offload(skb);
+}
- p->offload_fwd_mark = br_switchdev_mark_get(p->br, p->dev);
+/* Mark the frame for TX forwarding offload if this egress port supports it */
+void nbp_switchdev_frame_mark_tx_fwd_offload(const struct net_bridge_port *p,
+ struct sk_buff *skb)
+{
+ if (nbp_switchdev_can_offload_tx_fwd(p, skb))
+ BR_INPUT_SKB_CB(skb)->tx_fwd_offload = true;
+}
- return 0;
+/* Lazily adds the hwdom of the egress bridge port to the bit mask of hwdoms
+ * that the skb has been already forwarded to, to avoid further cloning to
+ * other ports in the same hwdom by making nbp_switchdev_allowed_egress()
+ * return false.
+ */
+void nbp_switchdev_frame_mark_tx_fwd_to_hwdom(const struct net_bridge_port *p,
+ struct sk_buff *skb)
+{
+ if (nbp_switchdev_can_offload_tx_fwd(p, skb))
+ set_bit(p->hwdom, &BR_INPUT_SKB_CB(skb)->fwd_hwdoms);
}
void nbp_switchdev_frame_mark(const struct net_bridge_port *p,
struct sk_buff *skb)
{
- if (skb->offload_fwd_mark && !WARN_ON_ONCE(!p->offload_fwd_mark))
- BR_INPUT_SKB_CB(skb)->offload_fwd_mark = p->offload_fwd_mark;
+ if (p->hwdom)
+ BR_INPUT_SKB_CB(skb)->src_hwdom = p->hwdom;
}
bool nbp_switchdev_allowed_egress(const struct net_bridge_port *p,
const struct sk_buff *skb)
{
- return !skb->offload_fwd_mark ||
- BR_INPUT_SKB_CB(skb)->offload_fwd_mark != p->offload_fwd_mark;
+ struct br_input_skb_cb *cb = BR_INPUT_SKB_CB(skb);
+
+ return !test_bit(p->hwdom, &cb->fwd_hwdoms) &&
+ (!skb->offload_fwd_mark || cb->src_hwdom != p->hwdom);
}
/* Flags that can be offloaded to hardware */
@@ -112,7 +127,6 @@ br_switchdev_fdb_notify(struct net_bridge *br,
const struct net_bridge_fdb_entry *fdb, int type)
{
const struct net_bridge_port *dst = READ_ONCE(fdb->dst);
- struct net_device *dev = dst ? dst->dev : br->dev;
struct switchdev_notifier_fdb_info info = {
.addr = fdb->key.addr.addr,
.vid = fdb->key.vlan_id,
@@ -120,6 +134,7 @@ br_switchdev_fdb_notify(struct net_bridge *br,
.is_local = test_bit(BR_FDB_LOCAL, &fdb->flags),
.offloaded = test_bit(BR_FDB_OFFLOADED, &fdb->flags),
};
+ struct net_device *dev = (!dst || info.is_local) ? br->dev : dst->dev;
switch (type) {
case RTM_DELNEIGH:
@@ -156,3 +171,182 @@ int br_switchdev_port_vlan_del(struct net_device *dev, u16 vid)
return switchdev_port_obj_del(dev, &v.obj);
}
+
+static int nbp_switchdev_hwdom_set(struct net_bridge_port *joining)
+{
+ struct net_bridge *br = joining->br;
+ struct net_bridge_port *p;
+ int hwdom;
+
+ /* joining is yet to be added to the port list. */
+ list_for_each_entry(p, &br->port_list, list) {
+ if (netdev_phys_item_id_same(&joining->ppid, &p->ppid)) {
+ joining->hwdom = p->hwdom;
+ return 0;
+ }
+ }
+
+ hwdom = find_next_zero_bit(&br->busy_hwdoms, BR_HWDOM_MAX, 1);
+ if (hwdom >= BR_HWDOM_MAX)
+ return -EBUSY;
+
+ set_bit(hwdom, &br->busy_hwdoms);
+ joining->hwdom = hwdom;
+ return 0;
+}
+
+static void nbp_switchdev_hwdom_put(struct net_bridge_port *leaving)
+{
+ struct net_bridge *br = leaving->br;
+ struct net_bridge_port *p;
+
+ /* leaving is no longer in the port list. */
+ list_for_each_entry(p, &br->port_list, list) {
+ if (p->hwdom == leaving->hwdom)
+ return;
+ }
+
+ clear_bit(leaving->hwdom, &br->busy_hwdoms);
+}
+
+static int nbp_switchdev_add(struct net_bridge_port *p,
+ struct netdev_phys_item_id ppid,
+ bool tx_fwd_offload,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ if (p->offload_count) {
+ /* Prevent unsupported configurations such as a bridge port
+ * which is a bonding interface, and the member ports are from
+ * different hardware switches.
+ */
+ if (!netdev_phys_item_id_same(&p->ppid, &ppid)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Same bridge port cannot be offloaded by two physical switches");
+ return -EBUSY;
+ }
+
+ /* Tolerate drivers that call switchdev_bridge_port_offload()
+ * more than once for the same bridge port, such as when the
+ * bridge port is an offloaded bonding/team interface.
+ */
+ p->offload_count++;
+
+ return 0;
+ }
+
+ p->ppid = ppid;
+ p->offload_count = 1;
+
+ err = nbp_switchdev_hwdom_set(p);
+ if (err)
+ return err;
+
+ if (tx_fwd_offload) {
+ p->flags |= BR_TX_FWD_OFFLOAD;
+ static_branch_inc(&br_switchdev_tx_fwd_offload);
+ }
+
+ return 0;
+}
+
+static void nbp_switchdev_del(struct net_bridge_port *p)
+{
+ if (WARN_ON(!p->offload_count))
+ return;
+
+ p->offload_count--;
+
+ if (p->offload_count)
+ return;
+
+ if (p->hwdom)
+ nbp_switchdev_hwdom_put(p);
+
+ if (p->flags & BR_TX_FWD_OFFLOAD) {
+ p->flags &= ~BR_TX_FWD_OFFLOAD;
+ static_branch_dec(&br_switchdev_tx_fwd_offload);
+ }
+}
+
+static int nbp_switchdev_sync_objs(struct net_bridge_port *p, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ struct netlink_ext_ack *extack)
+{
+ struct net_device *br_dev = p->br->dev;
+ struct net_device *dev = p->dev;
+ int err;
+
+ err = br_vlan_replay(br_dev, dev, ctx, true, blocking_nb, extack);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+
+ err = br_mdb_replay(br_dev, dev, ctx, true, blocking_nb, extack);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+
+ err = br_fdb_replay(br_dev, ctx, true, atomic_nb);
+ if (err && err != -EOPNOTSUPP)
+ return err;
+
+ return 0;
+}
+
+static void nbp_switchdev_unsync_objs(struct net_bridge_port *p,
+ const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb)
+{
+ struct net_device *br_dev = p->br->dev;
+ struct net_device *dev = p->dev;
+
+ br_vlan_replay(br_dev, dev, ctx, false, blocking_nb, NULL);
+
+ br_mdb_replay(br_dev, dev, ctx, false, blocking_nb, NULL);
+
+ br_fdb_replay(br_dev, ctx, false, atomic_nb);
+}
+
+/* Let the bridge know that this port is offloaded, so that it can assign a
+ * switchdev hardware domain to it.
+ */
+int br_switchdev_port_offload(struct net_bridge_port *p,
+ struct net_device *dev, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb,
+ bool tx_fwd_offload,
+ struct netlink_ext_ack *extack)
+{
+ struct netdev_phys_item_id ppid;
+ int err;
+
+ err = dev_get_port_parent_id(dev, &ppid, false);
+ if (err)
+ return err;
+
+ err = nbp_switchdev_add(p, ppid, tx_fwd_offload, extack);
+ if (err)
+ return err;
+
+ err = nbp_switchdev_sync_objs(p, ctx, atomic_nb, blocking_nb, extack);
+ if (err)
+ goto out_switchdev_del;
+
+ return 0;
+
+out_switchdev_del:
+ nbp_switchdev_del(p);
+
+ return err;
+}
+
+void br_switchdev_port_unoffload(struct net_bridge_port *p, const void *ctx,
+ struct notifier_block *atomic_nb,
+ struct notifier_block *blocking_nb)
+{
+ nbp_switchdev_unsync_objs(p, ctx, atomic_nb, blocking_nb);
+
+ nbp_switchdev_del(p);
+}