diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-09-01 02:43:06 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-09-01 02:43:06 +0300 |
commit | 9e9fb7655ed585da8f468e29221f0ba194a5f613 (patch) | |
tree | d2c51887389b8297635a5b90d5766897f00fe928 /drivers/net/ethernet/mellanox/mlx5/core/esw | |
parent | 86ac54e79fe09b34c52691a780a6e31d12fa57f4 (diff) | |
parent | 29ce8f9701072fc221d9c38ad952de1a9578f95c (diff) | |
download | linux-9e9fb7655ed585da8f468e29221f0ba194a5f613.tar.xz |
Merge tag 'net-next-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Jakub Kicinski:
"Core:
- Enable memcg accounting for various networking objects.
BPF:
- Introduce bpf timers.
- Add perf link and opaque bpf_cookie which the program can read out
again, to be used in libbpf-based USDT library.
- Add bpf_task_pt_regs() helper to access user space pt_regs in
kprobes, to help user space stack unwinding.
- Add support for UNIX sockets for BPF sockmap.
- Extend BPF iterator support for UNIX domain sockets.
- Allow BPF TCP congestion control progs and bpf iterators to call
bpf_setsockopt(), e.g. to switch to another congestion control
algorithm.
Protocols:
- Support IOAM Pre-allocated Trace with IPv6.
- Support Management Component Transport Protocol.
- bridge: multicast: add vlan support.
- netfilter: add hooks for the SRv6 lightweight tunnel driver.
- tcp:
- enable mid-stream window clamping (by user space or BPF)
- allow data-less, empty-cookie SYN with TFO_SERVER_COOKIE_NOT_REQD
- more accurate DSACK processing for RACK-TLP
- mptcp:
- add full mesh path manager option
- add partial support for MP_FAIL
- improve use of backup subflows
- optimize option processing
- af_unix: add OOB notification support.
- ipv6: add IFLA_INET6_RA_MTU to expose MTU value advertised by the
router.
- mac80211: Target Wake Time support in AP mode.
- can: j1939: extend UAPI to notify about RX status.
Driver APIs:
- Add page frag support in page pool API.
- Many improvements to the DSA (distributed switch) APIs.
- ethtool: extend IRQ coalesce uAPI with timer reset modes.
- devlink: control which auxiliary devices are created.
- Support CAN PHYs via the generic PHY subsystem.
- Proper cross-chip support for tag_8021q.
- Allow TX forwarding for the software bridge data path to be
offloaded to capable devices.
Drivers:
- veth: more flexible channels number configuration.
- openvswitch: introduce per-cpu upcall dispatch.
- Add internet mix (IMIX) mode to pktgen.
- Transparently handle XDP operations in the bonding driver.
- Add LiteETH network driver.
- Renesas (ravb):
- support Gigabit Ethernet IP
- NXP Ethernet switch (sja1105):
- fast aging support
- support for "H" switch topologies
- traffic termination for ports under VLAN-aware bridge
- Intel 1G Ethernet
- support getcrosststamp() with PCIe PTM (Precision Time
Measurement) for better time sync
- support Credit-Based Shaper (CBS) offload, enabling HW traffic
prioritization and bandwidth reservation
- Broadcom Ethernet (bnxt)
- support pulse-per-second output
- support larger Rx rings
- Mellanox Ethernet (mlx5)
- support ethtool RSS contexts and MQPRIO channel mode
- support LAG offload with bridging
- support devlink rate limit API
- support packet sampling on tunnels
- Huawei Ethernet (hns3):
- basic devlink support
- add extended IRQ coalescing support
- report extended link state
- Netronome Ethernet (nfp):
- add conntrack offload support
- Broadcom WiFi (brcmfmac):
- add WPA3 Personal with FT to supported cipher suites
- support 43752 SDIO device
- Intel WiFi (iwlwifi):
- support scanning hidden 6GHz networks
- support for a new hardware family (Bz)
- Xen pv driver:
- harden netfront against malicious backends
- Qualcomm mobile
- ipa: refactor power management and enable automatic suspend
- mhi: move MBIM to WWAN subsystem interfaces
Refactor:
- Ambient BPF run context and cgroup storage cleanup.
- Compat rework for ndo_ioctl.
Old code removal:
- prism54 remove the obsoleted driver, deprecated by the p54 driver.
- wan: remove sbni/granch driver"
* tag 'net-next-5.15' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1715 commits)
net: Add depends on OF_NET for LiteX's LiteETH
ipv6: seg6: remove duplicated include
net: hns3: remove unnecessary spaces
net: hns3: add some required spaces
net: hns3: clean up a type mismatch warning
net: hns3: refine function hns3_set_default_feature()
ipv6: remove duplicated 'net/lwtunnel.h' include
net: w5100: check return value after calling platform_get_resource()
net/mlxbf_gige: Make use of devm_platform_ioremap_resourcexxx()
net: mdio: mscc-miim: Make use of the helper function devm_platform_ioremap_resource()
net: mdio-ipq4019: Make use of devm_platform_ioremap_resource()
fou: remove sparse errors
ipv4: fix endianness issue in inet_rtm_getroute_build_skb()
octeontx2-af: Set proper errorcode for IPv4 checksum errors
octeontx2-af: Fix static code analyzer reported issues
octeontx2-af: Fix mailbox errors in nix_rss_flowkey_cfg
octeontx2-af: Fix loop in free and unmap counter
af_unix: fix potential NULL deref in unix_dgram_connect()
dpaa2-eth: Replace strlcpy with strscpy
octeontx2-af: Use NDC TX for transmit packet data
...
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/esw')
13 files changed, 1380 insertions, 767 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c index 505bf811984a..2e504c7461c6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c @@ -15,6 +15,15 @@ static void esw_acl_egress_ofld_fwd2vport_destroy(struct mlx5_vport *vport) vport->egress.offloads.fwd_rule = NULL; } +static void esw_acl_egress_ofld_bounce_rule_destroy(struct mlx5_vport *vport) +{ + if (!vport->egress.offloads.bounce_rule) + return; + + mlx5_del_flow_rules(vport->egress.offloads.bounce_rule); + vport->egress.offloads.bounce_rule = NULL; +} + static int esw_acl_egress_ofld_fwd2vport_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport, struct mlx5_flow_destination *fwd_dest) @@ -87,6 +96,7 @@ static void esw_acl_egress_ofld_rules_destroy(struct mlx5_vport *vport) { esw_acl_egress_vlan_destroy(vport); esw_acl_egress_ofld_fwd2vport_destroy(vport); + esw_acl_egress_ofld_bounce_rule_destroy(vport); } static int esw_acl_egress_ofld_groups_create(struct mlx5_eswitch *esw, @@ -145,6 +155,12 @@ static void esw_acl_egress_ofld_groups_destroy(struct mlx5_vport *vport) mlx5_destroy_flow_group(vport->egress.offloads.fwd_grp); vport->egress.offloads.fwd_grp = NULL; } + + if (!IS_ERR_OR_NULL(vport->egress.offloads.bounce_grp)) { + mlx5_destroy_flow_group(vport->egress.offloads.bounce_grp); + vport->egress.offloads.bounce_grp = NULL; + } + esw_acl_egress_vlan_grp_destroy(vport); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c index 69a3630818d7..7e221038df8d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c @@ -5,6 +5,7 @@ #include <linux/notifier.h> #include <net/netevent.h> #include <net/switchdev.h> +#include "lib/devcom.h" #include "bridge.h" #include "eswitch.h" #include "bridge_priv.h" @@ -56,7 +57,6 @@ struct mlx5_esw_bridge { struct list_head fdb_list; struct rhashtable fdb_ht; - struct xarray vports; struct mlx5_flow_table *egress_ft; struct mlx5_flow_group *egress_vlan_fg; @@ -77,6 +77,15 @@ mlx5_esw_bridge_fdb_offload_notify(struct net_device *dev, const unsigned char * call_switchdev_notifiers(val, dev, &send_info.info, NULL); } +static void +mlx5_esw_bridge_fdb_del_notify(struct mlx5_esw_bridge_fdb_entry *entry) +{ + if (!(entry->flags & (MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER | MLX5_ESW_BRIDGE_FLAG_PEER))) + mlx5_esw_bridge_fdb_offload_notify(entry->dev, entry->key.addr, + entry->key.vid, + SWITCHDEV_FDB_DEL_TO_BRIDGE); +} + static struct mlx5_flow_table * mlx5_esw_bridge_table_create(int max_fte, u32 level, struct mlx5_eswitch *esw) { @@ -400,9 +409,10 @@ mlx5_esw_bridge_egress_table_cleanup(struct mlx5_esw_bridge *bridge) } static struct mlx5_flow_handle * -mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr, - struct mlx5_esw_bridge_vlan *vlan, u32 counter_id, - struct mlx5_esw_bridge *bridge) +mlx5_esw_bridge_ingress_flow_with_esw_create(u16 vport_num, const unsigned char *addr, + struct mlx5_esw_bridge_vlan *vlan, u32 counter_id, + struct mlx5_esw_bridge *bridge, + struct mlx5_eswitch *esw) { struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads; struct mlx5_flow_act flow_act = { @@ -430,7 +440,7 @@ mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr, MLX5_SET(fte_match_param, rule_spec->match_criteria, misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask()); MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0, - mlx5_eswitch_get_vport_metadata_for_match(br_offloads->esw, vport_num)); + mlx5_eswitch_get_vport_metadata_for_match(esw, vport_num)); if (vlan && vlan->pkt_reformat_push) { flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; @@ -459,6 +469,35 @@ mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr, } static struct mlx5_flow_handle * +mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr, + struct mlx5_esw_bridge_vlan *vlan, u32 counter_id, + struct mlx5_esw_bridge *bridge) +{ + return mlx5_esw_bridge_ingress_flow_with_esw_create(vport_num, addr, vlan, counter_id, + bridge, bridge->br_offloads->esw); +} + +static struct mlx5_flow_handle * +mlx5_esw_bridge_ingress_flow_peer_create(u16 vport_num, const unsigned char *addr, + struct mlx5_esw_bridge_vlan *vlan, u32 counter_id, + struct mlx5_esw_bridge *bridge) +{ + struct mlx5_devcom *devcom = bridge->br_offloads->esw->dev->priv.devcom; + static struct mlx5_flow_handle *handle; + struct mlx5_eswitch *peer_esw; + + peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + if (!peer_esw) + return ERR_PTR(-ENODEV); + + handle = mlx5_esw_bridge_ingress_flow_with_esw_create(vport_num, addr, vlan, counter_id, + bridge, peer_esw); + + mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS); + return handle; +} + +static struct mlx5_flow_handle * mlx5_esw_bridge_ingress_filter_flow_create(u16 vport_num, const unsigned char *addr, struct mlx5_esw_bridge *bridge) { @@ -505,7 +544,7 @@ mlx5_esw_bridge_ingress_filter_flow_create(u16 vport_num, const unsigned char *a } static struct mlx5_flow_handle * -mlx5_esw_bridge_egress_flow_create(u16 vport_num, const unsigned char *addr, +mlx5_esw_bridge_egress_flow_create(u16 vport_num, u16 esw_owner_vhca_id, const unsigned char *addr, struct mlx5_esw_bridge_vlan *vlan, struct mlx5_esw_bridge *bridge) { @@ -550,6 +589,10 @@ mlx5_esw_bridge_egress_flow_create(u16 vport_num, const unsigned char *addr, vlan->vid); } + if (MLX5_CAP_ESW(bridge->br_offloads->esw->dev, merged_eswitch)) { + dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID; + dest.vport.vhca_id = esw_owner_vhca_id; + } handle = mlx5_add_flow_rules(bridge->egress_ft, rule_spec, &flow_act, &dest, 1); kvfree(rule_spec); @@ -576,7 +619,6 @@ static struct mlx5_esw_bridge *mlx5_esw_bridge_create(int ifindex, goto err_fdb_ht; INIT_LIST_HEAD(&bridge->fdb_list); - xa_init(&bridge->vports); bridge->ifindex = ifindex; bridge->refcnt = 1; bridge->ageing_time = clock_t_to_jiffies(BR_DEFAULT_AGEING_TIME); @@ -603,7 +645,6 @@ static void mlx5_esw_bridge_put(struct mlx5_esw_bridge_offloads *br_offloads, return; mlx5_esw_bridge_egress_table_cleanup(bridge); - WARN_ON(!xa_empty(&bridge->vports)); list_del(&bridge->list); rhashtable_destroy(&bridge->fdb_ht); kvfree(bridge); @@ -639,30 +680,40 @@ mlx5_esw_bridge_lookup(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads return bridge; } +static unsigned long mlx5_esw_bridge_port_key_from_data(u16 vport_num, u16 esw_owner_vhca_id) +{ + return vport_num | (unsigned long)esw_owner_vhca_id << sizeof(vport_num) * BITS_PER_BYTE; +} + +static unsigned long mlx5_esw_bridge_port_key(struct mlx5_esw_bridge_port *port) +{ + return mlx5_esw_bridge_port_key_from_data(port->vport_num, port->esw_owner_vhca_id); +} + static int mlx5_esw_bridge_port_insert(struct mlx5_esw_bridge_port *port, - struct mlx5_esw_bridge *bridge) + struct mlx5_esw_bridge_offloads *br_offloads) { - return xa_insert(&bridge->vports, port->vport_num, port, GFP_KERNEL); + return xa_insert(&br_offloads->ports, mlx5_esw_bridge_port_key(port), port, GFP_KERNEL); } static struct mlx5_esw_bridge_port * -mlx5_esw_bridge_port_lookup(u16 vport_num, struct mlx5_esw_bridge *bridge) +mlx5_esw_bridge_port_lookup(u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads) { - return xa_load(&bridge->vports, vport_num); + return xa_load(&br_offloads->ports, mlx5_esw_bridge_port_key_from_data(vport_num, + esw_owner_vhca_id)); } static void mlx5_esw_bridge_port_erase(struct mlx5_esw_bridge_port *port, - struct mlx5_esw_bridge *bridge) + struct mlx5_esw_bridge_offloads *br_offloads) { - xa_erase(&bridge->vports, port->vport_num); + xa_erase(&br_offloads->ports, mlx5_esw_bridge_port_key(port)); } -static void mlx5_esw_bridge_fdb_entry_refresh(unsigned long lastuse, - struct mlx5_esw_bridge_fdb_entry *entry) +static void mlx5_esw_bridge_fdb_entry_refresh(struct mlx5_esw_bridge_fdb_entry *entry) { trace_mlx5_esw_bridge_fdb_entry_refresh(entry); - entry->lastuse = lastuse; mlx5_esw_bridge_fdb_offload_notify(entry->dev, entry->key.addr, entry->key.vid, SWITCHDEV_FDB_ADD_TO_BRIDGE); @@ -690,10 +741,7 @@ static void mlx5_esw_bridge_fdb_flush(struct mlx5_esw_bridge *bridge) struct mlx5_esw_bridge_fdb_entry *entry, *tmp; list_for_each_entry_safe(entry, tmp, &bridge->fdb_list, list) { - if (!(entry->flags & MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER)) - mlx5_esw_bridge_fdb_offload_notify(entry->dev, entry->key.addr, - entry->key.vid, - SWITCHDEV_FDB_DEL_TO_BRIDGE); + mlx5_esw_bridge_fdb_del_notify(entry); mlx5_esw_bridge_fdb_entry_cleanup(entry, bridge); } } @@ -841,10 +889,7 @@ static void mlx5_esw_bridge_vlan_flush(struct mlx5_esw_bridge_vlan *vlan, struct mlx5_esw_bridge_fdb_entry *entry, *tmp; list_for_each_entry_safe(entry, tmp, &vlan->fdb_list, vlan_list) { - if (!(entry->flags & MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER)) - mlx5_esw_bridge_fdb_offload_notify(entry->dev, entry->key.addr, - entry->key.vid, - SWITCHDEV_FDB_DEL_TO_BRIDGE); + mlx5_esw_bridge_fdb_del_notify(entry); mlx5_esw_bridge_fdb_entry_cleanup(entry, bridge); } @@ -875,13 +920,13 @@ static void mlx5_esw_bridge_port_vlans_flush(struct mlx5_esw_bridge_port *port, } static struct mlx5_esw_bridge_vlan * -mlx5_esw_bridge_port_vlan_lookup(u16 vid, u16 vport_num, struct mlx5_esw_bridge *bridge, - struct mlx5_eswitch *esw) +mlx5_esw_bridge_port_vlan_lookup(u16 vid, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge *bridge, struct mlx5_eswitch *esw) { struct mlx5_esw_bridge_port *port; struct mlx5_esw_bridge_vlan *vlan; - port = mlx5_esw_bridge_port_lookup(vport_num, bridge); + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, bridge->br_offloads); if (!port) { /* FDB is added asynchronously on wq while port might have been deleted * concurrently. Report on 'info' logging level and skip the FDB offload. @@ -904,24 +949,23 @@ mlx5_esw_bridge_port_vlan_lookup(u16 vid, u16 vport_num, struct mlx5_esw_bridge } static struct mlx5_esw_bridge_fdb_entry * -mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, const unsigned char *addr, - u16 vid, bool added_by_user, struct mlx5_eswitch *esw, - struct mlx5_esw_bridge *bridge) +mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + const unsigned char *addr, u16 vid, bool added_by_user, bool peer, + struct mlx5_eswitch *esw, struct mlx5_esw_bridge *bridge) { struct mlx5_esw_bridge_vlan *vlan = NULL; struct mlx5_esw_bridge_fdb_entry *entry; struct mlx5_flow_handle *handle; struct mlx5_fc *counter; - struct mlx5e_priv *priv; int err; if (bridge->flags & MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG && vid) { - vlan = mlx5_esw_bridge_port_vlan_lookup(vid, vport_num, bridge, esw); + vlan = mlx5_esw_bridge_port_vlan_lookup(vid, vport_num, esw_owner_vhca_id, bridge, + esw); if (IS_ERR(vlan)) return ERR_CAST(vlan); } - priv = netdev_priv(dev); entry = kvzalloc(sizeof(*entry), GFP_KERNEL); if (!entry) return ERR_PTR(-ENOMEM); @@ -930,19 +974,25 @@ mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, const unsi entry->key.vid = vid; entry->dev = dev; entry->vport_num = vport_num; + entry->esw_owner_vhca_id = esw_owner_vhca_id; entry->lastuse = jiffies; if (added_by_user) entry->flags |= MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER; + if (peer) + entry->flags |= MLX5_ESW_BRIDGE_FLAG_PEER; - counter = mlx5_fc_create(priv->mdev, true); + counter = mlx5_fc_create(esw->dev, true); if (IS_ERR(counter)) { err = PTR_ERR(counter); goto err_ingress_fc_create; } entry->ingress_counter = counter; - handle = mlx5_esw_bridge_ingress_flow_create(vport_num, addr, vlan, mlx5_fc_id(counter), - bridge); + handle = peer ? + mlx5_esw_bridge_ingress_flow_peer_create(vport_num, addr, vlan, + mlx5_fc_id(counter), bridge) : + mlx5_esw_bridge_ingress_flow_create(vport_num, addr, vlan, + mlx5_fc_id(counter), bridge); if (IS_ERR(handle)) { err = PTR_ERR(handle); esw_warn(esw->dev, "Failed to create ingress flow(vport=%u,err=%d)\n", @@ -962,7 +1012,8 @@ mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, const unsi entry->filter_handle = handle; } - handle = mlx5_esw_bridge_egress_flow_create(vport_num, addr, vlan, bridge); + handle = mlx5_esw_bridge_egress_flow_create(vport_num, esw_owner_vhca_id, addr, vlan, + bridge); if (IS_ERR(handle)) { err = PTR_ERR(handle); esw_warn(esw->dev, "Failed to create egress flow(vport=%u,err=%d)\n", @@ -994,32 +1045,37 @@ err_egress_flow_create: err_ingress_filter_flow_create: mlx5_del_flow_rules(entry->ingress_handle); err_ingress_flow_create: - mlx5_fc_destroy(priv->mdev, entry->ingress_counter); + mlx5_fc_destroy(esw->dev, entry->ingress_counter); err_ingress_fc_create: kvfree(entry); return ERR_PTR(err); } -int mlx5_esw_bridge_ageing_time_set(unsigned long ageing_time, struct mlx5_eswitch *esw, - struct mlx5_vport *vport) +int mlx5_esw_bridge_ageing_time_set(u16 vport_num, u16 esw_owner_vhca_id, unsigned long ageing_time, + struct mlx5_esw_bridge_offloads *br_offloads) { - if (!vport->bridge) + struct mlx5_esw_bridge_port *port; + + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!port) return -EINVAL; - vport->bridge->ageing_time = clock_t_to_jiffies(ageing_time); + port->bridge->ageing_time = clock_t_to_jiffies(ageing_time); return 0; } -int mlx5_esw_bridge_vlan_filtering_set(bool enable, struct mlx5_eswitch *esw, - struct mlx5_vport *vport) +int mlx5_esw_bridge_vlan_filtering_set(u16 vport_num, u16 esw_owner_vhca_id, bool enable, + struct mlx5_esw_bridge_offloads *br_offloads) { + struct mlx5_esw_bridge_port *port; struct mlx5_esw_bridge *bridge; bool filtering; - if (!vport->bridge) + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!port) return -EINVAL; - bridge = vport->bridge; + bridge = port->bridge; filtering = bridge->flags & MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG; if (filtering == enable) return 0; @@ -1033,114 +1089,143 @@ int mlx5_esw_bridge_vlan_filtering_set(bool enable, struct mlx5_eswitch *esw, return 0; } -static int mlx5_esw_bridge_vport_init(struct mlx5_esw_bridge_offloads *br_offloads, - struct mlx5_esw_bridge *bridge, - struct mlx5_vport *vport) +static int mlx5_esw_bridge_vport_init(u16 vport_num, u16 esw_owner_vhca_id, u16 flags, + struct mlx5_esw_bridge_offloads *br_offloads, + struct mlx5_esw_bridge *bridge) { struct mlx5_eswitch *esw = br_offloads->esw; struct mlx5_esw_bridge_port *port; int err; port = kvzalloc(sizeof(*port), GFP_KERNEL); - if (!port) { - err = -ENOMEM; - goto err_port_alloc; - } + if (!port) + return -ENOMEM; - port->vport_num = vport->vport; + port->vport_num = vport_num; + port->esw_owner_vhca_id = esw_owner_vhca_id; + port->bridge = bridge; + port->flags |= flags; xa_init(&port->vlans); - err = mlx5_esw_bridge_port_insert(port, bridge); + err = mlx5_esw_bridge_port_insert(port, br_offloads); if (err) { - esw_warn(esw->dev, "Failed to insert port metadata (vport=%u,err=%d)\n", - vport->vport, err); + esw_warn(esw->dev, + "Failed to insert port metadata (vport=%u,esw_owner_vhca_id=%u,err=%d)\n", + port->vport_num, port->esw_owner_vhca_id, err); goto err_port_insert; } trace_mlx5_esw_bridge_vport_init(port); - vport->bridge = bridge; return 0; err_port_insert: kvfree(port); -err_port_alloc: - mlx5_esw_bridge_put(br_offloads, bridge); return err; } static int mlx5_esw_bridge_vport_cleanup(struct mlx5_esw_bridge_offloads *br_offloads, - struct mlx5_vport *vport) + struct mlx5_esw_bridge_port *port) { - struct mlx5_esw_bridge *bridge = vport->bridge; + u16 vport_num = port->vport_num, esw_owner_vhca_id = port->esw_owner_vhca_id; + struct mlx5_esw_bridge *bridge = port->bridge; struct mlx5_esw_bridge_fdb_entry *entry, *tmp; - struct mlx5_esw_bridge_port *port; list_for_each_entry_safe(entry, tmp, &bridge->fdb_list, list) - if (entry->vport_num == vport->vport) + if (entry->vport_num == vport_num && entry->esw_owner_vhca_id == esw_owner_vhca_id) mlx5_esw_bridge_fdb_entry_cleanup(entry, bridge); - port = mlx5_esw_bridge_port_lookup(vport->vport, bridge); - if (!port) { - WARN(1, "Vport %u metadata not found on bridge", vport->vport); - return -EINVAL; - } - trace_mlx5_esw_bridge_vport_cleanup(port); mlx5_esw_bridge_port_vlans_flush(port, bridge); - mlx5_esw_bridge_port_erase(port, bridge); + mlx5_esw_bridge_port_erase(port, br_offloads); kvfree(port); mlx5_esw_bridge_put(br_offloads, bridge); - vport->bridge = NULL; return 0; } -int mlx5_esw_bridge_vport_link(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads, - struct mlx5_vport *vport, struct netlink_ext_ack *extack) +static int mlx5_esw_bridge_vport_link_with_flags(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, + u16 flags, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack) { struct mlx5_esw_bridge *bridge; int err; - WARN_ON(vport->bridge); - bridge = mlx5_esw_bridge_lookup(ifindex, br_offloads); if (IS_ERR(bridge)) { NL_SET_ERR_MSG_MOD(extack, "Error checking for existing bridge with same ifindex"); return PTR_ERR(bridge); } - err = mlx5_esw_bridge_vport_init(br_offloads, bridge, vport); - if (err) + err = mlx5_esw_bridge_vport_init(vport_num, esw_owner_vhca_id, flags, br_offloads, bridge); + if (err) { NL_SET_ERR_MSG_MOD(extack, "Error initializing port"); + goto err_vport; + } + return 0; + +err_vport: + mlx5_esw_bridge_put(br_offloads, bridge); return err; } -int mlx5_esw_bridge_vport_unlink(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads, - struct mlx5_vport *vport, struct netlink_ext_ack *extack) +int mlx5_esw_bridge_vport_link(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack) { - struct mlx5_esw_bridge *bridge = vport->bridge; + return mlx5_esw_bridge_vport_link_with_flags(ifindex, vport_num, esw_owner_vhca_id, 0, + br_offloads, extack); +} + +int mlx5_esw_bridge_vport_unlink(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_bridge_port *port; int err; - if (!bridge) { + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!port) { NL_SET_ERR_MSG_MOD(extack, "Port is not attached to any bridge"); return -EINVAL; } - if (bridge->ifindex != ifindex) { + if (port->bridge->ifindex != ifindex) { NL_SET_ERR_MSG_MOD(extack, "Port is attached to another bridge"); return -EINVAL; } - err = mlx5_esw_bridge_vport_cleanup(br_offloads, vport); + err = mlx5_esw_bridge_vport_cleanup(br_offloads, port); if (err) NL_SET_ERR_MSG_MOD(extack, "Port cleanup failed"); return err; } -int mlx5_esw_bridge_port_vlan_add(u16 vid, u16 flags, struct mlx5_eswitch *esw, - struct mlx5_vport *vport, struct netlink_ext_ack *extack) +int mlx5_esw_bridge_vport_peer_link(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack) +{ + if (!MLX5_CAP_ESW(br_offloads->esw->dev, merged_eswitch)) + return 0; + + return mlx5_esw_bridge_vport_link_with_flags(ifindex, vport_num, esw_owner_vhca_id, + MLX5_ESW_BRIDGE_PORT_FLAG_PEER, + br_offloads, extack); +} + +int mlx5_esw_bridge_vport_peer_unlink(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack) +{ + return mlx5_esw_bridge_vport_unlink(ifindex, vport_num, esw_owner_vhca_id, br_offloads, + extack); +} + +int mlx5_esw_bridge_port_vlan_add(u16 vport_num, u16 esw_owner_vhca_id, u16 vid, u16 flags, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack) { struct mlx5_esw_bridge_port *port; struct mlx5_esw_bridge_vlan *vlan; - port = mlx5_esw_bridge_port_lookup(vport->vport, vport->bridge); + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); if (!port) return -EINVAL; @@ -1148,10 +1233,10 @@ int mlx5_esw_bridge_port_vlan_add(u16 vid, u16 flags, struct mlx5_eswitch *esw, if (vlan) { if (vlan->flags == flags) return 0; - mlx5_esw_bridge_vlan_cleanup(port, vlan, vport->bridge); + mlx5_esw_bridge_vlan_cleanup(port, vlan, port->bridge); } - vlan = mlx5_esw_bridge_vlan_create(vid, flags, port, esw); + vlan = mlx5_esw_bridge_vlan_create(vid, flags, port, br_offloads->esw); if (IS_ERR(vlan)) { NL_SET_ERR_MSG_MOD(extack, "Failed to create VLAN entry"); return PTR_ERR(vlan); @@ -1159,62 +1244,93 @@ int mlx5_esw_bridge_port_vlan_add(u16 vid, u16 flags, struct mlx5_eswitch *esw, return 0; } -void mlx5_esw_bridge_port_vlan_del(u16 vid, struct mlx5_eswitch *esw, struct mlx5_vport *vport) +void mlx5_esw_bridge_port_vlan_del(u16 vport_num, u16 esw_owner_vhca_id, u16 vid, + struct mlx5_esw_bridge_offloads *br_offloads) { struct mlx5_esw_bridge_port *port; struct mlx5_esw_bridge_vlan *vlan; - port = mlx5_esw_bridge_port_lookup(vport->vport, vport->bridge); + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); if (!port) return; vlan = mlx5_esw_bridge_vlan_lookup(vid, port); if (!vlan) return; - mlx5_esw_bridge_vlan_cleanup(port, vlan, vport->bridge); + mlx5_esw_bridge_vlan_cleanup(port, vlan, port->bridge); } -void mlx5_esw_bridge_fdb_create(struct net_device *dev, struct mlx5_eswitch *esw, - struct mlx5_vport *vport, - struct switchdev_notifier_fdb_info *fdb_info) +void mlx5_esw_bridge_fdb_update_used(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct switchdev_notifier_fdb_info *fdb_info) { - struct mlx5_esw_bridge *bridge = vport->bridge; struct mlx5_esw_bridge_fdb_entry *entry; - u16 vport_num = vport->vport; + struct mlx5_esw_bridge_fdb_key key; + struct mlx5_esw_bridge_port *port; + struct mlx5_esw_bridge *bridge; - if (!bridge) { - esw_info(esw->dev, "Vport is not assigned to bridge (vport=%u)\n", vport_num); + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!port || port->flags & MLX5_ESW_BRIDGE_PORT_FLAG_PEER) + return; + + bridge = port->bridge; + ether_addr_copy(key.addr, fdb_info->addr); + key.vid = fdb_info->vid; + entry = rhashtable_lookup_fast(&bridge->fdb_ht, &key, fdb_ht_params); + if (!entry) { + esw_debug(br_offloads->esw->dev, + "FDB entry with specified key not found (MAC=%pM,vid=%u,vport=%u)\n", + key.addr, key.vid, vport_num); return; } - entry = mlx5_esw_bridge_fdb_entry_init(dev, vport_num, fdb_info->addr, fdb_info->vid, - fdb_info->added_by_user, esw, bridge); + entry->lastuse = jiffies; +} + +void mlx5_esw_bridge_fdb_create(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct switchdev_notifier_fdb_info *fdb_info) +{ + struct mlx5_esw_bridge_fdb_entry *entry; + struct mlx5_esw_bridge_port *port; + struct mlx5_esw_bridge *bridge; + + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!port) + return; + + bridge = port->bridge; + entry = mlx5_esw_bridge_fdb_entry_init(dev, vport_num, esw_owner_vhca_id, fdb_info->addr, + fdb_info->vid, fdb_info->added_by_user, + port->flags & MLX5_ESW_BRIDGE_PORT_FLAG_PEER, + br_offloads->esw, bridge); if (IS_ERR(entry)) return; if (entry->flags & MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER) mlx5_esw_bridge_fdb_offload_notify(dev, entry->key.addr, entry->key.vid, SWITCHDEV_FDB_OFFLOADED); - else + else if (!(entry->flags & MLX5_ESW_BRIDGE_FLAG_PEER)) /* Take over dynamic entries to prevent kernel bridge from aging them out. */ mlx5_esw_bridge_fdb_offload_notify(dev, entry->key.addr, entry->key.vid, SWITCHDEV_FDB_ADD_TO_BRIDGE); } -void mlx5_esw_bridge_fdb_remove(struct net_device *dev, struct mlx5_eswitch *esw, - struct mlx5_vport *vport, +void mlx5_esw_bridge_fdb_remove(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, struct switchdev_notifier_fdb_info *fdb_info) { - struct mlx5_esw_bridge *bridge = vport->bridge; + struct mlx5_eswitch *esw = br_offloads->esw; struct mlx5_esw_bridge_fdb_entry *entry; struct mlx5_esw_bridge_fdb_key key; - u16 vport_num = vport->vport; + struct mlx5_esw_bridge_port *port; + struct mlx5_esw_bridge *bridge; - if (!bridge) { - esw_warn(esw->dev, "Vport is not assigned to bridge (vport=%u)\n", vport_num); + port = mlx5_esw_bridge_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!port) return; - } + bridge = port->bridge; ether_addr_copy(key.addr, fdb_info->addr); key.vid = fdb_info->vid; entry = rhashtable_lookup_fast(&bridge->fdb_ht, &key, fdb_ht_params); @@ -1225,9 +1341,7 @@ void mlx5_esw_bridge_fdb_remove(struct net_device *dev, struct mlx5_eswitch *esw return; } - if (!(entry->flags & MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER)) - mlx5_esw_bridge_fdb_offload_notify(dev, entry->key.addr, entry->key.vid, - SWITCHDEV_FDB_DEL_TO_BRIDGE); + mlx5_esw_bridge_fdb_del_notify(entry); mlx5_esw_bridge_fdb_entry_cleanup(entry, bridge); } @@ -1245,11 +1359,10 @@ void mlx5_esw_bridge_update(struct mlx5_esw_bridge_offloads *br_offloads) continue; if (time_after(lastuse, entry->lastuse)) { - mlx5_esw_bridge_fdb_entry_refresh(lastuse, entry); - } else if (time_is_before_jiffies(entry->lastuse + bridge->ageing_time)) { - mlx5_esw_bridge_fdb_offload_notify(entry->dev, entry->key.addr, - entry->key.vid, - SWITCHDEV_FDB_DEL_TO_BRIDGE); + mlx5_esw_bridge_fdb_entry_refresh(entry); + } else if (!(entry->flags & MLX5_ESW_BRIDGE_FLAG_PEER) && + time_is_before_jiffies(entry->lastuse + bridge->ageing_time)) { + mlx5_esw_bridge_fdb_del_notify(entry); mlx5_esw_bridge_fdb_entry_cleanup(entry, bridge); } } @@ -1258,13 +1371,11 @@ void mlx5_esw_bridge_update(struct mlx5_esw_bridge_offloads *br_offloads) static void mlx5_esw_bridge_flush(struct mlx5_esw_bridge_offloads *br_offloads) { - struct mlx5_eswitch *esw = br_offloads->esw; - struct mlx5_vport *vport; + struct mlx5_esw_bridge_port *port; unsigned long i; - mlx5_esw_for_each_vport(esw, i, vport) - if (vport->bridge) - mlx5_esw_bridge_vport_cleanup(br_offloads, vport); + xa_for_each(&br_offloads->ports, i, port) + mlx5_esw_bridge_vport_cleanup(br_offloads, port); WARN_ONCE(!list_empty(&br_offloads->bridges), "Cleaning up bridge offloads while still having bridges attached\n"); @@ -1279,6 +1390,7 @@ struct mlx5_esw_bridge_offloads *mlx5_esw_bridge_init(struct mlx5_eswitch *esw) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&br_offloads->bridges); + xa_init(&br_offloads->ports); br_offloads->esw = esw; esw->br_offloads = br_offloads; @@ -1293,6 +1405,7 @@ void mlx5_esw_bridge_cleanup(struct mlx5_eswitch *esw) return; mlx5_esw_bridge_flush(br_offloads); + WARN_ON(!xa_empty(&br_offloads->ports)); esw->br_offloads = NULL; kvfree(br_offloads); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h index d826942b27fc..efc39975226e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h @@ -7,6 +7,7 @@ #include <linux/notifier.h> #include <linux/list.h> #include <linux/workqueue.h> +#include <linux/xarray.h> #include "eswitch.h" struct mlx5_flow_table; @@ -15,6 +16,8 @@ struct mlx5_flow_group; struct mlx5_esw_bridge_offloads { struct mlx5_eswitch *esw; struct list_head bridges; + struct xarray ports; + struct notifier_block netdev_nb; struct notifier_block nb_blk; struct notifier_block nb; @@ -31,23 +34,36 @@ struct mlx5_esw_bridge_offloads { struct mlx5_esw_bridge_offloads *mlx5_esw_bridge_init(struct mlx5_eswitch *esw); void mlx5_esw_bridge_cleanup(struct mlx5_eswitch *esw); -int mlx5_esw_bridge_vport_link(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads, - struct mlx5_vport *vport, struct netlink_ext_ack *extack); -int mlx5_esw_bridge_vport_unlink(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads, - struct mlx5_vport *vport, struct netlink_ext_ack *extack); -void mlx5_esw_bridge_fdb_create(struct net_device *dev, struct mlx5_eswitch *esw, - struct mlx5_vport *vport, +int mlx5_esw_bridge_vport_link(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack); +int mlx5_esw_bridge_vport_unlink(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack); +int mlx5_esw_bridge_vport_peer_link(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack); +int mlx5_esw_bridge_vport_peer_unlink(int ifindex, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack); +void mlx5_esw_bridge_fdb_update_used(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct switchdev_notifier_fdb_info *fdb_info); +void mlx5_esw_bridge_fdb_create(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, struct switchdev_notifier_fdb_info *fdb_info); -void mlx5_esw_bridge_fdb_remove(struct net_device *dev, struct mlx5_eswitch *esw, - struct mlx5_vport *vport, +void mlx5_esw_bridge_fdb_remove(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, struct switchdev_notifier_fdb_info *fdb_info); void mlx5_esw_bridge_update(struct mlx5_esw_bridge_offloads *br_offloads); -int mlx5_esw_bridge_ageing_time_set(unsigned long ageing_time, struct mlx5_eswitch *esw, - struct mlx5_vport *vport); -int mlx5_esw_bridge_vlan_filtering_set(bool enable, struct mlx5_eswitch *esw, - struct mlx5_vport *vport); -int mlx5_esw_bridge_port_vlan_add(u16 vid, u16 flags, struct mlx5_eswitch *esw, - struct mlx5_vport *vport, struct netlink_ext_ack *extack); -void mlx5_esw_bridge_port_vlan_del(u16 vid, struct mlx5_eswitch *esw, struct mlx5_vport *vport); +int mlx5_esw_bridge_ageing_time_set(u16 vport_num, u16 esw_owner_vhca_id, unsigned long ageing_time, + struct mlx5_esw_bridge_offloads *br_offloads); +int mlx5_esw_bridge_vlan_filtering_set(u16 vport_num, u16 esw_owner_vhca_id, bool enable, + struct mlx5_esw_bridge_offloads *br_offloads); +int mlx5_esw_bridge_port_vlan_add(u16 vport_num, u16 esw_owner_vhca_id, u16 vid, u16 flags, + struct mlx5_esw_bridge_offloads *br_offloads, + struct netlink_ext_ack *extack); +void mlx5_esw_bridge_port_vlan_del(u16 vport_num, u16 esw_owner_vhca_id, u16 vid, + struct mlx5_esw_bridge_offloads *br_offloads); #endif /* __MLX5_ESW_BRIDGE_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h index d9ab2e8bc2cb..52964a82d6a6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h @@ -19,6 +19,11 @@ struct mlx5_esw_bridge_fdb_key { enum { MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER = BIT(0), + MLX5_ESW_BRIDGE_FLAG_PEER = BIT(1), +}; + +enum { + MLX5_ESW_BRIDGE_PORT_FLAG_PEER = BIT(0), }; struct mlx5_esw_bridge_fdb_entry { @@ -28,6 +33,7 @@ struct mlx5_esw_bridge_fdb_entry { struct list_head list; struct list_head vlan_list; u16 vport_num; + u16 esw_owner_vhca_id; u16 flags; struct mlx5_flow_handle *ingress_handle; @@ -47,6 +53,9 @@ struct mlx5_esw_bridge_vlan { struct mlx5_esw_bridge_port { u16 vport_num; + u16 esw_owner_vhca_id; + u16 flags; + struct mlx5_esw_bridge *bridge; struct xarray vlans; }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c index 1703384eca95..20af557ae30c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/devlink_port.c @@ -91,9 +91,15 @@ int mlx5_esw_offloads_devlink_port_register(struct mlx5_eswitch *esw, u16 vport_ if (err) goto reg_err; + err = devlink_rate_leaf_create(dl_port, vport); + if (err) + goto rate_err; + vport->dl_port = dl_port; return 0; +rate_err: + devlink_port_unregister(dl_port); reg_err: mlx5_esw_dl_port_free(dl_port); return err; @@ -109,6 +115,12 @@ void mlx5_esw_offloads_devlink_port_unregister(struct mlx5_eswitch *esw, u16 vpo vport = mlx5_eswitch_get_vport(esw, vport_num); if (IS_ERR(vport)) return; + + if (vport->dl_port->devlink_rate) { + mlx5_esw_qos_vport_update_group(esw, vport, NULL, NULL); + devlink_rate_leaf_destroy(vport->dl_port); + } + devlink_port_unregister(vport->dl_port); mlx5_esw_dl_port_free(vport->dl_port); vport->dl_port = NULL; @@ -148,8 +160,16 @@ int mlx5_esw_devlink_sf_port_register(struct mlx5_eswitch *esw, struct devlink_p if (err) return err; + err = devlink_rate_leaf_create(dl_port, vport); + if (err) + goto rate_err; + vport->dl_port = dl_port; return 0; + +rate_err: + devlink_port_unregister(dl_port); + return err; } void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num) @@ -159,6 +179,12 @@ void mlx5_esw_devlink_sf_port_unregister(struct mlx5_eswitch *esw, u16 vport_num vport = mlx5_eswitch_get_vport(esw, vport_num); if (IS_ERR(vport)) return; + + if (vport->dl_port->devlink_rate) { + mlx5_esw_qos_vport_update_group(esw, vport, NULL, NULL); + devlink_rate_leaf_destroy(vport->dl_port); + } + devlink_port_unregister(vport->dl_port); vport->dl_port = NULL; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h index 227964b7d3b9..3401188e0a60 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h @@ -85,11 +85,18 @@ DECLARE_EVENT_CLASS(mlx5_esw_bridge_port_template, TP_ARGS(port), TP_STRUCT__entry( __field(u16, vport_num) + __field(u16, esw_owner_vhca_id) + __field(u16, flags) ), TP_fast_assign( __entry->vport_num = port->vport_num; + __entry->esw_owner_vhca_id = port->esw_owner_vhca_id; + __entry->flags = port->flags; ), - TP_printk("vport_num=%hu", __entry->vport_num) + TP_printk("vport_num=%hu esw_owner_vhca_id=%hu flags=%hx", + __entry->vport_num, + __entry->esw_owner_vhca_id, + __entry->flags) ); DEFINE_EVENT(mlx5_esw_bridge_port_template, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h new file mode 100644 index 000000000000..458baf0c6415 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/qos_tracepoint.h @@ -0,0 +1,123 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mlx5 + +#if !defined(_MLX5_ESW_TP_) || defined(TRACE_HEADER_MULTI_READ) +#define _MLX5_ESW_TP_ + +#include <linux/tracepoint.h> +#include "eswitch.h" + +TRACE_EVENT(mlx5_esw_vport_qos_destroy, + TP_PROTO(const struct mlx5_vport *vport), + TP_ARGS(vport), + TP_STRUCT__entry(__string(devname, dev_name(vport->dev->device)) + __field(unsigned short, vport_id) + __field(unsigned int, tsar_ix) + ), + TP_fast_assign(__assign_str(devname, dev_name(vport->dev->device)); + __entry->vport_id = vport->vport; + __entry->tsar_ix = vport->qos.esw_tsar_ix; + ), + TP_printk("(%s) vport=%hu tsar_ix=%u\n", + __get_str(devname), __entry->vport_id, __entry->tsar_ix + ) +); + +DECLARE_EVENT_CLASS(mlx5_esw_vport_qos_template, + TP_PROTO(const struct mlx5_vport *vport, u32 bw_share, u32 max_rate), + TP_ARGS(vport, bw_share, max_rate), + TP_STRUCT__entry(__string(devname, dev_name(vport->dev->device)) + __field(unsigned short, vport_id) + __field(unsigned int, tsar_ix) + __field(unsigned int, bw_share) + __field(unsigned int, max_rate) + __field(void *, group) + ), + TP_fast_assign(__assign_str(devname, dev_name(vport->dev->device)); + __entry->vport_id = vport->vport; + __entry->tsar_ix = vport->qos.esw_tsar_ix; + __entry->bw_share = bw_share; + __entry->max_rate = max_rate; + __entry->group = vport->qos.group; + ), + TP_printk("(%s) vport=%hu tsar_ix=%u bw_share=%u, max_rate=%u group=%p\n", + __get_str(devname), __entry->vport_id, __entry->tsar_ix, + __entry->bw_share, __entry->max_rate, __entry->group + ) +); + +DEFINE_EVENT(mlx5_esw_vport_qos_template, mlx5_esw_vport_qos_create, + TP_PROTO(const struct mlx5_vport *vport, u32 bw_share, u32 max_rate), + TP_ARGS(vport, bw_share, max_rate) + ); + +DEFINE_EVENT(mlx5_esw_vport_qos_template, mlx5_esw_vport_qos_config, + TP_PROTO(const struct mlx5_vport *vport, u32 bw_share, u32 max_rate), + TP_ARGS(vport, bw_share, max_rate) + ); + +DECLARE_EVENT_CLASS(mlx5_esw_group_qos_template, + TP_PROTO(const struct mlx5_core_dev *dev, + const struct mlx5_esw_rate_group *group, + unsigned int tsar_ix), + TP_ARGS(dev, group, tsar_ix), + TP_STRUCT__entry(__string(devname, dev_name(dev->device)) + __field(const void *, group) + __field(unsigned int, tsar_ix) + ), + TP_fast_assign(__assign_str(devname, dev_name(dev->device)); + __entry->group = group; + __entry->tsar_ix = tsar_ix; + ), + TP_printk("(%s) group=%p tsar_ix=%u\n", + __get_str(devname), __entry->group, __entry->tsar_ix + ) +); + +DEFINE_EVENT(mlx5_esw_group_qos_template, mlx5_esw_group_qos_create, + TP_PROTO(const struct mlx5_core_dev *dev, + const struct mlx5_esw_rate_group *group, + unsigned int tsar_ix), + TP_ARGS(dev, group, tsar_ix) + ); + +DEFINE_EVENT(mlx5_esw_group_qos_template, mlx5_esw_group_qos_destroy, + TP_PROTO(const struct mlx5_core_dev *dev, + const struct mlx5_esw_rate_group *group, + unsigned int tsar_ix), + TP_ARGS(dev, group, tsar_ix) + ); + +TRACE_EVENT(mlx5_esw_group_qos_config, + TP_PROTO(const struct mlx5_core_dev *dev, + const struct mlx5_esw_rate_group *group, + unsigned int tsar_ix, u32 bw_share, u32 max_rate), + TP_ARGS(dev, group, tsar_ix, bw_share, max_rate), + TP_STRUCT__entry(__string(devname, dev_name(dev->device)) + __field(const void *, group) + __field(unsigned int, tsar_ix) + __field(unsigned int, bw_share) + __field(unsigned int, max_rate) + ), + TP_fast_assign(__assign_str(devname, dev_name(dev->device)); + __entry->group = group; + __entry->tsar_ix = tsar_ix; + __entry->bw_share = bw_share; + __entry->max_rate = max_rate; + ), + TP_printk("(%s) group=%p tsar_ix=%u bw_share=%u max_rate=%u\n", + __get_str(devname), __entry->group, __entry->tsar_ix, + __entry->bw_share, __entry->max_rate + ) +); +#endif /* _MLX5_ESW_TP_ */ + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH esw/diag +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE qos_tracepoint +#include <trace/define_trace.h> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c index 3da7becc1069..425c91814b34 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/indir_table.c @@ -364,6 +364,7 @@ static int mlx5_create_indir_fwd_group(struct mlx5_eswitch *esw, dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT; dest.vport.num = e->vport; dest.vport.vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id); + dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID; e->fwd_rule = mlx5_add_flow_rules(e->ft, spec, &flow_act, &dest, 1); if (IS_ERR(e->fwd_rule)) { mlx5_destroy_flow_group(e->fwd_grp); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c index d9041b16611d..df277a6cddc0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c @@ -11,6 +11,7 @@ #include "mlx5_core.h" #include "eswitch.h" #include "fs_core.h" +#include "esw/qos.h" enum { LEGACY_VEPA_PRIO = 0, @@ -508,3 +509,22 @@ unlock: mutex_unlock(&esw->state_lock); return err; } + +int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, u16 vport, + u32 max_rate, u32 min_rate) +{ + struct mlx5_vport *evport = mlx5_eswitch_get_vport(esw, vport); + int err; + + if (!mlx5_esw_allowed(esw)) + return -EPERM; + if (IS_ERR(evport)) + return PTR_ERR(evport); + + mutex_lock(&esw->state_lock); + err = mlx5_esw_qos_set_vport_min_rate(esw, evport, min_rate, NULL); + if (!err) + err = mlx5_esw_qos_set_vport_max_rate(esw, evport, max_rate, NULL); + mutex_unlock(&esw->state_lock); + return err; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c new file mode 100644 index 000000000000..985e305179d1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -0,0 +1,869 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#include "eswitch.h" +#include "esw/qos.h" +#include "en/port.h" +#define CREATE_TRACE_POINTS +#include "diag/qos_tracepoint.h" + +/* Minimum supported BW share value by the HW is 1 Mbit/sec */ +#define MLX5_MIN_BW_SHARE 1 + +#define MLX5_RATE_TO_BW_SHARE(rate, divider, limit) \ + min_t(u32, max_t(u32, DIV_ROUND_UP(rate, divider), MLX5_MIN_BW_SHARE), limit) + +struct mlx5_esw_rate_group { + u32 tsar_ix; + u32 max_rate; + u32 min_rate; + u32 bw_share; + struct list_head list; +}; + +static int esw_qos_tsar_config(struct mlx5_core_dev *dev, u32 *sched_ctx, + u32 parent_ix, u32 tsar_ix, + u32 max_rate, u32 bw_share) +{ + u32 bitmask = 0; + + if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) + return -EOPNOTSUPP; + + MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_ix); + MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate); + MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); + bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; + bitmask |= MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_BW_SHARE; + + return mlx5_modify_scheduling_element_cmd(dev, + SCHEDULING_HIERARCHY_E_SWITCH, + sched_ctx, + tsar_ix, + bitmask); +} + +static int esw_qos_group_config(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group, + u32 max_rate, u32 bw_share, struct netlink_ext_ack *extack) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + struct mlx5_core_dev *dev = esw->dev; + int err; + + err = esw_qos_tsar_config(dev, sched_ctx, + esw->qos.root_tsar_ix, group->tsar_ix, + max_rate, bw_share); + if (err) + NL_SET_ERR_MSG_MOD(extack, "E-Switch modify group TSAR element failed"); + + trace_mlx5_esw_group_qos_config(dev, group, group->tsar_ix, bw_share, max_rate); + + return err; +} + +static int esw_qos_vport_config(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + u32 max_rate, u32 bw_share, + struct netlink_ext_ack *extack) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + struct mlx5_esw_rate_group *group = vport->qos.group; + struct mlx5_core_dev *dev = esw->dev; + u32 parent_tsar_ix; + void *vport_elem; + int err; + + if (!vport->qos.enabled) + return -EIO; + + parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix; + MLX5_SET(scheduling_context, sched_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT); + vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, + element_attributes); + MLX5_SET(vport_element, vport_elem, vport_number, vport->vport); + + err = esw_qos_tsar_config(dev, sched_ctx, parent_tsar_ix, vport->qos.esw_tsar_ix, + max_rate, bw_share); + if (err) { + esw_warn(esw->dev, + "E-Switch modify TSAR vport element failed (vport=%d,err=%d)\n", + vport->vport, err); + NL_SET_ERR_MSG_MOD(extack, "E-Switch modify TSAR vport element failed"); + return err; + } + + trace_mlx5_esw_vport_qos_config(vport, bw_share, max_rate); + + return 0; +} + +static u32 esw_qos_calculate_min_rate_divider(struct mlx5_eswitch *esw, + struct mlx5_esw_rate_group *group, + bool group_level) +{ + u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + struct mlx5_vport *evport; + u32 max_guarantee = 0; + unsigned long i; + + if (group_level) { + struct mlx5_esw_rate_group *group; + + list_for_each_entry(group, &esw->qos.groups, list) { + if (group->min_rate < max_guarantee) + continue; + max_guarantee = group->min_rate; + } + } else { + mlx5_esw_for_each_vport(esw, i, evport) { + if (!evport->enabled || !evport->qos.enabled || + evport->qos.group != group || evport->qos.min_rate < max_guarantee) + continue; + max_guarantee = evport->qos.min_rate; + } + } + + if (max_guarantee) + return max_t(u32, max_guarantee / fw_max_bw_share, 1); + + /* If vports min rate divider is 0 but their group has bw_share configured, then + * need to set bw_share for vports to minimal value. + */ + if (!group_level && !max_guarantee && group->bw_share) + return 1; + return 0; +} + +static u32 esw_qos_calc_bw_share(u32 min_rate, u32 divider, u32 fw_max) +{ + if (divider) + return MLX5_RATE_TO_BW_SHARE(min_rate, divider, fw_max); + + return 0; +} + +static int esw_qos_normalize_vports_min_rate(struct mlx5_eswitch *esw, + struct mlx5_esw_rate_group *group, + struct netlink_ext_ack *extack) +{ + u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + u32 divider = esw_qos_calculate_min_rate_divider(esw, group, false); + struct mlx5_vport *evport; + unsigned long i; + u32 bw_share; + int err; + + mlx5_esw_for_each_vport(esw, i, evport) { + if (!evport->enabled || !evport->qos.enabled || evport->qos.group != group) + continue; + bw_share = esw_qos_calc_bw_share(evport->qos.min_rate, divider, fw_max_bw_share); + + if (bw_share == evport->qos.bw_share) + continue; + + err = esw_qos_vport_config(esw, evport, evport->qos.max_rate, bw_share, extack); + if (err) + return err; + + evport->qos.bw_share = bw_share; + } + + return 0; +} + +static int esw_qos_normalize_groups_min_rate(struct mlx5_eswitch *esw, u32 divider, + struct netlink_ext_ack *extack) +{ + u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + struct mlx5_esw_rate_group *group; + u32 bw_share; + int err; + + list_for_each_entry(group, &esw->qos.groups, list) { + bw_share = esw_qos_calc_bw_share(group->min_rate, divider, fw_max_bw_share); + + if (bw_share == group->bw_share) + continue; + + err = esw_qos_group_config(esw, group, group->max_rate, bw_share, extack); + if (err) + return err; + + group->bw_share = bw_share; + + /* All the group's vports need to be set with default bw_share + * to enable them with QOS + */ + err = esw_qos_normalize_vports_min_rate(esw, group, extack); + + if (err) + return err; + } + + return 0; +} + +int mlx5_esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw, + struct mlx5_vport *evport, + u32 min_rate, + struct netlink_ext_ack *extack) +{ + u32 fw_max_bw_share, previous_min_rate; + bool min_rate_supported; + int err; + + lockdep_assert_held(&esw->state_lock); + fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) && + fw_max_bw_share >= MLX5_MIN_BW_SHARE; + if (min_rate && !min_rate_supported) + return -EOPNOTSUPP; + if (min_rate == evport->qos.min_rate) + return 0; + + previous_min_rate = evport->qos.min_rate; + evport->qos.min_rate = min_rate; + err = esw_qos_normalize_vports_min_rate(esw, evport->qos.group, extack); + if (err) + evport->qos.min_rate = previous_min_rate; + + return err; +} + +int mlx5_esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw, + struct mlx5_vport *evport, + u32 max_rate, + struct netlink_ext_ack *extack) +{ + u32 act_max_rate = max_rate; + bool max_rate_supported; + int err; + + lockdep_assert_held(&esw->state_lock); + max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit); + + if (max_rate && !max_rate_supported) + return -EOPNOTSUPP; + if (max_rate == evport->qos.max_rate) + return 0; + + /* If parent group has rate limit need to set to group + * value when new max rate is 0. + */ + if (evport->qos.group && !max_rate) + act_max_rate = evport->qos.group->max_rate; + + err = esw_qos_vport_config(esw, evport, act_max_rate, evport->qos.bw_share, extack); + + if (!err) + evport->qos.max_rate = max_rate; + + return err; +} + +static int esw_qos_set_group_min_rate(struct mlx5_eswitch *esw, struct mlx5_esw_rate_group *group, + u32 min_rate, struct netlink_ext_ack *extack) +{ + u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + struct mlx5_core_dev *dev = esw->dev; + u32 previous_min_rate, divider; + int err; + + if (!(MLX5_CAP_QOS(dev, esw_bw_share) && fw_max_bw_share >= MLX5_MIN_BW_SHARE)) + return -EOPNOTSUPP; + + if (min_rate == group->min_rate) + return 0; + + previous_min_rate = group->min_rate; + group->min_rate = min_rate; + divider = esw_qos_calculate_min_rate_divider(esw, group, true); + err = esw_qos_normalize_groups_min_rate(esw, divider, extack); + if (err) { + group->min_rate = previous_min_rate; + NL_SET_ERR_MSG_MOD(extack, "E-Switch group min rate setting failed"); + + /* Attempt restoring previous configuration */ + divider = esw_qos_calculate_min_rate_divider(esw, group, true); + if (esw_qos_normalize_groups_min_rate(esw, divider, extack)) + NL_SET_ERR_MSG_MOD(extack, "E-Switch BW share restore failed"); + } + + return err; +} + +static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw, + struct mlx5_esw_rate_group *group, + u32 max_rate, struct netlink_ext_ack *extack) +{ + struct mlx5_vport *vport; + unsigned long i; + int err; + + if (group->max_rate == max_rate) + return 0; + + err = esw_qos_group_config(esw, group, max_rate, group->bw_share, extack); + if (err) + return err; + + group->max_rate = max_rate; + + /* Any unlimited vports in the group should be set + * with the value of the group. + */ + mlx5_esw_for_each_vport(esw, i, vport) { + if (!vport->enabled || !vport->qos.enabled || + vport->qos.group != group || vport->qos.max_rate) + continue; + + err = esw_qos_vport_config(esw, vport, max_rate, vport->qos.bw_share, extack); + if (err) + NL_SET_ERR_MSG_MOD(extack, + "E-Switch vport implicit rate limit setting failed"); + } + + return err; +} + +static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + u32 max_rate, u32 bw_share) +{ + u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + struct mlx5_esw_rate_group *group = vport->qos.group; + struct mlx5_core_dev *dev = esw->dev; + u32 parent_tsar_ix; + void *vport_elem; + int err; + + parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix; + MLX5_SET(scheduling_context, sched_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT); + vport_elem = MLX5_ADDR_OF(scheduling_context, sched_ctx, element_attributes); + MLX5_SET(vport_element, vport_elem, vport_number, vport->vport); + MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_tsar_ix); + MLX5_SET(scheduling_context, sched_ctx, max_average_bw, max_rate); + MLX5_SET(scheduling_context, sched_ctx, bw_share, bw_share); + + err = mlx5_create_scheduling_element_cmd(dev, + SCHEDULING_HIERARCHY_E_SWITCH, + sched_ctx, + &vport->qos.esw_tsar_ix); + if (err) { + esw_warn(esw->dev, "E-Switch create TSAR vport element failed (vport=%d,err=%d)\n", + vport->vport, err); + return err; + } + + return 0; +} + +static int esw_qos_update_group_scheduling_element(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + struct mlx5_esw_rate_group *curr_group, + struct mlx5_esw_rate_group *new_group, + struct netlink_ext_ack *extack) +{ + u32 max_rate; + int err; + + err = mlx5_destroy_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + vport->qos.esw_tsar_ix); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR vport element failed"); + return err; + } + + vport->qos.group = new_group; + max_rate = vport->qos.max_rate ? vport->qos.max_rate : new_group->max_rate; + + /* If vport is unlimited, we set the group's value. + * Therefore, if the group is limited it will apply to + * the vport as well and if not, vport will remain unlimited. + */ + err = esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "E-Switch vport group set failed."); + goto err_sched; + } + + return 0; + +err_sched: + vport->qos.group = curr_group; + max_rate = vport->qos.max_rate ? vport->qos.max_rate : curr_group->max_rate; + if (esw_qos_vport_create_sched_element(esw, vport, max_rate, vport->qos.bw_share)) + esw_warn(esw->dev, "E-Switch vport group restore failed (vport=%d)\n", + vport->vport); + + return err; +} + +static int esw_qos_vport_update_group(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + struct mlx5_esw_rate_group *group, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_rate_group *new_group, *curr_group; + int err; + + if (!vport->enabled) + return -EINVAL; + + curr_group = vport->qos.group; + new_group = group ?: esw->qos.group0; + if (curr_group == new_group) + return 0; + + err = esw_qos_update_group_scheduling_element(esw, vport, curr_group, new_group, extack); + if (err) + return err; + + /* Recalculate bw share weights of old and new groups */ + if (vport->qos.bw_share) { + esw_qos_normalize_vports_min_rate(esw, curr_group, extack); + esw_qos_normalize_vports_min_rate(esw, new_group, extack); + } + + return 0; +} + +static struct mlx5_esw_rate_group * +esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack) +{ + u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + struct mlx5_esw_rate_group *group; + u32 divider; + int err; + + if (!MLX5_CAP_QOS(esw->dev, log_esw_max_sched_depth)) + return ERR_PTR(-EOPNOTSUPP); + + group = kzalloc(sizeof(*group), GFP_KERNEL); + if (!group) + return ERR_PTR(-ENOMEM); + + MLX5_SET(scheduling_context, tsar_ctx, parent_element_id, + esw->qos.root_tsar_ix); + err = mlx5_create_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + tsar_ctx, + &group->tsar_ix); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "E-Switch create TSAR for group failed"); + goto err_sched_elem; + } + + list_add_tail(&group->list, &esw->qos.groups); + + divider = esw_qos_calculate_min_rate_divider(esw, group, true); + if (divider) { + err = esw_qos_normalize_groups_min_rate(esw, divider, extack); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "E-Switch groups normalization failed"); + goto err_min_rate; + } + } + trace_mlx5_esw_group_qos_create(esw->dev, group, group->tsar_ix); + + return group; + +err_min_rate: + list_del(&group->list); + err = mlx5_destroy_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + group->tsar_ix); + if (err) + NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR for group failed"); +err_sched_elem: + kfree(group); + return ERR_PTR(err); +} + +static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw, + struct mlx5_esw_rate_group *group, + struct netlink_ext_ack *extack) +{ + u32 divider; + int err; + + list_del(&group->list); + + divider = esw_qos_calculate_min_rate_divider(esw, NULL, true); + err = esw_qos_normalize_groups_min_rate(esw, divider, extack); + if (err) + NL_SET_ERR_MSG_MOD(extack, "E-Switch groups' normalization failed"); + + err = mlx5_destroy_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + group->tsar_ix); + if (err) + NL_SET_ERR_MSG_MOD(extack, "E-Switch destroy TSAR_ID failed"); + + trace_mlx5_esw_group_qos_destroy(esw->dev, group, group->tsar_ix); + kfree(group); + return err; +} + +static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type) +{ + switch (type) { + case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR: + return MLX5_CAP_QOS(dev, esw_element_type) & + ELEMENT_TYPE_CAP_MASK_TASR; + case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT: + return MLX5_CAP_QOS(dev, esw_element_type) & + ELEMENT_TYPE_CAP_MASK_VPORT; + case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC: + return MLX5_CAP_QOS(dev, esw_element_type) & + ELEMENT_TYPE_CAP_MASK_VPORT_TC; + case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC: + return MLX5_CAP_QOS(dev, esw_element_type) & + ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC; + } + return false; +} + +void mlx5_esw_qos_create(struct mlx5_eswitch *esw) +{ + u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + struct mlx5_core_dev *dev = esw->dev; + __be32 *attr; + int err; + + if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling)) + return; + + if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR)) + return; + + mutex_lock(&esw->state_lock); + if (esw->qos.enabled) + goto unlock; + + MLX5_SET(scheduling_context, tsar_ctx, element_type, + SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR); + + attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes); + *attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16); + + err = mlx5_create_scheduling_element_cmd(dev, + SCHEDULING_HIERARCHY_E_SWITCH, + tsar_ctx, + &esw->qos.root_tsar_ix); + if (err) { + esw_warn(dev, "E-Switch create root TSAR failed (%d)\n", err); + goto unlock; + } + + INIT_LIST_HEAD(&esw->qos.groups); + if (MLX5_CAP_QOS(dev, log_esw_max_sched_depth)) { + esw->qos.group0 = esw_qos_create_rate_group(esw, NULL); + if (IS_ERR(esw->qos.group0)) { + esw_warn(dev, "E-Switch create rate group 0 failed (%ld)\n", + PTR_ERR(esw->qos.group0)); + goto err_group0; + } + } + esw->qos.enabled = true; +unlock: + mutex_unlock(&esw->state_lock); + return; + +err_group0: + err = mlx5_destroy_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + esw->qos.root_tsar_ix); + if (err) + esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err); + mutex_unlock(&esw->state_lock); +} + +void mlx5_esw_qos_destroy(struct mlx5_eswitch *esw) +{ + struct devlink *devlink = priv_to_devlink(esw->dev); + int err; + + devlink_rate_nodes_destroy(devlink); + mutex_lock(&esw->state_lock); + if (!esw->qos.enabled) + goto unlock; + + if (esw->qos.group0) + esw_qos_destroy_rate_group(esw, esw->qos.group0, NULL); + + err = mlx5_destroy_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + esw->qos.root_tsar_ix); + if (err) + esw_warn(esw->dev, "E-Switch destroy root TSAR failed (%d)\n", err); + + esw->qos.enabled = false; +unlock: + mutex_unlock(&esw->state_lock); +} + +int mlx5_esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + u32 max_rate, u32 bw_share) +{ + int err; + + lockdep_assert_held(&esw->state_lock); + if (!esw->qos.enabled) + return 0; + + if (vport->qos.enabled) + return -EEXIST; + + vport->qos.group = esw->qos.group0; + + err = esw_qos_vport_create_sched_element(esw, vport, max_rate, bw_share); + if (!err) { + vport->qos.enabled = true; + trace_mlx5_esw_vport_qos_create(vport, bw_share, max_rate); + } + + return err; +} + +void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport) +{ + int err; + + lockdep_assert_held(&esw->state_lock); + if (!esw->qos.enabled || !vport->qos.enabled) + return; + WARN(vport->qos.group && vport->qos.group != esw->qos.group0, + "Disabling QoS on port before detaching it from group"); + + err = mlx5_destroy_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + vport->qos.esw_tsar_ix); + if (err) + esw_warn(esw->dev, "E-Switch destroy TSAR vport element failed (vport=%d,err=%d)\n", + vport->vport, err); + + vport->qos.enabled = false; + trace_mlx5_esw_vport_qos_destroy(vport); +} + +int mlx5_esw_qos_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num, u32 rate_mbps) +{ + u32 ctx[MLX5_ST_SZ_DW(scheduling_context)] = {}; + struct mlx5_vport *vport; + u32 bitmask; + + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) + return PTR_ERR(vport); + + if (!vport->qos.enabled) + return -EOPNOTSUPP; + + MLX5_SET(scheduling_context, ctx, max_average_bw, rate_mbps); + bitmask = MODIFY_SCHEDULING_ELEMENT_IN_MODIFY_BITMASK_MAX_AVERAGE_BW; + + return mlx5_modify_scheduling_element_cmd(esw->dev, + SCHEDULING_HIERARCHY_E_SWITCH, + ctx, + vport->qos.esw_tsar_ix, + bitmask); +} + +#define MLX5_LINKSPEED_UNIT 125000 /* 1Mbps in Bps */ + +/* Converts bytes per second value passed in a pointer into megabits per + * second, rewriting last. If converted rate exceed link speed or is not a + * fraction of Mbps - returns error. + */ +static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *name, + u64 *rate, struct netlink_ext_ack *extack) +{ + u32 link_speed_max, reminder; + u64 value; + int err; + + err = mlx5e_port_max_linkspeed(mdev, &link_speed_max); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to get link maximum speed"); + return err; + } + + value = div_u64_rem(*rate, MLX5_LINKSPEED_UNIT, &reminder); + if (reminder) { + pr_err("%s rate value %lluBps not in link speed units of 1Mbps.\n", + name, *rate); + NL_SET_ERR_MSG_MOD(extack, "TX rate value not in link speed units of 1Mbps"); + return -EINVAL; + } + + if (value > link_speed_max) { + pr_err("%s rate value %lluMbps exceed link maximum speed %u.\n", + name, value, link_speed_max); + NL_SET_ERR_MSG_MOD(extack, "TX rate value exceed link maximum speed"); + return -EINVAL; + } + + *rate = value; + return 0; +} + +/* Eswitch devlink rate API */ + +int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv, + u64 tx_share, struct netlink_ext_ack *extack) +{ + struct mlx5_vport *vport = priv; + struct mlx5_eswitch *esw; + int err; + + esw = vport->dev->priv.eswitch; + if (!mlx5_esw_allowed(esw)) + return -EPERM; + + err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_share", &tx_share, extack); + if (err) + return err; + + mutex_lock(&esw->state_lock); + err = mlx5_esw_qos_set_vport_min_rate(esw, vport, tx_share, extack); + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv, + u64 tx_max, struct netlink_ext_ack *extack) +{ + struct mlx5_vport *vport = priv; + struct mlx5_eswitch *esw; + int err; + + esw = vport->dev->priv.eswitch; + if (!mlx5_esw_allowed(esw)) + return -EPERM; + + err = esw_qos_devlink_rate_to_mbps(vport->dev, "tx_max", &tx_max, extack); + if (err) + return err; + + mutex_lock(&esw->state_lock); + err = mlx5_esw_qos_set_vport_max_rate(esw, vport, tx_max, extack); + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv, + u64 tx_share, struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink); + struct mlx5_eswitch *esw = dev->priv.eswitch; + struct mlx5_esw_rate_group *group = priv; + int err; + + err = esw_qos_devlink_rate_to_mbps(dev, "tx_share", &tx_share, extack); + if (err) + return err; + + mutex_lock(&esw->state_lock); + err = esw_qos_set_group_min_rate(esw, group, tx_share, extack); + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv, + u64 tx_max, struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_priv(rate_node->devlink); + struct mlx5_eswitch *esw = dev->priv.eswitch; + struct mlx5_esw_rate_group *group = priv; + int err; + + err = esw_qos_devlink_rate_to_mbps(dev, "tx_max", &tx_max, extack); + if (err) + return err; + + mutex_lock(&esw->state_lock); + err = esw_qos_set_group_max_rate(esw, group, tx_max, extack); + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_rate_group *group; + struct mlx5_eswitch *esw; + int err = 0; + + esw = mlx5_devlink_eswitch_get(rate_node->devlink); + if (IS_ERR(esw)) + return PTR_ERR(esw); + + mutex_lock(&esw->state_lock); + if (esw->mode != MLX5_ESWITCH_OFFLOADS) { + NL_SET_ERR_MSG_MOD(extack, + "Rate node creation supported only in switchdev mode"); + err = -EOPNOTSUPP; + goto unlock; + } + + group = esw_qos_create_rate_group(esw, extack); + if (IS_ERR(group)) { + err = PTR_ERR(group); + goto unlock; + } + + *priv = group; +unlock: + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_rate_group *group = priv; + struct mlx5_eswitch *esw; + int err; + + esw = mlx5_devlink_eswitch_get(rate_node->devlink); + if (IS_ERR(esw)) + return PTR_ERR(esw); + + mutex_lock(&esw->state_lock); + err = esw_qos_destroy_rate_group(esw, group, extack); + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_esw_qos_vport_update_group(struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + struct mlx5_esw_rate_group *group, + struct netlink_ext_ack *extack) +{ + int err; + + mutex_lock(&esw->state_lock); + err = esw_qos_vport_update_group(esw, vport, group, extack); + mutex_unlock(&esw->state_lock); + return err; +} + +int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate, + struct devlink_rate *parent, + void *priv, void *parent_priv, + struct netlink_ext_ack *extack) +{ + struct mlx5_esw_rate_group *group; + struct mlx5_vport *vport = priv; + + if (!parent) + return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch, + vport, NULL, extack); + + group = parent_priv; + return mlx5_esw_qos_vport_update_group(vport->dev->priv.eswitch, vport, group, extack); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h new file mode 100644 index 000000000000..28451abe2d2f --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */ + +#ifndef __MLX5_ESW_QOS_H__ +#define __MLX5_ESW_QOS_H__ + +#ifdef CONFIG_MLX5_ESWITCH + +int mlx5_esw_qos_set_vport_min_rate(struct mlx5_eswitch *esw, + struct mlx5_vport *evport, + u32 min_rate, + struct netlink_ext_ack *extack); +int mlx5_esw_qos_set_vport_max_rate(struct mlx5_eswitch *esw, + struct mlx5_vport *evport, + u32 max_rate, + struct netlink_ext_ack *extack); +void mlx5_esw_qos_create(struct mlx5_eswitch *esw); +void mlx5_esw_qos_destroy(struct mlx5_eswitch *esw); +int mlx5_esw_qos_vport_enable(struct mlx5_eswitch *esw, struct mlx5_vport *vport, + u32 max_rate, u32 bw_share); +void mlx5_esw_qos_vport_disable(struct mlx5_eswitch *esw, struct mlx5_vport *vport); + +int mlx5_esw_devlink_rate_leaf_tx_share_set(struct devlink_rate *rate_leaf, void *priv, + u64 tx_share, struct netlink_ext_ack *extack); +int mlx5_esw_devlink_rate_leaf_tx_max_set(struct devlink_rate *rate_leaf, void *priv, + u64 tx_max, struct netlink_ext_ack *extack); +int mlx5_esw_devlink_rate_node_tx_share_set(struct devlink_rate *rate_node, void *priv, + u64 tx_share, struct netlink_ext_ack *extack); +int mlx5_esw_devlink_rate_node_tx_max_set(struct devlink_rate *rate_node, void *priv, + u64 tx_max, struct netlink_ext_ack *extack); +int mlx5_esw_devlink_rate_node_new(struct devlink_rate *rate_node, void **priv, + struct netlink_ext_ack *extack); +int mlx5_esw_devlink_rate_node_del(struct devlink_rate *rate_node, void *priv, + struct netlink_ext_ack *extack); +int mlx5_esw_devlink_rate_parent_set(struct devlink_rate *devlink_rate, + struct devlink_rate *parent, + void *priv, void *parent_priv, + struct netlink_ext_ack *extack); +#endif + +#endif diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c deleted file mode 100644 index d3ad78aa9d45..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.c +++ /dev/null @@ -1,586 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB -/* Copyright (c) 2021 Mellanox Technologies. */ - -#include <linux/skbuff.h> -#include <net/psample.h> -#include "en/mapping.h" -#include "esw/sample.h" -#include "eswitch.h" -#include "en_tc.h" -#include "fs_core.h" - -#define MLX5_ESW_VPORT_TBL_SIZE_SAMPLE (64 * 1024) - -static const struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_sample_ns = { - .max_fte = MLX5_ESW_VPORT_TBL_SIZE_SAMPLE, - .max_num_groups = 0, /* default num of groups */ - .flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | MLX5_FLOW_TABLE_TUNNEL_EN_DECAP, -}; - -struct mlx5_esw_psample { - struct mlx5e_priv *priv; - struct mlx5_flow_table *termtbl; - struct mlx5_flow_handle *termtbl_rule; - DECLARE_HASHTABLE(hashtbl, 8); - struct mutex ht_lock; /* protect hashtbl */ - DECLARE_HASHTABLE(restore_hashtbl, 8); - struct mutex restore_lock; /* protect restore_hashtbl */ -}; - -struct mlx5_sampler { - struct hlist_node hlist; - u32 sampler_id; - u32 sample_ratio; - u32 sample_table_id; - u32 default_table_id; - int count; -}; - -struct mlx5_sample_flow { - struct mlx5_sampler *sampler; - struct mlx5_sample_restore *restore; - struct mlx5_flow_attr *pre_attr; - struct mlx5_flow_handle *pre_rule; - struct mlx5_flow_handle *rule; -}; - -struct mlx5_sample_restore { - struct hlist_node hlist; - struct mlx5_modify_hdr *modify_hdr; - struct mlx5_flow_handle *rule; - u32 obj_id; - int count; -}; - -static int -sampler_termtbl_create(struct mlx5_esw_psample *esw_psample) -{ - struct mlx5_core_dev *dev = esw_psample->priv->mdev; - struct mlx5_eswitch *esw = dev->priv.eswitch; - struct mlx5_flow_table_attr ft_attr = {}; - struct mlx5_flow_destination dest = {}; - struct mlx5_flow_namespace *root_ns; - struct mlx5_flow_act act = {}; - int err; - - if (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, termination_table)) { - mlx5_core_warn(dev, "termination table is not supported\n"); - return -EOPNOTSUPP; - } - - root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB); - if (!root_ns) { - mlx5_core_warn(dev, "failed to get FDB flow namespace\n"); - return -EOPNOTSUPP; - } - - ft_attr.flags = MLX5_FLOW_TABLE_TERMINATION | MLX5_FLOW_TABLE_UNMANAGED; - ft_attr.autogroup.max_num_groups = 1; - ft_attr.prio = FDB_SLOW_PATH; - ft_attr.max_fte = 1; - ft_attr.level = 1; - esw_psample->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr); - if (IS_ERR(esw_psample->termtbl)) { - err = PTR_ERR(esw_psample->termtbl); - mlx5_core_warn(dev, "failed to create termtbl, err: %d\n", err); - return err; - } - - act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST; - dest.vport.num = esw->manager_vport; - esw_psample->termtbl_rule = mlx5_add_flow_rules(esw_psample->termtbl, NULL, &act, &dest, 1); - if (IS_ERR(esw_psample->termtbl_rule)) { - err = PTR_ERR(esw_psample->termtbl_rule); - mlx5_core_warn(dev, "failed to create termtbl rule, err: %d\n", err); - mlx5_destroy_flow_table(esw_psample->termtbl); - return err; - } - - return 0; -} - -static void -sampler_termtbl_destroy(struct mlx5_esw_psample *esw_psample) -{ - mlx5_del_flow_rules(esw_psample->termtbl_rule); - mlx5_destroy_flow_table(esw_psample->termtbl); -} - -static int -sampler_obj_create(struct mlx5_core_dev *mdev, struct mlx5_sampler *sampler) -{ - u32 in[MLX5_ST_SZ_DW(create_sampler_obj_in)] = {}; - u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; - u64 general_obj_types; - void *obj; - int err; - - general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types); - if (!(general_obj_types & MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_SAMPLER)) - return -EOPNOTSUPP; - if (!MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, ignore_flow_level)) - return -EOPNOTSUPP; - - obj = MLX5_ADDR_OF(create_sampler_obj_in, in, sampler_object); - MLX5_SET(sampler_obj, obj, table_type, FS_FT_FDB); - MLX5_SET(sampler_obj, obj, ignore_flow_level, 1); - MLX5_SET(sampler_obj, obj, level, 1); - MLX5_SET(sampler_obj, obj, sample_ratio, sampler->sample_ratio); - MLX5_SET(sampler_obj, obj, sample_table_id, sampler->sample_table_id); - MLX5_SET(sampler_obj, obj, default_table_id, sampler->default_table_id); - MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT); - MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_SAMPLER); - - err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); - if (!err) - sampler->sampler_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id); - - return err; -} - -static void -sampler_obj_destroy(struct mlx5_core_dev *mdev, u32 sampler_id) -{ - u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {}; - u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)]; - - MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT); - MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_SAMPLER); - MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, sampler_id); - - mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out)); -} - -static u32 -sampler_hash(u32 sample_ratio, u32 default_table_id) -{ - return jhash_2words(sample_ratio, default_table_id, 0); -} - -static int -sampler_cmp(u32 sample_ratio1, u32 default_table_id1, u32 sample_ratio2, u32 default_table_id2) -{ - return sample_ratio1 != sample_ratio2 || default_table_id1 != default_table_id2; -} - -static struct mlx5_sampler * -sampler_get(struct mlx5_esw_psample *esw_psample, u32 sample_ratio, u32 default_table_id) -{ - struct mlx5_sampler *sampler; - u32 hash_key; - int err; - - mutex_lock(&esw_psample->ht_lock); - hash_key = sampler_hash(sample_ratio, default_table_id); - hash_for_each_possible(esw_psample->hashtbl, sampler, hlist, hash_key) - if (!sampler_cmp(sampler->sample_ratio, sampler->default_table_id, - sample_ratio, default_table_id)) - goto add_ref; - - sampler = kzalloc(sizeof(*sampler), GFP_KERNEL); - if (!sampler) { - err = -ENOMEM; - goto err_alloc; - } - - sampler->sample_table_id = esw_psample->termtbl->id; - sampler->default_table_id = default_table_id; - sampler->sample_ratio = sample_ratio; - - err = sampler_obj_create(esw_psample->priv->mdev, sampler); - if (err) - goto err_create; - - hash_add(esw_psample->hashtbl, &sampler->hlist, hash_key); - -add_ref: - sampler->count++; - mutex_unlock(&esw_psample->ht_lock); - return sampler; - -err_create: - kfree(sampler); -err_alloc: - mutex_unlock(&esw_psample->ht_lock); - return ERR_PTR(err); -} - -static void -sampler_put(struct mlx5_esw_psample *esw_psample, struct mlx5_sampler *sampler) -{ - mutex_lock(&esw_psample->ht_lock); - if (--sampler->count == 0) { - hash_del(&sampler->hlist); - sampler_obj_destroy(esw_psample->priv->mdev, sampler->sampler_id); - kfree(sampler); - } - mutex_unlock(&esw_psample->ht_lock); -} - -static struct mlx5_modify_hdr * -sample_metadata_rule_get(struct mlx5_core_dev *mdev, u32 obj_id) -{ - struct mlx5e_tc_mod_hdr_acts mod_acts = {}; - struct mlx5_modify_hdr *modify_hdr; - int err; - - err = mlx5e_tc_match_to_reg_set(mdev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB, - CHAIN_TO_REG, obj_id); - if (err) - goto err_set_regc0; - - modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_FDB, - mod_acts.num_actions, - mod_acts.actions); - if (IS_ERR(modify_hdr)) { - err = PTR_ERR(modify_hdr); - goto err_modify_hdr; - } - - dealloc_mod_hdr_actions(&mod_acts); - return modify_hdr; - -err_modify_hdr: - dealloc_mod_hdr_actions(&mod_acts); -err_set_regc0: - return ERR_PTR(err); -} - -static struct mlx5_sample_restore * -sample_restore_get(struct mlx5_esw_psample *esw_psample, u32 obj_id) -{ - struct mlx5_core_dev *mdev = esw_psample->priv->mdev; - struct mlx5_eswitch *esw = mdev->priv.eswitch; - struct mlx5_sample_restore *restore; - struct mlx5_modify_hdr *modify_hdr; - int err; - - mutex_lock(&esw_psample->restore_lock); - hash_for_each_possible(esw_psample->restore_hashtbl, restore, hlist, obj_id) - if (restore->obj_id == obj_id) - goto add_ref; - - restore = kzalloc(sizeof(*restore), GFP_KERNEL); - if (!restore) { - err = -ENOMEM; - goto err_alloc; - } - restore->obj_id = obj_id; - - modify_hdr = sample_metadata_rule_get(mdev, obj_id); - if (IS_ERR(modify_hdr)) { - err = PTR_ERR(modify_hdr); - goto err_modify_hdr; - } - restore->modify_hdr = modify_hdr; - - restore->rule = esw_add_restore_rule(esw, obj_id); - if (IS_ERR(restore->rule)) { - err = PTR_ERR(restore->rule); - goto err_restore; - } - - hash_add(esw_psample->restore_hashtbl, &restore->hlist, obj_id); -add_ref: - restore->count++; - mutex_unlock(&esw_psample->restore_lock); - return restore; - -err_restore: - mlx5_modify_header_dealloc(mdev, restore->modify_hdr); -err_modify_hdr: - kfree(restore); -err_alloc: - mutex_unlock(&esw_psample->restore_lock); - return ERR_PTR(err); -} - -static void -sample_restore_put(struct mlx5_esw_psample *esw_psample, struct mlx5_sample_restore *restore) -{ - mutex_lock(&esw_psample->restore_lock); - if (--restore->count == 0) - hash_del(&restore->hlist); - mutex_unlock(&esw_psample->restore_lock); - - if (!restore->count) { - mlx5_del_flow_rules(restore->rule); - mlx5_modify_header_dealloc(esw_psample->priv->mdev, restore->modify_hdr); - kfree(restore); - } -} - -void mlx5_esw_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj) -{ - u32 trunc_size = mapped_obj->sample.trunc_size; - struct psample_group psample_group = {}; - struct psample_metadata md = {}; - - md.trunc_size = trunc_size ? min(trunc_size, skb->len) : skb->len; - md.in_ifindex = skb->dev->ifindex; - psample_group.group_num = mapped_obj->sample.group_id; - psample_group.net = &init_net; - skb_push(skb, skb->mac_len); - - psample_sample_packet(&psample_group, skb, mapped_obj->sample.rate, &md); -} - -/* For the following typical flow table: - * - * +-------------------------------+ - * + original flow table + - * +-------------------------------+ - * + original match + - * +-------------------------------+ - * + sample action + other actions + - * +-------------------------------+ - * - * We translate the tc filter with sample action to the following HW model: - * - * +---------------------+ - * + original flow table + - * +---------------------+ - * + original match + - * +---------------------+ - * | - * v - * +------------------------------------------------+ - * + Flow Sampler Object + - * +------------------------------------------------+ - * + sample ratio + - * +------------------------------------------------+ - * + sample table id | default table id + - * +------------------------------------------------+ - * | | - * v v - * +-----------------------------+ +----------------------------------------+ - * + sample table + + default table per <vport, chain, prio> + - * +-----------------------------+ +----------------------------------------+ - * + forward to management vport + + original match + - * +-----------------------------+ +----------------------------------------+ - * + other actions + - * +----------------------------------------+ - */ -struct mlx5_flow_handle * -mlx5_esw_sample_offload(struct mlx5_esw_psample *esw_psample, - struct mlx5_flow_spec *spec, - struct mlx5_flow_attr *attr) -{ - struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; - struct mlx5_vport_tbl_attr per_vport_tbl_attr; - struct mlx5_esw_flow_attr *pre_esw_attr; - struct mlx5_mapped_obj restore_obj = {}; - struct mlx5_sample_flow *sample_flow; - struct mlx5_sample_attr *sample_attr; - struct mlx5_flow_table *default_tbl; - struct mlx5_flow_attr *pre_attr; - struct mlx5_eswitch *esw; - u32 obj_id; - int err; - - if (IS_ERR_OR_NULL(esw_psample)) - return ERR_PTR(-EOPNOTSUPP); - - /* If slow path flag is set, eg. when the neigh is invalid for encap, - * don't offload sample action. - */ - esw = esw_psample->priv->mdev->priv.eswitch; - if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) - return mlx5_eswitch_add_offloaded_rule(esw, spec, attr); - - sample_flow = kzalloc(sizeof(*sample_flow), GFP_KERNEL); - if (!sample_flow) - return ERR_PTR(-ENOMEM); - esw_attr->sample->sample_flow = sample_flow; - - /* Allocate default table per vport, chain and prio. Otherwise, there is - * only one default table for the same sampler object. Rules with different - * prio and chain may overlap. For CT sample action, per vport default - * table is needed to resotre the metadata. - */ - per_vport_tbl_attr.chain = attr->chain; - per_vport_tbl_attr.prio = attr->prio; - per_vport_tbl_attr.vport = esw_attr->in_rep->vport; - per_vport_tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns; - default_tbl = mlx5_esw_vporttbl_get(esw, &per_vport_tbl_attr); - if (IS_ERR(default_tbl)) { - err = PTR_ERR(default_tbl); - goto err_default_tbl; - } - - /* Perform the original matches on the default table. - * Offload all actions except the sample action. - */ - esw_attr->sample->sample_default_tbl = default_tbl; - /* When offloading sample and encap action, if there is no valid - * neigh data struct, a slow path rule is offloaded first. Source - * port metadata match is set at that time. A per vport table is - * already allocated. No need to match it again. So clear the source - * port metadata match. - */ - mlx5_eswitch_clear_rule_source_port(esw, spec); - sample_flow->rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr); - if (IS_ERR(sample_flow->rule)) { - err = PTR_ERR(sample_flow->rule); - goto err_offload_rule; - } - - /* Create sampler object. */ - sample_flow->sampler = sampler_get(esw_psample, esw_attr->sample->rate, default_tbl->id); - if (IS_ERR(sample_flow->sampler)) { - err = PTR_ERR(sample_flow->sampler); - goto err_sampler; - } - - /* Create an id mapping reg_c0 value to sample object. */ - restore_obj.type = MLX5_MAPPED_OBJ_SAMPLE; - restore_obj.sample.group_id = esw_attr->sample->group_num; - restore_obj.sample.rate = esw_attr->sample->rate; - restore_obj.sample.trunc_size = esw_attr->sample->trunc_size; - err = mapping_add(esw->offloads.reg_c0_obj_pool, &restore_obj, &obj_id); - if (err) - goto err_obj_id; - esw_attr->sample->restore_obj_id = obj_id; - - /* Create sample restore context. */ - sample_flow->restore = sample_restore_get(esw_psample, obj_id); - if (IS_ERR(sample_flow->restore)) { - err = PTR_ERR(sample_flow->restore); - goto err_sample_restore; - } - - /* Perform the original matches on the original table. Offload the - * sample action. The destination is the sampler object. - */ - pre_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB); - if (!pre_attr) { - err = -ENOMEM; - goto err_alloc_flow_attr; - } - sample_attr = kzalloc(sizeof(*sample_attr), GFP_KERNEL); - if (!sample_attr) { - err = -ENOMEM; - goto err_alloc_sample_attr; - } - pre_esw_attr = pre_attr->esw_attr; - pre_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR; - pre_attr->modify_hdr = sample_flow->restore->modify_hdr; - pre_attr->flags = MLX5_ESW_ATTR_FLAG_SAMPLE; - pre_attr->chain = attr->chain; - pre_attr->prio = attr->prio; - pre_esw_attr->sample = sample_attr; - pre_esw_attr->sample->sampler_id = sample_flow->sampler->sampler_id; - pre_esw_attr->in_mdev = esw_attr->in_mdev; - pre_esw_attr->in_rep = esw_attr->in_rep; - sample_flow->pre_rule = mlx5_eswitch_add_offloaded_rule(esw, spec, pre_attr); - if (IS_ERR(sample_flow->pre_rule)) { - err = PTR_ERR(sample_flow->pre_rule); - goto err_pre_offload_rule; - } - sample_flow->pre_attr = pre_attr; - - return sample_flow->rule; - -err_pre_offload_rule: - kfree(sample_attr); -err_alloc_sample_attr: - kfree(pre_attr); -err_alloc_flow_attr: - sample_restore_put(esw_psample, sample_flow->restore); -err_sample_restore: - mapping_remove(esw->offloads.reg_c0_obj_pool, obj_id); -err_obj_id: - sampler_put(esw_psample, sample_flow->sampler); -err_sampler: - /* For sample offload, rule is added in default_tbl. No need to call - * mlx5_esw_chains_put_table() - */ - attr->prio = 0; - attr->chain = 0; - mlx5_eswitch_del_offloaded_rule(esw, sample_flow->rule, attr); -err_offload_rule: - mlx5_esw_vporttbl_put(esw, &per_vport_tbl_attr); -err_default_tbl: - kfree(sample_flow); - return ERR_PTR(err); -} - -void -mlx5_esw_sample_unoffload(struct mlx5_esw_psample *esw_psample, - struct mlx5_flow_handle *rule, - struct mlx5_flow_attr *attr) -{ - struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr; - struct mlx5_sample_flow *sample_flow; - struct mlx5_vport_tbl_attr tbl_attr; - struct mlx5_flow_attr *pre_attr; - struct mlx5_eswitch *esw; - - if (IS_ERR_OR_NULL(esw_psample)) - return; - - /* If slow path flag is set, sample action is not offloaded. - * No need to delete sample rule. - */ - esw = esw_psample->priv->mdev->priv.eswitch; - if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) { - mlx5_eswitch_del_offloaded_rule(esw, rule, attr); - return; - } - - sample_flow = esw_attr->sample->sample_flow; - pre_attr = sample_flow->pre_attr; - memset(pre_attr, 0, sizeof(*pre_attr)); - esw = esw_psample->priv->mdev->priv.eswitch; - mlx5_eswitch_del_offloaded_rule(esw, sample_flow->pre_rule, pre_attr); - mlx5_eswitch_del_offloaded_rule(esw, sample_flow->rule, attr); - - sample_restore_put(esw_psample, sample_flow->restore); - mapping_remove(esw->offloads.reg_c0_obj_pool, esw_attr->sample->restore_obj_id); - sampler_put(esw_psample, sample_flow->sampler); - tbl_attr.chain = attr->chain; - tbl_attr.prio = attr->prio; - tbl_attr.vport = esw_attr->in_rep->vport; - tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns; - mlx5_esw_vporttbl_put(esw, &tbl_attr); - - kfree(pre_attr->esw_attr->sample); - kfree(pre_attr); - kfree(sample_flow); -} - -struct mlx5_esw_psample * -mlx5_esw_sample_init(struct mlx5e_priv *priv) -{ - struct mlx5_esw_psample *esw_psample; - int err; - - esw_psample = kzalloc(sizeof(*esw_psample), GFP_KERNEL); - if (!esw_psample) - return ERR_PTR(-ENOMEM); - esw_psample->priv = priv; - err = sampler_termtbl_create(esw_psample); - if (err) - goto err_termtbl; - - mutex_init(&esw_psample->ht_lock); - mutex_init(&esw_psample->restore_lock); - - return esw_psample; - -err_termtbl: - kfree(esw_psample); - return ERR_PTR(err); -} - -void -mlx5_esw_sample_cleanup(struct mlx5_esw_psample *esw_psample) -{ - if (IS_ERR_OR_NULL(esw_psample)) - return; - - mutex_destroy(&esw_psample->restore_lock); - mutex_destroy(&esw_psample->ht_lock); - sampler_termtbl_destroy(esw_psample); - kfree(esw_psample); -} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.h deleted file mode 100644 index 2a3f4be10030..000000000000 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/sample.h +++ /dev/null @@ -1,42 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ -/* Copyright (c) 2021 Mellanox Technologies. */ - -#ifndef __MLX5_EN_TC_SAMPLE_H__ -#define __MLX5_EN_TC_SAMPLE_H__ - -#include "en.h" -#include "eswitch.h" - -struct mlx5e_priv; -struct mlx5_flow_attr; -struct mlx5_esw_psample; - -struct mlx5_sample_attr { - u32 group_num; - u32 rate; - u32 trunc_size; - u32 restore_obj_id; - u32 sampler_id; - struct mlx5_flow_table *sample_default_tbl; - struct mlx5_sample_flow *sample_flow; -}; - -void mlx5_esw_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj); - -struct mlx5_flow_handle * -mlx5_esw_sample_offload(struct mlx5_esw_psample *sample_priv, - struct mlx5_flow_spec *spec, - struct mlx5_flow_attr *attr); - -void -mlx5_esw_sample_unoffload(struct mlx5_esw_psample *sample_priv, - struct mlx5_flow_handle *rule, - struct mlx5_flow_attr *attr); - -struct mlx5_esw_psample * -mlx5_esw_sample_init(struct mlx5e_priv *priv); - -void -mlx5_esw_sample_cleanup(struct mlx5_esw_psample *esw_psample); - -#endif /* __MLX5_EN_TC_SAMPLE_H__ */ |