summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2021-06-10 23:36:37 +0300
committerDavid S. Miller <davem@davemloft.net>2021-06-10 23:36:37 +0300
commit2027e13f62702cf77b50bf927e8d04be5987380e (patch)
tree5c7583f6c61d16258525fb0aad04dc4cd13b4024 /include
parent0699073951e354069b4cfec28dbc4c35cef46e97 (diff)
parent9724fd5d9c2a0d3686b799ed5ca90cb9378ca4f2 (diff)
downloadlinux-2027e13f62702cf77b50bf927e8d04be5987380e.tar.xz
Merge tag 'mlx5-updates-2021-06-09' of git://git.kernel.org/pub/scm/linux/kernel/git/saeed/linux
Saeed Mahameed says: ==================== mlx5-updates-2021-06-09 Introduce steering header insert/remove and switchdev bridge offloads 1) From Yevgeny, Steering header insert/remove support ConnectX supports offloading of various encapsulations and decapsulations (e.g. VXLAN), which are performed by 'Packet Reformat' action. Starting with ConnectX-6 DX, a new reformat type is supported - INSERT_HEADER. This reformat allows inserting an arbitrary size buffer at a selected location in the packet on RX flows. The insert/remove header support is needed as a prerequisite for the bridge offloads vlan pop/push support, see below. 2) From Vlad, Support for bridge offloads for switchdev mode This change implements bridge offloads with VLAN-support that works on top of mlx5 representors in switchdev mode. HIGH-LEVEL OVERVIEW Hardware supported by mlx5 driver doesn't provide dynamic learning or aging functionality and requires the driver to emulate all switch-like behavior in software. As such, all packets by default go through miss path, appear on representor and get to software bridge, if it is the upper device of the representor. This causes bridge to process packet in software, learn the MAC address to FDB and send SWITCHDEV_FDB_ADD_TO_DEVICE event to all subscribers. Upon reception of SWITCHDEV_FDB_ADD_TO_DEVICE notification mlx5 bridge offloads the FDB to hardware and sends back SWITCHDEV_FDB_ADD_TO_BRIDGE notification to prevent such entries from being aged out by kernel bridge. Leaving aging to kernel bridge would result in deletion of offloaded dynamic FDB entries every aging_time period due to packets being processed by hardware and, consequently, 'used' timestamp for FDB entry not being updated.
Hardware aging is emulated in driver by running periodic workqueue task that manually updates the rules according to their hardware counter: - If hardware counter has changed since last update, the handler updates 'used' timestamp in kernel bridge dynamic entry by sending SWITCHDEV_FDB_ADD_TO_BRIDGE notification for the entry. - If FDB entry wasn't updated for user-controllable aging_time period, then the FDB entry is unoffloaded from hardware and corresponding SWITCHDEV_FDB_DEL_TO_BRIDGE notification is sent to kernel bridge. The mlx5 bridge offload implementation fully supports port VLAN objects, including PVID (vlan push) and "Egress Untagged" (vlan pop). SOFTWARE ARCHITECTURE Mlx5_eswitch is extended with pointer to new mlx5_esw_bridge_offloads structure which has a linked list of mlx5_esw_bridge objects. Struct mlx5_esw_bridge is the main switch object in mlx5 that holds all data for offloaded FDB entries and metadata for bridge ports and their vlans. The mlx5_esw_bridge object is created when first representor of eswitch vport is added to bridge and deleted when the last representor is detached from it. Bridge FDB entries are saved in linked list (to iterate over all FDB entries in aging workqueue task) and also in hashtable for quick lookup by MAC+VLAN tuple. Port metadata is stored in struct mlx5_esw_bridge_port that is saved in xarray to allow quick lookup by vport number. Part of the port metadata is the set of port vlans that are represented by mlx5_esw_bridge_vlan structure. The vlan structure points to all FDBs on vlan/port via fdb_list linked list.
Simplified diagram of mlx5 bridge objects: +------------------+ | mlx5_eswitch | | | | br_offloads | +--------+---------+ | +--------v-------------------+ | mlx5_esw_bridge_offloads | | | +--> bridges | | +-------+--------------------+ | | | | | +---v---------------+ | | mlx5_esw_bridge | | | | | | vports | | | | | | fdb_ht | | +---+---------------+ | | | +---v---------------+ +------+ mlx5_esw_bridge | | | +-------------------------+ vports | | | | | | fdb_ht +------------------------------------------+ | +-------------------+ | | | | | | +----------------------+ +---------------------------+ | +-> mlx5_esw_bridge_port | +--> mlx5_esw_bridge_fdb_entry <-+ | | | +----------------------+ | +--+------------------------+ | | | vlans +--+-> mlx5_esw_bridge_vlan | | | | | | | | | | | +--v------------------------+ | | +----------------------+ | | fdb_list +--+ | mlx5_esw_bridge_fdb_entry <-+ | | +-------^--------------+ +--+------------------------+ | | +----------------------+ | | | | +-> mlx5_esw_bridge_port | | +-----------------------+ | | | | | | vlans | | -----------------------+ | | | +-> mlx5_esw_bridge_vlan | | +----------------------+ | | +---------------------------+ | | fdb_list +-----> mlx5_esw_bridge_fdb_entry <-+ +-------^--------------+ +--+------------------------+ | | +-----------------------+ HARDWARE REPRESENTATION In order to adhere to kernel software datapath model, bridge offloads must come after TC and NF FDBs. However, since netfilter offload in mlx5 is implemented with unmanaged tables, its miss path is not automatically connected to next priority and requires the code to manually connect with slow table. To keep bridge offloads encapsulated and not mix it with eswitch offloads, new FDB_TC_MISS priority is created between FDB_FT_OFFLOAD and FDB_SLOW_PATH, which allows bridge offloads to be created without exposing its internal tables to any other modules since miss path of managed TC-miss table is automatically wired to next priority.
The bridge tables are created with new priority FDB_BR_OFFLOAD in FDB namespace. The new priority is between tc-miss and slow path priorities. The priority consists of two levels: the ingress table that is global per eswitch and matches incoming packets by src_mac/vid and redirects them to next level (egress table) that is chosen according to ingress port bridge membership and matches on dst_mac/vid in order to redirect packet to vport according to the following diagram: + | +---------v----------+ | | | FDB_TC_OFFLOAD | | | +---------+----------+ | | +---------v----------+ | | | FDB_FT_OFFLOAD | | | +---------+----------+ | | +---------v----------+ | | | FDB_TC_MISS | | | +---------+----------+ | +--------------------------------------+ | | | | +------+ | | | | | +------v--------+ FDB_BR_OFFLOAD | | | INGRESS_TABLE | | | +------+---+----+ | | | | match | | | +---------+ | | | | | +-------+ | | +-------v-------+ match | | | | | | EGRESS_TABLE +------------> vport | | | +-------+-------+ | | | | | | | +-------+ | | miss | | | +------+------+ | | | | +--------------------------------------+ | | +---------v----------+ | | | FDB_SLOW_PATH | | | +---------+----------+ | v PATCHES OVERVIEW 1-3 - Miscellaneous refactorings and infrastructure changes. 4 - Mlx5 bridge offload infrastructure and dedicated fs_core namespace/tables implementation. 5 - FDB entry offload. 6 - Dynamic FDB entry aging. 7-10 - VLAN filtering offload. 11 - Tracepoints for main mlx5 bridge offload events (FDB entry offload/unoffload, VLAN add/delete, etc.) ==================== Signed-off-by: David S. Miller <davem@davemloft.net> --
Diffstat (limited to 'include')
-rw-r--r--include/linux/mlx5/device.h10
-rw-r--r--include/linux/mlx5/fs.h14
-rw-r--r--include/linux/mlx5/mlx5_ifc.h40
3 files changed, 55 insertions, 9 deletions
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 578c4ccae91c..0025913505ab 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1179,6 +1179,7 @@ enum mlx5_cap_type {
MLX5_CAP_VDPA_EMULATION = 0x13,
MLX5_CAP_DEV_EVENT = 0x14,
MLX5_CAP_IPSEC,
+ MLX5_CAP_GENERAL_2 = 0x20,
/* NUM OF CAP Types */
MLX5_CAP_NUM
};
@@ -1220,6 +1221,15 @@ enum mlx5_qcam_feature_groups {
#define MLX5_CAP_GEN_MAX(mdev, cap) \
MLX5_GET(cmd_hca_cap, mdev->caps.hca_max[MLX5_CAP_GENERAL], cap)
+#define MLX5_CAP_GEN_2(mdev, cap) \
+ MLX5_GET(cmd_hca_cap_2, mdev->caps.hca_cur[MLX5_CAP_GENERAL_2], cap)
+
+#define MLX5_CAP_GEN_2_64(mdev, cap) \
+ MLX5_GET64(cmd_hca_cap_2, mdev->caps.hca_cur[MLX5_CAP_GENERAL_2], cap)
+
+#define MLX5_CAP_GEN_2_MAX(mdev, cap) \
+ MLX5_GET(cmd_hca_cap_2, mdev->caps.hca_max[MLX5_CAP_GENERAL_2], cap)
+
#define MLX5_CAP_ETH(mdev, cap) \
MLX5_GET(per_protocol_networking_offload_caps,\
mdev->caps.hca_cur[MLX5_CAP_ETHERNET_OFFLOADS], cap)
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index 1f51f4c3b1af..77746f7e35b8 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -87,6 +87,8 @@ enum {
FDB_BYPASS_PATH,
FDB_TC_OFFLOAD,
FDB_FT_OFFLOAD,
+ FDB_TC_MISS,
+ FDB_BR_OFFLOAD,
FDB_SLOW_PATH,
FDB_PER_VPORT,
};
@@ -254,10 +256,16 @@ struct mlx5_modify_hdr *mlx5_modify_header_alloc(struct mlx5_core_dev *dev,
void mlx5_modify_header_dealloc(struct mlx5_core_dev *dev,
struct mlx5_modify_hdr *modify_hdr);
+struct mlx5_pkt_reformat_params {
+ int type;
+ u8 param_0;
+ u8 param_1;
+ size_t size;
+ void *data;
+};
+
struct mlx5_pkt_reformat *mlx5_packet_reformat_alloc(struct mlx5_core_dev *dev,
- int reformat_type,
- size_t size,
- void *reformat_data,
+ struct mlx5_pkt_reformat_params *params,
enum mlx5_flow_namespace_type ns_type);
void mlx5_packet_reformat_dealloc(struct mlx5_core_dev *dev,
struct mlx5_pkt_reformat *reformat);
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index eb86e80e4643..057db0eaf195 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -435,7 +435,10 @@ struct mlx5_ifc_flow_table_prop_layout_bits {
u8 reserved_at_40[0x20];
- u8 reserved_at_60[0x18];
+ u8 reserved_at_60[0x2];
+ u8 reformat_insert[0x1];
+ u8 reformat_remove[0x1];
+ u8 reserver_at_64[0x14];
u8 log_max_ft_num[0x8];
u8 reserved_at_80[0x10];
@@ -1312,7 +1315,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 reserved_at_0[0x1f];
u8 vhca_resource_manager[0x1];
- u8 reserved_at_20[0x3];
+ u8 hca_cap_2[0x1];
+ u8 reserved_at_21[0x2];
u8 event_on_vhca_state_teardown_request[0x1];
u8 event_on_vhca_state_in_use[0x1];
u8 event_on_vhca_state_active[0x1];
@@ -1732,6 +1736,17 @@ struct mlx5_ifc_cmd_hca_cap_bits {
u8 reserved_at_7c0[0x40];
};
+struct mlx5_ifc_cmd_hca_cap_2_bits {
+ u8 reserved_at_0[0xa0];
+
+ u8 max_reformat_insert_size[0x8];
+ u8 max_reformat_insert_offset[0x8];
+ u8 max_reformat_remove_size[0x8];
+ u8 max_reformat_remove_offset[0x8];
+
+ u8 reserved_at_c0[0x740];
+};
+
enum mlx5_flow_destination_type {
MLX5_FLOW_DESTINATION_TYPE_VPORT = 0x0,
MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE = 0x1,
@@ -3105,6 +3120,7 @@ struct mlx5_ifc_roce_addr_layout_bits {
union mlx5_ifc_hca_cap_union_bits {
struct mlx5_ifc_cmd_hca_cap_bits cmd_hca_cap;
+ struct mlx5_ifc_cmd_hca_cap_2_bits cmd_hca_cap_2;
struct mlx5_ifc_odp_cap_bits odp_cap;
struct mlx5_ifc_atomic_caps_bits atomic_caps;
struct mlx5_ifc_roce_cap_bits roce_cap;
@@ -5785,12 +5801,14 @@ struct mlx5_ifc_query_eq_in_bits {
};
struct mlx5_ifc_packet_reformat_context_in_bits {
- u8 reserved_at_0[0x5];
- u8 reformat_type[0x3];
- u8 reserved_at_8[0xe];
+ u8 reformat_type[0x8];
+ u8 reserved_at_8[0x4];
+ u8 reformat_param_0[0x4];
+ u8 reserved_at_10[0x6];
u8 reformat_data_size[0xa];
- u8 reserved_at_20[0x10];
+ u8 reformat_param_1[0x8];
+ u8 reserved_at_28[0x8];
u8 reformat_data[2][0x8];
u8 more_reformat_data[][0x8];
@@ -5830,12 +5848,20 @@ struct mlx5_ifc_alloc_packet_reformat_context_out_bits {
u8 reserved_at_60[0x20];
};
+enum {
+ MLX5_REFORMAT_CONTEXT_ANCHOR_MAC_START = 0x1,
+ MLX5_REFORMAT_CONTEXT_ANCHOR_IP_START = 0x7,
+ MLX5_REFORMAT_CONTEXT_ANCHOR_TCP_UDP_START = 0x9,
+};
+
enum mlx5_reformat_ctx_type {
MLX5_REFORMAT_TYPE_L2_TO_VXLAN = 0x0,
MLX5_REFORMAT_TYPE_L2_TO_NVGRE = 0x1,
MLX5_REFORMAT_TYPE_L2_TO_L2_TUNNEL = 0x2,
MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2 = 0x3,
MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL = 0x4,
+ MLX5_REFORMAT_TYPE_INSERT_HDR = 0xf,
+ MLX5_REFORMAT_TYPE_REMOVE_HDR = 0x10,
};
struct mlx5_ifc_alloc_packet_reformat_context_in_bits {
@@ -5956,6 +5982,8 @@ enum {
MLX5_ACTION_IN_FIELD_OUT_TCP_SEQ_NUM = 0x59,
MLX5_ACTION_IN_FIELD_OUT_TCP_ACK_NUM = 0x5B,
MLX5_ACTION_IN_FIELD_IPSEC_SYNDROME = 0x5D,
+ MLX5_ACTION_IN_FIELD_OUT_EMD_47_32 = 0x6F,
+ MLX5_ACTION_IN_FIELD_OUT_EMD_31_0 = 0x70,
};
struct mlx5_ifc_alloc_modify_header_context_out_bits {