Diffstat (limited to 'drivers/infiniband/hw/mlx5/main.c')
-rw-r--r-- | drivers/infiniband/hw/mlx5/main.c | 3254 |
1 file changed, 374 insertions(+), 2880 deletions(-)
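The core of this refactor is how the extensible `alloc_ucontext` response is copied to userspace: the old code grew `resp.response_length` field by field with `offsetofend(...) <= udata->outlen` checks, while the new code fills the whole response in `set_ucontext_resp()` and clamps one copy with `min(udata->outlen, sizeof(resp))`. The sketch below illustrates why the clamp is safe for older userspace; the struct and names are hypothetical stand-ins, not the real `mlx5_ib_alloc_ucontext_resp` layout.

```c
/*
 * Minimal sketch (not driver code): extensible-ABI responses.
 * New response fields are only ever appended, so copying
 * min(userspace buffer size, full struct size) gives old userspace
 * exactly the prefix it knows about and new userspace everything.
 */
#include <stddef.h>
#include <string.h>

struct demo_resp {
	unsigned int qp_tab_size;	/* original field */
	unsigned int response_length;	/* original field */
	unsigned char cqe_version;	/* appended in a later ABI rev */
};

static void copy_resp_to_udata(void *udata_out, size_t outlen,
			       const struct demo_resp *resp)
{
	/* Old userspace passes a short buffer; trailing fields are dropped. */
	size_t len = outlen < sizeof(*resp) ? outlen : sizeof(*resp);

	memcpy(udata_out, resp, len);
}
```

This is also what makes the new `mlx5_ib_query_ucontext()` method in the diff cheap to add: it reuses the same fill helper and truncates against the attribute length instead of `udata->outlen`.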
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 6f99ed03d88e..fbc45a5e76c5 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -1,33 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /*
- * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses. You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
+ * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
  */
 
 #include <linux/debugfs.h>
@@ -59,10 +32,13 @@
 #include "mlx5_ib.h"
 #include "ib_rep.h"
 #include "cmd.h"
+#include "devx.h"
+#include "fs.h"
 #include "srq.h"
 #include "qp.h"
 #include "wr.h"
-#include <linux/mlx5/fs_helpers.h>
+#include "restrack.h"
+#include "counters.h"
 #include <linux/mlx5/accel.h>
 #include <rdma/uverbs_std_types.h>
 #include <rdma/mlx5_user_ioctl_verbs.h>
@@ -311,9 +287,6 @@ struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *ibdev,
 	*native_port_num = 1;
 
 	port = &ibdev->port[ib_port_num - 1];
-	if (!port)
-		return NULL;
-
 	spin_lock(&port->mp.mpi_lock);
 	mpi = ibdev->port[ib_port_num - 1].mp.mpi;
 	if (mpi && !mpi->unaffiliate) {
@@ -1765,6 +1738,92 @@ static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn,
 	mlx5_ib_disable_lb(dev, true, false);
 }
 
+static int set_ucontext_resp(struct ib_ucontext *uctx,
+			     struct mlx5_ib_alloc_ucontext_resp *resp)
+{
+	struct ib_device *ibdev = uctx->device;
+	struct mlx5_ib_dev *dev = to_mdev(ibdev);
+	struct mlx5_ib_ucontext *context = to_mucontext(uctx);
+	struct mlx5_bfreg_info *bfregi = &context->bfregi;
+	int err;
+
+	if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
+		err = mlx5_cmd_dump_fill_mkey(dev->mdev,
+					      &resp->dump_fill_mkey);
+		if (err)
+			return err;
+		resp->comp_mask |=
+			MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY;
+	}
+
+	resp->qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
+	if (dev->wc_support)
+		resp->bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev,
+						      log_bf_reg_size);
+	resp->cache_line_size = cache_line_size();
+	resp->max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
+	resp->max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
+	resp->max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
+	resp->max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
+	resp->max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
+	resp->cqe_version = context->cqe_version;
+	resp->log_uar_size = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
+				MLX5_ADAPTER_PAGE_SHIFT : PAGE_SHIFT;
+	resp->num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
+					MLX5_CAP_GEN(dev->mdev,
+						     num_of_uars_per_page) : 1;
+
+	if (mlx5_accel_ipsec_device_caps(dev->mdev) &
+	    MLX5_ACCEL_IPSEC_CAP_DEVICE) {
+		if (mlx5_get_flow_namespace(dev->mdev,
+					    MLX5_FLOW_NAMESPACE_EGRESS))
+			resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM;
+		if (mlx5_accel_ipsec_device_caps(dev->mdev) &
+		    MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA)
+			resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_REQ_METADATA;
+		if (MLX5_CAP_FLOWTABLE(dev->mdev, flow_table_properties_nic_receive.ft_field_support.outer_esp_spi))
+			resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_SPI_STEERING;
+		if (mlx5_accel_ipsec_device_caps(dev->mdev) &
+		    MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN)
+			resp->flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_TX_IV_IS_ESN;
+		/* MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_FULL_OFFLOAD is currently always 0 */
+	}
+
+	resp->tot_bfregs = bfregi->lib_uar_dyn ? 0 :
+			   bfregi->total_num_bfregs - bfregi->num_dyn_bfregs;
+	resp->num_ports = dev->num_ports;
+	resp->cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE |
+			       MLX5_USER_CMDS_SUPP_UHW_CREATE_AH;
+
+	if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) {
+		mlx5_query_min_inline(dev->mdev, &resp->eth_min_inline);
+		resp->eth_min_inline++;
+	}
+
+	if (dev->mdev->clock_info)
+		resp->clock_info_versions = BIT(MLX5_IB_CLOCK_INFO_V1);
+
+	/*
+	 * We don't want to expose information from the PCI bar that is located
+	 * after 4096 bytes, so if the arch only supports larger pages, let's
+	 * pretend we don't support reading the HCA's core clock. This is also
+	 * forced by mmap function.
+	 */
+	if (PAGE_SIZE <= 4096) {
+		resp->comp_mask |=
+			MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
+		resp->hca_core_clock_offset =
+			offsetof(struct mlx5_init_seg,
+				 internal_timer_h) % PAGE_SIZE;
+	}
+
+	if (MLX5_CAP_GEN(dev->mdev, ece_support))
+		resp->comp_mask |= MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_ECE;
+
+	resp->num_dyn_bfregs = bfregi->num_dyn_bfregs;
+	return 0;
+}
+
 static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
 				  struct ib_udata *udata)
 {
@@ -1772,14 +1831,12 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
 	struct mlx5_ib_dev *dev = to_mdev(ibdev);
 	struct mlx5_ib_alloc_ucontext_req_v2 req = {};
 	struct mlx5_ib_alloc_ucontext_resp resp = {};
-	struct mlx5_core_dev *mdev = dev->mdev;
 	struct mlx5_ib_ucontext *context = to_mucontext(uctx);
 	struct mlx5_bfreg_info *bfregi;
 	int ver;
 	int err;
 	size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2,
 				     max_cqe_version);
-	u32 dump_fill_mkey;
 	bool lib_uar_4k;
 	bool lib_uar_dyn;
 
@@ -1808,37 +1865,6 @@ static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
 	if (req.num_low_latency_bfregs > req.total_num_bfregs - 1)
 		return -EINVAL;
 
-	resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
-	if (dev->wc_support)
-		resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
-	resp.cache_line_size = cache_line_size();
-	resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
-	resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
-	resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
-	resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
-	resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
-	resp.cqe_version = min_t(__u8,
-				 (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version),
-				 req.max_cqe_version);
-	resp.log_uar_size = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
-				MLX5_ADAPTER_PAGE_SHIFT : PAGE_SHIFT;
-	resp.num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
-				 MLX5_CAP_GEN(dev->mdev, num_of_uars_per_page) : 1;
-	resp.response_length = min(offsetof(typeof(resp), response_length) +
-				   sizeof(resp.response_length), udata->outlen);
-
-	if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE) {
-		if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_EGRESS))
-			resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM;
-		if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA)
-			resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_REQ_METADATA;
-		if (MLX5_CAP_FLOWTABLE(dev->mdev, flow_table_properties_nic_receive.ft_field_support.outer_esp_spi))
-			resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_SPI_STEERING;
-		if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN)
-			resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_TX_IV_IS_ESN;
-		/* MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_FULL_OFFLOAD is currently always 0 */
-	}
-
 	lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR;
 	lib_uar_dyn = req.lib_caps & MLX5_LIB_CAP_DYN_UAR;
 	bfregi = &context->bfregi;
@@ -1887,87 +1913,24 @@ uar_done:
 	if (err)
 		goto out_devx;
 
-	if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
-		err = mlx5_cmd_dump_fill_mkey(dev->mdev, &dump_fill_mkey);
-		if (err)
-			goto out_mdev;
-	}
-
 	INIT_LIST_HEAD(&context->db_page_list);
 	mutex_init(&context->db_page_mutex);
 
-	resp.tot_bfregs = lib_uar_dyn ? 0 : req.total_num_bfregs;
-	resp.num_ports = dev->num_ports;
-
-	if (offsetofend(typeof(resp), cqe_version) <= udata->outlen)
-		resp.response_length += sizeof(resp.cqe_version);
-
-	if (offsetofend(typeof(resp), cmds_supp_uhw) <= udata->outlen) {
-		resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE |
-				      MLX5_USER_CMDS_SUPP_UHW_CREATE_AH;
-		resp.response_length += sizeof(resp.cmds_supp_uhw);
-	}
-
-	if (offsetofend(typeof(resp), eth_min_inline) <= udata->outlen) {
-		if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) {
-			mlx5_query_min_inline(dev->mdev, &resp.eth_min_inline);
-			resp.eth_min_inline++;
-		}
-		resp.response_length += sizeof(resp.eth_min_inline);
-	}
-
-	if (offsetofend(typeof(resp), clock_info_versions) <= udata->outlen) {
-		if (mdev->clock_info)
-			resp.clock_info_versions = BIT(MLX5_IB_CLOCK_INFO_V1);
-		resp.response_length += sizeof(resp.clock_info_versions);
-	}
-
-	/*
-	 * We don't want to expose information from the PCI bar that is located
-	 * after 4096 bytes, so if the arch only supports larger pages, let's
-	 * pretend we don't support reading the HCA's core clock. This is also
-	 * forced by mmap function.
-	 */
-	if (offsetofend(typeof(resp), hca_core_clock_offset) <= udata->outlen) {
-		if (PAGE_SIZE <= 4096) {
-			resp.comp_mask |=
-				MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
-			resp.hca_core_clock_offset =
-				offsetof(struct mlx5_init_seg, internal_timer_h) % PAGE_SIZE;
-		}
-		resp.response_length += sizeof(resp.hca_core_clock_offset);
-	}
-
-	if (offsetofend(typeof(resp), log_uar_size) <= udata->outlen)
-		resp.response_length += sizeof(resp.log_uar_size);
-
-	if (offsetofend(typeof(resp), num_uars_per_page) <= udata->outlen)
-		resp.response_length += sizeof(resp.num_uars_per_page);
-
-	if (offsetofend(typeof(resp), num_dyn_bfregs) <= udata->outlen) {
-		resp.num_dyn_bfregs = bfregi->num_dyn_bfregs;
-		resp.response_length += sizeof(resp.num_dyn_bfregs);
-	}
-
-	if (offsetofend(typeof(resp), dump_fill_mkey) <= udata->outlen) {
-		if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
-			resp.dump_fill_mkey = dump_fill_mkey;
-			resp.comp_mask |=
-				MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY;
-		}
-		resp.response_length += sizeof(resp.dump_fill_mkey);
-	}
+	context->cqe_version = min_t(__u8,
+				     (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version),
+				     req.max_cqe_version);
 
-	if (MLX5_CAP_GEN(dev->mdev, ece_support))
-		resp.comp_mask |= MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_ECE;
+	err = set_ucontext_resp(uctx, &resp);
+	if (err)
+		goto out_mdev;
 
+	resp.response_length = min(udata->outlen, sizeof(resp));
 	err = ib_copy_to_udata(udata, &resp, resp.response_length);
 	if (err)
 		goto out_mdev;
 
 	bfregi->ver = ver;
 	bfregi->num_low_latency_bfregs = req.num_low_latency_bfregs;
-	context->cqe_version = resp.cqe_version;
 	context->lib_caps = req.lib_caps;
 	print_lib_caps(dev, context->lib_caps);
 
@@ -2000,6 +1963,29 @@ out_ctx:
 	return err;
 }
 
+static int mlx5_ib_query_ucontext(struct ib_ucontext *ibcontext,
+				  struct uverbs_attr_bundle *attrs)
+{
+	struct mlx5_ib_alloc_ucontext_resp uctx_resp = {};
+	int ret;
+
+	ret = set_ucontext_resp(ibcontext, &uctx_resp);
+	if (ret)
+		return ret;
+
+	uctx_resp.response_length =
+		min_t(size_t,
+		      uverbs_attr_get_len(attrs,
+					  MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX),
+		      sizeof(uctx_resp));
+
+	ret = uverbs_copy_to_struct_or_zero(attrs,
+					    MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX,
+					    &uctx_resp,
+					    sizeof(uctx_resp));
+	return ret;
+}
+
 static void mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
 {
 	struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
@@ -2591,1820 +2577,6 @@ static void mlx5_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
 	mlx5_cmd_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid);
 }
 
-enum {
-	MATCH_CRITERIA_ENABLE_OUTER_BIT,
-	MATCH_CRITERIA_ENABLE_MISC_BIT,
-	MATCH_CRITERIA_ENABLE_INNER_BIT,
-	MATCH_CRITERIA_ENABLE_MISC2_BIT
-};
-
-#define HEADER_IS_ZERO(match_criteria, headers)				      \
-	!(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \
-		    0, MLX5_FLD_SZ_BYTES(fte_match_param, headers)))	      \
-
-static u8 get_match_criteria_enable(u32 *match_criteria)
-{
-	u8 match_criteria_enable;
-
-	match_criteria_enable =
-		(!HEADER_IS_ZERO(match_criteria, outer_headers)) <<
-		MATCH_CRITERIA_ENABLE_OUTER_BIT;
-	match_criteria_enable |=
-		(!HEADER_IS_ZERO(match_criteria, misc_parameters)) <<
-		MATCH_CRITERIA_ENABLE_MISC_BIT;
-	match_criteria_enable |=
-		(!HEADER_IS_ZERO(match_criteria, inner_headers)) <<
-		MATCH_CRITERIA_ENABLE_INNER_BIT;
-	match_criteria_enable |=
-		(!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) <<
-		MATCH_CRITERIA_ENABLE_MISC2_BIT;
-
-	return match_criteria_enable;
-}
-
-static int set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
-{
-	u8 entry_mask;
-	u8 entry_val;
-	int err = 0;
-
-	if (!mask)
-		goto out;
-
-	entry_mask = MLX5_GET(fte_match_set_lyr_2_4, outer_c,
-			      ip_protocol);
-	entry_val = MLX5_GET(fte_match_set_lyr_2_4, outer_v,
-			     ip_protocol);
-	if (!entry_mask) {
-		MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask);
-		MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
-		goto out;
-	}
-	/* Don't override existing ip protocol */
-	if (mask != entry_mask || val != entry_val)
-		err = -EINVAL;
-out:
-	return err;
-}
-
-static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val,
-			   bool inner)
-{
-	if (inner) {
-		MLX5_SET(fte_match_set_misc,
-			 misc_c, inner_ipv6_flow_label, mask);
-		MLX5_SET(fte_match_set_misc,
-			 misc_v, inner_ipv6_flow_label, val);
-	} else {
-		MLX5_SET(fte_match_set_misc,
-			 misc_c, outer_ipv6_flow_label, mask);
-		MLX5_SET(fte_match_set_misc,
-			 misc_v, outer_ipv6_flow_label, val);
-	}
-}
-
-static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
-{
-	MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask);
-	MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val);
-	MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2);
-	MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2);
-}
-
-static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask)
-{
-	if (MLX5_GET(fte_match_mpls, set_mask, mpls_label) &&
-	    !(field_support & MLX5_FIELD_SUPPORT_MPLS_LABEL))
-		return -EOPNOTSUPP;
-
-	if (MLX5_GET(fte_match_mpls, set_mask, mpls_exp) &&
-	    !(field_support & MLX5_FIELD_SUPPORT_MPLS_EXP))
-		return -EOPNOTSUPP;
-
-	if (MLX5_GET(fte_match_mpls, set_mask, mpls_s_bos) &&
-	    !(field_support & MLX5_FIELD_SUPPORT_MPLS_S_BOS))
-		return -EOPNOTSUPP;
-
-	if (MLX5_GET(fte_match_mpls, set_mask, mpls_ttl) &&
-	    !(field_support & MLX5_FIELD_SUPPORT_MPLS_TTL))
-		return -EOPNOTSUPP;
-
-	return 0;
-}
-
-#define LAST_ETH_FIELD vlan_tag
-#define LAST_IB_FIELD sl
-#define LAST_IPV4_FIELD tos
-#define LAST_IPV6_FIELD traffic_class
-#define LAST_TCP_UDP_FIELD src_port
-#define LAST_TUNNEL_FIELD tunnel_id
-#define LAST_FLOW_TAG_FIELD tag_id
-#define LAST_DROP_FIELD size
-#define LAST_COUNTERS_FIELD counters
-
-/* Field is the last supported field */
-#define FIELDS_NOT_SUPPORTED(filter, field)\
-	memchr_inv((void *)&filter.field +\
-		   sizeof(filter.field), 0,\
-		   sizeof(filter) -\
-		   offsetof(typeof(filter), field) -\
-		   sizeof(filter.field))
-
-int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
-			   bool is_egress,
-			   struct mlx5_flow_act *action)
-{
-
-	switch (maction->ib_action.type) {
-	case IB_FLOW_ACTION_ESP:
-		if (action->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
-				      MLX5_FLOW_CONTEXT_ACTION_DECRYPT))
-			return -EINVAL;
-		/* Currently only AES_GCM keymat is supported by the driver */
-		action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx;
-		action->action |= is_egress ?
-			MLX5_FLOW_CONTEXT_ACTION_ENCRYPT :
-			MLX5_FLOW_CONTEXT_ACTION_DECRYPT;
-		return 0;
-	case IB_FLOW_ACTION_UNSPECIFIED:
-		if (maction->flow_action_raw.sub_type ==
-		    MLX5_IB_FLOW_ACTION_MODIFY_HEADER) {
-			if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
-				return -EINVAL;
-			action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
-			action->modify_hdr =
-				maction->flow_action_raw.modify_hdr;
-			return 0;
-		}
-		if (maction->flow_action_raw.sub_type ==
-		    MLX5_IB_FLOW_ACTION_DECAP) {
-			if (action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
-				return -EINVAL;
-			action->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
-			return 0;
-		}
-		if (maction->flow_action_raw.sub_type ==
-		    MLX5_IB_FLOW_ACTION_PACKET_REFORMAT) {
-			if (action->action &
-			    MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
-				return -EINVAL;
-			action->action |=
-				MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
-			action->pkt_reformat =
-				maction->flow_action_raw.pkt_reformat;
-			return 0;
-		}
-		/* fall through */
-	default:
-		return -EOPNOTSUPP;
-	}
-}
-
-static int parse_flow_attr(struct mlx5_core_dev *mdev,
-			   struct mlx5_flow_spec *spec,
-			   const union ib_flow_spec *ib_spec,
-			   const struct ib_flow_attr *flow_attr,
-			   struct mlx5_flow_act *action, u32 prev_type)
-{
-	struct mlx5_flow_context *flow_context = &spec->flow_context;
-	u32 *match_c = spec->match_criteria;
-	u32 *match_v = spec->match_value;
-	void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
-					   misc_parameters);
-	void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
-					   misc_parameters);
-	void *misc_params2_c = MLX5_ADDR_OF(fte_match_param, match_c,
-					    misc_parameters_2);
-	void *misc_params2_v = MLX5_ADDR_OF(fte_match_param, match_v,
-					    misc_parameters_2);
-	void *headers_c;
-	void *headers_v;
-	int match_ipv;
-	int ret;
-
-	if (ib_spec->type & IB_FLOW_SPEC_INNER) {
-		headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
-					 inner_headers);
-		headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
-					 inner_headers);
-		match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
-					ft_field_support.inner_ip_version);
-	} else {
-		headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
-					 outer_headers);
-		headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
-					 outer_headers);
-		match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
-					ft_field_support.outer_ip_version);
-	}
-
-	switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
-	case IB_FLOW_SPEC_ETH:
-		if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
-			return -EOPNOTSUPP;
-
-		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
-					     dmac_47_16),
-				ib_spec->eth.mask.dst_mac);
-		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
-					     dmac_47_16),
-				ib_spec->eth.val.dst_mac);
-
-		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
-					     smac_47_16),
-				ib_spec->eth.mask.src_mac);
-		ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
-					     smac_47_16),
-				ib_spec->eth.val.src_mac);
-
-		if (ib_spec->eth.mask.vlan_tag) {
-			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
-				 cvlan_tag, 1);
-			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
-				 cvlan_tag, 1);
-
-			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
-				 first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
-			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
-				 first_vid, ntohs(ib_spec->eth.val.vlan_tag));
-
-			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
-				 first_cfi,
-				 ntohs(ib_spec->eth.mask.vlan_tag) >> 12);
-			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
-				 first_cfi,
-				 ntohs(ib_spec->eth.val.vlan_tag) >> 12);
-
-			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
-				 first_prio,
-				 ntohs(ib_spec->eth.mask.vlan_tag) >> 13);
-			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
-				 first_prio,
-				 ntohs(ib_spec->eth.val.vlan_tag) >> 13);
-		}
-		MLX5_SET(fte_match_set_lyr_2_4, headers_c,
-			 ethertype, ntohs(ib_spec->eth.mask.ether_type));
-		MLX5_SET(fte_match_set_lyr_2_4, headers_v,
-			 ethertype, ntohs(ib_spec->eth.val.ether_type));
-		break;
-	case IB_FLOW_SPEC_IPV4:
-		if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
-			return -EOPNOTSUPP;
-
-		if (match_ipv) {
-			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
-				 ip_version, 0xf);
-			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
-				 ip_version, MLX5_FS_IPV4_VERSION);
-		} else {
-			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
-				 ethertype, 0xffff);
-			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
-				 ethertype, ETH_P_IP);
-		}
-
-		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
-				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
-		       &ib_spec->ipv4.mask.src_ip,
-		       sizeof(ib_spec->ipv4.mask.src_ip));
-		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
-				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
-		       &ib_spec->ipv4.val.src_ip,
-		       sizeof(ib_spec->ipv4.val.src_ip));
-		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
-				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
-		       &ib_spec->ipv4.mask.dst_ip,
-		       sizeof(ib_spec->ipv4.mask.dst_ip));
-		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
-				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
-		       &ib_spec->ipv4.val.dst_ip,
-		       sizeof(ib_spec->ipv4.val.dst_ip));
-
-		set_tos(headers_c, headers_v,
-			ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos);
-
-		if (set_proto(headers_c, headers_v,
-			      ib_spec->ipv4.mask.proto,
-			      ib_spec->ipv4.val.proto))
-			return -EINVAL;
-		break;
-	case IB_FLOW_SPEC_IPV6:
-		if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
-			return -EOPNOTSUPP;
-
-		if (match_ipv) {
-			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
-				 ip_version, 0xf);
-			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
-				 ip_version, MLX5_FS_IPV6_VERSION);
-		} else {
-			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
-				 ethertype, 0xffff);
-			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
-				 ethertype, ETH_P_IPV6);
-		}
-
-		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
-				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
-		       &ib_spec->ipv6.mask.src_ip,
-		       sizeof(ib_spec->ipv6.mask.src_ip));
-		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
-				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
-		       &ib_spec->ipv6.val.src_ip,
-		       sizeof(ib_spec->ipv6.val.src_ip));
-		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
-				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
-		       &ib_spec->ipv6.mask.dst_ip,
-		       sizeof(ib_spec->ipv6.mask.dst_ip));
-		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
-				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
-		       &ib_spec->ipv6.val.dst_ip,
-		       sizeof(ib_spec->ipv6.val.dst_ip));
-
-		set_tos(headers_c, headers_v,
-			ib_spec->ipv6.mask.traffic_class,
-			ib_spec->ipv6.val.traffic_class);
-
-		if (set_proto(headers_c, headers_v,
-			      ib_spec->ipv6.mask.next_hdr,
-			      ib_spec->ipv6.val.next_hdr))
-			return -EINVAL;
-
-		set_flow_label(misc_params_c, misc_params_v,
-			       ntohl(ib_spec->ipv6.mask.flow_label),
-			       ntohl(ib_spec->ipv6.val.flow_label),
-			       ib_spec->type & IB_FLOW_SPEC_INNER);
-		break;
-	case IB_FLOW_SPEC_ESP:
-		if (ib_spec->esp.mask.seq)
-			return -EOPNOTSUPP;
-
-		MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi,
-			 ntohl(ib_spec->esp.mask.spi));
-		MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi,
-			 ntohl(ib_spec->esp.val.spi));
-		break;
-	case IB_FLOW_SPEC_TCP:
-		if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
-					 LAST_TCP_UDP_FIELD))
-			return -EOPNOTSUPP;
-
-		if (set_proto(headers_c, headers_v, 0xff, IPPROTO_TCP))
-			return -EINVAL;
-
-		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport,
-			 ntohs(ib_spec->tcp_udp.mask.src_port));
-		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport,
-			 ntohs(ib_spec->tcp_udp.val.src_port));
-
-		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport,
-			 ntohs(ib_spec->tcp_udp.mask.dst_port));
-		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport,
-			 ntohs(ib_spec->tcp_udp.val.dst_port));
-		break;
-	case IB_FLOW_SPEC_UDP:
-		if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
-					 LAST_TCP_UDP_FIELD))
-			return -EOPNOTSUPP;
-
-		if (set_proto(headers_c, headers_v, 0xff, IPPROTO_UDP))
-			return -EINVAL;
-
-		MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
-			 ntohs(ib_spec->tcp_udp.mask.src_port));
-		MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
-			 ntohs(ib_spec->tcp_udp.val.src_port));
-
-		MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
-			 ntohs(ib_spec->tcp_udp.mask.dst_port));
-		MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
-			 ntohs(ib_spec->tcp_udp.val.dst_port));
-		break;
-	case IB_FLOW_SPEC_GRE:
-		if (ib_spec->gre.mask.c_ks_res0_ver)
-			return -EOPNOTSUPP;
-
-		if (set_proto(headers_c, headers_v, 0xff, IPPROTO_GRE))
-			return -EINVAL;
-
-		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
-			 0xff);
-		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
-			 IPPROTO_GRE);
-
-		MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol,
-			 ntohs(ib_spec->gre.mask.protocol));
-		MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol,
-			 ntohs(ib_spec->gre.val.protocol));
-
-		memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
-				    gre_key.nvgre.hi),
-		       &ib_spec->gre.mask.key,
-		       sizeof(ib_spec->gre.mask.key));
-		memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v,
-				    gre_key.nvgre.hi),
-		       &ib_spec->gre.val.key,
-		       sizeof(ib_spec->gre.val.key));
-		break;
-	case IB_FLOW_SPEC_MPLS:
-		switch (prev_type) {
-		case IB_FLOW_SPEC_UDP:
-			if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
-						   ft_field_support.outer_first_mpls_over_udp),
-						   &ib_spec->mpls.mask.tag))
-				return -EOPNOTSUPP;
-
-			memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
-					    outer_first_mpls_over_udp),
-			       &ib_spec->mpls.val.tag,
-			       sizeof(ib_spec->mpls.val.tag));
-			memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
-					    outer_first_mpls_over_udp),
-			       &ib_spec->mpls.mask.tag,
-			       sizeof(ib_spec->mpls.mask.tag));
-			break;
-		case IB_FLOW_SPEC_GRE:
-			if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
-						   ft_field_support.outer_first_mpls_over_gre),
-						   &ib_spec->mpls.mask.tag))
-				return -EOPNOTSUPP;
-
-			memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
-					    outer_first_mpls_over_gre),
-			       &ib_spec->mpls.val.tag,
-			       sizeof(ib_spec->mpls.val.tag));
-			memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
-					    outer_first_mpls_over_gre),
-			       &ib_spec->mpls.mask.tag,
-			       sizeof(ib_spec->mpls.mask.tag));
-			break;
-		default:
-			if (ib_spec->type & IB_FLOW_SPEC_INNER) {
-				if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
-							   ft_field_support.inner_first_mpls),
-							   &ib_spec->mpls.mask.tag))
-					return -EOPNOTSUPP;
-
-				memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
-						    inner_first_mpls),
-				       &ib_spec->mpls.val.tag,
-				       sizeof(ib_spec->mpls.val.tag));
-				memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
-						    inner_first_mpls),
-				       &ib_spec->mpls.mask.tag,
-				       sizeof(ib_spec->mpls.mask.tag));
-			} else {
-				if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
-							   ft_field_support.outer_first_mpls),
-							   &ib_spec->mpls.mask.tag))
-					return -EOPNOTSUPP;
-
-				memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
-						    outer_first_mpls),
-				       &ib_spec->mpls.val.tag,
-				       sizeof(ib_spec->mpls.val.tag));
-				memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
-						    outer_first_mpls),
-				       &ib_spec->mpls.mask.tag,
-				       sizeof(ib_spec->mpls.mask.tag));
-			}
-		}
-		break;
-	case IB_FLOW_SPEC_VXLAN_TUNNEL:
-		if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
-					 LAST_TUNNEL_FIELD))
-			return -EOPNOTSUPP;
-
-		MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni,
-			 ntohl(ib_spec->tunnel.mask.tunnel_id));
-		MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni,
-			 ntohl(ib_spec->tunnel.val.tunnel_id));
-		break;
-	case IB_FLOW_SPEC_ACTION_TAG:
-		if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag,
-					 LAST_FLOW_TAG_FIELD))
-			return -EOPNOTSUPP;
-		if (ib_spec->flow_tag.tag_id >= BIT(24))
-			return -EINVAL;
-
-		flow_context->flow_tag = ib_spec->flow_tag.tag_id;
-		flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
-		break;
-	case IB_FLOW_SPEC_ACTION_DROP:
-		if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
-					 LAST_DROP_FIELD))
-			return -EOPNOTSUPP;
-		action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
-		break;
-	case IB_FLOW_SPEC_ACTION_HANDLE:
-		ret = parse_flow_flow_action(to_mflow_act(ib_spec->action.act),
-			flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS, action);
-		if (ret)
-			return ret;
-		break;
-	case IB_FLOW_SPEC_ACTION_COUNT:
-		if (FIELDS_NOT_SUPPORTED(ib_spec->flow_count,
-					 LAST_COUNTERS_FIELD))
-			return -EOPNOTSUPP;
-
-		/* for now support only one counters spec per flow */
-		if (action->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
-			return -EINVAL;
-
-		action->counters = ib_spec->flow_count.counters;
-		action->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-/* If a flow could catch both multicast and unicast packets,
- * it won't fall into the multicast flow steering table and this rule
- * could steal other multicast packets.
- */
-static bool flow_is_multicast_only(const struct ib_flow_attr *ib_attr)
-{
-	union ib_flow_spec *flow_spec;
-
-	if (ib_attr->type != IB_FLOW_ATTR_NORMAL ||
-	    ib_attr->num_of_specs < 1)
-		return false;
-
-	flow_spec = (union ib_flow_spec *)(ib_attr + 1);
-	if (flow_spec->type == IB_FLOW_SPEC_IPV4) {
-		struct ib_flow_spec_ipv4 *ipv4_spec;
-
-		ipv4_spec = (struct ib_flow_spec_ipv4 *)flow_spec;
-		if (ipv4_is_multicast(ipv4_spec->val.dst_ip))
-			return true;
-
-		return false;
-	}
-
-	if (flow_spec->type == IB_FLOW_SPEC_ETH) {
-		struct ib_flow_spec_eth *eth_spec;
-
-		eth_spec = (struct ib_flow_spec_eth *)flow_spec;
-		return is_multicast_ether_addr(eth_spec->mask.dst_mac) &&
-		       is_multicast_ether_addr(eth_spec->val.dst_mac);
-	}
-
-	return false;
-}
-
-enum valid_spec {
-	VALID_SPEC_INVALID,
-	VALID_SPEC_VALID,
-	VALID_SPEC_NA,
-};
-
-static enum valid_spec
-is_valid_esp_aes_gcm(struct mlx5_core_dev *mdev,
-		     const struct mlx5_flow_spec *spec,
-		     const struct mlx5_flow_act *flow_act,
-		     bool egress)
-{
-	const u32 *match_c = spec->match_criteria;
-	bool is_crypto =
-		(flow_act->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
-				     MLX5_FLOW_CONTEXT_ACTION_DECRYPT));
-	bool is_ipsec = mlx5_fs_is_ipsec_flow(match_c);
-	bool is_drop = flow_act->action & MLX5_FLOW_CONTEXT_ACTION_DROP;
-
-	/*
-	 * Currently only crypto is supported in egress, when regular egress
-	 * rules would be supported, always return VALID_SPEC_NA.
-	 */
-	if (!is_crypto)
-		return VALID_SPEC_NA;
-
-	return is_crypto && is_ipsec &&
-		(!egress || (!is_drop &&
-			     !(spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG))) ?
-		VALID_SPEC_VALID : VALID_SPEC_INVALID;
-}
-
-static bool is_valid_spec(struct mlx5_core_dev *mdev,
-			  const struct mlx5_flow_spec *spec,
-			  const struct mlx5_flow_act *flow_act,
-			  bool egress)
-{
-	/* We currently only support ipsec egress flow */
-	return is_valid_esp_aes_gcm(mdev, spec, flow_act, egress) != VALID_SPEC_INVALID;
-}
-
-static bool is_valid_ethertype(struct mlx5_core_dev *mdev,
-			       const struct ib_flow_attr *flow_attr,
-			       bool check_inner)
-{
-	union ib_flow_spec *ib_spec = (union ib_flow_spec *)(flow_attr + 1);
-	int match_ipv = check_inner ?
-			MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
-					ft_field_support.inner_ip_version) :
-			MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
-					ft_field_support.outer_ip_version);
-	int inner_bit = check_inner ? IB_FLOW_SPEC_INNER : 0;
-	bool ipv4_spec_valid, ipv6_spec_valid;
-	unsigned int ip_spec_type = 0;
-	bool has_ethertype = false;
-	unsigned int spec_index;
-	bool mask_valid = true;
-	u16 eth_type = 0;
-	bool type_valid;
-
-	/* Validate that ethertype is correct */
-	for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
-		if ((ib_spec->type == (IB_FLOW_SPEC_ETH | inner_bit)) &&
-		    ib_spec->eth.mask.ether_type) {
-			mask_valid = (ib_spec->eth.mask.ether_type ==
-				      htons(0xffff));
-			has_ethertype = true;
-			eth_type = ntohs(ib_spec->eth.val.ether_type);
-		} else if ((ib_spec->type == (IB_FLOW_SPEC_IPV4 | inner_bit)) ||
-			   (ib_spec->type == (IB_FLOW_SPEC_IPV6 | inner_bit))) {
-			ip_spec_type = ib_spec->type;
-		}
-		ib_spec = (void *)ib_spec + ib_spec->size;
-	}
-
-	type_valid = (!has_ethertype) || (!ip_spec_type);
-	if (!type_valid && mask_valid) {
-		ipv4_spec_valid = (eth_type == ETH_P_IP) &&
-			(ip_spec_type == (IB_FLOW_SPEC_IPV4 | inner_bit));
-		ipv6_spec_valid = (eth_type == ETH_P_IPV6) &&
-			(ip_spec_type == (IB_FLOW_SPEC_IPV6 | inner_bit));
-
-		type_valid = (ipv4_spec_valid) || (ipv6_spec_valid) ||
-			     (((eth_type == ETH_P_MPLS_UC) ||
-			       (eth_type == ETH_P_MPLS_MC)) && match_ipv);
-	}
-
-	return type_valid;
-}
-
-static bool is_valid_attr(struct mlx5_core_dev *mdev,
-			  const struct ib_flow_attr *flow_attr)
-{
-	return is_valid_ethertype(mdev, flow_attr, false) &&
-	       is_valid_ethertype(mdev, flow_attr, true);
-}
-
-static void put_flow_table(struct mlx5_ib_dev *dev,
-			   struct mlx5_ib_flow_prio *prio, bool ft_added)
-{
-	prio->refcount -= !!ft_added;
-	if (!prio->refcount) {
-		mlx5_destroy_flow_table(prio->flow_table);
-		prio->flow_table = NULL;
-	}
-}
-
-static void counters_clear_description(struct ib_counters *counters)
-{
-	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
-
-	mutex_lock(&mcounters->mcntrs_mutex);
-	kfree(mcounters->counters_data);
-	mcounters->counters_data = NULL;
-	mcounters->cntrs_max_index = 0;
-	mutex_unlock(&mcounters->mcntrs_mutex);
-}
-
-static int mlx5_ib_destroy_flow(struct ib_flow *flow_id)
-{
-	struct mlx5_ib_flow_handler *handler = container_of(flow_id,
-							  struct mlx5_ib_flow_handler,
-							  ibflow);
-	struct mlx5_ib_flow_handler *iter, *tmp;
-	struct mlx5_ib_dev *dev = handler->dev;
-
-	mutex_lock(&dev->flow_db->lock);
-
-	list_for_each_entry_safe(iter, tmp, &handler->list, list) {
-		mlx5_del_flow_rules(iter->rule);
-		put_flow_table(dev, iter->prio, true);
-		list_del(&iter->list);
-		kfree(iter);
-	}
-
-	mlx5_del_flow_rules(handler->rule);
-	put_flow_table(dev, handler->prio, true);
-	if (handler->ibcounters &&
-	    atomic_read(&handler->ibcounters->usecnt) == 1)
-		counters_clear_description(handler->ibcounters);
-
-	mutex_unlock(&dev->flow_db->lock);
-	if (handler->flow_matcher)
-		atomic_dec(&handler->flow_matcher->usecnt);
-	kfree(handler);
-
-	return 0;
-}
-
-static int ib_prio_to_core_prio(unsigned int priority, bool dont_trap)
-{
-	priority *= 2;
-	if (!dont_trap)
-		priority++;
-	return priority;
-}
-
-enum flow_table_type {
-	MLX5_IB_FT_RX,
-	MLX5_IB_FT_TX
-};
-
-#define MLX5_FS_MAX_TYPES	 6
-#define MLX5_FS_MAX_ENTRIES	 BIT(16)
-
-static struct mlx5_ib_flow_prio *_get_prio(struct mlx5_flow_namespace *ns,
-					   struct mlx5_ib_flow_prio *prio,
-					   int priority,
-					   int num_entries, int num_groups,
-					   u32 flags)
-{
-	struct mlx5_flow_table_attr ft_attr = {};
-	struct mlx5_flow_table *ft;
-
-	ft_attr.prio = priority;
-	ft_attr.max_fte = num_entries;
-	ft_attr.flags = flags;
-	ft_attr.autogroup.max_num_groups = num_groups;
-	ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
-	if (IS_ERR(ft))
-		return ERR_CAST(ft);
-
-	prio->flow_table = ft;
-	prio->refcount = 0;
-	return prio;
-}
-
-static struct mlx5_ib_flow_prio *get_flow_table(struct mlx5_ib_dev *dev,
-						struct ib_flow_attr *flow_attr,
-						enum flow_table_type ft_type)
-{
-	bool dont_trap = flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP;
-	struct mlx5_flow_namespace *ns = NULL;
-	struct mlx5_ib_flow_prio *prio;
-	struct mlx5_flow_table *ft;
-	int max_table_size;
-	int num_entries;
-	int num_groups;
-	bool esw_encap;
-	u32 flags = 0;
-	int priority;
-
-	max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
-						       log_max_ft_size));
-	esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) !=
-		DEVLINK_ESWITCH_ENCAP_MODE_NONE;
-	if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
-		enum mlx5_flow_namespace_type fn_type;
-
-		if (flow_is_multicast_only(flow_attr) &&
-		    !dont_trap)
-			priority = MLX5_IB_FLOW_MCAST_PRIO;
-		else
-			priority = ib_prio_to_core_prio(flow_attr->priority,
-							dont_trap);
-		if (ft_type == MLX5_IB_FT_RX) {
-			fn_type = MLX5_FLOW_NAMESPACE_BYPASS;
-			prio = &dev->flow_db->prios[priority];
-			if (!dev->is_rep && !esw_encap &&
-			    MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap))
-				flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP;
-			if (!dev->is_rep && !esw_encap &&
-			    MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
-					reformat_l3_tunnel_to_l2))
-				flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
-		} else {
-			max_table_size =
-				BIT(MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
-							      log_max_ft_size));
-			fn_type = MLX5_FLOW_NAMESPACE_EGRESS;
-			prio = &dev->flow_db->egress_prios[priority];
-			if (!dev->is_rep && !esw_encap &&
-			    MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat))
-				flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT;
-		}
-		ns = mlx5_get_flow_namespace(dev->mdev, fn_type);
-		num_entries = MLX5_FS_MAX_ENTRIES;
-		num_groups = MLX5_FS_MAX_TYPES;
-	} else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
-		   flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
-		ns = mlx5_get_flow_namespace(dev->mdev,
-					     MLX5_FLOW_NAMESPACE_LEFTOVERS);
-		build_leftovers_ft_param(&priority,
-					 &num_entries,
-					 &num_groups);
-		prio = &dev->flow_db->prios[MLX5_IB_FLOW_LEFTOVERS_PRIO];
-	} else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
-		if (!MLX5_CAP_FLOWTABLE(dev->mdev,
-					allow_sniffer_and_nic_rx_shared_tir))
-			return ERR_PTR(-ENOTSUPP);
-
-		ns = mlx5_get_flow_namespace(dev->mdev, ft_type == MLX5_IB_FT_RX ?
-					     MLX5_FLOW_NAMESPACE_SNIFFER_RX :
-					     MLX5_FLOW_NAMESPACE_SNIFFER_TX);
-
-		prio = &dev->flow_db->sniffer[ft_type];
-		priority = 0;
-		num_entries = 1;
-		num_groups = 1;
-	}
-
-	if (!ns)
-		return ERR_PTR(-ENOTSUPP);
-
-	max_table_size = min_t(int, num_entries, max_table_size);
-
-	ft = prio->flow_table;
-	if (!ft)
-		return _get_prio(ns, prio, priority, max_table_size, num_groups,
-				 flags);
-
-	return prio;
-}
-
-static void set_underlay_qp(struct mlx5_ib_dev *dev,
-			    struct mlx5_flow_spec *spec,
-			    u32 underlay_qpn)
-{
-	void *misc_params_c = MLX5_ADDR_OF(fte_match_param,
-					   spec->match_criteria,
-					   misc_parameters);
-	void *misc_params_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
-					   misc_parameters);
-
-	if (underlay_qpn &&
-	    MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
-				      ft_field_support.bth_dst_qp)) {
-		MLX5_SET(fte_match_set_misc,
-			 misc_params_v, bth_dst_qp, underlay_qpn);
-		MLX5_SET(fte_match_set_misc,
-			 misc_params_c, bth_dst_qp, 0xffffff);
-	}
-}
-
-static int read_flow_counters(struct ib_device *ibdev,
-			      struct mlx5_read_counters_attr *read_attr)
-{
-	struct mlx5_fc *fc = read_attr->hw_cntrs_hndl;
-	struct mlx5_ib_dev *dev = to_mdev(ibdev);
-
-	return mlx5_fc_query(dev->mdev, fc,
-			     &read_attr->out[IB_COUNTER_PACKETS],
-			     &read_attr->out[IB_COUNTER_BYTES]);
-}
-
-/* flow counters currently expose two counters: packets and bytes */
-#define FLOW_COUNTERS_NUM 2
-static int counters_set_description(struct ib_counters *counters,
-				    enum mlx5_ib_counters_type counters_type,
-				    struct mlx5_ib_flow_counters_desc *desc_data,
-				    u32 ncounters)
-{
-	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
-	u32 cntrs_max_index = 0;
-	int i;
-
-	if (counters_type != MLX5_IB_COUNTERS_FLOW)
-		return -EINVAL;
-
-	/* init the fields for the object */
-	mcounters->type = counters_type;
-	mcounters->read_counters = read_flow_counters;
-	mcounters->counters_num = FLOW_COUNTERS_NUM;
-	mcounters->ncounters = ncounters;
-	/* each counter entry has a description and index pair */
-	for (i = 0; i < ncounters; i++) {
-		if (desc_data[i].description > IB_COUNTER_BYTES)
-			return -EINVAL;
-
-		if (cntrs_max_index <= desc_data[i].index)
-			cntrs_max_index = desc_data[i].index + 1;
-	}
-
-	mutex_lock(&mcounters->mcntrs_mutex);
-	mcounters->counters_data = desc_data;
-	mcounters->cntrs_max_index = cntrs_max_index;
-	mutex_unlock(&mcounters->mcntrs_mutex);
-
-	return 0;
-}
-
-#define MAX_COUNTERS_NUM (USHRT_MAX / (sizeof(u32) * 2))
-static int flow_counters_set_data(struct ib_counters *ibcounters,
-				  struct mlx5_ib_create_flow *ucmd)
-{
-	struct mlx5_ib_mcounters *mcounters = to_mcounters(ibcounters);
-	struct mlx5_ib_flow_counters_data *cntrs_data = NULL;
-	struct mlx5_ib_flow_counters_desc *desc_data = NULL;
-	bool hw_hndl = false;
-	int ret = 0;
-
-	if (ucmd && ucmd->ncounters_data != 0) {
-		cntrs_data = ucmd->data;
-		if (cntrs_data->ncounters > MAX_COUNTERS_NUM)
-			return -EINVAL;
-
-		desc_data = kcalloc(cntrs_data->ncounters,
-				    sizeof(*desc_data),
-				    GFP_KERNEL);
-		if (!desc_data)
-			return  -ENOMEM;
-
-		if (copy_from_user(desc_data,
-				   u64_to_user_ptr(cntrs_data->counters_data),
-				   sizeof(*desc_data) * cntrs_data->ncounters)) {
-			ret = -EFAULT;
-			goto free;
-		}
-	}
-
-	if (!mcounters->hw_cntrs_hndl) {
-		mcounters->hw_cntrs_hndl = mlx5_fc_create(
-			to_mdev(ibcounters->device)->mdev, false);
-		if (IS_ERR(mcounters->hw_cntrs_hndl)) {
-			ret = PTR_ERR(mcounters->hw_cntrs_hndl);
-			goto free;
-		}
-		hw_hndl = true;
-	}
-
-	if (desc_data) {
-		/* counters already bound to at least one flow */
-		if (mcounters->cntrs_max_index) {
-			ret = -EINVAL;
-			goto free_hndl;
-		}
-
-		ret = counters_set_description(ibcounters,
-					       MLX5_IB_COUNTERS_FLOW,
-					       desc_data,
-					       cntrs_data->ncounters);
-		if (ret)
-			goto free_hndl;
-
-	} else if (!mcounters->cntrs_max_index) {
-		/* counters not bound yet, must have udata passed */
-		ret = -EINVAL;
-		goto free_hndl;
-	}
-
-	return 0;
-
-free_hndl:
-	if (hw_hndl) {
-		mlx5_fc_destroy(to_mdev(ibcounters->device)->mdev,
-				mcounters->hw_cntrs_hndl);
-		mcounters->hw_cntrs_hndl = NULL;
-	}
-free:
-	kfree(desc_data);
-	return ret;
-}
-
-static void mlx5_ib_set_rule_source_port(struct mlx5_ib_dev *dev,
-					 struct mlx5_flow_spec *spec,
-					 struct mlx5_eswitch_rep *rep)
-{
-	struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
-	void *misc;
-
-	if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
-		misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
-				    misc_parameters_2);
-
-		MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
-			 mlx5_eswitch_get_vport_metadata_for_match(esw,
-								   rep->vport));
-		misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
-				    misc_parameters_2);
-
-		MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_0,
-			 mlx5_eswitch_get_vport_metadata_mask());
-	} else {
-		misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
-				    misc_parameters);
-
-		MLX5_SET(fte_match_set_misc, misc, source_port, rep->vport);
-
-		misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
-				    misc_parameters);
-
-		MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
-	}
-}
-
-static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
-						      struct mlx5_ib_flow_prio *ft_prio,
-						      const struct ib_flow_attr *flow_attr,
-						      struct mlx5_flow_destination *dst,
-						      u32 underlay_qpn,
-						      struct mlx5_ib_create_flow *ucmd)
-{
-	struct mlx5_flow_table	*ft = ft_prio->flow_table;
-	struct mlx5_ib_flow_handler *handler;
-	struct mlx5_flow_act flow_act = {};
-	struct mlx5_flow_spec *spec;
-	struct mlx5_flow_destination dest_arr[2] = {};
-	struct mlx5_flow_destination *rule_dst = dest_arr;
-	const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
-	unsigned int spec_index;
-	u32 prev_type = 0;
-	int err = 0;
-	int dest_num = 0;
-	bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
-
-	if (!is_valid_attr(dev->mdev, flow_attr))
-		return ERR_PTR(-EINVAL);
-
-	if (dev->is_rep && is_egress)
-		return ERR_PTR(-EINVAL);
-
-	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
-	handler = kzalloc(sizeof(*handler), GFP_KERNEL);
-	if (!handler || !spec) {
-		err = -ENOMEM;
-		goto free;
-	}
-
-	INIT_LIST_HEAD(&handler->list);
-
-	for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
-		err = parse_flow_attr(dev->mdev, spec,
-				      ib_flow, flow_attr, &flow_act,
-				      prev_type);
-		if (err < 0)
-			goto free;
-
-		prev_type = ((union ib_flow_spec *)ib_flow)->type;
-		ib_flow += ((union ib_flow_spec *)ib_flow)->size;
-	}
-
-	if (dst && !(flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP)) {
-		memcpy(&dest_arr[0], dst, sizeof(*dst));
-		dest_num++;
-	}
-
-	if (!flow_is_multicast_only(flow_attr))
-		set_underlay_qp(dev, spec, underlay_qpn);
-
-	if (dev->is_rep) {
-		struct mlx5_eswitch_rep *rep;
-
-		rep = dev->port[flow_attr->port - 1].rep;
-		if (!rep) {
-			err = -EINVAL;
-			goto free;
-		}
-
-		mlx5_ib_set_rule_source_port(dev, spec, rep);
-	}
-
-	spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
-
-	if (is_egress &&
-	    !is_valid_spec(dev->mdev, spec, &flow_act, is_egress)) {
-		err = -EINVAL;
-		goto free;
-	}
-
-	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
-		struct mlx5_ib_mcounters *mcounters;
-
-		err = flow_counters_set_data(flow_act.counters, ucmd);
-		if (err)
-			goto free;
-
-		mcounters = to_mcounters(flow_act.counters);
-		handler->ibcounters = flow_act.counters;
-		dest_arr[dest_num].type =
-			MLX5_FLOW_DESTINATION_TYPE_COUNTER;
-		dest_arr[dest_num].counter_id =
-			mlx5_fc_id(mcounters->hw_cntrs_hndl);
-		dest_num++;
-	}
-
-	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_DROP) {
-		if (!dest_num)
-			rule_dst = NULL;
-	} else {
-		if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)
-			flow_act.action |=
-				MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
-		if (is_egress)
-			flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
-		else if (dest_num)
-			flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
-	}
-
-	if ((spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG)  &&
-	    (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
-	     flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
-		mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
-			     spec->flow_context.flow_tag, flow_attr->type);
-		err = -EINVAL;
-		goto free;
-	}
-	handler->rule = mlx5_add_flow_rules(ft, spec,
-					    &flow_act,
-					    rule_dst, dest_num);
-
-	if (IS_ERR(handler->rule)) {
-		err = PTR_ERR(handler->rule);
-		goto free;
-	}
-
-	ft_prio->refcount++;
-	handler->prio = ft_prio;
-	handler->dev = dev;
-
-	ft_prio->flow_table = ft;
-free:
-	if (err && handler) {
-		if (handler->ibcounters &&
-		    atomic_read(&handler->ibcounters->usecnt) == 1)
-			counters_clear_description(handler->ibcounters);
-		kfree(handler);
-	}
-	kvfree(spec);
-	return err ? ERR_PTR(err) : handler;
-}
-
-static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
-						     struct mlx5_ib_flow_prio *ft_prio,
-						     const struct ib_flow_attr *flow_attr,
-						     struct mlx5_flow_destination *dst)
-{
-	return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL);
-}
-
-enum {
-	LEFTOVERS_MC,
-	LEFTOVERS_UC,
-};
-
-static struct mlx5_ib_flow_handler *create_leftovers_rule(struct mlx5_ib_dev *dev,
-							  struct mlx5_ib_flow_prio *ft_prio,
-							  struct ib_flow_attr *flow_attr,
-							  struct mlx5_flow_destination *dst)
-{
-	struct mlx5_ib_flow_handler *handler_ucast = NULL;
-	struct mlx5_ib_flow_handler *handler = NULL;
-
-	static struct {
-		struct ib_flow_attr	flow_attr;
-		struct ib_flow_spec_eth eth_flow;
-	} leftovers_specs[] = {
-		[LEFTOVERS_MC] = {
-			.flow_attr = {
-				.num_of_specs = 1,
-				.size = sizeof(leftovers_specs[0])
-			},
-			.eth_flow = {
-				.type = IB_FLOW_SPEC_ETH,
-				.size = sizeof(struct ib_flow_spec_eth),
-				.mask = {.dst_mac = {0x1} },
-				.val =  {.dst_mac = {0x1} }
-			}
-		},
-		[LEFTOVERS_UC] = {
-			.flow_attr = {
-				.num_of_specs = 1,
-				.size = sizeof(leftovers_specs[0])
-			},
-			.eth_flow = {
-				.type = IB_FLOW_SPEC_ETH,
-				.size = sizeof(struct ib_flow_spec_eth),
-				.mask = {.dst_mac = {0x1} },
-				.val = {.dst_mac = {} }
-			}
-		}
-	};
-
-	handler = create_flow_rule(dev, ft_prio,
-				   &leftovers_specs[LEFTOVERS_MC].flow_attr,
-				   dst);
-	if (!IS_ERR(handler) &&
-	    flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT) {
-		handler_ucast = create_flow_rule(dev, ft_prio,
-						 &leftovers_specs[LEFTOVERS_UC].flow_attr,
-						 dst);
-		if (IS_ERR(handler_ucast)) {
-			mlx5_del_flow_rules(handler->rule);
-			ft_prio->refcount--;
-			kfree(handler);
-			handler = handler_ucast;
-		} else {
-			list_add(&handler_ucast->list, &handler->list);
-		}
-	}
-
-	return handler;
-}
-
-static struct mlx5_ib_flow_handler *create_sniffer_rule(struct mlx5_ib_dev *dev,
-							struct mlx5_ib_flow_prio *ft_rx,
-							struct mlx5_ib_flow_prio *ft_tx,
-							struct mlx5_flow_destination *dst)
-{
-	struct mlx5_ib_flow_handler *handler_rx;
-	struct mlx5_ib_flow_handler *handler_tx;
-	int err;
-	static const struct ib_flow_attr flow_attr  = {
-		.num_of_specs = 0,
-		.size = sizeof(flow_attr)
-	};
-
-	handler_rx = create_flow_rule(dev, ft_rx, &flow_attr, dst);
-	if (IS_ERR(handler_rx)) {
-		err = PTR_ERR(handler_rx);
-		goto err;
-	}
-
-	handler_tx = create_flow_rule(dev, ft_tx, &flow_attr, dst);
-	if (IS_ERR(handler_tx)) {
-		err = PTR_ERR(handler_tx);
-		goto err_tx;
-	}
-
-	list_add(&handler_tx->list, &handler_rx->list);
-
-	return handler_rx;
-
-err_tx:
-	mlx5_del_flow_rules(handler_rx->rule);
-	ft_rx->refcount--;
-	kfree(handler_rx);
-err:
-	return ERR_PTR(err);
-}
-
-static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
-					   struct ib_flow_attr *flow_attr,
-					   int domain,
-					   struct ib_udata *udata)
-{
-	struct mlx5_ib_dev *dev = to_mdev(qp->device);
-	struct mlx5_ib_qp *mqp = to_mqp(qp);
-	struct mlx5_ib_flow_handler *handler = NULL;
-	struct mlx5_flow_destination *dst = NULL;
-	struct mlx5_ib_flow_prio *ft_prio_tx = NULL;
-	struct mlx5_ib_flow_prio *ft_prio;
-	bool is_egress = flow_attr->flags & IB_FLOW_ATTR_FLAGS_EGRESS;
-	struct mlx5_ib_create_flow *ucmd = NULL, ucmd_hdr;
-	size_t min_ucmd_sz, required_ucmd_sz;
-	int err;
-	int underlay_qpn;
-
-	if (udata && udata->inlen) {
-		min_ucmd_sz = offsetof(typeof(ucmd_hdr), reserved) +
-				sizeof(ucmd_hdr.reserved);
-		if (udata->inlen < min_ucmd_sz)
-			return ERR_PTR(-EOPNOTSUPP);
-
-		err = ib_copy_from_udata(&ucmd_hdr, udata, min_ucmd_sz);
-		if (err)
-			return ERR_PTR(err);
-
-		/* currently supports only one counters data */
-		if (ucmd_hdr.ncounters_data > 1)
-			return ERR_PTR(-EINVAL);
-
-		required_ucmd_sz = min_ucmd_sz +
-			sizeof(struct mlx5_ib_flow_counters_data) *
-			ucmd_hdr.ncounters_data;
-		if (udata->inlen > required_ucmd_sz &&
-		    !ib_is_udata_cleared(udata, required_ucmd_sz,
-					 udata->inlen - required_ucmd_sz))
-			return ERR_PTR(-EOPNOTSUPP);
-
-		ucmd = kzalloc(required_ucmd_sz, GFP_KERNEL);
-		if (!ucmd)
-			return ERR_PTR(-ENOMEM);
-
-		err = ib_copy_from_udata(ucmd, udata, required_ucmd_sz);
-		if (err)
-			goto free_ucmd;
-	}
-
-	if (flow_attr->priority > MLX5_IB_FLOW_LAST_PRIO) {
-		err = -ENOMEM;
-		goto free_ucmd;
-	}
-
-	if (domain != IB_FLOW_DOMAIN_USER ||
-	    flow_attr->port > dev->num_ports ||
-	    (flow_attr->flags & ~(IB_FLOW_ATTR_FLAGS_DONT_TRAP |
-				  IB_FLOW_ATTR_FLAGS_EGRESS))) {
-		err = -EINVAL;
-		goto free_ucmd;
-	}
-
-	if (is_egress &&
-	    (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
-	     flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
-		err = -EINVAL;
-		goto free_ucmd;
-	}
-
-	dst = kzalloc(sizeof(*dst), GFP_KERNEL);
-	if (!dst) {
-		err = -ENOMEM;
-		goto free_ucmd;
-	}
-
-	mutex_lock(&dev->flow_db->lock);
-
-	ft_prio = get_flow_table(dev, flow_attr,
-				 is_egress ? MLX5_IB_FT_TX : MLX5_IB_FT_RX);
-	if (IS_ERR(ft_prio)) {
-		err = PTR_ERR(ft_prio);
-		goto unlock;
-	}
-	if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) {
-		ft_prio_tx = get_flow_table(dev, flow_attr, MLX5_IB_FT_TX);
-		if (IS_ERR(ft_prio_tx)) {
-			err = PTR_ERR(ft_prio_tx);
-			ft_prio_tx = NULL;
-			goto destroy_ft;
-		}
-	}
-
-	if (is_egress) {
-		dst->type = MLX5_FLOW_DESTINATION_TYPE_PORT;
-	} else {
-		dst->type = MLX5_FLOW_DESTINATION_TYPE_TIR;
-		if (mqp->is_rss)
-			dst->tir_num = mqp->rss_qp.tirn;
-		else
-			dst->tir_num = mqp->raw_packet_qp.rq.tirn;
-	}
-
-	if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
-		underlay_qpn = (mqp->flags & IB_QP_CREATE_SOURCE_QPN) ?
- mqp->underlay_qpn : - 0; - handler = _create_flow_rule(dev, ft_prio, flow_attr, dst, - underlay_qpn, ucmd); - } else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT || - flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) { - handler = create_leftovers_rule(dev, ft_prio, flow_attr, - dst); - } else if (flow_attr->type == IB_FLOW_ATTR_SNIFFER) { - handler = create_sniffer_rule(dev, ft_prio, ft_prio_tx, dst); - } else { - err = -EINVAL; - goto destroy_ft; - } - - if (IS_ERR(handler)) { - err = PTR_ERR(handler); - handler = NULL; - goto destroy_ft; - } - - mutex_unlock(&dev->flow_db->lock); - kfree(dst); - kfree(ucmd); - - return &handler->ibflow; - -destroy_ft: - put_flow_table(dev, ft_prio, false); - if (ft_prio_tx) - put_flow_table(dev, ft_prio_tx, false); -unlock: - mutex_unlock(&dev->flow_db->lock); - kfree(dst); -free_ucmd: - kfree(ucmd); - return ERR_PTR(err); -} - -static struct mlx5_ib_flow_prio * -_get_flow_table(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_matcher *fs_matcher, - bool mcast) -{ - struct mlx5_flow_namespace *ns = NULL; - struct mlx5_ib_flow_prio *prio = NULL; - int max_table_size = 0; - bool esw_encap; - u32 flags = 0; - int priority; - - if (mcast) - priority = MLX5_IB_FLOW_MCAST_PRIO; - else - priority = ib_prio_to_core_prio(fs_matcher->priority, false); - - esw_encap = mlx5_eswitch_get_encap_mode(dev->mdev) != - DEVLINK_ESWITCH_ENCAP_MODE_NONE; - if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) { - max_table_size = BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, - log_max_ft_size)); - if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, decap) && !esw_encap) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; - if (MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, - reformat_l3_tunnel_to_l2) && - !esw_encap) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) { - max_table_size = BIT( - MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, log_max_ft_size)); - if (MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, reformat) && !esw_encap) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) { - max_table_size = BIT( - MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, log_max_ft_size)); - if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, decap) && esw_encap) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_DECAP; - if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev->mdev, reformat_l3_tunnel_to_l2) && - esw_encap) - flags |= MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; - priority = FDB_BYPASS_PATH; - } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) { - max_table_size = - BIT(MLX5_CAP_FLOWTABLE_RDMA_RX(dev->mdev, - log_max_ft_size)); - priority = fs_matcher->priority; - } else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) { - max_table_size = - BIT(MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev, - log_max_ft_size)); - priority = fs_matcher->priority; - } - - max_table_size = min_t(int, max_table_size, MLX5_FS_MAX_ENTRIES); - - ns = mlx5_get_flow_namespace(dev->mdev, fs_matcher->ns_type); - if (!ns) - return ERR_PTR(-ENOTSUPP); - - if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_BYPASS) - prio = &dev->flow_db->prios[priority]; - else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_EGRESS) - prio = &dev->flow_db->egress_prios[priority]; - else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_FDB) - prio = &dev->flow_db->fdb; - else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_RX) - prio = &dev->flow_db->rdma_rx[priority]; - else if (fs_matcher->ns_type == MLX5_FLOW_NAMESPACE_RDMA_TX) - prio = &dev->flow_db->rdma_tx[priority]; - - if (!prio) - return 
ERR_PTR(-EINVAL); - - if (prio->flow_table) - return prio; - - return _get_prio(ns, prio, priority, max_table_size, - MLX5_FS_MAX_TYPES, flags); -} - -static struct mlx5_ib_flow_handler * -_create_raw_flow_rule(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_prio *ft_prio, - struct mlx5_flow_destination *dst, - struct mlx5_ib_flow_matcher *fs_matcher, - struct mlx5_flow_context *flow_context, - struct mlx5_flow_act *flow_act, - void *cmd_in, int inlen, - int dst_num) -{ - struct mlx5_ib_flow_handler *handler; - struct mlx5_flow_spec *spec; - struct mlx5_flow_table *ft = ft_prio->flow_table; - int err = 0; - - spec = kvzalloc(sizeof(*spec), GFP_KERNEL); - handler = kzalloc(sizeof(*handler), GFP_KERNEL); - if (!handler || !spec) { - err = -ENOMEM; - goto free; - } - - INIT_LIST_HEAD(&handler->list); - - memcpy(spec->match_value, cmd_in, inlen); - memcpy(spec->match_criteria, fs_matcher->matcher_mask.match_params, - fs_matcher->mask_len); - spec->match_criteria_enable = fs_matcher->match_criteria_enable; - spec->flow_context = *flow_context; - - handler->rule = mlx5_add_flow_rules(ft, spec, - flow_act, dst, dst_num); - - if (IS_ERR(handler->rule)) { - err = PTR_ERR(handler->rule); - goto free; - } - - ft_prio->refcount++; - handler->prio = ft_prio; - handler->dev = dev; - ft_prio->flow_table = ft; - -free: - if (err) - kfree(handler); - kvfree(spec); - return err ? ERR_PTR(err) : handler; -} - -static bool raw_fs_is_multicast(struct mlx5_ib_flow_matcher *fs_matcher, - void *match_v) -{ - void *match_c; - void *match_v_set_lyr_2_4, *match_c_set_lyr_2_4; - void *dmac, *dmac_mask; - void *ipv4, *ipv4_mask; - - if (!(fs_matcher->match_criteria_enable & - (1 << MATCH_CRITERIA_ENABLE_OUTER_BIT))) - return false; - - match_c = fs_matcher->matcher_mask.match_params; - match_v_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_v, - outer_headers); - match_c_set_lyr_2_4 = MLX5_ADDR_OF(fte_match_param, match_c, - outer_headers); - - dmac = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4, - dmac_47_16); - dmac_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4, - dmac_47_16); - - if (is_multicast_ether_addr(dmac) && - is_multicast_ether_addr(dmac_mask)) - return true; - - ipv4 = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_v_set_lyr_2_4, - dst_ipv4_dst_ipv6.ipv4_layout.ipv4); - - ipv4_mask = MLX5_ADDR_OF(fte_match_set_lyr_2_4, match_c_set_lyr_2_4, - dst_ipv4_dst_ipv6.ipv4_layout.ipv4); - - if (ipv4_is_multicast(*(__be32 *)(ipv4)) && - ipv4_is_multicast(*(__be32 *)(ipv4_mask))) - return true; - - return false; -} - -struct mlx5_ib_flow_handler * -mlx5_ib_raw_fs_rule_add(struct mlx5_ib_dev *dev, - struct mlx5_ib_flow_matcher *fs_matcher, - struct mlx5_flow_context *flow_context, - struct mlx5_flow_act *flow_act, - u32 counter_id, - void *cmd_in, int inlen, int dest_id, - int dest_type) -{ - struct mlx5_flow_destination *dst; - struct mlx5_ib_flow_prio *ft_prio; - struct mlx5_ib_flow_handler *handler; - int dst_num = 0; - bool mcast; - int err; - - if (fs_matcher->flow_type != MLX5_IB_FLOW_TYPE_NORMAL) - return ERR_PTR(-EOPNOTSUPP); - - if (fs_matcher->priority > MLX5_IB_FLOW_LAST_PRIO) - return ERR_PTR(-ENOMEM); - - dst = kcalloc(2, sizeof(*dst), GFP_KERNEL); - if (!dst) - return ERR_PTR(-ENOMEM); - - mcast = raw_fs_is_multicast(fs_matcher, cmd_in); - mutex_lock(&dev->flow_db->lock); - - ft_prio = _get_flow_table(dev, fs_matcher, mcast); - if (IS_ERR(ft_prio)) { - err = PTR_ERR(ft_prio); - goto unlock; - } - - if (dest_type == MLX5_FLOW_DESTINATION_TYPE_TIR) { - dst[dst_num].type = 
-		dst[dst_num++].tir_num = dest_id;
-		flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
-	} else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE) {
-		dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE_NUM;
-		dst[dst_num++].ft_num = dest_id;
-		flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
-	} else if (dest_type == MLX5_FLOW_DESTINATION_TYPE_PORT) {
-		dst[dst_num++].type = MLX5_FLOW_DESTINATION_TYPE_PORT;
-		flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
-	}
-
-
-	if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
-		dst[dst_num].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
-		dst[dst_num].counter_id = counter_id;
-		dst_num++;
-	}
-
-	handler = _create_raw_flow_rule(dev, ft_prio, dst, fs_matcher,
-					flow_context, flow_act,
-					cmd_in, inlen, dst_num);
-
-	if (IS_ERR(handler)) {
-		err = PTR_ERR(handler);
-		goto destroy_ft;
-	}
-
-	mutex_unlock(&dev->flow_db->lock);
-	atomic_inc(&fs_matcher->usecnt);
-	handler->flow_matcher = fs_matcher;
-
-	kfree(dst);
-
-	return handler;
-
-destroy_ft:
-	put_flow_table(dev, ft_prio, false);
-unlock:
-	mutex_unlock(&dev->flow_db->lock);
-	kfree(dst);
-
-	return ERR_PTR(err);
-}
-
-static u32 mlx5_ib_flow_action_flags_to_accel_xfrm_flags(u32 mlx5_flags)
-{
-	u32 flags = 0;
-
-	if (mlx5_flags & MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA)
-		flags |= MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA;
-
-	return flags;
-}
-
-#define MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED	MLX5_IB_UAPI_FLOW_ACTION_FLAGS_REQUIRE_METADATA
-static struct ib_flow_action *
-mlx5_ib_create_flow_action_esp(struct ib_device *device,
-			       const struct ib_flow_action_attrs_esp *attr,
-			       struct uverbs_attr_bundle *attrs)
-{
-	struct mlx5_ib_dev *mdev = to_mdev(device);
-	struct ib_uverbs_flow_action_esp_keymat_aes_gcm *aes_gcm;
-	struct mlx5_accel_esp_xfrm_attrs accel_attrs = {};
-	struct mlx5_ib_flow_action *action;
-	u64 action_flags;
-	u64 flags;
-	int err = 0;
-
-	err = uverbs_get_flags64(
-		&action_flags, attrs, MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
-		((MLX5_FLOW_ACTION_ESP_CREATE_LAST_SUPPORTED << 1) - 1));
-	if (err)
-		return ERR_PTR(err);
-
-	flags = mlx5_ib_flow_action_flags_to_accel_xfrm_flags(action_flags);
-
-	/* We currently only support a subset of the standard features. Only a
-	 * keymat of type AES_GCM, with icv_len == 16, iv_algo == SEQ and esn
-	 * (with overlap). Full offload mode isn't supported.
-	 */
-	if (!attr->keymat || attr->replay || attr->encap ||
-	    attr->spi || attr->seq || attr->tfc_pad ||
-	    attr->hard_limit_pkts ||
-	    (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
-			     IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT)))
-		return ERR_PTR(-EOPNOTSUPP);
-
-	if (attr->keymat->protocol !=
-	    IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM)
-		return ERR_PTR(-EOPNOTSUPP);
-
-	aes_gcm = &attr->keymat->keymat.aes_gcm;
-
-	if (aes_gcm->icv_len != 16 ||
-	    aes_gcm->iv_algo != IB_UVERBS_FLOW_ACTION_IV_ALGO_SEQ)
-		return ERR_PTR(-EOPNOTSUPP);
-
-	action = kmalloc(sizeof(*action), GFP_KERNEL);
-	if (!action)
-		return ERR_PTR(-ENOMEM);
-
-	action->esp_aes_gcm.ib_flags = attr->flags;
-	memcpy(&accel_attrs.keymat.aes_gcm.aes_key, &aes_gcm->aes_key,
-	       sizeof(accel_attrs.keymat.aes_gcm.aes_key));
-	accel_attrs.keymat.aes_gcm.key_len = aes_gcm->key_len * 8;
-	memcpy(&accel_attrs.keymat.aes_gcm.salt, &aes_gcm->salt,
-	       sizeof(accel_attrs.keymat.aes_gcm.salt));
-	memcpy(&accel_attrs.keymat.aes_gcm.seq_iv, &aes_gcm->iv,
-	       sizeof(accel_attrs.keymat.aes_gcm.seq_iv));
-	accel_attrs.keymat.aes_gcm.icv_len = aes_gcm->icv_len * 8;
-	accel_attrs.keymat.aes_gcm.iv_algo = MLX5_ACCEL_ESP_AES_GCM_IV_ALGO_SEQ;
-	accel_attrs.keymat_type = MLX5_ACCEL_ESP_KEYMAT_AES_GCM;
-
-	accel_attrs.esn = attr->esn;
-	if (attr->flags & IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED)
-		accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_TRIGGERED;
-	if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)
-		accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
-
-	if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ENCRYPT)
-		accel_attrs.action |= MLX5_ACCEL_ESP_ACTION_ENCRYPT;
-
-	action->esp_aes_gcm.ctx =
-		mlx5_accel_esp_create_xfrm(mdev->mdev, &accel_attrs, flags);
-	if (IS_ERR(action->esp_aes_gcm.ctx)) {
-		err = PTR_ERR(action->esp_aes_gcm.ctx);
-		goto err_parse;
-	}
-
-	action->esp_aes_gcm.ib_flags = attr->flags;
-
-	return &action->ib_action;
-
-err_parse:
-	kfree(action);
-	return ERR_PTR(err);
-}
-
-static int
-mlx5_ib_modify_flow_action_esp(struct ib_flow_action *action,
-			       const struct ib_flow_action_attrs_esp *attr,
-			       struct uverbs_attr_bundle *attrs)
-{
-	struct mlx5_ib_flow_action *maction = to_mflow_act(action);
-	struct mlx5_accel_esp_xfrm_attrs accel_attrs;
-	int err = 0;
-
-	if (attr->keymat || attr->replay || attr->encap ||
-	    attr->spi || attr->seq || attr->tfc_pad ||
-	    attr->hard_limit_pkts ||
-	    (attr->flags & ~(IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
-			     IB_FLOW_ACTION_ESP_FLAGS_MOD_ESP_ATTRS |
-			     IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)))
-		return -EOPNOTSUPP;
-
-	/* Only the ESN value or the MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP can
-	 * be modified.
-	 */
-	if (!(maction->esp_aes_gcm.ib_flags &
-	      IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED) &&
-	    attr->flags & (IB_FLOW_ACTION_ESP_FLAGS_ESN_TRIGGERED |
-			   IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW))
-		return -EINVAL;
-
-	memcpy(&accel_attrs, &maction->esp_aes_gcm.ctx->attrs,
-	       sizeof(accel_attrs));
-
-	accel_attrs.esn = attr->esn;
-	if (attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW)
-		accel_attrs.flags |= MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
-	else
-		accel_attrs.flags &= ~MLX5_ACCEL_ESP_FLAGS_ESN_STATE_OVERLAP;
-
-	err = mlx5_accel_esp_modify_xfrm(maction->esp_aes_gcm.ctx,
-					 &accel_attrs);
-	if (err)
-		return err;
-
-	maction->esp_aes_gcm.ib_flags &=
-		~IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW;
-	maction->esp_aes_gcm.ib_flags |=
-		attr->flags & IB_UVERBS_FLOW_ACTION_ESP_FLAGS_ESN_NEW_WINDOW;
-
-	return 0;
-}
-
-static int mlx5_ib_destroy_flow_action(struct ib_flow_action *action)
-{
-	struct mlx5_ib_flow_action *maction = to_mflow_act(action);
-
-	switch (action->type) {
-	case IB_FLOW_ACTION_ESP:
-		/*
-		 * We only support aes_gcm for now, so we implicitly know this
-		 * is the underlying crypto.
-		 */
-		mlx5_accel_esp_destroy_xfrm(maction->esp_aes_gcm.ctx);
-		break;
-	case IB_FLOW_ACTION_UNSPECIFIED:
-		mlx5_ib_destroy_flow_action_raw(maction);
-		break;
-	default:
-		WARN_ON(true);
-		break;
-	}
-
-	kfree(maction);
-	return 0;
-}
-
 static int mlx5_ib_mcg_attach(struct ib_qp *ibqp, union ib_gid *gid, u16 lid)
 {
 	struct mlx5_ib_dev *dev = to_mdev(ibqp->device);
@@ -4848,137 +3020,6 @@ static int get_port_caps(struct mlx5_ib_dev *dev, u8 port)
 	return __get_port_caps(dev, port);
 }
 
-static void destroy_umrc_res(struct mlx5_ib_dev *dev)
-{
-	int err;
-
-	err = mlx5_mr_cache_cleanup(dev);
-	if (err)
-		mlx5_ib_warn(dev, "mr cache cleanup failed\n");
-
-	if (dev->umrc.qp)
-		mlx5_ib_destroy_qp(dev->umrc.qp, NULL);
-	if (dev->umrc.cq)
-		ib_free_cq(dev->umrc.cq);
-	if (dev->umrc.pd)
-		ib_dealloc_pd(dev->umrc.pd);
-}
-
-enum {
-	MAX_UMR_WR = 128,
-};
-
-static int create_umr_res(struct mlx5_ib_dev *dev)
-{
-	struct ib_qp_init_attr *init_attr = NULL;
-	struct ib_qp_attr *attr = NULL;
-	struct ib_pd *pd;
-	struct ib_cq *cq;
-	struct ib_qp *qp;
-	int ret;
-
-	attr = kzalloc(sizeof(*attr), GFP_KERNEL);
-	init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL);
-	if (!attr || !init_attr) {
-		ret = -ENOMEM;
-		goto error_0;
-	}
-
-	pd = ib_alloc_pd(&dev->ib_dev, 0);
-	if (IS_ERR(pd)) {
-		mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
-		ret = PTR_ERR(pd);
-		goto error_0;
-	}
-
-	cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
-	if (IS_ERR(cq)) {
-		mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
-		ret = PTR_ERR(cq);
-		goto error_2;
-	}
-
-	init_attr->send_cq = cq;
-	init_attr->recv_cq = cq;
-	init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
-	init_attr->cap.max_send_wr = MAX_UMR_WR;
-	init_attr->cap.max_send_sge = 1;
-	init_attr->qp_type = MLX5_IB_QPT_REG_UMR;
-	init_attr->port_num = 1;
-	qp = mlx5_ib_create_qp(pd, init_attr, NULL);
-	if (IS_ERR(qp)) {
-		mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n");
-		ret = PTR_ERR(qp);
-		goto error_3;
-	}
-	qp->device = &dev->ib_dev;
-	qp->real_qp = qp;
-	qp->uobject = NULL;
-	qp->qp_type = MLX5_IB_QPT_REG_UMR;
-	qp->send_cq = init_attr->send_cq;
-	qp->recv_cq = init_attr->recv_cq;
-
-	attr->qp_state = IB_QPS_INIT;
-	attr->port_num = 1;
-	ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX |
-				IB_QP_PORT, NULL);
-	if (ret) {
-		mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n");
-		goto error_4;
-	}
-
-	memset(attr, 0, sizeof(*attr));
-	attr->qp_state = IB_QPS_RTR;
-	attr->path_mtu = IB_MTU_256;
-
-	ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
-	if (ret) {
-		mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n");
-		goto error_4;
-	}
-
-	memset(attr, 0, sizeof(*attr));
-	attr->qp_state = IB_QPS_RTS;
-	ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL);
-	if (ret) {
-		mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n");
-		goto error_4;
-	}
-
-	dev->umrc.qp = qp;
-	dev->umrc.cq = cq;
-	dev->umrc.pd = pd;
-
-	sema_init(&dev->umrc.sem, MAX_UMR_WR);
-	ret = mlx5_mr_cache_init(dev);
-	if (ret) {
-		mlx5_ib_warn(dev, "mr cache init failed %d\n", ret);
-		goto error_4;
-	}
-
-	kfree(attr);
-	kfree(init_attr);
-
-	return 0;
-
-error_4:
-	mlx5_ib_destroy_qp(qp, NULL);
-	dev->umrc.qp = NULL;
-
-error_3:
-	ib_free_cq(cq);
-	dev->umrc.cq = NULL;
-
-error_2:
-	ib_dealloc_pd(pd);
-	dev->umrc.pd = NULL;
-
-error_0:
-	kfree(attr);
-	kfree(init_attr);
-	return ret;
-}
-
 static u8 mlx5_get_umr_fence(u8 umr_fence_cap)
 {
 	switch (umr_fence_cap) {
@@ -4991,18 +3032,20 @@ static u8 mlx5_get_umr_fence(u8 umr_fence_cap)
 	}
 }
 
-static int create_dev_resources(struct mlx5_ib_resources *devr)
+static int mlx5_ib_dev_res_init(struct mlx5_ib_dev *dev)
 {
+	struct mlx5_ib_resources *devr = &dev->devr;
 	struct ib_srq_init_attr attr;
-	struct mlx5_ib_dev *dev;
 	struct ib_device *ibdev;
 	struct ib_cq_init_attr cq_attr = {.cqe = 1};
 	int port;
 	int ret = 0;
 
-	dev = container_of(devr, struct mlx5_ib_dev, devr);
 	ibdev = &dev->ib_dev;
 
+	if (!MLX5_CAP_GEN(dev->mdev, xrc))
+		return -EOPNOTSUPP;
+
 	mutex_init(&devr->mutex);
 
 	devr->p0 = rdma_zalloc_drv_obj(ibdev, ib_pd);
@@ -5030,34 +3073,19 @@ static int create_dev_resources(struct mlx5_ib_resources *devr)
 	if (ret)
 		goto err_create_cq;
 
-	devr->x0 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL);
-	if (IS_ERR(devr->x0)) {
-		ret = PTR_ERR(devr->x0);
+	ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn0, 0);
+	if (ret)
 		goto error2;
-	}
-	devr->x0->device = &dev->ib_dev;
-	devr->x0->inode = NULL;
-	atomic_set(&devr->x0->usecnt, 0);
-	mutex_init(&devr->x0->tgt_qp_mutex);
-	INIT_LIST_HEAD(&devr->x0->tgt_qp_list);
-
-	devr->x1 = mlx5_ib_alloc_xrcd(&dev->ib_dev, NULL);
-	if (IS_ERR(devr->x1)) {
-		ret = PTR_ERR(devr->x1);
+
+	ret = mlx5_cmd_xrcd_alloc(dev->mdev, &devr->xrcdn1, 0);
+	if (ret)
 		goto error3;
-	}
-	devr->x1->device = &dev->ib_dev;
-	devr->x1->inode = NULL;
-	atomic_set(&devr->x1->usecnt, 0);
-	mutex_init(&devr->x1->tgt_qp_mutex);
-	INIT_LIST_HEAD(&devr->x1->tgt_qp_list);
 
 	memset(&attr, 0, sizeof(attr));
 	attr.attr.max_sge = 1;
 	attr.attr.max_wr = 1;
 	attr.srq_type = IB_SRQT_XRC;
 	attr.ext.cq = devr->c0;
-	attr.ext.xrc.xrcd = devr->x0;
 
 	devr->s0 = rdma_zalloc_drv_obj(ibdev, ib_srq);
 	if (!devr->s0) {
@@ -5068,13 +3096,11 @@ static int create_dev_resources(struct mlx5_ib_resources *devr)
 	devr->s0->device = &dev->ib_dev;
 	devr->s0->pd = devr->p0;
 	devr->s0->srq_type = IB_SRQT_XRC;
-	devr->s0->ext.xrc.xrcd = devr->x0;
 	devr->s0->ext.cq = devr->c0;
 	ret = mlx5_ib_create_srq(devr->s0, &attr, NULL);
 	if (ret)
 		goto err_create;
 
-	atomic_inc(&devr->s0->ext.xrc.xrcd->usecnt);
 	atomic_inc(&devr->s0->ext.cq->usecnt);
 	atomic_inc(&devr->p0->usecnt);
 	atomic_set(&devr->s0->usecnt, 0);
@@ -5116,9 +3142,9 @@ error5:
 err_create:
 	kfree(devr->s0);
 error4:
-	mlx5_ib_dealloc_xrcd(devr->x1, NULL);
+	mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn1, 0);
 error3:
-	mlx5_ib_dealloc_xrcd(devr->x0, NULL);
+	mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn0, 0);
 error2:
 	mlx5_ib_destroy_cq(devr->c0, NULL);
 err_create_cq:
@@ -5130,16 +3156,17 @@ error0:
 	return ret;
 }
 
-static void destroy_dev_resources(struct mlx5_ib_resources *devr)
+static void mlx5_ib_dev_res_cleanup(struct mlx5_ib_dev *dev)
 {
+	struct mlx5_ib_resources *devr = &dev->devr;
 	int port;
 
 	mlx5_ib_destroy_srq(devr->s1, NULL);
 	kfree(devr->s1);
 	mlx5_ib_destroy_srq(devr->s0, NULL);
 	kfree(devr->s0);
-	mlx5_ib_dealloc_xrcd(devr->x0, NULL);
-	mlx5_ib_dealloc_xrcd(devr->x1, NULL);
+	mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn1, 0);
+	mlx5_cmd_xrcd_dealloc(dev->mdev, devr->xrcdn0, 0);
 	mlx5_ib_destroy_cq(devr->c0, NULL);
 	kfree(devr->c0);
 	mlx5_ib_dealloc_pd(devr->p0, NULL);
@@ -5332,466 +3359,6 @@ static void mlx5_disable_eth(struct mlx5_ib_dev *dev)
 	mlx5_nic_vport_disable_roce(dev->mdev);
 }
 
-struct mlx5_ib_counter {
-	const char *name;
-	size_t offset;
-};
-
-#define INIT_Q_COUNTER(_name)		\
-	{ .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}
-
-static const struct mlx5_ib_counter basic_q_cnts[] = {
-	INIT_Q_COUNTER(rx_write_requests),
-	INIT_Q_COUNTER(rx_read_requests),
-	INIT_Q_COUNTER(rx_atomic_requests),
-	INIT_Q_COUNTER(out_of_buffer),
-};
-
-static const struct mlx5_ib_counter out_of_seq_q_cnts[] = {
-	INIT_Q_COUNTER(out_of_sequence),
-};
-
-static const struct mlx5_ib_counter retrans_q_cnts[] = {
-	INIT_Q_COUNTER(duplicate_request),
-	INIT_Q_COUNTER(rnr_nak_retry_err),
-	INIT_Q_COUNTER(packet_seq_err),
-	INIT_Q_COUNTER(implied_nak_seq_err),
-	INIT_Q_COUNTER(local_ack_timeout_err),
-};
-
-#define INIT_CONG_COUNTER(_name)		\
-	{ .name = #_name, .offset =	\
-		MLX5_BYTE_OFF(query_cong_statistics_out, _name ## _high)}
-
-static const struct mlx5_ib_counter cong_cnts[] = {
-	INIT_CONG_COUNTER(rp_cnp_ignored),
-	INIT_CONG_COUNTER(rp_cnp_handled),
-	INIT_CONG_COUNTER(np_ecn_marked_roce_packets),
-	INIT_CONG_COUNTER(np_cnp_sent),
-};
-
-static const struct mlx5_ib_counter extended_err_cnts[] = {
-	INIT_Q_COUNTER(resp_local_length_error),
-	INIT_Q_COUNTER(resp_cqe_error),
-	INIT_Q_COUNTER(req_cqe_error),
-	INIT_Q_COUNTER(req_remote_invalid_request),
-	INIT_Q_COUNTER(req_remote_access_errors),
-	INIT_Q_COUNTER(resp_remote_access_errors),
-	INIT_Q_COUNTER(resp_cqe_flush_error),
-	INIT_Q_COUNTER(req_cqe_flush_error),
-};
-
-static const struct mlx5_ib_counter roce_accl_cnts[] = {
-	INIT_Q_COUNTER(roce_adp_retrans),
-	INIT_Q_COUNTER(roce_adp_retrans_to),
-	INIT_Q_COUNTER(roce_slow_restart),
-	INIT_Q_COUNTER(roce_slow_restart_cnps),
-	INIT_Q_COUNTER(roce_slow_restart_trans),
-};
-
-#define INIT_EXT_PPCNT_COUNTER(_name)		\
-	{ .name = #_name, .offset =	\
-	MLX5_BYTE_OFF(ppcnt_reg, \
-		      counter_set.eth_extended_cntrs_grp_data_layout._name##_high)}
-
-static const struct mlx5_ib_counter ext_ppcnt_cnts[] = {
-	INIT_EXT_PPCNT_COUNTER(rx_icrc_encapsulated),
-};
-
-static bool is_mdev_switchdev_mode(const struct mlx5_core_dev *mdev)
-{
-	return MLX5_ESWITCH_MANAGER(mdev) &&
-	       mlx5_ib_eswitch_mode(mdev->priv.eswitch) ==
-		       MLX5_ESWITCH_OFFLOADS;
-}
-
-static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
-{
-	u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
-	int num_cnt_ports;
-	int i;
-
-	num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
-
-	MLX5_SET(dealloc_q_counter_in, in, opcode,
-		 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
-
-	for (i = 0; i < num_cnt_ports; i++) {
-		if (dev->port[i].cnts.set_id) {
-			MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
-				 dev->port[i].cnts.set_id);
-			mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
-		}
-		kfree(dev->port[i].cnts.names);
-		kfree(dev->port[i].cnts.offsets);
-	}
-}
-
-static int __mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev,
-				    struct mlx5_ib_counters *cnts)
-{
-	u32 num_counters;
-
-	num_counters = ARRAY_SIZE(basic_q_cnts);
-
-	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
-		num_counters += ARRAY_SIZE(out_of_seq_q_cnts);
-
-	if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
-		num_counters += ARRAY_SIZE(retrans_q_cnts);
-
-	if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters))
-		num_counters += ARRAY_SIZE(extended_err_cnts);
-
-	if (MLX5_CAP_GEN(dev->mdev, roce_accl))
-		num_counters += ARRAY_SIZE(roce_accl_cnts);
-
-	cnts->num_q_counters = num_counters;
-
-	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
-		cnts->num_cong_counters = ARRAY_SIZE(cong_cnts);
-		num_counters += ARRAY_SIZE(cong_cnts);
-	}
-	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
-		cnts->num_ext_ppcnt_counters = ARRAY_SIZE(ext_ppcnt_cnts);
-		num_counters += ARRAY_SIZE(ext_ppcnt_cnts);
-	}
-	cnts->names = kcalloc(num_counters, sizeof(cnts->names), GFP_KERNEL);
-	if (!cnts->names)
-		return -ENOMEM;
-
-	cnts->offsets = kcalloc(num_counters,
-				sizeof(cnts->offsets), GFP_KERNEL);
-	if (!cnts->offsets)
-		goto err_names;
-
-	return 0;
-
-err_names:
-	kfree(cnts->names);
-	cnts->names = NULL;
-	return -ENOMEM;
-}
-
-static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
-				  const char **names,
-				  size_t *offsets)
-{
-	int i;
-	int j = 0;
-
-	for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) {
-		names[j] = basic_q_cnts[i].name;
-		offsets[j] = basic_q_cnts[i].offset;
-	}
-
-	if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
-		for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) {
-			names[j] = out_of_seq_q_cnts[i].name;
-			offsets[j] = out_of_seq_q_cnts[i].offset;
-		}
-	}
-
-	if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
-		for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) {
-			names[j] = retrans_q_cnts[i].name;
-			offsets[j] = retrans_q_cnts[i].offset;
-		}
-	}
-
-	if (MLX5_CAP_GEN(dev->mdev, enhanced_error_q_counters)) {
-		for (i = 0; i < ARRAY_SIZE(extended_err_cnts); i++, j++) {
-			names[j] = extended_err_cnts[i].name;
-			offsets[j] = extended_err_cnts[i].offset;
-		}
-	}
-
-	if (MLX5_CAP_GEN(dev->mdev, roce_accl)) {
-		for (i = 0; i < ARRAY_SIZE(roce_accl_cnts); i++, j++) {
-			names[j] = roce_accl_cnts[i].name;
-			offsets[j] = roce_accl_cnts[i].offset;
-		}
-	}
-
-	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
-		for (i = 0; i < ARRAY_SIZE(cong_cnts); i++, j++) {
-			names[j] = cong_cnts[i].name;
-			offsets[j] = cong_cnts[i].offset;
-		}
-	}
-
-	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
-		for (i = 0; i < ARRAY_SIZE(ext_ppcnt_cnts); i++, j++) {
-			names[j] = ext_ppcnt_cnts[i].name;
-			offsets[j] = ext_ppcnt_cnts[i].offset;
-		}
-	}
-}
-
-static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
-{
-	u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
-	u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
-	int num_cnt_ports;
-	int err = 0;
-	int i;
-	bool is_shared;
-
-	MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
-	is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;
-	num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
-
-	for (i = 0; i < num_cnt_ports; i++) {
-		err = __mlx5_ib_alloc_counters(dev, &dev->port[i].cnts);
-		if (err)
-			goto err_alloc;
-
-		mlx5_ib_fill_counters(dev, dev->port[i].cnts.names,
-				      dev->port[i].cnts.offsets);
-
-		MLX5_SET(alloc_q_counter_in, in, uid,
-			 is_shared ? MLX5_SHARED_RESOURCE_UID : 0);
-
-		err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
-		if (err) {
-			mlx5_ib_warn(dev,
-				     "couldn't allocate queue counter for port %d, err %d\n",
-				     i + 1, err);
-			goto err_alloc;
-		}
-
-		dev->port[i].cnts.set_id =
-			MLX5_GET(alloc_q_counter_out, out, counter_set_id);
-	}
-	return 0;
-
-err_alloc:
-	mlx5_ib_dealloc_counters(dev);
-	return err;
-}
-
-static const struct mlx5_ib_counters *get_counters(struct mlx5_ib_dev *dev,
-						   u8 port_num)
-{
-	return is_mdev_switchdev_mode(dev->mdev) ? &dev->port[0].cnts :
-						   &dev->port[port_num].cnts;
-}
-
-/**
- * mlx5_ib_get_counters_id - Returns counters id to use for device+port
- * @dev:	Pointer to mlx5 IB device
- * @port_num:	Zero based port number
- *
- * mlx5_ib_get_counters_id() Returns counters set id to use for given
- * device port combination in switchdev and non switchdev mode of the
- * parent device.
- */
-u16 mlx5_ib_get_counters_id(struct mlx5_ib_dev *dev, u8 port_num)
-{
-	const struct mlx5_ib_counters *cnts = get_counters(dev, port_num);
-
-	return cnts->set_id;
-}
-
-static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
-						    u8 port_num)
-{
-	struct mlx5_ib_dev *dev = to_mdev(ibdev);
-	const struct mlx5_ib_counters *cnts;
-	bool is_switchdev = is_mdev_switchdev_mode(dev->mdev);
-
-	if ((is_switchdev && port_num) || (!is_switchdev && !port_num))
-		return NULL;
-
-	cnts = get_counters(dev, port_num - 1);
-
-	return rdma_alloc_hw_stats_struct(cnts->names,
-					  cnts->num_q_counters +
-					  cnts->num_cong_counters +
-					  cnts->num_ext_ppcnt_counters,
-					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
-}
-
-static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
-				    const struct mlx5_ib_counters *cnts,
-				    struct rdma_hw_stats *stats,
-				    u16 set_id)
-{
-	u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
-	u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
-	__be32 val;
-	int ret, i;
-
-	MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
-	MLX5_SET(query_q_counter_in, in, counter_set_id, set_id);
-	ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
-	if (ret)
-		return ret;
-
-	for (i = 0; i < cnts->num_q_counters; i++) {
-		val = *(__be32 *)((void *)out + cnts->offsets[i]);
-		stats->value[i] = (u64)be32_to_cpu(val);
-	}
-
-	return 0;
-}
-
-static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
-					    const struct mlx5_ib_counters *cnts,
-					    struct rdma_hw_stats *stats)
-{
-	int offset = cnts->num_q_counters + cnts->num_cong_counters;
-	int sz = MLX5_ST_SZ_BYTES(ppcnt_reg);
-	int ret, i;
-	void *out;
-
-	out = kvzalloc(sz, GFP_KERNEL);
-	if (!out)
-		return -ENOMEM;
-
-	ret = mlx5_cmd_query_ext_ppcnt_counters(dev->mdev, out);
-	if (ret)
-		goto free;
-
-	for (i = 0; i < cnts->num_ext_ppcnt_counters; i++)
-		stats->value[i + offset] =
-			be64_to_cpup((__be64 *)(out +
-				     cnts->offsets[i + offset]));
-free:
-	kvfree(out);
-	return ret;
-}
-
-static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
-				struct rdma_hw_stats *stats,
-				u8 port_num, int index)
-{
-	struct mlx5_ib_dev *dev = to_mdev(ibdev);
-	const struct mlx5_ib_counters *cnts = get_counters(dev, port_num - 1);
-	struct mlx5_core_dev *mdev;
-	int ret, num_counters;
-	u8 mdev_port_num;
-
-	if (!stats)
-		return -EINVAL;
-
-	num_counters = cnts->num_q_counters +
-		       cnts->num_cong_counters +
-		       cnts->num_ext_ppcnt_counters;
-
-	/* q_counters are per IB device, query the master mdev */
-	ret = mlx5_ib_query_q_counters(dev->mdev, cnts, stats, cnts->set_id);
-	if (ret)
-		return ret;
-
-	if (MLX5_CAP_PCAM_FEATURE(dev->mdev, rx_icrc_encapsulated_counter)) {
-		ret = mlx5_ib_query_ext_ppcnt_counters(dev, cnts, stats);
-		if (ret)
-			return ret;
-	}
-
-	if (MLX5_CAP_GEN(dev->mdev, cc_query_allowed)) {
-		mdev = mlx5_ib_get_native_port_mdev(dev, port_num,
-						    &mdev_port_num);
-		if (!mdev) {
-			/* If port is not affiliated yet, it's in a down state
-			 * which doesn't have any counters yet, so it would be
-			 * zero. So no need to read from the HCA.
-			 */
-			goto done;
-		}
-		ret = mlx5_lag_query_cong_counters(dev->mdev,
-						   stats->value +
-						   cnts->num_q_counters,
-						   cnts->num_cong_counters,
-						   cnts->offsets +
-						   cnts->num_q_counters);
-
-		mlx5_ib_put_native_port_mdev(dev, port_num);
-		if (ret)
-			return ret;
-	}
-
-done:
-	return num_counters;
-}
-
-static struct rdma_hw_stats *
-mlx5_ib_counter_alloc_stats(struct rdma_counter *counter)
-{
-	struct mlx5_ib_dev *dev = to_mdev(counter->device);
-	const struct mlx5_ib_counters *cnts =
-		get_counters(dev, counter->port - 1);
-
-	return rdma_alloc_hw_stats_struct(cnts->names,
-					  cnts->num_q_counters +
-					  cnts->num_cong_counters +
-					  cnts->num_ext_ppcnt_counters,
-					  RDMA_HW_STATS_DEFAULT_LIFESPAN);
-}
-
-static int mlx5_ib_counter_update_stats(struct rdma_counter *counter)
-{
-	struct mlx5_ib_dev *dev = to_mdev(counter->device);
-	const struct mlx5_ib_counters *cnts =
-		get_counters(dev, counter->port - 1);
-
-	return mlx5_ib_query_q_counters(dev->mdev, cnts,
-					counter->stats, counter->id);
-}
-
-static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
-{
-	struct mlx5_ib_dev *dev = to_mdev(counter->device);
-	u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
-
-	if (!counter->id)
-		return 0;
-
-	MLX5_SET(dealloc_q_counter_in, in, opcode,
-		 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
-	MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id);
-	return mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
-}
-
-static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
-				   struct ib_qp *qp)
-{
-	struct mlx5_ib_dev *dev = to_mdev(qp->device);
-	int err;
-
-	if (!counter->id) {
-		u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
-		u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
-
-		MLX5_SET(alloc_q_counter_in, in, opcode,
-			 MLX5_CMD_OP_ALLOC_Q_COUNTER);
-		MLX5_SET(alloc_q_counter_in, in, uid, MLX5_SHARED_RESOURCE_UID);
-		err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
-		if (err)
-			return err;
-		counter->id =
-			MLX5_GET(alloc_q_counter_out, out, counter_set_id);
-	}
-
-	err = mlx5_ib_qp_set_counter(qp, counter);
-	if (err)
-		goto fail_set_counter;
-
-	return 0;
-
-fail_set_counter:
-	mlx5_ib_counter_dealloc(counter);
-	counter->id = 0;
-
-	return err;
-}
-
-static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp)
-{
-	return mlx5_ib_qp_set_counter(qp, NULL);
-}
-
 static int mlx5_ib_rn_get_params(struct ib_device *device, u8 port_num,
 				 enum rdma_netdev_t type,
 				 struct rdma_netdev_alloc_params *params)
@@ -5802,23 +3369,6 @@ static int mlx5_ib_rn_get_params(struct ib_device *device, u8 port_num,
 	return mlx5_rdma_rn_get_params(to_mdev(device)->mdev, device, params);
 }
 
-static void delay_drop_debugfs_cleanup(struct mlx5_ib_dev *dev)
-{
-	if (!dev->delay_drop.dir_debugfs)
-		return;
-	debugfs_remove_recursive(dev->delay_drop.dir_debugfs);
-	dev->delay_drop.dir_debugfs = NULL;
-}
-
-static void cancel_delay_drop(struct mlx5_ib_dev *dev)
-{
-	if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
-		return;
-
-	cancel_work_sync(&dev->delay_drop.delay_drop_work);
-	delay_drop_debugfs_cleanup(dev);
-}
-
 static ssize_t delay_drop_timeout_read(struct file *filp, char __user *buf,
 				       size_t count, loff_t *pos)
 {
@@ -5858,40 +3408,6 @@ static const struct file_operations fops_delay_drop_timeout = {
 	.read	= delay_drop_timeout_read,
 };
 
-static void delay_drop_debugfs_init(struct mlx5_ib_dev *dev)
-{
-	struct dentry *root;
-
-	if (!mlx5_debugfs_root)
-		return;
-
-	root = debugfs_create_dir("delay_drop", dev->mdev->priv.dbg_root);
-	dev->delay_drop.dir_debugfs = root;
-
-	debugfs_create_atomic_t("num_timeout_events", 0400, root,
-				&dev->delay_drop.events_cnt);
-	debugfs_create_atomic_t("num_rqs", 0400, root,
-				&dev->delay_drop.rqs_cnt);
-	debugfs_create_file("timeout", 0600, root, &dev->delay_drop,
-			    &fops_delay_drop_timeout);
-}
-
-static void init_delay_drop(struct mlx5_ib_dev *dev)
-{
-	if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP))
-		return;
-
-	mutex_init(&dev->delay_drop.lock);
-	dev->delay_drop.dev = dev;
-	dev->delay_drop.activate = false;
-	dev->delay_drop.timeout = MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000;
-	INIT_WORK(&dev->delay_drop.delay_drop_work, delay_drop_handler);
-	atomic_set(&dev->delay_drop.rqs_cnt, 0);
-	atomic_set(&dev->delay_drop.events_cnt, 0);
-
-	delay_drop_debugfs_init(dev);
-}
-
 static void mlx5_ib_unbind_slave_port(struct mlx5_ib_dev *ibdev,
 				      struct mlx5_ib_multiport_info *mpi)
 {
@@ -6385,90 +3901,32 @@ ADD_UVERBS_ATTRIBUTES_SIMPLE(
 	UVERBS_ATTR_FLAGS_IN(MLX5_IB_ATTR_CREATE_FLOW_ACTION_FLAGS,
 			     enum mlx5_ib_uapi_flow_action_flags));
 
+ADD_UVERBS_ATTRIBUTES_SIMPLE(
+	mlx5_ib_query_context,
+	UVERBS_OBJECT_DEVICE,
+	UVERBS_METHOD_QUERY_CONTEXT,
+	UVERBS_ATTR_PTR_OUT(
+		MLX5_IB_ATTR_QUERY_CONTEXT_RESP_UCTX,
+		UVERBS_ATTR_STRUCT(struct mlx5_ib_alloc_ucontext_resp,
+				   dump_fill_mkey),
+		UA_MANDATORY));
+
 static const struct uapi_definition mlx5_ib_defs[] = {
 	UAPI_DEF_CHAIN(mlx5_ib_devx_defs),
 	UAPI_DEF_CHAIN(mlx5_ib_flow_defs),
 	UAPI_DEF_CHAIN(mlx5_ib_qos_defs),
+	UAPI_DEF_CHAIN(mlx5_ib_std_types_defs),
 
 	UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_FLOW_ACTION,
 				&mlx5_ib_flow_action),
 	UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DM, &mlx5_ib_dm),
+	UAPI_DEF_CHAIN_OBJ_TREE(UVERBS_OBJECT_DEVICE, &mlx5_ib_query_context),
 	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_VAR,
 				      UAPI_DEF_IS_OBJ_SUPPORTED(var_is_supported)),
 	UAPI_DEF_CHAIN_OBJ_TREE_NAMED(MLX5_IB_OBJECT_UAR),
 	{}
 };
 
-static int mlx5_ib_read_counters(struct ib_counters *counters,
-				 struct ib_counters_read_attr *read_attr,
-				 struct uverbs_attr_bundle *attrs)
-{
-	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
-	struct mlx5_read_counters_attr mread_attr = {};
-	struct mlx5_ib_flow_counters_desc *desc;
-	int ret, i;
-
-	mutex_lock(&mcounters->mcntrs_mutex);
-	if (mcounters->cntrs_max_index > read_attr->ncounters) {
-		ret = -EINVAL;
-		goto err_bound;
-	}
-
-	mread_attr.out = kcalloc(mcounters->counters_num, sizeof(u64),
-				 GFP_KERNEL);
-	if (!mread_attr.out) {
-		ret = -ENOMEM;
-		goto err_bound;
-	}
-
-	mread_attr.hw_cntrs_hndl = mcounters->hw_cntrs_hndl;
-	mread_attr.flags = read_attr->flags;
-	ret = mcounters->read_counters(counters->device, &mread_attr);
-	if (ret)
-		goto err_read;
-
-	/* do the pass over the counters data array to assign according to the
-	 * descriptions and indexing pairs
-	 */
-	desc = mcounters->counters_data;
-	for (i = 0; i < mcounters->ncounters; i++)
-		read_attr->counters_buff[desc[i].index] += mread_attr.out[desc[i].description];
-
-err_read:
-	kfree(mread_attr.out);
-err_bound:
-	mutex_unlock(&mcounters->mcntrs_mutex);
-	return ret;
-}
-
-static int mlx5_ib_destroy_counters(struct ib_counters *counters)
-{
-	struct mlx5_ib_mcounters *mcounters = to_mcounters(counters);
-
-	counters_clear_description(counters);
-	if (mcounters->hw_cntrs_hndl)
-		mlx5_fc_destroy(to_mdev(counters->device)->mdev,
-				mcounters->hw_cntrs_hndl);
-
-	kfree(mcounters);
-
-	return 0;
-}
-
-static struct ib_counters *mlx5_ib_create_counters(struct ib_device *device,
-						   struct uverbs_attr_bundle *attrs)
-{
-	struct mlx5_ib_mcounters *mcounters;
-
-	mcounters = kzalloc(sizeof(*mcounters), GFP_KERNEL);
-	if (!mcounters)
-		return ERR_PTR(-ENOMEM);
-
-	mutex_init(&mcounters->mcntrs_mutex);
-
-	return &mcounters->ibcntrs;
-}
-
 static void mlx5_ib_stage_init_cleanup(struct mlx5_ib_dev *dev)
 {
 	mlx5_ib_cleanup_multiport_master(dev);
@@ -6547,21 +4005,16 @@ err_mp:
 	return -ENOMEM;
 }
 
-static int mlx5_ib_stage_flow_db_init(struct mlx5_ib_dev *dev)
+static int mlx5_ib_enable_driver(struct ib_device *dev)
 {
-	dev->flow_db = kzalloc(sizeof(*dev->flow_db), GFP_KERNEL);
-
-	if (!dev->flow_db)
-		return -ENOMEM;
-
-	mutex_init(&dev->flow_db->lock);
+	struct mlx5_ib_dev *mdev = to_mdev(dev);
+	int ret;
 
-	return 0;
-}
+	ret = mlx5_ib_test_wc(mdev);
+	mlx5_ib_dbg(mdev, "Write-Combining %s",
+		    mdev->wc_support ? "supported" : "not supported");
 
-static void mlx5_ib_stage_flow_db_cleanup(struct mlx5_ib_dev *dev)
-{
-	kfree(dev->flow_db);
+	return ret;
 }
 
 static const struct ib_device_ops mlx5_ib_dev_ops = {
@@ -6577,9 +4030,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
 	.attach_mcast = mlx5_ib_mcg_attach,
 	.check_mr_status = mlx5_ib_check_mr_status,
 	.create_ah = mlx5_ib_create_ah,
-	.create_counters = mlx5_ib_create_counters,
 	.create_cq = mlx5_ib_create_cq,
-	.create_flow = mlx5_ib_create_flow,
 	.create_qp = mlx5_ib_create_qp,
 	.create_srq = mlx5_ib_create_srq,
 	.dealloc_pd = mlx5_ib_dealloc_pd,
@@ -6587,10 +4038,7 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
 	.del_gid = mlx5_ib_del_gid,
 	.dereg_mr = mlx5_ib_dereg_mr,
 	.destroy_ah = mlx5_ib_destroy_ah,
-	.destroy_counters = mlx5_ib_destroy_counters,
 	.destroy_cq = mlx5_ib_destroy_cq,
-	.destroy_flow = mlx5_ib_destroy_flow,
-	.destroy_flow_action = mlx5_ib_destroy_flow_action,
 	.destroy_qp = mlx5_ib_destroy_qp,
 	.destroy_srq = mlx5_ib_destroy_srq,
 	.detach_mcast = mlx5_ib_mcg_detach,
@@ -6598,8 +4046,6 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
 	.drain_rq = mlx5_ib_drain_rq,
 	.drain_sq = mlx5_ib_drain_sq,
 	.enable_driver = mlx5_ib_enable_driver,
-	.fill_res_entry = mlx5_ib_fill_res_entry,
-	.fill_stat_entry = mlx5_ib_fill_stat_entry,
 	.get_dev_fw_str = get_dev_fw_str,
 	.get_dma_mr = mlx5_ib_get_dma_mr,
 	.get_link_layer = mlx5_ib_port_link_layer,
@@ -6623,24 +4069,20 @@ static const struct ib_device_ops mlx5_ib_dev_ops = {
 	.query_pkey = mlx5_ib_query_pkey,
 	.query_qp = mlx5_ib_query_qp,
 	.query_srq = mlx5_ib_query_srq,
-	.read_counters = mlx5_ib_read_counters,
+	.query_ucontext = mlx5_ib_query_ucontext,
 	.reg_user_mr = mlx5_ib_reg_user_mr,
 	.req_notify_cq = mlx5_ib_arm_cq,
 	.rereg_user_mr = mlx5_ib_rereg_user_mr,
 	.resize_cq = mlx5_ib_resize_cq,
 
 	INIT_RDMA_OBJ_SIZE(ib_ah, mlx5_ib_ah, ibah),
+	INIT_RDMA_OBJ_SIZE(ib_counters, mlx5_ib_mcounters, ibcntrs),
 	INIT_RDMA_OBJ_SIZE(ib_cq, mlx5_ib_cq, ibcq),
 	INIT_RDMA_OBJ_SIZE(ib_pd, mlx5_ib_pd, ibpd),
 	INIT_RDMA_OBJ_SIZE(ib_srq, mlx5_ib_srq, ibsrq),
 	INIT_RDMA_OBJ_SIZE(ib_ucontext, mlx5_ib_ucontext, ibucontext),
 };
 
-static const struct ib_device_ops mlx5_ib_dev_flow_ipsec_ops = {
-	.create_flow_action_esp = mlx5_ib_create_flow_action_esp,
-	.modify_flow_action_esp = mlx5_ib_modify_flow_action_esp,
-};
-
 static const struct ib_device_ops mlx5_ib_dev_ipoib_enhanced_ops = {
 	.rdma_netdev_get_params = mlx5_ib_rn_get_params,
 };
@@ -6661,6 +4103,8 @@ static const struct ib_device_ops mlx5_ib_dev_mw_ops = {
 static const struct ib_device_ops mlx5_ib_dev_xrc_ops = {
 	.alloc_xrcd = mlx5_ib_alloc_xrcd,
 	.dealloc_xrcd = mlx5_ib_dealloc_xrcd,
+
+	INIT_RDMA_OBJ_SIZE(ib_xrcd, mlx5_ib_xrcd, ibxrcd),
 };
 
 static const struct ib_device_ops mlx5_ib_dev_dm_ops = {
@@ -6769,9 +4213,6 @@ static int mlx5_ib_stage_caps_init(struct mlx5_ib_dev *dev)
 	    MLX5_GENERAL_OBJ_TYPES_CAP_SW_ICM)
 		ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_dm_ops);
 
-	if (mlx5_accel_ipsec_device_caps(dev->mdev) &
-	    MLX5_ACCEL_IPSEC_CAP_DEVICE)
-		ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_flow_ipsec_ops);
 	ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_ops);
 
 	if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS))
@@ -6829,65 +4270,36 @@ static const struct ib_device_ops mlx5_ib_dev_common_roce_ops = {
 	.modify_wq = mlx5_ib_modify_wq,
 };
 
-static int mlx5_ib_stage_common_roce_init(struct mlx5_ib_dev *dev)
-{
-	u8 port_num;
-
-	dev->ib_dev.uverbs_ex_cmd_mask |=
-			(1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
-			(1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
-			(1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
-			(1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
-			(1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
-	ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops);
-
-	port_num = mlx5_core_native_port_num(dev->mdev) - 1;
-
-	/* Register only for native ports */
-	return mlx5_add_netdev_notifier(dev, port_num);
-}
-
-static void mlx5_ib_stage_common_roce_cleanup(struct mlx5_ib_dev *dev)
-{
-	u8 port_num = mlx5_core_native_port_num(dev->mdev) - 1;
-
-	mlx5_remove_netdev_notifier(dev, port_num);
-}
-
-static int mlx5_ib_stage_raw_eth_roce_init(struct mlx5_ib_dev *dev)
-{
-	struct mlx5_core_dev *mdev = dev->mdev;
-	enum rdma_link_layer ll;
-	int port_type_cap;
-	int err = 0;
-
-	port_type_cap = MLX5_CAP_GEN(mdev, port_type);
-	ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
-
-	if (ll == IB_LINK_LAYER_ETHERNET)
-		err = mlx5_ib_stage_common_roce_init(dev);
-
-	return err;
-}
-
-static void mlx5_ib_stage_raw_eth_roce_cleanup(struct mlx5_ib_dev *dev)
-{
-	mlx5_ib_stage_common_roce_cleanup(dev);
-}
-
-static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)
+static int mlx5_ib_roce_init(struct mlx5_ib_dev *dev)
 {
 	struct mlx5_core_dev *mdev = dev->mdev;
 	enum rdma_link_layer ll;
 	int port_type_cap;
+	u8 port_num = 0;
 	int err;
 
 	port_type_cap = MLX5_CAP_GEN(mdev, port_type);
 	ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
 
 	if (ll == IB_LINK_LAYER_ETHERNET) {
-		err = mlx5_ib_stage_common_roce_init(dev);
-		if (err)
+		dev->ib_dev.uverbs_ex_cmd_mask |=
+			(1ull << IB_USER_VERBS_EX_CMD_CREATE_WQ) |
+			(1ull << IB_USER_VERBS_EX_CMD_MODIFY_WQ) |
+			(1ull << IB_USER_VERBS_EX_CMD_DESTROY_WQ) |
+			(1ull << IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL) |
+			(1ull << IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL);
+		ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_common_roce_ops);
+
+		port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+
+		/* Register only for native ports */
+		err = mlx5_add_netdev_notifier(dev, port_num);
+		if (err || dev->is_rep || !mlx5_is_roce_enabled(mdev))
+			/*
+			 * We don't enable ETH interface for
+			 * 1. IB representors
+			 * 2. User disabled ROCE through devlink interface
+			 */
 			return err;
 
 		err = mlx5_enable_eth(dev);
@@ -6897,71 +4309,27 @@ static int mlx5_ib_stage_roce_init(struct mlx5_ib_dev *dev)
 	return 0;
 
 cleanup:
-	mlx5_ib_stage_common_roce_cleanup(dev);
-
+	mlx5_remove_netdev_notifier(dev, port_num);
 	return err;
 }
 
-static void mlx5_ib_stage_roce_cleanup(struct mlx5_ib_dev *dev)
+static void mlx5_ib_roce_cleanup(struct mlx5_ib_dev *dev)
 {
 	struct mlx5_core_dev *mdev = dev->mdev;
 	enum rdma_link_layer ll;
 	int port_type_cap;
+	u8 port_num;
 
 	port_type_cap = MLX5_CAP_GEN(mdev, port_type);
 	ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap);
 
 	if (ll == IB_LINK_LAYER_ETHERNET) {
-		mlx5_disable_eth(dev);
-		mlx5_ib_stage_common_roce_cleanup(dev);
-	}
-}
-
-static int mlx5_ib_stage_dev_res_init(struct mlx5_ib_dev *dev)
-{
-	return create_dev_resources(&dev->devr);
-}
+		if (!dev->is_rep)
+			mlx5_disable_eth(dev);
 
-static void mlx5_ib_stage_dev_res_cleanup(struct mlx5_ib_dev *dev)
-{
-	destroy_dev_resources(&dev->devr);
-}
-
-static int mlx5_ib_stage_odp_init(struct mlx5_ib_dev *dev)
-{
-	return mlx5_ib_odp_init_one(dev);
-}
-
-static void mlx5_ib_stage_odp_cleanup(struct mlx5_ib_dev *dev)
-{
-	mlx5_ib_odp_cleanup_one(dev);
-}
-
-static const struct ib_device_ops mlx5_ib_dev_hw_stats_ops = {
-	.alloc_hw_stats = mlx5_ib_alloc_hw_stats,
-	.get_hw_stats = mlx5_ib_get_hw_stats,
-	.counter_bind_qp = mlx5_ib_counter_bind_qp,
-	.counter_unbind_qp = mlx5_ib_counter_unbind_qp,
-	.counter_dealloc = mlx5_ib_counter_dealloc,
-	.counter_alloc_stats = mlx5_ib_counter_alloc_stats,
-	.counter_update_stats = mlx5_ib_counter_update_stats,
-};
-
-static int mlx5_ib_stage_counters_init(struct mlx5_ib_dev *dev)
-{
-	if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
-		ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_hw_stats_ops);
-
-		return mlx5_ib_alloc_counters(dev);
+		port_num = mlx5_core_native_port_num(dev->mdev) - 1;
+		mlx5_remove_netdev_notifier(dev, port_num);
 	}
-
-	return 0;
-}
-
-static void mlx5_ib_stage_counters_cleanup(struct mlx5_ib_dev *dev)
-{
-	if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
-		mlx5_ib_dealloc_counters(dev);
 }
 
 static int mlx5_ib_stage_cong_debugfs_init(struct mlx5_ib_dev *dev)
@@ -7023,7 +4391,18 @@ static int mlx5_ib_stage_ib_reg_init(struct mlx5_ib_dev *dev)
 
 static void mlx5_ib_stage_pre_ib_reg_umr_cleanup(struct mlx5_ib_dev *dev)
 {
-	destroy_umrc_res(dev);
+	int err;
+
+	err = mlx5_mr_cache_cleanup(dev);
+	if (err)
+		mlx5_ib_warn(dev, "mr cache cleanup failed\n");
+
+	if (dev->umrc.qp)
+		mlx5_ib_destroy_qp(dev->umrc.qp, NULL);
+	if (dev->umrc.cq)
+		ib_free_cq(dev->umrc.cq);
+	if (dev->umrc.pd)
+		ib_dealloc_pd(dev->umrc.pd);
 }
 
 static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
@@ -7031,21 +4410,162 @@ static void mlx5_ib_stage_ib_reg_cleanup(struct mlx5_ib_dev *dev)
 	ib_unregister_device(&dev->ib_dev);
 }
 
+enum {
+	MAX_UMR_WR = 128,
+};
+
 static int mlx5_ib_stage_post_ib_reg_umr_init(struct mlx5_ib_dev *dev)
 {
-	return create_umr_res(dev);
+	struct ib_qp_init_attr *init_attr = NULL;
+	struct ib_qp_attr *attr = NULL;
+	struct ib_pd *pd;
+	struct ib_cq *cq;
+	struct ib_qp *qp;
+	int ret;
+
+	attr = kzalloc(sizeof(*attr), GFP_KERNEL);
+	init_attr = kzalloc(sizeof(*init_attr), GFP_KERNEL);
+	if (!attr || !init_attr) {
+		ret = -ENOMEM;
+		goto error_0;
+	}
+
+	pd = ib_alloc_pd(&dev->ib_dev, 0);
+	if (IS_ERR(pd)) {
+		mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n");
+		ret = PTR_ERR(pd);
+		goto error_0;
+	}
+
+	cq = ib_alloc_cq(&dev->ib_dev, NULL, 128, 0, IB_POLL_SOFTIRQ);
+	if (IS_ERR(cq)) {
+		mlx5_ib_dbg(dev, "Couldn't create CQ for sync UMR QP\n");
"Couldn't create CQ for sync UMR QP\n"); + ret = PTR_ERR(cq); + goto error_2; + } + + init_attr->send_cq = cq; + init_attr->recv_cq = cq; + init_attr->sq_sig_type = IB_SIGNAL_ALL_WR; + init_attr->cap.max_send_wr = MAX_UMR_WR; + init_attr->cap.max_send_sge = 1; + init_attr->qp_type = MLX5_IB_QPT_REG_UMR; + init_attr->port_num = 1; + qp = mlx5_ib_create_qp(pd, init_attr, NULL); + if (IS_ERR(qp)) { + mlx5_ib_dbg(dev, "Couldn't create sync UMR QP\n"); + ret = PTR_ERR(qp); + goto error_3; + } + qp->device = &dev->ib_dev; + qp->real_qp = qp; + qp->uobject = NULL; + qp->qp_type = MLX5_IB_QPT_REG_UMR; + qp->send_cq = init_attr->send_cq; + qp->recv_cq = init_attr->recv_cq; + + attr->qp_state = IB_QPS_INIT; + attr->port_num = 1; + ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE | IB_QP_PKEY_INDEX | + IB_QP_PORT, NULL); + if (ret) { + mlx5_ib_dbg(dev, "Couldn't modify UMR QP\n"); + goto error_4; + } + + memset(attr, 0, sizeof(*attr)); + attr->qp_state = IB_QPS_RTR; + attr->path_mtu = IB_MTU_256; + + ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL); + if (ret) { + mlx5_ib_dbg(dev, "Couldn't modify umr QP to rtr\n"); + goto error_4; + } + + memset(attr, 0, sizeof(*attr)); + attr->qp_state = IB_QPS_RTS; + ret = mlx5_ib_modify_qp(qp, attr, IB_QP_STATE, NULL); + if (ret) { + mlx5_ib_dbg(dev, "Couldn't modify umr QP to rts\n"); + goto error_4; + } + + dev->umrc.qp = qp; + dev->umrc.cq = cq; + dev->umrc.pd = pd; + + sema_init(&dev->umrc.sem, MAX_UMR_WR); + ret = mlx5_mr_cache_init(dev); + if (ret) { + mlx5_ib_warn(dev, "mr cache init failed %d\n", ret); + goto error_4; + } + + kfree(attr); + kfree(init_attr); + + return 0; + +error_4: + mlx5_ib_destroy_qp(qp, NULL); + dev->umrc.qp = NULL; + +error_3: + ib_free_cq(cq); + dev->umrc.cq = NULL; + +error_2: + ib_dealloc_pd(pd); + dev->umrc.pd = NULL; + +error_0: + kfree(attr); + kfree(init_attr); + return ret; } static int mlx5_ib_stage_delay_drop_init(struct mlx5_ib_dev *dev) { - init_delay_drop(dev); + struct dentry *root; + + if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP)) + return 0; + + mutex_init(&dev->delay_drop.lock); + dev->delay_drop.dev = dev; + dev->delay_drop.activate = false; + dev->delay_drop.timeout = MLX5_MAX_DELAY_DROP_TIMEOUT_MS * 1000; + INIT_WORK(&dev->delay_drop.delay_drop_work, delay_drop_handler); + atomic_set(&dev->delay_drop.rqs_cnt, 0); + atomic_set(&dev->delay_drop.events_cnt, 0); + + if (!mlx5_debugfs_root) + return 0; + + root = debugfs_create_dir("delay_drop", dev->mdev->priv.dbg_root); + dev->delay_drop.dir_debugfs = root; + debugfs_create_atomic_t("num_timeout_events", 0400, root, + &dev->delay_drop.events_cnt); + debugfs_create_atomic_t("num_rqs", 0400, root, + &dev->delay_drop.rqs_cnt); + debugfs_create_file("timeout", 0600, root, &dev->delay_drop, + &fops_delay_drop_timeout); return 0; } static void mlx5_ib_stage_delay_drop_cleanup(struct mlx5_ib_dev *dev) { - cancel_delay_drop(dev); + if (!(dev->ib_dev.attrs.raw_packet_caps & IB_RAW_PACKET_CAP_DELAY_DROP)) + return; + + cancel_work_sync(&dev->delay_drop.delay_drop_work); + if (!dev->delay_drop.dir_debugfs) + return; + + debugfs_remove_recursive(dev->delay_drop.dir_debugfs); + dev->delay_drop.dir_debugfs = NULL; } static int mlx5_ib_stage_dev_notifier_init(struct mlx5_ib_dev *dev) @@ -7060,38 +4580,6 @@ static void mlx5_ib_stage_dev_notifier_cleanup(struct mlx5_ib_dev *dev) mlx5_notifier_unregister(dev->mdev, &dev->mdev_events); } -static int mlx5_ib_stage_devx_init(struct mlx5_ib_dev *dev) -{ - int uid; - - uid = mlx5_ib_devx_create(dev, false); - 
-	if (uid > 0) {
-		dev->devx_whitelist_uid = uid;
-		mlx5_ib_devx_init_event_table(dev);
-	}
-
-	return 0;
-}
-static void mlx5_ib_stage_devx_cleanup(struct mlx5_ib_dev *dev)
-{
-	if (dev->devx_whitelist_uid) {
-		mlx5_ib_devx_cleanup_event_table(dev);
-		mlx5_ib_devx_destroy(dev, dev->devx_whitelist_uid);
-	}
-}
-
-int mlx5_ib_enable_driver(struct ib_device *dev)
-{
-	struct mlx5_ib_dev *mdev = to_mdev(dev);
-	int ret;
-
-	ret = mlx5_ib_test_wc(mdev);
-	mlx5_ib_dbg(mdev, "Write-Combining %s",
-		    mdev->wc_support ? "supported" : "not supported");
-
-	return ret;
-}
-
 void __mlx5_ib_remove(struct mlx5_ib_dev *dev,
 		      const struct mlx5_ib_profile *profile,
 		      int stage)
@@ -7139,9 +4627,9 @@ static const struct mlx5_ib_profile pf_profile = {
 	STAGE_CREATE(MLX5_IB_STAGE_INIT,
 		     mlx5_ib_stage_init_init,
 		     mlx5_ib_stage_init_cleanup),
-	STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
-		     mlx5_ib_stage_flow_db_init,
-		     mlx5_ib_stage_flow_db_cleanup),
+	STAGE_CREATE(MLX5_IB_STAGE_FS,
+		     mlx5_ib_fs_init,
+		     mlx5_ib_fs_cleanup),
	STAGE_CREATE(MLX5_IB_STAGE_CAPS,
		     mlx5_ib_stage_caps_init,
		     mlx5_ib_stage_caps_cleanup),
@@ -7149,8 +4637,8 @@ static const struct mlx5_ib_profile pf_profile = {
 		     mlx5_ib_stage_non_default_cb,
 		     NULL),
 	STAGE_CREATE(MLX5_IB_STAGE_ROCE,
-		     mlx5_ib_stage_roce_init,
-		     mlx5_ib_stage_roce_cleanup),
+		     mlx5_ib_roce_init,
+		     mlx5_ib_roce_cleanup),
 	STAGE_CREATE(MLX5_IB_STAGE_QP,
 		     mlx5_init_qp_table,
 		     mlx5_cleanup_qp_table),
@@ -7158,17 +4646,17 @@ static const struct mlx5_ib_profile pf_profile = {
 		     mlx5_init_srq_table,
 		     mlx5_cleanup_srq_table),
 	STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
-		     mlx5_ib_stage_dev_res_init,
-		     mlx5_ib_stage_dev_res_cleanup),
+		     mlx5_ib_dev_res_init,
+		     mlx5_ib_dev_res_cleanup),
 	STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER,
 		     mlx5_ib_stage_dev_notifier_init,
 		     mlx5_ib_stage_dev_notifier_cleanup),
 	STAGE_CREATE(MLX5_IB_STAGE_ODP,
-		     mlx5_ib_stage_odp_init,
-		     mlx5_ib_stage_odp_cleanup),
+		     mlx5_ib_odp_init_one,
+		     mlx5_ib_odp_cleanup_one),
 	STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
-		     mlx5_ib_stage_counters_init,
-		     mlx5_ib_stage_counters_cleanup),
+		     mlx5_ib_counters_init,
+		     mlx5_ib_counters_cleanup),
 	STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS,
 		     mlx5_ib_stage_cong_debugfs_init,
 		     mlx5_ib_stage_cong_debugfs_cleanup),
@@ -7182,8 +4670,8 @@ static const struct mlx5_ib_profile pf_profile = {
 		     NULL,
 		     mlx5_ib_stage_pre_ib_reg_umr_cleanup),
 	STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID,
-		     mlx5_ib_stage_devx_init,
-		     mlx5_ib_stage_devx_cleanup),
+		     mlx5_ib_devx_init,
+		     mlx5_ib_devx_cleanup),
 	STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
 		     mlx5_ib_stage_ib_reg_init,
 		     mlx5_ib_stage_ib_reg_cleanup),
@@ -7193,15 +4681,18 @@ static const struct mlx5_ib_profile pf_profile = {
 	STAGE_CREATE(MLX5_IB_STAGE_DELAY_DROP,
 		     mlx5_ib_stage_delay_drop_init,
 		     mlx5_ib_stage_delay_drop_cleanup),
+	STAGE_CREATE(MLX5_IB_STAGE_RESTRACK,
+		     mlx5_ib_restrack_init,
+		     NULL),
 };
 
 const struct mlx5_ib_profile raw_eth_profile = {
 	STAGE_CREATE(MLX5_IB_STAGE_INIT,
 		     mlx5_ib_stage_init_init,
 		     mlx5_ib_stage_init_cleanup),
-	STAGE_CREATE(MLX5_IB_STAGE_FLOW_DB,
-		     mlx5_ib_stage_flow_db_init,
-		     mlx5_ib_stage_flow_db_cleanup),
+	STAGE_CREATE(MLX5_IB_STAGE_FS,
+		     mlx5_ib_fs_init,
+		     mlx5_ib_fs_cleanup),
 	STAGE_CREATE(MLX5_IB_STAGE_CAPS,
 		     mlx5_ib_stage_caps_init,
 		     mlx5_ib_stage_caps_cleanup),
@@ -7209,8 +4700,8 @@ const struct mlx5_ib_profile raw_eth_profile = {
 		     mlx5_ib_stage_raw_eth_non_default_cb,
 		     NULL),
 	STAGE_CREATE(MLX5_IB_STAGE_ROCE,
-		     mlx5_ib_stage_raw_eth_roce_init,
-		     mlx5_ib_stage_raw_eth_roce_cleanup),
+		     mlx5_ib_roce_init,
+		     mlx5_ib_roce_cleanup),
 	STAGE_CREATE(MLX5_IB_STAGE_QP,
 		     mlx5_init_qp_table,
 		     mlx5_cleanup_qp_table),
@@ -7218,14 +4709,14 @@ const struct mlx5_ib_profile raw_eth_profile = {
 		     mlx5_init_srq_table,
 		     mlx5_cleanup_srq_table),
 	STAGE_CREATE(MLX5_IB_STAGE_DEVICE_RESOURCES,
-		     mlx5_ib_stage_dev_res_init,
-		     mlx5_ib_stage_dev_res_cleanup),
+		     mlx5_ib_dev_res_init,
+		     mlx5_ib_dev_res_cleanup),
 	STAGE_CREATE(MLX5_IB_STAGE_DEVICE_NOTIFIER,
 		     mlx5_ib_stage_dev_notifier_init,
 		     mlx5_ib_stage_dev_notifier_cleanup),
 	STAGE_CREATE(MLX5_IB_STAGE_COUNTERS,
-		     mlx5_ib_stage_counters_init,
-		     mlx5_ib_stage_counters_cleanup),
+		     mlx5_ib_counters_init,
+		     mlx5_ib_counters_cleanup),
 	STAGE_CREATE(MLX5_IB_STAGE_CONG_DEBUGFS,
 		     mlx5_ib_stage_cong_debugfs_init,
 		     mlx5_ib_stage_cong_debugfs_cleanup),
@@ -7239,14 +4730,17 @@ const struct mlx5_ib_profile raw_eth_profile = {
 		     NULL,
 		     mlx5_ib_stage_pre_ib_reg_umr_cleanup),
 	STAGE_CREATE(MLX5_IB_STAGE_WHITELIST_UID,
-		     mlx5_ib_stage_devx_init,
-		     mlx5_ib_stage_devx_cleanup),
+		     mlx5_ib_devx_init,
+		     mlx5_ib_devx_cleanup),
 	STAGE_CREATE(MLX5_IB_STAGE_IB_REG,
 		     mlx5_ib_stage_ib_reg_init,
 		     mlx5_ib_stage_ib_reg_cleanup),
 	STAGE_CREATE(MLX5_IB_STAGE_POST_IB_REG_UMR,
 		     mlx5_ib_stage_post_ib_reg_umr_init,
 		     NULL),
+	STAGE_CREATE(MLX5_IB_STAGE_RESTRACK,
+		     mlx5_ib_restrack_init,
+		     NULL),
 };
 
 static void *mlx5_ib_add_slave_port(struct mlx5_core_dev *mdev)