From 8ecc7985b4b15f1f14bce31d8ab45dc426df7da3 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Thu, 27 Oct 2016 16:36:30 +0300 Subject: IB/core: Save QP in ib_flow structure When we create flow steering rule, we need to save the related QP in the ib_flow struct. this QP is used in destroy flow. Move the QP assignment from ib_uverbs_ex_create_flow into ib_create_flow, this would allow both kernel and userspace consumers to use it. This bug wasn't seen in the wild because there are no kernel consumers currently in the kernel. Fixes: 319a441d1361 ("IB/core: Add receive flow steering support") Signed-off-by: Mark Bloch Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 1 - drivers/infiniband/core/verbs.c | 4 +++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index cb3f515a2285..2690c9263ee4 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3745,7 +3745,6 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, err = PTR_ERR(flow_id); goto err_free; } - flow_id->qp = qp; flow_id->uobject = uobj; uobj->object = flow_id; diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 83687646da68..b7f9f3d95975 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1734,8 +1734,10 @@ struct ib_flow *ib_create_flow(struct ib_qp *qp, return ERR_PTR(-ENOSYS); flow_id = qp->device->create_flow(qp, flow_attr, domain); - if (!IS_ERR(flow_id)) + if (!IS_ERR(flow_id)) { atomic_inc(&qp->usecnt); + flow_id->qp = qp; + } return flow_id; } EXPORT_SYMBOL(ib_create_flow); -- cgit v1.2.3 From 2d2215888d758c194efb38332aaf5e2069ac578e Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Thu, 27 Oct 2016 16:36:36 +0300 Subject: IB/mlx5: Replace numerical constant with predefined MACRO Replace the pre-defined macro signifying inline umr instead of the numerical constant. Signed-off-by: Max Gurtovoy Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 41f4c2afbcdd..00cffbfe6c35 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3068,7 +3068,7 @@ static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr) { memset(umr, 0, sizeof(*umr)); umr->mkey_mask = cpu_to_be64(MLX5_MKEY_MASK_FREE); - umr->flags = 1 << 7; + umr->flags = MLX5_UMR_INLINE; } static __be64 get_umr_reg_mr_mask(void) -- cgit v1.2.3 From 578e72647ba5a77a09004606cb572f0881c34e0d Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 27 Oct 2016 16:36:37 +0300 Subject: IB/mlx5: Fix atomic cap in indirect UMR Remove from the driver the limitation imposed by firmware check to not allow change of atomic permissions for indirect UMRs. In order to avoid failures on old firmware, we only ask for change of atomic permissions if atomic operations are supported. 
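To illustrate, the capability-gated mask construction at the heart of the change can be sketched in isolation as follows (condensed illustration of the hunks below; build_reg_mr_mask() is a hypothetical name and the mask list is abbreviated):

    static __be64 build_reg_mr_mask(bool atomic_supported)
    {
            u64 result = MLX5_MKEY_MASK_LEN |
                         MLX5_MKEY_MASK_PAGE_SIZE |
                         MLX5_MKEY_MASK_RR |
                         MLX5_MKEY_MASK_RW |
                         MLX5_MKEY_MASK_FREE;

            /* Ask the firmware to update atomic permissions only
             * when it reports atomic support; older firmware
             * rejects indirect UMRs that touch MLX5_MKEY_MASK_A.
             */
            if (atomic_supported)
                    result |= MLX5_MKEY_MASK_A;

            return cpu_to_be64(result);
    }

    /* call site, gated on the reported device capability */
    umr->mkey_mask = build_reg_mr_mask(!!MLX5_CAP_GEN(mdev, atomic));
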
Fixes: 968e78dd9644 ('IB/mlx5: Enhance UMR support to allow partial page table update') Signed-off-by: Eli Cohen Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 00cffbfe6c35..5bdf20c676fe 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -3071,7 +3071,7 @@ static void set_linv_umr_seg(struct mlx5_wqe_umr_ctrl_seg *umr) umr->flags = MLX5_UMR_INLINE; } -static __be64 get_umr_reg_mr_mask(void) +static __be64 get_umr_reg_mr_mask(int atomic) { u64 result; @@ -3084,9 +3084,11 @@ static __be64 get_umr_reg_mr_mask(void) MLX5_MKEY_MASK_KEY | MLX5_MKEY_MASK_RR | MLX5_MKEY_MASK_RW | - MLX5_MKEY_MASK_A | MLX5_MKEY_MASK_FREE; + if (atomic) + result |= MLX5_MKEY_MASK_A; + return cpu_to_be64(result); } @@ -3147,7 +3149,7 @@ static __be64 get_umr_update_pd_mask(void) } static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, - struct ib_send_wr *wr) + struct ib_send_wr *wr, int atomic) { struct mlx5_umr_wr *umrwr = umr_wr(wr); @@ -3172,7 +3174,7 @@ static void set_reg_umr_segment(struct mlx5_wqe_umr_ctrl_seg *umr, if (wr->send_flags & MLX5_IB_SEND_UMR_UPDATE_PD) umr->mkey_mask |= get_umr_update_pd_mask(); if (!umr->mkey_mask) - umr->mkey_mask = get_umr_reg_mr_mask(); + umr->mkey_mask = get_umr_reg_mr_mask(atomic); } else { umr->mkey_mask = get_umr_unreg_mr_mask(); } @@ -4025,7 +4027,7 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, } qp->sq.wr_data[idx] = MLX5_IB_WR_UMR; ctrl->imm = cpu_to_be32(umr_wr(wr)->mkey); - set_reg_umr_segment(seg, wr); + set_reg_umr_segment(seg, wr, !!(MLX5_CAP_GEN(mdev, atomic))); seg += sizeof(struct mlx5_wqe_umr_ctrl_seg); size += sizeof(struct mlx5_wqe_umr_ctrl_seg) / 16; if (unlikely((seg == qend))) -- cgit v1.2.3 From 86695a6582e3b1c4895de2bde4e1022b3a8fbda0 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 27 Oct 2016 16:36:38 +0300 Subject: IB/mlx5: Put non zero value in max_ah We put INT_MAX since this is the max value that can be held. Though there is no hardware limitation, this is practically a large enough number so we can use it. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 8e0dbd51944e..16ac41d89dfc 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -643,6 +643,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->max_total_mcast_qp_attach = props->max_mcast_qp_attach * props->max_mcast_grp; props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */ + props->max_ah = INT_MAX; props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz); props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL; -- cgit v1.2.3 From acbda523884dcf45613bf6818d8ead5180df35c2 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 27 Oct 2016 16:36:43 +0300 Subject: IB/mlx5: Wait for all async command completions to complete Wait before continuing unload till all pending mkey async creation requests are done. 
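The waiting scheme added below is a bounded poll; in isolation the pattern looks like this (minimal sketch with an illustrative helper name: 1000 iterations x 50 ms, i.e. a 50 second budget per cache entry):

    #include <linux/delay.h>

    /* Returns true if the pending counter drained within budget. */
    static bool drain_pending(const int *pending)
    {
            int j;

            for (j = 0; j < 1000; j++) {
                    if (!READ_ONCE(*pending))
                            return true;
                    msleep(50);
            }
            return false;
    }
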
Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Eli Cohen Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mr.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index d4ad672b905b..881166a9d868 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -645,6 +645,33 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) return 0; } +static void wait_for_async_commands(struct mlx5_ib_dev *dev) +{ + struct mlx5_mr_cache *cache = &dev->cache; + struct mlx5_cache_ent *ent; + int total = 0; + int i; + int j; + + for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { + ent = &cache->ent[i]; + for (j = 0 ; j < 1000; j++) { + if (!ent->pending) + break; + msleep(50); + } + } + for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { + ent = &cache->ent[i]; + total += ent->pending; + } + + if (total) + mlx5_ib_warn(dev, "aborted while there are %d pending mr requests\n", total); + else + mlx5_ib_warn(dev, "done with all pending requests\n"); +} + int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) { int i; @@ -658,6 +685,7 @@ int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) clean_keys(dev, i); destroy_workqueue(dev->cache.wq); + wait_for_async_commands(dev); del_timer_sync(&dev->delay_timer); return 0; -- cgit v1.2.3 From 288c01b746aab484651391ca6d64b585d3eb5ec6 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 27 Oct 2016 16:36:45 +0300 Subject: IB/mlx5: Fix reported max SGE calculation Add the 512 bytes limit of RDMA READ and the size of remote address to the max SGE calculation. Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Eli Cohen Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 8 +++++--- drivers/infiniband/hw/mlx5/qp.c | 29 ++++++++++++++++++++++++++++- 2 files changed, 33 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 16ac41d89dfc..46facd30e672 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -496,6 +496,7 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, struct mlx5_ib_dev *dev = to_mdev(ibdev); struct mlx5_core_dev *mdev = dev->mdev; int err = -ENOMEM; + int max_sq_desc; int max_rq_sg; int max_sq_sg; u64 min_page_size = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz); @@ -618,9 +619,10 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, props->max_qp_wr = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz); max_rq_sg = MLX5_CAP_GEN(mdev, max_wqe_sz_rq) / sizeof(struct mlx5_wqe_data_seg); - max_sq_sg = (MLX5_CAP_GEN(mdev, max_wqe_sz_sq) - - sizeof(struct mlx5_wqe_ctrl_seg)) / - sizeof(struct mlx5_wqe_data_seg); + max_sq_desc = min_t(int, MLX5_CAP_GEN(mdev, max_wqe_sz_sq), 512); + max_sq_sg = (max_sq_desc - sizeof(struct mlx5_wqe_ctrl_seg) - + sizeof(struct mlx5_wqe_raddr_seg)) / + sizeof(struct mlx5_wqe_data_seg); props->max_sge = min(max_rq_sg, max_sq_sg); props->max_sge_rd = MLX5_MAX_SGE_RD; props->max_cq = 1 << MLX5_CAP_GEN(mdev, log_max_cq); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 5bdf20c676fe..c84b07f9f111 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -352,6 +352,29 @@ static int 
calc_send_wqe(struct ib_qp_init_attr *attr) return ALIGN(max_t(int, inl_size, size), MLX5_SEND_WQE_BB); } +static int get_send_sge(struct ib_qp_init_attr *attr, int wqe_size) +{ + int max_sge; + + if (attr->qp_type == IB_QPT_RC) + max_sge = (min_t(int, wqe_size, 512) - + sizeof(struct mlx5_wqe_ctrl_seg) - + sizeof(struct mlx5_wqe_raddr_seg)) / + sizeof(struct mlx5_wqe_data_seg); + else if (attr->qp_type == IB_QPT_XRC_INI) + max_sge = (min_t(int, wqe_size, 512) - + sizeof(struct mlx5_wqe_ctrl_seg) - + sizeof(struct mlx5_wqe_xrc_seg) - + sizeof(struct mlx5_wqe_raddr_seg)) / + sizeof(struct mlx5_wqe_data_seg); + else + max_sge = (wqe_size - sq_overhead(attr)) / + sizeof(struct mlx5_wqe_data_seg); + + return min_t(int, max_sge, wqe_size - sq_overhead(attr) / + sizeof(struct mlx5_wqe_data_seg)); +} + static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, struct mlx5_ib_qp *qp) { @@ -388,7 +411,11 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, return -ENOMEM; } qp->sq.wqe_shift = ilog2(MLX5_SEND_WQE_BB); - qp->sq.max_gs = attr->cap.max_send_sge; + qp->sq.max_gs = get_send_sge(attr, wqe_size); + if (qp->sq.max_gs < attr->cap.max_send_sge) + return -ENOMEM; + + attr->cap.max_send_sge = qp->sq.max_gs; qp->sq.max_post = wq_size / wqe_size; attr->cap.max_send_wr = qp->sq.max_post; -- cgit v1.2.3 From 762f899ae7875554284af92b821be8c083227092 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Thu, 27 Oct 2016 16:36:47 +0300 Subject: IB/mlx5: Limit mkey page size to 2GB The maximum page size in the mkey context is 2GB. Until today, we didn't enforce this requirement in the code, and therefore, if we got a page size larger than 2GB, we have passed zeros in the log_page_shift instead of the actual value and the registration failed. This patch limits the driver to use compound pages of 2GB for mkeys. 
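For intuition, the clamp works out as follows with 4KB base pages (worked example; the concrete numbers are illustrative):

    /* log_page_size in the mkey context is a 5-bit field, so the
     * largest expressible compound page is 2^31 bytes = 2GB, and
     * MLX5_MKEY_PAGE_SHIFT_MASK (__mlx5_mask(mkc, log_page_size))
     * evaluates to 31.
     */
    unsigned long page_shift = 12;     /* 4KB base pages */
    unsigned long m = 25;              /* 2^25 contiguous pages = 128GB run */
    unsigned long max_page_shift = 31; /* MLX5_MKEY_PAGE_SHIFT_MASK */

    m = min_t(unsigned long, max_page_shift - page_shift, m);
    /* m is now 19: page_shift + m = 31, i.e. a 2GB compound page,
     * instead of overflowing log_page_size and registering zeros.
     */
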
Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Maor Gottlieb Signed-off-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/cq.c | 4 ++-- drivers/infiniband/hw/mlx5/mem.c | 7 ++++++- drivers/infiniband/hw/mlx5/mlx5_ib.h | 6 +++++- drivers/infiniband/hw/mlx5/mr.c | 3 ++- drivers/infiniband/hw/mlx5/qp.c | 4 ++-- drivers/infiniband/hw/mlx5/srq.c | 2 +- 6 files changed, 18 insertions(+), 8 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 79d017baf6f4..9e0598b5615f 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -770,7 +770,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, if (err) goto err_umem; - mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, &npages, &page_shift, + mlx5_ib_cont_pages(cq->buf.umem, ucmd.buf_addr, 0, &npages, &page_shift, &ncont, NULL); mlx5_ib_dbg(dev, "addr 0x%llx, size %u, npages %d, page_shift %d, ncont %d\n", ucmd.buf_addr, entries * ucmd.cqe_size, npages, page_shift, ncont); @@ -1125,7 +1125,7 @@ static int resize_user(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *cq, return err; } - mlx5_ib_cont_pages(umem, ucmd.buf_addr, &npages, page_shift, + mlx5_ib_cont_pages(umem, ucmd.buf_addr, 0, &npages, page_shift, npas, NULL); cq->resize_umem = umem; diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index 996b54e366b0..6851357c16f4 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -37,12 +37,15 @@ /* @umem: umem object to scan * @addr: ib virtual address requested by the user + * @max_page_shift: high limit for page_shift - 0 means no limit * @count: number of PAGE_SIZE pages covered by umem * @shift: page shift for the compound pages found in the region * @ncont: number of compund pages * @order: log2 of the number of compound pages */ -void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, +void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, + unsigned long max_page_shift, + int *count, int *shift, int *ncont, int *order) { unsigned long tmp; @@ -72,6 +75,8 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, addr = addr >> page_shift; tmp = (unsigned long)addr; m = find_first_bit(&tmp, BITS_PER_LONG); + if (max_page_shift) + m = min_t(unsigned long, max_page_shift - page_shift, m); skip = 1 << m; mask = skip - 1; i = 0; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index d5d007740159..95937e7d2584 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -63,6 +63,8 @@ pr_warn("%s:%s:%d:(pid %d): " format, (dev)->ib_dev.name, __func__, \ #define MLX5_IB_DEFAULT_UIDX 0xffffff #define MLX5_USER_ASSIGNED_UIDX_MASK __mlx5_mask(qpc, user_index) +#define MLX5_MKEY_PAGE_SHIFT_MASK __mlx5_mask(mkc, log_page_size) + enum { MLX5_IB_MMAP_CMD_SHIFT = 8, MLX5_IB_MMAP_CMD_MASK = 0xff, @@ -823,7 +825,9 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *props); int mlx5_ib_init_fmr(struct mlx5_ib_dev *dev); void mlx5_ib_cleanup_fmr(struct mlx5_ib_dev *dev); -void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift, +void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, + unsigned long max_page_shift, + int *count, int *shift, int *ncont, int *order); void __mlx5_ib_populate_pas(struct 
mlx5_ib_dev *dev, struct ib_umem *umem, int page_shift, size_t offset, size_t num_pages, diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 881166a9d868..6cbda901f259 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -855,7 +855,8 @@ static struct ib_umem *mr_umem_get(struct ib_pd *pd, u64 start, u64 length, return (void *)umem; } - mlx5_ib_cont_pages(umem, start, npages, page_shift, ncont, order); + mlx5_ib_cont_pages(umem, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages, + page_shift, ncont, order); if (!*npages) { mlx5_ib_warn(dev, "avoid zero region\n"); ib_umem_release(umem); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index c84b07f9f111..aa27688f5ae9 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -675,7 +675,7 @@ static int mlx5_ib_umem_get(struct mlx5_ib_dev *dev, return PTR_ERR(*umem); } - mlx5_ib_cont_pages(*umem, addr, npages, page_shift, ncont, NULL); + mlx5_ib_cont_pages(*umem, addr, 0, npages, page_shift, ncont, NULL); err = mlx5_ib_get_buf_offset(addr, *page_shift, offset); if (err) { @@ -728,7 +728,7 @@ static int create_user_rq(struct mlx5_ib_dev *dev, struct ib_pd *pd, return err; } - mlx5_ib_cont_pages(rwq->umem, ucmd->buf_addr, &npages, &page_shift, + mlx5_ib_cont_pages(rwq->umem, ucmd->buf_addr, 0, &npages, &page_shift, &ncont, NULL); err = mlx5_ib_get_buf_offset(ucmd->buf_addr, page_shift, &rwq->rq_page_offset); diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index 3857dbd9c956..f384db5367fb 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -118,7 +118,7 @@ static int create_srq_user(struct ib_pd *pd, struct mlx5_ib_srq *srq, return err; } - mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, &npages, + mlx5_ib_cont_pages(srq->umem, ucmd.buf_addr, 0, &npages, &page_shift, &ncont, NULL); err = mlx5_ib_get_buf_offset(ucmd.buf_addr, page_shift, &offset); -- cgit v1.2.3 From 0b59970e7d96edcb3c7f651d9d48e1a59af3c3b0 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Thu, 10 Nov 2016 10:16:48 +0200 Subject: IB/IPoIB: Remove can't use GFP_NOIO warning Remove the warning print of "can't use of GFP_NOIO" to avoid prints in each QP creation when devices aren't supporting IB_QP_CREATE_USE_GFP_NOIO. This print become more annoying when the IPoIB interface is configured to work in connected mode. 
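After this change the creation path is a plain try-and-fall-back, condensed here for reference (sketch of the code touched by the hunk below):

    attr.create_flags |= IB_QP_CREATE_USE_GFP_NOIO;
    tx_qp = ib_create_qp(priv->pd, &attr);
    if (PTR_ERR(tx_qp) == -EINVAL) {
            /* Device can't honor GFP_NOIO QPs; retry silently
             * with regular GFP_KERNEL allocations.
             */
            attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO;
            tx_qp = ib_create_qp(priv->pd, &attr);
    }
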
Fixes: 09b93088d750 ('IB: Add a QP creation flag to use GFP_NOIO allocations') Signed-off-by: Kamal Heib Signed-off-by: Leon Romanovsky Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index 4ad297d3de89..50c9772bde32 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -1053,8 +1053,6 @@ static struct ib_qp *ipoib_cm_create_tx_qp(struct net_device *dev, struct ipoib_ tx_qp = ib_create_qp(priv->pd, &attr); if (PTR_ERR(tx_qp) == -EINVAL) { - ipoib_warn(priv, "can't use GFP_NOIO for QPs on device %s, using GFP_KERNEL\n", - priv->ca->name); attr.create_flags &= ~IB_QP_CREATE_USE_GFP_NOIO; tx_qp = ib_create_qp(priv->pd, &attr); } -- cgit v1.2.3 From af4295c117b82a521b05d0daf39ce879d26e6cb1 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 10 Nov 2016 11:30:53 +0200 Subject: IB/mlx4: Set traffic class in AH Set traffic class within sl_tclass_flowlabel when create iboe AH. Without this the TOS value will be empty when running VLAN tagged traffic, because the TOS value is taken from the traffic class in the address handle attributes. Fixes: 9106c4106974 ('IB/mlx4: Fix SL to 802.1Q priority-bits mapping for IBoE') Signed-off-by: Maor Gottlieb Signed-off-by: Daniel Jurgens Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/ah.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 5fc623362731..6be7dc320ff7 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -111,7 +111,9 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr !(1 << ah->av.eth.stat_rate & dev->caps.stat_rate_support)) --ah->av.eth.stat_rate; } - + ah->av.eth.sl_tclass_flowlabel |= + cpu_to_be32((ah_attr->grh.traffic_class << 20) | + ah_attr->grh.flow_label); /* * HW requires multicast LID so we just choose one. */ @@ -119,7 +121,7 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr ah->av.ib.dlid = cpu_to_be16(0xc000); memcpy(ah->av.eth.dgid, ah_attr->grh.dgid.raw, 16); - ah->av.eth.sl_tclass_flowlabel = cpu_to_be32(ah_attr->sl << 29); + ah->av.eth.sl_tclass_flowlabel |= cpu_to_be32(ah_attr->sl << 29); return &ah->ibah; } -- cgit v1.2.3 From 850d8fd765079d223d47de89f1f03ab95d1036cb Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Thu, 10 Nov 2016 11:30:56 +0200 Subject: IB/mlx4: Handle IPv4 header when demultiplexing MAD When MAD arrives to the hypervisor, we need to identify which slave it should be sent by destination GID. When L3 protocol is IPv4 the GRH is replaced by an IPv4 header. This patch detects when IPv4 header needs to be parsed instead of GRH. 
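The detection hinges on the version nibble shared by both layouts; a minimal sketch of the idea (the in-tree helper, shown below, additionally validates the IPv4 checksum before trusting the version bits):

    const struct iphdr *ip4h = (const struct iphdr *)&hdr->roce4grh;
    enum rdma_network_type net_type;

    /* GRH and IPv4 header occupy the same slot in the work
     * completion; the 4-bit version field tells them apart.
     */
    if (ip4h->version == 4)
            net_type = RDMA_NETWORK_IPV4;   /* RoCEv2 over IPv4 */
    else
            net_type = RDMA_NETWORK_IPV6;   /* GRH or RoCEv2 over IPv6 */
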
Fixes: b6ffaeffaea4 ('mlx4: In RoCE allow guests to have multiple GIDS') Signed-off-by: Moni Shoua Signed-off-by: Daniel Jurgens Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/verbs.c | 16 +++++++++------- drivers/infiniband/hw/mlx4/mad.c | 33 ++++++++++++++++++++++++++++++--- include/rdma/ib_verbs.h | 19 +++++++++++++++++++ 3 files changed, 58 insertions(+), 10 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index b7f9f3d95975..98e10c5f2a96 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -328,7 +328,7 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) } EXPORT_SYMBOL(ib_create_ah); -static int ib_get_header_version(const union rdma_network_hdr *hdr) +int ib_get_rdma_header_version(const union rdma_network_hdr *hdr) { const struct iphdr *ip4h = (struct iphdr *)&hdr->roce4grh; struct iphdr ip4h_checked; @@ -359,6 +359,7 @@ static int ib_get_header_version(const union rdma_network_hdr *hdr) return 4; return 6; } +EXPORT_SYMBOL(ib_get_rdma_header_version); static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device, u8 port_num, @@ -369,7 +370,7 @@ static enum rdma_network_type ib_get_net_type_by_grh(struct ib_device *device, if (rdma_protocol_ib(device, port_num)) return RDMA_NETWORK_IB; - grh_version = ib_get_header_version((union rdma_network_hdr *)grh); + grh_version = ib_get_rdma_header_version((union rdma_network_hdr *)grh); if (grh_version == 4) return RDMA_NETWORK_IPV4; @@ -415,9 +416,9 @@ static int get_sgid_index_from_eth(struct ib_device *device, u8 port_num, &context, gid_index); } -static int get_gids_from_rdma_hdr(union rdma_network_hdr *hdr, - enum rdma_network_type net_type, - union ib_gid *sgid, union ib_gid *dgid) +int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr, + enum rdma_network_type net_type, + union ib_gid *sgid, union ib_gid *dgid) { struct sockaddr_in src_in; struct sockaddr_in dst_in; @@ -447,6 +448,7 @@ static int get_gids_from_rdma_hdr(union rdma_network_hdr *hdr, return -EINVAL; } } +EXPORT_SYMBOL(ib_get_gids_from_rdma_hdr); int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, const struct ib_wc *wc, const struct ib_grh *grh, @@ -469,8 +471,8 @@ int ib_init_ah_from_wc(struct ib_device *device, u8 port_num, net_type = ib_get_net_type_by_grh(device, port_num, grh); gid_type = ib_network_to_gid_type(net_type); } - ret = get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type, - &sgid, &dgid); + ret = ib_get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, net_type, + &sgid, &dgid); if (ret) return ret; diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 1672907ff219..b8e90130db37 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -39,6 +39,8 @@ #include #include #include +#include +#include #include #include "mlx4_ib.h" @@ -480,6 +482,23 @@ static int find_slave_port_pkey_ix(struct mlx4_ib_dev *dev, int slave, return -EINVAL; } +static int get_gids_from_l3_hdr(struct ib_grh *grh, union ib_gid *sgid, + union ib_gid *dgid) +{ + int version = ib_get_rdma_header_version((const union rdma_network_hdr *)grh); + enum rdma_network_type net_type; + + if (version == 4) + net_type = RDMA_NETWORK_IPV4; + else if (version == 6) + net_type = RDMA_NETWORK_IPV6; + else + return -EINVAL; + + return ib_get_gids_from_rdma_hdr((union rdma_network_hdr *)grh, 
net_type, + sgid, dgid); +} + int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, enum ib_qp_type dest_qpt, struct ib_wc *wc, struct ib_grh *grh, struct ib_mad *mad) @@ -538,7 +557,10 @@ int mlx4_ib_send_to_slave(struct mlx4_ib_dev *dev, int slave, u8 port, memset(&attr, 0, sizeof attr); attr.port_num = port; if (is_eth) { - memcpy(&attr.grh.dgid.raw[0], &grh->dgid.raw[0], 16); + union ib_gid sgid; + + if (get_gids_from_l3_hdr(grh, &sgid, &attr.grh.dgid)) + return -EINVAL; attr.ah_flags = IB_AH_GRH; } ah = ib_create_ah(tun_ctx->pd, &attr); @@ -651,6 +673,11 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port, is_eth = 1; if (is_eth) { + union ib_gid dgid; + union ib_gid sgid; + + if (get_gids_from_l3_hdr(grh, &sgid, &dgid)) + return -EINVAL; if (!(wc->wc_flags & IB_WC_GRH)) { mlx4_ib_warn(ibdev, "RoCE grh not present.\n"); return -EINVAL; @@ -659,10 +686,10 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port, mlx4_ib_warn(ibdev, "RoCE mgmt class is not CM\n"); return -EINVAL; } - err = mlx4_get_slave_from_roce_gid(dev->dev, port, grh->dgid.raw, &slave); + err = mlx4_get_slave_from_roce_gid(dev->dev, port, dgid.raw, &slave); if (err && mlx4_is_mf_bonded(dev->dev)) { other_port = (port == 1) ? 2 : 1; - err = mlx4_get_slave_from_roce_gid(dev->dev, other_port, grh->dgid.raw, &slave); + err = mlx4_get_slave_from_roce_gid(dev->dev, other_port, dgid.raw, &slave); if (!err) { port = other_port; pr_debug("resolved slave %d from gid %pI6 wire port %d other %d\n", diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 5ad43a487745..467a4b476e29 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2580,6 +2580,24 @@ void ib_dealloc_pd(struct ib_pd *pd); */ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); +/** + * ib_get_gids_from_rdma_hdr - Get sgid and dgid from GRH or IPv4 header + * work completion. + * @hdr: the L3 header to parse + * @net_type: type of header to parse + * @sgid: place to store source gid + * @dgid: place to store destination gid + */ +int ib_get_gids_from_rdma_hdr(const union rdma_network_hdr *hdr, + enum rdma_network_type net_type, + union ib_gid *sgid, union ib_gid *dgid); + +/** + * ib_get_rdma_header_version - Get the header version + * @hdr: the L3 header to parse + */ +int ib_get_rdma_header_version(const union rdma_network_hdr *hdr); + /** * ib_init_ah_from_wc - Initializes address handle attributes from a * work completion. @@ -3357,4 +3375,5 @@ int ib_sg_to_pages(struct ib_mr *mr, struct scatterlist *sgl, int sg_nents, void ib_drain_rq(struct ib_qp *qp); void ib_drain_sq(struct ib_qp *qp); void ib_drain_qp(struct ib_qp *qp); + #endif /* IB_VERBS_H */ -- cgit v1.2.3 From befcabcd530e4ffb6f016638f693b7d94986d2ba Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Thu, 10 Nov 2016 11:30:57 +0200 Subject: IB/mlx4: Handle well-known-gid in mad_demux processing If OpenSM runs over a ConnectX-3, and there are ConnectX-4 or Connect-IB VFs active on the network, the OpenSM will receive QP1 packets containing a GRH where the destination GID is the "Well-Known GID" -- which is not a GID in the HCA Port's GID Table. This GID must be tested-for separately -- and packets which contain this destination GID should be routed to slave 0 (the PF). 
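The test itself can be expressed as a small predicate (hypothetical helper; the in-tree check in the hunk below is open-coded):

    static bool is_well_known_gid(const union ib_gid *gid, u64 subnet_prefix)
    {
            return gid->global.interface_id ==
                            cpu_to_be64(IB_SA_WELL_KNOWN_GUID) &&
                   gid->global.subnet_prefix == cpu_to_be64(subnet_prefix);
    }
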
Fixes: 37bfc7c1e83f ('IB/mlx4: SR-IOV multiplex and demultiplex MADs') Signed-off-by: Jack Morgenstein Signed-off-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/mad.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index b8e90130db37..7bcae4115408 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -729,10 +729,18 @@ static int mlx4_ib_demux_mad(struct ib_device *ibdev, u8 port, /* If a grh is present, we demux according to it */ if (wc->wc_flags & IB_WC_GRH) { - slave = mlx4_ib_find_real_gid(ibdev, port, grh->dgid.global.interface_id); - if (slave < 0) { - mlx4_ib_warn(ibdev, "failed matching grh\n"); - return -ENOENT; + if (grh->dgid.global.interface_id == + cpu_to_be64(IB_SA_WELL_KNOWN_GUID) && + grh->dgid.global.subnet_prefix == cpu_to_be64( + atomic64_read(&dev->sriov.demux[port - 1].subnet_prefix))) { + slave = 0; + } else { + slave = mlx4_ib_find_real_gid(ibdev, port, + grh->dgid.global.interface_id); + if (slave < 0) { + mlx4_ib_warn(ibdev, "failed matching grh\n"); + return -ENOENT; + } } } /* Class-specific handling */ -- cgit v1.2.3 From 731e0415b4af3a133d0316e4dc8ef0ea57dc3fdf Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 10 Nov 2016 11:30:58 +0200 Subject: IB/mlx4: Put non zero value in max_ah device attribute Use INT_MAX since this is the max value the attribute can hold, though hardware capability is unlimited. Fixes: 225c7b1feef1 ('IB/mlx4: Add a driver Mellanox ConnectX InfiniBand adapters') Signed-off-by: Maor Gottlieb Signed-off-by: Daniel Jurgens Reviewed-by: Mark Bloch Reviewed-by: Yuval Shaia Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/main.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index b597e8227591..05ab3cb34cb3 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -547,6 +547,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->max_map_per_fmr = dev->dev->caps.max_fmr_maps; props->hca_core_clock = dev->dev->caps.hca_core_clock * 1000UL; props->timestamp_mask = 0xFFFFFFFFFFFFULL; + props->max_ah = INT_MAX; if (!mlx4_is_slave(dev->dev)) err = mlx4_get_internal_clock_params(dev->dev, &clock_params); -- cgit v1.2.3 From 6fa26208206c406fa529cd73f7ae6bf4181e270b Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 10 Nov 2016 11:30:59 +0200 Subject: IB/mlx4: Fix port query for 56Gb Ethernet links Report the correct speed in the port attributes when using a 56Gbps ethernet link. Without this change the field is incorrectly set to 10. 
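For reference, the mailbox encoding and the resulting speed/width pairs (condensed from the hunk below; FDR is ~14 Gb/s per lane, so 4X FDR ≈ 56 Gb/s, while 4X QDR = 40 Gb/s):

    u8 link = ((u8 *)mailbox->buf)[5]; /* byte 5 of the QUERY_PORT reply */

    /* 0x40 -> 40GbE: 4X QDR (4 x 10 Gb/s)
     * 0x20 -> 56GbE: 4X FDR (4 x ~14 Gb/s)
     * else -> 10GbE: 1X QDR
     */
    props->active_width = (link == 0x40 || link == 0x20) ?
                          IB_WIDTH_4X : IB_WIDTH_1X;
    props->active_speed = (link == 0x20) ? IB_SPEED_FDR : IB_SPEED_QDR;
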
Fixes: a9c766bb75ee ('IB/mlx4: Fix info returned when querying IBoE ports') Fixes: 2e96691c31ec ('IB: Use central enum for speed instead of hard-coded values') Signed-off-by: Saeed Mahameed Signed-off-by: Yishai Hadas Signed-off-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/main.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 05ab3cb34cb3..4054a1bfabb5 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -698,9 +698,11 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port, if (err) goto out; - props->active_width = (((u8 *)mailbox->buf)[5] == 0x40) ? - IB_WIDTH_4X : IB_WIDTH_1X; - props->active_speed = IB_SPEED_QDR; + props->active_width = (((u8 *)mailbox->buf)[5] == 0x40) || + (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ? + IB_WIDTH_4X : IB_WIDTH_1X; + props->active_speed = (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ? + IB_SPEED_FDR : IB_SPEED_QDR; props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS; props->gid_tbl_len = mdev->dev->caps.gid_table_len[port]; props->max_msg_sz = mdev->dev->caps.max_msg_sz; -- cgit v1.2.3 From 1f22e454df2eb99ba6b7ace3f594f6805cdf5cbc Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 10 Nov 2016 11:31:00 +0200 Subject: IB/mlx4: When no DMFS for IPoIB, don't allow NET_IF QPs According to the firmware spec, FLOW_STEERING_IB_UC_QP_RANGE command is supported only if dmfs_ipoib bit is set. If it isn't set we want to ensure allocating NET_IF QPs fail. We do so by filling out the allocation bitmap. By thus, the NET_IF QPs allocating function won't find any free QP and will fail. Fixes: c1c98501121e ('IB/mlx4: Add support for steerable IB UD QPs') Signed-off-by: Eran Ben Elisha Signed-off-by: Daniel Jurgens Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/main.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 4054a1bfabb5..4a67ffc1ddfb 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2823,14 +2823,19 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) goto err_steer_qp_release; } - bitmap_zero(ibdev->ib_uc_qpns_bitmap, ibdev->steer_qpn_count); - - err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE( - dev, ibdev->steer_qpn_base, - ibdev->steer_qpn_base + - ibdev->steer_qpn_count - 1); - if (err) - goto err_steer_free_bitmap; + if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_DMFS_IPOIB) { + bitmap_zero(ibdev->ib_uc_qpns_bitmap, + ibdev->steer_qpn_count); + err = mlx4_FLOW_STEERING_IB_UC_QP_RANGE( + dev, ibdev->steer_qpn_base, + ibdev->steer_qpn_base + + ibdev->steer_qpn_count - 1); + if (err) + goto err_steer_free_bitmap; + } else { + bitmap_fill(ibdev->ib_uc_qpns_bitmap, + ibdev->steer_qpn_count); + } } for (j = 1; j <= ibdev->dev->caps.num_ports; j++) -- cgit v1.2.3 From bf08e884bfd5be068fd2ccf2bc450f085d8dd853 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 10 Nov 2016 11:31:01 +0200 Subject: IB/mlx4: Check if GRH is available before using it Before reading GRH attributes, need to make sure AH contains GRH, and in addition, initialize GID type. 
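The fix boils down to a guard plus a safe default, sketched here (fragment mirroring the hunk below):

    struct ib_gid_attr gid_attr = {.gid_type = IB_GID_TYPE_IB};
    union ib_gid gid;
    int status = 0;

    /* Dereference GRH fields only when the AH carries a GRH;
     * otherwise gid_attr keeps the IB_GID_TYPE_IB default.
     */
    if (is_eth && (attr->ah_attr.ah_flags & IB_AH_GRH))
            status = ib_get_cached_gid(ibqp->device, port_num,
                                       attr->ah_attr.grh.sgid_index,
                                       &gid, &gid_attr);
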
Fixes: dbf727de7440 ('IB/core: Use GID table in AH creation and dmac resolution') Signed-off-by: Eran Ben Elisha Signed-off-by: Daniel Jurgens Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/qp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 570bc866b1d6..0914731c9aac 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1764,14 +1764,14 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, u8 port_num = mlx4_is_bonded(to_mdev(ibqp->device)->dev) ? 1 : attr_mask & IB_QP_PORT ? attr->port_num : qp->port; union ib_gid gid; - struct ib_gid_attr gid_attr; + struct ib_gid_attr gid_attr = {.gid_type = IB_GID_TYPE_IB}; u16 vlan = 0xffff; u8 smac[ETH_ALEN]; int status = 0; int is_eth = rdma_cap_eth_ah(&dev->ib_dev, port_num) && attr->ah_attr.ah_flags & IB_AH_GRH; - if (is_eth) { + if (is_eth && attr->ah_attr.ah_flags & IB_AH_GRH) { int index = attr->ah_attr.grh.sgid_index; status = ib_get_cached_gid(ibqp->device, port_num, -- cgit v1.2.3 From d680ebed91e0b45c43ae03a880a0b43211096161 Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Wed, 16 Nov 2016 10:39:16 +0200 Subject: IB/rxe: Increase max number of completions to 32k Increase limit of max CQE from 8K to 32K to allow demanding applications to work over SoftRoCE with same configuration as most RoCEv2 HW vendors have. Fixes: 8700e3e7c485 ("Soft RoCE driver") Signed-off-by: Yonatan Cohen Reviewed-by: Moni Shoua Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/sw/rxe/rxe_param.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/sw/rxe/rxe_param.h b/drivers/infiniband/sw/rxe/rxe_param.h index f459c43a77c8..13ed2cc6eaa2 100644 --- a/drivers/infiniband/sw/rxe/rxe_param.h +++ b/drivers/infiniband/sw/rxe/rxe_param.h @@ -82,7 +82,7 @@ enum rxe_device_param { RXE_MAX_SGE = 32, RXE_MAX_SGE_RD = 32, RXE_MAX_CQ = 16384, - RXE_MAX_LOG_CQE = 13, + RXE_MAX_LOG_CQE = 15, RXE_MAX_MR = 2 * 1024, RXE_MAX_PD = 0x7ffc, RXE_MAX_QP_RD_ATOM = 128, -- cgit v1.2.3 From 191ded4a4d991acf17207e0b4370fef070bce3e9 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Mon, 31 Oct 2016 12:15:21 +0200 Subject: IB/mlx5: Report mlx5 multi packet WQE caps during query The capabilities whether hardware support multi packet WQE or not is exposed to user space through query_device by uhw. 
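The uhw checks below gate each response field on the size of the caller's output buffer, and resp.response_length reports how much was actually filled; field_avail() is defined along these lines in the mlx5 driver (reproduced here for context, exact wording may differ slightly):

    /* True if the caller's output buffer (sz bytes) is large
     * enough to receive field fld of the response struct.
     */
    #define field_avail(type, fld, sz) \
            (offsetof(type, fld) + sizeof(((type *)0)->fld) <= (sz))
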
Signed-off-by: Bodong Wang Reviewed-by: Matan Barak Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 11 +++++++++++ include/uapi/rdma/mlx5-abi.h | 2 ++ 2 files changed, 13 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 46facd30e672..a16207c33333 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -672,6 +672,17 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, 1 << MLX5_CAP_GEN(dev->mdev, log_max_rq); } + if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes, + uhw->outlen)) { + resp.mlx5_ib_support_multi_pkt_send_wqes = + MLX5_CAP_ETH(mdev, multi_pkt_send_wqe); + resp.response_length += + sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes); + } + + if (field_avail(typeof(resp), reserved, uhw->outlen)) + resp.response_length += sizeof(resp.reserved); + if (uhw->outlen) { err = ib_copy_to_udata(uhw, &resp, resp.response_length); diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index f5d0f4e83b59..93d6b9fe8e78 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -129,6 +129,8 @@ struct mlx5_ib_query_device_resp { __u32 response_length; struct mlx5_ib_tso_caps tso_caps; struct mlx5_ib_rss_caps rss_caps; + __u32 mlx5_ib_support_multi_pkt_send_wqes; + __u32 reserved; }; struct mlx5_ib_create_cq { -- cgit v1.2.3 From 7e43a2a5bae39fedaa7cce21d637e0c8d96d8e54 Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Mon, 31 Oct 2016 12:16:44 +0200 Subject: IB/mlx5: Report mlx5 CQE compression caps during query The capabilities include: - Max number of compressed and aggregated CQEs in a single session, while zero means unsupported. - For Responder, there are two formats of mini CQE: mini CQE with Rx hash and mini CQE with checksum. They're mutual exclusive. Signed-off-by: Bodong Wang Reviewed-by: Matan Barak Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 10 ++++++++++ include/uapi/rdma/mlx5-abi.h | 12 ++++++++++++ 2 files changed, 22 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index a16207c33333..2687a93c4af2 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -683,6 +683,16 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, if (field_avail(typeof(resp), reserved, uhw->outlen)) resp.response_length += sizeof(resp.reserved); + if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) { + resp.cqe_comp_caps.max_num = + MLX5_CAP_GEN(dev->mdev, cqe_compression) ? 
+ MLX5_CAP_GEN(dev->mdev, cqe_compression_max_num) : 0; + resp.cqe_comp_caps.supported_format = + MLX5_IB_CQE_RES_FORMAT_HASH | + MLX5_IB_CQE_RES_FORMAT_CSUM; + resp.response_length += sizeof(resp.cqe_comp_caps); + } + if (uhw->outlen) { err = ib_copy_to_udata(uhw, &resp, resp.response_length); diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 93d6b9fe8e78..6649d13a2dbb 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -124,11 +124,23 @@ struct mlx5_ib_rss_caps { __u8 reserved[7]; }; +enum mlx5_ib_cqe_comp_res_format { + MLX5_IB_CQE_RES_FORMAT_HASH = 1 << 0, + MLX5_IB_CQE_RES_FORMAT_CSUM = 1 << 1, + MLX5_IB_CQE_RES_RESERVED = 1 << 2, +}; + +struct mlx5_ib_cqe_comp_caps { + __u32 max_num; + __u32 supported_format; /* enum mlx5_ib_cqe_comp_res_format */ +}; + struct mlx5_ib_query_device_resp { __u32 comp_mask; __u32 response_length; struct mlx5_ib_tso_caps tso_caps; struct mlx5_ib_rss_caps rss_caps; + struct mlx5_ib_cqe_comp_caps cqe_comp_caps; __u32 mlx5_ib_support_multi_pkt_send_wqes; __u32 reserved; }; -- cgit v1.2.3 From 1cbe6fc86ccfe05a910be4883da7c7bd28c190fe Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Mon, 31 Oct 2016 12:16:45 +0200 Subject: IB/mlx5: Add support for CQE compressing CQE compressing reduces PCI overhead by coalescing and compressing multiple CQEs into a single merged CQE. Successful compressing improves message rate especially for small packet traffic. CQE compressing is supported for all 64B CQE formats (with certain limitations) generated by RQ/Responder or by SQ/Requestor. Signed-off-by: Bodong Wang Reviewed-by: Matan Barak Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/cq.c | 30 +++++++++++++++++++++++++++++- include/uapi/rdma/mlx5-abi.h | 4 +++- 2 files changed, 32 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c index 9e0598b5615f..d72a4367c891 100644 --- a/drivers/infiniband/hw/mlx5/cq.c +++ b/drivers/infiniband/hw/mlx5/cq.c @@ -731,7 +731,7 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, int entries, u32 **cqb, int *cqe_size, int *index, int *inlen) { - struct mlx5_ib_create_cq ucmd; + struct mlx5_ib_create_cq ucmd = {}; size_t ucmdlen; int page_shift; __be64 *pas; @@ -792,8 +792,36 @@ static int create_cq_user(struct mlx5_ib_dev *dev, struct ib_udata *udata, *index = to_mucontext(context)->uuari.uars[0].index; + if (ucmd.cqe_comp_en == 1) { + if (unlikely((*cqe_size != 64) || + !MLX5_CAP_GEN(dev->mdev, cqe_compression))) { + err = -EOPNOTSUPP; + mlx5_ib_warn(dev, "CQE compression is not supported for size %d!\n", + *cqe_size); + goto err_cqb; + } + + if (unlikely(!ucmd.cqe_comp_res_format || + !(ucmd.cqe_comp_res_format < + MLX5_IB_CQE_RES_RESERVED) || + (ucmd.cqe_comp_res_format & + (ucmd.cqe_comp_res_format - 1)))) { + err = -EOPNOTSUPP; + mlx5_ib_warn(dev, "CQE compression res format %d is not supported!\n", + ucmd.cqe_comp_res_format); + goto err_cqb; + } + + MLX5_SET(cqc, cqc, cqe_comp_en, 1); + MLX5_SET(cqc, cqc, mini_cqe_res_format, + ilog2(ucmd.cqe_comp_res_format)); + } + return 0; +err_cqb: + kfree(cqb); + err_db: mlx5_ib_db_unmap_user(to_mucontext(context), &cq->db); diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 6649d13a2dbb..b0a41c8dc1fb 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -149,7 +149,9 @@ struct mlx5_ib_create_cq { __u64 buf_addr; __u64 db_addr; 
__u32 cqe_size; - __u32 reserved; /* explicit padding (optional on i386) */ + __u8 cqe_comp_en; + __u8 cqe_comp_res_format; + __u16 reserved; /* explicit padding (optional on i386) */ }; struct mlx5_ib_create_cq_resp { -- cgit v1.2.3 From 0dbf3332b7b683db33a385a3ce9baab157e3ff9a Mon Sep 17 00:00:00 2001 From: Moses Reuben Date: Mon, 14 Nov 2016 19:04:47 +0200 Subject: IB/core: Add flow spec tunneling support In order to support tunneling, that can be used by the QP, both struct ib_flow_spec_tunnel and struct ib_flow_tunnel_filter can be used to more IP or UDP based tunneling protocols (e.g NVGRE, GRE, etc). IB_FLOW_SPEC_VXLAN_TUNNEL type flow specification is added to use this functionality and match specific Vxlan packets. In similar to IPv6, we check overflow of the vni value by comparing with the maximum size. Signed-off-by: Moses Reuben Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 15 +++++++++++++++ include/rdma/ib_verbs.h | 19 ++++++++++++++++++- 2 files changed, 33 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 2690c9263ee4..deb06fa3548b 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3175,6 +3175,21 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec, memcpy(&ib_spec->tcp_udp.val, kern_spec_val, actual_filter_sz); memcpy(&ib_spec->tcp_udp.mask, kern_spec_mask, actual_filter_sz); break; + case IB_FLOW_SPEC_VXLAN_TUNNEL: + ib_filter_sz = offsetof(struct ib_flow_tunnel_filter, real_sz); + actual_filter_sz = spec_filter_size(kern_spec_mask, + kern_filter_sz, + ib_filter_sz); + if (actual_filter_sz <= 0) + return -EINVAL; + ib_spec->tunnel.size = sizeof(struct ib_flow_spec_tunnel); + memcpy(&ib_spec->tunnel.val, kern_spec_val, actual_filter_sz); + memcpy(&ib_spec->tunnel.mask, kern_spec_mask, actual_filter_sz); + + if ((ntohl(ib_spec->tunnel.mask.tunnel_id)) >= BIT(24) || + (ntohl(ib_spec->tunnel.val.tunnel_id)) >= BIT(24)) + return -EINVAL; + break; default: return -EINVAL; } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 467a4b476e29..b8213818de89 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1599,7 +1599,8 @@ enum ib_flow_spec_type { IB_FLOW_SPEC_IPV6 = 0x31, /* L4 headers*/ IB_FLOW_SPEC_TCP = 0x40, - IB_FLOW_SPEC_UDP = 0x41 + IB_FLOW_SPEC_UDP = 0x41, + IB_FLOW_SPEC_VXLAN_TUNNEL = 0x50, }; #define IB_FLOW_SPEC_LAYER_MASK 0xF0 #define IB_FLOW_SPEC_SUPPORT_LAYERS 4 @@ -1707,6 +1708,21 @@ struct ib_flow_spec_tcp_udp { struct ib_flow_tcp_udp_filter mask; }; +struct ib_flow_tunnel_filter { + __be32 tunnel_id; + u8 real_sz[0]; +}; + +/* ib_flow_spec_tunnel describes the Vxlan tunnel + * the tunnel_id from val has the vni value + */ +struct ib_flow_spec_tunnel { + enum ib_flow_spec_type type; + u16 size; + struct ib_flow_tunnel_filter val; + struct ib_flow_tunnel_filter mask; +}; + union ib_flow_spec { struct { enum ib_flow_spec_type type; @@ -1717,6 +1733,7 @@ union ib_flow_spec { struct ib_flow_spec_ipv4 ipv4; struct ib_flow_spec_tcp_udp tcp_udp; struct ib_flow_spec_ipv6 ipv6; + struct ib_flow_spec_tunnel tunnel; }; struct ib_flow_attr { -- cgit v1.2.3 From ffb30d8f107b27820a069ae6772ab48e58cc0b2f Mon Sep 17 00:00:00 2001 From: Moses Reuben Date: Mon, 14 Nov 2016 19:04:50 +0200 Subject: IB/mlx5: Support Vxlan tunneling specification Add support to receive specific Vxlan packet in 
ConnectX-4. Signed-off-by: Moses Reuben Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 2687a93c4af2..a9e5851365d3 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1539,6 +1539,7 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val) #define LAST_IPV4_FIELD tos #define LAST_IPV6_FIELD traffic_class #define LAST_TCP_UDP_FIELD src_port +#define LAST_TUNNEL_FIELD tunnel_id /* Field is the last supported field */ #define FIELDS_NOT_SUPPORTED(filter, field)\ @@ -1722,6 +1723,16 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v, MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_dport, ntohs(ib_spec->tcp_udp.val.dst_port)); break; + case IB_FLOW_SPEC_VXLAN_TUNNEL: + if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask, + LAST_TUNNEL_FIELD)) + return -ENOTSUPP; + + MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni, + ntohl(ib_spec->tunnel.mask.tunnel_id)); + MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni, + ntohl(ib_spec->tunnel.val.tunnel_id)); + break; default: return -EINVAL; } -- cgit v1.2.3 From fbf46860b19ddb485f00bef1ad1a43aabc9f71ad Mon Sep 17 00:00:00 2001 From: Moses Reuben Date: Mon, 14 Nov 2016 19:04:51 +0200 Subject: IB/core: Introduce inner flow steering For a tunneled packet which contains external and internal headers, we refer to the external headers as "outer fields" and the internal headers as "inner fields". Example of a tunneled packet: { L2 | L3 | L4 | tunnel header | L2 | L3 | l4 | data } | | | | | | | { outer fields }{ inner fields } This patch introduces a new flag for flow steering rules - IB_FLOW_SPEC_INNER - which specifies that the rule applies to the inner fields, rather than to the outer fields of the packet. Signed-off-by: Moses Reuben Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 4 +++- include/rdma/ib_verbs.h | 17 +++++++++-------- 2 files changed, 12 insertions(+), 9 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index deb06fa3548b..d84dcde7a8bb 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3124,8 +3124,10 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec, kern_spec_val = (void *)kern_spec + sizeof(struct ib_uverbs_flow_spec_hdr); kern_spec_mask = kern_spec_val + kern_filter_sz; + if (ib_spec->type == (IB_FLOW_SPEC_INNER | IB_FLOW_SPEC_VXLAN_TUNNEL)) + return -EINVAL; - switch (ib_spec->type) { + switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) { case IB_FLOW_SPEC_ETH: ib_filter_sz = offsetof(struct ib_flow_eth_filter, real_sz); actual_filter_sz = spec_filter_size(kern_spec_mask, diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 4bc748fddcac..6d0dd6525e14 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1601,9 +1601,10 @@ enum ib_flow_spec_type { IB_FLOW_SPEC_TCP = 0x40, IB_FLOW_SPEC_UDP = 0x41, IB_FLOW_SPEC_VXLAN_TUNNEL = 0x50, + IB_FLOW_SPEC_INNER = 0x100, }; #define IB_FLOW_SPEC_LAYER_MASK 0xF0 -#define IB_FLOW_SPEC_SUPPORT_LAYERS 4 +#define IB_FLOW_SPEC_SUPPORT_LAYERS 8 /* Flow steering rule priority is set according to it's domain. 
* Lower domain value means higher priority. @@ -1631,7 +1632,7 @@ struct ib_flow_eth_filter { }; struct ib_flow_spec_eth { - enum ib_flow_spec_type type; + u32 type; u16 size; struct ib_flow_eth_filter val; struct ib_flow_eth_filter mask; @@ -1645,7 +1646,7 @@ struct ib_flow_ib_filter { }; struct ib_flow_spec_ib { - enum ib_flow_spec_type type; + u32 type; u16 size; struct ib_flow_ib_filter val; struct ib_flow_ib_filter mask; @@ -1670,7 +1671,7 @@ struct ib_flow_ipv4_filter { }; struct ib_flow_spec_ipv4 { - enum ib_flow_spec_type type; + u32 type; u16 size; struct ib_flow_ipv4_filter val; struct ib_flow_ipv4_filter mask; @@ -1688,7 +1689,7 @@ struct ib_flow_ipv6_filter { }; struct ib_flow_spec_ipv6 { - enum ib_flow_spec_type type; + u32 type; u16 size; struct ib_flow_ipv6_filter val; struct ib_flow_ipv6_filter mask; @@ -1702,7 +1703,7 @@ struct ib_flow_tcp_udp_filter { }; struct ib_flow_spec_tcp_udp { - enum ib_flow_spec_type type; + u32 type; u16 size; struct ib_flow_tcp_udp_filter val; struct ib_flow_tcp_udp_filter mask; @@ -1717,7 +1718,7 @@ struct ib_flow_tunnel_filter { * the tunnel_id from val has the vni value */ struct ib_flow_spec_tunnel { - enum ib_flow_spec_type type; + u32 type; u16 size; struct ib_flow_tunnel_filter val; struct ib_flow_tunnel_filter mask; @@ -1725,7 +1726,7 @@ struct ib_flow_spec_tunnel { union ib_flow_spec { struct { - enum ib_flow_spec_type type; + u32 type; u16 size; }; struct ib_flow_spec_eth eth; -- cgit v1.2.3 From 2d1e697e9b716b8a692bc9c197e5f4ffd10d7307 Mon Sep 17 00:00:00 2001 From: Moses Reuben Date: Mon, 14 Nov 2016 19:04:52 +0200 Subject: IB/mlx5: Add support to match inner packet fields Add support to match packet fields which are tunneled, i.e. support matching the header of the inner packet which is the result of or bit operation of the original header and the IB_FLOW_SPEC_INNER type. The combination of IB_FLOW_SPEC_INNER | IB_FLOW_SPEC_VXLAN_TUNNEL is not needed to be checked, because the IB core has this check already. 
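Combined with the VXLAN spec added earlier in this series, a consumer can now steer on inner headers; a minimal sketch of building such a rule (hypothetical values, error handling abbreviated; specs follow the attr in memory per the uverbs convention):

    struct {
            struct ib_flow_attr        attr;
            struct ib_flow_spec_tunnel tunnel;    /* outer: VXLAN VNI */
            struct ib_flow_spec_eth    inner_eth; /* inner: L2 header */
    } rule = {
            .attr = {
                    .type         = IB_FLOW_ATTR_NORMAL,
                    .size         = sizeof(rule),
                    .num_of_specs = 2,
                    .port         = 1,
            },
            .tunnel = {
                    .type = IB_FLOW_SPEC_VXLAN_TUNNEL,
                    .size = sizeof(struct ib_flow_spec_tunnel),
                    .val.tunnel_id  = cpu_to_be32(42),       /* VNI 42 */
                    .mask.tunnel_id = cpu_to_be32(0xffffff), /* < BIT(24) */
            },
            .inner_eth = {
                    /* INNER flag selects the encapsulated header */
                    .type = IB_FLOW_SPEC_INNER | IB_FLOW_SPEC_ETH,
                    .size = sizeof(struct ib_flow_spec_eth),
                    .val.dst_mac  = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55},
                    .mask.dst_mac = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
            },
    };
    struct ib_flow *flow;

    flow = ib_create_flow(qp, &rule.attr, IB_FLOW_DOMAIN_USER);
    if (IS_ERR(flow))
            return PTR_ERR(flow);
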
Signed-off-by: Moses Reuben Reviewed-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 131 +++++++++++++++++++++++--------------- 1 file changed, 78 insertions(+), 53 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index a9e5851365d3..f7a299431e6c 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1526,6 +1526,22 @@ static void set_proto(void *outer_c, void *outer_v, u8 mask, u8 val) MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val); } +static void set_flow_label(void *misc_c, void *misc_v, u8 mask, u8 val, + bool inner) +{ + if (inner) { + MLX5_SET(fte_match_set_misc, + misc_c, inner_ipv6_flow_label, mask); + MLX5_SET(fte_match_set_misc, + misc_v, inner_ipv6_flow_label, val); + } else { + MLX5_SET(fte_match_set_misc, + misc_c, outer_ipv6_flow_label, mask); + MLX5_SET(fte_match_set_misc, + misc_v, outer_ipv6_flow_label, val); + } +} + static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val) { MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask); @@ -1552,155 +1568,164 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val) static int parse_flow_attr(u32 *match_c, u32 *match_v, const union ib_flow_spec *ib_spec) { - void *outer_headers_c = MLX5_ADDR_OF(fte_match_param, match_c, - outer_headers); - void *outer_headers_v = MLX5_ADDR_OF(fte_match_param, match_v, - outer_headers); void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c, misc_parameters); void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v, misc_parameters); + void *headers_c; + void *headers_v; + + if (ib_spec->type & IB_FLOW_SPEC_INNER) { + headers_c = MLX5_ADDR_OF(fte_match_param, match_c, + inner_headers); + headers_v = MLX5_ADDR_OF(fte_match_param, match_v, + inner_headers); + } else { + headers_c = MLX5_ADDR_OF(fte_match_param, match_c, + outer_headers); + headers_v = MLX5_ADDR_OF(fte_match_param, match_v, + outer_headers); + } - switch (ib_spec->type) { + switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) { case IB_FLOW_SPEC_ETH: if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD)) return -ENOTSUPP; - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, dmac_47_16), ib_spec->eth.mask.dst_mac); - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, dmac_47_16), ib_spec->eth.val.dst_mac); - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, smac_47_16), ib_spec->eth.mask.src_mac); - ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, smac_47_16), ib_spec->eth.val.src_mac); if (ib_spec->eth.mask.vlan_tag) { - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + MLX5_SET(fte_match_set_lyr_2_4, headers_c, vlan_tag, 1); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + MLX5_SET(fte_match_set_lyr_2_4, headers_v, vlan_tag, 1); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid, ntohs(ib_spec->eth.mask.vlan_tag)); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid, ntohs(ib_spec->eth.val.vlan_tag)); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + 
MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_cfi, ntohs(ib_spec->eth.mask.vlan_tag) >> 12); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_cfi, ntohs(ib_spec->eth.val.vlan_tag) >> 12); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio, ntohs(ib_spec->eth.mask.vlan_tag) >> 13); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio, ntohs(ib_spec->eth.val.vlan_tag) >> 13); } - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype, ntohs(ib_spec->eth.mask.ether_type)); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ntohs(ib_spec->eth.val.ether_type)); break; case IB_FLOW_SPEC_IPV4: if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD)) return -ENOTSUPP; - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype, 0xffff); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IP); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, src_ipv4_src_ipv6.ipv4_layout.ipv4), &ib_spec->ipv4.mask.src_ip, sizeof(ib_spec->ipv4.mask.src_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, src_ipv4_src_ipv6.ipv4_layout.ipv4), &ib_spec->ipv4.val.src_ip, sizeof(ib_spec->ipv4.val.src_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), &ib_spec->ipv4.mask.dst_ip, sizeof(ib_spec->ipv4.mask.dst_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, dst_ipv4_dst_ipv6.ipv4_layout.ipv4), &ib_spec->ipv4.val.dst_ip, sizeof(ib_spec->ipv4.val.dst_ip)); - set_tos(outer_headers_c, outer_headers_v, + set_tos(headers_c, headers_v, ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos); - set_proto(outer_headers_c, outer_headers_v, + set_proto(headers_c, headers_v, ib_spec->ipv4.mask.proto, ib_spec->ipv4.val.proto); break; case IB_FLOW_SPEC_IPV6: if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD)) return -ENOTSUPP; - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype, 0xffff); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype, ETH_P_IPV6); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, src_ipv4_src_ipv6.ipv6_layout.ipv6), &ib_spec->ipv6.mask.src_ip, sizeof(ib_spec->ipv6.mask.src_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, src_ipv4_src_ipv6.ipv6_layout.ipv6), &ib_spec->ipv6.val.src_ip, sizeof(ib_spec->ipv6.val.src_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_c, + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c, dst_ipv4_dst_ipv6.ipv6_layout.ipv6), &ib_spec->ipv6.mask.dst_ip, sizeof(ib_spec->ipv6.mask.dst_ip)); - memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, outer_headers_v, + memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v, dst_ipv4_dst_ipv6.ipv6_layout.ipv6), &ib_spec->ipv6.val.dst_ip, sizeof(ib_spec->ipv6.val.dst_ip)); - set_tos(outer_headers_c, 
outer_headers_v, + set_tos(headers_c, headers_v, ib_spec->ipv6.mask.traffic_class, ib_spec->ipv6.val.traffic_class); - set_proto(outer_headers_c, outer_headers_v, + set_proto(headers_c, headers_v, ib_spec->ipv6.mask.next_hdr, ib_spec->ipv6.val.next_hdr); - MLX5_SET(fte_match_set_misc, misc_params_c, - outer_ipv6_flow_label, - ntohl(ib_spec->ipv6.mask.flow_label)); - MLX5_SET(fte_match_set_misc, misc_params_v, - outer_ipv6_flow_label, - ntohl(ib_spec->ipv6.val.flow_label)); + set_flow_label(misc_params_c, misc_params_v, + ntohl(ib_spec->ipv6.mask.flow_label), + ntohl(ib_spec->ipv6.val.flow_label), + ib_spec->type & IB_FLOW_SPEC_INNER); + break; case IB_FLOW_SPEC_TCP: if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask, LAST_TCP_UDP_FIELD)) return -ENOTSUPP; - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol, + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, 0xff); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol, + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_TCP); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_sport, + MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport, ntohs(ib_spec->tcp_udp.mask.src_port)); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_sport, + MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport, ntohs(ib_spec->tcp_udp.val.src_port)); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, tcp_dport, + MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport, ntohs(ib_spec->tcp_udp.mask.dst_port)); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, tcp_dport, + MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport, ntohs(ib_spec->tcp_udp.val.dst_port)); break; case IB_FLOW_SPEC_UDP: @@ -1708,19 +1733,19 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v, LAST_TCP_UDP_FIELD)) return -ENOTSUPP; - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, ip_protocol, + MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol, 0xff); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, ip_protocol, + MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol, IPPROTO_UDP); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_sport, + MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport, ntohs(ib_spec->tcp_udp.mask.src_port)); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_sport, + MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport, ntohs(ib_spec->tcp_udp.val.src_port)); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_c, udp_dport, + MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport, ntohs(ib_spec->tcp_udp.mask.dst_port)); - MLX5_SET(fte_match_set_lyr_2_4, outer_headers_v, udp_dport, + MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport, ntohs(ib_spec->tcp_udp.val.dst_port)); break; case IB_FLOW_SPEC_VXLAN_TUNNEL: -- cgit v1.2.3 From c90ea9d8e51196d9c528e57d9ab09ee7d41f0ba0 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Wed, 23 Nov 2016 08:23:22 +0200 Subject: IB/core: Change ib_resolve_eth_dmac to use it in create AH The function ib_resolve_eth_dmac() requires a struct ib_qp_attr * and a qp_attr_mask as parameters, although it would also be useful for resolving the dmac of address handles. This patch changes the signature of the function so it can be used in the flow of creating an address handle.
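For illustration, the new calling convention boils down to the following (a minimal sketch, not part of the patch; the wrapper name is invented, and only the signature from the hunks below is assumed):

#include <rdma/ib_verbs.h>

/* Sketch: with the new signature, a caller that only has AH attributes
 * can resolve the destination MAC directly, with no QP or attr_mask.
 * On ports that are not RoCE (Eth) capable the helper returns 0
 * without touching ah_attr.
 */
static int example_prepare_ah_attr(struct ib_device *device,
				   struct ib_ah_attr *ah_attr)
{
	return ib_resolve_eth_dmac(device, ah_attr);
}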
Signed-off-by: Moni Shoua Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/core_priv.h | 3 -- drivers/infiniband/core/uverbs_cmd.c | 8 ++-- drivers/infiniband/core/verbs.c | 84 ++++++++++++++++++------------------ include/rdma/ib_verbs.h | 2 + 4 files changed, 49 insertions(+), 48 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/core_priv.h b/drivers/infiniband/core/core_priv.h index 19d499dcab76..1acc95b3aaa3 100644 --- a/drivers/infiniband/core/core_priv.h +++ b/drivers/infiniband/core/core_priv.h @@ -72,9 +72,6 @@ void ib_device_unregister_sysfs(struct ib_device *device); void ib_cache_setup(void); void ib_cache_cleanup(void); -int ib_resolve_eth_dmac(struct ib_qp *qp, - struct ib_qp_attr *qp_attr, int *qp_attr_mask); - typedef void (*roce_netdev_callback)(struct ib_device *device, u8 port, struct net_device *idev, void *cookie); diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index d84dcde7a8bb..77bb15f66414 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2402,9 +2402,11 @@ ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, attr->alt_ah_attr.port_num = cmd.alt_dest.port_num; if (qp->real_qp == qp) { - ret = ib_resolve_eth_dmac(qp, attr, &cmd.attr_mask); - if (ret) - goto release_qp; + if (cmd.attr_mask & IB_QP_AV) { + ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr); + if (ret) + goto release_qp; + } ret = qp->device->modify_qp(qp, attr, modify_qp_mask(qp->qp_type, cmd.attr_mask), &udata); } else { diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 98e10c5f2a96..cacee169cd55 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1198,66 +1198,66 @@ int ib_modify_qp_is_ok(enum ib_qp_state cur_state, enum ib_qp_state next_state, } EXPORT_SYMBOL(ib_modify_qp_is_ok); -int ib_resolve_eth_dmac(struct ib_qp *qp, - struct ib_qp_attr *qp_attr, int *qp_attr_mask) +int ib_resolve_eth_dmac(struct ib_device *device, + struct ib_ah_attr *ah_attr) { int ret = 0; - if (*qp_attr_mask & IB_QP_AV) { - if (qp_attr->ah_attr.port_num < rdma_start_port(qp->device) || - qp_attr->ah_attr.port_num > rdma_end_port(qp->device)) - return -EINVAL; - - if (!rdma_cap_eth_ah(qp->device, qp_attr->ah_attr.port_num)) - return 0; - - if (rdma_link_local_addr((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw)) { - rdma_get_ll_mac((struct in6_addr *)qp_attr->ah_attr.grh.dgid.raw, - qp_attr->ah_attr.dmac); - } else { - union ib_gid sgid; - struct ib_gid_attr sgid_attr; - int ifindex; - int hop_limit; - - ret = ib_query_gid(qp->device, - qp_attr->ah_attr.port_num, - qp_attr->ah_attr.grh.sgid_index, - &sgid, &sgid_attr); - - if (ret || !sgid_attr.ndev) { - if (!ret) - ret = -ENXIO; - goto out; - } + if (ah_attr->port_num < rdma_start_port(device) || + ah_attr->port_num > rdma_end_port(device)) + return -EINVAL; - ifindex = sgid_attr.ndev->ifindex; + if (!rdma_cap_eth_ah(device, ah_attr->port_num)) + return 0; - ret = rdma_addr_find_l2_eth_by_grh(&sgid, - &qp_attr->ah_attr.grh.dgid, - qp_attr->ah_attr.dmac, - NULL, &ifindex, &hop_limit); + if (rdma_link_local_addr((struct in6_addr *)ah_attr->grh.dgid.raw)) { + rdma_get_ll_mac((struct in6_addr *)ah_attr->grh.dgid.raw, + ah_attr->dmac); + } else { + union ib_gid sgid; + struct ib_gid_attr sgid_attr; + int ifindex; + int hop_limit; + + ret = ib_query_gid(device, + ah_attr->port_num, + ah_attr->grh.sgid_index, + &sgid, 
&sgid_attr); + + if (ret || !sgid_attr.ndev) { + if (!ret) + ret = -ENXIO; + goto out; + } - dev_put(sgid_attr.ndev); + ifindex = sgid_attr.ndev->ifindex; - qp_attr->ah_attr.grh.hop_limit = hop_limit; - } + ret = rdma_addr_find_l2_eth_by_grh(&sgid, + &ah_attr->grh.dgid, + ah_attr->dmac, + NULL, &ifindex, &hop_limit); + + dev_put(sgid_attr.ndev); + + ah_attr->grh.hop_limit = hop_limit; } out: return ret; } EXPORT_SYMBOL(ib_resolve_eth_dmac); - int ib_modify_qp(struct ib_qp *qp, struct ib_qp_attr *qp_attr, int qp_attr_mask) { - int ret; - ret = ib_resolve_eth_dmac(qp, qp_attr, &qp_attr_mask); - if (ret) - return ret; + if (qp_attr_mask & IB_QP_AV) { + int ret; + + ret = ib_resolve_eth_dmac(qp->device, &qp_attr->ah_attr); + if (ret) + return ret; + } return qp->device->modify_qp(qp->real_qp, qp_attr, qp_attr_mask, NULL); } diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 6d0dd6525e14..0c6f973e407c 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -3394,4 +3394,6 @@ void ib_drain_rq(struct ib_qp *qp); void ib_drain_sq(struct ib_qp *qp); void ib_drain_qp(struct ib_qp *qp); +int ib_resolve_eth_dmac(struct ib_device *device, + struct ib_ah_attr *ah_attr); #endif /* IB_VERBS_H */ -- cgit v1.2.3 From 6ad279c5a2e55bf2bd100b4222090d4717de88d5 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Wed, 23 Nov 2016 08:23:23 +0200 Subject: IB/mlx5: Report that device has udata response in create_ah To make the mlx5 user driver aware of whether the kernel driver returns the dmac in the user data response, add a new flag that is returned to user space through alloc_ucontext. Signed-off-by: Moni Shoua Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 3 ++- include/uapi/rdma/mlx5-abi.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index f7a299431e6c..f2ef2a170d9f 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -1117,7 +1117,8 @@ static struct ib_ucontext *mlx5_ib_alloc_ucontext(struct ib_device *ibdev, resp.response_length += sizeof(resp.cqe_version); if (field_avail(typeof(resp), cmds_supp_uhw, udata->outlen)) { - resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE; + resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE | + MLX5_USER_CMDS_SUPP_UHW_CREATE_AH; resp.response_length += sizeof(resp.cmds_supp_uhw); } diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index b0a41c8dc1fb..ac28729a1245 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -82,6 +82,7 @@ enum mlx5_ib_alloc_ucontext_resp_mask { enum mlx5_user_cmds_supp_uhw { MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE = 1 << 0, + MLX5_USER_CMDS_SUPP_UHW_CREATE_AH = 1 << 1, }; struct mlx5_ib_alloc_ucontext_resp { -- cgit v1.2.3 From 477864c8fcd953e5a988073ca5be18bb7fd93410 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Wed, 23 Nov 2016 08:23:24 +0200 Subject: IB/core: Let create_ah return extended response to user Add struct ib_udata to the signature of the create_ah callback that is implemented by IB device drivers. This allows HW drivers to return extra data to the userspace library. This patch prepares the ground for the mlx5 driver to resolve the destination MAC address for a given GID and return it to userspace. This patch was previously submitted by Knut Omang as a part of the patch set to support Oracle's Infiniband HCA (SIF).
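As a sketch of what the extended callback enables (hypothetical driver code, not from this patch; the response struct and function names are invented for the example, and a later patch in this series implements the real mlx5 version):

#include <rdma/ib_verbs.h>

/* Hypothetical driver-private response layout returned via udata. */
struct example_create_ah_resp {
	__u32 response_length;
	__u8  dmac[6];
	__u8  reserved[2];
};

static struct ib_ah *example_create_ah(struct ib_pd *pd,
				       struct ib_ah_attr *ah_attr,
				       struct ib_udata *udata)
{
	/* udata is NULL for kernel callers (see ib_create_ah() below) */
	if (udata) {
		struct example_create_ah_resp resp = {};

		if (udata->outlen < sizeof(resp))
			return ERR_PTR(-EINVAL);

		resp.response_length = sizeof(resp);
		memcpy(resp.dmac, ah_attr->dmac, sizeof(resp.dmac));

		if (ib_copy_to_udata(udata, &resp, resp.response_length))
			return ERR_PTR(-EFAULT);
	}

	/* ... driver-specific AH allocation would follow here ... */
	return ERR_PTR(-ENOSYS);
}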
Signed-off-by: Knut Omang Signed-off-by: Moni Shoua Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 11 ++++++++++- drivers/infiniband/core/verbs.c | 2 +- drivers/infiniband/hw/cxgb3/iwch_provider.c | 3 ++- drivers/infiniband/hw/cxgb4/provider.c | 4 +++- drivers/infiniband/hw/hns/hns_roce_ah.c | 3 ++- drivers/infiniband/hw/hns/hns_roce_device.h | 3 ++- drivers/infiniband/hw/i40iw/i40iw_verbs.c | 4 +++- drivers/infiniband/hw/mlx4/ah.c | 4 +++- drivers/infiniband/hw/mlx4/mlx4_ib.h | 3 ++- drivers/infiniband/hw/mlx5/ah.c | 4 +++- drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 ++- drivers/infiniband/hw/mthca/mthca_provider.c | 4 +++- drivers/infiniband/hw/nes/nes_verbs.c | 3 ++- drivers/infiniband/hw/ocrdma/ocrdma_ah.c | 3 ++- drivers/infiniband/hw/ocrdma/ocrdma_ah.h | 4 +++- drivers/infiniband/hw/qedr/verbs.c | 3 ++- drivers/infiniband/hw/qedr/verbs.h | 3 ++- drivers/infiniband/hw/usnic/usnic_ib_verbs.c | 4 +++- drivers/infiniband/hw/usnic/usnic_ib_verbs.h | 4 +++- drivers/infiniband/sw/rxe/rxe_verbs.c | 4 +++- include/rdma/ib_verbs.h | 3 ++- 21 files changed, 58 insertions(+), 21 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 77bb15f66414..cdb935ddab69 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2877,6 +2877,7 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, struct ib_ah *ah; struct ib_ah_attr attr; int ret; + struct ib_udata udata; if (out_len < sizeof resp) return -ENOSPC; @@ -2884,6 +2885,10 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, if (copy_from_user(&cmd, buf, sizeof cmd)) return -EFAULT; + INIT_UDATA(&udata, buf + sizeof(cmd), + (unsigned long)cmd.response + sizeof(resp), + in_len - sizeof(cmd), out_len - sizeof(resp)); + uobj = kmalloc(sizeof *uobj, GFP_KERNEL); if (!uobj) return -ENOMEM; @@ -2910,12 +2915,16 @@ ssize_t ib_uverbs_create_ah(struct ib_uverbs_file *file, memset(&attr.dmac, 0, sizeof(attr.dmac)); memcpy(attr.grh.dgid.raw, cmd.attr.grh.dgid, 16); - ah = ib_create_ah(pd, &attr); + ah = pd->device->create_ah(pd, &attr, &udata); + if (IS_ERR(ah)) { ret = PTR_ERR(ah); goto err_put; } + ah->device = pd->device; + ah->pd = pd; + atomic_inc(&pd->usecnt); ah->uobject = uobj; uobj->object = ah; diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index cacee169cd55..c976f29f7ad2 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -315,7 +315,7 @@ struct ib_ah *ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) { struct ib_ah *ah; - ah = pd->device->create_ah(pd, ah_attr); + ah = pd->device->create_ah(pd, ah_attr, NULL); if (!IS_ERR(ah)) { ah->device = pd->device; diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index cba57bb53dba..9d5fe1853da4 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -62,7 +62,8 @@ #include "common.h" static struct ib_ah *iwch_ah_create(struct ib_pd *pd, - struct ib_ah_attr *ah_attr) + struct ib_ah_attr *ah_attr, + struct ib_udata *udata) { return ERR_PTR(-ENOSYS); } diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index 645e606a17c5..49b51b7e0fd7 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -59,7 +59,9 @@ module_param(fastreg_support, 
int, 0644); MODULE_PARM_DESC(fastreg_support, "Advertise fastreg support (default=1)"); static struct ib_ah *c4iw_ah_create(struct ib_pd *pd, - struct ib_ah_attr *ah_attr) + struct ib_ah_attr *ah_attr, + struct ib_udata *udata) + { return ERR_PTR(-ENOSYS); } diff --git a/drivers/infiniband/hw/hns/hns_roce_ah.c b/drivers/infiniband/hw/hns/hns_roce_ah.c index 24f79ee39fdf..0ac294db3b29 100644 --- a/drivers/infiniband/hw/hns/hns_roce_ah.c +++ b/drivers/infiniband/hw/hns/hns_roce_ah.c @@ -39,7 +39,8 @@ #define HNS_ROCE_VLAN_SL_BIT_MASK 7 #define HNS_ROCE_VLAN_SL_SHIFT 13 -struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *ah_attr) +struct ib_ah *hns_roce_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *ah_attr, + struct ib_udata *udata) { struct hns_roce_dev *hr_dev = to_hr_dev(ibpd->device); struct device *dev = &hr_dev->pdev->dev; diff --git a/drivers/infiniband/hw/hns/hns_roce_device.h b/drivers/infiniband/hw/hns/hns_roce_device.h index 341731553a60..470615f7b517 100644 --- a/drivers/infiniband/hw/hns/hns_roce_device.h +++ b/drivers/infiniband/hw/hns/hns_roce_device.h @@ -667,7 +667,8 @@ int hns_roce_bitmap_alloc_range(struct hns_roce_bitmap *bitmap, int cnt, void hns_roce_bitmap_free_range(struct hns_roce_bitmap *bitmap, unsigned long obj, int cnt); -struct ib_ah *hns_roce_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); +struct ib_ah *hns_roce_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct ib_udata *udata); int hns_roce_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr); int hns_roce_destroy_ah(struct ib_ah *ah); diff --git a/drivers/infiniband/hw/i40iw/i40iw_verbs.c b/drivers/infiniband/hw/i40iw/i40iw_verbs.c index 6329c971c22f..f03fc1527d70 100644 --- a/drivers/infiniband/hw/i40iw/i40iw_verbs.c +++ b/drivers/infiniband/hw/i40iw/i40iw_verbs.c @@ -2562,7 +2562,9 @@ static int i40iw_query_pkey(struct ib_device *ibdev, * @ah_attr: address handle attributes */ static struct ib_ah *i40iw_create_ah(struct ib_pd *ibpd, - struct ib_ah_attr *attr) + struct ib_ah_attr *attr, + struct ib_udata *udata) + { return ERR_PTR(-ENOSYS); } diff --git a/drivers/infiniband/hw/mlx4/ah.c b/drivers/infiniband/hw/mlx4/ah.c index 6be7dc320ff7..20c6d17ac8b8 100644 --- a/drivers/infiniband/hw/mlx4/ah.c +++ b/drivers/infiniband/hw/mlx4/ah.c @@ -126,7 +126,9 @@ static struct ib_ah *create_iboe_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr return &ah->ibah; } -struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct ib_udata *udata) + { struct mlx4_ib_ah *ah; struct ib_ah *ret; diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 35141f451e5c..7f3d976d81ed 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -742,7 +742,8 @@ int mlx4_ib_arm_cq(struct ib_cq *cq, enum ib_cq_notify_flags flags); void __mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); void mlx4_ib_cq_clean(struct mlx4_ib_cq *cq, u32 qpn, struct mlx4_ib_srq *srq); -struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); +struct ib_ah *mlx4_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct ib_udata *udata); int mlx4_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr); int mlx4_ib_destroy_ah(struct ib_ah *ah); diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index 745efa4cfc71..ecac9ea2c85f 100644 --- 
a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -64,7 +64,9 @@ static struct ib_ah *create_ib_ah(struct mlx5_ib_dev *dev, return &ah->ibah; } -struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct ib_udata *udata) + { struct mlx5_ib_ah *ah; struct mlx5_ib_dev *dev = to_mdev(pd->device); diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 95937e7d2584..ff05afa864ee 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -739,7 +739,8 @@ void mlx5_ib_free_srq_wqe(struct mlx5_ib_srq *srq, int wqe_index); int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey, u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh, const void *in_mad, void *response_mad); -struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr); +struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct ib_udata *udata); int mlx5_ib_query_ah(struct ib_ah *ibah, struct ib_ah_attr *ah_attr); int mlx5_ib_destroy_ah(struct ib_ah *ah); struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index 358930a41e36..d31708742ba5 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -410,7 +410,9 @@ static int mthca_dealloc_pd(struct ib_pd *pd) } static struct ib_ah *mthca_ah_create(struct ib_pd *pd, - struct ib_ah_attr *ah_attr) + struct ib_ah_attr *ah_attr, + struct ib_udata *udata) + { int err; struct mthca_ah *ah; diff --git a/drivers/infiniband/hw/nes/nes_verbs.c b/drivers/infiniband/hw/nes/nes_verbs.c index bd69125731c1..0bb857c7e439 100644 --- a/drivers/infiniband/hw/nes/nes_verbs.c +++ b/drivers/infiniband/hw/nes/nes_verbs.c @@ -771,7 +771,8 @@ static int nes_dealloc_pd(struct ib_pd *ibpd) /** * nes_create_ah */ -static struct ib_ah *nes_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr) +static struct ib_ah *nes_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, + struct ib_udata *udata) { return ERR_PTR(-ENOSYS); } diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c index 797362a297b2..14d33b0f3950 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.c @@ -154,7 +154,8 @@ static inline int set_av_attr(struct ocrdma_dev *dev, struct ocrdma_ah *ah, return status; } -struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) +struct ib_ah *ocrdma_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr, + struct ib_udata *udata) { u32 *ahid_addr; int status; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h index 3856dd4c7e3d..0704a24b17c8 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_ah.h +++ b/drivers/infiniband/hw/ocrdma/ocrdma_ah.h @@ -50,7 +50,9 @@ enum { OCRDMA_AH_L3_TYPE_MASK = 0x03, OCRDMA_AH_L3_TYPE_SHIFT = 0x1D /* 29 bits */ }; -struct ib_ah *ocrdma_create_ah(struct ib_pd *, struct ib_ah_attr *); + +struct ib_ah *ocrdma_create_ah(struct ib_pd *, struct ib_ah_attr *, + struct ib_udata *); int ocrdma_destroy_ah(struct ib_ah *); int ocrdma_query_ah(struct ib_ah *, struct ib_ah_attr *); int ocrdma_modify_ah(struct ib_ah *, struct ib_ah_attr *); diff --git a/drivers/infiniband/hw/qedr/verbs.c 
b/drivers/infiniband/hw/qedr/verbs.c index a61514296767..ccff6c6e3f33 100644 --- a/drivers/infiniband/hw/qedr/verbs.c +++ b/drivers/infiniband/hw/qedr/verbs.c @@ -2094,7 +2094,8 @@ int qedr_destroy_qp(struct ib_qp *ibqp) return rc; } -struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) +struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr, + struct ib_udata *udata) { struct qedr_ah *ah; diff --git a/drivers/infiniband/hw/qedr/verbs.h b/drivers/infiniband/hw/qedr/verbs.h index a9b5e67bb81e..070677ca4d19 100644 --- a/drivers/infiniband/hw/qedr/verbs.h +++ b/drivers/infiniband/hw/qedr/verbs.h @@ -70,7 +70,8 @@ int qedr_query_qp(struct ib_qp *, struct ib_qp_attr *qp_attr, int qp_attr_mask, struct ib_qp_init_attr *); int qedr_destroy_qp(struct ib_qp *ibqp); -struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr); +struct ib_ah *qedr_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr, + struct ib_udata *udata); int qedr_destroy_ah(struct ib_ah *ibah); int qedr_dereg_mr(struct ib_mr *); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c index a5bfbba6bbac..fd2a50eb4c91 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.c @@ -738,7 +738,9 @@ int usnic_ib_mmap(struct ib_ucontext *context, /* In ib callbacks section - Start of stub funcs */ struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd, - struct ib_ah_attr *ah_attr) + struct ib_ah_attr *ah_attr, + struct ib_udata *udata) + { usnic_dbg("\n"); return ERR_PTR(-EPERM); diff --git a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h index 0d9d2e6a14d5..0ed8e072329e 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_verbs.h +++ b/drivers/infiniband/hw/usnic/usnic_ib_verbs.h @@ -75,7 +75,9 @@ int usnic_ib_dealloc_ucontext(struct ib_ucontext *ibcontext); int usnic_ib_mmap(struct ib_ucontext *context, struct vm_area_struct *vma); struct ib_ah *usnic_ib_create_ah(struct ib_pd *pd, - struct ib_ah_attr *ah_attr); + struct ib_ah_attr *ah_attr, + struct ib_udata *udata); + int usnic_ib_destroy_ah(struct ib_ah *ah); int usnic_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, struct ib_send_wr **bad_wr); diff --git a/drivers/infiniband/sw/rxe/rxe_verbs.c b/drivers/infiniband/sw/rxe/rxe_verbs.c index 19841c863daf..187d85ccfe58 100644 --- a/drivers/infiniband/sw/rxe/rxe_verbs.c +++ b/drivers/infiniband/sw/rxe/rxe_verbs.c @@ -316,7 +316,9 @@ static int rxe_init_av(struct rxe_dev *rxe, struct ib_ah_attr *attr, return err; } -static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr) +static struct ib_ah *rxe_create_ah(struct ib_pd *ibpd, struct ib_ah_attr *attr, + struct ib_udata *udata) + { int err; struct rxe_dev *rxe = to_rdev(ibpd->device); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 0c6f973e407c..73417a22ee4d 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1951,7 +1951,8 @@ struct ib_device { struct ib_udata *udata); int (*dealloc_pd)(struct ib_pd *pd); struct ib_ah * (*create_ah)(struct ib_pd *pd, - struct ib_ah_attr *ah_attr); + struct ib_ah_attr *ah_attr, + struct ib_udata *udata); int (*modify_ah)(struct ib_ah *ah, struct ib_ah_attr *ah_attr); int (*query_ah)(struct ib_ah *ah, -- cgit v1.2.3 From 5097e71f3edafad3e7d8d4f9c4a137d9aad0fae2 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Wed, 23 Nov 2016 08:23:25 +0200 Subject: IB/mlx5: Use kernel driver to help userspace 
create ah Resolving a MAC address for a given IP address in userspace is inefficient. This patch lets the mlx5 user driver use the kernel driver to resolve the MAC and get the answer in the private section of the response. Signed-off-by: Moni Shoua Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/ah.c | 21 +++++++++++++++++++++ include/uapi/rdma/mlx5-abi.h | 6 ++++++ 2 files changed, 27 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/ah.c b/drivers/infiniband/hw/mlx5/ah.c index ecac9ea2c85f..d090e96f6f01 100644 --- a/drivers/infiniband/hw/mlx5/ah.c +++ b/drivers/infiniband/hw/mlx5/ah.c @@ -77,6 +77,27 @@ struct ib_ah *mlx5_ib_create_ah(struct ib_pd *pd, struct ib_ah_attr *ah_attr, if (ll == IB_LINK_LAYER_ETHERNET && !(ah_attr->ah_flags & IB_AH_GRH)) return ERR_PTR(-EINVAL); + if (ll == IB_LINK_LAYER_ETHERNET && udata) { + int err; + struct mlx5_ib_create_ah_resp resp = {}; + u32 min_resp_len = offsetof(typeof(resp), dmac) + + sizeof(resp.dmac); + + if (udata->outlen < min_resp_len) + return ERR_PTR(-EINVAL); + + resp.response_length = min_resp_len; + + err = ib_resolve_eth_dmac(pd->device, ah_attr); + if (err) + return ERR_PTR(err); + + memcpy(resp.dmac, ah_attr->dmac, ETH_ALEN); + err = ib_copy_to_udata(udata, &resp, resp.response_length); + if (err) + return ERR_PTR(err); + } + ah = kzalloc(sizeof(*ah), GFP_ATOMIC); if (!ah) return ERR_PTR(-ENOMEM); diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index ac28729a1245..3ebf3db24c34 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -249,6 +249,12 @@ struct mlx5_ib_create_wq { __u32 reserved; }; +struct mlx5_ib_create_ah_resp { + __u32 response_length; + __u8 dmac[ETH_ALEN]; + __u8 reserved[6]; +}; + struct mlx5_ib_create_wq_resp { __u32 response_length; __u32 reserved; -- cgit v1.2.3 From 41c450fd8da549c5f7cced6650354095b0d4312a Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Wed, 23 Nov 2016 08:23:26 +0200 Subject: IB/mlx5: Make create/destroy_ah available to userspace Advertise that the create_ah and destroy_ah verbs are accessible from the uverbs interface. Signed-off-by: Moni Shoua Reviewed-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index f2ef2a170d9f..7e5b80361724 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3053,6 +3053,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) (1ull << IB_USER_VERBS_CMD_QUERY_PORT) | (1ull << IB_USER_VERBS_CMD_ALLOC_PD) | (1ull << IB_USER_VERBS_CMD_DEALLOC_PD) | + (1ull << IB_USER_VERBS_CMD_CREATE_AH) | + (1ull << IB_USER_VERBS_CMD_DESTROY_AH) | (1ull << IB_USER_VERBS_CMD_REG_MR) | (1ull << IB_USER_VERBS_CMD_REREG_MR) | (1ull << IB_USER_VERBS_CMD_DEREG_MR) | -- cgit v1.2.3 From c482af646d0809a8d5e1b7f4398cce3592589b98 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Sun, 27 Nov 2016 15:18:19 +0200 Subject: IB/mlx4: Fix out-of-range array index in destroy qp flow For non-special QPs, the port value becomes non-zero only at the RESET-to-INIT transition. If the QP has not undergone that transition, its port number value is still zero. If such a QP is destroyed before being moved out of the RESET state, subtracting one from the qp port number results in a negative value.
Using that negative value as an index into the qp1_proxy array results in an out-of-bounds array reference. Fix this by testing that the QP type is one that uses qp1_proxy before using the port number. For special QPs of all types, the port number is specified at QP creation time. Fixes: 9433c188915c ("IB/mlx4: Invoke UPDATE_QP for proxy QP1 on MAC changes") Signed-off-by: Jack Morgenstein Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/qp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 0914731c9aac..c22454383976 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -1280,7 +1280,8 @@ static int _mlx4_ib_destroy_qp(struct ib_qp *qp) if (is_qp0(dev, mqp)) mlx4_CLOSE_PORT(dev->dev, mqp->port); - if (dev->qp1_proxy[mqp->port - 1] == mqp) { + if (mqp->mlx4_ib_qp_type == MLX4_IB_QPT_PROXY_GSI && + dev->qp1_proxy[mqp->port - 1] == mqp) { mutex_lock(&dev->qp1_proxy_lock[mqp->port - 1]); dev->qp1_proxy[mqp->port - 1] = NULL; mutex_unlock(&dev->qp1_proxy_lock[mqp->port - 1]); -- cgit v1.2.3 From c73b7911de97fad3ab9032a110af48d6ab2da48f Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sun, 27 Nov 2016 15:18:20 +0200 Subject: IB/mlx5: Assign SRQ type earlier Move the SRQ type assignment so it is set before actually being used in the create_srq_user() and create_srq_kernel() functions. Fixes: af1ba291c5e4 ('{net, IB}/mlx5: Refactor internal SRQ API') Signed-off-by: Maor Gottlieb Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/srq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/srq.c b/drivers/infiniband/hw/mlx5/srq.c index f384db5367fb..83588db2785f 100644 --- a/drivers/infiniband/hw/mlx5/srq.c +++ b/drivers/infiniband/hw/mlx5/srq.c @@ -282,6 +282,7 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, mlx5_ib_dbg(dev, "desc_size 0x%x, req wr 0x%x, srq size 0x%x, max_gs 0x%x, max_avail_gather 0x%x\n", desc_size, init_attr->attr.max_wr, srq->msrq.max, srq->msrq.max_gs, srq->msrq.max_avail_gather); + in.type = init_attr->srq_type; if (pd->uobject) err = create_srq_user(pd, srq, &in, udata, buf_size); @@ -294,7 +295,6 @@ struct ib_srq *mlx5_ib_create_srq(struct ib_pd *pd, goto err_srq; } - in.type = init_attr->srq_type; in.log_size = ilog2(srq->msrq.max); in.wqe_shift = srq->msrq.wqe_shift - 4; if (srq->wq_sig) -- cgit v1.2.3 From afd02cd3a9b6c04b41d946b5d7f6e17b3fc30c6b Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Sun, 27 Nov 2016 15:18:21 +0200 Subject: IB/mlx5: Avoid system crash when enabling many VFs When enabling many VFs, the total number of DMA mappings increases significantly. This causes DMA allocations to take a lot of time since they are serialized in the kernel. As a result, the driver enters a fatal condition due to a timeout and the system hangs. To recover from this, we disable the MR cache for VFs. PFs will still have a full cache, and the VF cache can be manipulated as usual after driver load.
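The mechanism amounts to forcing the per-entry cache limit to zero on a VF; a condensed sketch of the logic in the hunk below (the wrapper name is invented):

/* Sketch: an MR cache entry gets its profile limit only when the
 * MR-cache profile mask is set AND the function is a PF; on a VF the
 * limit is forced to 0, so no MRs are preallocated at load time.
 */
static int example_mr_cache_limit(struct mlx5_ib_dev *dev, int i)
{
	if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
	    mlx5_core_is_pf(dev->mdev))
		return dev->mdev->profile->mr_cache[i].limit;

	return 0; /* VF: empty cache at load, adjustable after load */
}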
Fixes: e126ba97dba9 ('mlx5: Add driver for Mellanox Connect-IB adapters') Signed-off-by: Eli Cohen Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 6cbda901f259..5258ac870e6a 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -627,7 +627,8 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) ent->order = i + 2; ent->dev = dev; - if (dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) + if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) && + (mlx5_core_is_pf(dev->mdev))) limit = dev->mdev->profile->mr_cache[i].limit; else limit = 0; -- cgit v1.2.3 From b216af408c985092a79472ad10e6f216cb2973fc Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sun, 27 Nov 2016 15:18:22 +0200 Subject: IB/mlx5: Use u64 for UMR length The fast_registration length is used to convey the length of memory registrations through UMR, which can be of any size up to 2^64. Change the length type to u64. Fixes: 968e78dd9644 ('IB/mlx5: Enhance UMR support to allow partial page table update') Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index ff05afa864ee..df3d6af3f683 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -420,7 +420,7 @@ struct mlx5_umr_wr { struct ib_pd *pd; unsigned int page_shift; unsigned int npages; - u32 length; + u64 length; int access_flags; u32 mkey; }; -- cgit v1.2.3 From d012f5d6f8597f936f44c79e46345fda86dcff4d Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 27 Nov 2016 16:51:34 +0200 Subject: IB/mlx5: Refactor registration to netdev notifier Refactor the netdev notifier registration into a small helper function. This is a pre-step towards having an mlx5 IB device over an Ethernet port which doesn't support RoCE. Also, rename the de-registration helper and the new helper to refer to the netdev notifier rather than RoCE, to make it clear they are not used only with RoCE. This patch doesn't change any functionality.
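The resulting helper pair follows the usual register/cleanup shape (a sketch mirroring the hunks below; note how roce.nb.notifier_call doubles as the "registered" flag, which keeps the remove helper safe to call unconditionally):

static int example_add_netdev_notifier(struct mlx5_ib_dev *dev)
{
	int err;

	dev->roce.nb.notifier_call = mlx5_netdev_event;
	err = register_netdevice_notifier(&dev->roce.nb);
	if (err)
		dev->roce.nb.notifier_call = NULL; /* mark unregistered */

	return err;
}

static void example_remove_netdev_notifier(struct mlx5_ib_dev *dev)
{
	if (dev->roce.nb.notifier_call) {
		unregister_netdevice_notifier(&dev->roce.nb);
		dev->roce.nb.notifier_call = NULL;
	}
}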
Signed-off-by: Or Gerlitz Reviewed-by: Matan Barak Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 29 ++++++++++++++++++++--------- 1 file changed, 20 insertions(+), 9 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 7e5b80361724..0ee1e4ab02fa 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2843,7 +2843,21 @@ static void mlx5_roce_lag_cleanup(struct mlx5_ib_dev *dev) } } -static void mlx5_remove_roce_notifier(struct mlx5_ib_dev *dev) +static int mlx5_add_netdev_notifier(struct mlx5_ib_dev *dev) +{ + int err; + + dev->roce.nb.notifier_call = mlx5_netdev_event; + err = register_netdevice_notifier(&dev->roce.nb); + if (err) { + dev->roce.nb.notifier_call = NULL; + return err; + } + + return 0; +} + +static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev) { if (dev->roce.nb.notifier_call) { unregister_netdevice_notifier(&dev->roce.nb); @@ -2855,12 +2869,9 @@ static int mlx5_enable_roce(struct mlx5_ib_dev *dev) { int err; - dev->roce.nb.notifier_call = mlx5_netdev_event; - err = register_netdevice_notifier(&dev->roce.nb); - if (err) { - dev->roce.nb.notifier_call = NULL; + err = mlx5_add_netdev_notifier(dev); + if (err) return err; - } err = mlx5_nic_vport_enable_roce(dev->mdev); if (err) @@ -2876,7 +2887,7 @@ err_disable_roce: mlx5_nic_vport_disable_roce(dev->mdev); err_unregister_netdevice_notifier: - mlx5_remove_roce_notifier(dev); + mlx5_remove_netdev_notifier(dev); return err; } @@ -3242,7 +3253,7 @@ err_rsrc: err_disable_roce: if (ll == IB_LINK_LAYER_ETHERNET) { mlx5_disable_roce(dev); - mlx5_remove_roce_notifier(dev); + mlx5_remove_netdev_notifier(dev); } err_free_port: @@ -3259,7 +3270,7 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) struct mlx5_ib_dev *dev = context; enum rdma_link_layer ll = mlx5_ib_port_link_layer(&dev->ib_dev, 1); - mlx5_remove_roce_notifier(dev); + mlx5_remove_netdev_notifier(dev); ib_unregister_device(&dev->ib_dev); mlx5_ib_dealloc_q_counters(dev); destroy_umrc_res(dev); -- cgit v1.2.3 From 45f95acd63222dd1dc752fa904536327b10f1082 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 27 Nov 2016 16:51:35 +0200 Subject: IB/mlx5: Rename RoCE related helpers to reflect being Eth ones This is a pre-step towards also having an mlx5 IB device over Eth ports where RoCE is not supported. We change the roce enable/disable and roce_lag init/fini function names to have _eth instead of _roce. This patch doesn't change any functionality.
Signed-off-by: Or Gerlitz Reviewed-by: Matan Barak Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 0ee1e4ab02fa..b5b7459e9af0 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2802,7 +2802,7 @@ static void get_dev_fw_str(struct ib_device *ibdev, char *str, fw_rev_min(dev->mdev), fw_rev_sub(dev->mdev)); } -static int mlx5_roce_lag_init(struct mlx5_ib_dev *dev) +static int mlx5_eth_lag_init(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; struct mlx5_flow_namespace *ns = mlx5_get_flow_namespace(mdev, @@ -2831,7 +2831,7 @@ err_destroy_vport_lag: return err; } -static void mlx5_roce_lag_cleanup(struct mlx5_ib_dev *dev) +static void mlx5_eth_lag_cleanup(struct mlx5_ib_dev *dev) { struct mlx5_core_dev *mdev = dev->mdev; @@ -2865,7 +2865,7 @@ static void mlx5_remove_netdev_notifier(struct mlx5_ib_dev *dev) } } -static int mlx5_enable_roce(struct mlx5_ib_dev *dev) +static int mlx5_enable_eth(struct mlx5_ib_dev *dev) { int err; @@ -2877,7 +2877,7 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev) if (err) goto err_unregister_netdevice_notifier; - err = mlx5_roce_lag_init(dev); + err = mlx5_eth_lag_init(dev); if (err) goto err_disable_roce; @@ -2891,9 +2891,9 @@ err_unregister_netdevice_notifier: return err; } -static void mlx5_disable_roce(struct mlx5_ib_dev *dev) +static void mlx5_disable_eth(struct mlx5_ib_dev *dev) { - mlx5_roce_lag_cleanup(dev); + mlx5_eth_lag_cleanup(dev); mlx5_nic_vport_disable_roce(dev->mdev); } @@ -3199,14 +3199,14 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) spin_lock_init(&dev->reset_flow_resource_lock); if (ll == IB_LINK_LAYER_ETHERNET) { - err = mlx5_enable_roce(dev); + err = mlx5_enable_eth(dev); if (err) goto err_dealloc; } err = create_dev_resources(&dev->devr); if (err) - goto err_disable_roce; + goto err_disable_eth; err = mlx5_ib_odp_init_one(dev); if (err) @@ -3250,9 +3250,9 @@ err_odp: err_rsrc: destroy_dev_resources(&dev->devr); -err_disable_roce: +err_disable_eth: if (ll == IB_LINK_LAYER_ETHERNET) { - mlx5_disable_roce(dev); + mlx5_disable_eth(dev); mlx5_remove_netdev_notifier(dev); } @@ -3277,7 +3277,7 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context) mlx5_ib_odp_remove_one(dev); destroy_dev_resources(&dev->devr); if (ll == IB_LINK_LAYER_ETHERNET) - mlx5_disable_roce(dev); + mlx5_disable_eth(dev); kfree(dev->port); ib_dealloc_device(&dev->ib_dev); } -- cgit v1.2.3 From ca5b91d63192ceaa41a6145f8c923debb64c71fa Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 27 Nov 2016 16:51:36 +0200 Subject: IB/mlx5: Support RAW Ethernet when RoCE is disabled In some environments, such as certain SRIOV VF configurations, RoCE is not supported for mlx5 Ethernet ports. Currently, the driver will not open an IB device on such a port. This is problematic, since we do want user-space RAW Ethernet (RAW_PACKET QPs) functionality to remain in place. To that end, enhance the relevant driver flows such that we do create a device instance in that case.
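In sketch form, the enable path becomes conditional on the RoCE capability while the netdev notifier (also needed for RAW_PACKET QPs) stays unconditional. This is simplified from the hunks below; the LAG init step is elided and the function name is invented:

static int example_enable_eth(struct mlx5_ib_dev *dev)
{
	int err;

	err = mlx5_add_netdev_notifier(dev); /* always on Eth ports */
	if (err)
		return err;

	if (MLX5_CAP_GEN(dev->mdev, roce)) { /* skip when RoCE absent */
		err = mlx5_nic_vport_enable_roce(dev->mdev);
		if (err)
			goto err_remove_notifier;
	}

	return 0;

err_remove_notifier:
	mlx5_remove_netdev_notifier(dev);
	return err;
}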
Signed-off-by: Or Gerlitz Reviewed-by: Matan Barak Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index b5b7459e9af0..8b013f8b832a 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2779,6 +2779,8 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num, struct ib_port_immutable *immutable) { struct ib_port_attr attr; + struct mlx5_ib_dev *dev = to_mdev(ibdev); + enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num); int err; err = mlx5_ib_query_port(ibdev, port_num, &attr); @@ -2788,7 +2790,8 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num, immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; immutable->core_cap_flags = get_core_cap_flags(ibdev); - immutable->max_mad_size = IB_MGMT_MAD_SIZE; + if ((ll == IB_LINK_LAYER_INFINIBAND) || MLX5_CAP_GEN(dev->mdev, roce)) + immutable->max_mad_size = IB_MGMT_MAD_SIZE; return 0; } @@ -2873,9 +2876,11 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev) if (err) return err; - err = mlx5_nic_vport_enable_roce(dev->mdev); - if (err) - goto err_unregister_netdevice_notifier; + if (MLX5_CAP_GEN(dev->mdev, roce)) { + err = mlx5_nic_vport_enable_roce(dev->mdev); + if (err) + goto err_unregister_netdevice_notifier; + } err = mlx5_eth_lag_init(dev); if (err) @@ -2884,7 +2889,8 @@ static int mlx5_enable_eth(struct mlx5_ib_dev *dev) return 0; err_disable_roce: - mlx5_nic_vport_disable_roce(dev->mdev); + if (MLX5_CAP_GEN(dev->mdev, roce)) + mlx5_nic_vport_disable_roce(dev->mdev); err_unregister_netdevice_notifier: mlx5_remove_netdev_notifier(dev); @@ -2894,7 +2900,8 @@ err_unregister_netdevice_notifier: static void mlx5_disable_eth(struct mlx5_ib_dev *dev) { mlx5_eth_lag_cleanup(dev); - mlx5_nic_vport_disable_roce(dev->mdev); + if (MLX5_CAP_GEN(dev->mdev, roce)) + mlx5_nic_vport_disable_roce(dev->mdev); } static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev) @@ -3016,9 +3023,6 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) port_type_cap = MLX5_CAP_GEN(mdev, port_type); ll = mlx5_port_type_cap_to_rdma_ll(port_type_cap); - if ((ll == IB_LINK_LAYER_ETHERNET) && !MLX5_CAP_GEN(mdev, roce)) - return NULL; - printk_once(KERN_INFO "%s", mlx5_version); dev = (struct mlx5_ib_dev *)ib_alloc_device(sizeof(*dev)); -- cgit v1.2.3 From d949167d68b304c0a00331cf33ef49a29b65d85f Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Thu, 1 Dec 2016 13:43:13 +0200 Subject: IB/mlx5: Report mlx5 packet pacing capabilities when querying device Enable mlx5-based hardware to report packet pacing capabilities from kernel to user space. Packet pacing allows limiting the rate to any value between the minimum and maximum, based on user settings. The capabilities are exposed to user space through query_device by uhw. The following capabilities are reported: 1. The maximum and minimum rate limit in kbps supported by packet pacing. 2. A bitmap showing which QP types are supported by the packet pacing operation.
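A userspace consumer might interpret the reported caps roughly like this (hypothetical snippet; the struct fields come from the uapi header hunk below, the function itself is invented):

#include <rdma/mlx5-abi.h>

/* Sketch: check whether a requested rate (in kbps) can be paced for a
 * given QP type, using the caps returned in the extended query-device
 * response. qpt is a value from enum ib_qp_type.
 */
static int example_rate_ok(const struct mlx5_packet_pacing_caps *caps,
			   int qpt, __u32 rate_kbps)
{
	if (!(caps->supported_qpts & (1u << qpt)))
		return 0; /* pacing unsupported for this QP type */

	return rate_kbps >= caps->qp_rate_limit_min &&
	       rate_kbps <= caps->qp_rate_limit_max;
}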
Signed-off-by: Bodong Wang Reviewed-by: Matan Barak Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 13 +++++++++++++ include/uapi/rdma/mlx5-abi.h | 13 +++++++++++++ 2 files changed, 26 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 8b013f8b832a..6c194000903d 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -693,6 +693,19 @@ static int mlx5_ib_query_device(struct ib_device *ibdev, resp.response_length += sizeof(resp.cqe_comp_caps); } + if (field_avail(typeof(resp), packet_pacing_caps, uhw->outlen)) { + if (MLX5_CAP_QOS(mdev, packet_pacing) && + MLX5_CAP_GEN(mdev, qos)) { + resp.packet_pacing_caps.qp_rate_limit_max = + MLX5_CAP_QOS(mdev, packet_pacing_max_rate); + resp.packet_pacing_caps.qp_rate_limit_min = + MLX5_CAP_QOS(mdev, packet_pacing_min_rate); + resp.packet_pacing_caps.supported_qpts |= + 1 << IB_QPT_RAW_PACKET; + } + resp.response_length += sizeof(resp.packet_pacing_caps); + } + if (uhw->outlen) { err = ib_copy_to_udata(uhw, &resp, resp.response_length); diff --git a/include/uapi/rdma/mlx5-abi.h b/include/uapi/rdma/mlx5-abi.h index 3ebf3db24c34..fae6cdaeb56d 100644 --- a/include/uapi/rdma/mlx5-abi.h +++ b/include/uapi/rdma/mlx5-abi.h @@ -136,12 +136,25 @@ struct mlx5_ib_cqe_comp_caps { __u32 supported_format; /* enum mlx5_ib_cqe_comp_res_format */ }; +struct mlx5_packet_pacing_caps { + __u32 qp_rate_limit_min; + __u32 qp_rate_limit_max; /* In kbps */ + + /* Corresponding bit will be set if qp type from + * 'enum ib_qp_type' is supported, e.g. + * supported_qpts |= 1 << IB_QPT_RAW_PACKET + */ + __u32 supported_qpts; + __u32 reserved; +}; + struct mlx5_ib_query_device_resp { __u32 comp_mask; __u32 response_length; struct mlx5_ib_tso_caps tso_caps; struct mlx5_ib_rss_caps rss_caps; struct mlx5_ib_cqe_comp_caps cqe_comp_caps; + struct mlx5_packet_pacing_caps packet_pacing_caps; __u32 mlx5_ib_support_multi_pkt_send_wqes; __u32 reserved; }; -- cgit v1.2.3 From 528e5a1bd3f0e9b760cb3a1062fce7513712a15d Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Thu, 1 Dec 2016 13:43:14 +0200 Subject: IB/core: Support rate limit for packet pacing Add a new member, rate_limit, to ib_qp_attr; it holds the packet pacing rate in kbps, where 0 means unlimited. IB_QP_RATE_LIMIT is added to the QP attribute mask and can be used by RAW QPs when changing QP state from RTR to RTS or from RTS to RTS.
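Usage then reduces to an ordinary modify-QP call; an illustrative sketch (the function name and the 100000 kbps figure are arbitrary, and the attribute bits are those added in the hunks below):

#include <rdma/ib_verbs.h>

/* Sketch: cap a RAW_PACKET QP at roughly 100 Mb/s while moving it to
 * RTS. rate_limit is in kbps; 0 would mean unlimited.
 */
static int example_set_rate_limit(struct ib_qp *qp)
{
	struct ib_qp_attr attr = {};

	attr.qp_state = IB_QPS_RTS;
	attr.rate_limit = 100000; /* kbps, arbitrary example value */

	return ib_modify_qp(qp, &attr, IB_QP_STATE | IB_QP_RATE_LIMIT);
}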
Signed-off-by: Bodong Wang Reviewed-by: Matan Barak Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/verbs.c | 2 ++ include/rdma/ib_verbs.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index c976f29f7ad2..71580cc28c9e 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1016,6 +1016,7 @@ static const struct { IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_CUR_STATE | IB_QP_QKEY), + [IB_QPT_RAW_PACKET] = IB_QP_RATE_LIMIT, } } }, @@ -1049,6 +1050,7 @@ static const struct { IB_QP_QKEY), [IB_QPT_GSI] = (IB_QP_CUR_STATE | IB_QP_QKEY), + [IB_QPT_RAW_PACKET] = IB_QP_RATE_LIMIT, } }, [IB_QPS_SQD] = { diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 73417a22ee4d..8029d2a51f14 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1102,6 +1102,7 @@ enum ib_qp_attr_mask { IB_QP_RESERVED2 = (1<<22), IB_QP_RESERVED3 = (1<<23), IB_QP_RESERVED4 = (1<<24), + IB_QP_RATE_LIMIT = (1<<25), }; enum ib_qp_state { @@ -1151,6 +1152,7 @@ struct ib_qp_attr { u8 rnr_retry; u8 alt_port_num; u8 alt_timeout; + u32 rate_limit; }; enum ib_wr_opcode { -- cgit v1.2.3 From 189aba99e70030cfb56bd8f199bc5b077a1bc6ff Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Thu, 1 Dec 2016 13:43:15 +0200 Subject: IB/uverbs: Extend modify_qp and support packet pacing A new uverbs command, ib_uverbs_ex_modify_qp, is added to support more QP attributes. The user driver should choose between the legacy and extended API based on the input mask. IB_USER_LEGACY_LAST_QP_ATTR_MASK is added to indicate the highest attribute bit supported by the legacy ib_uverbs_modify_qp, while IB_USER_LAST_QP_ATTR_MASK indicates the highest bit supported by ib_uverbs_ex_modify_qp; the value of the latter mask should be updated whenever a new mask bit is added. Along with this change, rate_limit is supported by the extended command; the user driver can use it to control packet pacing.
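The split between the two commands can be sketched as a single mask check (simplified from the uverbs validation code in the diff below; the helper name is invented):

/* Sketch: the legacy command only accepts attribute bits up to
 * IB_USER_LEGACY_LAST_QP_ATTR_MASK; any newer bit (such as
 * IB_QP_RATE_LIMIT) must go through the extended command.
 */
static bool example_needs_ex_modify_qp(u32 attr_mask)
{
	u32 legacy_ok = (IB_USER_LEGACY_LAST_QP_ATTR_MASK << 1) - 1;

	return (attr_mask & ~legacy_ok) != 0;
}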
Signed-off-by: Bodong Wang Reviewed-by: Matan Barak Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs.h | 1 + drivers/infiniband/core/uverbs_cmd.c | 192 ++++++++++++++++++++++------------ drivers/infiniband/core/uverbs_main.c | 1 + include/uapi/rdma/ib_user_verbs.h | 21 ++++ 4 files changed, 146 insertions(+), 69 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/core/uverbs.h b/drivers/infiniband/core/uverbs.h index df26a741cda6..455034ac994e 100644 --- a/drivers/infiniband/core/uverbs.h +++ b/drivers/infiniband/core/uverbs.h @@ -289,5 +289,6 @@ IB_UVERBS_DECLARE_EX_CMD(modify_wq); IB_UVERBS_DECLARE_EX_CMD(destroy_wq); IB_UVERBS_DECLARE_EX_CMD(create_rwq_ind_table); IB_UVERBS_DECLARE_EX_CMD(destroy_rwq_ind_table); +IB_UVERBS_DECLARE_EX_CMD(modify_qp); #endif /* UVERBS_H */ diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index cdb935ddab69..09b649159e6c 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -2328,96 +2328,88 @@ static int modify_qp_mask(enum ib_qp_type qp_type, int mask) } } -ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, - struct ib_device *ib_dev, - const char __user *buf, int in_len, - int out_len) +static int modify_qp(struct ib_uverbs_file *file, + struct ib_uverbs_ex_modify_qp *cmd, struct ib_udata *udata) { - struct ib_uverbs_modify_qp cmd; - struct ib_udata udata; - struct ib_qp *qp; - struct ib_qp_attr *attr; - int ret; - - if (copy_from_user(&cmd, buf, sizeof cmd)) - return -EFAULT; - - INIT_UDATA(&udata, buf + sizeof cmd, NULL, in_len - sizeof cmd, - out_len); + struct ib_qp_attr *attr; + struct ib_qp *qp; + int ret; attr = kmalloc(sizeof *attr, GFP_KERNEL); if (!attr) return -ENOMEM; - qp = idr_read_qp(cmd.qp_handle, file->ucontext); + qp = idr_read_qp(cmd->base.qp_handle, file->ucontext); if (!qp) { ret = -EINVAL; goto out; } - attr->qp_state = cmd.qp_state; - attr->cur_qp_state = cmd.cur_qp_state; - attr->path_mtu = cmd.path_mtu; - attr->path_mig_state = cmd.path_mig_state; - attr->qkey = cmd.qkey; - attr->rq_psn = cmd.rq_psn; - attr->sq_psn = cmd.sq_psn; - attr->dest_qp_num = cmd.dest_qp_num; - attr->qp_access_flags = cmd.qp_access_flags; - attr->pkey_index = cmd.pkey_index; - attr->alt_pkey_index = cmd.alt_pkey_index; - attr->en_sqd_async_notify = cmd.en_sqd_async_notify; - attr->max_rd_atomic = cmd.max_rd_atomic; - attr->max_dest_rd_atomic = cmd.max_dest_rd_atomic; - attr->min_rnr_timer = cmd.min_rnr_timer; - attr->port_num = cmd.port_num; - attr->timeout = cmd.timeout; - attr->retry_cnt = cmd.retry_cnt; - attr->rnr_retry = cmd.rnr_retry; - attr->alt_port_num = cmd.alt_port_num; - attr->alt_timeout = cmd.alt_timeout; - - memcpy(attr->ah_attr.grh.dgid.raw, cmd.dest.dgid, 16); - attr->ah_attr.grh.flow_label = cmd.dest.flow_label; - attr->ah_attr.grh.sgid_index = cmd.dest.sgid_index; - attr->ah_attr.grh.hop_limit = cmd.dest.hop_limit; - attr->ah_attr.grh.traffic_class = cmd.dest.traffic_class; - attr->ah_attr.dlid = cmd.dest.dlid; - attr->ah_attr.sl = cmd.dest.sl; - attr->ah_attr.src_path_bits = cmd.dest.src_path_bits; - attr->ah_attr.static_rate = cmd.dest.static_rate; - attr->ah_attr.ah_flags = cmd.dest.is_global ? 
IB_AH_GRH : 0; - attr->ah_attr.port_num = cmd.dest.port_num; - - memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd.alt_dest.dgid, 16); - attr->alt_ah_attr.grh.flow_label = cmd.alt_dest.flow_label; - attr->alt_ah_attr.grh.sgid_index = cmd.alt_dest.sgid_index; - attr->alt_ah_attr.grh.hop_limit = cmd.alt_dest.hop_limit; - attr->alt_ah_attr.grh.traffic_class = cmd.alt_dest.traffic_class; - attr->alt_ah_attr.dlid = cmd.alt_dest.dlid; - attr->alt_ah_attr.sl = cmd.alt_dest.sl; - attr->alt_ah_attr.src_path_bits = cmd.alt_dest.src_path_bits; - attr->alt_ah_attr.static_rate = cmd.alt_dest.static_rate; - attr->alt_ah_attr.ah_flags = cmd.alt_dest.is_global ? IB_AH_GRH : 0; - attr->alt_ah_attr.port_num = cmd.alt_dest.port_num; + attr->qp_state = cmd->base.qp_state; + attr->cur_qp_state = cmd->base.cur_qp_state; + attr->path_mtu = cmd->base.path_mtu; + attr->path_mig_state = cmd->base.path_mig_state; + attr->qkey = cmd->base.qkey; + attr->rq_psn = cmd->base.rq_psn; + attr->sq_psn = cmd->base.sq_psn; + attr->dest_qp_num = cmd->base.dest_qp_num; + attr->qp_access_flags = cmd->base.qp_access_flags; + attr->pkey_index = cmd->base.pkey_index; + attr->alt_pkey_index = cmd->base.alt_pkey_index; + attr->en_sqd_async_notify = cmd->base.en_sqd_async_notify; + attr->max_rd_atomic = cmd->base.max_rd_atomic; + attr->max_dest_rd_atomic = cmd->base.max_dest_rd_atomic; + attr->min_rnr_timer = cmd->base.min_rnr_timer; + attr->port_num = cmd->base.port_num; + attr->timeout = cmd->base.timeout; + attr->retry_cnt = cmd->base.retry_cnt; + attr->rnr_retry = cmd->base.rnr_retry; + attr->alt_port_num = cmd->base.alt_port_num; + attr->alt_timeout = cmd->base.alt_timeout; + attr->rate_limit = cmd->rate_limit; + + memcpy(attr->ah_attr.grh.dgid.raw, cmd->base.dest.dgid, 16); + attr->ah_attr.grh.flow_label = cmd->base.dest.flow_label; + attr->ah_attr.grh.sgid_index = cmd->base.dest.sgid_index; + attr->ah_attr.grh.hop_limit = cmd->base.dest.hop_limit; + attr->ah_attr.grh.traffic_class = cmd->base.dest.traffic_class; + attr->ah_attr.dlid = cmd->base.dest.dlid; + attr->ah_attr.sl = cmd->base.dest.sl; + attr->ah_attr.src_path_bits = cmd->base.dest.src_path_bits; + attr->ah_attr.static_rate = cmd->base.dest.static_rate; + attr->ah_attr.ah_flags = cmd->base.dest.is_global ? + IB_AH_GRH : 0; + attr->ah_attr.port_num = cmd->base.dest.port_num; + + memcpy(attr->alt_ah_attr.grh.dgid.raw, cmd->base.alt_dest.dgid, 16); + attr->alt_ah_attr.grh.flow_label = cmd->base.alt_dest.flow_label; + attr->alt_ah_attr.grh.sgid_index = cmd->base.alt_dest.sgid_index; + attr->alt_ah_attr.grh.hop_limit = cmd->base.alt_dest.hop_limit; + attr->alt_ah_attr.grh.traffic_class = cmd->base.alt_dest.traffic_class; + attr->alt_ah_attr.dlid = cmd->base.alt_dest.dlid; + attr->alt_ah_attr.sl = cmd->base.alt_dest.sl; + attr->alt_ah_attr.src_path_bits = cmd->base.alt_dest.src_path_bits; + attr->alt_ah_attr.static_rate = cmd->base.alt_dest.static_rate; + attr->alt_ah_attr.ah_flags = cmd->base.alt_dest.is_global ? 
+ IB_AH_GRH : 0; + attr->alt_ah_attr.port_num = cmd->base.alt_dest.port_num; if (qp->real_qp == qp) { - if (cmd.attr_mask & IB_QP_AV) { + if (cmd->base.attr_mask & IB_QP_AV) { ret = ib_resolve_eth_dmac(qp->device, &attr->ah_attr); if (ret) goto release_qp; } ret = qp->device->modify_qp(qp, attr, - modify_qp_mask(qp->qp_type, cmd.attr_mask), &udata); + modify_qp_mask(qp->qp_type, + cmd->base.attr_mask), + udata); } else { - ret = ib_modify_qp(qp, attr, modify_qp_mask(qp->qp_type, cmd.attr_mask)); + ret = ib_modify_qp(qp, attr, + modify_qp_mask(qp->qp_type, + cmd->base.attr_mask)); } - if (ret) - goto release_qp; - - ret = in_len; - release_qp: put_qp_read(qp); @@ -2427,6 +2419,68 @@ out: return ret; } +ssize_t ib_uverbs_modify_qp(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + const char __user *buf, int in_len, + int out_len) +{ + struct ib_uverbs_ex_modify_qp cmd = {}; + struct ib_udata udata; + int ret; + + if (copy_from_user(&cmd.base, buf, sizeof(cmd.base))) + return -EFAULT; + + if (cmd.base.attr_mask & + ~((IB_USER_LEGACY_LAST_QP_ATTR_MASK << 1) - 1)) + return -EOPNOTSUPP; + + INIT_UDATA(&udata, buf + sizeof(cmd.base), NULL, + in_len - sizeof(cmd.base), out_len); + + ret = modify_qp(file, &cmd, &udata); + if (ret) + return ret; + + return in_len; +} + +int ib_uverbs_ex_modify_qp(struct ib_uverbs_file *file, + struct ib_device *ib_dev, + struct ib_udata *ucore, + struct ib_udata *uhw) +{ + struct ib_uverbs_ex_modify_qp cmd = {}; + int ret; + + /* + * Last bit is reserved for extending the attr_mask by + * using another field. + */ + BUILD_BUG_ON(IB_USER_LAST_QP_ATTR_MASK == (1 << 31)); + + if (ucore->inlen < sizeof(cmd.base)) + return -EINVAL; + + ret = ib_copy_from_udata(&cmd, ucore, min(sizeof(cmd), ucore->inlen)); + if (ret) + return ret; + + if (cmd.base.attr_mask & + ~((IB_USER_LAST_QP_ATTR_MASK << 1) - 1)) + return -EOPNOTSUPP; + + if (ucore->inlen > sizeof(cmd)) { + if (ib_is_udata_cleared(ucore, sizeof(cmd), + ucore->inlen - sizeof(cmd))) + return -EOPNOTSUPP; + } + + ret = modify_qp(file, &cmd, uhw); + + return ret; +} + ssize_t ib_uverbs_destroy_qp(struct ib_uverbs_file *file, struct ib_device *ib_dev, const char __user *buf, int in_len, diff --git a/drivers/infiniband/core/uverbs_main.c b/drivers/infiniband/core/uverbs_main.c index 0012fa58c105..80839c31c887 100644 --- a/drivers/infiniband/core/uverbs_main.c +++ b/drivers/infiniband/core/uverbs_main.c @@ -137,6 +137,7 @@ static int (*uverbs_ex_cmd_table[])(struct ib_uverbs_file *file, [IB_USER_VERBS_EX_CMD_DESTROY_WQ] = ib_uverbs_ex_destroy_wq, [IB_USER_VERBS_EX_CMD_CREATE_RWQ_IND_TBL] = ib_uverbs_ex_create_rwq_ind_table, [IB_USER_VERBS_EX_CMD_DESTROY_RWQ_IND_TBL] = ib_uverbs_ex_destroy_rwq_ind_table, + [IB_USER_VERBS_EX_CMD_MODIFY_QP] = ib_uverbs_ex_modify_qp, }; static void ib_uverbs_add_one(struct ib_device *device); diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 90ba5e88ec00..dfdfe4e92d31 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -37,6 +37,7 @@ #define IB_USER_VERBS_H #include +#include /* * Increment this value if any changes that break userspace ABI @@ -93,6 +94,7 @@ enum { IB_USER_VERBS_EX_CMD_QUERY_DEVICE = IB_USER_VERBS_CMD_QUERY_DEVICE, IB_USER_VERBS_EX_CMD_CREATE_CQ = IB_USER_VERBS_CMD_CREATE_CQ, IB_USER_VERBS_EX_CMD_CREATE_QP = IB_USER_VERBS_CMD_CREATE_QP, + IB_USER_VERBS_EX_CMD_MODIFY_QP = IB_USER_VERBS_CMD_MODIFY_QP, IB_USER_VERBS_EX_CMD_CREATE_FLOW = IB_USER_VERBS_CMD_THRESHOLD, 
IB_USER_VERBS_EX_CMD_DESTROY_FLOW, IB_USER_VERBS_EX_CMD_CREATE_WQ, @@ -545,6 +547,14 @@ enum { IB_UVERBS_CREATE_QP_SUP_COMP_MASK = IB_UVERBS_CREATE_QP_MASK_IND_TABLE, }; +enum { + IB_USER_LEGACY_LAST_QP_ATTR_MASK = IB_QP_DEST_QPN +}; + +enum { + IB_USER_LAST_QP_ATTR_MASK = IB_QP_RATE_LIMIT +}; + struct ib_uverbs_ex_create_qp { __u64 user_handle; __u32 pd_handle; @@ -684,9 +694,20 @@ struct ib_uverbs_modify_qp { __u64 driver_data[0]; }; +struct ib_uverbs_ex_modify_qp { + struct ib_uverbs_modify_qp base; + __u32 rate_limit; + __u32 reserved; +}; + struct ib_uverbs_modify_qp_resp { }; +struct ib_uverbs_ex_modify_qp_resp { + __u32 comp_mask; + __u32 response_length; +}; + struct ib_uverbs_destroy_qp { __u64 response; __u32 qp_handle; -- cgit v1.2.3 From 7d29f349a4b9dcf5bc9dcc05630d6a7f6b6b3ccd Mon Sep 17 00:00:00 2001 From: Bodong Wang Date: Thu, 1 Dec 2016 13:43:16 +0200 Subject: IB/mlx5: Properly adjust rate limit on QP state transitions - Add the MODIFY_QP_EX command to extend modify_qp. - The rate limit is updated on the following state transitions: RTR2RTS and RTS2RTS. The limit is removed when the SQ moves to the RST or ERR state. Signed-off-by: Bodong Wang Reviewed-by: Matan Barak Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 3 +- drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 + drivers/infiniband/hw/mlx5/qp.c | 74 ++++++++++++++++++++++++++++++++---- 3 files changed, 69 insertions(+), 9 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 6c194000903d..cda541ced141 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -3105,7 +3105,8 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev) dev->ib_dev.uverbs_ex_cmd_mask = (1ull << IB_USER_VERBS_EX_CMD_QUERY_DEVICE) | (1ull << IB_USER_VERBS_EX_CMD_CREATE_CQ) | - (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP); + (1ull << IB_USER_VERBS_EX_CMD_CREATE_QP) | + (1ull << IB_USER_VERBS_EX_CMD_MODIFY_QP); dev->ib_dev.query_device = mlx5_ib_query_device; dev->ib_dev.query_port = mlx5_ib_query_port; diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index df3d6af3f683..ab8961cc8bca 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -389,6 +389,7 @@ struct mlx5_ib_qp { struct list_head qps_list; struct list_head cq_recv_list; struct list_head cq_send_list; + u32 rate_limit; }; struct mlx5_ib_cq_buf { diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index aa27688f5ae9..a69524fb6032 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -78,12 +78,14 @@ struct mlx5_wqe_eth_pad { enum raw_qp_set_mask_map { MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID = 1UL << 0, + MLX5_RAW_QP_RATE_LIMIT = 1UL << 1, }; struct mlx5_modify_raw_qp_param { u16 operation; u32 set_mask; /* raw_qp_set_mask_map */ + u32 rate_limit; u8 rq_q_ctr_id; }; @@ -2470,8 +2472,14 @@ out: } static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev, - struct mlx5_ib_sq *sq, int new_state) + struct mlx5_ib_sq *sq, + int new_state, + const struct mlx5_modify_raw_qp_param *raw_qp_param) { + struct mlx5_ib_qp *ibqp = sq->base.container_mibqp; + u32 old_rate = ibqp->rate_limit; + u32 new_rate = old_rate; + u16 rl_index = 0; void *in; void *sqc; int inlen; @@ -2487,10 +2495,44 @@ static int modify_raw_packet_qp_sq(struct mlx5_core_dev *dev, sqc = MLX5_ADDR_OF(modify_sq_in, in, ctx); MLX5_SET(sqc, sqc, state, new_state);
+ if (raw_qp_param->set_mask & MLX5_RAW_QP_RATE_LIMIT) { + if (new_state != MLX5_SQC_STATE_RDY) + pr_warn("%s: Rate limit can only be changed when SQ is moving to RDY\n", + __func__); + else + new_rate = raw_qp_param->rate_limit; + } + + if (old_rate != new_rate) { + if (new_rate) { + err = mlx5_rl_add_rate(dev, new_rate, &rl_index); + if (err) { + pr_err("Failed configuring rate %u: %d\n", + new_rate, err); + goto out; + } + } + + MLX5_SET64(modify_sq_in, in, modify_bitmask, 1); + MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, rl_index); + } + err = mlx5_core_modify_sq(dev, sq->base.mqp.qpn, in, inlen); - if (err) + if (err) { + /* Remove new rate from table if failed */ + if (new_rate && + old_rate != new_rate) + mlx5_rl_remove_rate(dev, new_rate); goto out; + } + /* Only remove the old rate after new rate was set */ + if ((old_rate && + (old_rate != new_rate)) || + (new_state != MLX5_SQC_STATE_RDY)) + mlx5_rl_remove_rate(dev, old_rate); + + ibqp->rate_limit = new_rate; sq->state = new_state; out: @@ -2505,6 +2547,8 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, struct mlx5_ib_raw_packet_qp *raw_packet_qp = &qp->raw_packet_qp; struct mlx5_ib_rq *rq = &raw_packet_qp->rq; struct mlx5_ib_sq *sq = &raw_packet_qp->sq; + int modify_rq = !!qp->rq.wqe_cnt; + int modify_sq = !!qp->sq.wqe_cnt; int rq_state; int sq_state; int err; @@ -2522,10 +2566,18 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, rq_state = MLX5_RQC_STATE_RST; sq_state = MLX5_SQC_STATE_RST; break; - case MLX5_CMD_OP_INIT2INIT_QP: - case MLX5_CMD_OP_INIT2RTR_QP: case MLX5_CMD_OP_RTR2RTS_QP: case MLX5_CMD_OP_RTS2RTS_QP: + if (raw_qp_param->set_mask == + MLX5_RAW_QP_RATE_LIMIT) { + modify_rq = 0; + sq_state = sq->state; + } else { + return raw_qp_param->set_mask ? 
-EINVAL : 0; + } + break; + case MLX5_CMD_OP_INIT2INIT_QP: + case MLX5_CMD_OP_INIT2RTR_QP: if (raw_qp_param->set_mask) return -EINVAL; else @@ -2535,13 +2587,13 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, return -EINVAL; } - if (qp->rq.wqe_cnt) { - err = modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param); + if (modify_rq) { + err = modify_raw_packet_qp_rq(dev, rq, rq_state, raw_qp_param); if (err) return err; } - if (qp->sq.wqe_cnt) { + if (modify_sq) { if (tx_affinity) { err = modify_raw_packet_tx_affinity(dev->mdev, sq, tx_affinity); @@ -2549,7 +2601,7 @@ static int modify_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp, return err; } - return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state); + return modify_raw_packet_qp_sq(dev->mdev, sq, sq_state, raw_qp_param); } return 0; @@ -2804,6 +2856,12 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, raw_qp_param.rq_q_ctr_id = mibport->q_cnt_id; raw_qp_param.set_mask |= MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID; } + + if (attr_mask & IB_QP_RATE_LIMIT) { + raw_qp_param.rate_limit = attr->rate_limit; + raw_qp_param.set_mask |= MLX5_RAW_QP_RATE_LIMIT; + } + err = modify_raw_packet_qp(dev, qp, &raw_qp_param, tx_affinity); } else { err = mlx5_core_qp_modify(dev->mdev, op, optpar, context, -- cgit v1.2.3 From 14ab8896f5d993c5f427504276e3c42ccb3cc354 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 24 Oct 2016 22:48:21 +0200 Subject: IB/mlx5: avoid bogus -Wmaybe-uninitialized warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We get a false-positive warning in linux-next for the mlx5 driver: infiniband/hw/mlx5/mr.c: In function ‘mlx5_ib_reg_user_mr’: infiniband/hw/mlx5/mr.c:1172:5: error: ‘order’ may be used uninitialized in this function [-Werror=maybe-uninitialized] infiniband/hw/mlx5/mr.c:1161:6: note: ‘order’ was declared here infiniband/hw/mlx5/mr.c:1173:6: error: ‘ncont’ may be used uninitialized in this function [-Werror=maybe-uninitialized] infiniband/hw/mlx5/mr.c:1160:6: note: ‘ncont’ was declared here infiniband/hw/mlx5/mr.c:1173:6: error: ‘page_shift’ may be used uninitialized in this function [-Werror=maybe-uninitialized] infiniband/hw/mlx5/mr.c:1158:6: note: ‘page_shift’ was declared here infiniband/hw/mlx5/mr.c:1143:13: error: ‘npages’ may be used uninitialized in this function [-Werror=maybe-uninitialized] infiniband/hw/mlx5/mr.c:1159:6: note: ‘npages’ was declared here I had a trivial workaround for gcc-5 or higher, but that didn't work on gcc-4.9 unfortunately. The only way I found to avoid the warnings for gcc-4.9, short of initializing each of the arguments first, was to change the calling conventions to separate the error code from the umem pointer. This avoids casting the error codes from one pointer to another incompatible pointer, and lets gcc figure out that the data is actually valid whenever we return successfully.
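As an illustration of the conversion, here is a minimal sketch with a hypothetical foo_get() pair (not the actual mr.c code); IS_ERR()/PTR_ERR() come from <linux/err.h>:

#include <linux/err.h>

struct foo;
struct foo *foo_alloc(void);	/* hypothetical, may return an ERR_PTR */
int foo_count(struct foo *f);	/* hypothetical */

/*
 * Old style: the error is encoded in the returned pointer, so gcc
 * cannot always prove that *npages was written on the success path.
 */
struct foo *foo_get_old(int *npages)
{
	struct foo *f = foo_alloc();

	if (IS_ERR(f))
		return f;	/* *npages left untouched */
	*npages = foo_count(f);
	return f;
}

/*
 * New style: int return plus an output argument, so a zero return
 * value implies every output was written and the warning goes away.
 */
int foo_get_new(struct foo **f, int *npages)
{
	*f = foo_alloc();
	if (IS_ERR(*f))
		return PTR_ERR(*f);
	*npages = foo_count(*f);
	return 0;
}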
Acked-by: Leon Romanovsky Signed-off-by: Arnd Bergmann Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/mr.c | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 5258ac870e6a..67985c69f9b9 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -844,30 +844,34 @@ static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev, umrwr->mkey = key; } -static struct ib_umem *mr_umem_get(struct ib_pd *pd, u64 start, u64 length, - int access_flags, int *npages, - int *page_shift, int *ncont, int *order) +static int mr_umem_get(struct ib_pd *pd, u64 start, u64 length, + int access_flags, struct ib_umem **umem, + int *npages, int *page_shift, int *ncont, + int *order) { struct mlx5_ib_dev *dev = to_mdev(pd->device); - struct ib_umem *umem = ib_umem_get(pd->uobject->context, start, length, - access_flags, 0); - if (IS_ERR(umem)) { + int err; + + *umem = ib_umem_get(pd->uobject->context, start, length, + access_flags, 0); + err = PTR_ERR_OR_ZERO(*umem); + if (err < 0) { mlx5_ib_err(dev, "umem get failed (%ld)\n", PTR_ERR(umem)); - return (void *)umem; + return err; } - mlx5_ib_cont_pages(umem, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages, + mlx5_ib_cont_pages(*umem, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages, page_shift, ncont, order); if (!*npages) { mlx5_ib_warn(dev, "avoid zero region\n"); - ib_umem_release(umem); - return ERR_PTR(-EINVAL); + ib_umem_release(*umem); + return -EINVAL; } mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n", *npages, *ncont, *order, *page_shift); - return umem; + return 0; } static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc) @@ -1193,11 +1197,11 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", start, virt_addr, length, access_flags); - umem = mr_umem_get(pd, start, length, access_flags, &npages, + err = mr_umem_get(pd, start, length, access_flags, &umem, &npages, &page_shift, &ncont, &order); - if (IS_ERR(umem)) - return (void *)umem; + if (err < 0) + return ERR_PTR(err); if (use_umr(order)) { mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift, @@ -1371,10 +1375,9 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, */ flags |= IB_MR_REREG_TRANS; ib_umem_release(mr->umem); - mr->umem = mr_umem_get(pd, addr, len, access_flags, &npages, - &page_shift, &ncont, &order); - if (IS_ERR(mr->umem)) { - err = PTR_ERR(mr->umem); + err = mr_umem_get(pd, addr, len, access_flags, &mr->umem, + &npages, &page_shift, &ncont, &order); + if (err < 0) { mr->umem = NULL; return err; } -- cgit v1.2.3 From b42dde478bcaa8113b0d1cd82ad0048372c5599d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 14 Nov 2016 08:44:11 -0800 Subject: IB/mlx4: Rework special QP creation error path The special QP creation error path relies on offsetof(struct mlx4_ib_sqp, qp) == 0. Remove this assumption; dropping it makes the QP creation code easier to understand.
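For background, a minimal sketch (hypothetical structs, not the mlx4 code) of why the old error path only worked by accident: when a struct is embedded as the first member, a pointer to the member and a pointer to its container are numerically equal, so freeing either happens to work, but only while offsetof() stays zero.

#include <linux/slab.h>
#include <linux/stddef.h>

struct inner {
	int x;
};

struct outer {			/* hypothetical container */
	struct inner in;	/* offsetof(struct outer, in) == 0 */
	int extra;
};

static void outer_example(void)
{
	struct outer *o = kzalloc(sizeof(*o), GFP_KERNEL);

	if (!o)
		return;
	/*
	 * kfree(&o->in) would free the whole object only because 'in'
	 * is the first member; insert a member before it and the same
	 * call corrupts the heap. Freeing the container pointer, as
	 * the patch below does by tracking sqp separately from qp, is
	 * always safe.
	 */
	kfree(o);
}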
Signed-off-by: Bart Van Assche Cc: Yishai Hadas Reviewed-by: Laurence Oberman Reviewed-by: Yishai Hadas Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx4/qp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index c22454383976..c068add8838b 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -644,7 +644,7 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, int qpn; int err; struct ib_qp_cap backup_cap; - struct mlx4_ib_sqp *sqp; + struct mlx4_ib_sqp *sqp = NULL; struct mlx4_ib_qp *qp; enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type; struct mlx4_ib_cq *mcq; @@ -933,7 +933,9 @@ err_db: mlx4_db_free(dev->dev, &qp->db); err: - if (!*caller_qp) + if (sqp) + kfree(sqp); + else if (!*caller_qp) kfree(qp); return err; } -- cgit v1.2.3 From 626bc02d4d33510b6ecb6f37c577f844cc6cfc57 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 5 Dec 2016 17:18:08 -0800 Subject: mlx5: Use { } instead of { 0 } to init struct Detected by sparse. Signed-off-by: Bart Van Assche Cc: Eli Cohen Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index cda541ced141..b81736d625fc 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -127,7 +127,7 @@ static int mlx5_netdev_event(struct notifier_block *this, if ((upper == ndev || (!upper && ndev == ibdev->roce.netdev)) && ibdev->ib_active) { - struct ib_event ibev = {0}; + struct ib_event ibev = { }; ibev.device = &ibdev->ib_dev; ibev.event = (event == NETDEV_UP) ? -- cgit v1.2.3 From 3d6bdf1625857ba50f433bd140dada387432f051 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 5 Dec 2016 17:18:27 -0800 Subject: mlx5: Remove a set-but-not-used variable This has been detected by building the mlx5 driver with W=1. Fixes: 1a412fb1caa2 ('IB/mlx5: Modify QP commands via mlx5 ifc') Signed-off-by: Bart Van Assche Cc: Eli Cohen Acked-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index a69524fb6032..30f6066b0560 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -2657,7 +2657,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, struct mlx5_ib_port *mibport = NULL; enum mlx5_qp_state mlx5_cur, mlx5_new; enum mlx5_qp_optpar optpar; - int sqd_event; int mlx5_st; int err; u16 op; @@ -2804,12 +2803,6 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp, if (qp->rq.wqe_cnt && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) context->db_rec_addr = cpu_to_be64(qp->db.dma); - if (cur_state == IB_QPS_RTS && new_state == IB_QPS_SQD && - attr_mask & IB_QP_EN_SQD_ASYNC_NOTIFY && attr->en_sqd_async_notify) - sqd_event = 1; - else - sqd_event = 0; - if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) { u8 port_num = (attr_mask & IB_QP_PORT ?
attr->port_num : qp->port) - 1; -- cgit v1.2.3 From 1974ab9d9d5b6eeafa629f793fdf59958646cb9d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 5 Dec 2016 17:19:52 -0800 Subject: mlx5, calc_sq_size(): Make a debug message more informative Make it clear that qp->sq.wqe_cnt is not the number of WQEs. Signed-off-by: Bart Van Assche Cc: Eli Cohen Acked-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mlx5/qp.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 30f6066b0560..cc24f2d429b9 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -407,7 +407,8 @@ static int calc_sq_size(struct mlx5_ib_dev *dev, struct ib_qp_init_attr *attr, wq_size = roundup_pow_of_two(attr->cap.max_send_wr * wqe_size); qp->sq.wqe_cnt = wq_size / MLX5_SEND_WQE_BB; if (qp->sq.wqe_cnt > (1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz))) { - mlx5_ib_dbg(dev, "wqe count(%d) exceeds limits(%d)\n", + mlx5_ib_dbg(dev, "send queue size (%d * %d / %d -> %d) exceeds limits(%d)\n", + attr->cap.max_send_wr, wqe_size, MLX5_SEND_WQE_BB, qp->sq.wqe_cnt, 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz)); return -ENOMEM; -- cgit v1.2.3 From 7ceb740c540dde362b3055ad92c6a38009eb7a83 Mon Sep 17 00:00:00 2001 From: Souptick Joarder Date: Fri, 2 Dec 2016 00:11:59 +0530 Subject: IB/mthca: Replace pci_pool_alloc by pci_pool_zalloc In mthca_create_ah(), pci_pool_alloc() followed by memset() is replaced by pci_pool_zalloc(). Signed-off-by: Souptick joarder Reviewed-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/hw/mthca/mthca_av.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband') diff --git a/drivers/infiniband/hw/mthca/mthca_av.c b/drivers/infiniband/hw/mthca/mthca_av.c index bcac294042f5..c9f0f364f484 100644 --- a/drivers/infiniband/hw/mthca/mthca_av.c +++ b/drivers/infiniband/hw/mthca/mthca_av.c @@ -186,8 +186,8 @@ int mthca_create_ah(struct mthca_dev *dev, on_hca_fail: if (ah->type == MTHCA_AH_PCI_POOL) { - ah->av = pci_pool_alloc(dev->av_table.pool, - GFP_ATOMIC, &ah->avdma); + ah->av = pci_pool_zalloc(dev->av_table.pool, + GFP_ATOMIC, &ah->avdma); if (!ah->av) return -ENOMEM; @@ -196,8 +196,6 @@ on_hca_fail: ah->key = pd->ntmr.ibmr.lkey; - memset(av, 0, MTHCA_AV_SIZE); - av->port_pd = cpu_to_be32(pd->pd_num | (ah_attr->port_num << 24)); av->g_slid = ah_attr->src_path_bits; av->dlid = cpu_to_be16(ah_attr->dlid); -- cgit v1.2.3
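The mthca change above is an instance of a generic pattern; a minimal sketch (hypothetical pool and length parameters, not the mthca code):

#include <linux/pci.h>
#include <linux/string.h>

/* Before: allocate, then zero the buffer by hand. */
static void *av_buf_old(struct pci_pool *pool, dma_addr_t *dma, size_t len)
{
	void *buf = pci_pool_alloc(pool, GFP_ATOMIC, dma);

	if (buf)
		memset(buf, 0, len);
	return buf;
}

/* After: one call; the returned memory is already zeroed. */
static void *av_buf_new(struct pci_pool *pool, dma_addr_t *dma)
{
	return pci_pool_zalloc(pool, GFP_ATOMIC, dma);
}

Besides saving a call, the zalloc form removes the risk of zeroing with the wrong length, and of a later patch using the buffer before the memset().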