From 50d46335b03baa9767e79a6f4757df7bd31eba21 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 Sep 2016 12:56:16 +0200 Subject: IB/core: rename pd->local_mr to pd->__internal_mr This has two reasons: a) to clearly mark that drivers don't have any business using it, and b) because we're going to use it for the (dangerous) global rkey soon, so that drivers don't create on themselves. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Jason Gunthorpe Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 2 +- drivers/infiniband/core/verbs.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index f6647318138d..fe784a908817 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -571,7 +571,7 @@ ssize_t ib_uverbs_alloc_pd(struct ib_uverbs_file *file, pd->device = ib_dev; pd->uobject = uobj; - pd->local_mr = NULL; + pd->__internal_mr = NULL; atomic_set(&pd->usecnt, 0); uobj->object = pd; diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index f2b776efab3a..9159ea5ad821 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -237,7 +237,7 @@ struct ib_pd *ib_alloc_pd(struct ib_device *device) pd->device = device; pd->uobject = NULL; - pd->local_mr = NULL; + pd->__internal_mr = NULL; atomic_set(&pd->usecnt, 0); if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) @@ -251,8 +251,8 @@ struct ib_pd *ib_alloc_pd(struct ib_device *device) return (struct ib_pd *)mr; } - pd->local_mr = mr; - pd->local_dma_lkey = pd->local_mr->lkey; + pd->__internal_mr = mr; + pd->local_dma_lkey = pd->__internal_mr->lkey; } return pd; } @@ -270,10 +270,10 @@ void ib_dealloc_pd(struct ib_pd *pd) { int ret; - if (pd->local_mr) { - ret = ib_dereg_mr(pd->local_mr); + if (pd->__internal_mr) { + ret = ib_dereg_mr(pd->__internal_mr); WARN_ON(ret); - pd->local_mr = NULL; + pd->__internal_mr = NULL; } /* uverbs manipulates usecnt with proper locking, while the kabi -- cgit v1.2.3 From ed082d36a7b2c27d1cda55fdfb28af18040c4a89 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 Sep 2016 12:56:17 +0200 Subject: IB/core: add support to create a unsafe global rkey to ib_create_pd Instead of exposing ib_get_dma_mr to ULPs and letting them use it more or less unchecked, this moves the capability of creating a global rkey into the RDMA core, where it can be easily audited. It also prints a warning everytime this feature is used as well. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Jason Gunthorpe Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/core/mad.c | 2 +- drivers/infiniband/core/verbs.c | 27 ++++++++++++++++++---- drivers/infiniband/hw/mlx4/mad.c | 2 +- drivers/infiniband/hw/mlx4/main.c | 2 +- drivers/infiniband/hw/mlx5/main.c | 2 +- drivers/infiniband/ulp/ipoib/ipoib_verbs.c | 2 +- drivers/infiniband/ulp/iser/iser_verbs.c | 2 +- drivers/infiniband/ulp/isert/ib_isert.c | 2 +- drivers/infiniband/ulp/srp/ib_srp.c | 2 +- drivers/infiniband/ulp/srpt/ib_srpt.c | 2 +- drivers/nvme/host/rdma.c | 2 +- drivers/nvme/target/rdma.c | 2 +- .../staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c | 2 +- include/rdma/ib_verbs.h | 20 +++++++++++++++- net/9p/trans_rdma.c | 2 +- net/rds/ib.c | 2 +- net/sunrpc/xprtrdma/svc_rdma_transport.c | 2 +- net/sunrpc/xprtrdma/verbs.c | 2 +- 18 files changed, 57 insertions(+), 22 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 2d49228f28b2..95d33a3572e6 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -3160,7 +3160,7 @@ static int ib_mad_port_open(struct ib_device *device, goto error3; } - port_priv->pd = ib_alloc_pd(device); + port_priv->pd = ib_alloc_pd(device, 0); if (IS_ERR(port_priv->pd)) { dev_err(&device->dev, "Couldn't create ib_mad PD\n"); ret = PTR_ERR(port_priv->pd); diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 9159ea5ad821..e87b5187729c 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -227,9 +227,11 @@ EXPORT_SYMBOL(rdma_port_get_link_layer); * Every PD has a local_dma_lkey which can be used as the lkey value for local * memory operations. */ -struct ib_pd *ib_alloc_pd(struct ib_device *device) +struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, + const char *caller) { struct ib_pd *pd; + int mr_access_flags = 0; pd = device->alloc_pd(device, NULL, NULL); if (IS_ERR(pd)) @@ -239,24 +241,39 @@ struct ib_pd *ib_alloc_pd(struct ib_device *device) pd->uobject = NULL; pd->__internal_mr = NULL; atomic_set(&pd->usecnt, 0); + pd->flags = flags; if (device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY) pd->local_dma_lkey = device->local_dma_lkey; - else { + else + mr_access_flags |= IB_ACCESS_LOCAL_WRITE; + + if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) { + pr_warn("%s: enabling unsafe global rkey\n", caller); + mr_access_flags |= IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE; + } + + if (mr_access_flags) { struct ib_mr *mr; - mr = ib_get_dma_mr(pd, IB_ACCESS_LOCAL_WRITE); + mr = ib_get_dma_mr(pd, mr_access_flags); if (IS_ERR(mr)) { ib_dealloc_pd(pd); return (struct ib_pd *)mr; } pd->__internal_mr = mr; - pd->local_dma_lkey = pd->__internal_mr->lkey; + + if (!(device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) + pd->local_dma_lkey = pd->__internal_mr->lkey; + + if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) + pd->unsafe_global_rkey = pd->__internal_mr->rkey; } + return pd; } -EXPORT_SYMBOL(ib_alloc_pd); +EXPORT_SYMBOL(__ib_alloc_pd); /** * ib_dealloc_pd - Deallocates a protection domain. diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 9c2e53d28f98..517346f8ed75 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -1897,7 +1897,7 @@ static int create_pv_resources(struct ib_device *ibdev, int slave, int port, goto err_buf; } - ctx->pd = ib_alloc_pd(ctx->ib_dev); + ctx->pd = ib_alloc_pd(ctx->ib_dev, 0); if (IS_ERR(ctx->pd)) { ret = PTR_ERR(ctx->pd); pr_err("Couldn't create tunnel PD (%d)\n", ret); diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 2af44c2de262..a96e78c2c75a 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -1259,7 +1259,7 @@ static struct ib_xrcd *mlx4_ib_alloc_xrcd(struct ib_device *ibdev, if (err) goto err1; - xrcd->pd = ib_alloc_pd(ibdev); + xrcd->pd = ib_alloc_pd(ibdev, 0); if (IS_ERR(xrcd->pd)) { err = PTR_ERR(xrcd->pd); goto err2; diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index f02a975320bd..cf1eee492a4e 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -2223,7 +2223,7 @@ static int create_umr_res(struct mlx5_ib_dev *dev) goto error_0; } - pd = ib_alloc_pd(&dev->ib_dev); + pd = ib_alloc_pd(&dev->ib_dev, 0); if (IS_ERR(pd)) { mlx5_ib_dbg(dev, "Couldn't create PD for sync UMR QP\n"); ret = PTR_ERR(pd); diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c index c55ecb2c3736..6067f075772a 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c @@ -147,7 +147,7 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca) int ret, size; int i; - priv->pd = ib_alloc_pd(priv->ca); + priv->pd = ib_alloc_pd(priv->ca, 0); if (IS_ERR(priv->pd)) { printk(KERN_WARNING "%s: failed to allocate PD\n", ca->name); return -ENODEV; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 1b4945367e4f..e9de99219d74 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -88,7 +88,7 @@ static int iser_create_device_ib_res(struct iser_device *device) device->comps_used, ib_dev->name, ib_dev->num_comp_vectors, max_cqe); - device->pd = ib_alloc_pd(ib_dev); + device->pd = ib_alloc_pd(ib_dev, 0); if (IS_ERR(device->pd)) goto pd_err; diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index ba6be060a476..8df608ede366 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -309,7 +309,7 @@ isert_create_device_ib_res(struct isert_device *device) if (ret) goto out; - device->pd = ib_alloc_pd(ib_dev); + device->pd = ib_alloc_pd(ib_dev, 0); if (IS_ERR(device->pd)) { ret = PTR_ERR(device->pd); isert_err("failed to allocate pd, device %p, ret=%d\n", diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c index 3322ed750172..579b8aedfcdd 100644 --- a/drivers/infiniband/ulp/srp/ib_srp.c +++ b/drivers/infiniband/ulp/srp/ib_srp.c @@ -3573,7 +3573,7 @@ static void srp_add_one(struct ib_device *device) INIT_LIST_HEAD(&srp_dev->dev_list); srp_dev->dev = device; - srp_dev->pd = ib_alloc_pd(device); + srp_dev->pd = ib_alloc_pd(device, 0); if (IS_ERR(srp_dev->pd)) goto free_dev; diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index dfa23b075a88..48a44af740a6 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -2475,7 +2475,7 @@ static void srpt_add_one(struct ib_device *device) init_waitqueue_head(&sdev->ch_releaseQ); mutex_init(&sdev->mutex); - sdev->pd = ib_alloc_pd(device); + sdev->pd = ib_alloc_pd(device, 0); if (IS_ERR(sdev->pd)) goto free_dev; diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 8d2875b4c56d..a4961edf0ca1 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -446,7 +446,7 @@ nvme_rdma_find_get_device(struct rdma_cm_id *cm_id) ndev->dev = cm_id->device; kref_init(&ndev->ref); - ndev->pd = ib_alloc_pd(ndev->dev); + ndev->pd = ib_alloc_pd(ndev->dev, 0); if (IS_ERR(ndev->pd)) goto out_free_dev; diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index b4d648536c3e..187763a77355 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -848,7 +848,7 @@ nvmet_rdma_find_get_device(struct rdma_cm_id *cm_id) ndev->device = cm_id->device; kref_init(&ndev->ref); - ndev->pd = ib_alloc_pd(ndev->device); + ndev->pd = ib_alloc_pd(ndev->device, 0); if (IS_ERR(ndev->pd)) goto out_free_dev; diff --git a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c index 4f5978b3767b..0e4c6090bf62 100644 --- a/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c +++ b/drivers/staging/lustre/lnet/klnds/o2iblnd/o2iblnd.c @@ -2465,7 +2465,7 @@ int kiblnd_dev_failover(struct kib_dev *dev) hdev->ibh_cmid = cmid; hdev->ibh_ibdev = cmid->device; - pd = ib_alloc_pd(cmid->device); + pd = ib_alloc_pd(cmid->device, 0); if (IS_ERR(pd)) { rc = PTR_ERR(pd); CERROR("Can't allocate PD: %d\n", rc); diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 38a08dae49c4..4bdd898697cf 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1370,10 +1370,13 @@ struct ib_udata { struct ib_pd { u32 local_dma_lkey; + u32 flags; struct ib_device *device; struct ib_uobject *uobject; atomic_t usecnt; /* count all resources */ + u32 unsafe_global_rkey; + /* * Implementation details of the RDMA core, don't use in drivers: */ @@ -2506,8 +2509,23 @@ int ib_find_gid(struct ib_device *device, union ib_gid *gid, int ib_find_pkey(struct ib_device *device, u8 port_num, u16 pkey, u16 *index); -struct ib_pd *ib_alloc_pd(struct ib_device *device); +enum ib_pd_flags { + /* + * Create a memory registration for all memory in the system and place + * the rkey for it into pd->unsafe_global_rkey. This can be used by + * ULPs to avoid the overhead of dynamic MRs. + * + * This flag is generally considered unsafe and must only be used in + * extremly trusted environments. Every use of it will log a warning + * in the kernel log. + */ + IB_PD_UNSAFE_GLOBAL_RKEY = 0x01, +}; +struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, + const char *caller); +#define ib_alloc_pd(device, flags) \ + __ib_alloc_pd((device), (flags), __func__) void ib_dealloc_pd(struct ib_pd *pd); /** diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 1852e383afd6..553ed4ecb6a0 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -680,7 +680,7 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args) goto error; /* Create the Protection Domain */ - rdma->pd = ib_alloc_pd(rdma->cm_id->device); + rdma->pd = ib_alloc_pd(rdma->cm_id->device, 0); if (IS_ERR(rdma->pd)) goto error; diff --git a/net/rds/ib.c b/net/rds/ib.c index 7eaf887e46f8..5680d90b0b77 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -160,7 +160,7 @@ static void rds_ib_add_one(struct ib_device *device) rds_ibdev->max_responder_resources = device->attrs.max_qp_rd_atom; rds_ibdev->dev = device; - rds_ibdev->pd = ib_alloc_pd(device); + rds_ibdev->pd = ib_alloc_pd(device, 0); if (IS_ERR(rds_ibdev->pd)) { rds_ibdev->pd = NULL; goto put_dev; diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index dd9440137834..eb2857f52b05 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -993,7 +993,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) newxprt->sc_ord = min_t(size_t, dev->attrs.max_qp_rd_atom, newxprt->sc_ord); newxprt->sc_ord = min_t(size_t, svcrdma_ord, newxprt->sc_ord); - newxprt->sc_pd = ib_alloc_pd(dev); + newxprt->sc_pd = ib_alloc_pd(dev, 0); if (IS_ERR(newxprt->sc_pd)) { dprintk("svcrdma: error creating PD for connect request\n"); goto errout; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 536d0be3f61b..6561d4a35acb 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -386,7 +386,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) } ia->ri_device = ia->ri_id->device; - ia->ri_pd = ib_alloc_pd(ia->ri_device); + ia->ri_pd = ib_alloc_pd(ia->ri_device, 0); if (IS_ERR(ia->ri_pd)) { rc = PTR_ERR(ia->ri_pd); pr_err("rpcrdma: ib_alloc_pd() returned %d\n", rc); -- cgit v1.2.3 From 5ef990f06bd7e3cf521b5705d898d8e43d04ea90 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 5 Sep 2016 12:56:21 +0200 Subject: IB/core: remove ib_get_dma_mr We now only use it from ib_alloc_pd to create a local DMA lkey if the device doesn't provide one, or a global rkey if the ULP requests it. This patch removes ib_get_dma_mr and open codes the functionality in ib_alloc_pd so that we can simplify the code and prevent abuse of the functionality. As a side effect we can also simplify things by removing the valid access bit check, and the PD refcounting. In the future I hope to also remove the per-PD global MR entirely by shifting this work into the HW drivers, as one step towards avoiding the struct ib_mr overload for various different use cases. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Jason Gunthorpe Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/core/verbs.c | 34 ++++++++-------------------------- include/rdma/ib_verbs.h | 12 ------------ 2 files changed, 8 insertions(+), 38 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index e87b5187729c..d4ef3b1a695a 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -256,12 +256,17 @@ struct ib_pd *__ib_alloc_pd(struct ib_device *device, unsigned int flags, if (mr_access_flags) { struct ib_mr *mr; - mr = ib_get_dma_mr(pd, mr_access_flags); + mr = pd->device->get_dma_mr(pd, mr_access_flags); if (IS_ERR(mr)) { ib_dealloc_pd(pd); - return (struct ib_pd *)mr; + return ERR_CAST(mr); } + mr->device = pd->device; + mr->pd = pd; + mr->uobject = NULL; + mr->need_inval = false; + pd->__internal_mr = mr; if (!(device->attrs.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) @@ -288,7 +293,7 @@ void ib_dealloc_pd(struct ib_pd *pd) int ret; if (pd->__internal_mr) { - ret = ib_dereg_mr(pd->__internal_mr); + ret = pd->device->dereg_mr(pd->__internal_mr); WARN_ON(ret); pd->__internal_mr = NULL; } @@ -1408,29 +1413,6 @@ EXPORT_SYMBOL(ib_resize_cq); /* Memory regions */ -struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags) -{ - struct ib_mr *mr; - int err; - - err = ib_check_mr_access(mr_access_flags); - if (err) - return ERR_PTR(err); - - mr = pd->device->get_dma_mr(pd, mr_access_flags); - - if (!IS_ERR(mr)) { - mr->device = pd->device; - mr->pd = pd; - mr->uobject = NULL; - atomic_inc(&pd->usecnt); - mr->need_inval = false; - } - - return mr; -} -EXPORT_SYMBOL(ib_get_dma_mr); - int ib_dereg_mr(struct ib_mr *mr) { struct ib_pd *pd = mr->pd; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 4bdd898697cf..0a6c70895c8b 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -2878,18 +2878,6 @@ static inline int ib_req_ncomp_notif(struct ib_cq *cq, int wc_cnt) -ENOSYS; } -/** - * ib_get_dma_mr - Returns a memory region for system memory that is - * usable for DMA. - * @pd: The protection domain associated with the memory region. - * @mr_access_flags: Specifies the memory access rights. - * - * Note that the ib_dma_*() functions defined below must be used - * to create/destroy addresses used with the Lkey or Rkey returned - * by ib_get_dma_mr(). - */ -struct ib_mr *ib_get_dma_mr(struct ib_pd *pd, int mr_access_flags); - /** * ib_dma_mapping_error - check a DMA addr for error * @dev: The device for which the dma_addr was created -- cgit v1.2.3 From 47adf2f4f5805d075359fe6cbe3437612f7d4a34 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 28 Aug 2016 11:28:44 +0300 Subject: IB/uverbs: Expose RSS related capabilities Query RSS related attributes and return them to user-space via the extended query device uverbs command. It includes both direct ones (i.e. struct ib_uverbs_rss_caps) and max_wq_type_rq which may be used in both RSS and non RSS flows. Signed-off-by: Yishai Hadas Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 17 +++++++++++++++++ include/uapi/rdma/ib_user_verbs.h | 14 ++++++++++++++ 2 files changed, 31 insertions(+) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index fe784a908817..19c1ebf596ad 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -4173,6 +4173,23 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file, resp.device_cap_flags_ex = attr.device_cap_flags; resp.response_length += sizeof(resp.device_cap_flags_ex); + + if (ucore->outlen < resp.response_length + sizeof(resp.rss_caps)) + goto end; + + resp.rss_caps.supported_qpts = attr.rss_caps.supported_qpts; + resp.rss_caps.max_rwq_indirection_tables = + attr.rss_caps.max_rwq_indirection_tables; + resp.rss_caps.max_rwq_indirection_table_size = + attr.rss_caps.max_rwq_indirection_table_size; + + resp.response_length += sizeof(resp.rss_caps); + + if (ucore->outlen < resp.response_length + sizeof(resp.max_wq_type_rq)) + goto end; + + resp.max_wq_type_rq = attr.max_wq_type_rq; + resp.response_length += sizeof(resp.max_wq_type_rq); end: err = ib_copy_to_udata(ucore, &resp, resp.response_length); return err; diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 7f035f4b53b0..a9ebb477dc77 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -224,6 +224,17 @@ struct ib_uverbs_odp_caps { __u32 reserved; }; +struct ib_uverbs_rss_caps { + /* Corresponding bit will be set if qp type from + * 'enum ib_qp_type' is supported, e.g. + * supported_qpts |= 1 << IB_QPT_UD + */ + __u32 supported_qpts; + __u32 max_rwq_indirection_tables; + __u32 max_rwq_indirection_table_size; + __u32 reserved; +}; + struct ib_uverbs_ex_query_device_resp { struct ib_uverbs_query_device_resp base; __u32 comp_mask; @@ -232,6 +243,9 @@ struct ib_uverbs_ex_query_device_resp { __u64 timestamp_mask; __u64 hca_core_clock; /* in KHZ */ __u64 device_cap_flags_ex; + struct ib_uverbs_rss_caps rss_caps; + __u32 max_wq_type_rq; + __u32 reserved; }; struct ib_uverbs_query_port { -- cgit v1.2.3 From 15dfbd6b4f058571f41be5b7917465dab2ff55da Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 30 Aug 2016 16:58:32 +0300 Subject: IB/uverbs: Add support to extend flow steering specifications Flow steering specifications structures were implemented as in an extensible way that allows one to add new filters and new fields to existing filters. These specifications have never been extended, therefore the kernel flow specifications size and the user flow specifications size were must to be equal. In downstream patch, the IPv4 flow specifications type is extended to support TOS and TTL fields. To support an extension we change the flow specifications size condition test to be as following: * If the user flow specifications is bigger than the kernel specifications, we verify that all the bits which not in the kernel specifications are zeros and the flow is added only with the kernel specifications fields. * Otherwise, we add flow rule only with the user specifications fields. User space filters must be aligned with 32bits. Signed-off-by: Maor Gottlieb Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 98 +++++++++++++++++++++++++++--------- include/rdma/ib_verbs.h | 10 ++++ 2 files changed, 83 insertions(+), 25 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index 19c1ebf596ad..b9fb256c25fa 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3078,51 +3078,98 @@ out_put: return ret ? ret : in_len; } +static size_t kern_spec_filter_sz(struct ib_uverbs_flow_spec_hdr *spec) +{ + /* Returns user space filter size, includes padding */ + return (spec->size - sizeof(struct ib_uverbs_flow_spec_hdr)) / 2; +} + +static ssize_t spec_filter_size(void *kern_spec_filter, u16 kern_filter_size, + u16 ib_real_filter_sz) +{ + /* + * User space filter structures must be 64 bit aligned, otherwise this + * may pass, but we won't handle additional new attributes. + */ + + if (kern_filter_size > ib_real_filter_sz) { + if (memchr_inv(kern_spec_filter + + ib_real_filter_sz, 0, + kern_filter_size - ib_real_filter_sz)) + return -EINVAL; + return ib_real_filter_sz; + } + return kern_filter_size; +} + static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec, union ib_flow_spec *ib_spec) { + ssize_t actual_filter_sz; + ssize_t kern_filter_sz; + ssize_t ib_filter_sz; + void *kern_spec_mask; + void *kern_spec_val; + if (kern_spec->reserved) return -EINVAL; ib_spec->type = kern_spec->type; + kern_filter_sz = kern_spec_filter_sz(&kern_spec->hdr); + /* User flow spec size must be aligned to 4 bytes */ + if (kern_filter_sz != ALIGN(kern_filter_sz, 4)) + return -EINVAL; + + kern_spec_val = (void *)kern_spec + + sizeof(struct ib_uverbs_flow_spec_hdr); + kern_spec_mask = kern_spec_val + kern_filter_sz; + switch (ib_spec->type) { case IB_FLOW_SPEC_ETH: - ib_spec->eth.size = sizeof(struct ib_flow_spec_eth); - if (ib_spec->eth.size != kern_spec->eth.size) + ib_filter_sz = offsetof(struct ib_flow_eth_filter, real_sz); + actual_filter_sz = spec_filter_size(kern_spec_mask, + kern_filter_sz, + ib_filter_sz); + if (actual_filter_sz <= 0) return -EINVAL; - memcpy(&ib_spec->eth.val, &kern_spec->eth.val, - sizeof(struct ib_flow_eth_filter)); - memcpy(&ib_spec->eth.mask, &kern_spec->eth.mask, - sizeof(struct ib_flow_eth_filter)); + ib_spec->size = sizeof(struct ib_flow_spec_eth); + memcpy(&ib_spec->eth.val, kern_spec_val, actual_filter_sz); + memcpy(&ib_spec->eth.mask, kern_spec_mask, actual_filter_sz); break; case IB_FLOW_SPEC_IPV4: - ib_spec->ipv4.size = sizeof(struct ib_flow_spec_ipv4); - if (ib_spec->ipv4.size != kern_spec->ipv4.size) + ib_filter_sz = offsetof(struct ib_flow_ipv4_filter, real_sz); + actual_filter_sz = spec_filter_size(kern_spec_mask, + kern_filter_sz, + ib_filter_sz); + if (actual_filter_sz <= 0) return -EINVAL; - memcpy(&ib_spec->ipv4.val, &kern_spec->ipv4.val, - sizeof(struct ib_flow_ipv4_filter)); - memcpy(&ib_spec->ipv4.mask, &kern_spec->ipv4.mask, - sizeof(struct ib_flow_ipv4_filter)); + ib_spec->size = sizeof(struct ib_flow_spec_ipv4); + memcpy(&ib_spec->ipv4.val, kern_spec_val, actual_filter_sz); + memcpy(&ib_spec->ipv4.mask, kern_spec_mask, actual_filter_sz); break; case IB_FLOW_SPEC_IPV6: - ib_spec->ipv6.size = sizeof(struct ib_flow_spec_ipv6); - if (ib_spec->ipv6.size != kern_spec->ipv6.size) + ib_filter_sz = offsetof(struct ib_flow_ipv6_filter, real_sz); + actual_filter_sz = spec_filter_size(kern_spec_mask, + kern_filter_sz, + ib_filter_sz); + if (actual_filter_sz <= 0) return -EINVAL; - memcpy(&ib_spec->ipv6.val, &kern_spec->ipv6.val, - sizeof(struct ib_flow_ipv6_filter)); - memcpy(&ib_spec->ipv6.mask, &kern_spec->ipv6.mask, - sizeof(struct ib_flow_ipv6_filter)); + ib_spec->size = sizeof(struct ib_flow_spec_ipv6); + memcpy(&ib_spec->ipv6.val, kern_spec_val, actual_filter_sz); + memcpy(&ib_spec->ipv6.mask, kern_spec_mask, actual_filter_sz); break; case IB_FLOW_SPEC_TCP: case IB_FLOW_SPEC_UDP: - ib_spec->tcp_udp.size = sizeof(struct ib_flow_spec_tcp_udp); - if (ib_spec->tcp_udp.size != kern_spec->tcp_udp.size) + ib_filter_sz = offsetof(struct ib_flow_tcp_udp_filter, real_sz); + actual_filter_sz = spec_filter_size(kern_spec_mask, + kern_filter_sz, + ib_filter_sz); + if (actual_filter_sz <= 0) return -EINVAL; - memcpy(&ib_spec->tcp_udp.val, &kern_spec->tcp_udp.val, - sizeof(struct ib_flow_tcp_udp_filter)); - memcpy(&ib_spec->tcp_udp.mask, &kern_spec->tcp_udp.mask, - sizeof(struct ib_flow_tcp_udp_filter)); + ib_spec->size = sizeof(struct ib_flow_spec_tcp_udp); + memcpy(&ib_spec->tcp_udp.val, kern_spec_val, actual_filter_sz); + memcpy(&ib_spec->tcp_udp.mask, kern_spec_mask, actual_filter_sz); break; default: return -EINVAL; @@ -3654,7 +3701,8 @@ int ib_uverbs_ex_create_flow(struct ib_uverbs_file *file, goto err_uobj; } - flow_attr = kmalloc(sizeof(*flow_attr) + cmd.flow_attr.size, GFP_KERNEL); + flow_attr = kzalloc(sizeof(*flow_attr) + cmd.flow_attr.num_of_specs * + sizeof(union ib_flow_spec), GFP_KERNEL); if (!flow_attr) { err = -ENOMEM; goto err_put; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index d74c76bf76d3..4570d8844f63 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1623,6 +1623,8 @@ struct ib_flow_eth_filter { u8 src_mac[6]; __be16 ether_type; __be16 vlan_tag; + /* Must be last */ + u8 real_sz[0]; }; struct ib_flow_spec_eth { @@ -1635,6 +1637,8 @@ struct ib_flow_spec_eth { struct ib_flow_ib_filter { __be16 dlid; __u8 sl; + /* Must be last */ + u8 real_sz[0]; }; struct ib_flow_spec_ib { @@ -1647,6 +1651,8 @@ struct ib_flow_spec_ib { struct ib_flow_ipv4_filter { __be32 src_ip; __be32 dst_ip; + /* Must be last */ + u8 real_sz[0]; }; struct ib_flow_spec_ipv4 { @@ -1659,6 +1665,8 @@ struct ib_flow_spec_ipv4 { struct ib_flow_ipv6_filter { u8 src_ip[16]; u8 dst_ip[16]; + /* Must be last */ + u8 real_sz[0]; }; struct ib_flow_spec_ipv6 { @@ -1671,6 +1679,8 @@ struct ib_flow_spec_ipv6 { struct ib_flow_tcp_udp_filter { __be16 dst_port; __be16 src_port; + /* Must be last */ + u8 real_sz[0]; }; struct ib_flow_spec_tcp_udp { -- cgit v1.2.3 From a72c6a2b0e699fcbcf679b881d5af2683228ae98 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Tue, 30 Aug 2016 16:58:34 +0300 Subject: IB/core: Add more fields to IPv6 flow specification Add the following fields to IPv6 flow filter specification: 1. Traffic Class 2. Flow Label 3. Next Header 4. Hop Limit Signed-off-by: Maor Gottlieb Reviewed-by: Sagi Grimberg Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- drivers/infiniband/core/uverbs_cmd.c | 4 ++++ include/rdma/ib_verbs.h | 4 ++++ include/uapi/rdma/ib_user_verbs.h | 9 +++++++-- 3 files changed, 15 insertions(+), 2 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index b9fb256c25fa..cb3f515a2285 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -3158,6 +3158,10 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec, ib_spec->size = sizeof(struct ib_flow_spec_ipv6); memcpy(&ib_spec->ipv6.val, kern_spec_val, actual_filter_sz); memcpy(&ib_spec->ipv6.mask, kern_spec_mask, actual_filter_sz); + + if ((ntohl(ib_spec->ipv6.mask.flow_label)) >= BIT(20) || + (ntohl(ib_spec->ipv6.val.flow_label)) >= BIT(20)) + return -EINVAL; break; case IB_FLOW_SPEC_TCP: case IB_FLOW_SPEC_UDP: diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 990220b757f0..0cec4da51eb7 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -1676,6 +1676,10 @@ struct ib_flow_spec_ipv4 { struct ib_flow_ipv6_filter { u8 src_ip[16]; u8 dst_ip[16]; + __be32 flow_label; + u8 next_hdr; + u8 traffic_class; + u8 hop_limit; /* Must be last */ u8 real_sz[0]; }; diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h index 804f086835c6..25225ebbc7d5 100644 --- a/include/uapi/rdma/ib_user_verbs.h +++ b/include/uapi/rdma/ib_user_verbs.h @@ -886,8 +886,13 @@ struct ib_uverbs_flow_spec_tcp_udp { }; struct ib_uverbs_flow_ipv6_filter { - __u8 src_ip[16]; - __u8 dst_ip[16]; + __u8 src_ip[16]; + __u8 dst_ip[16]; + __be32 flow_label; + __u8 next_hdr; + __u8 traffic_class; + __u8 hop_limit; + __u8 reserved; }; struct ib_uverbs_flow_spec_ipv6 { -- cgit v1.2.3 From 4534d859020190c8fa04d899ddca656191339de1 Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Mon, 15 Aug 2016 23:27:46 +0530 Subject: IB/sa : Remove deprecated create_singlethread_workqueue alloc_ordered_workqueue() with WQ_MEM_RECLAIM set, replaces deprecated create_singlethread_workqueue(). This is the identity conversion. The workqueue "ib_nl" queues work item &ib_nl_timed_work. It has been identity converted. WQ_MEM_RECLAIM has been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar Signed-off-by: Doug Ledford --- drivers/infiniband/core/sa_query.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/sa_query.c b/drivers/infiniband/core/sa_query.c index b9bf7aa055e7..81b742ca1639 100644 --- a/drivers/infiniband/core/sa_query.c +++ b/drivers/infiniband/core/sa_query.c @@ -2015,7 +2015,7 @@ int ib_sa_init(void) goto err2; } - ib_nl_wq = create_singlethread_workqueue("ib_nl_sa_wq"); + ib_nl_wq = alloc_ordered_workqueue("ib_nl_sa_wq", WQ_MEM_RECLAIM); if (!ib_nl_wq) { ret = -ENOMEM; goto err3; -- cgit v1.2.3 From 1c99e299ba62f72b24e8ec2f4436f574d7433f11 Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Mon, 15 Aug 2016 23:28:07 +0530 Subject: IB/mad: Remove deprecated create_singlethread_workqueue The workqueue "ib_nl" queues work items &ib_nl_timed_work and &mad_agent_priv->local_work. It has been identity converted. WQ_MEM_RECLAIM has been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar Signed-off-by: Doug Ledford --- drivers/infiniband/core/mad.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c index 95d33a3572e6..40cbd6bdb73b 100644 --- a/drivers/infiniband/core/mad.c +++ b/drivers/infiniband/core/mad.c @@ -3177,7 +3177,7 @@ static int ib_mad_port_open(struct ib_device *device, goto error7; snprintf(name, sizeof name, "ib_mad%d", port_num); - port_priv->wq = create_singlethread_workqueue(name); + port_priv->wq = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM); if (!port_priv->wq) { ret = -ENOMEM; goto error8; -- cgit v1.2.3 From 01013cdf06d930dbc293d001ccf509200e403057 Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Mon, 15 Aug 2016 23:28:36 +0530 Subject: IB/multicast: Remove deprecated create_singlethread_workqueue alloc_ordered_workqueue() with WQ_MEM_RECLAIM set, replaces deprecated create_singlethread_workqueue(). This is the identity conversion. The workqueue "mcast_wq" queues work item &group->work. It has been identity converted. WQ_MEM_RECLAIM has been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar Signed-off-by: Doug Ledford --- drivers/infiniband/core/multicast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/multicast.c b/drivers/infiniband/core/multicast.c index 3a3c5d73bbfc..49ecde98a3d9 100644 --- a/drivers/infiniband/core/multicast.c +++ b/drivers/infiniband/core/multicast.c @@ -882,7 +882,7 @@ int mcast_init(void) { int ret; - mcast_wq = create_singlethread_workqueue("ib_mcast"); + mcast_wq = alloc_ordered_workqueue("ib_mcast", WQ_MEM_RECLAIM); if (!mcast_wq) return -ENOMEM; -- cgit v1.2.3 From a190d3b07c9f8046784681fa6167005711ed3f5e Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Mon, 15 Aug 2016 23:29:10 +0530 Subject: IB/ucma: Remove deprecated create_singlethread_workqueue alloc_ordered_workqueue() with WQ_MEM_RECLAIM set, replaces deprecated create_singlethread_workqueue(). This is the identity conversion. The workqueue "close_wq" queues work items &ctx->close_work (maps to ucma_close_id) and &con_req_eve->close_work (maps to ucma_close_event_id). It has been identity converted. WQ_MEM_RECLAIM has been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar Signed-off-by: Doug Ledford --- drivers/infiniband/core/ucma.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/ucma.c b/drivers/infiniband/core/ucma.c index 2825ece91d3c..9520154f1d7c 100644 --- a/drivers/infiniband/core/ucma.c +++ b/drivers/infiniband/core/ucma.c @@ -1638,7 +1638,8 @@ static int ucma_open(struct inode *inode, struct file *filp) if (!file) return -ENOMEM; - file->close_wq = create_singlethread_workqueue("ucma_close_id"); + file->close_wq = alloc_ordered_workqueue("ucma_close_id", + WQ_MEM_RECLAIM); if (!file->close_wq) { kfree(file); return -ENOMEM; -- cgit v1.2.3 From dee9acbb3254f67f7b0ce0766f13e25c5618ae78 Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Mon, 15 Aug 2016 23:29:35 +0530 Subject: IB/cma: Remove deprecated create_singlethread_workqueue alloc_ordered_workqueue() with WQ_MEM_RECLAIM set, replaces deprecated create_singlethread_workqueue(). This is the identity conversion. The workqueue "cma_wq" queues work item cma_work_handler. It has been identity converted. WQ_MEM_RECLAIM has been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/core/cma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index e6dfa1bd3def..8954792f1acc 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -4363,7 +4363,7 @@ static int __init cma_init(void) { int ret; - cma_wq = create_singlethread_workqueue("rdma_cm"); + cma_wq = alloc_ordered_workqueue("rdma_cm", WQ_MEM_RECLAIM); if (!cma_wq) return -ENOMEM; -- cgit v1.2.3 From f54816261c2b36e59e43f02d654ac5834a71537d Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Mon, 15 Aug 2016 23:30:32 +0530 Subject: IB/addr: Remove deprecated create_singlethread_workqueue The workqueue "addr_wq" queues a single work item &work and hence doesn't require ordering. Also, it is being used on a memory reclaim path. Hence, it has been converted to use alloc_workqueue with WQ_MEM_RECLAIM set. WQ_MEM_RECLAIM has been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar Signed-off-by: Doug Ledford --- drivers/infiniband/core/addr.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 1374541a4528..b136d3acc5bd 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -800,7 +800,7 @@ static struct notifier_block nb = { int addr_init(void) { - addr_wq = create_singlethread_workqueue("ib_addr"); + addr_wq = alloc_workqueue("ib_addr", WQ_MEM_RECLAIM, 0); if (!addr_wq) return -ENOMEM; -- cgit v1.2.3 From daf0879f4cb9b413d8f82404350cf956ce3e458b Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Mon, 15 Aug 2016 23:37:44 +0530 Subject: IB/iwcm: Remove deprecated create_singlethread_workqueue alloc_ordered_workqueue() with WQ_MEM_RECLAIM set, replaces deprecated create_singlethread_workqueue(). This is the identity conversion. The workqueue "iwcm_wq" queues work item &work(maps to cm_work_handler). It has been identity converted. WQ_MEM_RECLAIM has been set to ensure forward progress under memory pressure. Signed-off-by: Bhaktipriya Shridhar Reviewed-by: Steve Wise Signed-off-by: Doug Ledford --- drivers/infiniband/core/iwcm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/iwcm.c b/drivers/infiniband/core/iwcm.c index 357624f8b9d3..5495e22839a7 100644 --- a/drivers/infiniband/core/iwcm.c +++ b/drivers/infiniband/core/iwcm.c @@ -1160,7 +1160,7 @@ static int __init iw_cm_init(void) if (ret) pr_err("iw_cm: couldn't register netlink callbacks\n"); - iwcm_wq = create_singlethread_workqueue("iw_cm_wq"); + iwcm_wq = alloc_ordered_workqueue("iw_cm_wq", WQ_MEM_RECLAIM); if (!iwcm_wq) return -ENOMEM; -- cgit v1.2.3 From bd99fdea420b00925e9b83a50f2ccc5e1f07ef7d Mon Sep 17 00:00:00 2001 From: Yuval Shaia Date: Thu, 25 Aug 2016 10:57:07 -0700 Subject: IB/{core,hw}: Add constant for node_desc Signed-off-by: Yuval Shaia Signed-off-by: Doug Ledford --- drivers/infiniband/core/sysfs.c | 2 +- drivers/infiniband/hw/cxgb3/iwch_provider.c | 1 + drivers/infiniband/hw/cxgb4/provider.c | 1 + drivers/infiniband/hw/hfi1/verbs.c | 3 ++- drivers/infiniband/hw/mlx4/mad.c | 3 ++- drivers/infiniband/hw/mlx4/main.c | 6 +++--- drivers/infiniband/hw/mlx5/mad.c | 2 +- drivers/infiniband/hw/mlx5/main.c | 6 +++--- drivers/infiniband/hw/mthca/mthca_mad.c | 3 ++- drivers/infiniband/hw/mthca/mthca_provider.c | 5 +++-- drivers/infiniband/hw/ocrdma/ocrdma_main.c | 1 + drivers/infiniband/hw/qib/qib_verbs.c | 3 ++- include/rdma/ib_verbs.h | 6 ++++-- 13 files changed, 26 insertions(+), 16 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c index 15defefecb4f..c1fb545e8d78 100644 --- a/drivers/infiniband/core/sysfs.c +++ b/drivers/infiniband/core/sysfs.c @@ -1193,7 +1193,7 @@ static ssize_t set_node_desc(struct device *device, if (!dev->modify_device) return -EIO; - memcpy(desc.node_desc, buf, min_t(int, count, 64)); + memcpy(desc.node_desc, buf, min_t(int, count, IB_DEVICE_NODE_DESC_MAX)); ret = ib_modify_device(dev, IB_DEVICE_MODIFY_NODE_DESC, &desc); if (ret) return ret; diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 3edb80644b53..b47be87d5a53 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -1396,6 +1396,7 @@ int iwch_register_device(struct iwch_dev *dev) (1ull << IB_USER_VERBS_CMD_POST_SEND) | (1ull << IB_USER_VERBS_CMD_POST_RECV); dev->ibdev.node_type = RDMA_NODE_RNIC; + BUILD_BUG_ON(sizeof(IWCH_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); memcpy(dev->ibdev.node_desc, IWCH_NODE_DESC, sizeof(IWCH_NODE_DESC)); dev->ibdev.phys_port_cnt = dev->rdev.port_info.nports; dev->ibdev.num_comp_vectors = 1; diff --git a/drivers/infiniband/hw/cxgb4/provider.c b/drivers/infiniband/hw/cxgb4/provider.c index df127ce6b6ec..645e606a17c5 100644 --- a/drivers/infiniband/hw/cxgb4/provider.c +++ b/drivers/infiniband/hw/cxgb4/provider.c @@ -563,6 +563,7 @@ int c4iw_register_device(struct c4iw_dev *dev) (1ull << IB_USER_VERBS_CMD_POST_SEND) | (1ull << IB_USER_VERBS_CMD_POST_RECV); dev->ibdev.node_type = RDMA_NODE_RNIC; + BUILD_BUG_ON(sizeof(C4IW_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); memcpy(dev->ibdev.node_desc, C4IW_NODE_DESC, sizeof(C4IW_NODE_DESC)); dev->ibdev.phys_port_cnt = dev->rdev.lldi.nports; dev->ibdev.num_comp_vectors = dev->rdev.lldi.nciq; diff --git a/drivers/infiniband/hw/hfi1/verbs.c b/drivers/infiniband/hw/hfi1/verbs.c index 2b359540901d..f803f7b5ef5d 100644 --- a/drivers/infiniband/hw/hfi1/verbs.c +++ b/drivers/infiniband/hw/hfi1/verbs.c @@ -1423,7 +1423,8 @@ static int modify_device(struct ib_device *device, } if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC) { - memcpy(device->node_desc, device_modify->node_desc, 64); + memcpy(device->node_desc, device_modify->node_desc, + IB_DEVICE_NODE_DESC_MAX); for (i = 0; i < dd->num_pports; i++) { struct hfi1_ibport *ibp = &dd->pport[i].ibport_data; diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 9b4b21bd0222..d8886d051d4b 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -345,7 +345,8 @@ static void node_desc_override(struct ib_device *dev, mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP && mad->mad_hdr.attr_id == IB_SMP_ATTR_NODE_DESC) { spin_lock_irqsave(&to_mdev(dev)->sm_lock, flags); - memcpy(((struct ib_smp *) mad)->data, dev->node_desc, 64); + memcpy(((struct ib_smp *) mad)->data, dev->node_desc, + IB_DEVICE_NODE_DESC_MAX); spin_unlock_irqrestore(&to_mdev(dev)->sm_lock, flags); } } diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index f07cbf87db52..3ae64cefc39e 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -886,7 +886,7 @@ static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask, return -EOPNOTSUPP; spin_lock_irqsave(&to_mdev(ibdev)->sm_lock, flags); - memcpy(ibdev->node_desc, props->node_desc, 64); + memcpy(ibdev->node_desc, props->node_desc, IB_DEVICE_NODE_DESC_MAX); spin_unlock_irqrestore(&to_mdev(ibdev)->sm_lock, flags); /* @@ -897,7 +897,7 @@ static int mlx4_ib_modify_device(struct ib_device *ibdev, int mask, if (IS_ERR(mailbox)) return 0; - memcpy(mailbox->buf, props->node_desc, 64); + memcpy(mailbox->buf, props->node_desc, IB_DEVICE_NODE_DESC_MAX); mlx4_cmd(to_mdev(ibdev)->dev, mailbox->dma, 1, 0, MLX4_CMD_SET_NODE, MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); @@ -2025,7 +2025,7 @@ static int init_node_data(struct mlx4_ib_dev *dev) if (err) goto out; - memcpy(dev->ib_dev.node_desc, out_mad->data, 64); + memcpy(dev->ib_dev.node_desc, out_mad->data, IB_DEVICE_NODE_DESC_MAX); in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c index 364aab9f3c9e..39e58489dcc2 100644 --- a/drivers/infiniband/hw/mlx5/mad.c +++ b/drivers/infiniband/hw/mlx5/mad.c @@ -394,7 +394,7 @@ int mlx5_query_mad_ifc_node_desc(struct mlx5_ib_dev *dev, char *node_desc) if (err) goto out; - memcpy(node_desc, out_mad->data, 64); + memcpy(node_desc, out_mad->data, IB_DEVICE_NODE_DESC_MAX); out: kfree(in_mad); kfree(out_mad); diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index 24f5f48e9ddf..29878aa716ee 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -472,7 +472,7 @@ static int mlx5_query_node_guid(struct mlx5_ib_dev *dev, } struct mlx5_reg_node_desc { - u8 desc[64]; + u8 desc[IB_DEVICE_NODE_DESC_MAX]; }; static int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc) @@ -920,13 +920,13 @@ static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask, * If possible, pass node desc to FW, so it can generate * a 144 trap. If cmd fails, just ignore. */ - memcpy(&in, props->node_desc, 64); + memcpy(&in, props->node_desc, IB_DEVICE_NODE_DESC_MAX); err = mlx5_core_access_reg(dev->mdev, &in, sizeof(in), &out, sizeof(out), MLX5_REG_NODE_DESC, 0, 1); if (err) return err; - memcpy(ibdev->node_desc, props->node_desc, 64); + memcpy(ibdev->node_desc, props->node_desc, IB_DEVICE_NODE_DESC_MAX); return err; } diff --git a/drivers/infiniband/hw/mthca/mthca_mad.c b/drivers/infiniband/hw/mthca/mthca_mad.c index 7c3f2fb44ba5..9139405c4810 100644 --- a/drivers/infiniband/hw/mthca/mthca_mad.c +++ b/drivers/infiniband/hw/mthca/mthca_mad.c @@ -153,7 +153,8 @@ static void node_desc_override(struct ib_device *dev, mad->mad_hdr.method == IB_MGMT_METHOD_GET_RESP && mad->mad_hdr.attr_id == IB_SMP_ATTR_NODE_DESC) { mutex_lock(&to_mdev(dev)->cap_mask_mutex); - memcpy(((struct ib_smp *) mad)->data, dev->node_desc, 64); + memcpy(((struct ib_smp *) mad)->data, dev->node_desc, + IB_DEVICE_NODE_DESC_MAX); mutex_unlock(&to_mdev(dev)->cap_mask_mutex); } } diff --git a/drivers/infiniband/hw/mthca/mthca_provider.c b/drivers/infiniband/hw/mthca/mthca_provider.c index da2335f7f7c3..b697a0f77231 100644 --- a/drivers/infiniband/hw/mthca/mthca_provider.c +++ b/drivers/infiniband/hw/mthca/mthca_provider.c @@ -193,7 +193,8 @@ static int mthca_modify_device(struct ib_device *ibdev, if (mask & IB_DEVICE_MODIFY_NODE_DESC) { if (mutex_lock_interruptible(&to_mdev(ibdev)->cap_mask_mutex)) return -ERESTARTSYS; - memcpy(ibdev->node_desc, props->node_desc, 64); + memcpy(ibdev->node_desc, props->node_desc, + IB_DEVICE_NODE_DESC_MAX); mutex_unlock(&to_mdev(ibdev)->cap_mask_mutex); } @@ -1138,7 +1139,7 @@ static int mthca_init_node_data(struct mthca_dev *dev) if (err) goto out; - memcpy(dev->ib_dev.node_desc, out_mad->data, 64); + memcpy(dev->ib_dev.node_desc, out_mad->data, IB_DEVICE_NODE_DESC_MAX); in_mad->attr_id = IB_SMP_ATTR_NODE_INFO; diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_main.c b/drivers/infiniband/hw/ocrdma/ocrdma_main.c index 07d0c6c5b046..15e35acc690d 100644 --- a/drivers/infiniband/hw/ocrdma/ocrdma_main.c +++ b/drivers/infiniband/hw/ocrdma/ocrdma_main.c @@ -119,6 +119,7 @@ static int ocrdma_register_device(struct ocrdma_dev *dev) { strlcpy(dev->ibdev.name, "ocrdma%d", IB_DEVICE_NAME_MAX); ocrdma_get_guid(dev, (u8 *)&dev->ibdev.node_guid); + BUILD_BUG_ON(sizeof(OCRDMA_NODE_DESC) > IB_DEVICE_NODE_DESC_MAX); memcpy(dev->ibdev.node_desc, OCRDMA_NODE_DESC, sizeof(OCRDMA_NODE_DESC)); dev->ibdev.owner = THIS_MODULE; diff --git a/drivers/infiniband/hw/qib/qib_verbs.c b/drivers/infiniband/hw/qib/qib_verbs.c index fd1dfbce5539..2d7e52619b55 100644 --- a/drivers/infiniband/hw/qib/qib_verbs.c +++ b/drivers/infiniband/hw/qib/qib_verbs.c @@ -1370,7 +1370,8 @@ static int qib_modify_device(struct ib_device *device, } if (device_modify_mask & IB_DEVICE_MODIFY_NODE_DESC) { - memcpy(device->node_desc, device_modify->node_desc, 64); + memcpy(device->node_desc, device_modify->node_desc, + IB_DEVICE_NODE_DESC_MAX); for (i = 0; i < dd->num_pports; i++) { struct qib_ibport *ibp = &dd->pport[i].ibport_data; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index 0cec4da51eb7..d3fba0a56e17 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -537,9 +537,11 @@ enum ib_device_modify_flags { IB_DEVICE_MODIFY_NODE_DESC = 1 << 1 }; +#define IB_DEVICE_NODE_DESC_MAX 64 + struct ib_device_modify { u64 sys_image_guid; - char node_desc[64]; + char node_desc[IB_DEVICE_NODE_DESC_MAX]; }; enum ib_port_modify_flags { @@ -2077,7 +2079,7 @@ struct ib_device { u64 uverbs_cmd_mask; u64 uverbs_ex_cmd_mask; - char node_desc[64]; + char node_desc[IB_DEVICE_NODE_DESC_MAX]; __be64 node_guid; u32 local_dma_lkey; u16 is_switch:1; -- cgit v1.2.3 From 52746129e02998fb45942aed359e77e63def4997 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 26 Sep 2016 09:09:42 -0700 Subject: IB/core: Improve ib_map_mr_sg() documentation Document that ib_map_mr_sg() is able to map physically discontiguous sg-lists as a single MR. Change IB_MR_TYPE_SG_GAPS_REG into IB_MR_TYPE_SG_GAPS. Signed-off-by: Bart Van Assche Cc: Sagi Grimberg Cc: Christoph Hellwig Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Doug Ledford --- drivers/infiniband/core/verbs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index d4ef3b1a695a..1c6b2a4f50d4 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -1811,13 +1811,13 @@ EXPORT_SYMBOL(ib_set_vf_guid); * * Constraints: * - The first sg element is allowed to have an offset. - * - Each sg element must be aligned to page_size (or physically - * contiguous to the previous element). In case an sg element has a - * non contiguous offset, the mapping prefix will not include it. + * - Each sg element must either be aligned to page_size or virtually + * contiguous to the previous element. In case an sg element has a + * non-contiguous offset, the mapping prefix will not include it. * - The last sg element is allowed to have length less than page_size. * - If sg_nents total byte length exceeds the mr max_num_sge * page_size * then only max_num_sg entries will be mapped. - * - If the MR was allocated with type IB_MR_TYPE_SG_GAPS_REG, non of these + * - If the MR was allocated with type IB_MR_TYPE_SG_GAPS, none of these * constraints holds and the page_size argument is ignored. * * Returns the number of sg elements that were mapped to the memory region. -- cgit v1.2.3 From b6bc1c731f0b985e91f618561fc82c6e252dfaf4 Mon Sep 17 00:00:00 2001 From: Steve Wise Date: Thu, 29 Sep 2016 07:31:33 -0700 Subject: IB/core: correctly handle rdma_rw_init_mrs() failure Function ib_create_qp() was failing to return an error when rdma_rw_init_mrs() fails, causing a crash further down in ib_create_qp() when trying to dereferece the qp pointer which was actually a negative errno. The crash: crash> log|grep BUG [ 136.458121] BUG: unable to handle kernel NULL pointer dereference at 0000000000000098 crash> bt PID: 3736 TASK: ffff8808543215c0 CPU: 2 COMMAND: "kworker/u64:2" #0 [ffff88084d323340] machine_kexec at ffffffff8105fbb0 #1 [ffff88084d3233b0] __crash_kexec at ffffffff81116758 #2 [ffff88084d323480] crash_kexec at ffffffff8111682d #3 [ffff88084d3234b0] oops_end at ffffffff81032bd6 #4 [ffff88084d3234e0] no_context at ffffffff8106e431 #5 [ffff88084d323530] __bad_area_nosemaphore at ffffffff8106e610 #6 [ffff88084d323590] bad_area_nosemaphore at ffffffff8106e6f4 #7 [ffff88084d3235a0] __do_page_fault at ffffffff8106ebdc #8 [ffff88084d323620] do_page_fault at ffffffff8106f057 #9 [ffff88084d323660] page_fault at ffffffff816e3148 [exception RIP: ib_create_qp+427] RIP: ffffffffa02554fb RSP: ffff88084d323718 RFLAGS: 00010246 RAX: 0000000000000004 RBX: fffffffffffffff4 RCX: 000000018020001f RDX: ffff880830997fc0 RSI: 0000000000000001 RDI: ffff88085f407200 RBP: ffff88084d323778 R8: 0000000000000001 R9: ffffea0020bae210 R10: ffffea0020bae218 R11: 0000000000000001 R12: ffff88084d3237c8 R13: 00000000fffffff4 R14: ffff880859fa5000 R15: ffff88082eb89800 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #10 [ffff88084d323780] rdma_create_qp at ffffffffa0782681 [rdma_cm] #11 [ffff88084d3237b0] nvmet_rdma_create_queue_ib at ffffffffa07c43f3 [nvmet_rdma] #12 [ffff88084d323860] nvmet_rdma_alloc_queue at ffffffffa07c5ba9 [nvmet_rdma] #13 [ffff88084d323900] nvmet_rdma_queue_connect at ffffffffa07c5c96 [nvmet_rdma] #14 [ffff88084d323980] nvmet_rdma_cm_handler at ffffffffa07c6450 [nvmet_rdma] #15 [ffff88084d3239b0] iw_conn_req_handler at ffffffffa0787480 [rdma_cm] #16 [ffff88084d323a60] cm_conn_req_handler at ffffffffa0775f06 [iw_cm] #17 [ffff88084d323ab0] process_event at ffffffffa0776019 [iw_cm] #18 [ffff88084d323af0] cm_work_handler at ffffffffa0776170 [iw_cm] #19 [ffff88084d323cb0] process_one_work at ffffffff810a1483 #20 [ffff88084d323d90] worker_thread at ffffffff810a211d #21 [ffff88084d323ec0] kthread at ffffffff810a6c5c #22 [ffff88084d323f50] ret_from_fork at ffffffff816e1ebf Fixes: 632bc3f65081 ("IB/core, RDMA RW API: Do not exceed QP SGE send limit") Signed-off-by: Steve Wise Cc: stable@vger.kernel.org Reviewed-by: Bart Van Assche Signed-off-by: Doug Ledford --- drivers/infiniband/core/verbs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/infiniband/core') diff --git a/drivers/infiniband/core/verbs.c b/drivers/infiniband/core/verbs.c index 1c6b2a4f50d4..83687646da68 100644 --- a/drivers/infiniband/core/verbs.c +++ b/drivers/infiniband/core/verbs.c @@ -843,7 +843,7 @@ struct ib_qp *ib_create_qp(struct ib_pd *pd, if (ret) { pr_err("failed to init MR pool ret= %d\n", ret); ib_destroy_qp(qp); - qp = ERR_PTR(ret); + return ERR_PTR(ret); } } -- cgit v1.2.3