From 2145328515c8fa9b8a9f7889250bc6c032f2a0e6 Mon Sep 17 00:00:00 2001
From: Long Li
Date: Sat, 13 May 2023 23:18:15 -0700
Subject: RDMA/mana_ib: Use v2 version of cfg_rx_steer_req to enable RX coalescing

With RX coalescing, one CQE entry can be used to indicate multiple packets
on the receive queue. This saves processing time and PCI bandwidth over
the CQ.

The MANA Ethernet driver also uses the v2 version of the protocol. It
doesn't use RX coalescing and its behavior is not changed.

Link: https://lore.kernel.org/r/1684045095-31228-1-git-send-email-longli@linuxonhyperv.com
Signed-off-by: Long Li
Signed-off-by: Jason Gunthorpe
---
 include/net/mana/mana.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/mana/mana.h b/include/net/mana/mana.h
index cd386aa7c7cc..1512bd48df81 100644
--- a/include/net/mana/mana.h
+++ b/include/net/mana/mana.h
@@ -581,7 +581,7 @@ struct mana_fence_rq_resp {
 }; /* HW DATA */

 /* Configure vPort Rx Steering */
-struct mana_cfg_rx_steer_req {
+struct mana_cfg_rx_steer_req_v2 {
     struct gdma_req_hdr hdr;
     mana_handle_t vport;
     u16 num_indir_entries;
@@ -594,6 +594,8 @@ struct mana_cfg_rx_steer_req {
     u8 reserved;
     mana_handle_t default_rxobj;
     u8 hashkey[MANA_HASH_KEY_SIZE];
+    u8 cqe_coalescing_enable;
+    u8 reserved2[7];
 }; /* HW DATA */

 struct mana_cfg_rx_steer_resp {
-- cgit v1.2.3

From 2ecfd946169e7f56534db2a5f6935858be3005ba Mon Sep 17 00:00:00 2001
From: Leon Romanovsky
Date: Mon, 5 Jun 2023 13:14:05 +0300
Subject: RDMA/mlx5: Reduce QP table exposure

driver.h is a common header for the whole mlx5 code base, but struct
mlx5_qp_table is used only in the mlx5_ib driver. So move that struct to
be under the sole responsibility of mlx5_ib.

Link: https://lore.kernel.org/r/bec0dc1158e795813b135d1143147977f26bf668.1685953497.git.leon@kernel.org
Signed-off-by: Leon Romanovsky
---
 drivers/infiniband/hw/mlx5/mlx5_ib.h | 1 +
 drivers/infiniband/hw/mlx5/qp.h | 11 ++++++++++-
 include/linux/mlx5/driver.h | 9 ---------
 3 files changed, 11 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index efa4dc6e7dee..4bceef878c43 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -25,6 +25,7 @@
 #include
 #include "srq.h"
+#include "qp.h"

 #define mlx5_ib_dbg(_dev, format, arg...) \
     dev_dbg(&(_dev)->ib_dev.dev, "%s:%d:(pid %d): " format, __func__, \
diff --git a/drivers/infiniband/hw/mlx5/qp.h b/drivers/infiniband/hw/mlx5/qp.h
index 77f9b4a54816..f5130873dd52 100644
--- a/drivers/infiniband/hw/mlx5/qp.h
+++ b/drivers/infiniband/hw/mlx5/qp.h
@@ -6,7 +6,16 @@
 #ifndef _MLX5_IB_QP_H
 #define _MLX5_IB_QP_H

-#include "mlx5_ib.h"
+struct mlx5_ib_dev;
+
+struct mlx5_qp_table {
+    struct notifier_block nb;
+
+    /* protect radix tree
+     */
+    spinlock_t lock;
+    struct radix_tree_root tree;
+};

 int mlx5_init_qp_table(struct mlx5_ib_dev *dev);
 void mlx5_cleanup_qp_table(struct mlx5_ib_dev *dev);
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index a4c4f737f9c1..e3b616388b18 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -443,15 +443,6 @@ struct mlx5_core_health {
     struct delayed_work update_fw_log_ts_work;
 };

-struct mlx5_qp_table {
-    struct notifier_block nb;
-
-    /* protect radix tree
-     */
-    spinlock_t lock;
-    struct radix_tree_root tree;
-};
-
 enum {
     MLX5_PF_NOTIFY_DISABLE_VF,
     MLX5_PF_NOTIFY_ENABLE_VF,
-- cgit v1.2.3

From afff24899846ffca0c25c75b24893c90aef82603 Mon Sep 17 00:00:00 2001
From: Patrisious Haddad
Date: Mon, 5 Jun 2023 13:14:06 +0300
Subject: RDMA/mlx5: Handle DCT QP logic separately from low level QP interface

Previously when destroying a DCT, if the firmware function for the
destruction failed, the common resource would have been destroyed either
way, since it was destroyed before the firmware object. This leads to the
kernel warning "refcount_t: underflow", which indicates a possible
use-after-free; it is triggered when we try to destroy the common resource
for the second time and execute refcount_dec_and_test(&common->refcount).

So, let's fix the destruction order by factoring out the DCT QP logic into
a separate XArray database.

refcount_t: underflow; use-after-free.
WARNING: CPU: 8 PID: 1002 at lib/refcount.c:28 refcount_warn_saturate+0xd8/0xe0 Modules linked in: xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat br_netfilter rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm mlx5_ib ib_uverbs ib_core overlay mlx5_core fuse CPU: 8 PID: 1002 Comm: python3 Not tainted 5.16.0-rc5+ #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:refcount_warn_saturate+0xd8/0xe0 Code: ff 48 c7 c7 18 f5 23 82 c6 05 60 70 ff 00 01 e8 d0 0a 45 00 0f 0b c3 48 c7 c7 c0 f4 23 82 c6 05 4c 70 ff 00 01 e8 ba 0a 45 00 <0f> 0b c3 0f 1f 44 00 00 8b 07 3d 00 00 00 c0 74 12 83 f8 01 74 13 RSP: 0018:ffff8881221d3aa8 EFLAGS: 00010286 RAX: 0000000000000000 RBX: ffff8881313e8d40 RCX: ffff88852cc1b5c8 RDX: 00000000ffffffd8 RSI: 0000000000000027 RDI: ffff88852cc1b5c0 RBP: ffff888100f70000 R08: ffff88853ffd1ba8 R09: 0000000000000003 R10: 00000000fffff000 R11: 3fffffffffffffff R12: 0000000000000246 R13: ffff888100f71fa0 R14: ffff8881221d3c68 R15: 0000000000000020 FS: 00007efebbb13740(0000) GS:ffff88852cc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00005611aac29f80 CR3: 00000001313de004 CR4: 0000000000370ea0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: destroy_resource_common+0x6e/0x95 [mlx5_ib] mlx5_core_destroy_rq_tracked+0x38/0xbe [mlx5_ib] mlx5_ib_destroy_wq+0x22/0x80 [mlx5_ib] ib_destroy_wq_user+0x1f/0x40 [ib_core] uverbs_free_wq+0x19/0x40 [ib_uverbs] destroy_hw_idr_uobject+0x18/0x50 [ib_uverbs] uverbs_destroy_uobject+0x2f/0x190 [ib_uverbs] uobj_destroy+0x3c/0x80 [ib_uverbs] ib_uverbs_cmd_verbs+0x3e4/0xb80 [ib_uverbs] ? uverbs_free_wq+0x40/0x40 [ib_uverbs] ? ip_list_rcv+0xf7/0x120 ? netif_receive_skb_list_internal+0x1b6/0x2d0 ? task_tick_fair+0xbf/0x450 ? __handle_mm_fault+0x11fc/0x1450 ib_uverbs_ioctl+0xa4/0x110 [ib_uverbs] __x64_sys_ioctl+0x3e4/0x8e0 ? 
handle_mm_fault+0xb9/0x210 do_syscall_64+0x3d/0x90 entry_SYSCALL_64_after_hwframe+0x44/0xae RIP: 0033:0x7efebc0be17b Code: 0f 1e fa 48 8b 05 1d ad 0c 00 64 c7 00 26 00 00 00 48 c7 c0 ff ff ff ff c3 66 0f 1f 44 00 00 f3 0f 1e fa b8 10 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ed ac 0c 00 f7 d8 64 89 01 48 RSP: 002b:00007ffe71813e78 EFLAGS: 00000246 ORIG_RAX: 0000000000000010 RAX: ffffffffffffffda RBX: 00007ffe71813fb8 RCX: 00007efebc0be17b RDX: 00007ffe71813fa0 RSI: 00000000c0181b01 RDI: 0000000000000005 RBP: 00007ffe71813f80 R08: 00005611aae96020 R09: 000000000000004f R10: 00007efebbf9ffa0 R11: 0000000000000246 R12: 00007ffe71813f80 R13: 00007ffe71813f4c R14: 00005611aae2eca0 R15: 00007efeae6c89d0 Signed-off-by: Patrisious Haddad Signed-off-by: Jason Gunthorpe Link: https://lore.kernel.org/r/4470888466c8a898edc9833286967529cc5f3c0d.1685953497.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/qp.h | 1 + drivers/infiniband/hw/mlx5/qpc.c | 83 ++++++++++++++++++++++++---------------- include/linux/mlx5/driver.h | 1 - 3 files changed, 51 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx5/qp.h b/drivers/infiniband/hw/mlx5/qp.h index f5130873dd52..b6ee7c3ee1ca 100644 --- a/drivers/infiniband/hw/mlx5/qp.h +++ b/drivers/infiniband/hw/mlx5/qp.h @@ -10,6 +10,7 @@ struct mlx5_ib_dev; struct mlx5_qp_table { struct notifier_block nb; + struct xarray dct_xa; /* protect radix tree */ diff --git a/drivers/infiniband/hw/mlx5/qpc.c b/drivers/infiniband/hw/mlx5/qpc.c index bae0334d6e7f..3a2f755d4985 100644 --- a/drivers/infiniband/hw/mlx5/qpc.c +++ b/drivers/infiniband/hw/mlx5/qpc.c @@ -88,23 +88,35 @@ static bool is_event_type_allowed(int rsc_type, int event_type) } } +static int dct_event_notifier(struct mlx5_ib_dev *dev, struct mlx5_eqe *eqe) +{ + struct mlx5_core_dct *dct; + unsigned long flags; + u32 qpn; + + qpn = be32_to_cpu(eqe->data.dct.dctn) & 0xFFFFFF; + xa_lock_irqsave(&dev->qp_table.dct_xa, flags); + dct = xa_load(&dev->qp_table.dct_xa, qpn); + if (dct) + complete(&dct->drained); + xa_unlock_irqrestore(&dev->qp_table.dct_xa, flags); + return NOTIFY_OK; +} + static int rsc_event_notifier(struct notifier_block *nb, unsigned long type, void *data) { + struct mlx5_ib_dev *dev = + container_of(nb, struct mlx5_ib_dev, qp_table.nb); struct mlx5_core_rsc_common *common; - struct mlx5_qp_table *table; - struct mlx5_core_dct *dct; + struct mlx5_eqe *eqe = data; u8 event_type = (u8)type; struct mlx5_core_qp *qp; - struct mlx5_eqe *eqe; u32 rsn; switch (event_type) { case MLX5_EVENT_TYPE_DCT_DRAINED: - eqe = data; - rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff; - rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN); - break; + return dct_event_notifier(dev, eqe); case MLX5_EVENT_TYPE_PATH_MIG: case MLX5_EVENT_TYPE_COMM_EST: case MLX5_EVENT_TYPE_SQ_DRAINED: @@ -113,7 +125,6 @@ static int rsc_event_notifier(struct notifier_block *nb, case MLX5_EVENT_TYPE_PATH_MIG_FAILED: case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR: case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR: - eqe = data; rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff; rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN); break; @@ -121,8 +132,7 @@ static int rsc_event_notifier(struct notifier_block *nb, return NOTIFY_DONE; } - table = container_of(nb, struct mlx5_qp_table, nb); - common = mlx5_get_rsc(table, rsn); + common = mlx5_get_rsc(&dev->qp_table, rsn); if (!common) return NOTIFY_OK; @@ -137,11 +147,6 @@ static int rsc_event_notifier(struct notifier_block *nb, 
         qp->event(qp, event_type);
         /* Need to put resource in event handler */
         return NOTIFY_OK;
-    case MLX5_RES_DCT:
-        dct = (struct mlx5_core_dct *)common;
-        if (event_type == MLX5_EVENT_TYPE_DCT_DRAINED)
-            complete(&dct->drained);
-        break;
     default:
         break;
     }
@@ -188,28 +193,15 @@ static void destroy_resource_common(struct mlx5_ib_dev *dev,
 }

 static int _mlx5_core_destroy_dct(struct mlx5_ib_dev *dev,
-                  struct mlx5_core_dct *dct, bool need_cleanup)
+                  struct mlx5_core_dct *dct)
 {
     u32 in[MLX5_ST_SZ_DW(destroy_dct_in)] = {};
     struct mlx5_core_qp *qp = &dct->mqp;
-    int err;
-    err = mlx5_core_drain_dct(dev, dct);
-    if (err) {
-        if (dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
-            goto destroy;
-
-        return err;
-    }
-    wait_for_completion(&dct->drained);
-destroy:
-    if (need_cleanup)
-        destroy_resource_common(dev, &dct->mqp);
     MLX5_SET(destroy_dct_in, in, opcode, MLX5_CMD_OP_DESTROY_DCT);
     MLX5_SET(destroy_dct_in, in, dctn, qp->qpn);
     MLX5_SET(destroy_dct_in, in, uid, qp->uid);
-    err = mlx5_cmd_exec_in(dev->mdev, destroy_dct, in);
-    return err;
+    return mlx5_cmd_exec_in(dev->mdev, destroy_dct, in);
 }

 int mlx5_core_create_dct(struct mlx5_ib_dev *dev, struct mlx5_core_dct *dct,
@@ -227,13 +219,13 @@ int mlx5_core_create_dct(struct mlx5_ib_dev *dev, struct mlx5_core_dct *dct,
     qp->qpn = MLX5_GET(create_dct_out, out, dctn);
     qp->uid = MLX5_GET(create_dct_in, in, uid);
-    err = create_resource_common(dev, qp, MLX5_RES_DCT);
+    err = xa_err(xa_store_irq(&dev->qp_table.dct_xa, qp->qpn, dct, GFP_KERNEL));
     if (err)
         goto err_cmd;

     return 0;
 err_cmd:
-    _mlx5_core_destroy_dct(dev, dct, false);
+    _mlx5_core_destroy_dct(dev, dct);
     return err;
 }

@@ -284,7 +276,31 @@ static int mlx5_core_drain_dct(struct mlx5_ib_dev *dev,
 int mlx5_core_destroy_dct(struct mlx5_ib_dev *dev,
               struct mlx5_core_dct *dct)
 {
-    return _mlx5_core_destroy_dct(dev, dct, true);
+    struct mlx5_qp_table *table = &dev->qp_table;
+    struct mlx5_core_dct *tmp;
+    int err;
+
+    err = mlx5_core_drain_dct(dev, dct);
+    if (err) {
+        if (dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+            goto destroy;
+
+        return err;
+    }
+    wait_for_completion(&dct->drained);
+
+destroy:
+    tmp = xa_cmpxchg_irq(&table->dct_xa, dct->mqp.qpn, dct, XA_ZERO_ENTRY, GFP_KERNEL);
+    if (WARN_ON(tmp != dct))
+        return xa_err(tmp) ?: -EINVAL;
+
+    err = _mlx5_core_destroy_dct(dev, dct);
+    if (err) {
+        xa_cmpxchg_irq(&table->dct_xa, dct->mqp.qpn, XA_ZERO_ENTRY, dct, 0);
+        return err;
+    }
+    xa_erase_irq(&table->dct_xa, dct->mqp.qpn);
+    return 0;
 }

 int mlx5_core_destroy_qp(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp)
@@ -488,6 +504,7 @@ int mlx5_init_qp_table(struct mlx5_ib_dev *dev)
     spin_lock_init(&table->lock);
     INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
+    xa_init(&table->dct_xa);
     mlx5_qp_debugfs_init(dev->mdev);

     table->nb.notifier_call = rsc_event_notifier;
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index e3b616388b18..e67c603d507b 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -382,7 +382,6 @@ enum mlx5_res_type {
     MLX5_RES_SRQ = 3,
     MLX5_RES_XSRQ = 4,
     MLX5_RES_XRQ = 5,
-    MLX5_RES_DCT = MLX5_EVENT_QUEUE_TYPE_DCT,
 };

 struct mlx5_core_rsc_common {
-- cgit v1.2.3

From 360da60d6c6edb9740de7a8e6d8969d62ceff956 Mon Sep 17 00:00:00 2001
From: Selvin Xavier
Date: Tue, 13 Jun 2023 11:12:23 -0700
Subject: RDMA/bnxt_re: Enable low latency push

Introduce driver specific uapi functionalities. Added an alloc_page
functionality for the user library to allocate specific pages.
Currently added support for allocating write combine pages for the push
functionality. This interface shall be extended for other page allocations.

Allocate a WC page using the uapi hook for enabling the low latency push
in Gen P5 adapters for small packets. This is supported only for user
space QPs.

Link: https://lore.kernel.org/r/1686679943-17117-8-git-send-email-selvin.xavier@broadcom.com
Signed-off-by: Selvin Xavier
Signed-off-by: Jason Gunthorpe
---
 drivers/infiniband/hw/bnxt_re/bnxt_re.h | 3 +
 drivers/infiniband/hw/bnxt_re/ib_verbs.c | 148 ++++++++++++++++++++++++++++++
 drivers/infiniband/hw/bnxt_re/ib_verbs.h | 4 +
 drivers/infiniband/hw/bnxt_re/main.c | 20 +++-
 drivers/infiniband/hw/bnxt_re/qplib_res.c | 3 +
 drivers/infiniband/hw/bnxt_re/qplib_res.h | 3 +-
 include/uapi/rdma/bnxt_re-abi.h | 27 ++++++
 7 files changed, 204 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/drivers/infiniband/hw/bnxt_re/bnxt_re.h b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
index f34fb87e3b77..9e278d23eb82 100644
--- a/drivers/infiniband/hw/bnxt_re/bnxt_re.h
+++ b/drivers/infiniband/hw/bnxt_re/bnxt_re.h
@@ -39,6 +39,7 @@
 #ifndef __BNXT_RE_H__
 #define __BNXT_RE_H__
+#include
 #include "hw_counters.h"

 #define ROCE_DRV_MODULE_NAME "bnxt_re"
@@ -189,4 +190,6 @@ static inline struct device *rdev_to_dev(struct bnxt_re_dev *rdev)
         return &rdev->ibdev.dev;
     return NULL;
 }
+
+extern const struct uapi_definition bnxt_re_uapi_defs[];
 #endif
diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
index 8e3b45d9e656..a936e0d474f9 100644
--- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
+++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
@@ -61,6 +61,15 @@
 #include "bnxt_re.h"
 #include "ib_verbs.h"
+
+#include
+#include
+
+#include
+
+#define UVERBS_MODULE_NAME bnxt_re
+#include
+
 #include

 static int __from_ib_access_flags(int iflags)
@@ -546,6 +555,7 @@ bnxt_re_mmap_entry_insert(struct bnxt_re_ucontext *uctx, u64 mem_offset,
     entry->mem_offset = mem_offset;
     entry->mmap_flag = mmap_flag;
+    entry->uctx = uctx;

     switch (mmap_flag) {
     case BNXT_RE_MMAP_SH_PAGE:
@@ -553,6 +563,7 @@ bnxt_re_mmap_entry_insert(struct bnxt_re_ucontext *uctx, u64 mem_offset,
                           &entry->rdma_entry, PAGE_SIZE, 0);
         break;
     case BNXT_RE_MMAP_UC_DB:
+    case BNXT_RE_MMAP_WC_DB:
         ret = rdma_user_mmap_entry_insert(&uctx->ib_uctx,
                           &entry->rdma_entry, PAGE_SIZE);
         break;
@@ -4056,6 +4067,9 @@ int bnxt_re_alloc_ucontext(struct ib_ucontext *ctx, struct ib_udata *udata)
     resp.comp_mask |= BNXT_RE_UCNTX_CMASK_HAVE_MODE;
     resp.mode = rdev->chip_ctx->modes.wqe_mode;

+    if (rdev->chip_ctx->modes.db_push)
+        resp.comp_mask |= BNXT_RE_UCNTX_CMASK_WC_DPI_ENABLED;
+
     entry = bnxt_re_mmap_entry_insert(uctx, 0, BNXT_RE_MMAP_SH_PAGE, NULL);
     if (!entry) {
         rc = -ENOMEM;
@@ -4119,6 +4133,12 @@ int bnxt_re_mmap(struct ib_ucontext *ib_uctx, struct vm_area_struct *vma)
                     rdma_entry);

     switch (bnxt_entry->mmap_flag) {
+    case BNXT_RE_MMAP_WC_DB:
+        pfn = bnxt_entry->mem_offset >> PAGE_SHIFT;
+        ret = rdma_user_mmap_io(ib_uctx, vma, pfn, PAGE_SIZE,
+                    pgprot_writecombine(vma->vm_page_prot),
+                    rdma_entry);
+        break;
     case BNXT_RE_MMAP_UC_DB:
         pfn = bnxt_entry->mem_offset >> PAGE_SHIFT;
         ret = rdma_user_mmap_io(ib_uctx, vma, pfn, PAGE_SIZE,
@@ -4146,3 +4166,131 @@ void bnxt_re_mmap_free(struct rdma_user_mmap_entry *rdma_entry)

     kfree(bnxt_entry);
 }
+
+static int UVERBS_HANDLER(BNXT_RE_METHOD_ALLOC_PAGE)(struct uverbs_attr_bundle *attrs)
+{
+    struct ib_uobject *uobj = uverbs_attr_get_uobject(attrs, BNXT_RE_ALLOC_PAGE_HANDLE);
+    enum bnxt_re_alloc_page_type
alloc_type; + struct bnxt_re_user_mmap_entry *entry; + enum bnxt_re_mmap_flag mmap_flag; + struct bnxt_qplib_chip_ctx *cctx; + struct bnxt_re_ucontext *uctx; + struct bnxt_re_dev *rdev; + u64 mmap_offset; + u32 length; + u32 dpi; + u64 dbr; + int err; + + uctx = container_of(ib_uverbs_get_ucontext(attrs), struct bnxt_re_ucontext, ib_uctx); + if (IS_ERR(uctx)) + return PTR_ERR(uctx); + + err = uverbs_get_const(&alloc_type, attrs, BNXT_RE_ALLOC_PAGE_TYPE); + if (err) + return err; + + rdev = uctx->rdev; + cctx = rdev->chip_ctx; + + switch (alloc_type) { + case BNXT_RE_ALLOC_WC_PAGE: + if (cctx->modes.db_push) { + if (bnxt_qplib_alloc_dpi(&rdev->qplib_res, &uctx->wcdpi, + uctx, BNXT_QPLIB_DPI_TYPE_WC)) + return -ENOMEM; + length = PAGE_SIZE; + dpi = uctx->wcdpi.dpi; + dbr = (u64)uctx->wcdpi.umdbr; + mmap_flag = BNXT_RE_MMAP_WC_DB; + } else { + return -EINVAL; + } + + break; + + default: + return -EOPNOTSUPP; + } + + entry = bnxt_re_mmap_entry_insert(uctx, dbr, mmap_flag, &mmap_offset); + if (IS_ERR(entry)) + return PTR_ERR(entry); + + uobj->object = entry; + uverbs_finalize_uobj_create(attrs, BNXT_RE_ALLOC_PAGE_HANDLE); + err = uverbs_copy_to(attrs, BNXT_RE_ALLOC_PAGE_MMAP_OFFSET, + &mmap_offset, sizeof(mmap_offset)); + if (err) + return err; + + err = uverbs_copy_to(attrs, BNXT_RE_ALLOC_PAGE_MMAP_LENGTH, + &length, sizeof(length)); + if (err) + return err; + + err = uverbs_copy_to(attrs, BNXT_RE_ALLOC_PAGE_DPI, + &dpi, sizeof(length)); + if (err) + return err; + + return 0; +} + +static int alloc_page_obj_cleanup(struct ib_uobject *uobject, + enum rdma_remove_reason why, + struct uverbs_attr_bundle *attrs) +{ + struct bnxt_re_user_mmap_entry *entry = uobject->object; + struct bnxt_re_ucontext *uctx = entry->uctx; + + switch (entry->mmap_flag) { + case BNXT_RE_MMAP_WC_DB: + if (uctx && uctx->wcdpi.dbr) { + struct bnxt_re_dev *rdev = uctx->rdev; + + bnxt_qplib_dealloc_dpi(&rdev->qplib_res, &uctx->wcdpi); + uctx->wcdpi.dbr = NULL; + } + break; + default: + goto exit; + } + rdma_user_mmap_entry_remove(&entry->rdma_entry); +exit: + return 0; +} + +DECLARE_UVERBS_NAMED_METHOD(BNXT_RE_METHOD_ALLOC_PAGE, + UVERBS_ATTR_IDR(BNXT_RE_ALLOC_PAGE_HANDLE, + BNXT_RE_OBJECT_ALLOC_PAGE, + UVERBS_ACCESS_NEW, + UA_MANDATORY), + UVERBS_ATTR_CONST_IN(BNXT_RE_ALLOC_PAGE_TYPE, + enum bnxt_re_alloc_page_type, + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(BNXT_RE_ALLOC_PAGE_MMAP_OFFSET, + UVERBS_ATTR_TYPE(u64), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(BNXT_RE_ALLOC_PAGE_MMAP_LENGTH, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY), + UVERBS_ATTR_PTR_OUT(BNXT_RE_ALLOC_PAGE_DPI, + UVERBS_ATTR_TYPE(u32), + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_METHOD_DESTROY(BNXT_RE_METHOD_DESTROY_PAGE, + UVERBS_ATTR_IDR(BNXT_RE_DESTROY_PAGE_HANDLE, + BNXT_RE_OBJECT_ALLOC_PAGE, + UVERBS_ACCESS_DESTROY, + UA_MANDATORY)); + +DECLARE_UVERBS_NAMED_OBJECT(BNXT_RE_OBJECT_ALLOC_PAGE, + UVERBS_TYPE_ALLOC_IDR(alloc_page_obj_cleanup), + &UVERBS_METHOD(BNXT_RE_METHOD_ALLOC_PAGE), + &UVERBS_METHOD(BNXT_RE_METHOD_DESTROY_PAGE)); + +const struct uapi_definition bnxt_re_uapi_defs[] = { + UAPI_DEF_CHAIN_OBJ_TREE_NAMED(BNXT_RE_OBJECT_ALLOC_PAGE), + {} +}; diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.h b/drivers/infiniband/hw/bnxt_re/ib_verbs.h index dcd31aefad4c..32d9e9d09791 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.h +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.h @@ -61,6 +61,7 @@ struct bnxt_re_pd { struct bnxt_qplib_pd qplib_pd; struct bnxt_re_fence_data fence; struct rdma_user_mmap_entry *pd_db_mmap; + struct rdma_user_mmap_entry *pd_wcdb_mmap; 
}; struct bnxt_re_ah { @@ -135,6 +136,7 @@ struct bnxt_re_ucontext { struct ib_ucontext ib_uctx; struct bnxt_re_dev *rdev; struct bnxt_qplib_dpi dpi; + struct bnxt_qplib_dpi wcdpi; void *shpg; spinlock_t sh_lock; /* protect shpg */ struct rdma_user_mmap_entry *shpage_mmap; @@ -143,10 +145,12 @@ struct bnxt_re_ucontext { enum bnxt_re_mmap_flag { BNXT_RE_MMAP_SH_PAGE, BNXT_RE_MMAP_UC_DB, + BNXT_RE_MMAP_WC_DB, }; struct bnxt_re_user_mmap_entry { struct rdma_user_mmap_entry rdma_entry; + struct bnxt_re_ucontext *uctx; u64 mem_offset; u8 mmap_flag; }; diff --git a/drivers/infiniband/hw/bnxt_re/main.c b/drivers/infiniband/hw/bnxt_re/main.c index 0c681134dd57..0816cf2a0b38 100644 --- a/drivers/infiniband/hw/bnxt_re/main.c +++ b/drivers/infiniband/hw/bnxt_re/main.c @@ -66,6 +66,7 @@ #include #include "bnxt.h" #include "hw_counters.h" +#include "ib_verbs.h" static char version[] = BNXT_RE_DESC "\n"; @@ -117,6 +118,10 @@ static void bnxt_re_set_db_offset(struct bnxt_re_dev *rdev) * in such cases and DB-push will be disabled. */ barlen = pci_resource_len(res->pdev, RCFW_DBR_PCI_BAR_REGION); + if (cctx->modes.db_push && l2db_len && en_dev->l2_db_size != barlen) { + res->dpi_tbl.wcreg.offset = en_dev->l2_db_size; + dev_info(rdev_to_dev(rdev), "Low latency framework is enabled\n"); + } } static void bnxt_re_set_drv_mode(struct bnxt_re_dev *rdev, u8 mode) @@ -395,8 +400,7 @@ static int bnxt_re_hwrm_qcfg(struct bnxt_re_dev *rdev, u32 *db_len, int rc; memset(&fw_msg, 0, sizeof(fw_msg)); - bnxt_re_init_hwrm_hdr(rdev, (void *)&req, - HWRM_FUNC_QCFG, -1, -1); + bnxt_re_init_hwrm_hdr((void *)&req, HWRM_FUNC_QCFG); req.fid = cpu_to_le16(0xffff); bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); @@ -416,13 +420,20 @@ int bnxt_re_hwrm_qcaps(struct bnxt_re_dev *rdev) struct hwrm_func_qcaps_input req = {}; struct bnxt_qplib_chip_ctx *cctx; struct bnxt_fw_msg fw_msg = {}; + int rc; cctx = rdev->chip_ctx; bnxt_re_init_hwrm_hdr((void *)&req, HWRM_FUNC_QCAPS); req.fid = cpu_to_le16(0xffff); bnxt_re_fill_fw_msg(&fw_msg, (void *)&req, sizeof(req), (void *)&resp, sizeof(resp), DFLT_HWRM_CMD_TIMEOUT); - return bnxt_send_msg(en_dev, &fw_msg); + + rc = bnxt_send_msg(en_dev, &fw_msg); + if (rc) + return rc; + cctx->modes.db_push = le32_to_cpu(resp.flags) & FUNC_QCAPS_RESP_FLAGS_WCB_PUSH_MODE; + + return 0; } static int bnxt_re_net_ring_free(struct bnxt_re_dev *rdev, @@ -669,6 +680,9 @@ static int bnxt_re_register_ib(struct bnxt_re_dev *rdev) ibdev->dev.parent = &rdev->en_dev->pdev->dev; ibdev->local_dma_lkey = BNXT_QPLIB_RSVD_LKEY; + if (IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS)) + ibdev->driver_def = bnxt_re_uapi_defs; + ib_set_device_ops(ibdev, &bnxt_re_dev_ops); ret = ib_device_set_netdev(&rdev->ibdev, rdev->netdev, 1); if (ret) diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.c b/drivers/infiniband/hw/bnxt_re/qplib_res.c index e1cbe594e56b..174db831b888 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.c +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.c @@ -740,6 +740,9 @@ int bnxt_qplib_alloc_dpi(struct bnxt_qplib_res *res, dpi->dbr = dpit->priv_db; dpi->dpi = dpi->bit; break; + case BNXT_QPLIB_DPI_TYPE_WC: + dpi->dbr = ioremap_wc(umaddr, PAGE_SIZE); + break; default: dpi->dbr = ioremap(umaddr, PAGE_SIZE); break; diff --git a/drivers/infiniband/hw/bnxt_re/qplib_res.h b/drivers/infiniband/hw/bnxt_re/qplib_res.h index 398a469d8377..d850a553821e 100644 --- a/drivers/infiniband/hw/bnxt_re/qplib_res.h +++ b/drivers/infiniband/hw/bnxt_re/qplib_res.h @@ -47,7 +47,7 
@@ extern const struct bnxt_qplib_gid bnxt_qplib_gid_zero;

 struct bnxt_qplib_drv_modes {
     u8 wqe_mode;
-    /* Other modes to follow here */
+    bool db_push;
 };

 struct bnxt_qplib_chip_ctx {
@@ -194,6 +194,7 @@ struct bnxt_qplib_sgid_tbl {
 enum {
     BNXT_QPLIB_DPI_TYPE_KERNEL = 0,
     BNXT_QPLIB_DPI_TYPE_UC = 1,
+    BNXT_QPLIB_DPI_TYPE_WC = 2
 };

 struct bnxt_qplib_dpi {
diff --git a/include/uapi/rdma/bnxt_re-abi.h b/include/uapi/rdma/bnxt_re-abi.h
index c4e90775da0c..8a2a1d4f6b29 100644
--- a/include/uapi/rdma/bnxt_re-abi.h
+++ b/include/uapi/rdma/bnxt_re-abi.h
@@ -41,6 +41,7 @@
 #define __BNXT_RE_UVERBS_ABI_H__

 #include
+#include

 #define BNXT_RE_ABI_VERSION 1
@@ -51,6 +52,7 @@ enum {
     BNXT_RE_UCNTX_CMASK_HAVE_CCTX = 0x1ULL,
     BNXT_RE_UCNTX_CMASK_HAVE_MODE = 0x02ULL,
+    BNXT_RE_UCNTX_CMASK_WC_DPI_ENABLED = 0x04ULL,
 };

 enum bnxt_re_wqe_mode {
@@ -127,4 +129,29 @@ enum bnxt_re_shpg_offt {
     BNXT_RE_END_RESV_OFFT = 0xFF0
 };

+enum bnxt_re_objects {
+    BNXT_RE_OBJECT_ALLOC_PAGE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum bnxt_re_alloc_page_type {
+    BNXT_RE_ALLOC_WC_PAGE = 0,
+};
+
+enum bnxt_re_var_alloc_page_attrs {
+    BNXT_RE_ALLOC_PAGE_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+    BNXT_RE_ALLOC_PAGE_TYPE,
+    BNXT_RE_ALLOC_PAGE_DPI,
+    BNXT_RE_ALLOC_PAGE_MMAP_OFFSET,
+    BNXT_RE_ALLOC_PAGE_MMAP_LENGTH,
+};
+
+enum bnxt_re_alloc_page_attrs {
+    BNXT_RE_DESTROY_PAGE_HANDLE = (1U << UVERBS_ID_NS_SHIFT),
+};
+
+enum bnxt_re_alloc_page_methods {
+    BNXT_RE_METHOD_ALLOC_PAGE = (1U << UVERBS_ID_NS_SHIFT),
+    BNXT_RE_METHOD_DESTROY_PAGE,
+};
+
 #endif /* __BNXT_RE_UVERBS_ABI_H__*/
-- cgit v1.2.3
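Editor's note on the last patch: the low latency push flow is consumed from user space in two steps. The provider library first invokes the new BNXT_RE_METHOD_ALLOC_PAGE uverbs method to obtain a DPI plus an mmap offset and length for a write-combining doorbell page, and then maps that page through the uverbs command fd so it can push small WQEs directly. The sketch below illustrates only the second (mmap) step under stated assumptions: the function name bnxt_re_map_push_page is hypothetical, and cmd_fd, mmap_offset and length stand in for the open uverbs fd and the values copied back through BNXT_RE_ALLOC_PAGE_MMAP_OFFSET / BNXT_RE_ALLOC_PAGE_MMAP_LENGTH; it is not code from the patches above.

/*
 * Hypothetical userspace sketch: map the WC doorbell page advertised by
 * BNXT_RE_METHOD_ALLOC_PAGE. The kernel side applies
 * pgprot_writecombine() for BNXT_RE_MMAP_WC_DB, so a plain shared,
 * writable mapping at the returned offset is sufficient here.
 */
#include <stdint.h>
#include <stdio.h>
#include <sys/mman.h>

static void *bnxt_re_map_push_page(int cmd_fd, uint64_t mmap_offset,
                                   uint32_t length)
{
        void *wc_db;

        /* Map the doorbell page exposed through the uverbs command fd. */
        wc_db = mmap(NULL, length, PROT_READ | PROT_WRITE, MAP_SHARED,
                     cmd_fd, (off_t)mmap_offset);
        if (wc_db == MAP_FAILED) {
                perror("mmap of WC doorbell page failed");
                return NULL;
        }
        return wc_db;
}

As a usage note, a library would attempt this only when the ucontext response carried BNXT_RE_UCNTX_CMASK_WC_DPI_ENABLED; otherwise it should skip the allocation and fall back to regular doorbell writes, mirroring the -EINVAL path in the kernel handler above.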