summaryrefslogtreecommitdiff
path: root/drivers/infiniband/hw/mlx5/odp.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/hw/mlx5/odp.c')
-rw-r--r--drivers/infiniband/hw/mlx5/odp.c189
1 files changed, 59 insertions, 130 deletions
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 1d257d1b3b0d..2e9b43061797 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -184,7 +184,7 @@ void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
for (i = 0; i < nentries; i++, pklm++) {
pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
va = (offset + i) * MLX5_IMR_MTT_SIZE;
- if (odp && odp->umem.address == va) {
+ if (odp && ib_umem_start(odp) == va) {
struct mlx5_ib_mr *mtt = odp->private;
pklm->key = cpu_to_be32(mtt->ibmr.lkey);
@@ -206,7 +206,7 @@ static void mr_leaf_free_action(struct work_struct *work)
mr->parent = NULL;
synchronize_srcu(&mr->dev->mr_srcu);
- ib_umem_release(&odp->umem);
+ ib_umem_odp_release(odp);
if (imr->live)
mlx5_ib_update_xlt(imr, idx, 1, 0,
MLX5_IB_UPD_XLT_INDIRECT |
@@ -301,7 +301,8 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
memset(caps, 0, sizeof(*caps));
- if (!MLX5_CAP_GEN(dev->mdev, pg))
+ if (!MLX5_CAP_GEN(dev->mdev, pg) ||
+ !mlx5_ib_can_use_umr(dev, true))
return;
caps->general_caps = IB_ODP_SUPPORT;
@@ -355,7 +356,8 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
if (MLX5_CAP_GEN(dev->mdev, fixed_buffer_size) &&
MLX5_CAP_GEN(dev->mdev, null_mkey) &&
- MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
+ MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset) &&
+ !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled))
caps->general_caps |= IB_ODP_SUPPORT_IMPLICIT;
return;
@@ -384,7 +386,7 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
}
static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd,
- struct ib_umem *umem,
+ struct ib_umem_odp *umem_odp,
bool ksm, int access_flags)
{
struct mlx5_ib_dev *dev = to_mdev(pd->device);
@@ -402,7 +404,7 @@ static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd,
mr->dev = dev;
mr->access_flags = access_flags;
mr->mmkey.iova = 0;
- mr->umem = umem;
+ mr->umem = &umem_odp->umem;
if (ksm) {
err = mlx5_ib_update_xlt(mr, 0,
@@ -462,18 +464,17 @@ next_mr:
if (nentries)
nentries++;
} else {
- odp = ib_alloc_odp_umem(odp_mr, addr,
- MLX5_IMR_MTT_SIZE);
+ odp = ib_umem_odp_alloc_child(odp_mr, addr, MLX5_IMR_MTT_SIZE);
if (IS_ERR(odp)) {
mutex_unlock(&odp_mr->umem_mutex);
return ERR_CAST(odp);
}
- mtt = implicit_mr_alloc(mr->ibmr.pd, &odp->umem, 0,
+ mtt = implicit_mr_alloc(mr->ibmr.pd, odp, 0,
mr->access_flags);
if (IS_ERR(mtt)) {
mutex_unlock(&odp_mr->umem_mutex);
- ib_umem_release(&odp->umem);
+ ib_umem_odp_release(odp);
return ERR_CAST(mtt);
}
@@ -495,7 +496,7 @@ next_mr:
addr += MLX5_IMR_MTT_SIZE;
if (unlikely(addr < io_virt + bcnt)) {
odp = odp_next(odp);
- if (odp && odp->umem.address != addr)
+ if (odp && ib_umem_start(odp) != addr)
odp = NULL;
goto next_mr;
}
@@ -519,19 +520,19 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
int access_flags)
{
struct mlx5_ib_mr *imr;
- struct ib_umem *umem;
+ struct ib_umem_odp *umem_odp;
- umem = ib_umem_get(udata, 0, 0, access_flags, 0);
- if (IS_ERR(umem))
- return ERR_CAST(umem);
+ umem_odp = ib_umem_odp_alloc_implicit(udata, access_flags);
+ if (IS_ERR(umem_odp))
+ return ERR_CAST(umem_odp);
- imr = implicit_mr_alloc(&pd->ibpd, umem, 1, access_flags);
+ imr = implicit_mr_alloc(&pd->ibpd, umem_odp, 1, access_flags);
if (IS_ERR(imr)) {
- ib_umem_release(umem);
+ ib_umem_odp_release(umem_odp);
return ERR_CAST(imr);
}
- imr->umem = umem;
+ imr->umem = &umem_odp->umem;
init_waitqueue_head(&imr->q_leaf_free);
atomic_set(&imr->num_leaf_free, 0);
atomic_set(&imr->num_pending_prefetch, 0);
@@ -539,34 +540,31 @@ struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
return imr;
}
-static int mr_leaf_free(struct ib_umem_odp *umem_odp, u64 start, u64 end,
- void *cookie)
+void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
{
- struct mlx5_ib_mr *mr = umem_odp->private, *imr = cookie;
-
- if (mr->parent != imr)
- return 0;
+ struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(imr);
+ struct rb_node *node;
- ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
- ib_umem_end(umem_odp));
+ down_read(&per_mm->umem_rwsem);
+ for (node = rb_first_cached(&per_mm->umem_tree); node;
+ node = rb_next(node)) {
+ struct ib_umem_odp *umem_odp =
+ rb_entry(node, struct ib_umem_odp, interval_tree.rb);
+ struct mlx5_ib_mr *mr = umem_odp->private;
- if (umem_odp->dying)
- return 0;
+ if (mr->parent != imr)
+ continue;
- WRITE_ONCE(umem_odp->dying, 1);
- atomic_inc(&imr->num_leaf_free);
- schedule_work(&umem_odp->work);
+ ib_umem_odp_unmap_dma_pages(umem_odp, ib_umem_start(umem_odp),
+ ib_umem_end(umem_odp));
- return 0;
-}
-
-void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
-{
- struct ib_ucontext_per_mm *per_mm = mr_to_per_mm(imr);
+ if (umem_odp->dying)
+ continue;
- down_read(&per_mm->umem_rwsem);
- rbt_ib_umem_for_each_in_range(&per_mm->umem_tree, 0, ULLONG_MAX,
- mr_leaf_free, true, imr);
+ WRITE_ONCE(umem_odp->dying, 1);
+ atomic_inc(&imr->num_leaf_free);
+ schedule_work(&umem_odp->work);
+ }
up_read(&per_mm->umem_rwsem);
wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free));
@@ -587,7 +585,7 @@ static int pagefault_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
struct ib_umem_odp *odp;
size_t size;
- if (!odp_mr->page_list) {
+ if (odp_mr->is_implicit_odp) {
odp = implicit_mr_get_data(mr, io_virt, bcnt);
if (IS_ERR(odp))
@@ -605,7 +603,7 @@ next_mr:
start_idx = (io_virt - (mr->mmkey.iova & page_mask)) >> page_shift;
access_mask = ODP_READ_ALLOWED_BIT;
- if (prefetch && !downgrade && !mr->umem->writable) {
+ if (prefetch && !downgrade && !odp->umem.writable) {
/* prefetch with write-access must
* be supported by the MR
*/
@@ -613,7 +611,7 @@ next_mr:
goto out;
}
- if (mr->umem->writable && !downgrade)
+ if (odp->umem.writable && !downgrade)
access_mask |= ODP_WRITE_ALLOWED_BIT;
current_seq = READ_ONCE(odp->notifiers_seq);
@@ -623,8 +621,8 @@ next_mr:
*/
smp_rmb();
- ret = ib_umem_odp_map_dma_pages(to_ib_umem_odp(mr->umem), io_virt, size,
- access_mask, current_seq);
+ ret = ib_umem_odp_map_dma_pages(odp, io_virt, size, access_mask,
+ current_seq);
if (ret < 0)
goto out;
@@ -632,8 +630,7 @@ next_mr:
np = ret;
mutex_lock(&odp->umem_mutex);
- if (!ib_umem_mmu_notifier_retry(to_ib_umem_odp(mr->umem),
- current_seq)) {
+ if (!ib_umem_mmu_notifier_retry(odp, current_seq)) {
/*
* No need to check whether the MTTs really belong to
* this MR, since ib_umem_odp_map_dma_pages already
@@ -666,7 +663,7 @@ next_mr:
io_virt += size;
next = odp_next(odp);
- if (unlikely(!next || next->umem.address != io_virt)) {
+ if (unlikely(!next || ib_umem_start(next) != io_virt)) {
mlx5_ib_dbg(dev, "next implicit leaf removed at 0x%llx. got %p\n",
io_virt, next);
return -EAGAIN;
@@ -985,17 +982,6 @@ static int pagefault_data_segments(struct mlx5_ib_dev *dev,
return ret < 0 ? ret : npages;
}
-static const u32 mlx5_ib_odp_opcode_cap[] = {
- [MLX5_OPCODE_SEND] = IB_ODP_SUPPORT_SEND,
- [MLX5_OPCODE_SEND_IMM] = IB_ODP_SUPPORT_SEND,
- [MLX5_OPCODE_SEND_INVAL] = IB_ODP_SUPPORT_SEND,
- [MLX5_OPCODE_RDMA_WRITE] = IB_ODP_SUPPORT_WRITE,
- [MLX5_OPCODE_RDMA_WRITE_IMM] = IB_ODP_SUPPORT_WRITE,
- [MLX5_OPCODE_RDMA_READ] = IB_ODP_SUPPORT_READ,
- [MLX5_OPCODE_ATOMIC_CS] = IB_ODP_SUPPORT_ATOMIC,
- [MLX5_OPCODE_ATOMIC_FA] = IB_ODP_SUPPORT_ATOMIC,
-};
-
/*
* Parse initiator WQE. Advances the wqe pointer to point at the
* scatter-gather list, and set wqe_end to the end of the WQE.
@@ -1006,12 +992,8 @@ static int mlx5_ib_mr_initiator_pfault_handler(
{
struct mlx5_wqe_ctrl_seg *ctrl = *wqe;
u16 wqe_index = pfault->wqe.wqe_index;
- u32 transport_caps;
struct mlx5_base_av *av;
unsigned ds, opcode;
-#if defined(DEBUG)
- u32 ctrl_wqe_index, ctrl_qpn;
-#endif
u32 qpn = qp->trans_qp.base.mqp.qpn;
ds = be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_DS_MASK;
@@ -1027,58 +1009,17 @@ static int mlx5_ib_mr_initiator_pfault_handler(
return -EFAULT;
}
-#if defined(DEBUG)
- ctrl_wqe_index = (be32_to_cpu(ctrl->opmod_idx_opcode) &
- MLX5_WQE_CTRL_WQE_INDEX_MASK) >>
- MLX5_WQE_CTRL_WQE_INDEX_SHIFT;
- if (wqe_index != ctrl_wqe_index) {
- mlx5_ib_err(dev, "Got WQE with invalid wqe_index. wqe_index=0x%x, qpn=0x%x ctrl->wqe_index=0x%x\n",
- wqe_index, qpn,
- ctrl_wqe_index);
- return -EFAULT;
- }
-
- ctrl_qpn = (be32_to_cpu(ctrl->qpn_ds) & MLX5_WQE_CTRL_QPN_MASK) >>
- MLX5_WQE_CTRL_QPN_SHIFT;
- if (qpn != ctrl_qpn) {
- mlx5_ib_err(dev, "Got WQE with incorrect QP number. wqe_index=0x%x, qpn=0x%x ctrl->qpn=0x%x\n",
- wqe_index, qpn,
- ctrl_qpn);
- return -EFAULT;
- }
-#endif /* DEBUG */
-
*wqe_end = *wqe + ds * MLX5_WQE_DS_UNITS;
*wqe += sizeof(*ctrl);
opcode = be32_to_cpu(ctrl->opmod_idx_opcode) &
MLX5_WQE_CTRL_OPCODE_MASK;
- switch (qp->ibqp.qp_type) {
- case IB_QPT_XRC_INI:
+ if (qp->ibqp.qp_type == IB_QPT_XRC_INI)
*wqe += sizeof(struct mlx5_wqe_xrc_seg);
- transport_caps = dev->odp_caps.per_transport_caps.xrc_odp_caps;
- break;
- case IB_QPT_RC:
- transport_caps = dev->odp_caps.per_transport_caps.rc_odp_caps;
- break;
- case IB_QPT_UD:
- transport_caps = dev->odp_caps.per_transport_caps.ud_odp_caps;
- break;
- default:
- mlx5_ib_err(dev, "ODP fault on QP of an unsupported transport 0x%x\n",
- qp->ibqp.qp_type);
- return -EFAULT;
- }
- if (unlikely(opcode >= ARRAY_SIZE(mlx5_ib_odp_opcode_cap) ||
- !(transport_caps & mlx5_ib_odp_opcode_cap[opcode]))) {
- mlx5_ib_err(dev, "ODP fault on QP of an unsupported opcode 0x%x\n",
- opcode);
- return -EFAULT;
- }
-
- if (qp->ibqp.qp_type == IB_QPT_UD) {
+ if (qp->ibqp.qp_type == IB_QPT_UD ||
+ qp->qp_sub_type == MLX5_IB_QPT_DCI) {
av = *wqe;
if (av->dqp_dct & cpu_to_be32(MLX5_EXTENDED_UD_AV))
*wqe += sizeof(struct mlx5_av);
@@ -1141,19 +1082,6 @@ static int mlx5_ib_mr_responder_pfault_handler_rq(struct mlx5_ib_dev *dev,
return -EFAULT;
}
- switch (qp->ibqp.qp_type) {
- case IB_QPT_RC:
- if (!(dev->odp_caps.per_transport_caps.rc_odp_caps &
- IB_ODP_SUPPORT_RECV))
- goto invalid_transport_or_opcode;
- break;
- default:
-invalid_transport_or_opcode:
- mlx5_ib_err(dev, "ODP fault on QP of an unsupported transport. transport: 0x%x\n",
- qp->ibqp.qp_type);
- return -EFAULT;
- }
-
*wqe_end = wqe + wqe_size;
return 0;
@@ -1203,7 +1131,7 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
{
bool sq = pfault->type & MLX5_PFAULT_REQUESTOR;
u16 wqe_index = pfault->wqe.wqe_index;
- void *wqe = NULL, *wqe_end = NULL;
+ void *wqe, *wqe_start = NULL, *wqe_end = NULL;
u32 bytes_mapped, total_wqe_bytes;
struct mlx5_core_rsc_common *res;
int resume_with_error = 1;
@@ -1224,12 +1152,13 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
goto resolve_page_fault;
}
- wqe = (void *)__get_free_page(GFP_KERNEL);
- if (!wqe) {
+ wqe_start = (void *)__get_free_page(GFP_KERNEL);
+ if (!wqe_start) {
mlx5_ib_err(dev, "Error allocating memory for IO page fault handling.\n");
goto resolve_page_fault;
}
+ wqe = wqe_start;
qp = (res->res == MLX5_RES_QP) ? res_to_qp(res) : NULL;
if (qp && sq) {
ret = mlx5_ib_read_user_wqe_sq(qp, wqe_index, wqe, PAGE_SIZE,
@@ -1284,7 +1213,7 @@ resolve_page_fault:
pfault->wqe.wq_num, resume_with_error,
pfault->type);
mlx5_core_res_put(res);
- free_page((unsigned long)wqe);
+ free_page((unsigned long)wqe_start);
}
static int pages_in_range(u64 address, u32 length)
@@ -1616,14 +1545,17 @@ void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent)
static const struct ib_device_ops mlx5_ib_dev_odp_ops = {
.advise_mr = mlx5_ib_advise_mr,
+ .invalidate_range = mlx5_ib_invalidate_range,
};
int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
{
int ret = 0;
- if (dev->odp_caps.general_caps & IB_ODP_SUPPORT)
- ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_odp_ops);
+ if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT))
+ return ret;
+
+ ib_set_device_ops(&dev->ib_dev, &mlx5_ib_dev_odp_ops);
if (dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT) {
ret = mlx5_cmd_null_mkey(dev->mdev, &dev->null_mkey);
@@ -1633,9 +1565,6 @@ int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
}
}
- if (!MLX5_CAP_GEN(dev->mdev, pg))
- return ret;
-
ret = mlx5_ib_create_pf_eq(dev, &dev->odp_pf_eq);
return ret;
@@ -1643,7 +1572,7 @@ int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
void mlx5_ib_odp_cleanup_one(struct mlx5_ib_dev *dev)
{
- if (!MLX5_CAP_GEN(dev->mdev, pg))
+ if (!(dev->odp_caps.general_caps & IB_ODP_SUPPORT))
return;
mlx5_ib_destroy_pf_eq(dev, &dev->odp_pf_eq);