summaryrefslogtreecommitdiff
path: root/drivers/infiniband/hw/hfi1/user_pages.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-02-25 02:11:03 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2023-02-25 02:11:03 +0300
commit8cbd92339db08b19b93d1637e5799ff2a8dddfd2 (patch)
tree7e62d961f32e8a2a96271029b376f1e8bbd70a7c /drivers/infiniband/hw/hfi1/user_pages.c
parent143c7bc6496c886ce5db2a2f9cec580494690170 (diff)
parent66fb1d5df6ace316a4a6e2c31e13fc123ea2b644 (diff)
downloadlinux-8cbd92339db08b19b93d1637e5799ff2a8dddfd2.tar.xz
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma
Pull rdma updates from Jason Gunthorpe: "Quite a small cycle this time, even with the rc8. I suppose everyone went to sleep over xmas. - Minor driver updates for hfi1, cxgb4, erdma, hns, irdma, mlx5, siw, mana - inline CQE support for hns - Have mlx5 display device error codes - Pinned DMABUF support for irdma - Continued rxe cleanups, particularly converting the MRs to use xarray - Improvements to what can be cached in the mlx5 mkey cache" * tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rdma/rdma: (61 commits) IB/mlx5: Extend debug control for CC parameters IB/hfi1: Fix sdma.h tx->num_descs off-by-one errors IB/hfi1: Fix math bugs in hfi1_can_pin_pages() RDMA/irdma: Add support for dmabuf pin memory regions RDMA/mlx5: Use query_special_contexts for mkeys net/mlx5e: Use query_special_contexts for mkeys net/mlx5: Change define name for 0x100 lkey value net/mlx5: Expose bits for querying special mkeys RDMA/rxe: Fix missing memory barriers in rxe_queue.h RDMA/mana_ib: Fix a bug when the PF indicates more entries for registering memory on first packet RDMA/rxe: Remove rxe_alloc() RDMA/cma: Distinguish between sockaddr_in and sockaddr_in6 by size Subject: RDMA/rxe: Handle zero length rdma iw_cxgb4: Fix potential NULL dereference in c4iw_fill_res_cm_id_entry() RDMA/mlx5: Use rdma_umem_for_each_dma_block() RDMA/umem: Remove unused 'work' member from struct ib_umem RDMA/irdma: Cap MSIX used to online CPUs + 1 RDMA/mlx5: Check reg_create() create for errors RDMA/restrack: Correct spelling RDMA/cxgb4: Fix potential null-ptr-deref in pass_establish() ...
Diffstat (limited to 'drivers/infiniband/hw/hfi1/user_pages.c')
-rw-r--r--drivers/infiniband/hw/hfi1/user_pages.c61
1 files changed, 40 insertions, 21 deletions
diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c
index 7bce963e2ae6..36aaedc65145 100644
--- a/drivers/infiniband/hw/hfi1/user_pages.c
+++ b/drivers/infiniband/hw/hfi1/user_pages.c
@@ -29,33 +29,52 @@ MODULE_PARM_DESC(cache_size, "Send and receive side cache size limit (in MB)");
bool hfi1_can_pin_pages(struct hfi1_devdata *dd, struct mm_struct *mm,
u32 nlocked, u32 npages)
{
- unsigned long ulimit = rlimit(RLIMIT_MEMLOCK), pinned, cache_limit,
- size = (cache_size * (1UL << 20)); /* convert to bytes */
- unsigned int usr_ctxts =
- dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt;
- bool can_lock = capable(CAP_IPC_LOCK);
+ unsigned long ulimit_pages;
+ unsigned long cache_limit_pages;
+ unsigned int usr_ctxts;
/*
- * Calculate per-cache size. The calculation below uses only a quarter
- * of the available per-context limit. This leaves space for other
- * pinning. Should we worry about shared ctxts?
+ * Perform RLIMIT_MEMLOCK based checks unless CAP_IPC_LOCK is present.
*/
- cache_limit = (ulimit / usr_ctxts) / 4;
-
- /* If ulimit isn't set to "unlimited" and is smaller than cache_size. */
- if (ulimit != (-1UL) && size > cache_limit)
- size = cache_limit;
-
- /* Convert to number of pages */
- size = DIV_ROUND_UP(size, PAGE_SIZE);
-
- pinned = atomic64_read(&mm->pinned_vm);
+ if (!capable(CAP_IPC_LOCK)) {
+ ulimit_pages =
+ DIV_ROUND_DOWN_ULL(rlimit(RLIMIT_MEMLOCK), PAGE_SIZE);
+
+ /*
+ * Pinning these pages would exceed this process's locked memory
+ * limit.
+ */
+ if (atomic64_read(&mm->pinned_vm) + npages > ulimit_pages)
+ return false;
+
+ /*
+ * Only allow 1/4 of the user's RLIMIT_MEMLOCK to be used for HFI
+ * caches. This fraction is then equally distributed among all
+ * existing user contexts. Note that if RLIMIT_MEMLOCK is
+ * 'unlimited' (-1), the value of this limit will be > 2^42 pages
+ * (2^64 / 2^12 / 2^8 / 2^2).
+ *
+ * The effectiveness of this check may be reduced if I/O occurs on
+ * some user contexts before all user contexts are created. This
+ * check assumes that this process is the only one using this
+ * context (e.g., the corresponding fd was not passed to another
+ * process for concurrent access) as there is no per-context,
+ * per-process tracking of pinned pages. It also assumes that each
+ * user context has only one cache to limit.
+ */
+ usr_ctxts = dd->num_rcv_contexts - dd->first_dyn_alloc_ctxt;
+ if (nlocked + npages > (ulimit_pages / usr_ctxts / 4))
+ return false;
+ }
- /* First, check the absolute limit against all pinned pages. */
- if (pinned + npages >= ulimit && !can_lock)
+ /*
+ * Pinning these pages would exceed the size limit for this cache.
+ */
+ cache_limit_pages = cache_size * (1024 * 1024) / PAGE_SIZE;
+ if (nlocked + npages > cache_limit_pages)
return false;
- return ((nlocked + npages) <= size) || can_lock;
+ return true;
}
int hfi1_acquire_user_pages(struct mm_struct *mm, unsigned long vaddr, size_t npages,