summaryrefslogtreecommitdiff
path: root/drivers/infiniband/core/umem.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/infiniband/core/umem.c')
-rw-r--r--drivers/infiniband/core/umem.c139
1 files changed, 39 insertions, 100 deletions
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c
index 831bff8d52e5..e9fecbdf391b 100644
--- a/drivers/infiniband/core/umem.c
+++ b/drivers/infiniband/core/umem.c
@@ -39,6 +39,7 @@
#include <linux/export.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
+#include <linux/count_zeros.h>
#include <rdma/ib_umem_odp.h>
#include "uverbs.h"
@@ -60,73 +61,6 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d
sg_free_table(&umem->sg_head);
}
-/* ib_umem_add_sg_table - Add N contiguous pages to scatter table
- *
- * sg: current scatterlist entry
- * page_list: array of npage struct page pointers
- * npages: number of pages in page_list
- * max_seg_sz: maximum segment size in bytes
- * nents: [out] number of entries in the scatterlist
- *
- * Return new end of scatterlist
- */
-static struct scatterlist *ib_umem_add_sg_table(struct scatterlist *sg,
- struct page **page_list,
- unsigned long npages,
- unsigned int max_seg_sz,
- int *nents)
-{
- unsigned long first_pfn;
- unsigned long i = 0;
- bool update_cur_sg = false;
- bool first = !sg_page(sg);
-
- /* Check if new page_list is contiguous with end of previous page_list.
- * sg->length here is a multiple of PAGE_SIZE and sg->offset is 0.
- */
- if (!first && (page_to_pfn(sg_page(sg)) + (sg->length >> PAGE_SHIFT) ==
- page_to_pfn(page_list[0])))
- update_cur_sg = true;
-
- while (i != npages) {
- unsigned long len;
- struct page *first_page = page_list[i];
-
- first_pfn = page_to_pfn(first_page);
-
- /* Compute the number of contiguous pages we have starting
- * at i
- */
- for (len = 0; i != npages &&
- first_pfn + len == page_to_pfn(page_list[i]) &&
- len < (max_seg_sz >> PAGE_SHIFT);
- len++)
- i++;
-
- /* Squash N contiguous pages from page_list into current sge */
- if (update_cur_sg) {
- if ((max_seg_sz - sg->length) >= (len << PAGE_SHIFT)) {
- sg_set_page(sg, sg_page(sg),
- sg->length + (len << PAGE_SHIFT),
- 0);
- update_cur_sg = false;
- continue;
- }
- update_cur_sg = false;
- }
-
- /* Squash N contiguous pages into next sge or first sge */
- if (!first)
- sg = sg_next(sg);
-
- (*nents)++;
- sg_set_page(sg, first_page, len << PAGE_SHIFT, 0);
- first = false;
- }
-
- return sg;
-}
-
/**
* ib_umem_find_best_pgsz - Find best HW page size to use for this MR
*
@@ -146,18 +80,28 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
unsigned long virt)
{
struct scatterlist *sg;
- unsigned int best_pg_bit;
unsigned long va, pgoff;
dma_addr_t mask;
int i;
+ /* rdma_for_each_block() has a bug if the page size is smaller than the
+ * page size used to build the umem. For now prevent smaller page sizes
+ * from being returned.
+ */
+ pgsz_bitmap &= GENMASK(BITS_PER_LONG - 1, PAGE_SHIFT);
+
/* At minimum, drivers must support PAGE_SIZE or smaller */
if (WARN_ON(!(pgsz_bitmap & GENMASK(PAGE_SHIFT, 0))))
return 0;
- va = virt;
- /* max page size not to exceed MR length */
- mask = roundup_pow_of_two(umem->length);
+ umem->iova = va = virt;
+ /* The best result is the smallest page size that results in the minimum
+ * number of required pages. Compute the largest page size that could
+ * work based on VA address bits that don't change.
+ */
+ mask = pgsz_bitmap &
+ GENMASK(BITS_PER_LONG - 1,
+ bits_per((umem->length - 1 + virt) ^ virt));
/* offset into first SGL */
pgoff = umem->address & ~PAGE_MASK;
@@ -175,9 +119,14 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem,
mask |= va;
pgoff = 0;
}
- best_pg_bit = rdma_find_pg_bit(mask, pgsz_bitmap);
- return BIT_ULL(best_pg_bit);
+ /* The mask accumulates 1's in each position where the VA and physical
+ * address differ, thus the length of trailing 0 is the largest page
+ * size that can pass the VA through to the physical.
+ */
+ if (mask)
+ pgsz_bitmap &= GENMASK(count_trailing_zeros(mask), 0);
+ return rounddown_pow_of_two(pgsz_bitmap);
}
EXPORT_SYMBOL(ib_umem_find_best_pgsz);
@@ -201,7 +150,7 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
struct mm_struct *mm;
unsigned long npages;
int ret;
- struct scatterlist *sg;
+ struct scatterlist *sg = NULL;
unsigned int gup_flags = FOLL_WRITE;
/*
@@ -224,6 +173,11 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
umem->ibdev = device;
umem->length = size;
umem->address = addr;
+ /*
+ * Drivers should call ib_umem_find_best_pgsz() to set the iova
+ * correctly.
+ */
+ umem->iova = addr;
umem->writable = ib_access_writable(access);
umem->owning_mm = mm = current->mm;
mmgrab(mm);
@@ -251,15 +205,9 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
cur_base = addr & PAGE_MASK;
- ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL);
- if (ret)
- goto vma;
-
if (!umem->writable)
gup_flags |= FOLL_FORCE;
- sg = umem->sg_head.sgl;
-
while (npages) {
cond_resched();
ret = pin_user_pages_fast(cur_base,
@@ -271,15 +219,19 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
goto umem_release;
cur_base += ret * PAGE_SIZE;
- npages -= ret;
-
- sg = ib_umem_add_sg_table(sg, page_list, ret,
- dma_get_max_seg_size(device->dma_device),
- &umem->sg_nents);
+ npages -= ret;
+ sg = __sg_alloc_table_from_pages(
+ &umem->sg_head, page_list, ret, 0, ret << PAGE_SHIFT,
+ dma_get_max_seg_size(device->dma_device), sg, npages,
+ GFP_KERNEL);
+ umem->sg_nents = umem->sg_head.nents;
+ if (IS_ERR(sg)) {
+ unpin_user_pages_dirty_lock(page_list, ret, 0);
+ ret = PTR_ERR(sg);
+ goto umem_release;
+ }
}
- sg_mark_end(sg);
-
if (access & IB_ACCESS_RELAXED_ORDERING)
dma_attr |= DMA_ATTR_WEAK_ORDERING;
@@ -297,7 +249,6 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr,
umem_release:
__ib_umem_release(device, umem, 0);
-vma:
atomic64_sub(ib_umem_num_pages(umem), &mm->pinned_vm);
out:
free_page((unsigned long) page_list);
@@ -329,18 +280,6 @@ void ib_umem_release(struct ib_umem *umem)
}
EXPORT_SYMBOL(ib_umem_release);
-int ib_umem_page_count(struct ib_umem *umem)
-{
- int i, n = 0;
- struct scatterlist *sg;
-
- for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i)
- n += sg_dma_len(sg) >> PAGE_SHIFT;
-
- return n;
-}
-EXPORT_SYMBOL(ib_umem_page_count);
-
/*
* Copy from the given ib_umem's pages to the given buffer.
*