diff options
Diffstat (limited to 'drivers/infiniband/core/umem.c')
-rw-r--r-- | drivers/infiniband/core/umem.c | 139 |
1 files changed, 39 insertions, 100 deletions
diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index 831bff8d52e5..e9fecbdf391b 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -39,6 +39,7 @@ #include <linux/export.h> #include <linux/slab.h> #include <linux/pagemap.h> +#include <linux/count_zeros.h> #include <rdma/ib_umem_odp.h> #include "uverbs.h" @@ -60,73 +61,6 @@ static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int d sg_free_table(&umem->sg_head); } -/* ib_umem_add_sg_table - Add N contiguous pages to scatter table - * - * sg: current scatterlist entry - * page_list: array of npage struct page pointers - * npages: number of pages in page_list - * max_seg_sz: maximum segment size in bytes - * nents: [out] number of entries in the scatterlist - * - * Return new end of scatterlist - */ -static struct scatterlist *ib_umem_add_sg_table(struct scatterlist *sg, - struct page **page_list, - unsigned long npages, - unsigned int max_seg_sz, - int *nents) -{ - unsigned long first_pfn; - unsigned long i = 0; - bool update_cur_sg = false; - bool first = !sg_page(sg); - - /* Check if new page_list is contiguous with end of previous page_list. - * sg->length here is a multiple of PAGE_SIZE and sg->offset is 0. - */ - if (!first && (page_to_pfn(sg_page(sg)) + (sg->length >> PAGE_SHIFT) == - page_to_pfn(page_list[0]))) - update_cur_sg = true; - - while (i != npages) { - unsigned long len; - struct page *first_page = page_list[i]; - - first_pfn = page_to_pfn(first_page); - - /* Compute the number of contiguous pages we have starting - * at i - */ - for (len = 0; i != npages && - first_pfn + len == page_to_pfn(page_list[i]) && - len < (max_seg_sz >> PAGE_SHIFT); - len++) - i++; - - /* Squash N contiguous pages from page_list into current sge */ - if (update_cur_sg) { - if ((max_seg_sz - sg->length) >= (len << PAGE_SHIFT)) { - sg_set_page(sg, sg_page(sg), - sg->length + (len << PAGE_SHIFT), - 0); - update_cur_sg = false; - continue; - } - update_cur_sg = false; - } - - /* Squash N contiguous pages into next sge or first sge */ - if (!first) - sg = sg_next(sg); - - (*nents)++; - sg_set_page(sg, first_page, len << PAGE_SHIFT, 0); - first = false; - } - - return sg; -} - /** * ib_umem_find_best_pgsz - Find best HW page size to use for this MR * @@ -146,18 +80,28 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, unsigned long virt) { struct scatterlist *sg; - unsigned int best_pg_bit; unsigned long va, pgoff; dma_addr_t mask; int i; + /* rdma_for_each_block() has a bug if the page size is smaller than the + * page size used to build the umem. For now prevent smaller page sizes + * from being returned. + */ + pgsz_bitmap &= GENMASK(BITS_PER_LONG - 1, PAGE_SHIFT); + /* At minimum, drivers must support PAGE_SIZE or smaller */ if (WARN_ON(!(pgsz_bitmap & GENMASK(PAGE_SHIFT, 0)))) return 0; - va = virt; - /* max page size not to exceed MR length */ - mask = roundup_pow_of_two(umem->length); + umem->iova = va = virt; + /* The best result is the smallest page size that results in the minimum + * number of required pages. Compute the largest page size that could + * work based on VA address bits that don't change. + */ + mask = pgsz_bitmap & + GENMASK(BITS_PER_LONG - 1, + bits_per((umem->length - 1 + virt) ^ virt)); /* offset into first SGL */ pgoff = umem->address & ~PAGE_MASK; @@ -175,9 +119,14 @@ unsigned long ib_umem_find_best_pgsz(struct ib_umem *umem, mask |= va; pgoff = 0; } - best_pg_bit = rdma_find_pg_bit(mask, pgsz_bitmap); - return BIT_ULL(best_pg_bit); + /* The mask accumulates 1's in each position where the VA and physical + * address differ, thus the length of trailing 0 is the largest page + * size that can pass the VA through to the physical. + */ + if (mask) + pgsz_bitmap &= GENMASK(count_trailing_zeros(mask), 0); + return rounddown_pow_of_two(pgsz_bitmap); } EXPORT_SYMBOL(ib_umem_find_best_pgsz); @@ -201,7 +150,7 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr, struct mm_struct *mm; unsigned long npages; int ret; - struct scatterlist *sg; + struct scatterlist *sg = NULL; unsigned int gup_flags = FOLL_WRITE; /* @@ -224,6 +173,11 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr, umem->ibdev = device; umem->length = size; umem->address = addr; + /* + * Drivers should call ib_umem_find_best_pgsz() to set the iova + * correctly. + */ + umem->iova = addr; umem->writable = ib_access_writable(access); umem->owning_mm = mm = current->mm; mmgrab(mm); @@ -251,15 +205,9 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr, cur_base = addr & PAGE_MASK; - ret = sg_alloc_table(&umem->sg_head, npages, GFP_KERNEL); - if (ret) - goto vma; - if (!umem->writable) gup_flags |= FOLL_FORCE; - sg = umem->sg_head.sgl; - while (npages) { cond_resched(); ret = pin_user_pages_fast(cur_base, @@ -271,15 +219,19 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr, goto umem_release; cur_base += ret * PAGE_SIZE; - npages -= ret; - - sg = ib_umem_add_sg_table(sg, page_list, ret, - dma_get_max_seg_size(device->dma_device), - &umem->sg_nents); + npages -= ret; + sg = __sg_alloc_table_from_pages( + &umem->sg_head, page_list, ret, 0, ret << PAGE_SHIFT, + dma_get_max_seg_size(device->dma_device), sg, npages, + GFP_KERNEL); + umem->sg_nents = umem->sg_head.nents; + if (IS_ERR(sg)) { + unpin_user_pages_dirty_lock(page_list, ret, 0); + ret = PTR_ERR(sg); + goto umem_release; + } } - sg_mark_end(sg); - if (access & IB_ACCESS_RELAXED_ORDERING) dma_attr |= DMA_ATTR_WEAK_ORDERING; @@ -297,7 +249,6 @@ struct ib_umem *ib_umem_get(struct ib_device *device, unsigned long addr, umem_release: __ib_umem_release(device, umem, 0); -vma: atomic64_sub(ib_umem_num_pages(umem), &mm->pinned_vm); out: free_page((unsigned long) page_list); @@ -329,18 +280,6 @@ void ib_umem_release(struct ib_umem *umem) } EXPORT_SYMBOL(ib_umem_release); -int ib_umem_page_count(struct ib_umem *umem) -{ - int i, n = 0; - struct scatterlist *sg; - - for_each_sg(umem->sg_head.sgl, sg, umem->nmap, i) - n += sg_dma_len(sg) >> PAGE_SHIFT; - - return n; -} -EXPORT_SYMBOL(ib_umem_page_count); - /* * Copy from the given ib_umem's pages to the given buffer. * |