From 0f53afa12baec8c00f5d1d6afb49325ada105253 Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Fri, 29 Jan 2021 08:54:05 -0800 Subject: vfio/type1: unmap cleanup Minor changes in vfio_dma_do_unmap to improve readability, which also simplify the subsequent unmap-all patch. No functional change. Signed-off-by: Steve Sistare Reviewed-by: Cornelia Huck Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 38 +++++++++++++++----------------------- 1 file changed, 15 insertions(+), 23 deletions(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 0b4dedaa9128..e31c926b9040 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -1074,34 +1074,28 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu, { struct vfio_dma *dma, *dma_last = NULL; size_t unmapped = 0, pgsize; - int ret = 0, retries = 0; + int ret = -EINVAL, retries = 0; unsigned long pgshift; + dma_addr_t iova = unmap->iova; + unsigned long size = unmap->size; mutex_lock(&iommu->lock); pgshift = __ffs(iommu->pgsize_bitmap); pgsize = (size_t)1 << pgshift; - if (unmap->iova & (pgsize - 1)) { - ret = -EINVAL; + if (iova & (pgsize - 1)) goto unlock; - } - if (!unmap->size || unmap->size & (pgsize - 1)) { - ret = -EINVAL; + if (!size || size & (pgsize - 1)) goto unlock; - } - if (unmap->iova + unmap->size - 1 < unmap->iova || - unmap->size > SIZE_MAX) { - ret = -EINVAL; + if (iova + size - 1 < iova || size > SIZE_MAX) goto unlock; - } /* When dirty tracking is enabled, allow only min supported pgsize */ if ((unmap->flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) && (!iommu->dirty_page_tracking || (bitmap->pgsize != pgsize))) { - ret = -EINVAL; goto unlock; } @@ -1139,20 +1133,18 @@ again: * mappings within the range. */ if (iommu->v2) { - dma = vfio_find_dma(iommu, unmap->iova, 1); - if (dma && dma->iova != unmap->iova) { - ret = -EINVAL; + dma = vfio_find_dma(iommu, iova, 1); + if (dma && dma->iova != iova) goto unlock; - } - dma = vfio_find_dma(iommu, unmap->iova + unmap->size - 1, 0); - if (dma && dma->iova + dma->size != unmap->iova + unmap->size) { - ret = -EINVAL; + + dma = vfio_find_dma(iommu, iova + size - 1, 0); + if (dma && dma->iova + dma->size != iova + size) goto unlock; - } } - while ((dma = vfio_find_dma(iommu, unmap->iova, unmap->size))) { - if (!iommu->v2 && unmap->iova > dma->iova) + ret = 0; + while ((dma = vfio_find_dma(iommu, iova, size))) { + if (!iommu->v2 && iova > dma->iova) break; /* * Task with same address space who mapped this iova range is @@ -1190,7 +1182,7 @@ again: if (unmap->flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) { ret = update_user_bitmap(bitmap->data, iommu, dma, - unmap->iova, pgsize); + iova, pgsize); if (ret) break; } -- cgit v1.2.3 From c196509953741a8a472127c80070a0795a072f87 Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Fri, 29 Jan 2021 08:54:06 -0800 Subject: vfio/type1: implement unmap all Implement VFIO_DMA_UNMAP_FLAG_ALL. Signed-off-by: Steve Sistare Reviewed-by: Cornelia Huck Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index e31c926b9040..5b388e5d2972 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -1078,6 +1078,7 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu, unsigned long pgshift; dma_addr_t iova = unmap->iova; unsigned long size = unmap->size; + bool unmap_all = unmap->flags & VFIO_DMA_UNMAP_FLAG_ALL; mutex_lock(&iommu->lock); @@ -1087,8 +1088,13 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu, if (iova & (pgsize - 1)) goto unlock; - if (!size || size & (pgsize - 1)) + if (unmap_all) { + if (iova || size) + goto unlock; + size = SIZE_MAX; + } else if (!size || size & (pgsize - 1)) { goto unlock; + } if (iova + size - 1 < iova || size > SIZE_MAX) goto unlock; @@ -1132,7 +1138,7 @@ again: * will only return success and a size of zero if there were no * mappings within the range. */ - if (iommu->v2) { + if (iommu->v2 && !unmap_all) { dma = vfio_find_dma(iommu, iova, 1); if (dma && dma->iova != iova) goto unlock; @@ -2509,6 +2515,7 @@ static int vfio_iommu_type1_check_extension(struct vfio_iommu *iommu, case VFIO_TYPE1_IOMMU: case VFIO_TYPE1v2_IOMMU: case VFIO_TYPE1_NESTING_IOMMU: + case VFIO_UNMAP_ALL: return 1; case VFIO_DMA_CC_IOMMU: if (!iommu) @@ -2698,6 +2705,8 @@ static int vfio_iommu_type1_unmap_dma(struct vfio_iommu *iommu, { struct vfio_iommu_type1_dma_unmap unmap; struct vfio_bitmap bitmap = { 0 }; + uint32_t mask = VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP | + VFIO_DMA_UNMAP_FLAG_ALL; unsigned long minsz; int ret; @@ -2706,8 +2715,11 @@ static int vfio_iommu_type1_unmap_dma(struct vfio_iommu *iommu, if (copy_from_user(&unmap, (void __user *)arg, minsz)) return -EFAULT; - if (unmap.argsz < minsz || - unmap.flags & ~VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) + if (unmap.argsz < minsz || unmap.flags & ~mask) + return -EINVAL; + + if ((unmap.flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) && + (unmap.flags & VFIO_DMA_UNMAP_FLAG_ALL)) return -EINVAL; if (unmap.flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) { -- cgit v1.2.3 From 40ae9b807b89e60f86a40f33b931e6cde7eed2b7 Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Fri, 29 Jan 2021 08:54:08 -0800 Subject: vfio/type1: massage unmap iteration Modify the iteration in vfio_dma_do_unmap so it does not depend on deletion of each dma entry. Add a variant of vfio_find_dma that returns the entry with the lowest iova in the search range to initialize the iteration. No externally visible change, but this behavior is needed in the subsequent update-vaddr patch. Signed-off-by: Steve Sistare Reviewed-by: Cornelia Huck Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 5b388e5d2972..ce97eda756fc 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -173,6 +173,31 @@ static struct vfio_dma *vfio_find_dma(struct vfio_iommu *iommu, return NULL; } +static struct rb_node *vfio_find_dma_first_node(struct vfio_iommu *iommu, + dma_addr_t start, size_t size) +{ + struct rb_node *res = NULL; + struct rb_node *node = iommu->dma_list.rb_node; + struct vfio_dma *dma_res = NULL; + + while (node) { + struct vfio_dma *dma = rb_entry(node, struct vfio_dma, node); + + if (start < dma->iova + dma->size) { + res = node; + dma_res = dma; + if (start >= dma->iova) + break; + node = node->rb_left; + } else { + node = node->rb_right; + } + } + if (res && size && dma_res->iova >= start + size) + res = NULL; + return res; +} + static void vfio_link_dma(struct vfio_iommu *iommu, struct vfio_dma *new) { struct rb_node **link = &iommu->dma_list.rb_node, *parent = NULL; @@ -1079,6 +1104,7 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu, dma_addr_t iova = unmap->iova; unsigned long size = unmap->size; bool unmap_all = unmap->flags & VFIO_DMA_UNMAP_FLAG_ALL; + struct rb_node *n; mutex_lock(&iommu->lock); @@ -1149,7 +1175,13 @@ again: } ret = 0; - while ((dma = vfio_find_dma(iommu, iova, size))) { + n = vfio_find_dma_first_node(iommu, iova, size); + + while (n) { + dma = rb_entry(n, struct vfio_dma, node); + if (dma->iova >= iova + size) + break; + if (!iommu->v2 && iova > dma->iova) break; /* @@ -1194,6 +1226,7 @@ again: } unmapped += dma->size; + n = rb_next(n); vfio_remove_dma(iommu, dma); } -- cgit v1.2.3 From c3cbab24db3860d68924d8a3f752a97d3cca1623 Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Fri, 29 Jan 2021 08:54:09 -0800 Subject: vfio/type1: implement interfaces to update vaddr Implement VFIO_DMA_UNMAP_FLAG_VADDR, VFIO_DMA_MAP_FLAG_VADDR, and VFIO_UPDATE_VADDR. This is a partial implementation. Blocking is added in a subsequent patch. Signed-off-by: Steve Sistare Reviewed-by: Cornelia Huck Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 59 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 53 insertions(+), 6 deletions(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index ce97eda756fc..55b49bfac993 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -69,6 +69,7 @@ struct vfio_iommu { struct rb_root dma_list; struct blocking_notifier_head notifier; unsigned int dma_avail; + unsigned int vaddr_invalid_count; uint64_t pgsize_bitmap; bool v2; bool nesting; @@ -92,6 +93,7 @@ struct vfio_dma { int prot; /* IOMMU_READ/WRITE */ bool iommu_mapped; bool lock_cap; /* capable(CAP_IPC_LOCK) */ + bool vaddr_invalid; struct task_struct *task; struct rb_root pfn_list; /* Ex-user pinned pfn list */ unsigned long *bitmap; @@ -974,6 +976,8 @@ static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma) vfio_unlink_dma(iommu, dma); put_task_struct(dma->task); vfio_dma_bitmap_free(dma); + if (dma->vaddr_invalid) + iommu->vaddr_invalid_count--; kfree(dma); iommu->dma_avail++; } @@ -1104,7 +1108,8 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu, dma_addr_t iova = unmap->iova; unsigned long size = unmap->size; bool unmap_all = unmap->flags & VFIO_DMA_UNMAP_FLAG_ALL; - struct rb_node *n; + bool invalidate_vaddr = unmap->flags & VFIO_DMA_UNMAP_FLAG_VADDR; + struct rb_node *n, *first_n; mutex_lock(&iommu->lock); @@ -1175,7 +1180,7 @@ again: } ret = 0; - n = vfio_find_dma_first_node(iommu, iova, size); + n = first_n = vfio_find_dma_first_node(iommu, iova, size); while (n) { dma = rb_entry(n, struct vfio_dma, node); @@ -1191,6 +1196,27 @@ again: if (dma->task->mm != current->mm) break; + if (invalidate_vaddr) { + if (dma->vaddr_invalid) { + struct rb_node *last_n = n; + + for (n = first_n; n != last_n; n = rb_next(n)) { + dma = rb_entry(n, + struct vfio_dma, node); + dma->vaddr_invalid = false; + iommu->vaddr_invalid_count--; + } + ret = -EINVAL; + unmapped = 0; + break; + } + dma->vaddr_invalid = true; + iommu->vaddr_invalid_count++; + unmapped += dma->size; + n = rb_next(n); + continue; + } + if (!RB_EMPTY_ROOT(&dma->pfn_list)) { struct vfio_iommu_type1_dma_unmap nb_unmap; @@ -1330,6 +1356,7 @@ static bool vfio_iommu_iova_dma_valid(struct vfio_iommu *iommu, static int vfio_dma_do_map(struct vfio_iommu *iommu, struct vfio_iommu_type1_dma_map *map) { + bool set_vaddr = map->flags & VFIO_DMA_MAP_FLAG_VADDR; dma_addr_t iova = map->iova; unsigned long vaddr = map->vaddr; size_t size = map->size; @@ -1347,13 +1374,16 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, if (map->flags & VFIO_DMA_MAP_FLAG_READ) prot |= IOMMU_READ; + if ((prot && set_vaddr) || (!prot && !set_vaddr)) + return -EINVAL; + mutex_lock(&iommu->lock); pgsize = (size_t)1 << __ffs(iommu->pgsize_bitmap); WARN_ON((pgsize - 1) & PAGE_MASK); - if (!prot || !size || (size | iova | vaddr) & (pgsize - 1)) { + if (!size || (size | iova | vaddr) & (pgsize - 1)) { ret = -EINVAL; goto out_unlock; } @@ -1364,7 +1394,20 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, goto out_unlock; } - if (vfio_find_dma(iommu, iova, size)) { + dma = vfio_find_dma(iommu, iova, size); + if (set_vaddr) { + if (!dma) { + ret = -ENOENT; + } else if (!dma->vaddr_invalid || dma->iova != iova || + dma->size != size) { + ret = -EINVAL; + } else { + dma->vaddr = vaddr; + dma->vaddr_invalid = false; + iommu->vaddr_invalid_count--; + } + goto out_unlock; + } else if (dma) { ret = -EEXIST; goto out_unlock; } @@ -2549,6 +2592,7 @@ static int vfio_iommu_type1_check_extension(struct vfio_iommu *iommu, case VFIO_TYPE1v2_IOMMU: case VFIO_TYPE1_NESTING_IOMMU: case VFIO_UNMAP_ALL: + case VFIO_UPDATE_VADDR: return 1; case VFIO_DMA_CC_IOMMU: if (!iommu) @@ -2720,7 +2764,8 @@ static int vfio_iommu_type1_map_dma(struct vfio_iommu *iommu, { struct vfio_iommu_type1_dma_map map; unsigned long minsz; - uint32_t mask = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; + uint32_t mask = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE | + VFIO_DMA_MAP_FLAG_VADDR; minsz = offsetofend(struct vfio_iommu_type1_dma_map, size); @@ -2739,6 +2784,7 @@ static int vfio_iommu_type1_unmap_dma(struct vfio_iommu *iommu, struct vfio_iommu_type1_dma_unmap unmap; struct vfio_bitmap bitmap = { 0 }; uint32_t mask = VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP | + VFIO_DMA_UNMAP_FLAG_VADDR | VFIO_DMA_UNMAP_FLAG_ALL; unsigned long minsz; int ret; @@ -2752,7 +2798,8 @@ static int vfio_iommu_type1_unmap_dma(struct vfio_iommu *iommu, return -EINVAL; if ((unmap.flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) && - (unmap.flags & VFIO_DMA_UNMAP_FLAG_ALL)) + (unmap.flags & (VFIO_DMA_UNMAP_FLAG_ALL | + VFIO_DMA_UNMAP_FLAG_VADDR))) return -EINVAL; if (unmap.flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) { -- cgit v1.2.3 From ec5e32940cc9d2a8a321cb7d756fb6ae45702d03 Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Fri, 29 Jan 2021 08:54:10 -0800 Subject: vfio: iommu driver notify callback Define a vfio_iommu_driver_ops notify callback, for sending events to the driver. Drivers are not required to provide the callback, and may ignore any events. The handling of events is driver specific. Define the CONTAINER_CLOSE event, called when the container's file descriptor is closed. This event signifies that no further state changes will occur via container ioctl's. Signed-off-by: Steve Sistare Reviewed-by: Cornelia Huck Signed-off-by: Alex Williamson --- drivers/vfio/vfio.c | 5 +++++ include/linux/vfio.h | 7 +++++++ 2 files changed, 12 insertions(+) (limited to 'drivers/vfio') diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 4ad8a35667a7..38779e6fd80c 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1220,6 +1220,11 @@ static int vfio_fops_open(struct inode *inode, struct file *filep) static int vfio_fops_release(struct inode *inode, struct file *filep) { struct vfio_container *container = filep->private_data; + struct vfio_iommu_driver *driver = container->iommu_driver; + + if (driver && driver->ops->notify) + driver->ops->notify(container->iommu_data, + VFIO_IOMMU_CONTAINER_CLOSE); filep->private_data = NULL; diff --git a/include/linux/vfio.h b/include/linux/vfio.h index f45940b38a02..b7e18bde5aa8 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -57,6 +57,11 @@ extern struct vfio_device *vfio_device_get_from_dev(struct device *dev); extern void vfio_device_put(struct vfio_device *device); extern void *vfio_device_data(struct vfio_device *device); +/* events for the backend driver notify callback */ +enum vfio_iommu_notify_type { + VFIO_IOMMU_CONTAINER_CLOSE = 0, +}; + /** * struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks */ @@ -92,6 +97,8 @@ struct vfio_iommu_driver_ops { void *data, size_t count, bool write); struct iommu_domain *(*group_iommu_domain)(void *iommu_data, struct iommu_group *group); + void (*notify)(void *iommu_data, + enum vfio_iommu_notify_type event); }; extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops); -- cgit v1.2.3 From 487ace1340536ea9b8daf8c783a397a467fe55fc Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Fri, 29 Jan 2021 08:54:11 -0800 Subject: vfio/type1: implement notify callback Implement a notify callback that remembers if the container's file descriptor has been closed. Signed-off-by: Steve Sistare Reviewed-by: Cornelia Huck Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'drivers/vfio') diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 55b49bfac993..2109803bfb15 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -75,6 +75,7 @@ struct vfio_iommu { bool nesting; bool dirty_page_tracking; bool pinned_page_dirty_scope; + bool container_open; }; struct vfio_domain { @@ -2520,6 +2521,7 @@ static void *vfio_iommu_type1_open(unsigned long arg) INIT_LIST_HEAD(&iommu->iova_list); iommu->dma_list = RB_ROOT; iommu->dma_avail = dma_entry_limit; + iommu->container_open = true; mutex_init(&iommu->lock); BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier); @@ -3087,6 +3089,18 @@ vfio_iommu_type1_group_iommu_domain(void *iommu_data, return domain; } +static void vfio_iommu_type1_notify(void *iommu_data, + enum vfio_iommu_notify_type event) +{ + struct vfio_iommu *iommu = iommu_data; + + if (event != VFIO_IOMMU_CONTAINER_CLOSE) + return; + mutex_lock(&iommu->lock); + iommu->container_open = false; + mutex_unlock(&iommu->lock); +} + static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = { .name = "vfio-iommu-type1", .owner = THIS_MODULE, @@ -3101,6 +3115,7 @@ static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = { .unregister_notifier = vfio_iommu_type1_unregister_notifier, .dma_rw = vfio_iommu_type1_dma_rw, .group_iommu_domain = vfio_iommu_type1_group_iommu_domain, + .notify = vfio_iommu_type1_notify, }; static int __init vfio_iommu_type1_init(void) -- cgit v1.2.3 From 898b9eaeb3fe5eb1ac510f2c4775c8277206752c Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Fri, 29 Jan 2021 08:54:12 -0800 Subject: vfio/type1: block on invalid vaddr Block translation of host virtual address while an iova range has an invalid vaddr. Signed-off-by: Steve Sistare Reviewed-by: Cornelia Huck Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 95 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 90 insertions(+), 5 deletions(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 2109803bfb15..6cf1dad501d0 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include @@ -71,6 +72,7 @@ struct vfio_iommu { unsigned int dma_avail; unsigned int vaddr_invalid_count; uint64_t pgsize_bitmap; + wait_queue_head_t vaddr_wait; bool v2; bool nesting; bool dirty_page_tracking; @@ -146,6 +148,8 @@ struct vfio_regions { #define DIRTY_BITMAP_PAGES_MAX ((u64)INT_MAX) #define DIRTY_BITMAP_SIZE_MAX DIRTY_BITMAP_BYTES(DIRTY_BITMAP_PAGES_MAX) +#define WAITED 1 + static int put_pfn(unsigned long pfn, int prot); static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu, @@ -507,6 +511,61 @@ done: return ret; } +static int vfio_wait(struct vfio_iommu *iommu) +{ + DEFINE_WAIT(wait); + + prepare_to_wait(&iommu->vaddr_wait, &wait, TASK_KILLABLE); + mutex_unlock(&iommu->lock); + schedule(); + mutex_lock(&iommu->lock); + finish_wait(&iommu->vaddr_wait, &wait); + if (kthread_should_stop() || !iommu->container_open || + fatal_signal_pending(current)) { + return -EFAULT; + } + return WAITED; +} + +/* + * Find dma struct and wait for its vaddr to be valid. iommu lock is dropped + * if the task waits, but is re-locked on return. Return result in *dma_p. + * Return 0 on success with no waiting, WAITED on success if waited, and -errno + * on error. + */ +static int vfio_find_dma_valid(struct vfio_iommu *iommu, dma_addr_t start, + size_t size, struct vfio_dma **dma_p) +{ + int ret; + + do { + *dma_p = vfio_find_dma(iommu, start, size); + if (!*dma_p) + ret = -EINVAL; + else if (!(*dma_p)->vaddr_invalid) + ret = 0; + else + ret = vfio_wait(iommu); + } while (ret > 0); + + return ret; +} + +/* + * Wait for all vaddr in the dma_list to become valid. iommu lock is dropped + * if the task waits, but is re-locked on return. Return 0 on success with no + * waiting, WAITED on success if waited, and -errno on error. + */ +static int vfio_wait_all_valid(struct vfio_iommu *iommu) +{ + int ret = 0; + + while (iommu->vaddr_invalid_count && ret >= 0) + ret = vfio_wait(iommu); + + return ret; +} + /* * Attempt to pin pages. We really don't want to track all the pfns and * the iommu can only map chunks of consecutive pfns anyway, so get the @@ -668,6 +727,7 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, unsigned long remote_vaddr; struct vfio_dma *dma; bool do_accounting; + dma_addr_t iova; if (!iommu || !user_pfn || !phys_pfn) return -EINVAL; @@ -678,6 +738,22 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, mutex_lock(&iommu->lock); + /* + * Wait for all necessary vaddr's to be valid so they can be used in + * the main loop without dropping the lock, to avoid racing vs unmap. + */ +again: + if (iommu->vaddr_invalid_count) { + for (i = 0; i < npage; i++) { + iova = user_pfn[i] << PAGE_SHIFT; + ret = vfio_find_dma_valid(iommu, iova, PAGE_SIZE, &dma); + if (ret < 0) + goto pin_done; + if (ret == WAITED) + goto again; + } + } + /* Fail if notifier list is empty */ if (!iommu->notifier.head) { ret = -EINVAL; @@ -692,7 +768,6 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, do_accounting = !IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu); for (i = 0; i < npage; i++) { - dma_addr_t iova; struct vfio_pfn *vpfn; iova = user_pfn[i] << PAGE_SHIFT; @@ -977,8 +1052,10 @@ static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma) vfio_unlink_dma(iommu, dma); put_task_struct(dma->task); vfio_dma_bitmap_free(dma); - if (dma->vaddr_invalid) + if (dma->vaddr_invalid) { iommu->vaddr_invalid_count--; + wake_up_all(&iommu->vaddr_wait); + } kfree(dma); iommu->dma_avail++; } @@ -1406,6 +1483,7 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, dma->vaddr = vaddr; dma->vaddr_invalid = false; iommu->vaddr_invalid_count--; + wake_up_all(&iommu->vaddr_wait); } goto out_unlock; } else if (dma) { @@ -1505,6 +1583,10 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; int ret; + ret = vfio_wait_all_valid(iommu); + if (ret < 0) + return ret; + /* Arbitrarily pick the first domain in the list for lookups */ if (!list_empty(&iommu->domain_list)) d = list_first_entry(&iommu->domain_list, @@ -2524,6 +2606,7 @@ static void *vfio_iommu_type1_open(unsigned long arg) iommu->container_open = true; mutex_init(&iommu->lock); BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier); + init_waitqueue_head(&iommu->vaddr_wait); return iommu; } @@ -2992,12 +3075,13 @@ static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu, struct vfio_dma *dma; bool kthread = current->mm == NULL; size_t offset; + int ret; *copied = 0; - dma = vfio_find_dma(iommu, user_iova, 1); - if (!dma) - return -EINVAL; + ret = vfio_find_dma_valid(iommu, user_iova, 1, &dma); + if (ret < 0) + return ret; if ((write && !(dma->prot & IOMMU_WRITE)) || !(dma->prot & IOMMU_READ)) @@ -3099,6 +3183,7 @@ static void vfio_iommu_type1_notify(void *iommu_data, mutex_lock(&iommu->lock); iommu->container_open = false; mutex_unlock(&iommu->lock); + wake_up_all(&iommu->vaddr_wait); } static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = { -- cgit v1.2.3 From d0a78f91761fcd837da1e7a4b0f8368873adc646 Mon Sep 17 00:00:00 2001 From: Keqian Zhu Date: Fri, 22 Jan 2021 17:26:34 +0800 Subject: vfio/iommu_type1: Populate full dirty when detach non-pinned group If a group with non-pinned-page dirty scope is detached with dirty logging enabled, we should fully populate the dirty bitmaps at the time it's removed since we don't know the extent of its previous DMA, nor will the group be present to trigger the full bitmap when the user retrieves the dirty bitmap. Fixes: d6a4c185660c ("vfio iommu: Implementation of ioctl for dirty pages tracking") Suggested-by: Alex Williamson Signed-off-by: Keqian Zhu Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 0b4dedaa9128..161725395f2f 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -236,6 +236,18 @@ static void vfio_dma_populate_bitmap(struct vfio_dma *dma, size_t pgsize) } } +static void vfio_iommu_populate_bitmap_full(struct vfio_iommu *iommu) +{ + struct rb_node *n; + unsigned long pgshift = __ffs(iommu->pgsize_bitmap); + + for (n = rb_first(&iommu->dma_list); n; n = rb_next(n)) { + struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node); + + bitmap_set(dma->bitmap, 0, dma->size >> pgshift); + } +} + static int vfio_dma_bitmap_alloc_all(struct vfio_iommu *iommu, size_t pgsize) { struct rb_node *n; @@ -2415,8 +2427,11 @@ detach_group_done: * Removal of a group without dirty tracking may allow the iommu scope * to be promoted. */ - if (update_dirty_scope) + if (update_dirty_scope) { update_pinned_page_dirty_scope(iommu); + if (iommu->dirty_page_tracking) + vfio_iommu_populate_bitmap_full(iommu); + } mutex_unlock(&iommu->lock); } -- cgit v1.2.3 From 4a19f37a3dd3f29997735e61b25ddad24b8abe73 Mon Sep 17 00:00:00 2001 From: Keqian Zhu Date: Fri, 22 Jan 2021 17:26:35 +0800 Subject: vfio/iommu_type1: Fix some sanity checks in detach group vfio_sanity_check_pfn_list() is used to check whether pfn_list and notifier are empty when remove the external domain, so it makes a wrong assumption that only external domain will use the pinning interface. Now we apply the pfn_list check when a vfio_dma is removed and apply the notifier check when all domains are removed. Fixes: a54eb55045ae ("vfio iommu type1: Add support for mediated devices") Signed-off-by: Keqian Zhu Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 31 ++++++++----------------------- 1 file changed, 8 insertions(+), 23 deletions(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 161725395f2f..78bd28873945 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -957,6 +957,7 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma, static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma) { + WARN_ON(!RB_EMPTY_ROOT(&dma->pfn_list)); vfio_unmap_unpin(iommu, dma, true); vfio_unlink_dma(iommu, dma); put_task_struct(dma->task); @@ -2250,23 +2251,6 @@ static void vfio_iommu_unmap_unpin_reaccount(struct vfio_iommu *iommu) } } -static void vfio_sanity_check_pfn_list(struct vfio_iommu *iommu) -{ - struct rb_node *n; - - n = rb_first(&iommu->dma_list); - for (; n; n = rb_next(n)) { - struct vfio_dma *dma; - - dma = rb_entry(n, struct vfio_dma, node); - - if (WARN_ON(!RB_EMPTY_ROOT(&dma->pfn_list))) - break; - } - /* mdev vendor driver must unregister notifier */ - WARN_ON(iommu->notifier.head); -} - /* * Called when a domain is removed in detach. It is possible that * the removed domain decided the iova aperture window. Modify the @@ -2366,10 +2350,10 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, kfree(group); if (list_empty(&iommu->external_domain->group_list)) { - vfio_sanity_check_pfn_list(iommu); - - if (!IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)) + if (!IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)) { + WARN_ON(iommu->notifier.head); vfio_iommu_unmap_unpin_all(iommu); + } kfree(iommu->external_domain); iommu->external_domain = NULL; @@ -2403,10 +2387,12 @@ static void vfio_iommu_type1_detach_group(void *iommu_data, */ if (list_empty(&domain->group_list)) { if (list_is_singular(&iommu->domain_list)) { - if (!iommu->external_domain) + if (!iommu->external_domain) { + WARN_ON(iommu->notifier.head); vfio_iommu_unmap_unpin_all(iommu); - else + } else { vfio_iommu_unmap_unpin_reaccount(iommu); + } } iommu_domain_free(domain->domain); list_del(&domain->next); @@ -2490,7 +2476,6 @@ static void vfio_iommu_type1_release(void *iommu_data) if (iommu->external_domain) { vfio_release_domain(iommu->external_domain, true); - vfio_sanity_check_pfn_list(iommu); kfree(iommu->external_domain); } -- cgit v1.2.3 From 010321565a7d2cd73eeed6f589693ce752e154bd Mon Sep 17 00:00:00 2001 From: Keqian Zhu Date: Mon, 25 Jan 2021 10:46:42 +0800 Subject: vfio/iommu_type1: Mantain a counter for non_pinned_groups With this counter, we never need to traverse all groups to update pinned_scope of vfio_iommu. Suggested-by: Alex Williamson Signed-off-by: Keqian Zhu Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 40 +++++----------------------------------- 1 file changed, 5 insertions(+), 35 deletions(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 78bd28873945..706b88b5d4ee 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -70,10 +70,10 @@ struct vfio_iommu { struct blocking_notifier_head notifier; unsigned int dma_avail; uint64_t pgsize_bitmap; + uint64_t num_non_pinned_groups; bool v2; bool nesting; bool dirty_page_tracking; - bool pinned_page_dirty_scope; }; struct vfio_domain { @@ -148,7 +148,6 @@ static int put_pfn(unsigned long pfn, int prot); static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu, struct iommu_group *iommu_group); -static void update_pinned_page_dirty_scope(struct vfio_iommu *iommu); /* * This code handles mapping and unmapping of user data buffers * into DMA'ble space using the IOMMU @@ -726,7 +725,7 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data, group = vfio_iommu_find_iommu_group(iommu, iommu_group); if (!group->pinned_page_dirty_scope) { group->pinned_page_dirty_scope = true; - update_pinned_page_dirty_scope(iommu); + iommu->num_non_pinned_groups--; } goto pin_done; @@ -1004,7 +1003,7 @@ static int update_user_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu, * mark all pages dirty if any IOMMU capable device is not able * to report dirty pages and all pages are pinned and mapped. */ - if (!iommu->pinned_page_dirty_scope && dma->iommu_mapped) + if (iommu->num_non_pinned_groups && dma->iommu_mapped) bitmap_set(dma->bitmap, 0, nbits); if (shift) { @@ -1635,33 +1634,6 @@ static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu, return group; } -static void update_pinned_page_dirty_scope(struct vfio_iommu *iommu) -{ - struct vfio_domain *domain; - struct vfio_group *group; - - list_for_each_entry(domain, &iommu->domain_list, next) { - list_for_each_entry(group, &domain->group_list, next) { - if (!group->pinned_page_dirty_scope) { - iommu->pinned_page_dirty_scope = false; - return; - } - } - } - - if (iommu->external_domain) { - domain = iommu->external_domain; - list_for_each_entry(group, &domain->group_list, next) { - if (!group->pinned_page_dirty_scope) { - iommu->pinned_page_dirty_scope = false; - return; - } - } - } - - iommu->pinned_page_dirty_scope = true; -} - static bool vfio_iommu_has_sw_msi(struct list_head *group_resv_regions, phys_addr_t *base) { @@ -2070,8 +2042,6 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, * addition of a dirty tracking group. */ group->pinned_page_dirty_scope = true; - if (!iommu->pinned_page_dirty_scope) - update_pinned_page_dirty_scope(iommu); mutex_unlock(&iommu->lock); return 0; @@ -2201,7 +2171,7 @@ done: * demotes the iommu scope until it declares itself dirty tracking * capable via the page pinning interface. */ - iommu->pinned_page_dirty_scope = false; + iommu->num_non_pinned_groups++; mutex_unlock(&iommu->lock); vfio_iommu_resv_free(&group_resv_regions); @@ -2414,7 +2384,7 @@ detach_group_done: * to be promoted. */ if (update_dirty_scope) { - update_pinned_page_dirty_scope(iommu); + iommu->num_non_pinned_groups--; if (iommu->dirty_page_tracking) vfio_iommu_populate_bitmap_full(iommu); } -- cgit v1.2.3 From 37a682ffbe2ab31b51123b67bd30ab42d1131cc1 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 24 Jan 2021 16:35:41 +0100 Subject: vfio/pci: Fix handling of pci use accessor return codes The pci user accessors return negative errno's on error. Signed-off-by: Heiner Kallweit Reviewed-by: Cornelia Huck [aw: drop Fixes tag, pcibios_err_to_errno() behaves correctly for -errno] Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_igd.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/pci/vfio_pci_igd.c b/drivers/vfio/pci/vfio_pci_igd.c index 53d97f459252..e66dfb0178ed 100644 --- a/drivers/vfio/pci/vfio_pci_igd.c +++ b/drivers/vfio/pci/vfio_pci_igd.c @@ -127,7 +127,7 @@ static size_t vfio_pci_igd_cfg_rw(struct vfio_pci_device *vdev, ret = pci_user_read_config_byte(pdev, pos, &val); if (ret) - return pcibios_err_to_errno(ret); + return ret; if (copy_to_user(buf + count - size, &val, 1)) return -EFAULT; @@ -141,7 +141,7 @@ static size_t vfio_pci_igd_cfg_rw(struct vfio_pci_device *vdev, ret = pci_user_read_config_word(pdev, pos, &val); if (ret) - return pcibios_err_to_errno(ret); + return ret; val = cpu_to_le16(val); if (copy_to_user(buf + count - size, &val, 2)) @@ -156,7 +156,7 @@ static size_t vfio_pci_igd_cfg_rw(struct vfio_pci_device *vdev, ret = pci_user_read_config_dword(pdev, pos, &val); if (ret) - return pcibios_err_to_errno(ret); + return ret; val = cpu_to_le32(val); if (copy_to_user(buf + count - size, &val, 4)) @@ -171,7 +171,7 @@ static size_t vfio_pci_igd_cfg_rw(struct vfio_pci_device *vdev, ret = pci_user_read_config_word(pdev, pos, &val); if (ret) - return pcibios_err_to_errno(ret); + return ret; val = cpu_to_le16(val); if (copy_to_user(buf + count - size, &val, 2)) @@ -186,7 +186,7 @@ static size_t vfio_pci_igd_cfg_rw(struct vfio_pci_device *vdev, ret = pci_user_read_config_byte(pdev, pos, &val); if (ret) - return pcibios_err_to_errno(ret); + return ret; if (copy_to_user(buf + count - size, &val, 1)) return -EFAULT; -- cgit v1.2.3 From 46c474666094448ee843266adfc38bde43ef8f68 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Mon, 1 Feb 2021 16:28:24 +0000 Subject: vfio-pci/zdev: remove unused vdev argument Zdev static functions do not use vdev argument. Remove it. Signed-off-by: Max Gurtovoy Reviewed-by: Matthew Rosato Reviewed-by: Cornelia Huck Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_zdev.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/pci/vfio_pci_zdev.c b/drivers/vfio/pci/vfio_pci_zdev.c index 229685634031..53084521cc80 100644 --- a/drivers/vfio/pci/vfio_pci_zdev.c +++ b/drivers/vfio/pci/vfio_pci_zdev.c @@ -24,8 +24,7 @@ /* * Add the Base PCI Function information to the device info region. */ -static int zpci_base_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev, - struct vfio_info_cap *caps) +static int zpci_base_cap(struct zpci_dev *zdev, struct vfio_info_cap *caps) { struct vfio_device_info_cap_zpci_base cap = { .header.id = VFIO_DEVICE_INFO_CAP_ZPCI_BASE, @@ -45,8 +44,7 @@ static int zpci_base_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev, /* * Add the Base PCI Function Group information to the device info region. */ -static int zpci_group_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev, - struct vfio_info_cap *caps) +static int zpci_group_cap(struct zpci_dev *zdev, struct vfio_info_cap *caps) { struct vfio_device_info_cap_zpci_group cap = { .header.id = VFIO_DEVICE_INFO_CAP_ZPCI_GROUP, @@ -66,8 +64,7 @@ static int zpci_group_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev, /* * Add the device utility string to the device info region. */ -static int zpci_util_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev, - struct vfio_info_cap *caps) +static int zpci_util_cap(struct zpci_dev *zdev, struct vfio_info_cap *caps) { struct vfio_device_info_cap_zpci_util *cap; int cap_size = sizeof(*cap) + CLP_UTIL_STR_LEN; @@ -90,8 +87,7 @@ static int zpci_util_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev, /* * Add the function path string to the device info region. */ -static int zpci_pfip_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev, - struct vfio_info_cap *caps) +static int zpci_pfip_cap(struct zpci_dev *zdev, struct vfio_info_cap *caps) { struct vfio_device_info_cap_zpci_pfip *cap; int cap_size = sizeof(*cap) + CLP_PFIP_NR_SEGMENTS; @@ -123,21 +119,21 @@ int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev, if (!zdev) return -ENODEV; - ret = zpci_base_cap(zdev, vdev, caps); + ret = zpci_base_cap(zdev, caps); if (ret) return ret; - ret = zpci_group_cap(zdev, vdev, caps); + ret = zpci_group_cap(zdev, caps); if (ret) return ret; if (zdev->util_str_avail) { - ret = zpci_util_cap(zdev, vdev, caps); + ret = zpci_util_cap(zdev, caps); if (ret) return ret; } - ret = zpci_pfip_cap(zdev, vdev, caps); + ret = zpci_pfip_cap(zdev, caps); return ret; } -- cgit v1.2.3 From 7e31d6dc2c78b2a0ba0039ca97ca98a581e8db82 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Mon, 1 Feb 2021 16:28:25 +0000 Subject: vfio-pci/zdev: fix possible segmentation fault issue In case allocation fails, we must behave correctly and exit with error. Fixes: e6b817d4b821 ("vfio-pci/zdev: Add zPCI capabilities to VFIO_DEVICE_GET_INFO") Signed-off-by: Max Gurtovoy Reviewed-by: Cornelia Huck Reviewed-by: Matthew Rosato Signed-off-by: Alex Williamson --- drivers/vfio/pci/vfio_pci_zdev.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/vfio') diff --git a/drivers/vfio/pci/vfio_pci_zdev.c b/drivers/vfio/pci/vfio_pci_zdev.c index 53084521cc80..7b011b62c766 100644 --- a/drivers/vfio/pci/vfio_pci_zdev.c +++ b/drivers/vfio/pci/vfio_pci_zdev.c @@ -71,6 +71,8 @@ static int zpci_util_cap(struct zpci_dev *zdev, struct vfio_info_cap *caps) int ret; cap = kmalloc(cap_size, GFP_KERNEL); + if (!cap) + return -ENOMEM; cap->header.id = VFIO_DEVICE_INFO_CAP_ZPCI_UTIL; cap->header.version = 1; @@ -94,6 +96,8 @@ static int zpci_pfip_cap(struct zpci_dev *zdev, struct vfio_info_cap *caps) int ret; cap = kmalloc(cap_size, GFP_KERNEL); + if (!cap) + return -ENOMEM; cap->header.id = VFIO_DEVICE_INFO_CAP_ZPCI_PFIP; cap->header.version = 1; -- cgit v1.2.3 From 35ac5991cdec9d920a683e74b64fda8512bdd3e9 Mon Sep 17 00:00:00 2001 From: Tian Tao Date: Thu, 18 Feb 2021 10:17:29 +0800 Subject: vfio/iommu_type1: Fix duplicate included kthread.h linux/kthread.h is included more than once, remove the one that isn't necessary. Fixes: 898b9eaeb3fe ("vfio/type1: block on invalid vaddr") Signed-off-by: Tian Tao Reviewed-by: Cornelia Huck Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index ec9fd95a138b..b3df383d7028 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From b9abef43a08ef7faa33477cccb0c08c64eb2b8bf Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Thu, 18 Feb 2021 10:44:35 +0000 Subject: vfio/pci: remove CONFIG_VFIO_PCI_ZDEV from Kconfig In case we're running on s390 system always expose the capabilities for configuration of zPCI devices. In case we're running on different platform, continue as usual. Signed-off-by: Max Gurtovoy Reviewed-by: Matthew Rosato Reviewed-by: Cornelia Huck Signed-off-by: Alex Williamson --- drivers/vfio/pci/Kconfig | 12 ------------ drivers/vfio/pci/Makefile | 2 +- drivers/vfio/pci/vfio_pci.c | 12 +++++------- drivers/vfio/pci/vfio_pci_private.h | 2 +- 4 files changed, 7 insertions(+), 21 deletions(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/pci/Kconfig b/drivers/vfio/pci/Kconfig index 40a223381ab6..ac3c1dd3edef 100644 --- a/drivers/vfio/pci/Kconfig +++ b/drivers/vfio/pci/Kconfig @@ -45,15 +45,3 @@ config VFIO_PCI_NVLINK2 depends on VFIO_PCI && PPC_POWERNV help VFIO PCI support for P9 Witherspoon machine with NVIDIA V100 GPUs - -config VFIO_PCI_ZDEV - bool "VFIO PCI ZPCI device CLP support" - depends on VFIO_PCI && S390 - default y - help - Enabling this option exposes VFIO capabilities containing hardware - configuration for zPCI devices. This enables userspace (e.g. QEMU) - to supply proper configuration values instead of hard-coded defaults - for zPCI devices passed through via VFIO on s390. - - Say Y here. diff --git a/drivers/vfio/pci/Makefile b/drivers/vfio/pci/Makefile index 781e0809d6ee..eff97a7cd9f1 100644 --- a/drivers/vfio/pci/Makefile +++ b/drivers/vfio/pci/Makefile @@ -3,6 +3,6 @@ vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o vfio-pci-$(CONFIG_VFIO_PCI_NVLINK2) += vfio_pci_nvlink2.o -vfio-pci-$(CONFIG_VFIO_PCI_ZDEV) += vfio_pci_zdev.o +vfio-pci-$(CONFIG_S390) += vfio_pci_zdev.o obj-$(CONFIG_VFIO_PCI) += vfio-pci.o diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index 706de3ef94bb..65e7e6b44578 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -807,6 +807,7 @@ static long vfio_pci_ioctl(void *device_data, struct vfio_device_info info; struct vfio_info_cap caps = { .buf = NULL, .size = 0 }; unsigned long capsz; + int ret; minsz = offsetofend(struct vfio_device_info, num_irqs); @@ -832,13 +833,10 @@ static long vfio_pci_ioctl(void *device_data, info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions; info.num_irqs = VFIO_PCI_NUM_IRQS; - if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV)) { - int ret = vfio_pci_info_zdev_add_caps(vdev, &caps); - - if (ret && ret != -ENODEV) { - pci_warn(vdev->pdev, "Failed to setup zPCI info capabilities\n"); - return ret; - } + ret = vfio_pci_info_zdev_add_caps(vdev, &caps); + if (ret && ret != -ENODEV) { + pci_warn(vdev->pdev, "Failed to setup zPCI info capabilities\n"); + return ret; } if (caps.size) { diff --git a/drivers/vfio/pci/vfio_pci_private.h b/drivers/vfio/pci/vfio_pci_private.h index 5c90e560c5c7..9cd1882a05af 100644 --- a/drivers/vfio/pci/vfio_pci_private.h +++ b/drivers/vfio/pci/vfio_pci_private.h @@ -214,7 +214,7 @@ static inline int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev) } #endif -#ifdef CONFIG_VFIO_PCI_ZDEV +#ifdef CONFIG_S390 extern int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev, struct vfio_info_cap *caps); #else -- cgit v1.2.3 From 07956b6269d3ed05d854233d5bb776dca91751dd Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 16 Feb 2021 15:49:34 -0700 Subject: vfio/type1: Use follow_pte() follow_pfn() doesn't make sure that we're using the correct page protections, get the pte with follow_pte() so that we can test protections and get the pfn from the pte. Fixes: 5cbf3264bc71 ("vfio/type1: Fix VA->PA translation for PFNMAP VMAs in vaddr_get_pfn()") Reviewed-by: Jason Gunthorpe Reviewed-by: Cornelia Huck Reviewed-by: Peter Xu Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index b3df383d7028..ed03f3fcb07e 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -462,9 +463,11 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm, unsigned long vaddr, unsigned long *pfn, bool write_fault) { + pte_t *ptep; + spinlock_t *ptl; int ret; - ret = follow_pfn(vma, vaddr, pfn); + ret = follow_pte(vma->vm_mm, vaddr, &ptep, &ptl); if (ret) { bool unlocked = false; @@ -478,9 +481,17 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm, if (ret) return ret; - ret = follow_pfn(vma, vaddr, pfn); + ret = follow_pte(vma->vm_mm, vaddr, &ptep, &ptl); + if (ret) + return ret; } + if (write_fault && !pte_write(*ptep)) + ret = -EFAULT; + else + *pfn = pte_pfn(*ptep); + + pte_unmap_unlock(ptep, ptl); return ret; } -- cgit v1.2.3 From be16c1fd99f41abebc0bf965d5d29cd18c9d271e Mon Sep 17 00:00:00 2001 From: Daniel Jordan Date: Fri, 19 Feb 2021 11:13:03 -0500 Subject: vfio/type1: Change success value of vaddr_get_pfn() vaddr_get_pfn() simply returns 0 on success. Have it report the number of pfns successfully gotten instead, whether from page pinning or follow_fault_pfn(), which will be used later when batching pinning. Change the last check in vfio_pin_pages_remote() for consistency with the other two. Signed-off-by: Daniel Jordan Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index ed03f3fcb07e..82ed8bf34c11 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -495,6 +495,10 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm, return ret; } +/* + * Returns the positive number of pfns successfully obtained or a negative + * error code. + */ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr, int prot, unsigned long *pfn) { @@ -511,7 +515,6 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr, page, NULL, NULL); if (ret == 1) { *pfn = page_to_pfn(page[0]); - ret = 0; goto done; } @@ -525,8 +528,12 @@ retry: if (ret == -EAGAIN) goto retry; - if (!ret && !is_invalid_reserved_pfn(*pfn)) - ret = -EFAULT; + if (!ret) { + if (is_invalid_reserved_pfn(*pfn)) + ret = 1; + else + ret = -EFAULT; + } } done: mmap_read_unlock(mm); @@ -607,7 +614,7 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, return -ENODEV; ret = vaddr_get_pfn(current->mm, vaddr, dma->prot, pfn_base); - if (ret) + if (ret < 0) return ret; pinned++; @@ -634,7 +641,7 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, for (vaddr += PAGE_SIZE, iova += PAGE_SIZE; pinned < npage; pinned++, vaddr += PAGE_SIZE, iova += PAGE_SIZE) { ret = vaddr_get_pfn(current->mm, vaddr, dma->prot, &pfn); - if (ret) + if (ret < 0) break; if (pfn != *pfn_base + pinned || @@ -660,7 +667,7 @@ out: ret = vfio_lock_acct(dma, lock_acct, false); unpin_out: - if (ret) { + if (ret < 0) { if (!rsvd) { for (pfn = *pfn_base ; pinned ; pfn++, pinned--) put_pfn(pfn, dma->prot); @@ -704,7 +711,7 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr, return -ENODEV; ret = vaddr_get_pfn(mm, vaddr, dma->prot, pfn_base); - if (!ret && do_accounting && !is_invalid_reserved_pfn(*pfn_base)) { + if (ret == 1 && do_accounting && !is_invalid_reserved_pfn(*pfn_base)) { ret = vfio_lock_acct(dma, 1, true); if (ret) { put_pfn(*pfn_base, dma->prot); -- cgit v1.2.3 From 4b6c33b3229678e38a6b0bbd4367d4b91366b523 Mon Sep 17 00:00:00 2001 From: Daniel Jordan Date: Fri, 19 Feb 2021 11:13:04 -0500 Subject: vfio/type1: Prepare for batched pinning with struct vfio_batch Get ready to pin more pages at once with struct vfio_batch, which represents a batch of pinned pages. The struct has a fallback page pointer to avoid two unlikely scenarios: pointlessly allocating a page if disable_hugepages is enabled or failing the whole pinning operation if the kernel can't allocate memory. vaddr_get_pfn() becomes vaddr_get_pfns() to prepare for handling multiple pages, though for now only one page is stored in the pages array. Signed-off-by: Daniel Jordan Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 71 +++++++++++++++++++++++++++++++++-------- 1 file changed, 58 insertions(+), 13 deletions(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 82ed8bf34c11..1c86375aca2d 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -103,6 +103,12 @@ struct vfio_dma { unsigned long *bitmap; }; +struct vfio_batch { + struct page **pages; /* for pin_user_pages_remote */ + struct page *fallback_page; /* if pages alloc fails */ + int capacity; /* length of pages array */ +}; + struct vfio_group { struct iommu_group *iommu_group; struct list_head next; @@ -459,6 +465,31 @@ static int put_pfn(unsigned long pfn, int prot) return 0; } +#define VFIO_BATCH_MAX_CAPACITY (PAGE_SIZE / sizeof(struct page *)) + +static void vfio_batch_init(struct vfio_batch *batch) +{ + if (unlikely(disable_hugepages)) + goto fallback; + + batch->pages = (struct page **) __get_free_page(GFP_KERNEL); + if (!batch->pages) + goto fallback; + + batch->capacity = VFIO_BATCH_MAX_CAPACITY; + return; + +fallback: + batch->pages = &batch->fallback_page; + batch->capacity = 1; +} + +static void vfio_batch_fini(struct vfio_batch *batch) +{ + if (batch->capacity == VFIO_BATCH_MAX_CAPACITY) + free_page((unsigned long)batch->pages); +} + static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm, unsigned long vaddr, unsigned long *pfn, bool write_fault) @@ -499,10 +530,10 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm, * Returns the positive number of pfns successfully obtained or a negative * error code. */ -static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr, - int prot, unsigned long *pfn) +static int vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr, + long npages, int prot, unsigned long *pfn, + struct page **pages) { - struct page *page[1]; struct vm_area_struct *vma; unsigned int flags = 0; int ret; @@ -511,10 +542,10 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr, flags |= FOLL_WRITE; mmap_read_lock(mm); - ret = pin_user_pages_remote(mm, vaddr, 1, flags | FOLL_LONGTERM, - page, NULL, NULL); - if (ret == 1) { - *pfn = page_to_pfn(page[0]); + ret = pin_user_pages_remote(mm, vaddr, npages, flags | FOLL_LONGTERM, + pages, NULL, NULL); + if (ret > 0) { + *pfn = page_to_pfn(pages[0]); goto done; } @@ -602,7 +633,7 @@ static int vfio_wait_all_valid(struct vfio_iommu *iommu) */ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, long npage, unsigned long *pfn_base, - unsigned long limit) + unsigned long limit, struct vfio_batch *batch) { unsigned long pfn = 0; long ret, pinned = 0, lock_acct = 0; @@ -613,7 +644,8 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, if (!current->mm) return -ENODEV; - ret = vaddr_get_pfn(current->mm, vaddr, dma->prot, pfn_base); + ret = vaddr_get_pfns(current->mm, vaddr, 1, dma->prot, pfn_base, + batch->pages); if (ret < 0) return ret; @@ -640,7 +672,8 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, /* Lock all the consecutive pages from pfn_base */ for (vaddr += PAGE_SIZE, iova += PAGE_SIZE; pinned < npage; pinned++, vaddr += PAGE_SIZE, iova += PAGE_SIZE) { - ret = vaddr_get_pfn(current->mm, vaddr, dma->prot, &pfn); + ret = vaddr_get_pfns(current->mm, vaddr, 1, dma->prot, &pfn, + batch->pages); if (ret < 0) break; @@ -703,6 +736,7 @@ static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova, static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr, unsigned long *pfn_base, bool do_accounting) { + struct page *pages[1]; struct mm_struct *mm; int ret; @@ -710,7 +744,7 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr, if (!mm) return -ENODEV; - ret = vaddr_get_pfn(mm, vaddr, dma->prot, pfn_base); + ret = vaddr_get_pfns(mm, vaddr, 1, dma->prot, pfn_base, pages); if (ret == 1 && do_accounting && !is_invalid_reserved_pfn(*pfn_base)) { ret = vfio_lock_acct(dma, 1, true); if (ret) { @@ -1404,15 +1438,19 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma, { dma_addr_t iova = dma->iova; unsigned long vaddr = dma->vaddr; + struct vfio_batch batch; size_t size = map_size; long npage; unsigned long pfn, limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; int ret = 0; + vfio_batch_init(&batch); + while (size) { /* Pin a contiguous chunk of memory */ npage = vfio_pin_pages_remote(dma, vaddr + dma->size, - size >> PAGE_SHIFT, &pfn, limit); + size >> PAGE_SHIFT, &pfn, limit, + &batch); if (npage <= 0) { WARN_ON(!npage); ret = (int)npage; @@ -1432,6 +1470,7 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma, dma->size += npage << PAGE_SHIFT; } + vfio_batch_fini(&batch); dma->iommu_mapped = true; if (ret) @@ -1608,6 +1647,7 @@ static int vfio_bus_type(struct device *dev, void *data) static int vfio_iommu_replay(struct vfio_iommu *iommu, struct vfio_domain *domain) { + struct vfio_batch batch; struct vfio_domain *d = NULL; struct rb_node *n; unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; @@ -1622,6 +1662,8 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, d = list_first_entry(&iommu->domain_list, struct vfio_domain, next); + vfio_batch_init(&batch); + n = rb_first(&iommu->dma_list); for (; n; n = rb_next(n)) { @@ -1669,7 +1711,8 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, npage = vfio_pin_pages_remote(dma, vaddr, n >> PAGE_SHIFT, - &pfn, limit); + &pfn, limit, + &batch); if (npage <= 0) { WARN_ON(!npage); ret = (int)npage; @@ -1702,6 +1745,7 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, dma->iommu_mapped = true; } + vfio_batch_fini(&batch); return 0; unwind: @@ -1742,6 +1786,7 @@ unwind: } } + vfio_batch_fini(&batch); return ret; } -- cgit v1.2.3 From 4d83de6da265cd84e74c19d876055fa5f261cde4 Mon Sep 17 00:00:00 2001 From: Daniel Jordan Date: Fri, 19 Feb 2021 11:13:05 -0500 Subject: vfio/type1: Batch page pinning Pinning one 4K page at a time is inefficient, so do it in batches of 512 instead. This is just an optimization with no functional change intended, and in particular the driver still calls iommu_map() with the largest physically contiguous range possible. Add two fields in vfio_batch to remember where to start between calls to vfio_pin_pages_remote(), and use vfio_batch_unpin() to handle remaining pages in the batch in case of error. qemu pins pages for guests around 8% faster on my test system, a two-node Broadwell server with 128G memory per node. The qemu process was bound to one node with its allocations constrained there as well. base test guest ---------------- ---------------- mem (GB) speedup avg sec (std) avg sec (std) 1 7.4% 0.61 (0.00) 0.56 (0.00) 2 8.3% 0.93 (0.00) 0.85 (0.00) 4 8.4% 1.46 (0.00) 1.34 (0.00) 8 8.6% 2.54 (0.01) 2.32 (0.00) 16 8.3% 4.66 (0.00) 4.27 (0.01) 32 8.3% 8.94 (0.01) 8.20 (0.01) 64 8.2% 17.47 (0.01) 16.04 (0.03) 120 8.5% 32.45 (0.13) 29.69 (0.01) perf diff confirms less time spent in pup. Here are the top ten functions: Baseline Delta Abs Symbol 78.63% +6.64% clear_page_erms 1.50% -1.50% __gup_longterm_locked 1.27% -0.78% __get_user_pages +0.76% kvm_zap_rmapp.constprop.0 0.54% -0.53% vmacache_find 0.55% -0.51% get_pfnblock_flags_mask 0.48% -0.48% __get_user_pages_remote +0.39% slot_rmap_walk_next +0.32% vfio_pin_map_dma +0.26% kvm_handle_hva_range ... Suggested-by: Matthew Wilcox (Oracle) Signed-off-by: Daniel Jordan Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 135 ++++++++++++++++++++++++++-------------- 1 file changed, 89 insertions(+), 46 deletions(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 1c86375aca2d..4bb162c1d649 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -107,6 +107,8 @@ struct vfio_batch { struct page **pages; /* for pin_user_pages_remote */ struct page *fallback_page; /* if pages alloc fails */ int capacity; /* length of pages array */ + int size; /* of batch currently */ + int offset; /* of next entry in pages */ }; struct vfio_group { @@ -469,6 +471,9 @@ static int put_pfn(unsigned long pfn, int prot) static void vfio_batch_init(struct vfio_batch *batch) { + batch->size = 0; + batch->offset = 0; + if (unlikely(disable_hugepages)) goto fallback; @@ -484,6 +489,17 @@ fallback: batch->capacity = 1; } +static void vfio_batch_unpin(struct vfio_batch *batch, struct vfio_dma *dma) +{ + while (batch->size) { + unsigned long pfn = page_to_pfn(batch->pages[batch->offset]); + + put_pfn(pfn, dma->prot); + batch->offset++; + batch->size--; + } +} + static void vfio_batch_fini(struct vfio_batch *batch) { if (batch->capacity == VFIO_BATCH_MAX_CAPACITY) @@ -635,65 +651,88 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, long npage, unsigned long *pfn_base, unsigned long limit, struct vfio_batch *batch) { - unsigned long pfn = 0; + unsigned long pfn; + struct mm_struct *mm = current->mm; long ret, pinned = 0, lock_acct = 0; bool rsvd; dma_addr_t iova = vaddr - dma->vaddr + dma->iova; /* This code path is only user initiated */ - if (!current->mm) + if (!mm) return -ENODEV; - ret = vaddr_get_pfns(current->mm, vaddr, 1, dma->prot, pfn_base, - batch->pages); - if (ret < 0) - return ret; - - pinned++; - rsvd = is_invalid_reserved_pfn(*pfn_base); - - /* - * Reserved pages aren't counted against the user, externally pinned - * pages are already counted against the user. - */ - if (!rsvd && !vfio_find_vpfn(dma, iova)) { - if (!dma->lock_cap && current->mm->locked_vm + 1 > limit) { - put_pfn(*pfn_base, dma->prot); - pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__, - limit << PAGE_SHIFT); - return -ENOMEM; - } - lock_acct++; + if (batch->size) { + /* Leftover pages in batch from an earlier call. */ + *pfn_base = page_to_pfn(batch->pages[batch->offset]); + pfn = *pfn_base; + rsvd = is_invalid_reserved_pfn(*pfn_base); + } else { + *pfn_base = 0; } - if (unlikely(disable_hugepages)) - goto out; + while (npage) { + if (!batch->size) { + /* Empty batch, so refill it. */ + long req_pages = min_t(long, npage, batch->capacity); - /* Lock all the consecutive pages from pfn_base */ - for (vaddr += PAGE_SIZE, iova += PAGE_SIZE; pinned < npage; - pinned++, vaddr += PAGE_SIZE, iova += PAGE_SIZE) { - ret = vaddr_get_pfns(current->mm, vaddr, 1, dma->prot, &pfn, - batch->pages); - if (ret < 0) - break; + ret = vaddr_get_pfns(mm, vaddr, req_pages, dma->prot, + &pfn, batch->pages); + if (ret < 0) + goto unpin_out; - if (pfn != *pfn_base + pinned || - rsvd != is_invalid_reserved_pfn(pfn)) { - put_pfn(pfn, dma->prot); - break; + batch->size = ret; + batch->offset = 0; + + if (!*pfn_base) { + *pfn_base = pfn; + rsvd = is_invalid_reserved_pfn(*pfn_base); + } } - if (!rsvd && !vfio_find_vpfn(dma, iova)) { - if (!dma->lock_cap && - current->mm->locked_vm + lock_acct + 1 > limit) { - put_pfn(pfn, dma->prot); - pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", - __func__, limit << PAGE_SHIFT); - ret = -ENOMEM; - goto unpin_out; + /* + * pfn is preset for the first iteration of this inner loop and + * updated at the end to handle a VM_PFNMAP pfn. In that case, + * batch->pages isn't valid (there's no struct page), so allow + * batch->pages to be touched only when there's more than one + * pfn to check, which guarantees the pfns are from a + * !VM_PFNMAP vma. + */ + while (true) { + if (pfn != *pfn_base + pinned || + rsvd != is_invalid_reserved_pfn(pfn)) + goto out; + + /* + * Reserved pages aren't counted against the user, + * externally pinned pages are already counted against + * the user. + */ + if (!rsvd && !vfio_find_vpfn(dma, iova)) { + if (!dma->lock_cap && + mm->locked_vm + lock_acct + 1 > limit) { + pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", + __func__, limit << PAGE_SHIFT); + ret = -ENOMEM; + goto unpin_out; + } + lock_acct++; } - lock_acct++; + + pinned++; + npage--; + vaddr += PAGE_SIZE; + iova += PAGE_SIZE; + batch->offset++; + batch->size--; + + if (!batch->size) + break; + + pfn = page_to_pfn(batch->pages[batch->offset]); } + + if (unlikely(disable_hugepages)) + break; } out: @@ -701,10 +740,11 @@ out: unpin_out: if (ret < 0) { - if (!rsvd) { + if (pinned && !rsvd) { for (pfn = *pfn_base ; pinned ; pfn++, pinned--) put_pfn(pfn, dma->prot); } + vfio_batch_unpin(batch, dma); return ret; } @@ -1463,6 +1503,7 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma, if (ret) { vfio_unpin_pages_remote(dma, iova + dma->size, pfn, npage, true); + vfio_batch_unpin(&batch, dma); break; } @@ -1726,11 +1767,13 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, ret = iommu_map(domain->domain, iova, phys, size, dma->prot | domain->prot); if (ret) { - if (!dma->iommu_mapped) + if (!dma->iommu_mapped) { vfio_unpin_pages_remote(dma, iova, phys >> PAGE_SHIFT, size >> PAGE_SHIFT, true); + vfio_batch_unpin(&batch, dma); + } goto unwind; } -- cgit v1.2.3