Diffstat (limited to 'drivers/vfio/vfio_main.c')
 drivers/vfio/vfio_main.c | 311 +++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 283 insertions(+), 28 deletions(-)
diff --git a/drivers/vfio/vfio_main.c b/drivers/vfio/vfio_main.c
index f0ca33b2e1df..40732e8ed4c6 100644
--- a/drivers/vfio/vfio_main.c
+++ b/drivers/vfio/vfio_main.c
@@ -141,6 +141,21 @@ unsigned int vfio_device_set_open_count(struct vfio_device_set *dev_set)
}
EXPORT_SYMBOL_GPL(vfio_device_set_open_count);
+struct vfio_device *
+vfio_find_device_in_devset(struct vfio_device_set *dev_set,
+ struct device *dev)
+{
+ struct vfio_device *cur;
+
+ lockdep_assert_held(&dev_set->lock);
+
+ list_for_each_entry(cur, &dev_set->device_list, dev_set_list)
+ if (cur->dev == dev)
+ return cur;
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(vfio_find_device_in_devset);
+
/*
* Device objects - create, release, get, put, search
*/
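
The helper added above must run under dev_set->lock (hence the lockdep_assert_held()). A hedged usage sketch for a driver looking up a sibling device in its dev_set; vdev, my_phys_dev and do_something() are illustrative names, not part of this patch:

    struct vfio_device *sibling;

    mutex_lock(&vdev->dev_set->lock);
    sibling = vfio_find_device_in_devset(vdev->dev_set, my_phys_dev);
    if (sibling)
            do_something(sibling);  /* result only valid while the lock is held */
    mutex_unlock(&vdev->dev_set->lock);
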
@@ -258,7 +273,8 @@ static int __vfio_register_dev(struct vfio_device *device,
if (WARN_ON(IS_ENABLED(CONFIG_IOMMUFD) &&
(!device->ops->bind_iommufd ||
!device->ops->unbind_iommufd ||
- !device->ops->attach_ioas)))
+ !device->ops->attach_ioas ||
+ !device->ops->detach_ioas)))
return -EINVAL;
/*
@@ -276,7 +292,18 @@ static int __vfio_register_dev(struct vfio_device *device,
if (ret)
return ret;
- ret = device_add(&device->device);
+ /*
+ * VFIO always sets IOMMU_CACHE because we offer no way for userspace to
+ * restore cache coherency. It has to be checked here because it is only
+ * valid for cases where we are using iommu groups.
+ */
+ if (type == VFIO_IOMMU && !vfio_device_is_noiommu(device) &&
+ !device_iommu_capable(device->dev, IOMMU_CAP_CACHE_COHERENCY)) {
+ ret = -EINVAL;
+ goto err_out;
+ }
+
+ ret = vfio_device_add(device);
if (ret)
goto err_out;
@@ -316,6 +343,18 @@ void vfio_unregister_group_dev(struct vfio_device *device)
bool interrupted = false;
long rc;
+ /*
+ * Prevent the device from being newly opened by userspace via
+ * VFIO_GROUP_GET_DEVICE_FD in the group path.
+ */
+ vfio_device_group_unregister(device);
+
+ /*
+ * Balances vfio_device_add() in the register path, and also
+ * prevents the device from being newly opened by userspace in
+ * the cdev path.
+ */
+ vfio_device_del(device);
+
vfio_device_put_registration(device);
rc = try_wait_for_completion(&device->comp);
while (rc <= 0) {
@@ -339,18 +378,13 @@ void vfio_unregister_group_dev(struct vfio_device *device)
}
}
- vfio_device_group_unregister(device);
-
- /* Balances device_add in register path */
- device_del(&device->device);
-
/* Balances vfio_device_set_group in register path */
vfio_device_remove_group(device);
}
EXPORT_SYMBOL_GPL(vfio_unregister_group_dev);
#ifdef CONFIG_HAVE_KVM
-void _vfio_device_get_kvm_safe(struct vfio_device *device, struct kvm *kvm)
+void vfio_device_get_kvm_safe(struct vfio_device *device, struct kvm *kvm)
{
void (*pfn)(struct kvm *kvm);
bool (*fn)(struct kvm *kvm);
@@ -358,6 +392,9 @@ void _vfio_device_get_kvm_safe(struct vfio_device *device, struct kvm *kvm)
lockdep_assert_held(&device->dev_set->lock);
+ if (!kvm)
+ return;
+
pfn = symbol_get(kvm_put_kvm);
if (WARN_ON(!pfn))
return;
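
The symbol_get()/symbol_put() calls in this function let VFIO reference KVM's exported symbols without a hard module dependency. A minimal sketch of the general pattern, where foo_func stands in for any exported function symbol (hypothetical name, not part of this patch):

    int (*fn)(void);

    fn = symbol_get(foo_func);      /* takes a reference on the owning module */
    if (fn) {
            fn();                   /* safe: the module cannot unload here */
            symbol_put(foo_func);   /* drop the module reference */
    }
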
@@ -404,9 +441,25 @@ static bool vfio_assert_device_open(struct vfio_device *device)
return !WARN_ON_ONCE(!READ_ONCE(device->open_count));
}
-static int vfio_device_first_open(struct vfio_device *device,
- struct iommufd_ctx *iommufd)
+struct vfio_device_file *
+vfio_allocate_device_file(struct vfio_device *device)
{
+ struct vfio_device_file *df;
+
+ df = kzalloc(sizeof(*df), GFP_KERNEL_ACCOUNT);
+ if (!df)
+ return ERR_PTR(-ENOMEM);
+
+ df->device = device;
+ spin_lock_init(&df->kvm_ref_lock);
+
+ return df;
+}
+
+static int vfio_df_device_first_open(struct vfio_device_file *df)
+{
+ struct vfio_device *device = df->device;
+ struct iommufd_ctx *iommufd = df->iommufd;
int ret;
lockdep_assert_held(&device->dev_set->lock);
@@ -415,7 +468,7 @@ static int vfio_device_first_open(struct vfio_device *device,
return -ENODEV;
if (iommufd)
- ret = vfio_iommufd_bind(device, iommufd);
+ ret = vfio_df_iommufd_bind(df);
else
ret = vfio_device_group_use_iommu(device);
if (ret)
@@ -430,7 +483,7 @@ static int vfio_device_first_open(struct vfio_device *device,
err_unuse_iommu:
if (iommufd)
- vfio_iommufd_unbind(device);
+ vfio_df_iommufd_unbind(df);
else
vfio_device_group_unuse_iommu(device);
err_module_put:
@@ -438,29 +491,39 @@ err_module_put:
return ret;
}
-static void vfio_device_last_close(struct vfio_device *device,
- struct iommufd_ctx *iommufd)
+static void vfio_df_device_last_close(struct vfio_device_file *df)
{
+ struct vfio_device *device = df->device;
+ struct iommufd_ctx *iommufd = df->iommufd;
+
lockdep_assert_held(&device->dev_set->lock);
if (device->ops->close_device)
device->ops->close_device(device);
if (iommufd)
- vfio_iommufd_unbind(device);
+ vfio_df_iommufd_unbind(df);
else
vfio_device_group_unuse_iommu(device);
module_put(device->dev->driver->owner);
}
-int vfio_device_open(struct vfio_device *device, struct iommufd_ctx *iommufd)
+int vfio_df_open(struct vfio_device_file *df)
{
+ struct vfio_device *device = df->device;
int ret = 0;
lockdep_assert_held(&device->dev_set->lock);
+ /*
+ * Only the group path allows the device to be opened multiple
+ * times. The device cdev path has no secure way to support
+ * multiple opens.
+ */
+ if (device->open_count != 0 && !df->group)
+ return -EINVAL;
+
device->open_count++;
if (device->open_count == 1) {
- ret = vfio_device_first_open(device, iommufd);
+ ret = vfio_df_device_first_open(df);
if (ret)
device->open_count--;
}
@@ -468,14 +531,15 @@ int vfio_device_open(struct vfio_device *device, struct iommufd_ctx *iommufd)
return ret;
}
-void vfio_device_close(struct vfio_device *device,
- struct iommufd_ctx *iommufd)
+void vfio_df_close(struct vfio_device_file *df)
{
+ struct vfio_device *device = df->device;
+
lockdep_assert_held(&device->dev_set->lock);
vfio_assert_device_open(device);
if (device->open_count == 1)
- vfio_device_last_close(device, iommufd);
+ vfio_df_device_last_close(df);
device->open_count--;
}
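
The open_count rule above (multi-open via the group path only, single open via cdev) can be modelled in plain user-space C. A minimal sketch, all names illustrative:

    #include <stdbool.h>
    #include <stdio.h>

    struct dev { int open_count; };

    static int df_open(struct dev *d, bool has_group)
    {
            if (d->open_count != 0 && !has_group)
                    return -1;              /* -EINVAL in the kernel */
            d->open_count++;
            if (d->open_count == 1)
                    puts("first open: bind iommufd or use group iommu");
            return 0;
    }

    static void df_close(struct dev *d)
    {
            if (d->open_count == 1)
                    puts("last close: unbind / unuse iommu");
            d->open_count--;
    }

    int main(void)
    {
            struct dev d = { 0 };

            df_open(&d, true);                              /* group open, ok */
            df_open(&d, true);                              /* second group open, ok */
            printf("cdev reopen: %d\n", df_open(&d, false)); /* rejected */
            df_close(&d);
            df_close(&d);
            return 0;
    }
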
@@ -517,12 +581,18 @@ static inline void vfio_device_pm_runtime_put(struct vfio_device *device)
*/
static int vfio_device_fops_release(struct inode *inode, struct file *filep)
{
- struct vfio_device *device = filep->private_data;
+ struct vfio_device_file *df = filep->private_data;
+ struct vfio_device *device = df->device;
- vfio_device_group_close(device);
+ if (df->group)
+ vfio_df_group_close(df);
+ else
+ vfio_df_unbind_iommufd(df);
vfio_device_put_registration(device);
+ kfree(df);
+
return 0;
}
@@ -865,6 +935,53 @@ static int vfio_ioctl_device_feature_migration(struct vfio_device *device,
return 0;
}
+void vfio_combine_iova_ranges(struct rb_root_cached *root, u32 cur_nodes,
+ u32 req_nodes)
+{
+ struct interval_tree_node *prev, *curr, *comb_start, *comb_end;
+ unsigned long min_gap, curr_gap;
+
+ /* Special shortcut when a single range is required */
+ if (req_nodes == 1) {
+ unsigned long last;
+
+ comb_start = interval_tree_iter_first(root, 0, ULONG_MAX);
+ curr = comb_start;
+ while (curr) {
+ last = curr->last;
+ prev = curr;
+ curr = interval_tree_iter_next(curr, 0, ULONG_MAX);
+ if (prev != comb_start)
+ interval_tree_remove(prev, root);
+ }
+ comb_start->last = last;
+ return;
+ }
+
+ /* Combine ranges which have the smallest gap */
+ while (cur_nodes > req_nodes) {
+ prev = NULL;
+ min_gap = ULONG_MAX;
+ curr = interval_tree_iter_first(root, 0, ULONG_MAX);
+ while (curr) {
+ if (prev) {
+ curr_gap = curr->start - prev->last;
+ if (curr_gap < min_gap) {
+ min_gap = curr_gap;
+ comb_start = prev;
+ comb_end = curr;
+ }
+ }
+ prev = curr;
+ curr = interval_tree_iter_next(curr, 0, ULONG_MAX);
+ }
+ comb_start->last = comb_end->last;
+ interval_tree_remove(comb_end, root);
+ cur_nodes--;
+ }
+}
+EXPORT_SYMBOL_GPL(vfio_combine_iova_ranges);
+
/* Ranges should fit into a single kernel page */
#define LOG_MAX_RANGES \
(PAGE_SIZE / sizeof(struct vfio_device_feature_dma_logging_range))
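
For reference, the smallest-gap merging loop above behaves like this self-contained user-space model, which uses a sorted array in place of the kernel interval tree (illustrative only, not part of this patch):

    #include <stdio.h>
    #include <limits.h>

    struct range { unsigned long start, last; };

    static void combine_ranges(struct range *r, unsigned int *nr,
                               unsigned int req)
    {
            while (*nr > req) {
                    unsigned long min_gap = ULONG_MAX;
                    unsigned int i, victim = 1;

                    /* Find the adjacent pair separated by the smallest gap. */
                    for (i = 1; i < *nr; i++) {
                            unsigned long gap = r[i].start - r[i - 1].last;

                            if (gap < min_gap) {
                                    min_gap = gap;
                                    victim = i;
                            }
                    }
                    /* Merge r[victim] into its predecessor, close the hole. */
                    r[victim - 1].last = r[victim].last;
                    for (i = victim; i < *nr - 1; i++)
                            r[i] = r[i + 1];
                    (*nr)--;
            }
    }

    int main(void)
    {
            struct range r[] = {
                    { 0x0000, 0x0fff }, { 0x2000, 0x2fff },
                    { 0x3800, 0x3fff }, { 0x9000, 0x9fff },
            };
            unsigned int nr = 4, i;

            combine_ranges(r, &nr, 2);      /* expect two combined ranges */
            for (i = 0; i < nr; i++)
                    printf("[%#lx, %#lx]\n", r[i].start, r[i].last);
            return 0;
    }
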
@@ -1087,16 +1204,38 @@ static int vfio_ioctl_device_feature(struct vfio_device *device,
static long vfio_device_fops_unl_ioctl(struct file *filep,
unsigned int cmd, unsigned long arg)
{
- struct vfio_device *device = filep->private_data;
+ struct vfio_device_file *df = filep->private_data;
+ struct vfio_device *device = df->device;
+ void __user *uptr = (void __user *)arg;
int ret;
+ if (cmd == VFIO_DEVICE_BIND_IOMMUFD)
+ return vfio_df_ioctl_bind_iommufd(df, uptr);
+
+ /* Paired with smp_store_release() following vfio_df_open() */
+ if (!smp_load_acquire(&df->access_granted))
+ return -EINVAL;
+
ret = vfio_device_pm_runtime_get(device);
if (ret)
return ret;
+ /* cdev-only ioctls */
+ if (IS_ENABLED(CONFIG_VFIO_DEVICE_CDEV) && !df->group) {
+ switch (cmd) {
+ case VFIO_DEVICE_ATTACH_IOMMUFD_PT:
+ ret = vfio_df_ioctl_attach_pt(df, uptr);
+ goto out;
+
+ case VFIO_DEVICE_DETACH_IOMMUFD_PT:
+ ret = vfio_df_ioctl_detach_pt(df, uptr);
+ goto out;
+ }
+ }
+
switch (cmd) {
case VFIO_DEVICE_FEATURE:
- ret = vfio_ioctl_device_feature(device, (void __user *)arg);
+ ret = vfio_ioctl_device_feature(device, uptr);
break;
default:
@@ -1106,7 +1245,7 @@ static long vfio_device_fops_unl_ioctl(struct file *filep,
ret = device->ops->ioctl(device, cmd, arg);
break;
}
-
+out:
vfio_device_pm_runtime_put(device);
return ret;
}
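
The access_granted handshake pairs an smp_store_release() after vfio_df_open() with the smp_load_acquire() checks in the fops: any state written before the release (open_count, the iommufd binding) is guaranteed visible once access_granted reads true. A user-space analogue of the same publish/consume pattern using C11 atomics (names illustrative):

    #include <stdatomic.h>
    #include <stdbool.h>

    struct dev_file {
            int open_count;                 /* written before publication */
            atomic_bool access_granted;
    };

    static void writer_publish(struct dev_file *df)
    {
            df->open_count = 1;             /* device fully opened first */
            /* Pairs with the acquire load in reader_check(). */
            atomic_store_explicit(&df->access_granted, true,
                                  memory_order_release);
    }

    static bool reader_check(struct dev_file *df)
    {
            /* Pairs with the release store in writer_publish(). */
            if (!atomic_load_explicit(&df->access_granted,
                                      memory_order_acquire))
                    return false;           /* -EINVAL in the kernel code */
            return df->open_count > 0;      /* safe: happens-after the store */
    }

    int main(void)
    {
            struct dev_file df = { 0 };

            writer_publish(&df);
            return reader_check(&df) ? 0 : 1;
    }
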
@@ -1114,7 +1253,12 @@ static long vfio_device_fops_unl_ioctl(struct file *filep,
static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
size_t count, loff_t *ppos)
{
- struct vfio_device *device = filep->private_data;
+ struct vfio_device_file *df = filep->private_data;
+ struct vfio_device *device = df->device;
+
+ /* Paired with smp_store_release() following vfio_df_open() */
+ if (!smp_load_acquire(&df->access_granted))
+ return -EINVAL;
if (unlikely(!device->ops->read))
return -EINVAL;
@@ -1126,7 +1270,12 @@ static ssize_t vfio_device_fops_write(struct file *filep,
const char __user *buf,
size_t count, loff_t *ppos)
{
- struct vfio_device *device = filep->private_data;
+ struct vfio_device_file *df = filep->private_data;
+ struct vfio_device *device = df->device;
+
+ /* Paired with smp_store_release() following vfio_df_open() */
+ if (!smp_load_acquire(&df->access_granted))
+ return -EINVAL;
if (unlikely(!device->ops->write))
return -EINVAL;
@@ -1136,7 +1285,12 @@ static ssize_t vfio_device_fops_write(struct file *filep,
static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
{
- struct vfio_device *device = filep->private_data;
+ struct vfio_device_file *df = filep->private_data;
+ struct vfio_device *device = df->device;
+
+ /* Paired with smp_store_release() following vfio_df_open() */
+ if (!smp_load_acquire(&df->access_granted))
+ return -EINVAL;
if (unlikely(!device->ops->mmap))
return -EINVAL;
@@ -1146,6 +1300,7 @@ static int vfio_device_fops_mmap(struct file *filep, struct vm_area_struct *vma)
const struct file_operations vfio_device_fops = {
.owner = THIS_MODULE,
+ .open = vfio_device_fops_cdev_open,
.release = vfio_device_fops_release,
.read = vfio_device_fops_read,
.write = vfio_device_fops_write,
@@ -1154,6 +1309,88 @@ const struct file_operations vfio_device_fops = {
.mmap = vfio_device_fops_mmap,
};
+static struct vfio_device *vfio_device_from_file(struct file *file)
+{
+ struct vfio_device_file *df = file->private_data;
+
+ if (file->f_op != &vfio_device_fops)
+ return NULL;
+ return df->device;
+}
+
+/**
+ * vfio_file_is_valid - True if the file is a valid VFIO file
+ * @file: VFIO group file or VFIO device file
+ */
+bool vfio_file_is_valid(struct file *file)
+{
+ return vfio_group_from_file(file) ||
+ vfio_device_from_file(file);
+}
+EXPORT_SYMBOL_GPL(vfio_file_is_valid);
+
+/**
+ * vfio_file_enforced_coherent - True if the DMA associated with the VFIO file
+ * is always CPU cache coherent
+ * @file: VFIO group file or VFIO device file
+ *
+ * Enforced coherency means that the IOMMU ignores things like the PCIe no-snoop
+ * bit in DMA transactions. A return of false indicates that the user has
+ * rights to access additional instructions such as wbinvd on x86.
+ */
+bool vfio_file_enforced_coherent(struct file *file)
+{
+ struct vfio_device *device;
+ struct vfio_group *group;
+
+ group = vfio_group_from_file(file);
+ if (group)
+ return vfio_group_enforced_coherent(group);
+
+ device = vfio_device_from_file(file);
+ if (device)
+ return device_iommu_capable(device->dev,
+ IOMMU_CAP_ENFORCE_CACHE_COHERENCY);
+
+ return true;
+}
+EXPORT_SYMBOL_GPL(vfio_file_enforced_coherent);
+
+static void vfio_device_file_set_kvm(struct file *file, struct kvm *kvm)
+{
+ struct vfio_device_file *df = file->private_data;
+
+ /*
+ * The kvm is first recorded in the vfio_device_file, and will
+ * be propagated to vfio_device::kvm when the file is bound to
+ * iommufd successfully in the vfio device cdev path.
+ */
+ spin_lock(&df->kvm_ref_lock);
+ df->kvm = kvm;
+ spin_unlock(&df->kvm_ref_lock);
+}
+
+/**
+ * vfio_file_set_kvm - Link a kvm with VFIO drivers
+ * @file: VFIO group file or VFIO device file
+ * @kvm: KVM to link
+ *
+ * When a VFIO device is first opened the KVM will be available in
+ * device->kvm if one was associated with the file.
+ */
+void vfio_file_set_kvm(struct file *file, struct kvm *kvm)
+{
+ struct vfio_group *group;
+
+ group = vfio_group_from_file(file);
+ if (group)
+ vfio_group_set_kvm(group, kvm);
+
+ if (vfio_device_from_file(file))
+ vfio_device_file_set_kvm(file, kvm);
+}
+EXPORT_SYMBOL_GPL(vfio_file_set_kvm);
+
/*
* Sub-module support
*/
@@ -1172,6 +1409,9 @@ struct vfio_info_cap_header *vfio_info_cap_add(struct vfio_info_cap *caps,
void *buf;
struct vfio_info_cap_header *header, *tmp;
+ /* Ensure that the next capability struct will be aligned */
+ size = ALIGN(size, sizeof(u64));
+
buf = krealloc(caps->buf, caps->size + size, GFP_KERNEL);
if (!buf) {
kfree(caps->buf);
@@ -1205,6 +1445,9 @@ void vfio_info_cap_shift(struct vfio_info_cap *caps, size_t offset)
struct vfio_info_cap_header *tmp;
void *buf = (void *)caps->buf;
+ /* Capability structs should start with proper alignment */
+ WARN_ON(!IS_ALIGNED(offset, sizeof(u64)));
+
for (tmp = buf; tmp->next; tmp = buf + tmp->next - offset)
tmp->next += offset;
}
@@ -1293,6 +1536,8 @@ int vfio_pin_pages(struct vfio_device *device, dma_addr_t iova,
/* group->container cannot change while a vfio device is open */
if (!pages || !npage || WARN_ON(!vfio_assert_device_open(device)))
return -EINVAL;
+ if (!device->ops->dma_unmap)
+ return -EINVAL;
if (vfio_device_has_container(device))
return vfio_device_container_pin_pages(device, iova,
npage, prot, pages);
@@ -1330,6 +1575,8 @@ void vfio_unpin_pages(struct vfio_device *device, dma_addr_t iova, int npage)
{
if (WARN_ON(!vfio_assert_device_open(device)))
return;
+ if (WARN_ON(!device->ops->dma_unmap))
+ return;
if (vfio_device_has_container(device)) {
vfio_device_container_unpin_pages(device, iova, npage);
@@ -1415,9 +1662,16 @@ static int __init vfio_init(void)
goto err_dev_class;
}
+ ret = vfio_cdev_init(vfio.device_class);
+ if (ret)
+ goto err_alloc_dev_chrdev;
+
pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
return 0;
+err_alloc_dev_chrdev:
+ class_destroy(vfio.device_class);
+ vfio.device_class = NULL;
err_dev_class:
vfio_virqfd_exit();
err_virqfd:
@@ -1428,6 +1682,7 @@ err_virqfd:
static void __exit vfio_cleanup(void)
{
ida_destroy(&vfio.device_ida);
+ vfio_cdev_cleanup();
class_destroy(vfio.device_class);
vfio.device_class = NULL;
vfio_virqfd_exit();