From 48d8476b41eed63567dd2f0ad125c895b9ac648a Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 11 May 2018 09:05:02 -0600 Subject: vfio/type1: Fix task tracking for QEMU vCPU hotplug MAP_DMA ioctls might be called from various threads within a process, for example when using QEMU, the vCPU threads are often generating these calls and we therefore take a reference to that vCPU task. However, QEMU also supports vCPU hotplug on some machines and the task that called MAP_DMA may have exited by the time UNMAP_DMA is called, resulting in the mm_struct pointer being NULL and thus a failure to match against the existing mapping. To resolve this, we instead take a reference to the thread group_leader, which has the same mm_struct and resource limits, but is less likely exit, at least in the QEMU case. A difficulty here is guaranteeing that the capabilities of the group_leader match that of the calling thread, which we resolve by tracking CAP_IPC_LOCK at the time of calling rather than at an indeterminate time in the future. Potentially this also results in better efficiency as this is now recorded once per MAP_DMA ioctl. Reported-by: Xu Yandong Signed-off-by: Alex Williamson --- drivers/vfio/vfio_iommu_type1.c | 73 ++++++++++++++++++++++++++--------------- 1 file changed, 47 insertions(+), 26 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 3c082451ab1a..2c75b33db4ac 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -83,6 +83,7 @@ struct vfio_dma { size_t size; /* Map size (bytes) */ int prot; /* IOMMU_READ/WRITE */ bool iommu_mapped; + bool lock_cap; /* capable(CAP_IPC_LOCK) */ struct task_struct *task; struct rb_root pfn_list; /* Ex-user pinned pfn list */ }; @@ -253,29 +254,25 @@ static int vfio_iova_put_vfio_pfn(struct vfio_dma *dma, struct vfio_pfn *vpfn) return ret; } -static int vfio_lock_acct(struct task_struct *task, long npage, bool *lock_cap) +static int vfio_lock_acct(struct vfio_dma *dma, long npage, bool async) { struct mm_struct *mm; - bool is_current; int ret; if (!npage) return 0; - is_current = (task->mm == current->mm); - - mm = is_current ? task->mm : get_task_mm(task); + mm = async ? get_task_mm(dma->task) : dma->task->mm; if (!mm) return -ESRCH; /* process exited */ ret = down_write_killable(&mm->mmap_sem); if (!ret) { if (npage > 0) { - if (lock_cap ? !*lock_cap : - !has_capability(task, CAP_IPC_LOCK)) { + if (!dma->lock_cap) { unsigned long limit; - limit = task_rlimit(task, + limit = task_rlimit(dma->task, RLIMIT_MEMLOCK) >> PAGE_SHIFT; if (mm->locked_vm + npage > limit) @@ -289,7 +286,7 @@ static int vfio_lock_acct(struct task_struct *task, long npage, bool *lock_cap) up_write(&mm->mmap_sem); } - if (!is_current) + if (async) mmput(mm); return ret; @@ -400,7 +397,7 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr, */ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, long npage, unsigned long *pfn_base, - bool lock_cap, unsigned long limit) + unsigned long limit) { unsigned long pfn = 0; long ret, pinned = 0, lock_acct = 0; @@ -423,7 +420,7 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, * pages are already counted against the user. */ if (!rsvd && !vfio_find_vpfn(dma, iova)) { - if (!lock_cap && current->mm->locked_vm + 1 > limit) { + if (!dma->lock_cap && current->mm->locked_vm + 1 > limit) { put_pfn(*pfn_base, dma->prot); pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__, limit << PAGE_SHIFT); @@ -449,7 +446,7 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, } if (!rsvd && !vfio_find_vpfn(dma, iova)) { - if (!lock_cap && + if (!dma->lock_cap && current->mm->locked_vm + lock_acct + 1 > limit) { put_pfn(pfn, dma->prot); pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", @@ -462,7 +459,7 @@ static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr, } out: - ret = vfio_lock_acct(current, lock_acct, &lock_cap); + ret = vfio_lock_acct(dma, lock_acct, false); unpin_out: if (ret) { @@ -493,7 +490,7 @@ static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova, } if (do_accounting) - vfio_lock_acct(dma->task, locked - unlocked, NULL); + vfio_lock_acct(dma, locked - unlocked, true); return unlocked; } @@ -510,7 +507,7 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr, ret = vaddr_get_pfn(mm, vaddr, dma->prot, pfn_base); if (!ret && do_accounting && !is_invalid_reserved_pfn(*pfn_base)) { - ret = vfio_lock_acct(dma->task, 1, NULL); + ret = vfio_lock_acct(dma, 1, true); if (ret) { put_pfn(*pfn_base, dma->prot); if (ret == -ENOMEM) @@ -537,7 +534,7 @@ static int vfio_unpin_page_external(struct vfio_dma *dma, dma_addr_t iova, unlocked = vfio_iova_put_vfio_pfn(dma, vpfn); if (do_accounting) - vfio_lock_acct(dma->task, -unlocked, NULL); + vfio_lock_acct(dma, -unlocked, true); return unlocked; } @@ -829,7 +826,7 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma, unlocked += vfio_sync_unpin(dma, domain, &unmapped_region_list); if (do_accounting) { - vfio_lock_acct(dma->task, -unlocked, NULL); + vfio_lock_acct(dma, -unlocked, true); return 0; } return unlocked; @@ -1044,14 +1041,12 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma, size_t size = map_size; long npage; unsigned long pfn, limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - bool lock_cap = capable(CAP_IPC_LOCK); int ret = 0; while (size) { /* Pin a contiguous chunk of memory */ npage = vfio_pin_pages_remote(dma, vaddr + dma->size, - size >> PAGE_SHIFT, &pfn, - lock_cap, limit); + size >> PAGE_SHIFT, &pfn, limit); if (npage <= 0) { WARN_ON(!npage); ret = (int)npage; @@ -1126,8 +1121,36 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu, dma->iova = iova; dma->vaddr = vaddr; dma->prot = prot; - get_task_struct(current); - dma->task = current; + + /* + * We need to be able to both add to a task's locked memory and test + * against the locked memory limit and we need to be able to do both + * outside of this call path as pinning can be asynchronous via the + * external interfaces for mdev devices. RLIMIT_MEMLOCK requires a + * task_struct and VM locked pages requires an mm_struct, however + * holding an indefinite mm reference is not recommended, therefore we + * only hold a reference to a task. We could hold a reference to + * current, however QEMU uses this call path through vCPU threads, + * which can be killed resulting in a NULL mm and failure in the unmap + * path when called via a different thread. Avoid this problem by + * using the group_leader as threads within the same group require + * both CLONE_THREAD and CLONE_VM and will therefore use the same + * mm_struct. + * + * Previously we also used the task for testing CAP_IPC_LOCK at the + * time of pinning and accounting, however has_capability() makes use + * of real_cred, a copy-on-write field, so we can't guarantee that it + * matches group_leader, or in fact that it might not change by the + * time it's evaluated. If a process were to call MAP_DMA with + * CAP_IPC_LOCK but later drop it, it doesn't make sense that they + * possibly see different results for an iommu_mapped vfio_dma vs + * externally mapped. Therefore track CAP_IPC_LOCK in vfio_dma at the + * time of calling MAP_DMA. + */ + get_task_struct(current->group_leader); + dma->task = current->group_leader; + dma->lock_cap = capable(CAP_IPC_LOCK); + dma->pfn_list = RB_ROOT; /* Insert zero-sized and grow as we map chunks of it */ @@ -1162,7 +1185,6 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, struct vfio_domain *d; struct rb_node *n; unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; - bool lock_cap = capable(CAP_IPC_LOCK); int ret; /* Arbitrarily pick the first domain in the list for lookups */ @@ -1209,8 +1231,7 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu, npage = vfio_pin_pages_remote(dma, vaddr, n >> PAGE_SHIFT, - &pfn, lock_cap, - limit); + &pfn, limit); if (npage <= 0) { WARN_ON(!npage); ret = (int)npage; @@ -1487,7 +1508,7 @@ static void vfio_iommu_unmap_unpin_reaccount(struct vfio_iommu *iommu) if (!is_invalid_reserved_pfn(vpfn->pfn)) locked++; } - vfio_lock_acct(dma->task, locked - unlocked, NULL); + vfio_lock_acct(dma, locked - unlocked, true); } } -- cgit v1.2.3 From d61fc96f47fdac1f031ed4eafa9106fe10cdaa37 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Fri, 11 May 2018 09:05:03 -0600 Subject: sample: vfio mdev display - host device Simple framebuffer display, demo-ing the vfio region display interface (VFIO_GFX_PLANE_TYPE_REGION). Signed-off-by: Gerd Hoffmann Signed-off-by: Alex Williamson --- samples/Kconfig | 8 + samples/vfio-mdev/Makefile | 1 + samples/vfio-mdev/mdpy-defs.h | 22 ++ samples/vfio-mdev/mdpy.c | 807 ++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 838 insertions(+) create mode 100644 samples/vfio-mdev/mdpy-defs.h create mode 100644 samples/vfio-mdev/mdpy.c diff --git a/samples/Kconfig b/samples/Kconfig index 3db002b9e1d3..960f19b24df0 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -115,6 +115,14 @@ config SAMPLE_VFIO_MDEV_MTTY Build a virtual tty sample driver for use as a VFIO mediated device +config SAMPLE_VFIO_MDEV_MDPY + tristate "Build VFIO mdpy example mediated device sample code -- loadable modules only" + depends on VFIO_MDEV_DEVICE && m + help + Build a virtual display sample driver for use as a VFIO + mediated device. It is a simple framebuffer and supports + the region display interface (VFIO_GFX_PLANE_TYPE_REGION). + config SAMPLE_STATX bool "Build example extended-stat using code" depends on BROKEN diff --git a/samples/vfio-mdev/Makefile b/samples/vfio-mdev/Makefile index cbbd868a50a8..031d6b88e9e9 100644 --- a/samples/vfio-mdev/Makefile +++ b/samples/vfio-mdev/Makefile @@ -1 +1,2 @@ obj-$(CONFIG_SAMPLE_VFIO_MDEV_MTTY) += mtty.o +obj-$(CONFIG_SAMPLE_VFIO_MDEV_MDPY) += mdpy.o diff --git a/samples/vfio-mdev/mdpy-defs.h b/samples/vfio-mdev/mdpy-defs.h new file mode 100644 index 000000000000..96b3b1b49d34 --- /dev/null +++ b/samples/vfio-mdev/mdpy-defs.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Simple pci display device. + * + * Framebuffer memory is pci bar 0. + * Configuration (read-only) is in pci config space. + * Format field uses drm fourcc codes. + * ATM only DRM_FORMAT_XRGB8888 is supported. + */ + +/* pci ids */ +#define MDPY_PCI_VENDOR_ID 0x1b36 /* redhat */ +#define MDPY_PCI_DEVICE_ID 0x000f +#define MDPY_PCI_SUBVENDOR_ID PCI_SUBVENDOR_ID_REDHAT_QUMRANET +#define MDPY_PCI_SUBDEVICE_ID PCI_SUBDEVICE_ID_QEMU + +/* pci cfg space offsets for fb config (dword) */ +#define MDPY_VENDORCAP_OFFSET 0x40 +#define MDPY_VENDORCAP_SIZE 0x10 +#define MDPY_FORMAT_OFFSET (MDPY_VENDORCAP_OFFSET + 0x04) +#define MDPY_WIDTH_OFFSET (MDPY_VENDORCAP_OFFSET + 0x08) +#define MDPY_HEIGHT_OFFSET (MDPY_VENDORCAP_OFFSET + 0x0c) diff --git a/samples/vfio-mdev/mdpy.c b/samples/vfio-mdev/mdpy.c new file mode 100644 index 000000000000..96e7969c473a --- /dev/null +++ b/samples/vfio-mdev/mdpy.c @@ -0,0 +1,807 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Mediated virtual PCI display host device driver + * + * See mdpy-defs.h for device specs + * + * (c) Gerd Hoffmann + * + * based on mtty driver which is: + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * Author: Neo Jia + * Kirti Wankhede + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "mdpy-defs.h" + +#define MDPY_NAME "mdpy" +#define MDPY_CLASS_NAME "mdpy" + +#define MDPY_CONFIG_SPACE_SIZE 0xff +#define MDPY_MEMORY_BAR_OFFSET PAGE_SIZE +#define MDPY_DISPLAY_REGION 16 + +#define STORE_LE16(addr, val) (*(u16 *)addr = val) +#define STORE_LE32(addr, val) (*(u32 *)addr = val) + + +MODULE_LICENSE("GPL v2"); + +static int max_devices = 4; +module_param_named(count, max_devices, int, 0444); +MODULE_PARM_DESC(count, "number of " MDPY_NAME " devices"); + + +#define MDPY_TYPE_1 "vga" +#define MDPY_TYPE_2 "xga" +#define MDPY_TYPE_3 "hd" + +static const struct mdpy_type { + const char *name; + u32 format; + u32 bytepp; + u32 width; + u32 height; +} mdpy_types[] = { + { + .name = MDPY_CLASS_NAME "-" MDPY_TYPE_1, + .format = DRM_FORMAT_XRGB8888, + .bytepp = 4, + .width = 640, + .height = 480, + }, { + .name = MDPY_CLASS_NAME "-" MDPY_TYPE_2, + .format = DRM_FORMAT_XRGB8888, + .bytepp = 4, + .width = 1024, + .height = 768, + }, { + .name = MDPY_CLASS_NAME "-" MDPY_TYPE_3, + .format = DRM_FORMAT_XRGB8888, + .bytepp = 4, + .width = 1920, + .height = 1080, + }, +}; + +static dev_t mdpy_devt; +static struct class *mdpy_class; +static struct cdev mdpy_cdev; +static struct device mdpy_dev; +static u32 mdpy_count; + +/* State of each mdev device */ +struct mdev_state { + u8 *vconfig; + u32 bar_mask; + struct mutex ops_lock; + struct mdev_device *mdev; + struct vfio_device_info dev_info; + + const struct mdpy_type *type; + u32 memsize; + void *memblk; +}; + +static const struct mdpy_type *mdpy_find_type(struct kobject *kobj) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(mdpy_types); i++) + if (strcmp(mdpy_types[i].name, kobj->name) == 0) + return mdpy_types + i; + return NULL; +} + +static void mdpy_create_config_space(struct mdev_state *mdev_state) +{ + STORE_LE16((u16 *) &mdev_state->vconfig[PCI_VENDOR_ID], + MDPY_PCI_VENDOR_ID); + STORE_LE16((u16 *) &mdev_state->vconfig[PCI_DEVICE_ID], + MDPY_PCI_DEVICE_ID); + STORE_LE16((u16 *) &mdev_state->vconfig[PCI_SUBSYSTEM_VENDOR_ID], + MDPY_PCI_SUBVENDOR_ID); + STORE_LE16((u16 *) &mdev_state->vconfig[PCI_SUBSYSTEM_ID], + MDPY_PCI_SUBDEVICE_ID); + + STORE_LE16((u16 *) &mdev_state->vconfig[PCI_COMMAND], + PCI_COMMAND_IO | PCI_COMMAND_MEMORY); + STORE_LE16((u16 *) &mdev_state->vconfig[PCI_STATUS], + PCI_STATUS_CAP_LIST); + STORE_LE16((u16 *) &mdev_state->vconfig[PCI_CLASS_DEVICE], + PCI_CLASS_DISPLAY_OTHER); + mdev_state->vconfig[PCI_CLASS_REVISION] = 0x01; + + STORE_LE32((u32 *) &mdev_state->vconfig[PCI_BASE_ADDRESS_0], + PCI_BASE_ADDRESS_SPACE_MEMORY | + PCI_BASE_ADDRESS_MEM_TYPE_32 | + PCI_BASE_ADDRESS_MEM_PREFETCH); + mdev_state->bar_mask = ~(mdev_state->memsize) + 1; + + /* vendor specific capability for the config registers */ + mdev_state->vconfig[PCI_CAPABILITY_LIST] = MDPY_VENDORCAP_OFFSET; + mdev_state->vconfig[MDPY_VENDORCAP_OFFSET + 0] = 0x09; /* vendor cap */ + mdev_state->vconfig[MDPY_VENDORCAP_OFFSET + 1] = 0x00; /* next ptr */ + mdev_state->vconfig[MDPY_VENDORCAP_OFFSET + 2] = MDPY_VENDORCAP_SIZE; + STORE_LE32((u32 *) &mdev_state->vconfig[MDPY_FORMAT_OFFSET], + mdev_state->type->format); + STORE_LE32((u32 *) &mdev_state->vconfig[MDPY_WIDTH_OFFSET], + mdev_state->type->width); + STORE_LE32((u32 *) &mdev_state->vconfig[MDPY_HEIGHT_OFFSET], + mdev_state->type->height); +} + +static void handle_pci_cfg_write(struct mdev_state *mdev_state, u16 offset, + char *buf, u32 count) +{ + struct device *dev = mdev_dev(mdev_state->mdev); + u32 cfg_addr; + + switch (offset) { + case PCI_BASE_ADDRESS_0: + cfg_addr = *(u32 *)buf; + + if (cfg_addr == 0xffffffff) { + cfg_addr = (cfg_addr & mdev_state->bar_mask); + } else { + cfg_addr &= PCI_BASE_ADDRESS_MEM_MASK; + if (cfg_addr) + dev_info(dev, "BAR0 @ 0x%x\n", cfg_addr); + } + + cfg_addr |= (mdev_state->vconfig[offset] & + ~PCI_BASE_ADDRESS_MEM_MASK); + STORE_LE32(&mdev_state->vconfig[offset], cfg_addr); + break; + } +} + +static ssize_t mdev_access(struct mdev_device *mdev, char *buf, size_t count, + loff_t pos, bool is_write) +{ + struct mdev_state *mdev_state = mdev_get_drvdata(mdev); + struct device *dev = mdev_dev(mdev); + int ret = 0; + + mutex_lock(&mdev_state->ops_lock); + + if (pos < MDPY_CONFIG_SPACE_SIZE) { + if (is_write) + handle_pci_cfg_write(mdev_state, pos, buf, count); + else + memcpy(buf, (mdev_state->vconfig + pos), count); + + } else if ((pos >= MDPY_MEMORY_BAR_OFFSET) && + (pos + count <= + MDPY_MEMORY_BAR_OFFSET + mdev_state->memsize)) { + pos -= MDPY_MEMORY_BAR_OFFSET; + if (is_write) + memcpy(mdev_state->memblk, buf, count); + else + memcpy(buf, mdev_state->memblk, count); + + } else { + dev_info(dev, "%s: %s @0x%llx (unhandled)\n", + __func__, is_write ? "WR" : "RD", pos); + ret = -1; + goto accessfailed; + } + + ret = count; + + +accessfailed: + mutex_unlock(&mdev_state->ops_lock); + + return ret; +} + +static int mdpy_reset(struct mdev_device *mdev) +{ + struct mdev_state *mdev_state = mdev_get_drvdata(mdev); + u32 stride, i; + + /* initialize with gray gradient */ + stride = mdev_state->type->width * mdev_state->type->bytepp; + for (i = 0; i < mdev_state->type->height; i++) + memset(mdev_state->memblk + i * stride, + i * 255 / mdev_state->type->height, + stride); + return 0; +} + +static int mdpy_create(struct kobject *kobj, struct mdev_device *mdev) +{ + const struct mdpy_type *type = mdpy_find_type(kobj); + struct device *dev = mdev_dev(mdev); + struct mdev_state *mdev_state; + u32 fbsize; + + if (mdpy_count >= max_devices) + return -ENOMEM; + + mdev_state = kzalloc(sizeof(struct mdev_state), GFP_KERNEL); + if (mdev_state == NULL) + return -ENOMEM; + + mdev_state->vconfig = kzalloc(MDPY_CONFIG_SPACE_SIZE, GFP_KERNEL); + if (mdev_state->vconfig == NULL) { + kfree(mdev_state); + return -ENOMEM; + } + + if (!type) + type = &mdpy_types[0]; + fbsize = roundup_pow_of_two(type->width * type->height * type->bytepp); + + mdev_state->memblk = vmalloc_user(fbsize); + if (!mdev_state->memblk) { + kfree(mdev_state->vconfig); + kfree(mdev_state); + return -ENOMEM; + } + dev_info(dev, "%s: %s (%dx%d)\n", + __func__, kobj->name, type->width, type->height); + + mutex_init(&mdev_state->ops_lock); + mdev_state->mdev = mdev; + mdev_set_drvdata(mdev, mdev_state); + + mdev_state->type = type; + mdev_state->memsize = fbsize; + mdpy_create_config_space(mdev_state); + mdpy_reset(mdev); + + mdpy_count++; + return 0; +} + +static int mdpy_remove(struct mdev_device *mdev) +{ + struct mdev_state *mdev_state = mdev_get_drvdata(mdev); + struct device *dev = mdev_dev(mdev); + + dev_info(dev, "%s\n", __func__); + + mdev_set_drvdata(mdev, NULL); + vfree(mdev_state->memblk); + kfree(mdev_state->vconfig); + kfree(mdev_state); + + mdpy_count--; + return 0; +} + +static ssize_t mdpy_read(struct mdev_device *mdev, char __user *buf, + size_t count, loff_t *ppos) +{ + unsigned int done = 0; + int ret; + + while (count) { + size_t filled; + + if (count >= 4 && !(*ppos % 4)) { + u32 val; + + ret = mdev_access(mdev, (char *)&val, sizeof(val), + *ppos, false); + if (ret <= 0) + goto read_err; + + if (copy_to_user(buf, &val, sizeof(val))) + goto read_err; + + filled = 4; + } else if (count >= 2 && !(*ppos % 2)) { + u16 val; + + ret = mdev_access(mdev, (char *)&val, sizeof(val), + *ppos, false); + if (ret <= 0) + goto read_err; + + if (copy_to_user(buf, &val, sizeof(val))) + goto read_err; + + filled = 2; + } else { + u8 val; + + ret = mdev_access(mdev, (char *)&val, sizeof(val), + *ppos, false); + if (ret <= 0) + goto read_err; + + if (copy_to_user(buf, &val, sizeof(val))) + goto read_err; + + filled = 1; + } + + count -= filled; + done += filled; + *ppos += filled; + buf += filled; + } + + return done; + +read_err: + return -EFAULT; +} + +static ssize_t mdpy_write(struct mdev_device *mdev, const char __user *buf, + size_t count, loff_t *ppos) +{ + unsigned int done = 0; + int ret; + + while (count) { + size_t filled; + + if (count >= 4 && !(*ppos % 4)) { + u32 val; + + if (copy_from_user(&val, buf, sizeof(val))) + goto write_err; + + ret = mdev_access(mdev, (char *)&val, sizeof(val), + *ppos, true); + if (ret <= 0) + goto write_err; + + filled = 4; + } else if (count >= 2 && !(*ppos % 2)) { + u16 val; + + if (copy_from_user(&val, buf, sizeof(val))) + goto write_err; + + ret = mdev_access(mdev, (char *)&val, sizeof(val), + *ppos, true); + if (ret <= 0) + goto write_err; + + filled = 2; + } else { + u8 val; + + if (copy_from_user(&val, buf, sizeof(val))) + goto write_err; + + ret = mdev_access(mdev, (char *)&val, sizeof(val), + *ppos, true); + if (ret <= 0) + goto write_err; + + filled = 1; + } + count -= filled; + done += filled; + *ppos += filled; + buf += filled; + } + + return done; +write_err: + return -EFAULT; +} + +static int mdpy_mmap(struct mdev_device *mdev, struct vm_area_struct *vma) +{ + struct mdev_state *mdev_state = mdev_get_drvdata(mdev); + + if (vma->vm_pgoff != MDPY_MEMORY_BAR_OFFSET >> PAGE_SHIFT) + return -EINVAL; + if (vma->vm_end < vma->vm_start) + return -EINVAL; + if (vma->vm_end - vma->vm_start > mdev_state->memsize) + return -EINVAL; + if ((vma->vm_flags & VM_SHARED) == 0) + return -EINVAL; + + return remap_vmalloc_range_partial(vma, vma->vm_start, + mdev_state->memblk, + vma->vm_end - vma->vm_start); +} + +static int mdpy_get_region_info(struct mdev_device *mdev, + struct vfio_region_info *region_info, + u16 *cap_type_id, void **cap_type) +{ + struct mdev_state *mdev_state; + + mdev_state = mdev_get_drvdata(mdev); + if (!mdev_state) + return -EINVAL; + + if (region_info->index >= VFIO_PCI_NUM_REGIONS && + region_info->index != MDPY_DISPLAY_REGION) + return -EINVAL; + + switch (region_info->index) { + case VFIO_PCI_CONFIG_REGION_INDEX: + region_info->offset = 0; + region_info->size = MDPY_CONFIG_SPACE_SIZE; + region_info->flags = (VFIO_REGION_INFO_FLAG_READ | + VFIO_REGION_INFO_FLAG_WRITE); + break; + case VFIO_PCI_BAR0_REGION_INDEX: + case MDPY_DISPLAY_REGION: + region_info->offset = MDPY_MEMORY_BAR_OFFSET; + region_info->size = mdev_state->memsize; + region_info->flags = (VFIO_REGION_INFO_FLAG_READ | + VFIO_REGION_INFO_FLAG_WRITE | + VFIO_REGION_INFO_FLAG_MMAP); + break; + default: + region_info->size = 0; + region_info->offset = 0; + region_info->flags = 0; + } + + return 0; +} + +static int mdpy_get_irq_info(struct mdev_device *mdev, + struct vfio_irq_info *irq_info) +{ + irq_info->count = 0; + return 0; +} + +static int mdpy_get_device_info(struct mdev_device *mdev, + struct vfio_device_info *dev_info) +{ + dev_info->flags = VFIO_DEVICE_FLAGS_PCI; + dev_info->num_regions = VFIO_PCI_NUM_REGIONS; + dev_info->num_irqs = VFIO_PCI_NUM_IRQS; + return 0; +} + +static int mdpy_query_gfx_plane(struct mdev_device *mdev, + struct vfio_device_gfx_plane_info *plane) +{ + struct mdev_state *mdev_state = mdev_get_drvdata(mdev); + + if (plane->flags & VFIO_GFX_PLANE_TYPE_PROBE) { + if (plane->flags == (VFIO_GFX_PLANE_TYPE_PROBE | + VFIO_GFX_PLANE_TYPE_REGION)) + return 0; + return -EINVAL; + } + + if (plane->flags != VFIO_GFX_PLANE_TYPE_REGION) + return -EINVAL; + + plane->drm_format = mdev_state->type->format; + plane->width = mdev_state->type->width; + plane->height = mdev_state->type->height; + plane->stride = (mdev_state->type->width * + mdev_state->type->bytepp); + plane->size = mdev_state->memsize; + plane->region_index = MDPY_DISPLAY_REGION; + + /* unused */ + plane->drm_format_mod = 0; + plane->x_pos = 0; + plane->y_pos = 0; + plane->x_hot = 0; + plane->y_hot = 0; + + return 0; +} + +static long mdpy_ioctl(struct mdev_device *mdev, unsigned int cmd, + unsigned long arg) +{ + int ret = 0; + unsigned long minsz; + struct mdev_state *mdev_state; + + mdev_state = mdev_get_drvdata(mdev); + + switch (cmd) { + case VFIO_DEVICE_GET_INFO: + { + struct vfio_device_info info; + + minsz = offsetofend(struct vfio_device_info, num_irqs); + + if (copy_from_user(&info, (void __user *)arg, minsz)) + return -EFAULT; + + if (info.argsz < minsz) + return -EINVAL; + + ret = mdpy_get_device_info(mdev, &info); + if (ret) + return ret; + + memcpy(&mdev_state->dev_info, &info, sizeof(info)); + + if (copy_to_user((void __user *)arg, &info, minsz)) + return -EFAULT; + + return 0; + } + case VFIO_DEVICE_GET_REGION_INFO: + { + struct vfio_region_info info; + u16 cap_type_id = 0; + void *cap_type = NULL; + + minsz = offsetofend(struct vfio_region_info, offset); + + if (copy_from_user(&info, (void __user *)arg, minsz)) + return -EFAULT; + + if (info.argsz < minsz) + return -EINVAL; + + ret = mdpy_get_region_info(mdev, &info, &cap_type_id, + &cap_type); + if (ret) + return ret; + + if (copy_to_user((void __user *)arg, &info, minsz)) + return -EFAULT; + + return 0; + } + + case VFIO_DEVICE_GET_IRQ_INFO: + { + struct vfio_irq_info info; + + minsz = offsetofend(struct vfio_irq_info, count); + + if (copy_from_user(&info, (void __user *)arg, minsz)) + return -EFAULT; + + if ((info.argsz < minsz) || + (info.index >= mdev_state->dev_info.num_irqs)) + return -EINVAL; + + ret = mdpy_get_irq_info(mdev, &info); + if (ret) + return ret; + + if (copy_to_user((void __user *)arg, &info, minsz)) + return -EFAULT; + + return 0; + } + + case VFIO_DEVICE_QUERY_GFX_PLANE: + { + struct vfio_device_gfx_plane_info plane; + + minsz = offsetofend(struct vfio_device_gfx_plane_info, + region_index); + + if (copy_from_user(&plane, (void __user *)arg, minsz)) + return -EFAULT; + + if (plane.argsz < minsz) + return -EINVAL; + + ret = mdpy_query_gfx_plane(mdev, &plane); + if (ret) + return ret; + + if (copy_to_user((void __user *)arg, &plane, minsz)) + return -EFAULT; + + return 0; + } + + case VFIO_DEVICE_SET_IRQS: + return -EINVAL; + + case VFIO_DEVICE_RESET: + return mdpy_reset(mdev); + } + return -ENOTTY; +} + +static int mdpy_open(struct mdev_device *mdev) +{ + if (!try_module_get(THIS_MODULE)) + return -ENODEV; + + return 0; +} + +static void mdpy_close(struct mdev_device *mdev) +{ + module_put(THIS_MODULE); +} + +static ssize_t +resolution_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct mdev_device *mdev = mdev_from_dev(dev); + struct mdev_state *mdev_state = mdev_get_drvdata(mdev); + + return sprintf(buf, "%dx%d\n", + mdev_state->type->width, + mdev_state->type->height); +} +static DEVICE_ATTR_RO(resolution); + +static struct attribute *mdev_dev_attrs[] = { + &dev_attr_resolution.attr, + NULL, +}; + +static const struct attribute_group mdev_dev_group = { + .name = "vendor", + .attrs = mdev_dev_attrs, +}; + +const struct attribute_group *mdev_dev_groups[] = { + &mdev_dev_group, + NULL, +}; + +static ssize_t +name_show(struct kobject *kobj, struct device *dev, char *buf) +{ + return sprintf(buf, "%s\n", kobj->name); +} +MDEV_TYPE_ATTR_RO(name); + +static ssize_t +description_show(struct kobject *kobj, struct device *dev, char *buf) +{ + const struct mdpy_type *type = mdpy_find_type(kobj); + + return sprintf(buf, "virtual display, %dx%d framebuffer\n", + type ? type->width : 0, + type ? type->height : 0); +} +MDEV_TYPE_ATTR_RO(description); + +static ssize_t +available_instances_show(struct kobject *kobj, struct device *dev, char *buf) +{ + return sprintf(buf, "%d\n", max_devices - mdpy_count); +} +MDEV_TYPE_ATTR_RO(available_instances); + +static ssize_t device_api_show(struct kobject *kobj, struct device *dev, + char *buf) +{ + return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING); +} +MDEV_TYPE_ATTR_RO(device_api); + +static struct attribute *mdev_types_attrs[] = { + &mdev_type_attr_name.attr, + &mdev_type_attr_description.attr, + &mdev_type_attr_device_api.attr, + &mdev_type_attr_available_instances.attr, + NULL, +}; + +static struct attribute_group mdev_type_group1 = { + .name = MDPY_TYPE_1, + .attrs = mdev_types_attrs, +}; + +static struct attribute_group mdev_type_group2 = { + .name = MDPY_TYPE_2, + .attrs = mdev_types_attrs, +}; + +static struct attribute_group mdev_type_group3 = { + .name = MDPY_TYPE_3, + .attrs = mdev_types_attrs, +}; + +static struct attribute_group *mdev_type_groups[] = { + &mdev_type_group1, + &mdev_type_group2, + &mdev_type_group3, + NULL, +}; + +static const struct mdev_parent_ops mdev_fops = { + .owner = THIS_MODULE, + .mdev_attr_groups = mdev_dev_groups, + .supported_type_groups = mdev_type_groups, + .create = mdpy_create, + .remove = mdpy_remove, + .open = mdpy_open, + .release = mdpy_close, + .read = mdpy_read, + .write = mdpy_write, + .ioctl = mdpy_ioctl, + .mmap = mdpy_mmap, +}; + +static const struct file_operations vd_fops = { + .owner = THIS_MODULE, +}; + +static void mdpy_device_release(struct device *dev) +{ + /* nothing */ +} + +static int __init mdpy_dev_init(void) +{ + int ret = 0; + + ret = alloc_chrdev_region(&mdpy_devt, 0, MINORMASK, MDPY_NAME); + if (ret < 0) { + pr_err("Error: failed to register mdpy_dev, err: %d\n", ret); + return ret; + } + cdev_init(&mdpy_cdev, &vd_fops); + cdev_add(&mdpy_cdev, mdpy_devt, MINORMASK); + pr_info("%s: major %d\n", __func__, MAJOR(mdpy_devt)); + + mdpy_class = class_create(THIS_MODULE, MDPY_CLASS_NAME); + if (IS_ERR(mdpy_class)) { + pr_err("Error: failed to register mdpy_dev class\n"); + ret = PTR_ERR(mdpy_class); + goto failed1; + } + mdpy_dev.class = mdpy_class; + mdpy_dev.release = mdpy_device_release; + dev_set_name(&mdpy_dev, "%s", MDPY_NAME); + + ret = device_register(&mdpy_dev); + if (ret) + goto failed2; + + ret = mdev_register_device(&mdpy_dev, &mdev_fops); + if (ret) + goto failed3; + + return 0; + +failed3: + device_unregister(&mdpy_dev); +failed2: + class_destroy(mdpy_class); +failed1: + cdev_del(&mdpy_cdev); + unregister_chrdev_region(mdpy_devt, MINORMASK); + return ret; +} + +static void __exit mdpy_dev_exit(void) +{ + mdpy_dev.bus = NULL; + mdev_unregister_device(&mdpy_dev); + + device_unregister(&mdpy_dev); + cdev_del(&mdpy_cdev); + unregister_chrdev_region(mdpy_devt, MINORMASK); + class_destroy(mdpy_class); + mdpy_class = NULL; +} + +module_init(mdpy_dev_init) +module_exit(mdpy_dev_exit) -- cgit v1.2.3 From cacade1946a41b38dcdf3defb0f931453587eac9 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Fri, 11 May 2018 09:05:04 -0600 Subject: sample: vfio mdev display - guest driver Guest fbdev driver for CONFIG_SAMPLE_VFIO_MDEV_MDPY. Signed-off-by: Gerd Hoffmann Signed-off-by: Alex Williamson --- samples/Kconfig | 9 ++ samples/vfio-mdev/Makefile | 1 + samples/vfio-mdev/mdpy-fb.c | 232 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 242 insertions(+) create mode 100644 samples/vfio-mdev/mdpy-fb.c diff --git a/samples/Kconfig b/samples/Kconfig index 960f19b24df0..a6ff147d8744 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -123,6 +123,15 @@ config SAMPLE_VFIO_MDEV_MDPY mediated device. It is a simple framebuffer and supports the region display interface (VFIO_GFX_PLANE_TYPE_REGION). +config SAMPLE_VFIO_MDEV_MDPY_FB + tristate "Build VFIO mdpy example guest fbdev driver -- loadable module only" + depends on FB && m + select FB_CFB_FILLRECT + select FB_CFB_COPYAREA + select FB_CFB_IMAGEBLIT + help + Guest fbdev driver for the virtual display sample driver. + config SAMPLE_STATX bool "Build example extended-stat using code" depends on BROKEN diff --git a/samples/vfio-mdev/Makefile b/samples/vfio-mdev/Makefile index 031d6b88e9e9..7a5790aaec9c 100644 --- a/samples/vfio-mdev/Makefile +++ b/samples/vfio-mdev/Makefile @@ -1,2 +1,3 @@ obj-$(CONFIG_SAMPLE_VFIO_MDEV_MTTY) += mtty.o obj-$(CONFIG_SAMPLE_VFIO_MDEV_MDPY) += mdpy.o +obj-$(CONFIG_SAMPLE_VFIO_MDEV_MDPY_FB) += mdpy-fb.o diff --git a/samples/vfio-mdev/mdpy-fb.c b/samples/vfio-mdev/mdpy-fb.c new file mode 100644 index 000000000000..2719bb259653 --- /dev/null +++ b/samples/vfio-mdev/mdpy-fb.c @@ -0,0 +1,232 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Framebuffer driver for mdpy (mediated virtual pci display device). + * + * See mdpy-defs.h for device specs + * + * (c) Gerd Hoffmann + * + * Using some code snippets from simplefb and cirrusfb. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ +#include +#include +#include +#include +#include +#include +#include "mdpy-defs.h" + +static const struct fb_fix_screeninfo mdpy_fb_fix = { + .id = "mdpy-fb", + .type = FB_TYPE_PACKED_PIXELS, + .visual = FB_VISUAL_TRUECOLOR, + .accel = FB_ACCEL_NONE, +}; + +static const struct fb_var_screeninfo mdpy_fb_var = { + .height = -1, + .width = -1, + .activate = FB_ACTIVATE_NOW, + .vmode = FB_VMODE_NONINTERLACED, + + .bits_per_pixel = 32, + .transp.offset = 24, + .red.offset = 16, + .green.offset = 8, + .blue.offset = 0, + .transp.length = 8, + .red.length = 8, + .green.length = 8, + .blue.length = 8, +}; + +#define PSEUDO_PALETTE_SIZE 16 + +struct mdpy_fb_par { + u32 palette[PSEUDO_PALETTE_SIZE]; +}; + +static int mdpy_fb_setcolreg(u_int regno, u_int red, u_int green, u_int blue, + u_int transp, struct fb_info *info) +{ + u32 *pal = info->pseudo_palette; + u32 cr = red >> (16 - info->var.red.length); + u32 cg = green >> (16 - info->var.green.length); + u32 cb = blue >> (16 - info->var.blue.length); + u32 value, mask; + + if (regno >= PSEUDO_PALETTE_SIZE) + return -EINVAL; + + value = (cr << info->var.red.offset) | + (cg << info->var.green.offset) | + (cb << info->var.blue.offset); + if (info->var.transp.length > 0) { + mask = (1 << info->var.transp.length) - 1; + mask <<= info->var.transp.offset; + value |= mask; + } + pal[regno] = value; + + return 0; +} + +static void mdpy_fb_destroy(struct fb_info *info) +{ + if (info->screen_base) + iounmap(info->screen_base); +} + +static struct fb_ops mdpy_fb_ops = { + .owner = THIS_MODULE, + .fb_destroy = mdpy_fb_destroy, + .fb_setcolreg = mdpy_fb_setcolreg, + .fb_fillrect = cfb_fillrect, + .fb_copyarea = cfb_copyarea, + .fb_imageblit = cfb_imageblit, +}; + +static int mdpy_fb_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct fb_info *info; + struct mdpy_fb_par *par; + u32 format, width, height; + int ret; + + ret = pci_enable_device(pdev); + if (ret < 0) + return ret; + + ret = pci_request_regions(pdev, "mdpy-fb"); + if (ret < 0) + return ret; + + pci_read_config_dword(pdev, MDPY_FORMAT_OFFSET, &format); + pci_read_config_dword(pdev, MDPY_WIDTH_OFFSET, &width); + pci_read_config_dword(pdev, MDPY_HEIGHT_OFFSET, &height); + if (format != DRM_FORMAT_XRGB8888) { + pci_err(pdev, "format mismatch (0x%x != 0x%x)\n", + format, DRM_FORMAT_XRGB8888); + return -EINVAL; + } + if (width < 100 || width > 10000) { + pci_err(pdev, "width (%d) out of range\n", width); + return -EINVAL; + } + if (height < 100 || height > 10000) { + pci_err(pdev, "height (%d) out of range\n", height); + return -EINVAL; + } + pci_info(pdev, "mdpy found: %dx%d framebuffer\n", + width, height); + + info = framebuffer_alloc(sizeof(struct mdpy_fb_par), &pdev->dev); + if (!info) + goto err_release_regions; + pci_set_drvdata(pdev, info); + par = info->par; + + info->fix = mdpy_fb_fix; + info->fix.smem_start = pci_resource_start(pdev, 0); + info->fix.smem_len = pci_resource_len(pdev, 0); + info->fix.line_length = width * 4; + + info->var = mdpy_fb_var; + info->var.xres = width; + info->var.yres = height; + info->var.xres_virtual = width; + info->var.yres_virtual = height; + + info->screen_size = info->fix.smem_len; + info->screen_base = ioremap(info->fix.smem_start, + info->screen_size); + if (!info->screen_base) { + pci_err(pdev, "ioremap(pcibar) failed\n"); + ret = -EIO; + goto err_release_fb; + } + + info->apertures = alloc_apertures(1); + if (!info->apertures) { + ret = -ENOMEM; + goto err_unmap; + } + info->apertures->ranges[0].base = info->fix.smem_start; + info->apertures->ranges[0].size = info->fix.smem_len; + + info->fbops = &mdpy_fb_ops; + info->flags = FBINFO_DEFAULT; + info->pseudo_palette = par->palette; + + ret = register_framebuffer(info); + if (ret < 0) { + pci_err(pdev, "mdpy-fb device register failed: %d\n", ret); + goto err_unmap; + } + + pci_info(pdev, "fb%d registered\n", info->node); + return 0; + +err_unmap: + iounmap(info->screen_base); + +err_release_fb: + framebuffer_release(info); + +err_release_regions: + pci_release_regions(pdev); + + return ret; +} + +static void mdpy_fb_remove(struct pci_dev *pdev) +{ + struct fb_info *info = pci_get_drvdata(pdev); + + unregister_framebuffer(info); + framebuffer_release(info); +} + +static struct pci_device_id mdpy_fb_pci_table[] = { + { + .vendor = MDPY_PCI_VENDOR_ID, + .device = MDPY_PCI_DEVICE_ID, + .subvendor = MDPY_PCI_SUBVENDOR_ID, + .subdevice = MDPY_PCI_SUBDEVICE_ID, + }, { + /* end of list */ + } +}; + +static struct pci_driver mdpy_fb_pci_driver = { + .name = "mdpy-fb", + .id_table = mdpy_fb_pci_table, + .probe = mdpy_fb_probe, + .remove = mdpy_fb_remove, +}; + +static int __init mdpy_fb_init(void) +{ + int ret; + + ret = pci_register_driver(&mdpy_fb_pci_driver); + if (ret) + return ret; + + return 0; +} + +module_init(mdpy_fb_init); + +MODULE_DEVICE_TABLE(pci, mdpy_fb_pci_table); +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From a5e6e6505f38f7bce1d3576503a2bffff3fa888c Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Fri, 11 May 2018 09:05:04 -0600 Subject: sample: vfio bochs vbe display (host device for bochs-drm) Display device, demo-ing the vfio dmabuf display interface (VFIO_GFX_PLANE_TYPE_DMABUF). Compatible enough to qemu stdvga that bochs-drm.ko can be used as guest driver. Signed-off-by: Gerd Hoffmann Signed-off-by: Alex Williamson --- samples/Kconfig | 13 + samples/vfio-mdev/Makefile | 1 + samples/vfio-mdev/mbochs.c | 1406 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 1420 insertions(+) create mode 100644 samples/vfio-mdev/mbochs.c diff --git a/samples/Kconfig b/samples/Kconfig index a6ff147d8744..3aeaaca77831 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -132,6 +132,19 @@ config SAMPLE_VFIO_MDEV_MDPY_FB help Guest fbdev driver for the virtual display sample driver. +config SAMPLE_VFIO_MDEV_MBOCHS + tristate "Build VFIO mdpy example mediated device sample code -- loadable modules only" + depends on VFIO_MDEV_DEVICE && m + help + Build a virtual display sample driver for use as a VFIO + mediated device. It supports the region display interface + (VFIO_GFX_PLANE_TYPE_DMABUF). + Emulate enough of qemu stdvga to make bochs-drm.ko happy. + That is basically the vram memory bar and the bochs dispi + interface vbe registers in the mmio register bar. + Specifically it does *not* include any legacy vga stuff. + Device looks a lot like "qemu -device secondary-vga". + config SAMPLE_STATX bool "Build example extended-stat using code" depends on BROKEN diff --git a/samples/vfio-mdev/Makefile b/samples/vfio-mdev/Makefile index 7a5790aaec9c..7db889ca135c 100644 --- a/samples/vfio-mdev/Makefile +++ b/samples/vfio-mdev/Makefile @@ -1,3 +1,4 @@ obj-$(CONFIG_SAMPLE_VFIO_MDEV_MTTY) += mtty.o obj-$(CONFIG_SAMPLE_VFIO_MDEV_MDPY) += mdpy.o obj-$(CONFIG_SAMPLE_VFIO_MDEV_MDPY_FB) += mdpy-fb.o +obj-$(CONFIG_SAMPLE_VFIO_MDEV_MBOCHS) += mbochs.o diff --git a/samples/vfio-mdev/mbochs.c b/samples/vfio-mdev/mbochs.c new file mode 100644 index 000000000000..2960e26c6ea4 --- /dev/null +++ b/samples/vfio-mdev/mbochs.c @@ -0,0 +1,1406 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Mediated virtual PCI display host device driver + * + * Emulate enough of qemu stdvga to make bochs-drm.ko happy. That is + * basically the vram memory bar and the bochs dispi interface vbe + * registers in the mmio register bar. Specifically it does *not* + * include any legacy vga stuff. Device looks a lot like "qemu -device + * secondary-vga". + * + * (c) Gerd Hoffmann + * + * based on mtty driver which is: + * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved. + * Author: Neo Jia + * Kirti Wankhede + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#define VBE_DISPI_INDEX_ID 0x0 +#define VBE_DISPI_INDEX_XRES 0x1 +#define VBE_DISPI_INDEX_YRES 0x2 +#define VBE_DISPI_INDEX_BPP 0x3 +#define VBE_DISPI_INDEX_ENABLE 0x4 +#define VBE_DISPI_INDEX_BANK 0x5 +#define VBE_DISPI_INDEX_VIRT_WIDTH 0x6 +#define VBE_DISPI_INDEX_VIRT_HEIGHT 0x7 +#define VBE_DISPI_INDEX_X_OFFSET 0x8 +#define VBE_DISPI_INDEX_Y_OFFSET 0x9 +#define VBE_DISPI_INDEX_VIDEO_MEMORY_64K 0xa +#define VBE_DISPI_INDEX_COUNT 0xb + +#define VBE_DISPI_ID0 0xB0C0 +#define VBE_DISPI_ID1 0xB0C1 +#define VBE_DISPI_ID2 0xB0C2 +#define VBE_DISPI_ID3 0xB0C3 +#define VBE_DISPI_ID4 0xB0C4 +#define VBE_DISPI_ID5 0xB0C5 + +#define VBE_DISPI_DISABLED 0x00 +#define VBE_DISPI_ENABLED 0x01 +#define VBE_DISPI_GETCAPS 0x02 +#define VBE_DISPI_8BIT_DAC 0x20 +#define VBE_DISPI_LFB_ENABLED 0x40 +#define VBE_DISPI_NOCLEARMEM 0x80 + + +#define MBOCHS_NAME "mbochs" +#define MBOCHS_CLASS_NAME "mbochs" + +#define MBOCHS_CONFIG_SPACE_SIZE 0xff +#define MBOCHS_MMIO_BAR_OFFSET PAGE_SIZE +#define MBOCHS_MMIO_BAR_SIZE PAGE_SIZE +#define MBOCHS_MEMORY_BAR_OFFSET (MBOCHS_MMIO_BAR_OFFSET + \ + MBOCHS_MMIO_BAR_SIZE) + +#define STORE_LE16(addr, val) (*(u16 *)addr = val) +#define STORE_LE32(addr, val) (*(u32 *)addr = val) + + +MODULE_LICENSE("GPL v2"); + +static int max_mbytes = 256; +module_param_named(count, max_mbytes, int, 0444); +MODULE_PARM_DESC(mem, "megabytes available to " MBOCHS_NAME " devices"); + + +#define MBOCHS_TYPE_1 "small" +#define MBOCHS_TYPE_2 "medium" +#define MBOCHS_TYPE_3 "large" + +static const struct mbochs_type { + const char *name; + u32 mbytes; +} mbochs_types[] = { + { + .name = MBOCHS_CLASS_NAME "-" MBOCHS_TYPE_1, + .mbytes = 4, + }, { + .name = MBOCHS_CLASS_NAME "-" MBOCHS_TYPE_2, + .mbytes = 16, + }, { + .name = MBOCHS_CLASS_NAME "-" MBOCHS_TYPE_3, + .mbytes = 64, + }, +}; + + +static dev_t mbochs_devt; +static struct class *mbochs_class; +static struct cdev mbochs_cdev; +static struct device mbochs_dev; +static int mbochs_used_mbytes; + +struct mbochs_mode { + u32 drm_format; + u32 bytepp; + u32 width; + u32 height; + u32 stride; + u32 __pad; + u64 offset; + u64 size; +}; + +struct mbochs_dmabuf { + struct mbochs_mode mode; + u32 id; + struct page **pages; + pgoff_t pagecount; + struct dma_buf *buf; + struct mdev_state *mdev_state; + struct list_head next; + bool unlinked; +}; + +/* State of each mdev device */ +struct mdev_state { + u8 *vconfig; + u64 bar_mask[3]; + u32 memory_bar_mask; + struct mutex ops_lock; + struct mdev_device *mdev; + struct vfio_device_info dev_info; + + const struct mbochs_type *type; + u16 vbe[VBE_DISPI_INDEX_COUNT]; + u64 memsize; + struct page **pages; + pgoff_t pagecount; + + struct list_head dmabufs; + u32 active_id; + u32 next_id; +}; + +static const char *vbe_name_list[VBE_DISPI_INDEX_COUNT] = { + [VBE_DISPI_INDEX_ID] = "id", + [VBE_DISPI_INDEX_XRES] = "xres", + [VBE_DISPI_INDEX_YRES] = "yres", + [VBE_DISPI_INDEX_BPP] = "bpp", + [VBE_DISPI_INDEX_ENABLE] = "enable", + [VBE_DISPI_INDEX_BANK] = "bank", + [VBE_DISPI_INDEX_VIRT_WIDTH] = "virt-width", + [VBE_DISPI_INDEX_VIRT_HEIGHT] = "virt-height", + [VBE_DISPI_INDEX_X_OFFSET] = "x-offset", + [VBE_DISPI_INDEX_Y_OFFSET] = "y-offset", + [VBE_DISPI_INDEX_VIDEO_MEMORY_64K] = "video-mem", +}; + +static const char *vbe_name(u32 index) +{ + if (index < ARRAY_SIZE(vbe_name_list)) + return vbe_name_list[index]; + return "(invalid)"; +} + +static struct page *mbochs_get_page(struct mdev_state *mdev_state, + pgoff_t pgoff); + +static const struct mbochs_type *mbochs_find_type(struct kobject *kobj) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(mbochs_types); i++) + if (strcmp(mbochs_types[i].name, kobj->name) == 0) + return mbochs_types + i; + return NULL; +} + +static void mbochs_create_config_space(struct mdev_state *mdev_state) +{ + STORE_LE16((u16 *) &mdev_state->vconfig[PCI_VENDOR_ID], + 0x1234); + STORE_LE16((u16 *) &mdev_state->vconfig[PCI_DEVICE_ID], + 0x1111); + STORE_LE16((u16 *) &mdev_state->vconfig[PCI_SUBSYSTEM_VENDOR_ID], + PCI_SUBVENDOR_ID_REDHAT_QUMRANET); + STORE_LE16((u16 *) &mdev_state->vconfig[PCI_SUBSYSTEM_ID], + PCI_SUBDEVICE_ID_QEMU); + + STORE_LE16((u16 *) &mdev_state->vconfig[PCI_COMMAND], + PCI_COMMAND_IO | PCI_COMMAND_MEMORY); + STORE_LE16((u16 *) &mdev_state->vconfig[PCI_CLASS_DEVICE], + PCI_CLASS_DISPLAY_OTHER); + mdev_state->vconfig[PCI_CLASS_REVISION] = 0x01; + + STORE_LE32((u32 *) &mdev_state->vconfig[PCI_BASE_ADDRESS_0], + PCI_BASE_ADDRESS_SPACE_MEMORY | + PCI_BASE_ADDRESS_MEM_TYPE_32 | + PCI_BASE_ADDRESS_MEM_PREFETCH); + mdev_state->bar_mask[0] = ~(mdev_state->memsize) + 1; + + STORE_LE32((u32 *) &mdev_state->vconfig[PCI_BASE_ADDRESS_2], + PCI_BASE_ADDRESS_SPACE_MEMORY | + PCI_BASE_ADDRESS_MEM_TYPE_32); + mdev_state->bar_mask[2] = ~(MBOCHS_MMIO_BAR_SIZE) + 1; +} + +static int mbochs_check_framebuffer(struct mdev_state *mdev_state, + struct mbochs_mode *mode) +{ + struct device *dev = mdev_dev(mdev_state->mdev); + u16 *vbe = mdev_state->vbe; + u32 virt_width; + + WARN_ON(!mutex_is_locked(&mdev_state->ops_lock)); + + if (!(vbe[VBE_DISPI_INDEX_ENABLE] & VBE_DISPI_ENABLED)) + goto nofb; + + memset(mode, 0, sizeof(*mode)); + switch (vbe[VBE_DISPI_INDEX_BPP]) { + case 32: + mode->drm_format = DRM_FORMAT_XRGB8888; + mode->bytepp = 4; + break; + default: + dev_info_ratelimited(dev, "%s: bpp %d not supported\n", + __func__, vbe[VBE_DISPI_INDEX_BPP]); + goto nofb; + } + + mode->width = vbe[VBE_DISPI_INDEX_XRES]; + mode->height = vbe[VBE_DISPI_INDEX_YRES]; + virt_width = vbe[VBE_DISPI_INDEX_VIRT_WIDTH]; + if (virt_width < mode->width) + virt_width = mode->width; + mode->stride = virt_width * mode->bytepp; + mode->size = (u64)mode->stride * mode->height; + mode->offset = ((u64)vbe[VBE_DISPI_INDEX_X_OFFSET] * mode->bytepp + + (u64)vbe[VBE_DISPI_INDEX_Y_OFFSET] * mode->stride); + + if (mode->width < 64 || mode->height < 64) { + dev_info_ratelimited(dev, "%s: invalid resolution %dx%d\n", + __func__, mode->width, mode->height); + goto nofb; + } + if (mode->offset + mode->size > mdev_state->memsize) { + dev_info_ratelimited(dev, "%s: framebuffer memory overflow\n", + __func__); + goto nofb; + } + + return 0; + +nofb: + memset(mode, 0, sizeof(*mode)); + return -EINVAL; +} + +static bool mbochs_modes_equal(struct mbochs_mode *mode1, + struct mbochs_mode *mode2) +{ + return memcmp(mode1, mode2, sizeof(struct mbochs_mode)) == 0; +} + +static void handle_pci_cfg_write(struct mdev_state *mdev_state, u16 offset, + char *buf, u32 count) +{ + struct device *dev = mdev_dev(mdev_state->mdev); + int index = (offset - PCI_BASE_ADDRESS_0) / 0x04; + u32 cfg_addr; + + switch (offset) { + case PCI_BASE_ADDRESS_0: + case PCI_BASE_ADDRESS_2: + cfg_addr = *(u32 *)buf; + + if (cfg_addr == 0xffffffff) { + cfg_addr = (cfg_addr & mdev_state->bar_mask[index]); + } else { + cfg_addr &= PCI_BASE_ADDRESS_MEM_MASK; + if (cfg_addr) + dev_info(dev, "BAR #%d @ 0x%x\n", + index, cfg_addr); + } + + cfg_addr |= (mdev_state->vconfig[offset] & + ~PCI_BASE_ADDRESS_MEM_MASK); + STORE_LE32(&mdev_state->vconfig[offset], cfg_addr); + break; + } +} + +static void handle_mmio_write(struct mdev_state *mdev_state, u16 offset, + char *buf, u32 count) +{ + struct device *dev = mdev_dev(mdev_state->mdev); + int index; + u16 reg16; + + switch (offset) { + case 0x400 ... 0x41f: /* vga ioports remapped */ + goto unhandled; + case 0x500 ... 0x515: /* bochs dispi interface */ + if (count != 2) + goto unhandled; + index = (offset - 0x500) / 2; + reg16 = *(u16 *)buf; + if (index < ARRAY_SIZE(mdev_state->vbe)) + mdev_state->vbe[index] = reg16; + dev_dbg(dev, "%s: vbe write %d = %d (%s)\n", + __func__, index, reg16, vbe_name(index)); + break; + case 0x600 ... 0x607: /* qemu extended regs */ + goto unhandled; + default: +unhandled: + dev_dbg(dev, "%s: @0x%03x, count %d (unhandled)\n", + __func__, offset, count); + break; + } +} + +static void handle_mmio_read(struct mdev_state *mdev_state, u16 offset, + char *buf, u32 count) +{ + struct device *dev = mdev_dev(mdev_state->mdev); + u16 reg16 = 0; + int index; + + switch (offset) { + case 0x500 ... 0x515: /* bochs dispi interface */ + if (count != 2) + goto unhandled; + index = (offset - 0x500) / 2; + if (index < ARRAY_SIZE(mdev_state->vbe)) + reg16 = mdev_state->vbe[index]; + dev_dbg(dev, "%s: vbe read %d = %d (%s)\n", + __func__, index, reg16, vbe_name(index)); + *(u16 *)buf = reg16; + break; + default: +unhandled: + dev_dbg(dev, "%s: @0x%03x, count %d (unhandled)\n", + __func__, offset, count); + memset(buf, 0, count); + break; + } +} + +static ssize_t mdev_access(struct mdev_device *mdev, char *buf, size_t count, + loff_t pos, bool is_write) +{ + struct mdev_state *mdev_state = mdev_get_drvdata(mdev); + struct device *dev = mdev_dev(mdev); + struct page *pg; + loff_t poff; + char *map; + int ret = 0; + + mutex_lock(&mdev_state->ops_lock); + + if (pos < MBOCHS_CONFIG_SPACE_SIZE) { + if (is_write) + handle_pci_cfg_write(mdev_state, pos, buf, count); + else + memcpy(buf, (mdev_state->vconfig + pos), count); + + } else if (pos >= MBOCHS_MMIO_BAR_OFFSET && + pos + count <= MBOCHS_MEMORY_BAR_OFFSET) { + pos -= MBOCHS_MMIO_BAR_OFFSET; + if (is_write) + handle_mmio_write(mdev_state, pos, buf, count); + else + handle_mmio_read(mdev_state, pos, buf, count); + + } else if (pos >= MBOCHS_MEMORY_BAR_OFFSET && + pos + count <= + MBOCHS_MEMORY_BAR_OFFSET + mdev_state->memsize) { + pos -= MBOCHS_MMIO_BAR_OFFSET; + poff = pos & ~PAGE_MASK; + pg = mbochs_get_page(mdev_state, pos >> PAGE_SHIFT); + map = kmap(pg); + if (is_write) + memcpy(map + poff, buf, count); + else + memcpy(buf, map + poff, count); + kunmap(pg); + put_page(pg); + + } else { + dev_dbg(dev, "%s: %s @0x%llx (unhandled)\n", + __func__, is_write ? "WR" : "RD", pos); + ret = -1; + goto accessfailed; + } + + ret = count; + + +accessfailed: + mutex_unlock(&mdev_state->ops_lock); + + return ret; +} + +static int mbochs_reset(struct mdev_device *mdev) +{ + struct mdev_state *mdev_state = mdev_get_drvdata(mdev); + u32 size64k = mdev_state->memsize / (64 * 1024); + int i; + + for (i = 0; i < ARRAY_SIZE(mdev_state->vbe); i++) + mdev_state->vbe[i] = 0; + mdev_state->vbe[VBE_DISPI_INDEX_ID] = VBE_DISPI_ID5; + mdev_state->vbe[VBE_DISPI_INDEX_VIDEO_MEMORY_64K] = size64k; + return 0; +} + +static int mbochs_create(struct kobject *kobj, struct mdev_device *mdev) +{ + const struct mbochs_type *type = mbochs_find_type(kobj); + struct device *dev = mdev_dev(mdev); + struct mdev_state *mdev_state; + + if (!type) + type = &mbochs_types[0]; + if (type->mbytes + mbochs_used_mbytes > max_mbytes) + return -ENOMEM; + + mdev_state = kzalloc(sizeof(struct mdev_state), GFP_KERNEL); + if (mdev_state == NULL) + return -ENOMEM; + + mdev_state->vconfig = kzalloc(MBOCHS_CONFIG_SPACE_SIZE, GFP_KERNEL); + if (mdev_state->vconfig == NULL) + goto err_mem; + + mdev_state->memsize = type->mbytes * 1024 * 1024; + mdev_state->pagecount = mdev_state->memsize >> PAGE_SHIFT; + mdev_state->pages = kcalloc(mdev_state->pagecount, + sizeof(struct page *), + GFP_KERNEL); + if (!mdev_state->pages) + goto err_mem; + + dev_info(dev, "%s: %s, %d MB, %ld pages\n", __func__, + kobj->name, type->mbytes, mdev_state->pagecount); + + mutex_init(&mdev_state->ops_lock); + mdev_state->mdev = mdev; + mdev_set_drvdata(mdev, mdev_state); + INIT_LIST_HEAD(&mdev_state->dmabufs); + mdev_state->next_id = 1; + + mdev_state->type = type; + mbochs_create_config_space(mdev_state); + mbochs_reset(mdev); + + mbochs_used_mbytes += type->mbytes; + return 0; + +err_mem: + kfree(mdev_state->vconfig); + kfree(mdev_state); + return -ENOMEM; +} + +static int mbochs_remove(struct mdev_device *mdev) +{ + struct mdev_state *mdev_state = mdev_get_drvdata(mdev); + + mbochs_used_mbytes -= mdev_state->type->mbytes; + mdev_set_drvdata(mdev, NULL); + kfree(mdev_state->pages); + kfree(mdev_state->vconfig); + kfree(mdev_state); + return 0; +} + +static ssize_t mbochs_read(struct mdev_device *mdev, char __user *buf, + size_t count, loff_t *ppos) +{ + unsigned int done = 0; + int ret; + + while (count) { + size_t filled; + + if (count >= 4 && !(*ppos % 4)) { + u32 val; + + ret = mdev_access(mdev, (char *)&val, sizeof(val), + *ppos, false); + if (ret <= 0) + goto read_err; + + if (copy_to_user(buf, &val, sizeof(val))) + goto read_err; + + filled = 4; + } else if (count >= 2 && !(*ppos % 2)) { + u16 val; + + ret = mdev_access(mdev, (char *)&val, sizeof(val), + *ppos, false); + if (ret <= 0) + goto read_err; + + if (copy_to_user(buf, &val, sizeof(val))) + goto read_err; + + filled = 2; + } else { + u8 val; + + ret = mdev_access(mdev, (char *)&val, sizeof(val), + *ppos, false); + if (ret <= 0) + goto read_err; + + if (copy_to_user(buf, &val, sizeof(val))) + goto read_err; + + filled = 1; + } + + count -= filled; + done += filled; + *ppos += filled; + buf += filled; + } + + return done; + +read_err: + return -EFAULT; +} + +static ssize_t mbochs_write(struct mdev_device *mdev, const char __user *buf, + size_t count, loff_t *ppos) +{ + unsigned int done = 0; + int ret; + + while (count) { + size_t filled; + + if (count >= 4 && !(*ppos % 4)) { + u32 val; + + if (copy_from_user(&val, buf, sizeof(val))) + goto write_err; + + ret = mdev_access(mdev, (char *)&val, sizeof(val), + *ppos, true); + if (ret <= 0) + goto write_err; + + filled = 4; + } else if (count >= 2 && !(*ppos % 2)) { + u16 val; + + if (copy_from_user(&val, buf, sizeof(val))) + goto write_err; + + ret = mdev_access(mdev, (char *)&val, sizeof(val), + *ppos, true); + if (ret <= 0) + goto write_err; + + filled = 2; + } else { + u8 val; + + if (copy_from_user(&val, buf, sizeof(val))) + goto write_err; + + ret = mdev_access(mdev, (char *)&val, sizeof(val), + *ppos, true); + if (ret <= 0) + goto write_err; + + filled = 1; + } + count -= filled; + done += filled; + *ppos += filled; + buf += filled; + } + + return done; +write_err: + return -EFAULT; +} + +static struct page *__mbochs_get_page(struct mdev_state *mdev_state, + pgoff_t pgoff) +{ + WARN_ON(!mutex_is_locked(&mdev_state->ops_lock)); + + if (!mdev_state->pages[pgoff]) { + mdev_state->pages[pgoff] = + alloc_pages(GFP_HIGHUSER | __GFP_ZERO, 0); + if (!mdev_state->pages[pgoff]) + return NULL; + } + + get_page(mdev_state->pages[pgoff]); + return mdev_state->pages[pgoff]; +} + +static struct page *mbochs_get_page(struct mdev_state *mdev_state, + pgoff_t pgoff) +{ + struct page *page; + + if (WARN_ON(pgoff >= mdev_state->pagecount)) + return NULL; + + mutex_lock(&mdev_state->ops_lock); + page = __mbochs_get_page(mdev_state, pgoff); + mutex_unlock(&mdev_state->ops_lock); + + return page; +} + +static void mbochs_put_pages(struct mdev_state *mdev_state) +{ + struct device *dev = mdev_dev(mdev_state->mdev); + int i, count = 0; + + WARN_ON(!mutex_is_locked(&mdev_state->ops_lock)); + + for (i = 0; i < mdev_state->pagecount; i++) { + if (!mdev_state->pages[i]) + continue; + put_page(mdev_state->pages[i]); + mdev_state->pages[i] = NULL; + count++; + } + dev_dbg(dev, "%s: %d pages released\n", __func__, count); +} + +static int mbochs_region_vm_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct mdev_state *mdev_state = vma->vm_private_data; + pgoff_t page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT; + + if (page_offset >= mdev_state->pagecount) + return VM_FAULT_SIGBUS; + + vmf->page = mbochs_get_page(mdev_state, page_offset); + if (!vmf->page) + return VM_FAULT_SIGBUS; + + return 0; +} + +static const struct vm_operations_struct mbochs_region_vm_ops = { + .fault = mbochs_region_vm_fault, +}; + +static int mbochs_mmap(struct mdev_device *mdev, struct vm_area_struct *vma) +{ + struct mdev_state *mdev_state = mdev_get_drvdata(mdev); + + if (vma->vm_pgoff != MBOCHS_MEMORY_BAR_OFFSET >> PAGE_SHIFT) + return -EINVAL; + if (vma->vm_end < vma->vm_start) + return -EINVAL; + if (vma->vm_end - vma->vm_start > mdev_state->memsize) + return -EINVAL; + if ((vma->vm_flags & VM_SHARED) == 0) + return -EINVAL; + + vma->vm_ops = &mbochs_region_vm_ops; + vma->vm_private_data = mdev_state; + return 0; +} + +static int mbochs_dmabuf_vm_fault(struct vm_fault *vmf) +{ + struct vm_area_struct *vma = vmf->vma; + struct mbochs_dmabuf *dmabuf = vma->vm_private_data; + + if (WARN_ON(vmf->pgoff >= dmabuf->pagecount)) + return VM_FAULT_SIGBUS; + + vmf->page = dmabuf->pages[vmf->pgoff]; + get_page(vmf->page); + return 0; +} + +static const struct vm_operations_struct mbochs_dmabuf_vm_ops = { + .fault = mbochs_dmabuf_vm_fault, +}; + +static int mbochs_mmap_dmabuf(struct dma_buf *buf, struct vm_area_struct *vma) +{ + struct mbochs_dmabuf *dmabuf = buf->priv; + struct device *dev = mdev_dev(dmabuf->mdev_state->mdev); + + dev_dbg(dev, "%s: %d\n", __func__, dmabuf->id); + + if ((vma->vm_flags & VM_SHARED) == 0) + return -EINVAL; + + vma->vm_ops = &mbochs_dmabuf_vm_ops; + vma->vm_private_data = dmabuf; + return 0; +} + +static void mbochs_print_dmabuf(struct mbochs_dmabuf *dmabuf, + const char *prefix) +{ + struct device *dev = mdev_dev(dmabuf->mdev_state->mdev); + u32 fourcc = dmabuf->mode.drm_format; + + dev_dbg(dev, "%s/%d: %c%c%c%c, %dx%d, stride %d, off 0x%llx, size 0x%llx, pages %ld\n", + prefix, dmabuf->id, + fourcc ? ((fourcc >> 0) & 0xff) : '-', + fourcc ? ((fourcc >> 8) & 0xff) : '-', + fourcc ? ((fourcc >> 16) & 0xff) : '-', + fourcc ? ((fourcc >> 24) & 0xff) : '-', + dmabuf->mode.width, dmabuf->mode.height, dmabuf->mode.stride, + dmabuf->mode.offset, dmabuf->mode.size, dmabuf->pagecount); +} + +static struct sg_table *mbochs_map_dmabuf(struct dma_buf_attachment *at, + enum dma_data_direction direction) +{ + struct mbochs_dmabuf *dmabuf = at->dmabuf->priv; + struct device *dev = mdev_dev(dmabuf->mdev_state->mdev); + struct sg_table *sg; + + dev_dbg(dev, "%s: %d\n", __func__, dmabuf->id); + + sg = kzalloc(sizeof(*sg), GFP_KERNEL); + if (!sg) + goto err1; + if (sg_alloc_table_from_pages(sg, dmabuf->pages, dmabuf->pagecount, + 0, dmabuf->mode.size, GFP_KERNEL) < 0) + goto err2; + if (!dma_map_sg(at->dev, sg->sgl, sg->nents, direction)) + goto err3; + + return sg; + +err3: + sg_free_table(sg); +err2: + kfree(sg); +err1: + return ERR_PTR(-ENOMEM); +} + +static void mbochs_unmap_dmabuf(struct dma_buf_attachment *at, + struct sg_table *sg, + enum dma_data_direction direction) +{ + struct mbochs_dmabuf *dmabuf = at->dmabuf->priv; + struct device *dev = mdev_dev(dmabuf->mdev_state->mdev); + + dev_dbg(dev, "%s: %d\n", __func__, dmabuf->id); + + sg_free_table(sg); + kfree(sg); +} + +static void mbochs_release_dmabuf(struct dma_buf *buf) +{ + struct mbochs_dmabuf *dmabuf = buf->priv; + struct mdev_state *mdev_state = dmabuf->mdev_state; + struct device *dev = mdev_dev(mdev_state->mdev); + pgoff_t pg; + + dev_dbg(dev, "%s: %d\n", __func__, dmabuf->id); + + for (pg = 0; pg < dmabuf->pagecount; pg++) + put_page(dmabuf->pages[pg]); + + mutex_lock(&mdev_state->ops_lock); + dmabuf->buf = NULL; + if (dmabuf->unlinked) + kfree(dmabuf); + mutex_unlock(&mdev_state->ops_lock); +} + +static void *mbochs_kmap_atomic_dmabuf(struct dma_buf *buf, + unsigned long page_num) +{ + struct mbochs_dmabuf *dmabuf = buf->priv; + struct page *page = dmabuf->pages[page_num]; + + return kmap_atomic(page); +} + +static void *mbochs_kmap_dmabuf(struct dma_buf *buf, unsigned long page_num) +{ + struct mbochs_dmabuf *dmabuf = buf->priv; + struct page *page = dmabuf->pages[page_num]; + + return kmap(page); +} + +static struct dma_buf_ops mbochs_dmabuf_ops = { + .map_dma_buf = mbochs_map_dmabuf, + .unmap_dma_buf = mbochs_unmap_dmabuf, + .release = mbochs_release_dmabuf, + .map_atomic = mbochs_kmap_atomic_dmabuf, + .map = mbochs_kmap_dmabuf, + .mmap = mbochs_mmap_dmabuf, +}; + +static struct mbochs_dmabuf *mbochs_dmabuf_alloc(struct mdev_state *mdev_state, + struct mbochs_mode *mode) +{ + struct mbochs_dmabuf *dmabuf; + pgoff_t page_offset, pg; + + WARN_ON(!mutex_is_locked(&mdev_state->ops_lock)); + + dmabuf = kzalloc(sizeof(struct mbochs_dmabuf), GFP_KERNEL); + if (!dmabuf) + return NULL; + + dmabuf->mode = *mode; + dmabuf->id = mdev_state->next_id++; + dmabuf->pagecount = DIV_ROUND_UP(mode->size, PAGE_SIZE); + dmabuf->pages = kcalloc(dmabuf->pagecount, sizeof(struct page *), + GFP_KERNEL); + if (!dmabuf->pages) + goto err_free_dmabuf; + + page_offset = dmabuf->mode.offset >> PAGE_SHIFT; + for (pg = 0; pg < dmabuf->pagecount; pg++) { + dmabuf->pages[pg] = __mbochs_get_page(mdev_state, + page_offset + pg); + if (!dmabuf->pages[pg]) + goto err_free_pages; + } + + dmabuf->mdev_state = mdev_state; + list_add(&dmabuf->next, &mdev_state->dmabufs); + + mbochs_print_dmabuf(dmabuf, __func__); + return dmabuf; + +err_free_pages: + while (pg > 0) + put_page(dmabuf->pages[--pg]); + kfree(dmabuf->pages); +err_free_dmabuf: + kfree(dmabuf); + return NULL; +} + +static struct mbochs_dmabuf * +mbochs_dmabuf_find_by_mode(struct mdev_state *mdev_state, + struct mbochs_mode *mode) +{ + struct mbochs_dmabuf *dmabuf; + + WARN_ON(!mutex_is_locked(&mdev_state->ops_lock)); + + list_for_each_entry(dmabuf, &mdev_state->dmabufs, next) + if (mbochs_modes_equal(&dmabuf->mode, mode)) + return dmabuf; + + return NULL; +} + +static struct mbochs_dmabuf * +mbochs_dmabuf_find_by_id(struct mdev_state *mdev_state, u32 id) +{ + struct mbochs_dmabuf *dmabuf; + + WARN_ON(!mutex_is_locked(&mdev_state->ops_lock)); + + list_for_each_entry(dmabuf, &mdev_state->dmabufs, next) + if (dmabuf->id == id) + return dmabuf; + + return NULL; +} + +static int mbochs_dmabuf_export(struct mbochs_dmabuf *dmabuf) +{ + struct mdev_state *mdev_state = dmabuf->mdev_state; + struct device *dev = mdev_dev(mdev_state->mdev); + DEFINE_DMA_BUF_EXPORT_INFO(exp_info); + struct dma_buf *buf; + + WARN_ON(!mutex_is_locked(&mdev_state->ops_lock)); + + if (!IS_ALIGNED(dmabuf->mode.offset, PAGE_SIZE)) { + dev_info_ratelimited(dev, "%s: framebuffer not page-aligned\n", + __func__); + return -EINVAL; + } + + exp_info.ops = &mbochs_dmabuf_ops; + exp_info.size = dmabuf->mode.size; + exp_info.priv = dmabuf; + + buf = dma_buf_export(&exp_info); + if (IS_ERR(buf)) { + dev_info_ratelimited(dev, "%s: dma_buf_export failed: %ld\n", + __func__, PTR_ERR(buf)); + return PTR_ERR(buf); + } + + dmabuf->buf = buf; + dev_dbg(dev, "%s: %d\n", __func__, dmabuf->id); + return 0; +} + +static int mbochs_get_region_info(struct mdev_device *mdev, + struct vfio_region_info *region_info, + u16 *cap_type_id, void **cap_type) +{ + struct mdev_state *mdev_state; + + mdev_state = mdev_get_drvdata(mdev); + if (!mdev_state) + return -EINVAL; + + if (region_info->index >= VFIO_PCI_NUM_REGIONS) + return -EINVAL; + + switch (region_info->index) { + case VFIO_PCI_CONFIG_REGION_INDEX: + region_info->offset = 0; + region_info->size = MBOCHS_CONFIG_SPACE_SIZE; + region_info->flags = (VFIO_REGION_INFO_FLAG_READ | + VFIO_REGION_INFO_FLAG_WRITE); + break; + case VFIO_PCI_BAR0_REGION_INDEX: + region_info->offset = MBOCHS_MEMORY_BAR_OFFSET; + region_info->size = mdev_state->memsize; + region_info->flags = (VFIO_REGION_INFO_FLAG_READ | + VFIO_REGION_INFO_FLAG_WRITE | + VFIO_REGION_INFO_FLAG_MMAP); + break; + case VFIO_PCI_BAR2_REGION_INDEX: + region_info->offset = MBOCHS_MMIO_BAR_OFFSET; + region_info->size = MBOCHS_MMIO_BAR_SIZE; + region_info->flags = (VFIO_REGION_INFO_FLAG_READ | + VFIO_REGION_INFO_FLAG_WRITE); + break; + default: + region_info->size = 0; + region_info->offset = 0; + region_info->flags = 0; + } + + return 0; +} + +static int mbochs_get_irq_info(struct mdev_device *mdev, + struct vfio_irq_info *irq_info) +{ + irq_info->count = 0; + return 0; +} + +static int mbochs_get_device_info(struct mdev_device *mdev, + struct vfio_device_info *dev_info) +{ + dev_info->flags = VFIO_DEVICE_FLAGS_PCI; + dev_info->num_regions = VFIO_PCI_NUM_REGIONS; + dev_info->num_irqs = VFIO_PCI_NUM_IRQS; + return 0; +} + +static int mbochs_query_gfx_plane(struct mdev_device *mdev, + struct vfio_device_gfx_plane_info *plane) +{ + struct mdev_state *mdev_state = mdev_get_drvdata(mdev); + struct device *dev = mdev_dev(mdev); + struct mbochs_dmabuf *dmabuf; + struct mbochs_mode mode; + int ret; + + if (plane->flags & VFIO_GFX_PLANE_TYPE_PROBE) { + if (plane->flags == (VFIO_GFX_PLANE_TYPE_PROBE | + VFIO_GFX_PLANE_TYPE_DMABUF)) + return 0; + return -EINVAL; + } + + if (plane->flags != VFIO_GFX_PLANE_TYPE_DMABUF) + return -EINVAL; + + plane->drm_format_mod = 0; + plane->x_pos = 0; + plane->y_pos = 0; + plane->x_hot = 0; + plane->y_hot = 0; + + mutex_lock(&mdev_state->ops_lock); + + ret = -EINVAL; + if (plane->drm_plane_type == DRM_PLANE_TYPE_PRIMARY) + ret = mbochs_check_framebuffer(mdev_state, &mode); + if (ret < 0) { + plane->drm_format = 0; + plane->width = 0; + plane->height = 0; + plane->stride = 0; + plane->size = 0; + plane->dmabuf_id = 0; + goto done; + } + + dmabuf = mbochs_dmabuf_find_by_mode(mdev_state, &mode); + if (!dmabuf) + mbochs_dmabuf_alloc(mdev_state, &mode); + if (!dmabuf) { + mutex_unlock(&mdev_state->ops_lock); + return -ENOMEM; + } + + plane->drm_format = dmabuf->mode.drm_format; + plane->width = dmabuf->mode.width; + plane->height = dmabuf->mode.height; + plane->stride = dmabuf->mode.stride; + plane->size = dmabuf->mode.size; + plane->dmabuf_id = dmabuf->id; + +done: + if (plane->drm_plane_type == DRM_PLANE_TYPE_PRIMARY && + mdev_state->active_id != plane->dmabuf_id) { + dev_dbg(dev, "%s: primary: %d => %d\n", __func__, + mdev_state->active_id, plane->dmabuf_id); + mdev_state->active_id = plane->dmabuf_id; + } + mutex_unlock(&mdev_state->ops_lock); + return 0; +} + +static int mbochs_get_gfx_dmabuf(struct mdev_device *mdev, + u32 id) +{ + struct mdev_state *mdev_state = mdev_get_drvdata(mdev); + struct mbochs_dmabuf *dmabuf; + + mutex_lock(&mdev_state->ops_lock); + + dmabuf = mbochs_dmabuf_find_by_id(mdev_state, id); + if (!dmabuf) { + mutex_unlock(&mdev_state->ops_lock); + return -ENOENT; + } + + if (!dmabuf->buf) + mbochs_dmabuf_export(dmabuf); + + mutex_unlock(&mdev_state->ops_lock); + + if (!dmabuf->buf) + return -EINVAL; + + return dma_buf_fd(dmabuf->buf, 0); +} + +static long mbochs_ioctl(struct mdev_device *mdev, unsigned int cmd, + unsigned long arg) +{ + int ret = 0; + unsigned long minsz; + struct mdev_state *mdev_state; + + mdev_state = mdev_get_drvdata(mdev); + + switch (cmd) { + case VFIO_DEVICE_GET_INFO: + { + struct vfio_device_info info; + + minsz = offsetofend(struct vfio_device_info, num_irqs); + + if (copy_from_user(&info, (void __user *)arg, minsz)) + return -EFAULT; + + if (info.argsz < minsz) + return -EINVAL; + + ret = mbochs_get_device_info(mdev, &info); + if (ret) + return ret; + + memcpy(&mdev_state->dev_info, &info, sizeof(info)); + + if (copy_to_user((void __user *)arg, &info, minsz)) + return -EFAULT; + + return 0; + } + case VFIO_DEVICE_GET_REGION_INFO: + { + struct vfio_region_info info; + u16 cap_type_id = 0; + void *cap_type = NULL; + + minsz = offsetofend(struct vfio_region_info, offset); + + if (copy_from_user(&info, (void __user *)arg, minsz)) + return -EFAULT; + + if (info.argsz < minsz) + return -EINVAL; + + ret = mbochs_get_region_info(mdev, &info, &cap_type_id, + &cap_type); + if (ret) + return ret; + + if (copy_to_user((void __user *)arg, &info, minsz)) + return -EFAULT; + + return 0; + } + + case VFIO_DEVICE_GET_IRQ_INFO: + { + struct vfio_irq_info info; + + minsz = offsetofend(struct vfio_irq_info, count); + + if (copy_from_user(&info, (void __user *)arg, minsz)) + return -EFAULT; + + if ((info.argsz < minsz) || + (info.index >= mdev_state->dev_info.num_irqs)) + return -EINVAL; + + ret = mbochs_get_irq_info(mdev, &info); + if (ret) + return ret; + + if (copy_to_user((void __user *)arg, &info, minsz)) + return -EFAULT; + + return 0; + } + + case VFIO_DEVICE_QUERY_GFX_PLANE: + { + struct vfio_device_gfx_plane_info plane; + + minsz = offsetofend(struct vfio_device_gfx_plane_info, + region_index); + + if (copy_from_user(&plane, (void __user *)arg, minsz)) + return -EFAULT; + + if (plane.argsz < minsz) + return -EINVAL; + + ret = mbochs_query_gfx_plane(mdev, &plane); + if (ret) + return ret; + + if (copy_to_user((void __user *)arg, &plane, minsz)) + return -EFAULT; + + return 0; + } + + case VFIO_DEVICE_GET_GFX_DMABUF: + { + u32 dmabuf_id; + + if (get_user(dmabuf_id, (__u32 __user *)arg)) + return -EFAULT; + + return mbochs_get_gfx_dmabuf(mdev, dmabuf_id); + } + + case VFIO_DEVICE_SET_IRQS: + return -EINVAL; + + case VFIO_DEVICE_RESET: + return mbochs_reset(mdev); + } + return -ENOTTY; +} + +static int mbochs_open(struct mdev_device *mdev) +{ + if (!try_module_get(THIS_MODULE)) + return -ENODEV; + + return 0; +} + +static void mbochs_close(struct mdev_device *mdev) +{ + struct mdev_state *mdev_state = mdev_get_drvdata(mdev); + struct mbochs_dmabuf *dmabuf, *tmp; + + mutex_lock(&mdev_state->ops_lock); + + list_for_each_entry_safe(dmabuf, tmp, &mdev_state->dmabufs, next) { + list_del(&dmabuf->next); + if (dmabuf->buf) { + /* free in mbochs_release_dmabuf() */ + dmabuf->unlinked = true; + } else { + kfree(dmabuf); + } + } + mbochs_put_pages(mdev_state); + + mutex_unlock(&mdev_state->ops_lock); + module_put(THIS_MODULE); +} + +static ssize_t +memory_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct mdev_device *mdev = mdev_from_dev(dev); + struct mdev_state *mdev_state = mdev_get_drvdata(mdev); + + return sprintf(buf, "%d MB\n", mdev_state->type->mbytes); +} +static DEVICE_ATTR_RO(memory); + +static struct attribute *mdev_dev_attrs[] = { + &dev_attr_memory.attr, + NULL, +}; + +static const struct attribute_group mdev_dev_group = { + .name = "vendor", + .attrs = mdev_dev_attrs, +}; + +const struct attribute_group *mdev_dev_groups[] = { + &mdev_dev_group, + NULL, +}; + +static ssize_t +name_show(struct kobject *kobj, struct device *dev, char *buf) +{ + return sprintf(buf, "%s\n", kobj->name); +} +MDEV_TYPE_ATTR_RO(name); + +static ssize_t +description_show(struct kobject *kobj, struct device *dev, char *buf) +{ + const struct mbochs_type *type = mbochs_find_type(kobj); + + return sprintf(buf, "virtual display, %d MB video memory\n", + type ? type->mbytes : 0); +} +MDEV_TYPE_ATTR_RO(description); + +static ssize_t +available_instances_show(struct kobject *kobj, struct device *dev, char *buf) +{ + const struct mbochs_type *type = mbochs_find_type(kobj); + int count = (max_mbytes - mbochs_used_mbytes) / type->mbytes; + + return sprintf(buf, "%d\n", count); +} +MDEV_TYPE_ATTR_RO(available_instances); + +static ssize_t device_api_show(struct kobject *kobj, struct device *dev, + char *buf) +{ + return sprintf(buf, "%s\n", VFIO_DEVICE_API_PCI_STRING); +} +MDEV_TYPE_ATTR_RO(device_api); + +static struct attribute *mdev_types_attrs[] = { + &mdev_type_attr_name.attr, + &mdev_type_attr_description.attr, + &mdev_type_attr_device_api.attr, + &mdev_type_attr_available_instances.attr, + NULL, +}; + +static struct attribute_group mdev_type_group1 = { + .name = MBOCHS_TYPE_1, + .attrs = mdev_types_attrs, +}; + +static struct attribute_group mdev_type_group2 = { + .name = MBOCHS_TYPE_2, + .attrs = mdev_types_attrs, +}; + +static struct attribute_group mdev_type_group3 = { + .name = MBOCHS_TYPE_3, + .attrs = mdev_types_attrs, +}; + +static struct attribute_group *mdev_type_groups[] = { + &mdev_type_group1, + &mdev_type_group2, + &mdev_type_group3, + NULL, +}; + +static const struct mdev_parent_ops mdev_fops = { + .owner = THIS_MODULE, + .mdev_attr_groups = mdev_dev_groups, + .supported_type_groups = mdev_type_groups, + .create = mbochs_create, + .remove = mbochs_remove, + .open = mbochs_open, + .release = mbochs_close, + .read = mbochs_read, + .write = mbochs_write, + .ioctl = mbochs_ioctl, + .mmap = mbochs_mmap, +}; + +static const struct file_operations vd_fops = { + .owner = THIS_MODULE, +}; + +static void mbochs_device_release(struct device *dev) +{ + /* nothing */ +} + +static int __init mbochs_dev_init(void) +{ + int ret = 0; + + ret = alloc_chrdev_region(&mbochs_devt, 0, MINORMASK, MBOCHS_NAME); + if (ret < 0) { + pr_err("Error: failed to register mbochs_dev, err: %d\n", ret); + return ret; + } + cdev_init(&mbochs_cdev, &vd_fops); + cdev_add(&mbochs_cdev, mbochs_devt, MINORMASK); + pr_info("%s: major %d\n", __func__, MAJOR(mbochs_devt)); + + mbochs_class = class_create(THIS_MODULE, MBOCHS_CLASS_NAME); + if (IS_ERR(mbochs_class)) { + pr_err("Error: failed to register mbochs_dev class\n"); + ret = PTR_ERR(mbochs_class); + goto failed1; + } + mbochs_dev.class = mbochs_class; + mbochs_dev.release = mbochs_device_release; + dev_set_name(&mbochs_dev, "%s", MBOCHS_NAME); + + ret = device_register(&mbochs_dev); + if (ret) + goto failed2; + + ret = mdev_register_device(&mbochs_dev, &mdev_fops); + if (ret) + goto failed3; + + return 0; + +failed3: + device_unregister(&mbochs_dev); +failed2: + class_destroy(mbochs_class); +failed1: + cdev_del(&mbochs_cdev); + unregister_chrdev_region(mbochs_devt, MINORMASK); + return ret; +} + +static void __exit mbochs_dev_exit(void) +{ + mbochs_dev.bus = NULL; + mdev_unregister_device(&mbochs_dev); + + device_unregister(&mbochs_dev); + cdev_del(&mbochs_cdev); + unregister_chrdev_region(mbochs_devt, MINORMASK); + class_destroy(mbochs_class); + mbochs_class = NULL; +} + +module_init(mbochs_dev_init) +module_exit(mbochs_dev_exit) -- cgit v1.2.3 From 28a68387888997e8a7fa57940ea5d55f2e16b594 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 11 Apr 2018 11:15:48 +0200 Subject: vfio: platform: Fix reset module leak in error path If the IOMMU group setup fails, the reset module is not released. Fixes: b5add544d677d363 ("vfio, platform: make reset driver a requirement by default") Signed-off-by: Geert Uytterhoeven Reviewed-by: Eric Auger Reviewed-by: Simon Horman Acked-by: Eric Auger Signed-off-by: Alex Williamson --- drivers/vfio/platform/vfio_platform_common.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index 4c27f4be3c3d..aa9e792110e3 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -681,18 +681,23 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev, group = vfio_iommu_group_get(dev); if (!group) { pr_err("VFIO: No IOMMU group for device %s\n", vdev->name); - return -EINVAL; + ret = -EINVAL; + goto put_reset; } ret = vfio_add_group_dev(dev, &vfio_platform_ops, vdev); - if (ret) { - vfio_iommu_group_put(group, dev); - return ret; - } + if (ret) + goto put_iommu; mutex_init(&vdev->igate); return 0; + +put_iommu: + vfio_iommu_group_put(group, dev); +put_reset: + vfio_platform_put_reset(vdev); + return ret; } EXPORT_SYMBOL_GPL(vfio_platform_probe_common); -- cgit v1.2.3 From d59502bc0591d75feb5824c7c24faaa8ec48a7ae Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 10 Apr 2018 16:54:11 +0200 Subject: vfio: platform: Make printed error messages more consistent - Capitalize the first word of error messages, - Unwrap statements that fit on a single line, - Use "VFIO" instead of "vfio" as the error message prefix. Signed-off-by: Geert Uytterhoeven Reviewed-by: Eric Auger Acked-by: Eric Auger Signed-off-by: Alex Williamson --- drivers/vfio/platform/vfio_platform_common.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index aa9e792110e3..b60bb5326668 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -630,8 +630,7 @@ static int vfio_platform_of_probe(struct vfio_platform_device *vdev, ret = device_property_read_string(dev, "compatible", &vdev->compat); if (ret) - pr_err("VFIO: cannot retrieve compat for %s\n", - vdev->name); + pr_err("VFIO: Cannot retrieve compat for %s\n", vdev->name); return ret; } @@ -673,7 +672,7 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev, ret = vfio_platform_get_reset(vdev); if (ret && vdev->reset_required) { - pr_err("vfio: no reset function found for device %s\n", + pr_err("VFIO: No reset function found for device %s\n", vdev->name); return ret; } -- cgit v1.2.3 From 002fe996f67f4f46d8917b14cfb6e4313c20685a Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Tue, 15 May 2018 13:53:55 -0600 Subject: vfio/mdev: Check globally for duplicate devices When we create an mdev device, we check for duplicates against the parent device and return -EEXIST if found, but the mdev device namespace is global since we'll link all devices from the bus. We do catch this later in sysfs_do_create_link_sd() to return -EEXIST, but with it comes a kernel warning and stack trace for trying to create duplicate sysfs links, which makes it an undesirable response. Therefore we should really be looking for duplicates across all mdev parent devices, or as implemented here, against our mdev device list. Using mdev_list to prevent duplicates means that we can remove mdev_parent.lock, but in order not to serialize mdev device creation and removal globally, we add mdev_device.active which allows UUIDs to be reserved such that we can drop the mdev_list_lock before the mdev device is fully in place. Two behavioral notes; first, mdev_parent.lock had the side-effect of serializing mdev create and remove ops per parent device. This was an implementation detail, not an intentional guarantee provided to the mdev vendor drivers. Vendor drivers can trivially provide this serialization internally if necessary. Second, review comments note the new -EAGAIN behavior when the device, and in particular the remove attribute, becomes visible in sysfs. If a remove is triggered prior to completion of mdev_device_create() the user will see a -EAGAIN error. While the errno is different, receiving an error during this period is not, the previous implementation returned -ENODEV for the same condition. Furthermore, the consistency to the user is improved in the case where mdev_device_remove_ops() returns error. Previously concurrent calls to mdev_device_remove() could see the device disappear with -ENODEV and return in the case of error. Now a user would see -EAGAIN while the device is in this transitory state. Reviewed-by: Kirti Wankhede Reviewed-by: Cornelia Huck Acked-by: Halil Pasic Acked-by: Zhenyu Wang Signed-off-by: Alex Williamson --- Documentation/vfio-mediated-device.txt | 5 ++ drivers/vfio/mdev/mdev_core.c | 102 ++++++++++++--------------------- drivers/vfio/mdev/mdev_private.h | 2 +- 3 files changed, 42 insertions(+), 67 deletions(-) diff --git a/Documentation/vfio-mediated-device.txt b/Documentation/vfio-mediated-device.txt index 1b3950346532..c3f69bcaf96e 100644 --- a/Documentation/vfio-mediated-device.txt +++ b/Documentation/vfio-mediated-device.txt @@ -145,6 +145,11 @@ The functions in the mdev_parent_ops structure are as follows: * create: allocate basic resources in a driver for a mediated device * remove: free resources in a driver when a mediated device is destroyed +(Note that mdev-core provides no implicit serialization of create/remove +callbacks per mdev parent device, per mdev type, or any other categorization. +Vendor drivers are expected to be fully asynchronous in this respect or +provide their own internal resource protection.) + The callbacks in the mdev_parent_ops structure are as follows: * open: open callback of mediated device diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c index 126991046eb7..0212f0ee8aea 100644 --- a/drivers/vfio/mdev/mdev_core.c +++ b/drivers/vfio/mdev/mdev_core.c @@ -66,34 +66,6 @@ uuid_le mdev_uuid(struct mdev_device *mdev) } EXPORT_SYMBOL(mdev_uuid); -static int _find_mdev_device(struct device *dev, void *data) -{ - struct mdev_device *mdev; - - if (!dev_is_mdev(dev)) - return 0; - - mdev = to_mdev_device(dev); - - if (uuid_le_cmp(mdev->uuid, *(uuid_le *)data) == 0) - return 1; - - return 0; -} - -static bool mdev_device_exist(struct mdev_parent *parent, uuid_le uuid) -{ - struct device *dev; - - dev = device_find_child(parent->dev, &uuid, _find_mdev_device); - if (dev) { - put_device(dev); - return true; - } - - return false; -} - /* Should be called holding parent_list_lock */ static struct mdev_parent *__find_parent_device(struct device *dev) { @@ -221,7 +193,6 @@ int mdev_register_device(struct device *dev, const struct mdev_parent_ops *ops) } kref_init(&parent->ref); - mutex_init(&parent->lock); parent->dev = dev; parent->ops = ops; @@ -297,6 +268,10 @@ static void mdev_device_release(struct device *dev) { struct mdev_device *mdev = to_mdev_device(dev); + mutex_lock(&mdev_list_lock); + list_del(&mdev->next); + mutex_unlock(&mdev_list_lock); + dev_dbg(&mdev->dev, "MDEV: destroying\n"); kfree(mdev); } @@ -304,7 +279,7 @@ static void mdev_device_release(struct device *dev) int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid) { int ret; - struct mdev_device *mdev; + struct mdev_device *mdev, *tmp; struct mdev_parent *parent; struct mdev_type *type = to_mdev_type(kobj); @@ -312,21 +287,28 @@ int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid) if (!parent) return -EINVAL; - mutex_lock(&parent->lock); + mutex_lock(&mdev_list_lock); /* Check for duplicate */ - if (mdev_device_exist(parent, uuid)) { - ret = -EEXIST; - goto create_err; + list_for_each_entry(tmp, &mdev_list, next) { + if (!uuid_le_cmp(tmp->uuid, uuid)) { + mutex_unlock(&mdev_list_lock); + ret = -EEXIST; + goto mdev_fail; + } } mdev = kzalloc(sizeof(*mdev), GFP_KERNEL); if (!mdev) { + mutex_unlock(&mdev_list_lock); ret = -ENOMEM; - goto create_err; + goto mdev_fail; } memcpy(&mdev->uuid, &uuid, sizeof(uuid_le)); + list_add(&mdev->next, &mdev_list); + mutex_unlock(&mdev_list_lock); + mdev->parent = parent; kref_init(&mdev->ref); @@ -338,35 +320,28 @@ int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid) ret = device_register(&mdev->dev); if (ret) { put_device(&mdev->dev); - goto create_err; + goto mdev_fail; } ret = mdev_device_create_ops(kobj, mdev); if (ret) - goto create_failed; + goto create_fail; ret = mdev_create_sysfs_files(&mdev->dev, type); if (ret) { mdev_device_remove_ops(mdev, true); - goto create_failed; + goto create_fail; } mdev->type_kobj = kobj; + mdev->active = true; dev_dbg(&mdev->dev, "MDEV: created\n"); - mutex_unlock(&parent->lock); - - mutex_lock(&mdev_list_lock); - list_add(&mdev->next, &mdev_list); - mutex_unlock(&mdev_list_lock); - - return ret; + return 0; -create_failed: +create_fail: device_unregister(&mdev->dev); - -create_err: - mutex_unlock(&parent->lock); +mdev_fail: mdev_put_parent(parent); return ret; } @@ -377,44 +352,39 @@ int mdev_device_remove(struct device *dev, bool force_remove) struct mdev_parent *parent; struct mdev_type *type; int ret; - bool found = false; mdev = to_mdev_device(dev); mutex_lock(&mdev_list_lock); list_for_each_entry(tmp, &mdev_list, next) { - if (tmp == mdev) { - found = true; + if (tmp == mdev) break; - } } - if (found) - list_del(&mdev->next); + if (tmp != mdev) { + mutex_unlock(&mdev_list_lock); + return -ENODEV; + } - mutex_unlock(&mdev_list_lock); + if (!mdev->active) { + mutex_unlock(&mdev_list_lock); + return -EAGAIN; + } - if (!found) - return -ENODEV; + mdev->active = false; + mutex_unlock(&mdev_list_lock); type = to_mdev_type(mdev->type_kobj); parent = mdev->parent; - mutex_lock(&parent->lock); ret = mdev_device_remove_ops(mdev, force_remove); if (ret) { - mutex_unlock(&parent->lock); - - mutex_lock(&mdev_list_lock); - list_add(&mdev->next, &mdev_list); - mutex_unlock(&mdev_list_lock); - + mdev->active = true; return ret; } mdev_remove_sysfs_files(dev, type); device_unregister(dev); - mutex_unlock(&parent->lock); mdev_put_parent(parent); return 0; diff --git a/drivers/vfio/mdev/mdev_private.h b/drivers/vfio/mdev/mdev_private.h index a9cefd70a705..b5819b7d7ef7 100644 --- a/drivers/vfio/mdev/mdev_private.h +++ b/drivers/vfio/mdev/mdev_private.h @@ -20,7 +20,6 @@ struct mdev_parent { struct device *dev; const struct mdev_parent_ops *ops; struct kref ref; - struct mutex lock; struct list_head next; struct kset *mdev_types_kset; struct list_head type_list; @@ -34,6 +33,7 @@ struct mdev_device { struct kref ref; struct list_head next; struct kobject *type_kobj; + bool active; }; #define to_mdev_device(dev) container_of(dev, struct mdev_device, dev) -- cgit v1.2.3 From 6a62c1dfb5c7f6719361180fd0fdf45b15a35d95 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 18 May 2018 11:40:33 -0600 Subject: vfio/mdev: Re-order sysfs attribute creation There exists a gap at the end of mdev_device_create() where the device is visible to userspace, but we're not yet ready to handle removal, as triggered through the 'remove' attribute. We handle this properly in mdev_device_remove() with an -EAGAIN return, but we can marginally reduce this gap by adding this attribute as a final step of our sysfs setup. Reviewed-by: Kirti Wankhede Reviewed-by: Cornelia Huck Acked-by: Halil Pasic Signed-off-by: Alex Williamson --- drivers/vfio/mdev/mdev_sysfs.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/vfio/mdev/mdev_sysfs.c b/drivers/vfio/mdev/mdev_sysfs.c index 802df210929b..249472f05509 100644 --- a/drivers/vfio/mdev/mdev_sysfs.c +++ b/drivers/vfio/mdev/mdev_sysfs.c @@ -257,24 +257,24 @@ int mdev_create_sysfs_files(struct device *dev, struct mdev_type *type) { int ret; - ret = sysfs_create_files(&dev->kobj, mdev_device_attrs); - if (ret) - return ret; - ret = sysfs_create_link(type->devices_kobj, &dev->kobj, dev_name(dev)); if (ret) - goto device_link_failed; + return ret; ret = sysfs_create_link(&dev->kobj, &type->kobj, "mdev_type"); if (ret) goto type_link_failed; + ret = sysfs_create_files(&dev->kobj, mdev_device_attrs); + if (ret) + goto create_files_failed; + return ret; +create_files_failed: + sysfs_remove_link(&dev->kobj, "mdev_type"); type_link_failed: sysfs_remove_link(type->devices_kobj, dev_name(dev)); -device_link_failed: - sysfs_remove_files(&dev->kobj, mdev_device_attrs); return ret; } -- cgit v1.2.3 From e77addf018f0c8d947ddc0bef2cd6d4791da7dde Mon Sep 17 00:00:00 2001 From: Yisheng Xie Date: Mon, 21 May 2018 19:57:45 +0800 Subject: vfio: use match_string() helper match_string() returns the index of an array for a matching string, which can be used intead of open coded variant. Cc: Alex Williamson Cc: kvm@vger.kernel.org Signed-off-by: Yisheng Xie Reviewed-by: Andy Shevchenko Signed-off-by: Alex Williamson --- drivers/vfio/vfio.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 721f97f8dac1..64833879f75d 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -630,8 +630,6 @@ static const char * const vfio_driver_whitelist[] = { "pci-stub" }; static bool vfio_dev_whitelisted(struct device *dev, struct device_driver *drv) { - int i; - if (dev_is_pci(dev)) { struct pci_dev *pdev = to_pci_dev(dev); @@ -639,12 +637,9 @@ static bool vfio_dev_whitelisted(struct device *dev, struct device_driver *drv) return true; } - for (i = 0; i < ARRAY_SIZE(vfio_driver_whitelist); i++) { - if (!strcmp(drv->name, vfio_driver_whitelist[i])) - return true; - } - - return false; + return match_string(vfio_driver_whitelist, + ARRAY_SIZE(vfio_driver_whitelist), + drv->name) >= 0; } /* -- cgit v1.2.3 From 415eb9fc0e23071fc8cdd1c7bf42e4a54b6521d3 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 29 May 2018 19:15:28 +0200 Subject: vfio: platform: Fix using devices in PM Domains If a device is part of a PM Domain (e.g. power and/or clock domain), its power state is managed using Runtime PM. Without Runtime PM, the device may not be powered up or clocked, causing subtle failures, crashes, or system lock-ups when the device is accessed by the guest. Fix this by adding Runtime PM support, powering the device when the VFIO device is opened by the guest. Signed-off-by: Geert Uytterhoeven Reviewed-by: Simon Horman Acked-by: Eric Auger Signed-off-by: Alex Williamson --- drivers/vfio/platform/vfio_platform_common.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/vfio/platform/vfio_platform_common.c b/drivers/vfio/platform/vfio_platform_common.c index b60bb5326668..c0cd824be2b7 100644 --- a/drivers/vfio/platform/vfio_platform_common.c +++ b/drivers/vfio/platform/vfio_platform_common.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -239,6 +240,7 @@ static void vfio_platform_release(void *device_data) ret, extra_dbg ? extra_dbg : ""); WARN_ON(1); } + pm_runtime_put(vdev->device); vfio_platform_regions_cleanup(vdev); vfio_platform_irq_cleanup(vdev); } @@ -269,6 +271,10 @@ static int vfio_platform_open(void *device_data) if (ret) goto err_irq; + ret = pm_runtime_get_sync(vdev->device); + if (ret < 0) + goto err_pm; + ret = vfio_platform_call_reset(vdev, &extra_dbg); if (ret && vdev->reset_required) { dev_warn(vdev->device, "reset driver is required and reset call failed in open (%d) %s\n", @@ -283,6 +289,8 @@ static int vfio_platform_open(void *device_data) return 0; err_rst: + pm_runtime_put(vdev->device); +err_pm: vfio_platform_irq_cleanup(vdev); err_irq: vfio_platform_regions_cleanup(vdev); @@ -690,6 +698,7 @@ int vfio_platform_probe_common(struct vfio_platform_device *vdev, mutex_init(&vdev->igate); + pm_runtime_enable(vdev->device); return 0; put_iommu: @@ -707,6 +716,7 @@ struct vfio_platform_device *vfio_platform_remove_common(struct device *dev) vdev = vfio_del_group_dev(dev); if (vdev) { + pm_runtime_disable(vdev->device); vfio_platform_put_reset(vdev); vfio_iommu_group_put(dev->iommu_group, dev); } -- cgit v1.2.3 From c1abca96b252a9627f99f39215b84e5de92bf1e3 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 30 May 2018 23:37:31 +0200 Subject: samples: mbochs: add DMA_SHARED_BUFFER dependency The new bochs vbe sample fails to link when DMA_SHARED_BUFFER is disabled: ERROR: "dma_buf_export" [samples/vfio-mdev/mbochs.ko] undefined! ERROR: "dma_buf_fd" [samples/vfio-mdev/mbochs.ko] undefined! This uses a 'select' statement to enable that framework, like all other users do. Fixes: a5e6e6505f38 ("sample: vfio bochs vbe display (host device for bochs-drm)") Signed-off-by: Arnd Bergmann Signed-off-by: Alex Williamson --- samples/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/samples/Kconfig b/samples/Kconfig index 3aeaaca77831..bd133efc1a56 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -135,6 +135,7 @@ config SAMPLE_VFIO_MDEV_MDPY_FB config SAMPLE_VFIO_MDEV_MBOCHS tristate "Build VFIO mdpy example mediated device sample code -- loadable modules only" depends on VFIO_MDEV_DEVICE && m + select DMA_SHARED_BUFFER help Build a virtual display sample driver for use as a VFIO mediated device. It supports the region display interface -- cgit v1.2.3