diff options
Diffstat (limited to 'virt')
-rw-r--r-- | virt/kvm/assigned-dev.c | 157 | ||||
-rw-r--r-- | virt/kvm/coalesced_mmio.c | 137 | ||||
-rw-r--r-- | virt/kvm/coalesced_mmio.h | 7 | ||||
-rw-r--r-- | virt/kvm/eventfd.c | 3 | ||||
-rw-r--r-- | virt/kvm/ioapic.c | 20 | ||||
-rw-r--r-- | virt/kvm/iommu.c | 37 | ||||
-rw-r--r-- | virt/kvm/kvm_main.c | 316 |
7 files changed, 460 insertions, 217 deletions
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index 4e9eaeb518c7..758e3b36d4cf 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c @@ -17,6 +17,8 @@ #include <linux/pci.h> #include <linux/interrupt.h> #include <linux/slab.h> +#include <linux/namei.h> +#include <linux/fs.h> #include "irq.h" static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head, @@ -58,8 +60,6 @@ static int find_index_from_host_irq(struct kvm_assigned_dev_kernel static irqreturn_t kvm_assigned_dev_thread(int irq, void *dev_id) { struct kvm_assigned_dev_kernel *assigned_dev = dev_id; - u32 vector; - int index; if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_INTX) { spin_lock(&assigned_dev->intx_lock); @@ -68,31 +68,35 @@ static irqreturn_t kvm_assigned_dev_thread(int irq, void *dev_id) spin_unlock(&assigned_dev->intx_lock); } - if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) { - index = find_index_from_host_irq(assigned_dev, irq); - if (index >= 0) { - vector = assigned_dev-> - guest_msix_entries[index].vector; - kvm_set_irq(assigned_dev->kvm, - assigned_dev->irq_source_id, vector, 1); - } - } else + kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, + assigned_dev->guest_irq, 1); + + return IRQ_HANDLED; +} + +#ifdef __KVM_HAVE_MSIX +static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id) +{ + struct kvm_assigned_dev_kernel *assigned_dev = dev_id; + int index = find_index_from_host_irq(assigned_dev, irq); + u32 vector; + + if (index >= 0) { + vector = assigned_dev->guest_msix_entries[index].vector; kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, - assigned_dev->guest_irq, 1); + vector, 1); + } return IRQ_HANDLED; } +#endif /* Ack the irq line for an assigned device */ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) { - struct kvm_assigned_dev_kernel *dev; - - if (kian->gsi == -1) - return; - - dev = container_of(kian, struct kvm_assigned_dev_kernel, - ack_notifier); + struct kvm_assigned_dev_kernel *dev = + container_of(kian, struct kvm_assigned_dev_kernel, + ack_notifier); kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); @@ -110,8 +114,9 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) static void deassign_guest_irq(struct kvm *kvm, struct kvm_assigned_dev_kernel *assigned_dev) { - kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier); - assigned_dev->ack_notifier.gsi = -1; + if (assigned_dev->ack_notifier.gsi != -1) + kvm_unregister_irq_ack_notifier(kvm, + &assigned_dev->ack_notifier); kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, assigned_dev->guest_irq, 0); @@ -143,7 +148,7 @@ static void deassign_host_irq(struct kvm *kvm, for (i = 0; i < assigned_dev->entries_nr; i++) free_irq(assigned_dev->host_msix_entries[i].vector, - (void *)assigned_dev); + assigned_dev); assigned_dev->entries_nr = 0; kfree(assigned_dev->host_msix_entries); @@ -153,7 +158,7 @@ static void deassign_host_irq(struct kvm *kvm, /* Deal with MSI and INTx */ disable_irq(assigned_dev->host_irq); - free_irq(assigned_dev->host_irq, (void *)assigned_dev); + free_irq(assigned_dev->host_irq, assigned_dev); if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI) pci_disable_msi(assigned_dev->dev); @@ -205,6 +210,8 @@ static void kvm_free_assigned_device(struct kvm *kvm, else pci_restore_state(assigned_dev->dev); + assigned_dev->dev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED; + pci_release_regions(assigned_dev->dev); pci_disable_device(assigned_dev->dev); pci_dev_put(assigned_dev->dev); @@ -237,7 +244,7 @@ static int assigned_device_enable_host_intx(struct kvm *kvm, * are going to be long delays in accepting, acking, etc. */ if (request_threaded_irq(dev->host_irq, NULL, kvm_assigned_dev_thread, - IRQF_ONESHOT, dev->irq_name, (void *)dev)) + IRQF_ONESHOT, dev->irq_name, dev)) return -EIO; return 0; } @@ -256,7 +263,7 @@ static int assigned_device_enable_host_msi(struct kvm *kvm, dev->host_irq = dev->dev->irq; if (request_threaded_irq(dev->host_irq, NULL, kvm_assigned_dev_thread, - 0, dev->irq_name, (void *)dev)) { + 0, dev->irq_name, dev)) { pci_disable_msi(dev->dev); return -EIO; } @@ -282,8 +289,8 @@ static int assigned_device_enable_host_msix(struct kvm *kvm, for (i = 0; i < dev->entries_nr; i++) { r = request_threaded_irq(dev->host_msix_entries[i].vector, - NULL, kvm_assigned_dev_thread, - 0, dev->irq_name, (void *)dev); + NULL, kvm_assigned_dev_thread_msix, + 0, dev->irq_name, dev); if (r) goto err; } @@ -291,7 +298,7 @@ static int assigned_device_enable_host_msix(struct kvm *kvm, return 0; err: for (i -= 1; i >= 0; i--) - free_irq(dev->host_msix_entries[i].vector, (void *)dev); + free_irq(dev->host_msix_entries[i].vector, dev); pci_disable_msix(dev->dev); return r; } @@ -404,7 +411,8 @@ static int assign_guest_irq(struct kvm *kvm, if (!r) { dev->irq_requested_type |= guest_irq_type; - kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier); + if (dev->ack_notifier.gsi != -1) + kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier); } else kvm_free_irq_source_id(kvm, dev->irq_source_id); @@ -474,12 +482,76 @@ out: return r; } +/* + * We want to test whether the caller has been granted permissions to + * use this device. To be able to configure and control the device, + * the user needs access to PCI configuration space and BAR resources. + * These are accessed through PCI sysfs. PCI config space is often + * passed to the process calling this ioctl via file descriptor, so we + * can't rely on access to that file. We can check for permissions + * on each of the BAR resource files, which is a pretty clear + * indicator that the user has been granted access to the device. + */ +static int probe_sysfs_permissions(struct pci_dev *dev) +{ +#ifdef CONFIG_SYSFS + int i; + bool bar_found = false; + + for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) { + char *kpath, *syspath; + struct path path; + struct inode *inode; + int r; + + if (!pci_resource_len(dev, i)) + continue; + + kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL); + if (!kpath) + return -ENOMEM; + + /* Per sysfs-rules, sysfs is always at /sys */ + syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i); + kfree(kpath); + if (!syspath) + return -ENOMEM; + + r = kern_path(syspath, LOOKUP_FOLLOW, &path); + kfree(syspath); + if (r) + return r; + + inode = path.dentry->d_inode; + + r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS); + path_put(&path); + if (r) + return r; + + bar_found = true; + } + + /* If no resources, probably something special */ + if (!bar_found) + return -EPERM; + + return 0; +#else + return -EINVAL; /* No way to control the device without sysfs */ +#endif +} + static int kvm_vm_ioctl_assign_device(struct kvm *kvm, struct kvm_assigned_pci_dev *assigned_dev) { int r = 0, idx; struct kvm_assigned_dev_kernel *match; struct pci_dev *dev; + u8 header_type; + + if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU)) + return -EINVAL; mutex_lock(&kvm->lock); idx = srcu_read_lock(&kvm->srcu); @@ -507,6 +579,18 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, r = -EINVAL; goto out_free; } + + /* Don't allow bridges to be assigned */ + pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type); + if ((header_type & PCI_HEADER_TYPE) != PCI_HEADER_TYPE_NORMAL) { + r = -EPERM; + goto out_put; + } + + r = probe_sysfs_permissions(dev); + if (r) + goto out_put; + if (pci_enable_device(dev)) { printk(KERN_INFO "%s: Could not enable PCI device\n", __func__); r = -EBUSY; @@ -538,16 +622,14 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, list_add(&match->list, &kvm->arch.assigned_dev_head); - if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) { - if (!kvm->arch.iommu_domain) { - r = kvm_iommu_map_guest(kvm); - if (r) - goto out_list_del; - } - r = kvm_assign_device(kvm, match); + if (!kvm->arch.iommu_domain) { + r = kvm_iommu_map_guest(kvm); if (r) goto out_list_del; } + r = kvm_assign_device(kvm, match); + if (r) + goto out_list_del; out: srcu_read_unlock(&kvm->srcu, idx); @@ -587,8 +669,7 @@ static int kvm_vm_ioctl_deassign_device(struct kvm *kvm, goto out; } - if (match->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) - kvm_deassign_device(kvm, match); + kvm_deassign_device(kvm, match); kvm_free_assigned_device(kvm, match); diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c index fc8487564d1f..88b2fe3ddf42 100644 --- a/virt/kvm/coalesced_mmio.c +++ b/virt/kvm/coalesced_mmio.c @@ -24,10 +24,25 @@ static inline struct kvm_coalesced_mmio_dev *to_mmio(struct kvm_io_device *dev) static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev, gpa_t addr, int len) { - struct kvm_coalesced_mmio_zone *zone; + /* is it in a batchable area ? + * (addr,len) is fully included in + * (zone->addr, zone->size) + */ + if (len < 0) + return 0; + if (addr + len < addr) + return 0; + if (addr < dev->zone.addr) + return 0; + if (addr + len > dev->zone.addr + dev->zone.size) + return 0; + return 1; +} + +static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev) +{ struct kvm_coalesced_mmio_ring *ring; unsigned avail; - int i; /* Are we able to batch it ? */ @@ -37,25 +52,12 @@ static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev, */ ring = dev->kvm->coalesced_mmio_ring; avail = (ring->first - ring->last - 1) % KVM_COALESCED_MMIO_MAX; - if (avail < KVM_MAX_VCPUS) { + if (avail == 0) { /* full */ return 0; } - /* is it in a batchable area ? */ - - for (i = 0; i < dev->nb_zones; i++) { - zone = &dev->zone[i]; - - /* (addr,len) is fully included in - * (zone->addr, zone->size) - */ - - if (zone->addr <= addr && - addr + len <= zone->addr + zone->size) - return 1; - } - return 0; + return 1; } static int coalesced_mmio_write(struct kvm_io_device *this, @@ -63,10 +65,16 @@ static int coalesced_mmio_write(struct kvm_io_device *this, { struct kvm_coalesced_mmio_dev *dev = to_mmio(this); struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring; + if (!coalesced_mmio_in_range(dev, addr, len)) return -EOPNOTSUPP; - spin_lock(&dev->lock); + spin_lock(&dev->kvm->ring_lock); + + if (!coalesced_mmio_has_room(dev)) { + spin_unlock(&dev->kvm->ring_lock); + return -EOPNOTSUPP; + } /* copy data in first free entry of the ring */ @@ -75,7 +83,7 @@ static int coalesced_mmio_write(struct kvm_io_device *this, memcpy(ring->coalesced_mmio[ring->last].data, val, len); smp_wmb(); ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX; - spin_unlock(&dev->lock); + spin_unlock(&dev->kvm->ring_lock); return 0; } @@ -83,6 +91,8 @@ static void coalesced_mmio_destructor(struct kvm_io_device *this) { struct kvm_coalesced_mmio_dev *dev = to_mmio(this); + list_del(&dev->list); + kfree(dev); } @@ -93,7 +103,6 @@ static const struct kvm_io_device_ops coalesced_mmio_ops = { int kvm_coalesced_mmio_init(struct kvm *kvm) { - struct kvm_coalesced_mmio_dev *dev; struct page *page; int ret; @@ -101,31 +110,18 @@ int kvm_coalesced_mmio_init(struct kvm *kvm) page = alloc_page(GFP_KERNEL | __GFP_ZERO); if (!page) goto out_err; - kvm->coalesced_mmio_ring = page_address(page); - ret = -ENOMEM; - dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL); - if (!dev) - goto out_free_page; - spin_lock_init(&dev->lock); - kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops); - dev->kvm = kvm; - kvm->coalesced_mmio_dev = dev; - - mutex_lock(&kvm->slots_lock); - ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &dev->dev); - mutex_unlock(&kvm->slots_lock); - if (ret < 0) - goto out_free_dev; + ret = 0; + kvm->coalesced_mmio_ring = page_address(page); - return ret; + /* + * We're using this spinlock to sync access to the coalesced ring. + * The list doesn't need it's own lock since device registration and + * unregistration should only happen when kvm->slots_lock is held. + */ + spin_lock_init(&kvm->ring_lock); + INIT_LIST_HEAD(&kvm->coalesced_zones); -out_free_dev: - kvm->coalesced_mmio_dev = NULL; - kfree(dev); -out_free_page: - kvm->coalesced_mmio_ring = NULL; - __free_page(page); out_err: return ret; } @@ -139,51 +135,50 @@ void kvm_coalesced_mmio_free(struct kvm *kvm) int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm, struct kvm_coalesced_mmio_zone *zone) { - struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev; + int ret; + struct kvm_coalesced_mmio_dev *dev; - if (dev == NULL) - return -ENXIO; + dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL); + if (!dev) + return -ENOMEM; + + kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops); + dev->kvm = kvm; + dev->zone = *zone; mutex_lock(&kvm->slots_lock); - if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) { - mutex_unlock(&kvm->slots_lock); - return -ENOBUFS; - } + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, zone->addr, + zone->size, &dev->dev); + if (ret < 0) + goto out_free_dev; + list_add_tail(&dev->list, &kvm->coalesced_zones); + mutex_unlock(&kvm->slots_lock); - dev->zone[dev->nb_zones] = *zone; - dev->nb_zones++; + return ret; +out_free_dev: mutex_unlock(&kvm->slots_lock); + + kfree(dev); + + if (dev == NULL) + return -ENXIO; + return 0; } int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, struct kvm_coalesced_mmio_zone *zone) { - int i; - struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev; - struct kvm_coalesced_mmio_zone *z; - - if (dev == NULL) - return -ENXIO; + struct kvm_coalesced_mmio_dev *dev, *tmp; mutex_lock(&kvm->slots_lock); - i = dev->nb_zones; - while (i) { - z = &dev->zone[i - 1]; - - /* unregister all zones - * included in (zone->addr, zone->size) - */ - - if (zone->addr <= z->addr && - z->addr + z->size <= zone->addr + zone->size) { - dev->nb_zones--; - *z = dev->zone[dev->nb_zones]; + list_for_each_entry_safe(dev, tmp, &kvm->coalesced_zones, list) + if (coalesced_mmio_in_range(dev, zone->addr, zone->size)) { + kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dev->dev); + kvm_iodevice_destructor(&dev->dev); } - i--; - } mutex_unlock(&kvm->slots_lock); diff --git a/virt/kvm/coalesced_mmio.h b/virt/kvm/coalesced_mmio.h index 8a5959e3535f..b280c20444d1 100644 --- a/virt/kvm/coalesced_mmio.h +++ b/virt/kvm/coalesced_mmio.h @@ -12,14 +12,13 @@ #ifdef CONFIG_KVM_MMIO -#define KVM_COALESCED_MMIO_ZONE_MAX 100 +#include <linux/list.h> struct kvm_coalesced_mmio_dev { + struct list_head list; struct kvm_io_device dev; struct kvm *kvm; - spinlock_t lock; - int nb_zones; - struct kvm_coalesced_mmio_zone zone[KVM_COALESCED_MMIO_ZONE_MAX]; + struct kvm_coalesced_mmio_zone zone; }; int kvm_coalesced_mmio_init(struct kvm *kvm); diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 73358d256fa2..f59c1e8de7a2 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -586,7 +586,8 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) kvm_iodevice_init(&p->dev, &ioeventfd_ops); - ret = kvm_io_bus_register_dev(kvm, bus_idx, &p->dev); + ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length, + &p->dev); if (ret < 0) goto unlock_fail; diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c index 8df1ca104a7f..dcaf272c26c0 100644 --- a/virt/kvm/ioapic.c +++ b/virt/kvm/ioapic.c @@ -185,7 +185,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq) irqe.dest_mode = 0; /* Physical mode. */ /* need to read apic_id from apic regiest since * it can be rewritten */ - irqe.dest_id = ioapic->kvm->bsp_vcpu->vcpu_id; + irqe.dest_id = ioapic->kvm->bsp_vcpu_id; } #endif return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe); @@ -332,9 +332,18 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, (void*)addr, len, val); ASSERT(!(addr & 0xf)); /* check alignment */ - if (len == 4 || len == 8) + switch (len) { + case 8: + case 4: data = *(u32 *) val; - else { + break; + case 2: + data = *(u16 *) val; + break; + case 1: + data = *(u8 *) val; + break; + default: printk(KERN_WARNING "ioapic: Unsupported size %d\n", len); return 0; } @@ -343,7 +352,7 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len, spin_lock(&ioapic->lock); switch (addr) { case IOAPIC_REG_SELECT: - ioapic->ioregsel = data; + ioapic->ioregsel = data & 0xFF; /* 8-bit register */ break; case IOAPIC_REG_WINDOW: @@ -394,7 +403,8 @@ int kvm_ioapic_init(struct kvm *kvm) kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops); ioapic->kvm = kvm; mutex_lock(&kvm->slots_lock); - ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); + ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, ioapic->base_address, + IOAPIC_MEM_LENGTH, &ioapic->dev); mutex_unlock(&kvm->slots_lock); if (ret < 0) { kvm->arch.vioapic = NULL; diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index 78c80f67f535..a457d2138f49 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c @@ -25,12 +25,14 @@ #include <linux/list.h> #include <linux/kvm_host.h> +#include <linux/module.h> #include <linux/pci.h> +#include <linux/stat.h> #include <linux/dmar.h> #include <linux/iommu.h> #include <linux/intel-iommu.h> -static int allow_unsafe_assigned_interrupts; +static bool allow_unsafe_assigned_interrupts; module_param_named(allow_unsafe_assigned_interrupts, allow_unsafe_assigned_interrupts, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(allow_unsafe_assigned_interrupts, @@ -111,7 +113,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot) /* Map into IO address space */ r = iommu_map(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn), - get_order(page_size), flags); + page_size, flags); if (r) { printk(KERN_ERR "kvm_iommu_map_address:" "iommu failed to map pfn=%llx\n", pfn); @@ -132,14 +134,15 @@ unmap_pages: static int kvm_iommu_map_memslots(struct kvm *kvm) { - int i, idx, r = 0; + int idx, r = 0; struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; idx = srcu_read_lock(&kvm->srcu); slots = kvm_memslots(kvm); - for (i = 0; i < slots->nmemslots; i++) { - r = kvm_iommu_map_pages(kvm, &slots->memslots[i]); + kvm_for_each_memslot(memslot, slots) { + r = kvm_iommu_map_pages(kvm, memslot); if (r) break; } @@ -187,6 +190,8 @@ int kvm_assign_device(struct kvm *kvm, goto out_unmap; } + pdev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED; + printk(KERN_DEBUG "assign device %x:%x:%x.%x\n", assigned_dev->host_segnr, assigned_dev->host_busnr, @@ -215,6 +220,8 @@ int kvm_deassign_device(struct kvm *kvm, iommu_detach_device(domain, &pdev->dev); + pdev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED; + printk(KERN_DEBUG "deassign device %x:%x:%x.%x\n", assigned_dev->host_segnr, assigned_dev->host_busnr, @@ -228,12 +235,12 @@ int kvm_iommu_map_guest(struct kvm *kvm) { int r; - if (!iommu_found()) { + if (!iommu_present(&pci_bus_type)) { printk(KERN_ERR "%s: iommu not found\n", __func__); return -ENODEV; } - kvm->arch.iommu_domain = iommu_domain_alloc(); + kvm->arch.iommu_domain = iommu_domain_alloc(&pci_bus_type); if (!kvm->arch.iommu_domain) return -ENOMEM; @@ -286,15 +293,15 @@ static void kvm_iommu_put_pages(struct kvm *kvm, while (gfn < end_gfn) { unsigned long unmap_pages; - int order; + size_t size; /* Get physical address */ phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn)); pfn = phys >> PAGE_SHIFT; /* Unmap address from IO address space */ - order = iommu_unmap(domain, gfn_to_gpa(gfn), 0); - unmap_pages = 1ULL << order; + size = iommu_unmap(domain, gfn_to_gpa(gfn), PAGE_SIZE); + unmap_pages = 1ULL << get_order(size); /* Unpin all pages we just unmapped to not leak any memory */ kvm_unpin_pages(kvm, pfn, unmap_pages); @@ -305,16 +312,16 @@ static void kvm_iommu_put_pages(struct kvm *kvm, static int kvm_iommu_unmap_memslots(struct kvm *kvm) { - int i, idx; + int idx; struct kvm_memslots *slots; + struct kvm_memory_slot *memslot; idx = srcu_read_lock(&kvm->srcu); slots = kvm_memslots(kvm); - for (i = 0; i < slots->nmemslots; i++) { - kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn, - slots->memslots[i].npages); - } + kvm_for_each_memslot(memslot, slots) + kvm_iommu_put_pages(kvm, memslot->base_gfn, memslot->npages); + srcu_read_unlock(&kvm->srcu, idx); return 0; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index aefdda390f5e..7287bf5d1c9e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -47,6 +47,8 @@ #include <linux/srcu.h> #include <linux/hugetlb.h> #include <linux/slab.h> +#include <linux/sort.h> +#include <linux/bsearch.h> #include <asm/processor.h> #include <asm/io.h> @@ -438,6 +440,15 @@ static int kvm_init_mmu_notifier(struct kvm *kvm) #endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */ +static void kvm_init_memslots_id(struct kvm *kvm) +{ + int i; + struct kvm_memslots *slots = kvm->memslots; + + for (i = 0; i < KVM_MEM_SLOTS_NUM; i++) + slots->id_to_index[i] = slots->memslots[i].id = i; +} + static struct kvm *kvm_create_vm(void) { int r, i; @@ -463,6 +474,7 @@ static struct kvm *kvm_create_vm(void) kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); if (!kvm->memslots) goto out_err_nosrcu; + kvm_init_memslots_id(kvm); if (init_srcu_struct(&kvm->srcu)) goto out_err_nosrcu; for (i = 0; i < KVM_NR_BUSES; i++) { @@ -545,11 +557,11 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free, void kvm_free_physmem(struct kvm *kvm) { - int i; struct kvm_memslots *slots = kvm->memslots; + struct kvm_memory_slot *memslot; - for (i = 0; i < slots->nmemslots; ++i) - kvm_free_physmem_slot(&slots->memslots[i], NULL); + kvm_for_each_memslot(memslot, slots) + kvm_free_physmem_slot(memslot, NULL); kfree(kvm->memslots); } @@ -623,10 +635,69 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) return -ENOMEM; memslot->dirty_bitmap_head = memslot->dirty_bitmap; + memslot->nr_dirty_pages = 0; return 0; } #endif /* !CONFIG_S390 */ +static struct kvm_memory_slot * +search_memslots(struct kvm_memslots *slots, gfn_t gfn) +{ + struct kvm_memory_slot *memslot; + + kvm_for_each_memslot(memslot, slots) + if (gfn >= memslot->base_gfn && + gfn < memslot->base_gfn + memslot->npages) + return memslot; + + return NULL; +} + +static int cmp_memslot(const void *slot1, const void *slot2) +{ + struct kvm_memory_slot *s1, *s2; + + s1 = (struct kvm_memory_slot *)slot1; + s2 = (struct kvm_memory_slot *)slot2; + + if (s1->npages < s2->npages) + return 1; + if (s1->npages > s2->npages) + return -1; + + return 0; +} + +/* + * Sort the memslots base on its size, so the larger slots + * will get better fit. + */ +static void sort_memslots(struct kvm_memslots *slots) +{ + int i; + + sort(slots->memslots, KVM_MEM_SLOTS_NUM, + sizeof(struct kvm_memory_slot), cmp_memslot, NULL); + + for (i = 0; i < KVM_MEM_SLOTS_NUM; i++) + slots->id_to_index[slots->memslots[i].id] = i; +} + +void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new) +{ + if (new) { + int id = new->id; + struct kvm_memory_slot *old = id_to_memslot(slots, id); + unsigned long npages = old->npages; + + *old = *new; + if (new->npages != npages) + sort_memslots(slots); + } + + slots->generation++; +} + /* * Allocate some memory and give it an address in the guest physical address * space. @@ -660,12 +731,12 @@ int __kvm_set_memory_region(struct kvm *kvm, (void __user *)(unsigned long)mem->userspace_addr, mem->memory_size))) goto out; - if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS) + if (mem->slot >= KVM_MEM_SLOTS_NUM) goto out; if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr) goto out; - memslot = &kvm->memslots->memslots[mem->slot]; + memslot = id_to_memslot(kvm->memslots, mem->slot); base_gfn = mem->guest_phys_addr >> PAGE_SHIFT; npages = mem->memory_size >> PAGE_SHIFT; @@ -772,15 +843,17 @@ skip_lpage: #endif /* not defined CONFIG_S390 */ if (!npages) { + struct kvm_memory_slot *slot; + r = -ENOMEM; - slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); + slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots), + GFP_KERNEL); if (!slots) goto out_free; - memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); - if (mem->slot >= slots->nmemslots) - slots->nmemslots = mem->slot + 1; - slots->generation++; - slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID; + slot = id_to_memslot(slots, mem->slot); + slot->flags |= KVM_MEMSLOT_INVALID; + + update_memslots(slots, NULL); old_memslots = kvm->memslots; rcu_assign_pointer(kvm->memslots, slots); @@ -808,13 +881,10 @@ skip_lpage: } r = -ENOMEM; - slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL); + slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots), + GFP_KERNEL); if (!slots) goto out_free; - memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots)); - if (mem->slot >= slots->nmemslots) - slots->nmemslots = mem->slot + 1; - slots->generation++; /* actual memory is freed via old in kvm_free_physmem_slot below */ if (!npages) { @@ -824,7 +894,7 @@ skip_lpage: new.lpage_info[i] = NULL; } - slots->memslots[mem->slot] = new; + update_memslots(slots, &new); old_memslots = kvm->memslots; rcu_assign_pointer(kvm->memslots, slots); synchronize_srcu_expedited(&kvm->srcu); @@ -886,7 +956,7 @@ int kvm_get_dirty_log(struct kvm *kvm, if (log->slot >= KVM_MEMORY_SLOTS) goto out; - memslot = &kvm->memslots->memslots[log->slot]; + memslot = id_to_memslot(kvm->memslots, log->slot); r = -ENOENT; if (!memslot->dirty_bitmap) goto out; @@ -964,16 +1034,7 @@ EXPORT_SYMBOL_GPL(kvm_is_error_hva); static struct kvm_memory_slot *__gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn) { - int i; - - for (i = 0; i < slots->nmemslots; ++i) { - struct kvm_memory_slot *memslot = &slots->memslots[i]; - - if (gfn >= memslot->base_gfn - && gfn < memslot->base_gfn + memslot->npages) - return memslot; - } - return NULL; + return search_memslots(slots, gfn); } struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) @@ -984,20 +1045,13 @@ EXPORT_SYMBOL_GPL(gfn_to_memslot); int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn) { - int i; - struct kvm_memslots *slots = kvm_memslots(kvm); - - for (i = 0; i < KVM_MEMORY_SLOTS; ++i) { - struct kvm_memory_slot *memslot = &slots->memslots[i]; + struct kvm_memory_slot *memslot = gfn_to_memslot(kvm, gfn); - if (memslot->flags & KVM_MEMSLOT_INVALID) - continue; + if (!memslot || memslot->id >= KVM_MEMORY_SLOTS || + memslot->flags & KVM_MEMSLOT_INVALID) + return 0; - if (gfn >= memslot->base_gfn - && gfn < memslot->base_gfn + memslot->npages) - return 1; - } - return 0; + return 1; } EXPORT_SYMBOL_GPL(kvm_is_visible_gfn); @@ -1489,7 +1543,8 @@ void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot, if (memslot && memslot->dirty_bitmap) { unsigned long rel_gfn = gfn - memslot->base_gfn; - __set_bit_le(rel_gfn, memslot->dirty_bitmap); + if (!__test_and_set_bit_le(rel_gfn, memslot->dirty_bitmap)) + memslot->nr_dirty_pages++; } } @@ -1688,10 +1743,6 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) smp_wmb(); atomic_inc(&kvm->online_vcpus); -#ifdef CONFIG_KVM_APIC_ARCHITECTURE - if (kvm->bsp_vcpu_id == id) - kvm->bsp_vcpu = vcpu; -#endif mutex_unlock(&kvm->lock); return r; @@ -1766,12 +1817,11 @@ out_free1: struct kvm_regs *kvm_regs; r = -ENOMEM; - kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL); - if (!kvm_regs) + kvm_regs = memdup_user(argp, sizeof(*kvm_regs)); + if (IS_ERR(kvm_regs)) { + r = PTR_ERR(kvm_regs); goto out; - r = -EFAULT; - if (copy_from_user(kvm_regs, argp, sizeof(struct kvm_regs))) - goto out_free2; + } r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs); if (r) goto out_free2; @@ -1795,13 +1845,11 @@ out_free2: break; } case KVM_SET_SREGS: { - kvm_sregs = kmalloc(sizeof(struct kvm_sregs), GFP_KERNEL); - r = -ENOMEM; - if (!kvm_sregs) - goto out; - r = -EFAULT; - if (copy_from_user(kvm_sregs, argp, sizeof(struct kvm_sregs))) + kvm_sregs = memdup_user(argp, sizeof(*kvm_sregs)); + if (IS_ERR(kvm_sregs)) { + r = PTR_ERR(kvm_sregs); goto out; + } r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, kvm_sregs); if (r) goto out; @@ -1897,13 +1945,11 @@ out_free2: break; } case KVM_SET_FPU: { - fpu = kmalloc(sizeof(struct kvm_fpu), GFP_KERNEL); - r = -ENOMEM; - if (!fpu) - goto out; - r = -EFAULT; - if (copy_from_user(fpu, argp, sizeof(struct kvm_fpu))) + fpu = memdup_user(argp, sizeof(*fpu)); + if (IS_ERR(fpu)) { + r = PTR_ERR(fpu); goto out; + } r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu); if (r) goto out; @@ -2391,24 +2437,92 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus) int i; for (i = 0; i < bus->dev_count; i++) { - struct kvm_io_device *pos = bus->devs[i]; + struct kvm_io_device *pos = bus->range[i].dev; kvm_iodevice_destructor(pos); } kfree(bus); } +int kvm_io_bus_sort_cmp(const void *p1, const void *p2) +{ + const struct kvm_io_range *r1 = p1; + const struct kvm_io_range *r2 = p2; + + if (r1->addr < r2->addr) + return -1; + if (r1->addr + r1->len > r2->addr + r2->len) + return 1; + return 0; +} + +int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev, + gpa_t addr, int len) +{ + if (bus->dev_count == NR_IOBUS_DEVS) + return -ENOSPC; + + bus->range[bus->dev_count++] = (struct kvm_io_range) { + .addr = addr, + .len = len, + .dev = dev, + }; + + sort(bus->range, bus->dev_count, sizeof(struct kvm_io_range), + kvm_io_bus_sort_cmp, NULL); + + return 0; +} + +int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus, + gpa_t addr, int len) +{ + struct kvm_io_range *range, key; + int off; + + key = (struct kvm_io_range) { + .addr = addr, + .len = len, + }; + + range = bsearch(&key, bus->range, bus->dev_count, + sizeof(struct kvm_io_range), kvm_io_bus_sort_cmp); + if (range == NULL) + return -ENOENT; + + off = range - bus->range; + + while (off > 0 && kvm_io_bus_sort_cmp(&key, &bus->range[off-1]) == 0) + off--; + + return off; +} + /* kvm_io_bus_write - called under kvm->slots_lock */ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, const void *val) { - int i; + int idx; struct kvm_io_bus *bus; + struct kvm_io_range range; + + range = (struct kvm_io_range) { + .addr = addr, + .len = len, + }; bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); - for (i = 0; i < bus->dev_count; i++) - if (!kvm_iodevice_write(bus->devs[i], addr, len, val)) + idx = kvm_io_bus_get_first_dev(bus, addr, len); + if (idx < 0) + return -EOPNOTSUPP; + + while (idx < bus->dev_count && + kvm_io_bus_sort_cmp(&range, &bus->range[idx]) == 0) { + if (!kvm_iodevice_write(bus->range[idx].dev, addr, len, val)) return 0; + idx++; + } + return -EOPNOTSUPP; } @@ -2416,19 +2530,33 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int len, void *val) { - int i; + int idx; struct kvm_io_bus *bus; + struct kvm_io_range range; + + range = (struct kvm_io_range) { + .addr = addr, + .len = len, + }; bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu); - for (i = 0; i < bus->dev_count; i++) - if (!kvm_iodevice_read(bus->devs[i], addr, len, val)) + idx = kvm_io_bus_get_first_dev(bus, addr, len); + if (idx < 0) + return -EOPNOTSUPP; + + while (idx < bus->dev_count && + kvm_io_bus_sort_cmp(&range, &bus->range[idx]) == 0) { + if (!kvm_iodevice_read(bus->range[idx].dev, addr, len, val)) return 0; + idx++; + } + return -EOPNOTSUPP; } /* Caller must hold slots_lock. */ -int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, - struct kvm_io_device *dev) +int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, + int len, struct kvm_io_device *dev) { struct kvm_io_bus *new_bus, *bus; @@ -2436,11 +2564,10 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, if (bus->dev_count > NR_IOBUS_DEVS-1) return -ENOSPC; - new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL); + new_bus = kmemdup(bus, sizeof(struct kvm_io_bus), GFP_KERNEL); if (!new_bus) return -ENOMEM; - memcpy(new_bus, bus, sizeof(struct kvm_io_bus)); - new_bus->devs[new_bus->dev_count++] = dev; + kvm_io_bus_insert_dev(new_bus, dev, addr, len); rcu_assign_pointer(kvm->buses[bus_idx], new_bus); synchronize_srcu_expedited(&kvm->srcu); kfree(bus); @@ -2455,18 +2582,21 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, int i, r; struct kvm_io_bus *new_bus, *bus; - new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL); + bus = kvm->buses[bus_idx]; + + new_bus = kmemdup(bus, sizeof(*bus), GFP_KERNEL); if (!new_bus) return -ENOMEM; - bus = kvm->buses[bus_idx]; - memcpy(new_bus, bus, sizeof(struct kvm_io_bus)); - r = -ENOENT; for (i = 0; i < new_bus->dev_count; i++) - if (new_bus->devs[i] == dev) { + if (new_bus->range[i].dev == dev) { r = 0; - new_bus->devs[i] = new_bus->devs[--new_bus->dev_count]; + new_bus->dev_count--; + new_bus->range[i] = new_bus->range[new_bus->dev_count]; + sort(new_bus->range, new_bus->dev_count, + sizeof(struct kvm_io_range), + kvm_io_bus_sort_cmp, NULL); break; } @@ -2524,15 +2654,29 @@ static const struct file_operations *stat_fops[] = { [KVM_STAT_VM] = &vm_stat_fops, }; -static void kvm_init_debug(void) +static int kvm_init_debug(void) { + int r = -EFAULT; struct kvm_stats_debugfs_item *p; kvm_debugfs_dir = debugfs_create_dir("kvm", NULL); - for (p = debugfs_entries; p->name; ++p) + if (kvm_debugfs_dir == NULL) + goto out; + + for (p = debugfs_entries; p->name; ++p) { p->dentry = debugfs_create_file(p->name, 0444, kvm_debugfs_dir, (void *)(long)p->offset, stat_fops[p->kind]); + if (p->dentry == NULL) + goto out_dir; + } + + return 0; + +out_dir: + debugfs_remove_recursive(kvm_debugfs_dir); +out: + return r; } static void kvm_exit_debug(void) @@ -2676,10 +2820,16 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align, kvm_preempt_ops.sched_in = kvm_sched_in; kvm_preempt_ops.sched_out = kvm_sched_out; - kvm_init_debug(); + r = kvm_init_debug(); + if (r) { + printk(KERN_ERR "kvm: create debugfs files failed\n"); + goto out_undebugfs; + } return 0; +out_undebugfs: + unregister_syscore_ops(&kvm_syscore_ops); out_unreg: kvm_async_pf_deinit(); out_free: |