summaryrefslogtreecommitdiff
path: root/virt
diff options
context:
space:
mode:
Diffstat (limited to 'virt')
-rw-r--r--virt/kvm/assigned-dev.c157
-rw-r--r--virt/kvm/coalesced_mmio.c137
-rw-r--r--virt/kvm/coalesced_mmio.h7
-rw-r--r--virt/kvm/eventfd.c3
-rw-r--r--virt/kvm/ioapic.c20
-rw-r--r--virt/kvm/iommu.c37
-rw-r--r--virt/kvm/kvm_main.c316
7 files changed, 460 insertions, 217 deletions
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index 4e9eaeb518c7..758e3b36d4cf 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -17,6 +17,8 @@
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
+#include <linux/namei.h>
+#include <linux/fs.h>
#include "irq.h"
static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct list_head *head,
@@ -58,8 +60,6 @@ static int find_index_from_host_irq(struct kvm_assigned_dev_kernel
static irqreturn_t kvm_assigned_dev_thread(int irq, void *dev_id)
{
struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
- u32 vector;
- int index;
if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_INTX) {
spin_lock(&assigned_dev->intx_lock);
@@ -68,31 +68,35 @@ static irqreturn_t kvm_assigned_dev_thread(int irq, void *dev_id)
spin_unlock(&assigned_dev->intx_lock);
}
- if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSIX) {
- index = find_index_from_host_irq(assigned_dev, irq);
- if (index >= 0) {
- vector = assigned_dev->
- guest_msix_entries[index].vector;
- kvm_set_irq(assigned_dev->kvm,
- assigned_dev->irq_source_id, vector, 1);
- }
- } else
+ kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
+ assigned_dev->guest_irq, 1);
+
+ return IRQ_HANDLED;
+}
+
+#ifdef __KVM_HAVE_MSIX
+static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id)
+{
+ struct kvm_assigned_dev_kernel *assigned_dev = dev_id;
+ int index = find_index_from_host_irq(assigned_dev, irq);
+ u32 vector;
+
+ if (index >= 0) {
+ vector = assigned_dev->guest_msix_entries[index].vector;
kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
- assigned_dev->guest_irq, 1);
+ vector, 1);
+ }
return IRQ_HANDLED;
}
+#endif
/* Ack the irq line for an assigned device */
static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
{
- struct kvm_assigned_dev_kernel *dev;
-
- if (kian->gsi == -1)
- return;
-
- dev = container_of(kian, struct kvm_assigned_dev_kernel,
- ack_notifier);
+ struct kvm_assigned_dev_kernel *dev =
+ container_of(kian, struct kvm_assigned_dev_kernel,
+ ack_notifier);
kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0);
@@ -110,8 +114,9 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
static void deassign_guest_irq(struct kvm *kvm,
struct kvm_assigned_dev_kernel *assigned_dev)
{
- kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier);
- assigned_dev->ack_notifier.gsi = -1;
+ if (assigned_dev->ack_notifier.gsi != -1)
+ kvm_unregister_irq_ack_notifier(kvm,
+ &assigned_dev->ack_notifier);
kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id,
assigned_dev->guest_irq, 0);
@@ -143,7 +148,7 @@ static void deassign_host_irq(struct kvm *kvm,
for (i = 0; i < assigned_dev->entries_nr; i++)
free_irq(assigned_dev->host_msix_entries[i].vector,
- (void *)assigned_dev);
+ assigned_dev);
assigned_dev->entries_nr = 0;
kfree(assigned_dev->host_msix_entries);
@@ -153,7 +158,7 @@ static void deassign_host_irq(struct kvm *kvm,
/* Deal with MSI and INTx */
disable_irq(assigned_dev->host_irq);
- free_irq(assigned_dev->host_irq, (void *)assigned_dev);
+ free_irq(assigned_dev->host_irq, assigned_dev);
if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_MSI)
pci_disable_msi(assigned_dev->dev);
@@ -205,6 +210,8 @@ static void kvm_free_assigned_device(struct kvm *kvm,
else
pci_restore_state(assigned_dev->dev);
+ assigned_dev->dev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;
+
pci_release_regions(assigned_dev->dev);
pci_disable_device(assigned_dev->dev);
pci_dev_put(assigned_dev->dev);
@@ -237,7 +244,7 @@ static int assigned_device_enable_host_intx(struct kvm *kvm,
* are going to be long delays in accepting, acking, etc.
*/
if (request_threaded_irq(dev->host_irq, NULL, kvm_assigned_dev_thread,
- IRQF_ONESHOT, dev->irq_name, (void *)dev))
+ IRQF_ONESHOT, dev->irq_name, dev))
return -EIO;
return 0;
}
@@ -256,7 +263,7 @@ static int assigned_device_enable_host_msi(struct kvm *kvm,
dev->host_irq = dev->dev->irq;
if (request_threaded_irq(dev->host_irq, NULL, kvm_assigned_dev_thread,
- 0, dev->irq_name, (void *)dev)) {
+ 0, dev->irq_name, dev)) {
pci_disable_msi(dev->dev);
return -EIO;
}
@@ -282,8 +289,8 @@ static int assigned_device_enable_host_msix(struct kvm *kvm,
for (i = 0; i < dev->entries_nr; i++) {
r = request_threaded_irq(dev->host_msix_entries[i].vector,
- NULL, kvm_assigned_dev_thread,
- 0, dev->irq_name, (void *)dev);
+ NULL, kvm_assigned_dev_thread_msix,
+ 0, dev->irq_name, dev);
if (r)
goto err;
}
@@ -291,7 +298,7 @@ static int assigned_device_enable_host_msix(struct kvm *kvm,
return 0;
err:
for (i -= 1; i >= 0; i--)
- free_irq(dev->host_msix_entries[i].vector, (void *)dev);
+ free_irq(dev->host_msix_entries[i].vector, dev);
pci_disable_msix(dev->dev);
return r;
}
@@ -404,7 +411,8 @@ static int assign_guest_irq(struct kvm *kvm,
if (!r) {
dev->irq_requested_type |= guest_irq_type;
- kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
+ if (dev->ack_notifier.gsi != -1)
+ kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
} else
kvm_free_irq_source_id(kvm, dev->irq_source_id);
@@ -474,12 +482,76 @@ out:
return r;
}
+/*
+ * We want to test whether the caller has been granted permissions to
+ * use this device. To be able to configure and control the device,
+ * the user needs access to PCI configuration space and BAR resources.
+ * These are accessed through PCI sysfs. PCI config space is often
+ * passed to the process calling this ioctl via file descriptor, so we
+ * can't rely on access to that file. We can check for permissions
+ * on each of the BAR resource files, which is a pretty clear
+ * indicator that the user has been granted access to the device.
+ */
+static int probe_sysfs_permissions(struct pci_dev *dev)
+{
+#ifdef CONFIG_SYSFS
+ int i;
+ bool bar_found = false;
+
+ for (i = PCI_STD_RESOURCES; i <= PCI_STD_RESOURCE_END; i++) {
+ char *kpath, *syspath;
+ struct path path;
+ struct inode *inode;
+ int r;
+
+ if (!pci_resource_len(dev, i))
+ continue;
+
+ kpath = kobject_get_path(&dev->dev.kobj, GFP_KERNEL);
+ if (!kpath)
+ return -ENOMEM;
+
+ /* Per sysfs-rules, sysfs is always at /sys */
+ syspath = kasprintf(GFP_KERNEL, "/sys%s/resource%d", kpath, i);
+ kfree(kpath);
+ if (!syspath)
+ return -ENOMEM;
+
+ r = kern_path(syspath, LOOKUP_FOLLOW, &path);
+ kfree(syspath);
+ if (r)
+ return r;
+
+ inode = path.dentry->d_inode;
+
+ r = inode_permission(inode, MAY_READ | MAY_WRITE | MAY_ACCESS);
+ path_put(&path);
+ if (r)
+ return r;
+
+ bar_found = true;
+ }
+
+ /* If no resources, probably something special */
+ if (!bar_found)
+ return -EPERM;
+
+ return 0;
+#else
+ return -EINVAL; /* No way to control the device without sysfs */
+#endif
+}
+
static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
struct kvm_assigned_pci_dev *assigned_dev)
{
int r = 0, idx;
struct kvm_assigned_dev_kernel *match;
struct pci_dev *dev;
+ u8 header_type;
+
+ if (!(assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU))
+ return -EINVAL;
mutex_lock(&kvm->lock);
idx = srcu_read_lock(&kvm->srcu);
@@ -507,6 +579,18 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
r = -EINVAL;
goto out_free;
}
+
+ /* Don't allow bridges to be assigned */
+ pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
+ if ((header_type & PCI_HEADER_TYPE) != PCI_HEADER_TYPE_NORMAL) {
+ r = -EPERM;
+ goto out_put;
+ }
+
+ r = probe_sysfs_permissions(dev);
+ if (r)
+ goto out_put;
+
if (pci_enable_device(dev)) {
printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
r = -EBUSY;
@@ -538,16 +622,14 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
list_add(&match->list, &kvm->arch.assigned_dev_head);
- if (assigned_dev->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU) {
- if (!kvm->arch.iommu_domain) {
- r = kvm_iommu_map_guest(kvm);
- if (r)
- goto out_list_del;
- }
- r = kvm_assign_device(kvm, match);
+ if (!kvm->arch.iommu_domain) {
+ r = kvm_iommu_map_guest(kvm);
if (r)
goto out_list_del;
}
+ r = kvm_assign_device(kvm, match);
+ if (r)
+ goto out_list_del;
out:
srcu_read_unlock(&kvm->srcu, idx);
@@ -587,8 +669,7 @@ static int kvm_vm_ioctl_deassign_device(struct kvm *kvm,
goto out;
}
- if (match->flags & KVM_DEV_ASSIGN_ENABLE_IOMMU)
- kvm_deassign_device(kvm, match);
+ kvm_deassign_device(kvm, match);
kvm_free_assigned_device(kvm, match);
diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c
index fc8487564d1f..88b2fe3ddf42 100644
--- a/virt/kvm/coalesced_mmio.c
+++ b/virt/kvm/coalesced_mmio.c
@@ -24,10 +24,25 @@ static inline struct kvm_coalesced_mmio_dev *to_mmio(struct kvm_io_device *dev)
static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev,
gpa_t addr, int len)
{
- struct kvm_coalesced_mmio_zone *zone;
+ /* is it in a batchable area ?
+ * (addr,len) is fully included in
+ * (zone->addr, zone->size)
+ */
+ if (len < 0)
+ return 0;
+ if (addr + len < addr)
+ return 0;
+ if (addr < dev->zone.addr)
+ return 0;
+ if (addr + len > dev->zone.addr + dev->zone.size)
+ return 0;
+ return 1;
+}
+
+static int coalesced_mmio_has_room(struct kvm_coalesced_mmio_dev *dev)
+{
struct kvm_coalesced_mmio_ring *ring;
unsigned avail;
- int i;
/* Are we able to batch it ? */
@@ -37,25 +52,12 @@ static int coalesced_mmio_in_range(struct kvm_coalesced_mmio_dev *dev,
*/
ring = dev->kvm->coalesced_mmio_ring;
avail = (ring->first - ring->last - 1) % KVM_COALESCED_MMIO_MAX;
- if (avail < KVM_MAX_VCPUS) {
+ if (avail == 0) {
/* full */
return 0;
}
- /* is it in a batchable area ? */
-
- for (i = 0; i < dev->nb_zones; i++) {
- zone = &dev->zone[i];
-
- /* (addr,len) is fully included in
- * (zone->addr, zone->size)
- */
-
- if (zone->addr <= addr &&
- addr + len <= zone->addr + zone->size)
- return 1;
- }
- return 0;
+ return 1;
}
static int coalesced_mmio_write(struct kvm_io_device *this,
@@ -63,10 +65,16 @@ static int coalesced_mmio_write(struct kvm_io_device *this,
{
struct kvm_coalesced_mmio_dev *dev = to_mmio(this);
struct kvm_coalesced_mmio_ring *ring = dev->kvm->coalesced_mmio_ring;
+
if (!coalesced_mmio_in_range(dev, addr, len))
return -EOPNOTSUPP;
- spin_lock(&dev->lock);
+ spin_lock(&dev->kvm->ring_lock);
+
+ if (!coalesced_mmio_has_room(dev)) {
+ spin_unlock(&dev->kvm->ring_lock);
+ return -EOPNOTSUPP;
+ }
/* copy data in first free entry of the ring */
@@ -75,7 +83,7 @@ static int coalesced_mmio_write(struct kvm_io_device *this,
memcpy(ring->coalesced_mmio[ring->last].data, val, len);
smp_wmb();
ring->last = (ring->last + 1) % KVM_COALESCED_MMIO_MAX;
- spin_unlock(&dev->lock);
+ spin_unlock(&dev->kvm->ring_lock);
return 0;
}
@@ -83,6 +91,8 @@ static void coalesced_mmio_destructor(struct kvm_io_device *this)
{
struct kvm_coalesced_mmio_dev *dev = to_mmio(this);
+ list_del(&dev->list);
+
kfree(dev);
}
@@ -93,7 +103,6 @@ static const struct kvm_io_device_ops coalesced_mmio_ops = {
int kvm_coalesced_mmio_init(struct kvm *kvm)
{
- struct kvm_coalesced_mmio_dev *dev;
struct page *page;
int ret;
@@ -101,31 +110,18 @@ int kvm_coalesced_mmio_init(struct kvm *kvm)
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
if (!page)
goto out_err;
- kvm->coalesced_mmio_ring = page_address(page);
- ret = -ENOMEM;
- dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL);
- if (!dev)
- goto out_free_page;
- spin_lock_init(&dev->lock);
- kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops);
- dev->kvm = kvm;
- kvm->coalesced_mmio_dev = dev;
-
- mutex_lock(&kvm->slots_lock);
- ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &dev->dev);
- mutex_unlock(&kvm->slots_lock);
- if (ret < 0)
- goto out_free_dev;
+ ret = 0;
+ kvm->coalesced_mmio_ring = page_address(page);
- return ret;
+ /*
+ * We're using this spinlock to sync access to the coalesced ring.
+ * The list doesn't need it's own lock since device registration and
+ * unregistration should only happen when kvm->slots_lock is held.
+ */
+ spin_lock_init(&kvm->ring_lock);
+ INIT_LIST_HEAD(&kvm->coalesced_zones);
-out_free_dev:
- kvm->coalesced_mmio_dev = NULL;
- kfree(dev);
-out_free_page:
- kvm->coalesced_mmio_ring = NULL;
- __free_page(page);
out_err:
return ret;
}
@@ -139,51 +135,50 @@ void kvm_coalesced_mmio_free(struct kvm *kvm)
int kvm_vm_ioctl_register_coalesced_mmio(struct kvm *kvm,
struct kvm_coalesced_mmio_zone *zone)
{
- struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev;
+ int ret;
+ struct kvm_coalesced_mmio_dev *dev;
- if (dev == NULL)
- return -ENXIO;
+ dev = kzalloc(sizeof(struct kvm_coalesced_mmio_dev), GFP_KERNEL);
+ if (!dev)
+ return -ENOMEM;
+
+ kvm_iodevice_init(&dev->dev, &coalesced_mmio_ops);
+ dev->kvm = kvm;
+ dev->zone = *zone;
mutex_lock(&kvm->slots_lock);
- if (dev->nb_zones >= KVM_COALESCED_MMIO_ZONE_MAX) {
- mutex_unlock(&kvm->slots_lock);
- return -ENOBUFS;
- }
+ ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, zone->addr,
+ zone->size, &dev->dev);
+ if (ret < 0)
+ goto out_free_dev;
+ list_add_tail(&dev->list, &kvm->coalesced_zones);
+ mutex_unlock(&kvm->slots_lock);
- dev->zone[dev->nb_zones] = *zone;
- dev->nb_zones++;
+ return ret;
+out_free_dev:
mutex_unlock(&kvm->slots_lock);
+
+ kfree(dev);
+
+ if (dev == NULL)
+ return -ENXIO;
+
return 0;
}
int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm,
struct kvm_coalesced_mmio_zone *zone)
{
- int i;
- struct kvm_coalesced_mmio_dev *dev = kvm->coalesced_mmio_dev;
- struct kvm_coalesced_mmio_zone *z;
-
- if (dev == NULL)
- return -ENXIO;
+ struct kvm_coalesced_mmio_dev *dev, *tmp;
mutex_lock(&kvm->slots_lock);
- i = dev->nb_zones;
- while (i) {
- z = &dev->zone[i - 1];
-
- /* unregister all zones
- * included in (zone->addr, zone->size)
- */
-
- if (zone->addr <= z->addr &&
- z->addr + z->size <= zone->addr + zone->size) {
- dev->nb_zones--;
- *z = dev->zone[dev->nb_zones];
+ list_for_each_entry_safe(dev, tmp, &kvm->coalesced_zones, list)
+ if (coalesced_mmio_in_range(dev, zone->addr, zone->size)) {
+ kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &dev->dev);
+ kvm_iodevice_destructor(&dev->dev);
}
- i--;
- }
mutex_unlock(&kvm->slots_lock);
diff --git a/virt/kvm/coalesced_mmio.h b/virt/kvm/coalesced_mmio.h
index 8a5959e3535f..b280c20444d1 100644
--- a/virt/kvm/coalesced_mmio.h
+++ b/virt/kvm/coalesced_mmio.h
@@ -12,14 +12,13 @@
#ifdef CONFIG_KVM_MMIO
-#define KVM_COALESCED_MMIO_ZONE_MAX 100
+#include <linux/list.h>
struct kvm_coalesced_mmio_dev {
+ struct list_head list;
struct kvm_io_device dev;
struct kvm *kvm;
- spinlock_t lock;
- int nb_zones;
- struct kvm_coalesced_mmio_zone zone[KVM_COALESCED_MMIO_ZONE_MAX];
+ struct kvm_coalesced_mmio_zone zone;
};
int kvm_coalesced_mmio_init(struct kvm *kvm);
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 73358d256fa2..f59c1e8de7a2 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -586,7 +586,8 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
kvm_iodevice_init(&p->dev, &ioeventfd_ops);
- ret = kvm_io_bus_register_dev(kvm, bus_idx, &p->dev);
+ ret = kvm_io_bus_register_dev(kvm, bus_idx, p->addr, p->length,
+ &p->dev);
if (ret < 0)
goto unlock_fail;
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 8df1ca104a7f..dcaf272c26c0 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -185,7 +185,7 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
irqe.dest_mode = 0; /* Physical mode. */
/* need to read apic_id from apic regiest since
* it can be rewritten */
- irqe.dest_id = ioapic->kvm->bsp_vcpu->vcpu_id;
+ irqe.dest_id = ioapic->kvm->bsp_vcpu_id;
}
#endif
return kvm_irq_delivery_to_apic(ioapic->kvm, NULL, &irqe);
@@ -332,9 +332,18 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
(void*)addr, len, val);
ASSERT(!(addr & 0xf)); /* check alignment */
- if (len == 4 || len == 8)
+ switch (len) {
+ case 8:
+ case 4:
data = *(u32 *) val;
- else {
+ break;
+ case 2:
+ data = *(u16 *) val;
+ break;
+ case 1:
+ data = *(u8 *) val;
+ break;
+ default:
printk(KERN_WARNING "ioapic: Unsupported size %d\n", len);
return 0;
}
@@ -343,7 +352,7 @@ static int ioapic_mmio_write(struct kvm_io_device *this, gpa_t addr, int len,
spin_lock(&ioapic->lock);
switch (addr) {
case IOAPIC_REG_SELECT:
- ioapic->ioregsel = data;
+ ioapic->ioregsel = data & 0xFF; /* 8-bit register */
break;
case IOAPIC_REG_WINDOW:
@@ -394,7 +403,8 @@ int kvm_ioapic_init(struct kvm *kvm)
kvm_iodevice_init(&ioapic->dev, &ioapic_mmio_ops);
ioapic->kvm = kvm;
mutex_lock(&kvm->slots_lock);
- ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, &ioapic->dev);
+ ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, ioapic->base_address,
+ IOAPIC_MEM_LENGTH, &ioapic->dev);
mutex_unlock(&kvm->slots_lock);
if (ret < 0) {
kvm->arch.vioapic = NULL;
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 78c80f67f535..a457d2138f49 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -25,12 +25,14 @@
#include <linux/list.h>
#include <linux/kvm_host.h>
+#include <linux/module.h>
#include <linux/pci.h>
+#include <linux/stat.h>
#include <linux/dmar.h>
#include <linux/iommu.h>
#include <linux/intel-iommu.h>
-static int allow_unsafe_assigned_interrupts;
+static bool allow_unsafe_assigned_interrupts;
module_param_named(allow_unsafe_assigned_interrupts,
allow_unsafe_assigned_interrupts, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(allow_unsafe_assigned_interrupts,
@@ -111,7 +113,7 @@ int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
/* Map into IO address space */
r = iommu_map(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn),
- get_order(page_size), flags);
+ page_size, flags);
if (r) {
printk(KERN_ERR "kvm_iommu_map_address:"
"iommu failed to map pfn=%llx\n", pfn);
@@ -132,14 +134,15 @@ unmap_pages:
static int kvm_iommu_map_memslots(struct kvm *kvm)
{
- int i, idx, r = 0;
+ int idx, r = 0;
struct kvm_memslots *slots;
+ struct kvm_memory_slot *memslot;
idx = srcu_read_lock(&kvm->srcu);
slots = kvm_memslots(kvm);
- for (i = 0; i < slots->nmemslots; i++) {
- r = kvm_iommu_map_pages(kvm, &slots->memslots[i]);
+ kvm_for_each_memslot(memslot, slots) {
+ r = kvm_iommu_map_pages(kvm, memslot);
if (r)
break;
}
@@ -187,6 +190,8 @@ int kvm_assign_device(struct kvm *kvm,
goto out_unmap;
}
+ pdev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED;
+
printk(KERN_DEBUG "assign device %x:%x:%x.%x\n",
assigned_dev->host_segnr,
assigned_dev->host_busnr,
@@ -215,6 +220,8 @@ int kvm_deassign_device(struct kvm *kvm,
iommu_detach_device(domain, &pdev->dev);
+ pdev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;
+
printk(KERN_DEBUG "deassign device %x:%x:%x.%x\n",
assigned_dev->host_segnr,
assigned_dev->host_busnr,
@@ -228,12 +235,12 @@ int kvm_iommu_map_guest(struct kvm *kvm)
{
int r;
- if (!iommu_found()) {
+ if (!iommu_present(&pci_bus_type)) {
printk(KERN_ERR "%s: iommu not found\n", __func__);
return -ENODEV;
}
- kvm->arch.iommu_domain = iommu_domain_alloc();
+ kvm->arch.iommu_domain = iommu_domain_alloc(&pci_bus_type);
if (!kvm->arch.iommu_domain)
return -ENOMEM;
@@ -286,15 +293,15 @@ static void kvm_iommu_put_pages(struct kvm *kvm,
while (gfn < end_gfn) {
unsigned long unmap_pages;
- int order;
+ size_t size;
/* Get physical address */
phys = iommu_iova_to_phys(domain, gfn_to_gpa(gfn));
pfn = phys >> PAGE_SHIFT;
/* Unmap address from IO address space */
- order = iommu_unmap(domain, gfn_to_gpa(gfn), 0);
- unmap_pages = 1ULL << order;
+ size = iommu_unmap(domain, gfn_to_gpa(gfn), PAGE_SIZE);
+ unmap_pages = 1ULL << get_order(size);
/* Unpin all pages we just unmapped to not leak any memory */
kvm_unpin_pages(kvm, pfn, unmap_pages);
@@ -305,16 +312,16 @@ static void kvm_iommu_put_pages(struct kvm *kvm,
static int kvm_iommu_unmap_memslots(struct kvm *kvm)
{
- int i, idx;
+ int idx;
struct kvm_memslots *slots;
+ struct kvm_memory_slot *memslot;
idx = srcu_read_lock(&kvm->srcu);
slots = kvm_memslots(kvm);
- for (i = 0; i < slots->nmemslots; i++) {
- kvm_iommu_put_pages(kvm, slots->memslots[i].base_gfn,
- slots->memslots[i].npages);
- }
+ kvm_for_each_memslot(memslot, slots)
+ kvm_iommu_put_pages(kvm, memslot->base_gfn, memslot->npages);
+
srcu_read_unlock(&kvm->srcu, idx);
return 0;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index aefdda390f5e..7287bf5d1c9e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -47,6 +47,8 @@
#include <linux/srcu.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
+#include <linux/sort.h>
+#include <linux/bsearch.h>
#include <asm/processor.h>
#include <asm/io.h>
@@ -438,6 +440,15 @@ static int kvm_init_mmu_notifier(struct kvm *kvm)
#endif /* CONFIG_MMU_NOTIFIER && KVM_ARCH_WANT_MMU_NOTIFIER */
+static void kvm_init_memslots_id(struct kvm *kvm)
+{
+ int i;
+ struct kvm_memslots *slots = kvm->memslots;
+
+ for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
+ slots->id_to_index[i] = slots->memslots[i].id = i;
+}
+
static struct kvm *kvm_create_vm(void)
{
int r, i;
@@ -463,6 +474,7 @@ static struct kvm *kvm_create_vm(void)
kvm->memslots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
if (!kvm->memslots)
goto out_err_nosrcu;
+ kvm_init_memslots_id(kvm);
if (init_srcu_struct(&kvm->srcu))
goto out_err_nosrcu;
for (i = 0; i < KVM_NR_BUSES; i++) {
@@ -545,11 +557,11 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
void kvm_free_physmem(struct kvm *kvm)
{
- int i;
struct kvm_memslots *slots = kvm->memslots;
+ struct kvm_memory_slot *memslot;
- for (i = 0; i < slots->nmemslots; ++i)
- kvm_free_physmem_slot(&slots->memslots[i], NULL);
+ kvm_for_each_memslot(memslot, slots)
+ kvm_free_physmem_slot(memslot, NULL);
kfree(kvm->memslots);
}
@@ -623,10 +635,69 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot)
return -ENOMEM;
memslot->dirty_bitmap_head = memslot->dirty_bitmap;
+ memslot->nr_dirty_pages = 0;
return 0;
}
#endif /* !CONFIG_S390 */
+static struct kvm_memory_slot *
+search_memslots(struct kvm_memslots *slots, gfn_t gfn)
+{
+ struct kvm_memory_slot *memslot;
+
+ kvm_for_each_memslot(memslot, slots)
+ if (gfn >= memslot->base_gfn &&
+ gfn < memslot->base_gfn + memslot->npages)
+ return memslot;
+
+ return NULL;
+}
+
+static int cmp_memslot(const void *slot1, const void *slot2)
+{
+ struct kvm_memory_slot *s1, *s2;
+
+ s1 = (struct kvm_memory_slot *)slot1;
+ s2 = (struct kvm_memory_slot *)slot2;
+
+ if (s1->npages < s2->npages)
+ return 1;
+ if (s1->npages > s2->npages)
+ return -1;
+
+ return 0;
+}
+
+/*
+ * Sort the memslots base on its size, so the larger slots
+ * will get better fit.
+ */
+static void sort_memslots(struct kvm_memslots *slots)
+{
+ int i;
+
+ sort(slots->memslots, KVM_MEM_SLOTS_NUM,
+ sizeof(struct kvm_memory_slot), cmp_memslot, NULL);
+
+ for (i = 0; i < KVM_MEM_SLOTS_NUM; i++)
+ slots->id_to_index[slots->memslots[i].id] = i;
+}
+
+void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new)
+{
+ if (new) {
+ int id = new->id;
+ struct kvm_memory_slot *old = id_to_memslot(slots, id);
+ unsigned long npages = old->npages;
+
+ *old = *new;
+ if (new->npages != npages)
+ sort_memslots(slots);
+ }
+
+ slots->generation++;
+}
+
/*
* Allocate some memory and give it an address in the guest physical address
* space.
@@ -660,12 +731,12 @@ int __kvm_set_memory_region(struct kvm *kvm,
(void __user *)(unsigned long)mem->userspace_addr,
mem->memory_size)))
goto out;
- if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
+ if (mem->slot >= KVM_MEM_SLOTS_NUM)
goto out;
if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
goto out;
- memslot = &kvm->memslots->memslots[mem->slot];
+ memslot = id_to_memslot(kvm->memslots, mem->slot);
base_gfn = mem->guest_phys_addr >> PAGE_SHIFT;
npages = mem->memory_size >> PAGE_SHIFT;
@@ -772,15 +843,17 @@ skip_lpage:
#endif /* not defined CONFIG_S390 */
if (!npages) {
+ struct kvm_memory_slot *slot;
+
r = -ENOMEM;
- slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+ slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
+ GFP_KERNEL);
if (!slots)
goto out_free;
- memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
- if (mem->slot >= slots->nmemslots)
- slots->nmemslots = mem->slot + 1;
- slots->generation++;
- slots->memslots[mem->slot].flags |= KVM_MEMSLOT_INVALID;
+ slot = id_to_memslot(slots, mem->slot);
+ slot->flags |= KVM_MEMSLOT_INVALID;
+
+ update_memslots(slots, NULL);
old_memslots = kvm->memslots;
rcu_assign_pointer(kvm->memslots, slots);
@@ -808,13 +881,10 @@ skip_lpage:
}
r = -ENOMEM;
- slots = kzalloc(sizeof(struct kvm_memslots), GFP_KERNEL);
+ slots = kmemdup(kvm->memslots, sizeof(struct kvm_memslots),
+ GFP_KERNEL);
if (!slots)
goto out_free;
- memcpy(slots, kvm->memslots, sizeof(struct kvm_memslots));
- if (mem->slot >= slots->nmemslots)
- slots->nmemslots = mem->slot + 1;
- slots->generation++;
/* actual memory is freed via old in kvm_free_physmem_slot below */
if (!npages) {
@@ -824,7 +894,7 @@ skip_lpage:
new.lpage_info[i] = NULL;
}
- slots->memslots[mem->slot] = new;
+ update_memslots(slots, &new);
old_memslots = kvm->memslots;
rcu_assign_pointer(kvm->memslots, slots);
synchronize_srcu_expedited(&kvm->srcu);
@@ -886,7 +956,7 @@ int kvm_get_dirty_log(struct kvm *kvm,
if (log->slot >= KVM_MEMORY_SLOTS)
goto out;
- memslot = &kvm->memslots->memslots[log->slot];
+ memslot = id_to_memslot(kvm->memslots, log->slot);
r = -ENOENT;
if (!memslot->dirty_bitmap)
goto out;
@@ -964,16 +1034,7 @@ EXPORT_SYMBOL_GPL(kvm_is_error_hva);
static struct kvm_memory_slot *__gfn_to_memslot(struct kvm_memslots *slots,
gfn_t gfn)
{
- int i;
-
- for (i = 0; i < slots->nmemslots; ++i) {
- struct kvm_memory_slot *memslot = &slots->memslots[i];
-
- if (gfn >= memslot->base_gfn
- && gfn < memslot->base_gfn + memslot->npages)
- return memslot;
- }
- return NULL;
+ return search_memslots(slots, gfn);
}
struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn)
@@ -984,20 +1045,13 @@ EXPORT_SYMBOL_GPL(gfn_to_memslot);
int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
{
- int i;
- struct kvm_memslots *slots = kvm_memslots(kvm);
-
- for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
- struct kvm_memory_slot *memslot = &slots->memslots[i];
+ struct kvm_memory_slot *memslot = gfn_to_memslot(kvm, gfn);
- if (memslot->flags & KVM_MEMSLOT_INVALID)
- continue;
+ if (!memslot || memslot->id >= KVM_MEMORY_SLOTS ||
+ memslot->flags & KVM_MEMSLOT_INVALID)
+ return 0;
- if (gfn >= memslot->base_gfn
- && gfn < memslot->base_gfn + memslot->npages)
- return 1;
- }
- return 0;
+ return 1;
}
EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
@@ -1489,7 +1543,8 @@ void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_memory_slot *memslot,
if (memslot && memslot->dirty_bitmap) {
unsigned long rel_gfn = gfn - memslot->base_gfn;
- __set_bit_le(rel_gfn, memslot->dirty_bitmap);
+ if (!__test_and_set_bit_le(rel_gfn, memslot->dirty_bitmap))
+ memslot->nr_dirty_pages++;
}
}
@@ -1688,10 +1743,6 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
smp_wmb();
atomic_inc(&kvm->online_vcpus);
-#ifdef CONFIG_KVM_APIC_ARCHITECTURE
- if (kvm->bsp_vcpu_id == id)
- kvm->bsp_vcpu = vcpu;
-#endif
mutex_unlock(&kvm->lock);
return r;
@@ -1766,12 +1817,11 @@ out_free1:
struct kvm_regs *kvm_regs;
r = -ENOMEM;
- kvm_regs = kzalloc(sizeof(struct kvm_regs), GFP_KERNEL);
- if (!kvm_regs)
+ kvm_regs = memdup_user(argp, sizeof(*kvm_regs));
+ if (IS_ERR(kvm_regs)) {
+ r = PTR_ERR(kvm_regs);
goto out;
- r = -EFAULT;
- if (copy_from_user(kvm_regs, argp, sizeof(struct kvm_regs)))
- goto out_free2;
+ }
r = kvm_arch_vcpu_ioctl_set_regs(vcpu, kvm_regs);
if (r)
goto out_free2;
@@ -1795,13 +1845,11 @@ out_free2:
break;
}
case KVM_SET_SREGS: {
- kvm_sregs = kmalloc(sizeof(struct kvm_sregs), GFP_KERNEL);
- r = -ENOMEM;
- if (!kvm_sregs)
- goto out;
- r = -EFAULT;
- if (copy_from_user(kvm_sregs, argp, sizeof(struct kvm_sregs)))
+ kvm_sregs = memdup_user(argp, sizeof(*kvm_sregs));
+ if (IS_ERR(kvm_sregs)) {
+ r = PTR_ERR(kvm_sregs);
goto out;
+ }
r = kvm_arch_vcpu_ioctl_set_sregs(vcpu, kvm_sregs);
if (r)
goto out;
@@ -1897,13 +1945,11 @@ out_free2:
break;
}
case KVM_SET_FPU: {
- fpu = kmalloc(sizeof(struct kvm_fpu), GFP_KERNEL);
- r = -ENOMEM;
- if (!fpu)
- goto out;
- r = -EFAULT;
- if (copy_from_user(fpu, argp, sizeof(struct kvm_fpu)))
+ fpu = memdup_user(argp, sizeof(*fpu));
+ if (IS_ERR(fpu)) {
+ r = PTR_ERR(fpu);
goto out;
+ }
r = kvm_arch_vcpu_ioctl_set_fpu(vcpu, fpu);
if (r)
goto out;
@@ -2391,24 +2437,92 @@ static void kvm_io_bus_destroy(struct kvm_io_bus *bus)
int i;
for (i = 0; i < bus->dev_count; i++) {
- struct kvm_io_device *pos = bus->devs[i];
+ struct kvm_io_device *pos = bus->range[i].dev;
kvm_iodevice_destructor(pos);
}
kfree(bus);
}
+int kvm_io_bus_sort_cmp(const void *p1, const void *p2)
+{
+ const struct kvm_io_range *r1 = p1;
+ const struct kvm_io_range *r2 = p2;
+
+ if (r1->addr < r2->addr)
+ return -1;
+ if (r1->addr + r1->len > r2->addr + r2->len)
+ return 1;
+ return 0;
+}
+
+int kvm_io_bus_insert_dev(struct kvm_io_bus *bus, struct kvm_io_device *dev,
+ gpa_t addr, int len)
+{
+ if (bus->dev_count == NR_IOBUS_DEVS)
+ return -ENOSPC;
+
+ bus->range[bus->dev_count++] = (struct kvm_io_range) {
+ .addr = addr,
+ .len = len,
+ .dev = dev,
+ };
+
+ sort(bus->range, bus->dev_count, sizeof(struct kvm_io_range),
+ kvm_io_bus_sort_cmp, NULL);
+
+ return 0;
+}
+
+int kvm_io_bus_get_first_dev(struct kvm_io_bus *bus,
+ gpa_t addr, int len)
+{
+ struct kvm_io_range *range, key;
+ int off;
+
+ key = (struct kvm_io_range) {
+ .addr = addr,
+ .len = len,
+ };
+
+ range = bsearch(&key, bus->range, bus->dev_count,
+ sizeof(struct kvm_io_range), kvm_io_bus_sort_cmp);
+ if (range == NULL)
+ return -ENOENT;
+
+ off = range - bus->range;
+
+ while (off > 0 && kvm_io_bus_sort_cmp(&key, &bus->range[off-1]) == 0)
+ off--;
+
+ return off;
+}
+
/* kvm_io_bus_write - called under kvm->slots_lock */
int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
int len, const void *val)
{
- int i;
+ int idx;
struct kvm_io_bus *bus;
+ struct kvm_io_range range;
+
+ range = (struct kvm_io_range) {
+ .addr = addr,
+ .len = len,
+ };
bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
- for (i = 0; i < bus->dev_count; i++)
- if (!kvm_iodevice_write(bus->devs[i], addr, len, val))
+ idx = kvm_io_bus_get_first_dev(bus, addr, len);
+ if (idx < 0)
+ return -EOPNOTSUPP;
+
+ while (idx < bus->dev_count &&
+ kvm_io_bus_sort_cmp(&range, &bus->range[idx]) == 0) {
+ if (!kvm_iodevice_write(bus->range[idx].dev, addr, len, val))
return 0;
+ idx++;
+ }
+
return -EOPNOTSUPP;
}
@@ -2416,19 +2530,33 @@ int kvm_io_bus_write(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
int kvm_io_bus_read(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
int len, void *val)
{
- int i;
+ int idx;
struct kvm_io_bus *bus;
+ struct kvm_io_range range;
+
+ range = (struct kvm_io_range) {
+ .addr = addr,
+ .len = len,
+ };
bus = srcu_dereference(kvm->buses[bus_idx], &kvm->srcu);
- for (i = 0; i < bus->dev_count; i++)
- if (!kvm_iodevice_read(bus->devs[i], addr, len, val))
+ idx = kvm_io_bus_get_first_dev(bus, addr, len);
+ if (idx < 0)
+ return -EOPNOTSUPP;
+
+ while (idx < bus->dev_count &&
+ kvm_io_bus_sort_cmp(&range, &bus->range[idx]) == 0) {
+ if (!kvm_iodevice_read(bus->range[idx].dev, addr, len, val))
return 0;
+ idx++;
+ }
+
return -EOPNOTSUPP;
}
/* Caller must hold slots_lock. */
-int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
- struct kvm_io_device *dev)
+int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
+ int len, struct kvm_io_device *dev)
{
struct kvm_io_bus *new_bus, *bus;
@@ -2436,11 +2564,10 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx,
if (bus->dev_count > NR_IOBUS_DEVS-1)
return -ENOSPC;
- new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL);
+ new_bus = kmemdup(bus, sizeof(struct kvm_io_bus), GFP_KERNEL);
if (!new_bus)
return -ENOMEM;
- memcpy(new_bus, bus, sizeof(struct kvm_io_bus));
- new_bus->devs[new_bus->dev_count++] = dev;
+ kvm_io_bus_insert_dev(new_bus, dev, addr, len);
rcu_assign_pointer(kvm->buses[bus_idx], new_bus);
synchronize_srcu_expedited(&kvm->srcu);
kfree(bus);
@@ -2455,18 +2582,21 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
int i, r;
struct kvm_io_bus *new_bus, *bus;
- new_bus = kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL);
+ bus = kvm->buses[bus_idx];
+
+ new_bus = kmemdup(bus, sizeof(*bus), GFP_KERNEL);
if (!new_bus)
return -ENOMEM;
- bus = kvm->buses[bus_idx];
- memcpy(new_bus, bus, sizeof(struct kvm_io_bus));
-
r = -ENOENT;
for (i = 0; i < new_bus->dev_count; i++)
- if (new_bus->devs[i] == dev) {
+ if (new_bus->range[i].dev == dev) {
r = 0;
- new_bus->devs[i] = new_bus->devs[--new_bus->dev_count];
+ new_bus->dev_count--;
+ new_bus->range[i] = new_bus->range[new_bus->dev_count];
+ sort(new_bus->range, new_bus->dev_count,
+ sizeof(struct kvm_io_range),
+ kvm_io_bus_sort_cmp, NULL);
break;
}
@@ -2524,15 +2654,29 @@ static const struct file_operations *stat_fops[] = {
[KVM_STAT_VM] = &vm_stat_fops,
};
-static void kvm_init_debug(void)
+static int kvm_init_debug(void)
{
+ int r = -EFAULT;
struct kvm_stats_debugfs_item *p;
kvm_debugfs_dir = debugfs_create_dir("kvm", NULL);
- for (p = debugfs_entries; p->name; ++p)
+ if (kvm_debugfs_dir == NULL)
+ goto out;
+
+ for (p = debugfs_entries; p->name; ++p) {
p->dentry = debugfs_create_file(p->name, 0444, kvm_debugfs_dir,
(void *)(long)p->offset,
stat_fops[p->kind]);
+ if (p->dentry == NULL)
+ goto out_dir;
+ }
+
+ return 0;
+
+out_dir:
+ debugfs_remove_recursive(kvm_debugfs_dir);
+out:
+ return r;
}
static void kvm_exit_debug(void)
@@ -2676,10 +2820,16 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
kvm_preempt_ops.sched_in = kvm_sched_in;
kvm_preempt_ops.sched_out = kvm_sched_out;
- kvm_init_debug();
+ r = kvm_init_debug();
+ if (r) {
+ printk(KERN_ERR "kvm: create debugfs files failed\n");
+ goto out_undebugfs;
+ }
return 0;
+out_undebugfs:
+ unregister_syscore_ops(&kvm_syscore_ops);
out_unreg:
kvm_async_pf_deinit();
out_free: