summaryrefslogtreecommitdiff
path: root/virt/kvm
diff options
context:
space:
mode:
Diffstat (limited to 'virt/kvm')
-rw-r--r-- virt/kvm/assigned-dev.c 2
-rw-r--r-- virt/kvm/async_pf.c 4
-rw-r--r-- virt/kvm/eventfd.c 3
-rw-r--r-- virt/kvm/iommu.c 10
-rw-r--r-- virt/kvm/kvm_main.c 79
-rw-r--r-- virt/kvm/vfio.c 4
-rw-r--r-- virt/kvm/vfio.h 13
7 files changed, 91 insertions, 24 deletions
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index 5819a2708d7e..e05000e200d2 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -302,7 +302,7 @@ static void kvm_free_assigned_device(struct kvm *kvm,
else
pci_restore_state(assigned_dev->dev);
- assigned_dev->dev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;
+ pci_clear_dev_assigned(assigned_dev->dev);
pci_release_regions(assigned_dev->dev);
pci_disable_device(assigned_dev->dev);
diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c
index d6a3d0993d88..5ff7f7f2689a 100644
--- a/virt/kvm/async_pf.c
+++ b/virt/kvm/async_pf.c
@@ -80,9 +80,7 @@ static void async_pf_execute(struct work_struct *work)
might_sleep();
- down_read(&mm->mmap_sem);
- get_user_pages(NULL, mm, addr, 1, 1, 0, NULL, NULL);
- up_read(&mm->mmap_sem);
+ kvm_get_user_page_io(NULL, mm, addr, 1, NULL);
kvm_async_page_present_sync(vcpu, apf);
spin_lock(&vcpu->async_pf.lock);
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 0c712a779b44..b0fb390943c6 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -36,6 +36,9 @@
#include <linux/seqlock.h>
#include <trace/events/kvm.h>
+#ifdef __KVM_HAVE_IOAPIC
+#include "ioapic.h"
+#endif
#include "iodev.h"
#ifdef CONFIG_HAVE_KVM_IRQFD
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 714b94932312..e51d9f9b995f 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -191,8 +191,7 @@ int kvm_assign_device(struct kvm *kvm,
return r;
}
- noncoherent = !iommu_domain_has_cap(kvm->arch.iommu_domain,
- IOMMU_CAP_CACHE_COHERENCY);
+ noncoherent = !iommu_capable(&pci_bus_type, IOMMU_CAP_CACHE_COHERENCY);
/* Check if need to update IOMMU page table for guest memory */
if (noncoherent != kvm->arch.iommu_noncoherent) {
@@ -203,7 +202,7 @@ int kvm_assign_device(struct kvm *kvm,
goto out_unmap;
}
- pdev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED;
+ pci_set_dev_assigned(pdev);
dev_info(&pdev->dev, "kvm assign device\n");
@@ -229,7 +228,7 @@ int kvm_deassign_device(struct kvm *kvm,
iommu_detach_device(domain, &pdev->dev);
- pdev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;
+ pci_clear_dev_assigned(pdev);
dev_info(&pdev->dev, "kvm deassign device\n");
@@ -254,8 +253,7 @@ int kvm_iommu_map_guest(struct kvm *kvm)
}
if (!allow_unsafe_assigned_interrupts &&
- !iommu_domain_has_cap(kvm->arch.iommu_domain,
- IOMMU_CAP_INTR_REMAP)) {
+ !iommu_capable(&pci_bus_type, IOMMU_CAP_INTR_REMAP)) {
printk(KERN_WARNING "%s: No interrupt remapping support,"
" disallowing device assignment."
" Re-enble with \"allow_unsafe_assigned_interrupts=1\""
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 278232025129..384eaa7b02fa 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -52,11 +52,13 @@
#include <asm/processor.h>
#include <asm/io.h>
+#include <asm/ioctl.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include "coalesced_mmio.h"
#include "async_pf.h"
+#include "vfio.h"
#define CREATE_TRACE_POINTS
#include <trace/events/kvm.h>
@@ -108,7 +110,7 @@ static bool largepages_enabled = true;
bool kvm_is_mmio_pfn(pfn_t pfn)
{
if (pfn_valid(pfn))
- return PageReserved(pfn_to_page(pfn));
+ return !is_zero_pfn(pfn) && PageReserved(pfn_to_page(pfn));
return true;
}
@@ -151,7 +153,7 @@ static void ack_flush(void *_completed)
{
}
-static bool make_all_cpus_request(struct kvm *kvm, unsigned int req)
+bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
{
int i, cpu, me;
cpumask_var_t cpus;
@@ -188,7 +190,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
long dirty_count = kvm->tlbs_dirty;
smp_mb();
- if (make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
+ if (kvm_make_all_cpus_request(kvm, KVM_REQ_TLB_FLUSH))
++kvm->stat.remote_tlb_flush;
cmpxchg(&kvm->tlbs_dirty, dirty_count, 0);
}
@@ -196,17 +198,17 @@ EXPORT_SYMBOL_GPL(kvm_flush_remote_tlbs);
void kvm_reload_remote_mmus(struct kvm *kvm)
{
- make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
+ kvm_make_all_cpus_request(kvm, KVM_REQ_MMU_RELOAD);
}
void kvm_make_mclock_inprogress_request(struct kvm *kvm)
{
- make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
+ kvm_make_all_cpus_request(kvm, KVM_REQ_MCLOCK_INPROGRESS);
}
void kvm_make_scan_ioapic_request(struct kvm *kvm)
{
- make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
+ kvm_make_all_cpus_request(kvm, KVM_REQ_SCAN_IOAPIC);
}
int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
@@ -294,6 +296,9 @@ static void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn,
kvm_flush_remote_tlbs(kvm);
spin_unlock(&kvm->mmu_lock);
+
+ kvm_arch_mmu_notifier_invalidate_page(kvm, address);
+
srcu_read_unlock(&kvm->srcu, idx);
}
@@ -367,7 +372,8 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn,
static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
struct mm_struct *mm,
- unsigned long address)
+ unsigned long start,
+ unsigned long end)
{
struct kvm *kvm = mmu_notifier_to_kvm(mn);
int young, idx;
@@ -375,7 +381,7 @@ static int kvm_mmu_notifier_clear_flush_young(struct mmu_notifier *mn,
idx = srcu_read_lock(&kvm->srcu);
spin_lock(&kvm->mmu_lock);
- young = kvm_age_hva(kvm, address);
+ young = kvm_age_hva(kvm, start, end);
if (young)
kvm_flush_remote_tlbs(kvm);
@@ -1128,6 +1134,43 @@ static int get_user_page_nowait(struct task_struct *tsk, struct mm_struct *mm,
return __get_user_pages(tsk, mm, start, 1, flags, page, NULL, NULL);
}
+int kvm_get_user_page_io(struct task_struct *tsk, struct mm_struct *mm,
+ unsigned long addr, bool write_fault,
+ struct page **pagep)
+{
+ int npages;
+ int locked = 1;
+ int flags = FOLL_TOUCH | FOLL_HWPOISON |
+ (pagep ? FOLL_GET : 0) |
+ (write_fault ? FOLL_WRITE : 0);
+
+ /*
+ * If retrying the fault, we get here *not* having allowed the filemap
+ * to wait on the page lock. We should now allow waiting on the IO with
+ * the mmap semaphore released.
+ */
+ down_read(&mm->mmap_sem);
+ npages = __get_user_pages(tsk, mm, addr, 1, flags, pagep, NULL,
+ &locked);
+ if (!locked) {
+ VM_BUG_ON(npages);
+
+ if (!pagep)
+ return 0;
+
+ /*
+ * The previous call has now waited on the IO. Now we can
+ * retry and complete. Pass FOLL_TRIED to ensure we do not
+ * reschedule async IO (see e.g. filemap_fault).
+ */
+ down_read(&mm->mmap_sem);
+ npages = __get_user_pages(tsk, mm, addr, 1, flags | FOLL_TRIED,
+ pagep, NULL, NULL);
+ }
+ up_read(&mm->mmap_sem);
+ return npages;
+}
+
static inline int check_user_page_hwpoison(unsigned long addr)
{
int rc, flags = FOLL_TOUCH | FOLL_HWPOISON | FOLL_WRITE;
@@ -1190,9 +1233,15 @@ static int hva_to_pfn_slow(unsigned long addr, bool *async, bool write_fault,
npages = get_user_page_nowait(current, current->mm,
addr, write_fault, page);
up_read(&current->mm->mmap_sem);
- } else
- npages = get_user_pages_fast(addr, 1, write_fault,
- page);
+ } else {
+ /*
+ * By now we have tried gup_fast, and possibly async_pf, and we
+ * are certainly not atomic. Time to retry the gup, allowing
+ * mmap semaphore to be relinquished in the case of IO.
+ */
+ npages = kvm_get_user_page_io(current, current->mm, addr,
+ write_fault, page);
+ }
if (npages != 1)
return npages;
@@ -1746,7 +1795,7 @@ int kvm_vcpu_yield_to(struct kvm_vcpu *target)
rcu_read_lock();
pid = rcu_dereference(target->pid);
if (pid)
- task = get_pid_task(target->pid, PIDTYPE_PID);
+ task = get_pid_task(pid, PIDTYPE_PID);
rcu_read_unlock();
if (!task)
return ret;
@@ -1995,6 +2044,9 @@ static long kvm_vcpu_ioctl(struct file *filp,
if (vcpu->kvm->mm != current->mm)
return -EIO;
+ if (unlikely(_IOC_TYPE(ioctl) != KVMIO))
+ return -EINVAL;
+
#if defined(CONFIG_S390) || defined(CONFIG_PPC) || defined(CONFIG_MIPS)
/*
* Special cases: vcpu ioctls that are asynchronous to vcpu execution,
@@ -3233,6 +3285,9 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
goto out_undebugfs;
}
+ r = kvm_vfio_ops_init();
+ WARN_ON(r);
+
return 0;
out_undebugfs:
diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c
index bb11b36ee8a2..281e7cf2b8e5 100644
--- a/virt/kvm/vfio.c
+++ b/virt/kvm/vfio.c
@@ -18,6 +18,7 @@
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/vfio.h>
+#include "vfio.h"
struct kvm_vfio_group {
struct list_head node;
@@ -278,8 +279,7 @@ static int kvm_vfio_create(struct kvm_device *dev, u32 type)
return 0;
}
-static int __init kvm_vfio_ops_init(void)
+int kvm_vfio_ops_init(void)
{
return kvm_register_device_ops(&kvm_vfio_ops, KVM_DEV_TYPE_VFIO);
}
-module_init(kvm_vfio_ops_init);
diff --git a/virt/kvm/vfio.h b/virt/kvm/vfio.h
new file mode 100644
index 000000000000..92eac75d6b62
--- /dev/null
+++ b/virt/kvm/vfio.h
@@ -0,0 +1,13 @@
+#ifndef __KVM_VFIO_H
+#define __KVM_VFIO_H
+
+#ifdef CONFIG_KVM_VFIO
+int kvm_vfio_ops_init(void);
+#else
+static inline int kvm_vfio_ops_init(void)
+{
+ return 0;
+}
+#endif
+
+#endif