summary | refs | log | tree | commit | diff
path: root/arch/x86/kvm/xen.c
diff options
context:
space:
mode:
author: David Woodhouse <dwmw@amazon.co.uk> 2022-03-03 18:41:15 +0300
committer: Paolo Bonzini <pbonzini@redhat.com> 2022-04-02 12:41:13 +0300
commit: 7caf9571563eade546976d600dd023674b1c3185 (patch)
tree: d0b21a5ef5483654d759800d905fdce6de2956a3 /arch/x86/kvm/xen.c
parent: 916d3608df8265a2e3f6214200dbb0b39a5ca383 (diff)
download: linux-7caf9571563eade546976d600dd023674b1c3185.tar.xz
KVM: x86/xen: Use gfn_to_pfn_cache for vcpu_info
Currently, the fast path of kvm_xen_set_evtchn_fast() doesn't set the index bits in the target vCPU's evtchn_pending_sel, because it only has a userspace virtual address with which to do so. It just sets them in the kernel, and kvm_xen_has_interrupt() then completes the delivery to the actual vcpu_info structure when the vCPU runs.

Using a gfn_to_pfn_cache allows kvm_xen_set_evtchn_fast() to do the full delivery in the common case.

Clean up the fallback case too, by moving the deferred delivery out into a separate kvm_xen_inject_pending_events() function which isn't ever called in atomic contexts, as __kvm_xen_has_interrupt() is.

Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Message-Id: <20220303154127.202856-6-dwmw2@infradead.org>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Diffstat (limited to 'arch/x86/kvm/xen.c')
-rw-r--r-- arch/x86/kvm/xen.c 242
1 files changed, 128 insertions, 114 deletions
diff --git a/arch/x86/kvm/xen.c b/arch/x86/kvm/xen.c
index de1b3be7239e..56e8a6980b65 100644
--- a/arch/x86/kvm/xen.c
+++ b/arch/x86/kvm/xen.c
@@ -8,6 +8,7 @@
#include "x86.h"
#include "xen.h"
+#include "lapic.h"
#include "hyperv.h"
#include <linux/kvm_host.h>
@@ -246,23 +247,79 @@ void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
mark_page_dirty_in_slot(v->kvm, gpc->memslot, gpc->gpa >> PAGE_SHIFT);
}
-int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
+/*
+ * On event channel delivery, the vcpu_info may not have been accessible.
+ * In that case, there are bits in vcpu->arch.xen.evtchn_pending_sel which
+ * need to be marked into the vcpu_info (and evtchn_upcall_pending set).
+ * Do so now that we can sleep in the context of the vCPU to bring the
+ * page in, and refresh the pfn cache for it.
+ */
+void kvm_xen_inject_pending_events(struct kvm_vcpu *v)
{
unsigned long evtchn_pending_sel = READ_ONCE(v->arch.xen.evtchn_pending_sel);
- bool atomic = in_atomic() || !task_is_running(current);
- int err;
+ struct gfn_to_pfn_cache *gpc = &v->arch.xen.vcpu_info_cache;
+ unsigned long flags;
+
+ if (!evtchn_pending_sel)
+ return;
+
+ /*
+ * Yes, this is an open-coded loop. But that's just what put_user()
+ * does anyway. Page it in and retry the instruction. We're just a
+ * little more honest about it.
+ */
+ read_lock_irqsave(&gpc->lock, flags);
+ while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc, gpc->gpa,
+ sizeof(struct vcpu_info))) {
+ read_unlock_irqrestore(&gpc->lock, flags);
+
+ if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc, gpc->gpa,
+ sizeof(struct vcpu_info)))
+ return;
+
+ read_lock_irqsave(&gpc->lock, flags);
+ }
+
+ /* Now gpc->khva is a valid kernel address for the vcpu_info */
+ if (IS_ENABLED(CONFIG_64BIT) && v->kvm->arch.xen.long_mode) {
+ struct vcpu_info *vi = gpc->khva;
+
+ asm volatile(LOCK_PREFIX "orq %0, %1\n"
+ "notq %0\n"
+ LOCK_PREFIX "andq %0, %2\n"
+ : "=r" (evtchn_pending_sel),
+ "+m" (vi->evtchn_pending_sel),
+ "+m" (v->arch.xen.evtchn_pending_sel)
+ : "0" (evtchn_pending_sel));
+ WRITE_ONCE(vi->evtchn_upcall_pending, 1);
+ } else {
+ u32 evtchn_pending_sel32 = evtchn_pending_sel;
+ struct compat_vcpu_info *vi = gpc->khva;
+
+ asm volatile(LOCK_PREFIX "orl %0, %1\n"
+ "notl %0\n"
+ LOCK_PREFIX "andl %0, %2\n"
+ : "=r" (evtchn_pending_sel32),
+ "+m" (vi->evtchn_pending_sel),
+ "+m" (v->arch.xen.evtchn_pending_sel)
+ : "0" (evtchn_pending_sel32));
+ WRITE_ONCE(vi->evtchn_upcall_pending, 1);
+ }
+ read_unlock_irqrestore(&gpc->lock, flags);
+
+ mark_page_dirty_in_slot(v->kvm, gpc->memslot, gpc->gpa >> PAGE_SHIFT);
+}
+
+int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
+{
+ struct gfn_to_pfn_cache *gpc = &v->arch.xen.vcpu_info_cache;
+ unsigned long flags;
u8 rc = 0;
/*
* If the global upcall vector (HVMIRQ_callback_vector) is set and
* the vCPU's evtchn_upcall_pending flag is set, the IRQ is pending.
*/
- struct gfn_to_hva_cache *ghc = &v->arch.xen.vcpu_info_cache;
- struct kvm_memslots *slots = kvm_memslots(v->kvm);
- bool ghc_valid = slots->generation == ghc->generation &&
- !kvm_is_error_hva(ghc->hva) && ghc->memslot;
-
- unsigned int offset = offsetof(struct vcpu_info, evtchn_upcall_pending);
/* No need for compat handling here */
BUILD_BUG_ON(offsetof(struct vcpu_info, evtchn_upcall_pending) !=
@@ -272,101 +329,35 @@ int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
BUILD_BUG_ON(sizeof(rc) !=
sizeof_field(struct compat_vcpu_info, evtchn_upcall_pending));
- /*
- * For efficiency, this mirrors the checks for using the valid
- * cache in kvm_read_guest_offset_cached(), but just uses
- * __get_user() instead. And falls back to the slow path.
- */
- if (!evtchn_pending_sel && ghc_valid) {
- /* Fast path */
- pagefault_disable();
- err = __get_user(rc, (u8 __user *)ghc->hva + offset);
- pagefault_enable();
- if (!err)
- return rc;
- }
-
- /* Slow path */
+ read_lock_irqsave(&gpc->lock, flags);
+ while (!kvm_gfn_to_pfn_cache_check(v->kvm, gpc, gpc->gpa,
+ sizeof(struct vcpu_info))) {
+ read_unlock_irqrestore(&gpc->lock, flags);
- /*
- * This function gets called from kvm_vcpu_block() after setting the
- * task to TASK_INTERRUPTIBLE, to see if it needs to wake immediately
- * from a HLT. So we really mustn't sleep. If the page ended up absent
- * at that point, just return 1 in order to trigger an immediate wake,
- * and we'll end up getting called again from a context where we *can*
- * fault in the page and wait for it.
- */
- if (atomic)
- return 1;
+ /*
+ * This function gets called from kvm_vcpu_block() after setting the
+ * task to TASK_INTERRUPTIBLE, to see if it needs to wake immediately
+ * from a HLT. So we really mustn't sleep. If the page ended up absent
+ * at that point, just return 1 in order to trigger an immediate wake,
+ * and we'll end up getting called again from a context where we *can*
+ * fault in the page and wait for it.
+ */
+ if (in_atomic() || !task_is_running(current))
+ return 1;
- if (!ghc_valid) {
- err = kvm_gfn_to_hva_cache_init(v->kvm, ghc, ghc->gpa, ghc->len);
- if (err || !ghc->memslot) {
+ if (kvm_gfn_to_pfn_cache_refresh(v->kvm, gpc, gpc->gpa,
+ sizeof(struct vcpu_info))) {
/*
* If this failed, userspace has screwed up the
* vcpu_info mapping. No interrupts for you.
*/
return 0;
}
+ read_lock_irqsave(&gpc->lock, flags);
}
- /*
- * Now we have a valid (protected by srcu) userspace HVA in
- * ghc->hva which points to the struct vcpu_info. If there
- * are any bits in the in-kernel evtchn_pending_sel then
- * we need to write those to the guest vcpu_info and set
- * its evtchn_upcall_pending flag. If there aren't any bits
- * to add, we only want to *check* evtchn_upcall_pending.
- */
- if (evtchn_pending_sel) {
- bool long_mode = v->kvm->arch.xen.long_mode;
-
- if (!user_access_begin((void __user *)ghc->hva, sizeof(struct vcpu_info)))
- return 0;
-
- if (IS_ENABLED(CONFIG_64BIT) && long_mode) {
- struct vcpu_info __user *vi = (void __user *)ghc->hva;
-
- /* Attempt to set the evtchn_pending_sel bits in the
- * guest, and if that succeeds then clear the same
- * bits in the in-kernel version. */
- asm volatile("1:\t" LOCK_PREFIX "orq %0, %1\n"
- "\tnotq %0\n"
- "\t" LOCK_PREFIX "andq %0, %2\n"
- "2:\n"
- _ASM_EXTABLE_UA(1b, 2b)
- : "=r" (evtchn_pending_sel),
- "+m" (vi->evtchn_pending_sel),
- "+m" (v->arch.xen.evtchn_pending_sel)
- : "0" (evtchn_pending_sel));
- } else {
- struct compat_vcpu_info __user *vi = (void __user *)ghc->hva;
- u32 evtchn_pending_sel32 = evtchn_pending_sel;
-
- /* Attempt to set the evtchn_pending_sel bits in the
- * guest, and if that succeeds then clear the same
- * bits in the in-kernel version. */
- asm volatile("1:\t" LOCK_PREFIX "orl %0, %1\n"
- "\tnotl %0\n"
- "\t" LOCK_PREFIX "andl %0, %2\n"
- "2:\n"
- _ASM_EXTABLE_UA(1b, 2b)
- : "=r" (evtchn_pending_sel32),
- "+m" (vi->evtchn_pending_sel),
- "+m" (v->arch.xen.evtchn_pending_sel)
- : "0" (evtchn_pending_sel32));
- }
- rc = 1;
- unsafe_put_user(rc, (u8 __user *)ghc->hva + offset, err);
-
- err:
- user_access_end();
-
- mark_page_dirty_in_slot(v->kvm, ghc->memslot, ghc->gpa >> PAGE_SHIFT);
- } else {
- __get_user(rc, (u8 __user *)ghc->hva + offset);
- }
-
+ rc = ((struct vcpu_info *)gpc->khva)->evtchn_upcall_pending;
+ read_unlock_irqrestore(&gpc->lock, flags);
return rc;
}
@@ -456,25 +447,18 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
offsetof(struct compat_vcpu_info, time));
if (data->u.gpa == GPA_INVALID) {
- vcpu->arch.xen.vcpu_info_set = false;
+ kvm_gfn_to_pfn_cache_destroy(vcpu->kvm, &vcpu->arch.xen.vcpu_info_cache);
r = 0;
break;
}
- /* It must fit within a single page */
- if ((data->u.gpa & ~PAGE_MASK) + sizeof(struct vcpu_info) > PAGE_SIZE) {
- r = -EINVAL;
- break;
- }
-
- r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
+ r = kvm_gfn_to_pfn_cache_init(vcpu->kvm,
&vcpu->arch.xen.vcpu_info_cache,
- data->u.gpa,
+ NULL, KVM_HOST_USES_PFN, data->u.gpa,
sizeof(struct vcpu_info));
- if (!r) {
- vcpu->arch.xen.vcpu_info_set = true;
+ if (!r)
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
- }
+
break;
case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
@@ -630,7 +614,7 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
switch (data->type) {
case KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO:
- if (vcpu->arch.xen.vcpu_info_set)
+ if (vcpu->arch.xen.vcpu_info_cache.active)
data->u.gpa = vcpu->arch.xen.vcpu_info_cache.gpa;
else
data->u.gpa = GPA_INVALID;
@@ -903,16 +887,17 @@ int kvm_xen_set_evtchn_fast(struct kvm_kernel_irq_routing_entry *e,
if (!vcpu)
return -1;
- if (!vcpu->arch.xen.vcpu_info_set)
+ if (!vcpu->arch.xen.vcpu_info_cache.active)
return -1;
if (e->xen_evtchn.port >= max_evtchn_port(kvm))
return -1;
rc = -EWOULDBLOCK;
- read_lock_irqsave(&gpc->lock, flags);
idx = srcu_read_lock(&kvm->srcu);
+
+ read_lock_irqsave(&gpc->lock, flags);
if (!kvm_gfn_to_pfn_cache_check(kvm, gpc, gpc->gpa, PAGE_SIZE))
goto out_rcu;
@@ -940,17 +925,44 @@ int kvm_xen_set_evtchn_fast(struct kvm_kernel_irq_routing_entry *e,
} else if (test_bit(e->xen_evtchn.port, mask_bits)) {
rc = -1; /* Masked */
} else {
- rc = 1; /* Delivered. But was the vCPU waking already? */
- if (!test_and_set_bit(port_word_bit, &vcpu->arch.xen.evtchn_pending_sel))
- kick_vcpu = true;
+ rc = 1; /* Delivered to the bitmap in shared_info. */
+ /* Now switch to the vCPU's vcpu_info to set the index and pending_sel */
+ read_unlock_irqrestore(&gpc->lock, flags);
+ gpc = &vcpu->arch.xen.vcpu_info_cache;
+
+ read_lock_irqsave(&gpc->lock, flags);
+ if (!kvm_gfn_to_pfn_cache_check(kvm, gpc, gpc->gpa, sizeof(struct vcpu_info))) {
+ /*
+ * Could not access the vcpu_info. Set the bit in-kernel
+ * and prod the vCPU to deliver it for itself.
+ */
+ if (!test_and_set_bit(port_word_bit, &vcpu->arch.xen.evtchn_pending_sel))
+ kick_vcpu = true;
+ goto out_rcu;
+ }
+
+ if (IS_ENABLED(CONFIG_64BIT) && kvm->arch.xen.long_mode) {
+ struct vcpu_info *vcpu_info = gpc->khva;
+ if (!test_and_set_bit(port_word_bit, &vcpu_info->evtchn_pending_sel)) {
+ WRITE_ONCE(vcpu_info->evtchn_upcall_pending, 1);
+ kick_vcpu = true;
+ }
+ } else {
+ struct compat_vcpu_info *vcpu_info = gpc->khva;
+ if (!test_and_set_bit(port_word_bit,
+ (unsigned long *)&vcpu_info->evtchn_pending_sel)) {
+ WRITE_ONCE(vcpu_info->evtchn_upcall_pending, 1);
+ kick_vcpu = true;
+ }
+ }
}
out_rcu:
- srcu_read_unlock(&kvm->srcu, idx);
read_unlock_irqrestore(&gpc->lock, flags);
+ srcu_read_unlock(&kvm->srcu, idx);
if (kick_vcpu) {
- kvm_make_request(KVM_REQ_EVENT, vcpu);
+ kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
kvm_vcpu_kick(vcpu);
}
@@ -1052,4 +1064,6 @@ void kvm_xen_destroy_vcpu(struct kvm_vcpu *vcpu)
{
kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
&vcpu->arch.xen.runstate_cache);
+ kvm_gfn_to_pfn_cache_destroy(vcpu->kvm,
+ &vcpu->arch.xen.vcpu_info_cache);
}