From b053b2aef25d00773fa6762dcd4b7f5c9c42d171 Mon Sep 17 00:00:00 2001 From: Steve Rutherford Date: Wed, 29 Jul 2015 23:32:35 -0700 Subject: KVM: x86: Add EOI exit bitmap inference In order to support a userspace IOAPIC interacting with an in-kernel APIC, the EOI exit bitmaps need to be configurable. If the IOAPIC is in userspace (i.e. the irqchip has been split), the EOI exit bitmaps will be set whenever the GSI Routes are configured. In particular, the low MSI routes are reservable for userspace IOAPICs. For these MSI routes, the EOI Exit bit corresponding to the destination vector of the route will be set for the destination VCPU. The intention is for the userspace IOAPICs to use the reservable MSI routes to inject interrupts into the guest. This is a slight abuse of the notion of an MSI Route, given that MSIs classically bypass the IOAPIC. It might be worthwhile to add an additional route type to improve clarity. Compile tested for Intel x86. Signed-off-by: Steve Rutherford Signed-off-by: Paolo Bonzini --- virt/kvm/irqchip.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'virt') diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index d7ea8e20dae4..716a1c4db528 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -31,16 +31,6 @@ #include #include "irq.h" -struct kvm_irq_routing_table { - int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS]; - u32 nr_rt_entries; - /* - * Array indexed by gsi. Each entry contains list of irq chips - * the gsi is connected to. 
- */ - struct hlist_head map[0]; -}; - int kvm_irq_map_gsi(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *entries, int gsi) { @@ -231,6 +221,8 @@ int kvm_set_irq_routing(struct kvm *kvm, kvm_irq_routing_update(kvm); mutex_unlock(&kvm->irq_lock); + kvm_arch_irq_routing_update(kvm); + synchronize_srcu_expedited(&kvm->irq_srcu); new = old; -- cgit v1.2.3 From d3febddde9c7a959dbb189a700e937db50fad4d6 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 25 Aug 2015 17:05:46 +0800 Subject: kvm: use kmalloc() instead of kzalloc() during iodev register/unregister All fields of kvm_io_range were initialized or copied explicitly afterwards. So switch to use kmalloc(). Cc: Gleb Natapov Cc: Paolo Bonzini Cc: Michael S. Tsirkin Signed-off-by: Jason Wang Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 8db1d9361993..23116dcb2129 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3341,7 +3341,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1) return -ENOSPC; - new_bus = kzalloc(sizeof(*bus) + ((bus->dev_count + 1) * + new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count + 1) * sizeof(struct kvm_io_range)), GFP_KERNEL); if (!new_bus) return -ENOMEM; @@ -3373,7 +3373,7 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, if (r) return r; - new_bus = kzalloc(sizeof(*bus) + ((bus->dev_count - 1) * + new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count - 1) * sizeof(struct kvm_io_range)), GFP_KERNEL); if (!new_bus) return -ENOMEM; -- cgit v1.2.3 From e9ea5069d9e569c32ab913c39467df32e056b3a7 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 15 Sep 2015 14:41:59 +0800 Subject: kvm: add capability for any-length ioeventfds Cc: Gleb Natapov Cc: Paolo Bonzini Signed-off-by: Jason Wang Signed-off-by: Paolo Bonzini --- 
Documentation/virtual/kvm/api.txt | 6 +++++- include/uapi/linux/kvm.h | 1 + virt/kvm/eventfd.c | 4 +--- virt/kvm/kvm_main.c | 1 + 4 files changed, 8 insertions(+), 4 deletions(-) (limited to 'virt') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index e3e9c41721a2..34cc068e81ea 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1604,7 +1604,7 @@ provided event instead of triggering an exit. struct kvm_ioeventfd { __u64 datamatch; __u64 addr; /* legal pio/mmio address */ - __u32 len; /* 1, 2, 4, or 8 bytes */ + __u32 len; /* 0, 1, 2, 4, or 8 bytes */ __s32 fd; __u32 flags; __u8 pad[36]; @@ -1627,6 +1627,10 @@ to the registered address is equal to datamatch in struct kvm_ioeventfd. For virtio-ccw devices, addr contains the subchannel id and datamatch the virtqueue index. +With KVM_CAP_IOEVENTFD_ANY_LENGTH, a zero length ioeventfd is allowed, and +the kernel will ignore the length of guest write and may get a faster vmexit. +The speedup may only apply to specific architectures, but the ioeventfd will +work anyway. 
4.60 KVM_DIRTY_TLB diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 12e3afbf0f47..03f3618612aa 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -830,6 +830,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_GUEST_DEBUG_HW_BPS 119 #define KVM_CAP_GUEST_DEBUG_HW_WPS 120 #define KVM_CAP_SPLIT_IRQCHIP 121 +#define KVM_CAP_IOEVENTFD_ANY_LENGTH 122 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 79db45336e3a..ac89299b8699 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -914,9 +914,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args) return -EINVAL; /* ioeventfd with no length can't be combined with DATAMATCH */ - if (!args->len && - args->flags & (KVM_IOEVENTFD_FLAG_PIO | - KVM_IOEVENTFD_FLAG_DATAMATCH)) + if (!args->len && (args->flags & KVM_IOEVENTFD_FLAG_DATAMATCH)) return -EINVAL; ret = kvm_assign_ioeventfd_idx(kvm, bus_idx, args); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 23116dcb2129..afd7ae6aec65 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2718,6 +2718,7 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) case KVM_CAP_IRQFD: case KVM_CAP_IRQFD_RESAMPLE: #endif + case KVM_CAP_IOEVENTFD_ANY_LENGTH: case KVM_CAP_CHECK_EXTENSION_VM: return 1; #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING -- cgit v1.2.3 From f73f8173126ba68eb1c42bd9a234a51d78576ca6 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 18 Sep 2015 22:29:39 +0800 Subject: virt: IRQ bypass manager When a physical I/O device is assigned to a virtual machine through facilities like VFIO and KVM, the interrupt for the device generally bounces through the host system before being injected into the VM. However, hardware technologies exist that often allow the host to be bypassed for some of these scenarios. 
Intel Posted Interrupts allow the specified physical edge interrupts to be directly injected into a guest when delivered to a physical processor while the vCPU is running. ARM IRQ Forwarding allows forwarded physical interrupts to be directly deactivated by the guest. The IRQ bypass manager here is meant to provide the shim to connect interrupt producers, generally the host physical device driver, with interrupt consumers, generally the hypervisor, in order to configure these bypass mechanisms. To do this, we base the connection on a shared, opaque token. For KVM-VFIO this is expected to be an eventfd_ctx since this is the connection we already use to connect an eventfd to an irqfd on the in-kernel path. When a producer and consumer with matching tokens are found, callbacks via both registered participants allow the bypass facilities to be automatically enabled. Signed-off-by: Alex Williamson Reviewed-by: Eric Auger Tested-by: Eric Auger Tested-by: Feng Wu Signed-off-by: Feng Wu Signed-off-by: Paolo Bonzini --- MAINTAINERS | 7 ++ include/linux/irqbypass.h | 90 ++++++++++++++++ virt/lib/Kconfig | 2 + virt/lib/Makefile | 1 + virt/lib/irqbypass.c | 257 ++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 357 insertions(+) create mode 100644 include/linux/irqbypass.h create mode 100644 virt/lib/Kconfig create mode 100644 virt/lib/Makefile create mode 100644 virt/lib/irqbypass.c (limited to 'virt') diff --git a/MAINTAINERS b/MAINTAINERS index 797236befd27..3b738cb8bdeb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11177,6 +11177,13 @@ L: netdev@vger.kernel.org S: Maintained F: drivers/net/ethernet/via/via-velocity.* +VIRT LIB +M: Alex Williamson +M: Paolo Bonzini +L: kvm@vger.kernel.org +S: Supported +F: virt/lib/ + VIVID VIRTUAL VIDEO DRIVER M: Hans Verkuil L: linux-media@vger.kernel.org diff --git a/include/linux/irqbypass.h b/include/linux/irqbypass.h new file mode 100644 index 000000000000..1551b5b2f4c2 --- /dev/null +++ b/include/linux/irqbypass.h @@ -0,0 
+1,90 @@ +/* + * IRQ offload/bypass manager + * + * Copyright (C) 2015 Red Hat, Inc. + * Copyright (c) 2015 Linaro Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef IRQBYPASS_H +#define IRQBYPASS_H + +#include + +struct irq_bypass_consumer; + +/* + * Theory of operation + * + * The IRQ bypass manager is a simple set of lists and callbacks that allows + * IRQ producers (ex. physical interrupt sources) to be matched to IRQ + * consumers (ex. virtualization hardware that allows IRQ bypass or offload) + * via a shared token (ex. eventfd_ctx). Producers and consumers register + * independently. When a token match is found, the optional @stop callback + * will be called for each participant. The pair will then be connected via + * the @add_* callbacks, and finally the optional @start callback will allow + * any final coordination. When either participant is unregistered, the + * process is repeated using the @del_* callbacks in place of the @add_* + * callbacks. Match tokens must be unique per producer/consumer, 1:N pairings + * are not supported. + */ + +/** + * struct irq_bypass_producer - IRQ bypass producer definition + * @node: IRQ bypass manager private list management + * @token: opaque token to match between producer and consumer + * @irq: Linux IRQ number for the producer device + * @add_consumer: Connect the IRQ producer to an IRQ consumer (optional) + * @del_consumer: Disconnect the IRQ producer from an IRQ consumer (optional) + * @stop: Perform any quiesce operations necessary prior to add/del (optional) + * @start: Perform any startup operations necessary after add/del (optional) + * + * The IRQ bypass producer structure represents an interrupt source for + * participation in possible host bypass, for instance an interrupt vector + * for a physical device assigned to a VM. 
+ */ +struct irq_bypass_producer { + struct list_head node; + void *token; + int irq; + int (*add_consumer)(struct irq_bypass_producer *, + struct irq_bypass_consumer *); + void (*del_consumer)(struct irq_bypass_producer *, + struct irq_bypass_consumer *); + void (*stop)(struct irq_bypass_producer *); + void (*start)(struct irq_bypass_producer *); +}; + +/** + * struct irq_bypass_consumer - IRQ bypass consumer definition + * @node: IRQ bypass manager private list management + * @token: opaque token to match between producer and consumer + * @add_producer: Connect the IRQ consumer to an IRQ producer + * @del_producer: Disconnect the IRQ consumer from an IRQ producer + * @stop: Perform any quiesce operations necessary prior to add/del (optional) + * @start: Perform any startup operations necessary after add/del (optional) + * + * The IRQ bypass consumer structure represents an interrupt sink for + * participation in possible host bypass, for instance a hypervisor may + * support offloads to allow bypassing the host entirely or offload + * portions of the interrupt handling to the VM. 
+ */ +struct irq_bypass_consumer { + struct list_head node; + void *token; + int (*add_producer)(struct irq_bypass_consumer *, + struct irq_bypass_producer *); + void (*del_producer)(struct irq_bypass_consumer *, + struct irq_bypass_producer *); + void (*stop)(struct irq_bypass_consumer *); + void (*start)(struct irq_bypass_consumer *); +}; + +int irq_bypass_register_producer(struct irq_bypass_producer *); +void irq_bypass_unregister_producer(struct irq_bypass_producer *); +int irq_bypass_register_consumer(struct irq_bypass_consumer *); +void irq_bypass_unregister_consumer(struct irq_bypass_consumer *); + +#endif /* IRQBYPASS_H */ diff --git a/virt/lib/Kconfig b/virt/lib/Kconfig new file mode 100644 index 000000000000..89a414f815d2 --- /dev/null +++ b/virt/lib/Kconfig @@ -0,0 +1,2 @@ +config IRQ_BYPASS_MANAGER + tristate diff --git a/virt/lib/Makefile b/virt/lib/Makefile new file mode 100644 index 000000000000..901228d1ffbc --- /dev/null +++ b/virt/lib/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_IRQ_BYPASS_MANAGER) += irqbypass.o diff --git a/virt/lib/irqbypass.c b/virt/lib/irqbypass.c new file mode 100644 index 000000000000..09a03b5a21ff --- /dev/null +++ b/virt/lib/irqbypass.c @@ -0,0 +1,257 @@ +/* + * IRQ offload/bypass manager + * + * Copyright (C) 2015 Red Hat, Inc. + * Copyright (c) 2015 Linaro Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Various virtualization hardware acceleration techniques allow bypassing or + * offloading interrupts received from devices around the host kernel. Posted + * Interrupts on Intel VT-d systems can allow interrupts to be received + * directly by a virtual machine. ARM IRQ Forwarding allows forwarded physical + * interrupts to be directly deactivated by the guest. This manager allows + * interrupt producers and consumers to find each other to enable this sort of + * bypass. 
+ */ + +#include +#include +#include +#include + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("IRQ bypass manager utility module"); + +static LIST_HEAD(producers); +static LIST_HEAD(consumers); +static DEFINE_MUTEX(lock); + +/* @lock must be held when calling connect */ +static int __connect(struct irq_bypass_producer *prod, + struct irq_bypass_consumer *cons) +{ + int ret = 0; + + if (prod->stop) + prod->stop(prod); + if (cons->stop) + cons->stop(cons); + + if (prod->add_consumer) + ret = prod->add_consumer(prod, cons); + + if (!ret) { + ret = cons->add_producer(cons, prod); + if (ret && prod->del_consumer) + prod->del_consumer(prod, cons); + } + + if (cons->start) + cons->start(cons); + if (prod->start) + prod->start(prod); + + return ret; +} + +/* @lock must be held when calling disconnect */ +static void __disconnect(struct irq_bypass_producer *prod, + struct irq_bypass_consumer *cons) +{ + if (prod->stop) + prod->stop(prod); + if (cons->stop) + cons->stop(cons); + + cons->del_producer(cons, prod); + + if (prod->del_consumer) + prod->del_consumer(prod, cons); + + if (cons->start) + cons->start(cons); + if (prod->start) + prod->start(prod); +} + +/** + * irq_bypass_register_producer - register IRQ bypass producer + * @producer: pointer to producer structure + * + * Add the provided IRQ producer to the list of producers and connect + * with any matching token found on the IRQ consumers list. 
+ */ +int irq_bypass_register_producer(struct irq_bypass_producer *producer) +{ + struct irq_bypass_producer *tmp; + struct irq_bypass_consumer *consumer; + + might_sleep(); + + if (!try_module_get(THIS_MODULE)) + return -ENODEV; + + mutex_lock(&lock); + + list_for_each_entry(tmp, &producers, node) { + if (tmp->token == producer->token) { + mutex_unlock(&lock); + module_put(THIS_MODULE); + return -EBUSY; + } + } + + list_for_each_entry(consumer, &consumers, node) { + if (consumer->token == producer->token) { + int ret = __connect(producer, consumer); + if (ret) { + mutex_unlock(&lock); + module_put(THIS_MODULE); + return ret; + } + break; + } + } + + list_add(&producer->node, &producers); + + mutex_unlock(&lock); + + return 0; +} +EXPORT_SYMBOL_GPL(irq_bypass_register_producer); + +/** + * irq_bypass_unregister_producer - unregister IRQ bypass producer + * @producer: pointer to producer structure + * + * Remove a previously registered IRQ producer from the list of producers + * and disconnect it from any connected IRQ consumer. 
+ */ +void irq_bypass_unregister_producer(struct irq_bypass_producer *producer) +{ + struct irq_bypass_producer *tmp; + struct irq_bypass_consumer *consumer; + + might_sleep(); + + if (!try_module_get(THIS_MODULE)) + return; /* nothing in the list anyway */ + + mutex_lock(&lock); + + list_for_each_entry(tmp, &producers, node) { + if (tmp->token != producer->token) + continue; + + list_for_each_entry(consumer, &consumers, node) { + if (consumer->token == producer->token) { + __disconnect(producer, consumer); + break; + } + } + + list_del(&producer->node); + module_put(THIS_MODULE); + break; + } + + mutex_unlock(&lock); + + module_put(THIS_MODULE); +} +EXPORT_SYMBOL_GPL(irq_bypass_unregister_producer); + +/** + * irq_bypass_register_consumer - register IRQ bypass consumer + * @consumer: pointer to consumer structure + * + * Add the provided IRQ consumer to the list of consumers and connect + * with any matching token found on the IRQ producer list. + */ +int irq_bypass_register_consumer(struct irq_bypass_consumer *consumer) +{ + struct irq_bypass_consumer *tmp; + struct irq_bypass_producer *producer; + + if (!consumer->add_producer || !consumer->del_producer) + return -EINVAL; + + might_sleep(); + + if (!try_module_get(THIS_MODULE)) + return -ENODEV; + + mutex_lock(&lock); + + list_for_each_entry(tmp, &consumers, node) { + if (tmp->token == consumer->token) { + mutex_unlock(&lock); + module_put(THIS_MODULE); + return -EBUSY; + } + } + + list_for_each_entry(producer, &producers, node) { + if (producer->token == consumer->token) { + int ret = __connect(producer, consumer); + if (ret) { + mutex_unlock(&lock); + module_put(THIS_MODULE); + return ret; + } + break; + } + } + + list_add(&consumer->node, &consumers); + + mutex_unlock(&lock); + + return 0; +} +EXPORT_SYMBOL_GPL(irq_bypass_register_consumer); + +/** + * irq_bypass_unregister_consumer - unregister IRQ bypass consumer + * @consumer: pointer to consumer structure + * + * Remove a previously registered IRQ 
consumer from the list of consumers + * and disconnect it from any connected IRQ producer. + */ +void irq_bypass_unregister_consumer(struct irq_bypass_consumer *consumer) +{ + struct irq_bypass_consumer *tmp; + struct irq_bypass_producer *producer; + + might_sleep(); + + if (!try_module_get(THIS_MODULE)) + return; /* nothing in the list anyway */ + + mutex_lock(&lock); + + list_for_each_entry(tmp, &consumers, node) { + if (tmp->token != consumer->token) + continue; + + list_for_each_entry(producer, &producers, node) { + if (producer->token == consumer->token) { + __disconnect(producer, consumer); + break; + } + } + + list_del(&consumer->node); + module_put(THIS_MODULE); + break; + } + + mutex_unlock(&lock); + + module_put(THIS_MODULE); +} +EXPORT_SYMBOL_GPL(irq_bypass_unregister_consumer); -- cgit v1.2.3 From 37d9fe4783ffcaddcc4afe67626691e62c5ab30e Mon Sep 17 00:00:00 2001 From: Feng Wu Date: Tue, 22 Sep 2015 16:47:29 +0800 Subject: virt: Add virt directory to the top Makefile We need to build files in virt/lib/, which are now used by KVM and VFIO, so add virt directory to the top Makefile. 
Signed-off-by: Feng Wu Acked-by: Michal Marek Signed-off-by: Paolo Bonzini --- Makefile | 10 ++++++---- virt/Makefile | 1 + 2 files changed, 7 insertions(+), 4 deletions(-) create mode 100644 virt/Makefile (limited to 'virt') diff --git a/Makefile b/Makefile index 1d341eba143d..7c6c9769bd1c 100644 --- a/Makefile +++ b/Makefile @@ -550,6 +550,7 @@ drivers-y := drivers/ sound/ firmware/ net-y := net/ libs-y := lib/ core-y := usr/ +virt-y := virt/ endif # KBUILD_EXTMOD ifeq ($(dot-config),1) @@ -882,10 +883,10 @@ core-y += kernel/ certs/ mm/ fs/ ipc/ security/ crypto/ block/ vmlinux-dirs := $(patsubst %/,%,$(filter %/, $(init-y) $(init-m) \ $(core-y) $(core-m) $(drivers-y) $(drivers-m) \ - $(net-y) $(net-m) $(libs-y) $(libs-m))) + $(net-y) $(net-m) $(libs-y) $(libs-m) $(virt-y))) vmlinux-alldirs := $(sort $(vmlinux-dirs) $(patsubst %/,%,$(filter %/, \ - $(init-) $(core-) $(drivers-) $(net-) $(libs-)))) + $(init-) $(core-) $(drivers-) $(net-) $(libs-) $(virt-)))) init-y := $(patsubst %/, %/built-in.o, $(init-y)) core-y := $(patsubst %/, %/built-in.o, $(core-y)) @@ -894,14 +895,15 @@ net-y := $(patsubst %/, %/built-in.o, $(net-y)) libs-y1 := $(patsubst %/, %/lib.a, $(libs-y)) libs-y2 := $(patsubst %/, %/built-in.o, $(libs-y)) libs-y := $(libs-y1) $(libs-y2) +virt-y := $(patsubst %/, %/built-in.o, $(virt-y)) # Externally visible symbols (used by link-vmlinux.sh) export KBUILD_VMLINUX_INIT := $(head-y) $(init-y) -export KBUILD_VMLINUX_MAIN := $(core-y) $(libs-y) $(drivers-y) $(net-y) +export KBUILD_VMLINUX_MAIN := $(core-y) $(libs-y) $(drivers-y) $(net-y) $(virt-y) export KBUILD_LDS := arch/$(SRCARCH)/kernel/vmlinux.lds export LDFLAGS_vmlinux # used by scripts/pacmage/Makefile -export KBUILD_ALLDIRS := $(sort $(filter-out arch/%,$(vmlinux-alldirs)) arch Documentation include samples scripts tools virt) +export KBUILD_ALLDIRS := $(sort $(filter-out arch/%,$(vmlinux-alldirs)) arch Documentation include samples scripts tools) vmlinux-deps := $(KBUILD_LDS) 
$(KBUILD_VMLINUX_INIT) $(KBUILD_VMLINUX_MAIN) diff --git a/virt/Makefile b/virt/Makefile new file mode 100644 index 000000000000..be783472ac81 --- /dev/null +++ b/virt/Makefile @@ -0,0 +1 @@ +obj-y += lib/ -- cgit v1.2.3 From 166c9775f1f8b8f00ad1db0fa5c8fc74059d965d Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 18 Sep 2015 22:29:42 +0800 Subject: KVM: create kvm_irqfd.h Move _irqfd_resampler and _irqfd struct declarations in a new public header: kvm_irqfd.h. They are respectively renamed into kvm_kernel_irqfd_resampler and kvm_kernel_irqfd. Those datatypes will be used by architecture specific code, in the context of IRQ bypass manager integration. Signed-off-by: Eric Auger Signed-off-by: Feng Wu Reviewed-by: Alex Williamson Signed-off-by: Paolo Bonzini --- include/linux/kvm_irqfd.h | 69 ++++++++++++++++++++++++++++++++++ virt/kvm/eventfd.c | 95 ++++++++++++----------------------------------- 2 files changed, 92 insertions(+), 72 deletions(-) create mode 100644 include/linux/kvm_irqfd.h (limited to 'virt') diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h new file mode 100644 index 000000000000..f926b39a26b6 --- /dev/null +++ b/include/linux/kvm_irqfd.h @@ -0,0 +1,69 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * irqfd: Allows an fd to be used to inject an interrupt to the guest + * Credit goes to Avi Kivity for the original idea. 
+ */ + +#ifndef __LINUX_KVM_IRQFD_H +#define __LINUX_KVM_IRQFD_H + +#include +#include + +/* + * Resampling irqfds are a special variety of irqfds used to emulate + * level triggered interrupts. The interrupt is asserted on eventfd + * trigger. On acknowledgment through the irq ack notifier, the + * interrupt is de-asserted and userspace is notified through the + * resamplefd. All resamplers on the same gsi are de-asserted + * together, so we don't need to track the state of each individual + * user. We can also therefore share the same irq source ID. + */ +struct kvm_kernel_irqfd_resampler { + struct kvm *kvm; + /* + * List of resampling struct _irqfd objects sharing this gsi. + * RCU list modified under kvm->irqfds.resampler_lock + */ + struct list_head list; + struct kvm_irq_ack_notifier notifier; + /* + * Entry in list of kvm->irqfd.resampler_list. Use for sharing + * resamplers among irqfds on the same gsi. + * Accessed and modified under kvm->irqfds.resampler_lock + */ + struct list_head link; +}; + +struct kvm_kernel_irqfd { + /* Used for MSI fast-path */ + struct kvm *kvm; + wait_queue_t wait; + /* Update side is protected by irqfds.lock */ + struct kvm_kernel_irq_routing_entry irq_entry; + seqcount_t irq_entry_sc; + /* Used for level IRQ fast-path */ + int gsi; + struct work_struct inject; + /* The resampler used by this irqfd (resampler-only) */ + struct kvm_kernel_irqfd_resampler *resampler; + /* Eventfd notified on resample (resampler-only) */ + struct eventfd_ctx *resamplefd; + /* Entry in list of irqfds for a resampler (resampler-only) */ + struct list_head resampler_link; + /* Used for setup/shutdown */ + struct eventfd_ctx *eventfd; + struct list_head list; + poll_table pt; + struct work_struct shutdown; +}; + +#endif /* __LINUX_KVM_IRQFD_H */ diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index ac89299b8699..413f5a6b61ba 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -23,6 +23,7 @@ #include #include +#include #include #include 
#include @@ -39,68 +40,14 @@ #include #ifdef CONFIG_HAVE_KVM_IRQFD -/* - * -------------------------------------------------------------------- - * irqfd: Allows an fd to be used to inject an interrupt to the guest - * - * Credit goes to Avi Kivity for the original idea. - * -------------------------------------------------------------------- - */ - -/* - * Resampling irqfds are a special variety of irqfds used to emulate - * level triggered interrupts. The interrupt is asserted on eventfd - * trigger. On acknowledgement through the irq ack notifier, the - * interrupt is de-asserted and userspace is notified through the - * resamplefd. All resamplers on the same gsi are de-asserted - * together, so we don't need to track the state of each individual - * user. We can also therefore share the same irq source ID. - */ -struct _irqfd_resampler { - struct kvm *kvm; - /* - * List of resampling struct _irqfd objects sharing this gsi. - * RCU list modified under kvm->irqfds.resampler_lock - */ - struct list_head list; - struct kvm_irq_ack_notifier notifier; - /* - * Entry in list of kvm->irqfd.resampler_list. Use for sharing - * resamplers among irqfds on the same gsi. 
- * Accessed and modified under kvm->irqfds.resampler_lock - */ - struct list_head link; -}; - -struct _irqfd { - /* Used for MSI fast-path */ - struct kvm *kvm; - wait_queue_t wait; - /* Update side is protected by irqfds.lock */ - struct kvm_kernel_irq_routing_entry irq_entry; - seqcount_t irq_entry_sc; - /* Used for level IRQ fast-path */ - int gsi; - struct work_struct inject; - /* The resampler used by this irqfd (resampler-only) */ - struct _irqfd_resampler *resampler; - /* Eventfd notified on resample (resampler-only) */ - struct eventfd_ctx *resamplefd; - /* Entry in list of irqfds for a resampler (resampler-only) */ - struct list_head resampler_link; - /* Used for setup/shutdown */ - struct eventfd_ctx *eventfd; - struct list_head list; - poll_table pt; - struct work_struct shutdown; -}; static struct workqueue_struct *irqfd_cleanup_wq; static void irqfd_inject(struct work_struct *work) { - struct _irqfd *irqfd = container_of(work, struct _irqfd, inject); + struct kvm_kernel_irqfd *irqfd = + container_of(work, struct kvm_kernel_irqfd, inject); struct kvm *kvm = irqfd->kvm; if (!irqfd->resampler) { @@ -121,12 +68,13 @@ irqfd_inject(struct work_struct *work) static void irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) { - struct _irqfd_resampler *resampler; + struct kvm_kernel_irqfd_resampler *resampler; struct kvm *kvm; - struct _irqfd *irqfd; + struct kvm_kernel_irqfd *irqfd; int idx; - resampler = container_of(kian, struct _irqfd_resampler, notifier); + resampler = container_of(kian, + struct kvm_kernel_irqfd_resampler, notifier); kvm = resampler->kvm; kvm_set_irq(kvm, KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID, @@ -141,9 +89,9 @@ irqfd_resampler_ack(struct kvm_irq_ack_notifier *kian) } static void -irqfd_resampler_shutdown(struct _irqfd *irqfd) +irqfd_resampler_shutdown(struct kvm_kernel_irqfd *irqfd) { - struct _irqfd_resampler *resampler = irqfd->resampler; + struct kvm_kernel_irqfd_resampler *resampler = irqfd->resampler; struct kvm *kvm = resampler->kvm; 
mutex_lock(&kvm->irqfds.resampler_lock); @@ -168,7 +116,8 @@ irqfd_resampler_shutdown(struct _irqfd *irqfd) static void irqfd_shutdown(struct work_struct *work) { - struct _irqfd *irqfd = container_of(work, struct _irqfd, shutdown); + struct kvm_kernel_irqfd *irqfd = + container_of(work, struct kvm_kernel_irqfd, shutdown); u64 cnt; /* @@ -198,7 +147,7 @@ irqfd_shutdown(struct work_struct *work) /* assumes kvm->irqfds.lock is held */ static bool -irqfd_is_active(struct _irqfd *irqfd) +irqfd_is_active(struct kvm_kernel_irqfd *irqfd) { return list_empty(&irqfd->list) ? false : true; } @@ -209,7 +158,7 @@ irqfd_is_active(struct _irqfd *irqfd) * assumes kvm->irqfds.lock is held */ static void -irqfd_deactivate(struct _irqfd *irqfd) +irqfd_deactivate(struct kvm_kernel_irqfd *irqfd) { BUG_ON(!irqfd_is_active(irqfd)); @@ -224,7 +173,8 @@ irqfd_deactivate(struct _irqfd *irqfd) static int irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) { - struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait); + struct kvm_kernel_irqfd *irqfd = + container_of(wait, struct kvm_kernel_irqfd, wait); unsigned long flags = (unsigned long)key; struct kvm_kernel_irq_routing_entry irq; struct kvm *kvm = irqfd->kvm; @@ -274,12 +224,13 @@ static void irqfd_ptable_queue_proc(struct file *file, wait_queue_head_t *wqh, poll_table *pt) { - struct _irqfd *irqfd = container_of(pt, struct _irqfd, pt); + struct kvm_kernel_irqfd *irqfd = + container_of(pt, struct kvm_kernel_irqfd, pt); add_wait_queue(wqh, &irqfd->wait); } /* Must be called under irqfds.lock */ -static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd) +static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd) { struct kvm_kernel_irq_routing_entry *e; struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; @@ -304,7 +255,7 @@ static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd) static int kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) { - struct _irqfd *irqfd, *tmp; 
+ struct kvm_kernel_irqfd *irqfd, *tmp; struct fd f; struct eventfd_ctx *eventfd = NULL, *resamplefd = NULL; int ret; @@ -340,7 +291,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) irqfd->eventfd = eventfd; if (args->flags & KVM_IRQFD_FLAG_RESAMPLE) { - struct _irqfd_resampler *resampler; + struct kvm_kernel_irqfd_resampler *resampler; resamplefd = eventfd_ctx_fdget(args->resamplefd); if (IS_ERR(resamplefd)) { @@ -525,7 +476,7 @@ kvm_eventfd_init(struct kvm *kvm) static int kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args) { - struct _irqfd *irqfd, *tmp; + struct kvm_kernel_irqfd *irqfd, *tmp; struct eventfd_ctx *eventfd; eventfd = eventfd_ctx_fdget(args->fd); @@ -581,7 +532,7 @@ kvm_irqfd(struct kvm *kvm, struct kvm_irqfd *args) void kvm_irqfd_release(struct kvm *kvm) { - struct _irqfd *irqfd, *tmp; + struct kvm_kernel_irqfd *irqfd, *tmp; spin_lock_irq(&kvm->irqfds.lock); @@ -604,7 +555,7 @@ kvm_irqfd_release(struct kvm *kvm) */ void kvm_irq_routing_update(struct kvm *kvm) { - struct _irqfd *irqfd; + struct kvm_kernel_irqfd *irqfd; spin_lock_irq(&kvm->irqfds.lock); -- cgit v1.2.3 From 1a02b27035f82091d51ecafcb9ccaac1f31d4eb2 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 18 Sep 2015 22:29:43 +0800 Subject: KVM: introduce kvm_arch functions for IRQ bypass This patch introduces - kvm_arch_irq_bypass_add_producer - kvm_arch_irq_bypass_del_producer - kvm_arch_irq_bypass_stop - kvm_arch_irq_bypass_start They make it possible to specialize the KVM IRQ bypass consumer when CONFIG_HAVE_KVM_IRQ_BYPASS is set. Signed-off-by: Eric Auger [Add weak implementations of the callbacks. 
- Feng] Signed-off-by: Feng Wu Reviewed-by: Alex Williamson Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 10 ++++++++++ virt/kvm/Kconfig | 3 +++ virt/kvm/eventfd.c | 12 ++++++++++++ 3 files changed, 25 insertions(+) (limited to 'virt') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index cd0ba2e931e1..b8543b02b7bc 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -1163,4 +1164,13 @@ static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val) { } #endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */ + +#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS +int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *, + struct irq_bypass_producer *); +void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *, + struct irq_bypass_producer *); +void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *); +void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *); +#endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */ #endif diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index e2c876d5a03b..9f8014dda2cf 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -47,3 +47,6 @@ config KVM_GENERIC_DIRTYLOG_READ_PROTECT config KVM_COMPAT def_bool y depends on COMPAT && !S390 + +config HAVE_KVM_IRQ_BYPASS + bool diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 413f5a6b61ba..c4f7abec4261 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -252,6 +252,18 @@ static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd) write_seqcount_end(&irqfd->irq_entry_sc); } +#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS +void __attribute__((weak)) kvm_arch_irq_bypass_stop( + struct irq_bypass_consumer *cons) +{ +} + +void __attribute__((weak)) kvm_arch_irq_bypass_start( + struct irq_bypass_consumer *cons) +{ +} +#endif + static int kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) { -- cgit v1.2.3 From 9016cfb577a15abd6a7990890ccf6bf1edf04d31 
Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 18 Sep 2015 22:29:44 +0800 Subject: KVM: eventfd: add irq bypass consumer management This patch adds the registration/unregistration of an irq_bypass_consumer on irqfd assignment/deassignment. Signed-off-by: Eric Auger Signed-off-by: Feng Wu Reviewed-by: Alex Williamson Signed-off-by: Paolo Bonzini --- include/linux/kvm_irqfd.h | 2 ++ virt/kvm/eventfd.c | 15 +++++++++++++++ 2 files changed, 17 insertions(+) (limited to 'virt') diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h index f926b39a26b6..0c1de05098c8 100644 --- a/include/linux/kvm_irqfd.h +++ b/include/linux/kvm_irqfd.h @@ -64,6 +64,8 @@ struct kvm_kernel_irqfd { struct list_head list; poll_table pt; struct work_struct shutdown; + struct irq_bypass_consumer consumer; + struct irq_bypass_producer *producer; }; #endif /* __LINUX_KVM_IRQFD_H */ diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index c4f7abec4261..7df356d8f1fd 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -140,6 +141,9 @@ irqfd_shutdown(struct work_struct *work) /* * It is now safe to release the object's resources */ +#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS + irq_bypass_unregister_consumer(&irqfd->consumer); +#endif eventfd_ctx_put(irqfd->eventfd); kfree(irqfd); } @@ -391,6 +395,17 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) * we might race against the POLLHUP */ fdput(f); +#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS + irqfd->consumer.token = (void *)irqfd->eventfd; + irqfd->consumer.add_producer = kvm_arch_irq_bypass_add_producer; + irqfd->consumer.del_producer = kvm_arch_irq_bypass_del_producer; + irqfd->consumer.stop = kvm_arch_irq_bypass_stop; + irqfd->consumer.start = kvm_arch_irq_bypass_start; + ret = irq_bypass_register_consumer(&irqfd->consumer); + if (ret) + pr_info("irq bypass consumer (token %p) registration fails: %d\n", + irqfd->consumer.token, ret); +#endif return 0; -- 
cgit v1.2.3 From f70c20aaf141adb715a2d750c55154073b02a9c3 Mon Sep 17 00:00:00 2001 From: Feng Wu Date: Fri, 18 Sep 2015 22:29:53 +0800 Subject: KVM: Add an arch specific hooks in 'struct kvm_kernel_irqfd' This patch adds an arch specific hooks 'arch_update' in 'struct kvm_kernel_irqfd'. On Intel side, it is used to update the IRTE when VT-d posted-interrupts is used. Signed-off-by: Feng Wu Reviewed-by: Alex Williamson Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 ++ virt/kvm/eventfd.c | 19 ++++++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) (limited to 'virt') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b8543b02b7bc..5c3f4538807f 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1172,5 +1172,7 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *, struct irq_bypass_producer *); void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *); void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *); +int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, + uint32_t guest_irq, bool set); #endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */ #endif diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 7df356d8f1fd..b637965746bb 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -266,6 +266,13 @@ void __attribute__((weak)) kvm_arch_irq_bypass_start( struct irq_bypass_consumer *cons) { } + +int __attribute__((weak)) kvm_arch_update_irqfd_routing( + struct kvm *kvm, unsigned int host_irq, + uint32_t guest_irq, bool set) +{ + return 0; +} #endif static int @@ -586,9 +593,19 @@ void kvm_irq_routing_update(struct kvm *kvm) spin_lock_irq(&kvm->irqfds.lock); - list_for_each_entry(irqfd, &kvm->irqfds.items, list) + list_for_each_entry(irqfd, &kvm->irqfds.items, list) { irqfd_update(kvm, irqfd); +#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS + if (irqfd->producer) { + int ret = kvm_arch_update_irqfd_routing( + irqfd->kvm, irqfd->producer->irq, + irqfd->gsi, 1); + 
WARN_ON(ret); + } +#endif + } + spin_unlock_irq(&kvm->irqfds.lock); } -- cgit v1.2.3 From bf9f6ac8d74969690df1485b33b7c238ca9f2269 Mon Sep 17 00:00:00 2001 From: Feng Wu Date: Fri, 18 Sep 2015 22:29:55 +0800 Subject: KVM: Update Posted-Interrupts Descriptor when vCPU is blocked This patch updates the Posted-Interrupts Descriptor when vCPU is blocked. pre-block: - Add the vCPU to the blocked per-CPU list - Set 'NV' to POSTED_INTR_WAKEUP_VECTOR post-block: - Remove the vCPU from the per-CPU list Signed-off-by: Feng Wu [Concentrate invocation of pre/post-block hooks to vcpu_block. - Paolo] Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/locking.txt | 12 +++ arch/x86/include/asm/kvm_host.h | 11 +++ arch/x86/kvm/vmx.c | 153 ++++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.c | 34 +++++--- include/linux/kvm_host.h | 3 + virt/kvm/kvm_main.c | 3 + 6 files changed, 206 insertions(+), 10 deletions(-) (limited to 'virt') diff --git a/Documentation/virtual/kvm/locking.txt b/Documentation/virtual/kvm/locking.txt index d68af4dc3006..19f94a6b9bb0 100644 --- a/Documentation/virtual/kvm/locking.txt +++ b/Documentation/virtual/kvm/locking.txt @@ -166,3 +166,15 @@ Comment: The srcu read lock must be held while accessing memslots (e.g. MMIO/PIO address->device structure mapping (kvm->buses). The srcu index can be stored in kvm_vcpu->srcu_idx per vcpu if it is needed by multiple functions. + +Name: blocked_vcpu_on_cpu_lock +Type: spinlock_t +Arch: x86 +Protects: blocked_vcpu_on_cpu +Comment: This is a per-CPU lock and it is used for VT-d posted-interrupts. + When VT-d posted-interrupts is supported and the VM has assigned + devices, we put the blocked vCPU on the list blocked_vcpu_on_cpu + protected by blocked_vcpu_on_cpu_lock, when VT-d hardware issues + wakeup notification event since external interrupts from the + assigned devices happens, we will find the vCPU on the list to + wakeup. 
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 15664994b6f3..cdbdb559ecd2 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -899,6 +899,17 @@ struct kvm_x86_ops { /* pmu operations of sub-arch */ const struct kvm_pmu_ops *pmu_ops; + /* + * Architecture specific hooks for vCPU blocking due to + * HLT instruction. + * Returns for .pre_block(): + * - 0 means continue to block the vCPU. + * - 1 means we cannot block the vCPU since some event + * happens during this period, such as, 'ON' bit in + * posted-interrupts descriptor is set. + */ + int (*pre_block)(struct kvm_vcpu *vcpu); + void (*post_block)(struct kvm_vcpu *vcpu); int (*update_pi_irte)(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, bool set); }; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 99f5c61954ea..c5c22831aee2 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -878,6 +878,13 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs); static DEFINE_PER_CPU(struct list_head, loaded_vmcss_on_cpu); static DEFINE_PER_CPU(struct desc_ptr, host_gdt); +/* + * We maintian a per-CPU linked-list of vCPU, so in wakeup_handler() we + * can find which vCPU should be waken up. + */ +static DEFINE_PER_CPU(struct list_head, blocked_vcpu_on_cpu); +static DEFINE_PER_CPU(spinlock_t, blocked_vcpu_on_cpu_lock); + static unsigned long *vmx_io_bitmap_a; static unsigned long *vmx_io_bitmap_b; static unsigned long *vmx_msr_bitmap_legacy; @@ -2986,6 +2993,8 @@ static int hardware_enable(void) return -EBUSY; INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu)); + INIT_LIST_HEAD(&per_cpu(blocked_vcpu_on_cpu, cpu)); + spin_lock_init(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); /* * Now we can enable the vmclear operation in kdump @@ -6045,6 +6054,25 @@ static void update_ple_window_actual_max(void) ple_window_grow, INT_MIN); } +/* + * Handler for POSTED_INTERRUPT_WAKEUP_VECTOR. 
+ */ +static void wakeup_handler(void) +{ + struct kvm_vcpu *vcpu; + int cpu = smp_processor_id(); + + spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); + list_for_each_entry(vcpu, &per_cpu(blocked_vcpu_on_cpu, cpu), + blocked_vcpu_list) { + struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); + + if (pi_test_on(pi_desc) == 1) + kvm_vcpu_kick(vcpu); + } + spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); +} + static __init int hardware_setup(void) { int r = -ENOMEM, i, msr; @@ -6231,6 +6259,8 @@ static __init int hardware_setup(void) kvm_x86_ops->enable_log_dirty_pt_masked = NULL; } + kvm_set_posted_intr_wakeup_handler(wakeup_handler); + return alloc_kvm_area(); out8: @@ -10431,6 +10461,126 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm, kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask); } +/* + * This routine does the following things for vCPU which is going + * to be blocked if VT-d PI is enabled. + * - Store the vCPU to the wakeup list, so when interrupts happen + * we can find the right vCPU to wake up. + * - Change the Posted-interrupt descriptor as below: + * 'NDST' <-- vcpu->pre_pcpu + * 'NV' <-- POSTED_INTR_WAKEUP_VECTOR + * - If 'ON' is set during this process, which means at least one + * interrupt is posted for this vCPU, we cannot block it, in + * this case, return 1, otherwise, return 0. 
+ * + */ +static int vmx_pre_block(struct kvm_vcpu *vcpu) +{ + unsigned long flags; + unsigned int dest; + struct pi_desc old, new; + struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); + + if (!kvm_arch_has_assigned_device(vcpu->kvm) || + !irq_remapping_cap(IRQ_POSTING_CAP)) + return 0; + + vcpu->pre_pcpu = vcpu->cpu; + spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock, + vcpu->pre_pcpu), flags); + list_add_tail(&vcpu->blocked_vcpu_list, + &per_cpu(blocked_vcpu_on_cpu, + vcpu->pre_pcpu)); + spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock, + vcpu->pre_pcpu), flags); + + do { + old.control = new.control = pi_desc->control; + + /* + * We should not block the vCPU if + * an interrupt is posted for it. + */ + if (pi_test_on(pi_desc) == 1) { + spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock, + vcpu->pre_pcpu), flags); + list_del(&vcpu->blocked_vcpu_list); + spin_unlock_irqrestore( + &per_cpu(blocked_vcpu_on_cpu_lock, + vcpu->pre_pcpu), flags); + vcpu->pre_pcpu = -1; + + return 1; + } + + WARN((pi_desc->sn == 1), + "Warning: SN field of posted-interrupts " + "is set before blocking\n"); + + /* + * Since vCPU can be preempted during this process, + * vcpu->cpu could be different with pre_pcpu, we + * need to set pre_pcpu as the destination of wakeup + * notification event, then we can find the right vCPU + * to wakeup in wakeup handler if interrupts happen + * when the vCPU is in blocked state. 
+ */ + dest = cpu_physical_id(vcpu->pre_pcpu); + + if (x2apic_enabled()) + new.ndst = dest; + else + new.ndst = (dest << 8) & 0xFF00; + + /* set 'NV' to 'wakeup vector' */ + new.nv = POSTED_INTR_WAKEUP_VECTOR; + } while (cmpxchg(&pi_desc->control, old.control, + new.control) != old.control); + + return 0; +} + +static void vmx_post_block(struct kvm_vcpu *vcpu) +{ + struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu); + struct pi_desc old, new; + unsigned int dest; + unsigned long flags; + + if (!kvm_arch_has_assigned_device(vcpu->kvm) || + !irq_remapping_cap(IRQ_POSTING_CAP)) + return; + + do { + old.control = new.control = pi_desc->control; + + dest = cpu_physical_id(vcpu->cpu); + + if (x2apic_enabled()) + new.ndst = dest; + else + new.ndst = (dest << 8) & 0xFF00; + + /* Allow posting non-urgent interrupts */ + new.sn = 0; + + /* set 'NV' to 'notification vector' */ + new.nv = POSTED_INTR_VECTOR; + } while (cmpxchg(&pi_desc->control, old.control, + new.control) != old.control); + + if(vcpu->pre_pcpu != -1) { + spin_lock_irqsave( + &per_cpu(blocked_vcpu_on_cpu_lock, + vcpu->pre_pcpu), flags); + list_del(&vcpu->blocked_vcpu_list); + spin_unlock_irqrestore( + &per_cpu(blocked_vcpu_on_cpu_lock, + vcpu->pre_pcpu), flags); + vcpu->pre_pcpu = -1; + } +} + /* * vmx_update_pi_irte - set IRTE for Posted-Interrupts * @@ -10622,6 +10772,9 @@ static struct kvm_x86_ops vmx_x86_ops = { .flush_log_dirty = vmx_flush_log_dirty, .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked, + .pre_block = vmx_pre_block, + .post_block = vmx_post_block, + .pmu_ops = &intel_pmu_ops, .update_pi_irte = vmx_update_pi_irte, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b8425a769c0a..2d2c9bb0d6d6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6335,6 +6335,20 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) } } + /* + * KVM_REQ_EVENT is not set when posted interrupts are set by + * VT-d hardware, so we have to update RVI unconditionally. 
+ */ + if (kvm_lapic_enabled(vcpu)) { + /* + * Update architecture specific hints for APIC + * virtual interrupt delivery. + */ + if (kvm_x86_ops->hwapic_irr_update) + kvm_x86_ops->hwapic_irr_update(vcpu, + kvm_lapic_find_highest_irr(vcpu)); + } + if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { kvm_apic_accept_events(vcpu); if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { @@ -6351,13 +6365,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_x86_ops->enable_irq_window(vcpu); if (kvm_lapic_enabled(vcpu)) { - /* - * Update architecture specific hints for APIC - * virtual interrupt delivery. - */ - if (kvm_x86_ops->hwapic_irr_update) - kvm_x86_ops->hwapic_irr_update(vcpu, - kvm_lapic_find_highest_irr(vcpu)); update_cr8_intercept(vcpu); kvm_lapic_sync_to_vapic(vcpu); } @@ -6493,10 +6500,15 @@ out: static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu) { - if (!kvm_arch_vcpu_runnable(vcpu)) { + if (!kvm_arch_vcpu_runnable(vcpu) && + (!kvm_x86_ops->pre_block || kvm_x86_ops->pre_block(vcpu) == 0)) { srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx); kvm_vcpu_block(vcpu); vcpu->srcu_idx = srcu_read_lock(&kvm->srcu); + + if (kvm_x86_ops->post_block) + kvm_x86_ops->post_block(vcpu); + if (!kvm_check_request(KVM_REQ_UNHALT, vcpu)) return 1; } @@ -6528,10 +6540,12 @@ static int vcpu_run(struct kvm_vcpu *vcpu) for (;;) { if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && - !vcpu->arch.apf.halted) + !vcpu->arch.apf.halted) { r = vcpu_enter_guest(vcpu); - else + } else { r = vcpu_block(kvm, vcpu); + } + if (r <= 0) break; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5c3f4538807f..9596a2f0977b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -234,6 +234,9 @@ struct kvm_vcpu { unsigned long requests; unsigned long guest_debug; + int pre_pcpu; + struct list_head blocked_vcpu_list; + struct mutex mutex; struct kvm_run *run; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 
afd7ae6aec65..a75502c93c3e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -230,6 +230,9 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id) init_waitqueue_head(&vcpu->wq); kvm_async_pf_vcpu_init(vcpu); + vcpu->pre_pcpu = -1; + INIT_LIST_HEAD(&vcpu->blocked_vcpu_list); + page = alloc_page(GFP_KERNEL | __GFP_ZERO); if (!page) { r = -ENOMEM; -- cgit v1.2.3 From 6003a4201077da41427dae5e71bb2f31dfdc0c10 Mon Sep 17 00:00:00 2001 From: Kosuke Tatsukawa Date: Fri, 9 Oct 2015 12:21:55 +0000 Subject: kvm: fix waitqueue_active without memory barrier in virt/kvm/async_pf.c async_pf_execute() seems to be missing a memory barrier which might cause the waker to not notice the waiter and miss sending a wake_up as in the following figure. async_pf_execute kvm_vcpu_block ------------------------------------------------------------------------ spin_lock(&vcpu->async_pf.lock); if (waitqueue_active(&vcpu->wq)) /* The CPU might reorder the test for the waitqueue up here, before prior writes complete */ prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); /*if (kvm_vcpu_check_block(vcpu) < 0) */ /*if (kvm_arch_vcpu_runnable(vcpu)) { */ ... return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && !vcpu->arch.apf.halted) || !list_empty_careful(&vcpu->async_pf.done) ... return 0; list_add_tail(&apf->link, &vcpu->async_pf.done); spin_unlock(&vcpu->async_pf.lock); waited = true; schedule(); ------------------------------------------------------------------------ The attached patch adds the missing memory barrier. I found this issue when I was looking through the linux source code for places calling waitqueue_active() before wake_up*(), but without preceding memory barriers, after sending a patch to fix a similar issue in drivers/tty/n_tty.c (Details about the original issue can be found here: https://lkml.org/lkml/2015/9/28/849). 
Signed-off-by: Kosuke Tatsukawa Signed-off-by: Paolo Bonzini --- virt/kvm/async_pf.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'virt') diff --git a/virt/kvm/async_pf.c b/virt/kvm/async_pf.c index 44660aee335f..77d42be6970e 100644 --- a/virt/kvm/async_pf.c +++ b/virt/kvm/async_pf.c @@ -94,6 +94,10 @@ static void async_pf_execute(struct work_struct *work) trace_kvm_async_pf_completed(addr, gva); + /* + * This memory barrier pairs with prepare_to_wait's set_current_state() + */ + smp_mb(); if (waitqueue_active(&vcpu->wq)) wake_up_interruptible(&vcpu->wq); -- cgit v1.2.3 From 351dc6477cd35136ce4668401b1b1332a62908a8 Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Fri, 16 Oct 2015 10:07:45 +0300 Subject: kvm/eventfd: avoid loop inside irqfd_update() The loop(for) inside irqfd_update() is unnecessary because any other value for irq_entry.type will just trigger schedule_work(&irqfd->inject) in irqfd_wakeup. Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan Signed-off-by: Denis V. Lunev CC: Vitaly Kuznetsov CC: "K. Y. Srinivasan" CC: Gleb Natapov CC: Paolo Bonzini Signed-off-by: Paolo Bonzini --- virt/kvm/eventfd.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'virt') diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index b637965746bb..518421e65b0d 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -238,20 +238,17 @@ static void irqfd_update(struct kvm *kvm, struct kvm_kernel_irqfd *irqfd) { struct kvm_kernel_irq_routing_entry *e; struct kvm_kernel_irq_routing_entry entries[KVM_NR_IRQCHIPS]; - int i, n_entries; + int n_entries; n_entries = kvm_irq_map_gsi(kvm, entries, irqfd->gsi); write_seqcount_begin(&irqfd->irq_entry_sc); - irqfd->irq_entry.type = 0; - e = entries; - for (i = 0; i < n_entries; ++i, ++e) { - /* Only fast-path MSI. 
*/ - if (e->type == KVM_IRQ_ROUTING_MSI) - irqfd->irq_entry = *e; - } + if (n_entries == 1) + irqfd->irq_entry = *e; + else + irqfd->irq_entry.type = 0; write_seqcount_end(&irqfd->irq_entry_sc); } -- cgit v1.2.3 From ba1aefcd6db5536d3eb3ca3ce7bd6786960140ea Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Fri, 16 Oct 2015 10:07:46 +0300 Subject: kvm/eventfd: factor out kvm_notify_acked_gsi() Factor out kvm_notify_acked_gsi() helper to iterate over EOI listeners and notify those matching the given gsi. It will be reused in the upcoming Hyper-V SynIC implementation. Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan Signed-off-by: Denis V. Lunev CC: Vitaly Kuznetsov CC: "K. Y. Srinivasan" CC: Gleb Natapov CC: Paolo Bonzini Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + virt/kvm/eventfd.c | 16 +++++++++++----- 2 files changed, 12 insertions(+), 5 deletions(-) (limited to 'virt') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9596a2f0977b..b66861c297c4 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -829,6 +829,7 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm, int irq_source_id, int level, bool line_status); bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin); +void kvm_notify_acked_gsi(struct kvm *kvm, int gsi); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); void kvm_register_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian); diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 518421e65b0d..f6b986a41823 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -451,9 +451,18 @@ bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) } EXPORT_SYMBOL_GPL(kvm_irq_has_notifier); -void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) +void kvm_notify_acked_gsi(struct 
kvm *kvm, int gsi) { struct kvm_irq_ack_notifier *kian; + + hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, + link) + if (kian->gsi == gsi) + kian->irq_acked(kian); +} + +void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) +{ int gsi, idx; trace_kvm_ack_irq(irqchip, pin); @@ -461,10 +470,7 @@ void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin) idx = srcu_read_lock(&kvm->irq_srcu); gsi = kvm_irq_map_chip_pin(kvm, irqchip, pin); if (gsi != -1) - hlist_for_each_entry_rcu(kian, &kvm->irq_ack_notifier_list, - link) - if (kian->gsi == gsi) - kian->irq_acked(kian); + kvm_notify_acked_gsi(kvm, gsi); srcu_read_unlock(&kvm->irq_srcu, idx); } -- cgit v1.2.3 From c9a5eccac1abf50649949f15754a7635f263a1ff Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Fri, 16 Oct 2015 10:07:47 +0300 Subject: kvm/eventfd: add arch-specific set_irq Allow for arch-specific interrupt types to be set. For that, add kvm_arch_set_irq() which takes interrupt type-specific action if it recognizes the interrupt type given, and -EWOULDBLOCK otherwise. The default implementation always returns -EWOULDBLOCK. Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan Signed-off-by: Denis V. Lunev CC: Vitaly Kuznetsov CC: "K. Y. 
Srinivasan" CC: Gleb Natapov CC: Paolo Bonzini Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 4 ++++ virt/kvm/eventfd.c | 13 ++++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'virt') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b66861c297c4..eba9caebc9c1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -828,6 +828,10 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level); int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm, int irq_source_id, int level, bool line_status); + +int kvm_arch_set_irq(struct kvm_kernel_irq_routing_entry *irq, struct kvm *kvm, + int irq_source_id, int level, bool line_status); + bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin); void kvm_notify_acked_gsi(struct kvm *kvm, int gsi); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index f6b986a41823..e29fd2640709 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -171,6 +171,15 @@ irqfd_deactivate(struct kvm_kernel_irqfd *irqfd) queue_work(irqfd_cleanup_wq, &irqfd->shutdown); } +int __attribute__((weak)) kvm_arch_set_irq( + struct kvm_kernel_irq_routing_entry *irq, + struct kvm *kvm, int irq_source_id, + int level, + bool line_status) +{ + return -EWOULDBLOCK; +} + /* * Called with wqh->lock held and interrupts disabled */ @@ -195,7 +204,9 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) if (irq.type == KVM_IRQ_ROUTING_MSI) kvm_set_msi(&irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false); - else + else if (kvm_arch_set_irq(&irq, kvm, + KVM_USERSPACE_IRQ_SOURCE_ID, 1, + false) == -EWOULDBLOCK) schedule_work(&irqfd->inject); srcu_read_unlock(&kvm->irq_srcu, idx); } -- cgit v1.2.3 From f33143d80907602deb1b96db42da93507ed03b31 Mon Sep 17 00:00:00 2001 
From: Andrey Smetanin Date: Fri, 16 Oct 2015 10:07:48 +0300 Subject: kvm/irqchip: allow only multiple irqchip routes per GSI Any other irq routing types (MSI, S390_ADAPTER, upcoming Hyper-V SynIC) map one-to-one to GSI. Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan Signed-off-by: Denis V. Lunev CC: Vitaly Kuznetsov CC: "K. Y. Srinivasan" CC: Gleb Natapov CC: Paolo Bonzini Signed-off-by: Paolo Bonzini --- virt/kvm/irqchip.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'virt') diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index 716a1c4db528..f0b08a2a48ba 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -144,11 +144,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt, /* * Do not allow GSI to be mapped to the same irqchip more than once. - * Allow only one to one mapping between GSI and MSI. + * Allow only one to one mapping between GSI and non-irqchip routing. */ hlist_for_each_entry(ei, &rt->map[ue->gsi], link) - if (ei->type == KVM_IRQ_ROUTING_MSI || - ue->type == KVM_IRQ_ROUTING_MSI || + if (ei->type != KVM_IRQ_ROUTING_IRQCHIP || + ue->type != KVM_IRQ_ROUTING_IRQCHIP || ue->u.irqchip.irqchip == ei->irqchip.irqchip) return r; -- cgit v1.2.3 From 3217f7c25bca66eed9b07f0b8bfd1937169b0736 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 27 Aug 2015 16:41:15 +0200 Subject: KVM: Add kvm_arch_vcpu_{un}blocking callbacks Some times it is useful for architecture implementations of KVM to know when the VCPU thread is about to block or when it comes back from blocking (arm/arm64 needs to know this to properly implement timers, for example). Therefore provide a generic architecture callback function in line with what we do elsewhere for KVM generic-arch interactions. 
Reviewed-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 3 +++ arch/arm64/include/asm/kvm_host.h | 3 +++ arch/mips/include/asm/kvm_host.h | 2 ++ arch/powerpc/include/asm/kvm_host.h | 2 ++ arch/s390/include/asm/kvm_host.h | 2 ++ arch/x86/include/asm/kvm_host.h | 3 +++ include/linux/kvm_host.h | 2 ++ virt/kvm/kvm_main.c | 3 +++ 8 files changed, 20 insertions(+) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index c4072d9f32c7..84da97901f1f 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -234,4 +234,7 @@ static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {} static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {} static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} + #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index ed039688c221..e4f4d65f7d2b 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -255,4 +255,7 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu); void kvm_arm_clear_debug(struct kvm_vcpu *vcpu); void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu); +static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 5a1a882e0a75..6ded8d347af9 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -847,5 +847,7 @@ static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) {} static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_sched_in(struct kvm_vcpu 
*vcpu, int cpu) {} +static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} #endif /* __MIPS_KVM_HOST_H__ */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 827a38d7a9db..c9f122d00920 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -718,5 +718,7 @@ static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslot static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_exit(void) {} +static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} #endif /* __POWERPC_KVM_HOST_H__ */ diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 8ced426091e1..72a614c68ed8 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -644,5 +644,7 @@ static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslot static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) {} +static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} #endif diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 2beee0382088..b28f0f142ecb 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1233,4 +1233,7 @@ int x86_set_memory_region(struct kvm *kvm, bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu); bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu); +static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} + #endif /* _ASM_X86_KVM_HOST_H 
*/ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1bef9e21e725..4a86f5f072c0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -625,6 +625,8 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data, void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); void kvm_vcpu_block(struct kvm_vcpu *vcpu); +void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu); +void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); int kvm_vcpu_yield_to(struct kvm_vcpu *target); void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 8db1d9361993..7873d6daccb1 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2018,6 +2018,8 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) } while (single_task_running() && ktime_before(cur, stop)); } + kvm_arch_vcpu_blocking(vcpu); + for (;;) { prepare_to_wait(&vcpu->wq, &wait, TASK_INTERRUPTIBLE); @@ -2031,6 +2033,7 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu) finish_wait(&vcpu->wq, &wait); cur = ktime_get(); + kvm_arch_vcpu_unblocking(vcpu); out: block_ns = ktime_to_ns(cur) - ktime_to_ns(start); -- cgit v1.2.3 From d35268da66870d733ae763fd7f9b06a1f63f395e Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 25 Aug 2015 19:48:21 +0200 Subject: arm/arm64: KVM: arch_timer: Only schedule soft timer on vcpu_block We currently schedule a soft timer every time we exit the guest if the timer did not expire while running the guest. This is really not necessary, because the only work we do in the timer work function is to kick the vcpu. Kicking the vcpu does two things: (1) If the vpcu thread is on a waitqueue, make it runnable and remove it from the waitqueue. (2) If the vcpu is running on a different physical CPU from the one doing the kick, it sends a reschedule IPI. The second case cannot happen, because the soft timer is only ever scheduled when the vcpu is not running. 
The first case is only relevant when the vcpu thread is on a waitqueue, which is only the case when the vcpu thread has called kvm_vcpu_block(). Therefore, we only need to make sure a timer is scheduled for kvm_vcpu_block(), which we do by encapsulating all calls to kvm_vcpu_block() with kvm_timer_{un}schedule calls. Additionally, we only schedule a soft timer if the timer is enabled and unmasked, since it is useless otherwise. Note that theoretically userspace can use the SET_ONE_REG interface to change registers that should cause the timer to fire, even if the vcpu is blocked without a scheduled timer, but this case was not supported before this patch and we leave it for future work for now. Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 3 -- arch/arm/kvm/arm.c | 10 +++++ arch/arm64/include/asm/kvm_host.h | 3 -- include/kvm/arm_arch_timer.h | 2 + virt/kvm/arm/arch_timer.c | 94 +++++++++++++++++++++++++-------------- 5 files changed, 72 insertions(+), 40 deletions(-) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 84da97901f1f..c4072d9f32c7 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -234,7 +234,4 @@ static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {} static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {} static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {} -static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} -static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} - #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 78b286994577..7ed4d475d83a 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -271,6 +271,16 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) return kvm_timer_should_fire(vcpu); } +void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) +{ + kvm_timer_schedule(vcpu); +} + +void kvm_arch_vcpu_unblocking(struct 
kvm_vcpu *vcpu) +{ + kvm_timer_unschedule(vcpu); +} + int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { /* Force users to call KVM_ARM_VCPU_INIT */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index e4f4d65f7d2b..ed039688c221 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -255,7 +255,4 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu); void kvm_arm_clear_debug(struct kvm_vcpu *vcpu); void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu); -static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} -static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} - #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index e1e4d7c38dda..ef14cc1f1f26 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -71,5 +71,7 @@ u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); bool kvm_timer_should_fire(struct kvm_vcpu *vcpu); +void kvm_timer_schedule(struct kvm_vcpu *vcpu); +void kvm_timer_unschedule(struct kvm_vcpu *vcpu); #endif diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index b9d3a32cbc04..32095fbb5d7c 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -111,14 +111,21 @@ static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) return HRTIMER_NORESTART; } +static bool kvm_timer_irq_can_fire(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + + return !(timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) && + (timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE) && + !kvm_vgic_get_phys_irq_active(timer->map); +} + bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; cycle_t cval, now; - if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) || - !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE) || - 
kvm_vgic_get_phys_irq_active(timer->map)) + if (!kvm_timer_irq_can_fire(vcpu)) return false; cval = timer->cntv_cval; @@ -127,12 +134,57 @@ bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) return cval <= now; } +/* + * Schedule the background timer before calling kvm_vcpu_block, so that this + * thread is removed from its waitqueue and made runnable when there's a timer + * interrupt to handle. + */ +void kvm_timer_schedule(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + u64 ns; + cycle_t cval, now; + + BUG_ON(timer_is_armed(timer)); + + /* + * No need to schedule a background timer if the guest timer has + * already expired, because kvm_vcpu_block will return before putting + * the thread to sleep. + */ + if (kvm_timer_should_fire(vcpu)) + return; + + /* + * If the timer is not capable of raising interrupts (disabled or + * masked), then there's no more work for us to do. + */ + if (!kvm_timer_irq_can_fire(vcpu)) + return; + + /* The timer has not yet expired, schedule a background timer */ + cval = timer->cntv_cval; + now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; + + ns = cyclecounter_cyc2ns(timecounter->cc, + cval - now, + timecounter->mask, + &timecounter->frac); + timer_arm(timer, ns); +} + +void kvm_timer_unschedule(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + timer_disarm(timer); +} + /** * kvm_timer_flush_hwstate - prepare to move the virt timer to the cpu * @vcpu: The vcpu pointer * - * Disarm any pending soft timers, since the world-switch code will write the - * virtual timer state back to the physical CPU. + * Check if the virtual timer has expired while we were running in the host, + * and inject an interrupt if that was the case. 
*/ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) { @@ -140,17 +192,6 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) bool phys_active; int ret; - /* - * We're about to run this vcpu again, so there is no need to - * keep the background timer running, as we're about to - * populate the CPU timer again. - */ - timer_disarm(timer); - - /* - * If the timer expired while we were not scheduled, now is the time - * to inject it. - */ if (kvm_timer_should_fire(vcpu)) kvm_timer_inject_irq(vcpu); @@ -176,32 +217,17 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) * kvm_timer_sync_hwstate - sync timer state from cpu * @vcpu: The vcpu pointer * - * Check if the virtual timer was armed and either schedule a corresponding - * soft timer or inject directly if already expired. + * Check if the virtual timer has expired while we were running in the guest, + * and inject an interrupt if that was the case. */ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - cycle_t cval, now; - u64 ns; BUG_ON(timer_is_armed(timer)); - if (kvm_timer_should_fire(vcpu)) { - /* - * Timer has already expired while we were not - * looking. Inject the interrupt and carry on. 
- */ + if (kvm_timer_should_fire(vcpu)) kvm_timer_inject_irq(vcpu); - return; - } - - cval = timer->cntv_cval; - now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff; - - ns = cyclecounter_cyc2ns(timecounter->cc, cval - now, timecounter->mask, - &timecounter->frac); - timer_arm(timer, ns); } int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, -- cgit v1.2.3 From 9103617df202d74e5c65f8af84a9aa727f812a06 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 25 Aug 2015 22:50:57 +0200 Subject: arm/arm64: KVM: vgic: Factor out level irq processing on guest exit Currently vgic_process_maintenance() processes dealing with a completed level-triggered interrupt directly, but we are soon going to reuse this logic for level-triggered mapped interrupts with the HW bit set, so move this logic into a separate static function. Probably the most scary part of this commit is convincing yourself that the current flow is safe compared to the old one. In the following I try to list the changes and why they are harmless: Move vgic_irq_clear_queued after kvm_notify_acked_irq: Harmless because the only potential effect of clearing the queued flag wrt. kvm_set_irq is that vgic_update_irq_pending does not set the pending bit on the emulated CPU interface or in the pending_on_cpu bitmask if the function is called with level=1. However, the point of kvm_notify_acked_irq is to call kvm_set_irq with level=0, and we set the queued flag again in __kvm_vgic_sync_hwstate later on if the level is still high. Move vgic_set_lr before kvm_notify_acked_irq: Also, harmless because the LR are cpu-local operations and kvm_notify_acked only affects the dist. Move vgic_dist_irq_clear_soft_pend after kvm_notify_acked_irq: Also harmless, because now we check the level state in the clear_soft_pend function and lower the pending bits if the level is low.
Reviewed-by: Eric Auger Reviewed-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 94 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 56 insertions(+), 38 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 66c66165e712..367a180fb5ac 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -107,6 +107,7 @@ static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu, int virt_irq); +static int compute_pending_for_cpu(struct kvm_vcpu *vcpu); static const struct vgic_ops *vgic_ops; static const struct vgic_params *vgic; @@ -357,6 +358,11 @@ static void vgic_dist_irq_clear_soft_pend(struct kvm_vcpu *vcpu, int irq) struct vgic_dist *dist = &vcpu->kvm->arch.vgic; vgic_bitmap_set_irq_val(&dist->irq_soft_pend, vcpu->vcpu_id, irq, 0); + if (!vgic_dist_irq_get_level(vcpu, irq)) { + vgic_dist_irq_clear_pending(vcpu, irq); + if (!compute_pending_for_cpu(vcpu)) + clear_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); + } } static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq) @@ -1338,12 +1344,56 @@ epilog: } } +static int process_level_irq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr) +{ + int level_pending = 0; + + vlr.state = 0; + vlr.hwirq = 0; + vgic_set_lr(vcpu, lr, vlr); + + /* + * If the IRQ was EOIed (called from vgic_process_maintenance) or it + * went from active to non-active (called from vgic_sync_hwirq) it was + * also ACKed and we we therefore assume we can clear the soft pending + * state (should it had been set) for this interrupt. + * + * Note: if the IRQ soft pending state was set after the IRQ was + * acked, it actually shouldn't be cleared, but we have no way of + * knowing that unless we start trapping ACKs when the soft-pending + * state is set. 
+ */ + vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq); + + /* + * Tell the gic to start sampling the line of this interrupt again. + */ + vgic_irq_clear_queued(vcpu, vlr.irq); + + /* Any additional pending interrupt? */ + if (vgic_dist_irq_get_level(vcpu, vlr.irq)) { + vgic_cpu_irq_set(vcpu, vlr.irq); + level_pending = 1; + } else { + vgic_dist_irq_clear_pending(vcpu, vlr.irq); + vgic_cpu_irq_clear(vcpu, vlr.irq); + } + + /* + * Despite being EOIed, the LR may not have + * been marked as empty. + */ + vgic_sync_lr_elrsr(vcpu, lr, vlr); + + return level_pending; +} + static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) { u32 status = vgic_get_interrupt_status(vcpu); struct vgic_dist *dist = &vcpu->kvm->arch.vgic; - bool level_pending = false; struct kvm *kvm = vcpu->kvm; + int level_pending = 0; kvm_debug("STATUS = %08x\n", status); @@ -1358,54 +1408,22 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) for_each_set_bit(lr, eisr_ptr, vgic->nr_lr) { struct vgic_lr vlr = vgic_get_lr(vcpu, lr); - WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq)); - spin_lock(&dist->lock); - vgic_irq_clear_queued(vcpu, vlr.irq); + WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq)); WARN_ON(vlr.state & LR_STATE_MASK); - vlr.state = 0; - vgic_set_lr(vcpu, lr, vlr); - /* - * If the IRQ was EOIed it was also ACKed and we we - * therefore assume we can clear the soft pending - * state (should it had been set) for this interrupt. - * - * Note: if the IRQ soft pending state was set after - * the IRQ was acked, it actually shouldn't be - * cleared, but we have no way of knowing that unless - * we start trapping ACKs when the soft-pending state - * is set. - */ - vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq); /* * kvm_notify_acked_irq calls kvm_set_irq() - * to reset the IRQ level. Need to release the - * lock for kvm_set_irq to grab it. + * to reset the IRQ level, which grabs the dist->lock + * so we call this before taking the dist->lock. 
*/ - spin_unlock(&dist->lock); - kvm_notify_acked_irq(kvm, 0, vlr.irq - VGIC_NR_PRIVATE_IRQS); - spin_lock(&dist->lock); - - /* Any additional pending interrupt? */ - if (vgic_dist_irq_get_level(vcpu, vlr.irq)) { - vgic_cpu_irq_set(vcpu, vlr.irq); - level_pending = true; - } else { - vgic_dist_irq_clear_pending(vcpu, vlr.irq); - vgic_cpu_irq_clear(vcpu, vlr.irq); - } + spin_lock(&dist->lock); + level_pending |= process_level_irq(vcpu, lr, vlr); spin_unlock(&dist->lock); - - /* - * Despite being EOIed, the LR may not have - * been marked as empty. - */ - vgic_sync_lr_elrsr(vcpu, lr, vlr); } } -- cgit v1.2.3 From 8bf9a701e103fd17dbdf0355e43ff5200b4823aa Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sun, 30 Aug 2015 14:42:16 +0200 Subject: arm/arm64: KVM: Implement GICD_ICFGR as RO for PPIs The GICD_ICFGR allows the bits for the SGIs and PPIs to be read only. We currently simulate this behavior by writing a hardcoded value to the register for the SGIs and PPIs on every write of these bits to the register (ignoring what the guest actually wrote), and by writing the same value as the reset value to the register. This is a bit counter-intuitive, as the register is RO for these bits, and we can just implement it that way, allowing us to control the value of the bits purely in the reset code. 
Reviewed-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 367a180fb5ac..f8ca2e9d2f0b 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -690,10 +690,9 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio, vgic_reg_access(mmio, &val, offset, ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); if (mmio->is_write) { - if (offset < 8) { - *reg = ~0U; /* Force PPIs/SGIs to 1 */ + /* Ignore writes to read-only SGI and PPI bits */ + if (offset < 8) return false; - } val = vgic_cfg_compress(val); if (offset & 4) { -- cgit v1.2.3 From 54723bb37feac347a169359536f3dff122cabca3 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sun, 30 Aug 2015 14:45:20 +0200 Subject: arm/arm64: KVM: Use appropriate define in VGIC reset code We currently initialize the SGIs to be enabled in the VGIC code, but we use the VGIC_NR_PPIS define for this purpose, instead of the more natural VGIC_NR_SGIS. Change this slightly confusing use of the defines. Note: This should have no functional change, as both names are defined to the number 16. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index f8ca2e9d2f0b..a44ecf9eca4e 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -2128,8 +2128,12 @@ int vgic_init(struct kvm *kvm) break; } - for (i = 0; i < dist->nr_irqs; i++) { - if (i < VGIC_NR_PPIS) + /* + * Enable all SGIs and configure all private IRQs as + * edge-triggered.
+ */ + for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) { + if (i < VGIC_NR_SGIS) vgic_bitmap_set_irq_val(&dist->irq_enabled, vcpu->vcpu_id, i, 1); if (i < VGIC_NR_PRIVATE_IRQS) -- cgit v1.2.3 From 4b4b4512da2a844b8da2585609b67fae1ce4f4db Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sun, 30 Aug 2015 15:01:27 +0200 Subject: arm/arm64: KVM: Rework the arch timer to use level-triggered semantics The arch timer currently uses edge-triggered semantics in the sense that the line is never sampled by the vgic and lowering the line from the timer to the vgic doesn't have any effect on the pending state of virtual interrupts in the vgic. This means that we do not support a guest with the otherwise valid behavior of (1) disable interrupts (2) enable the timer (3) disable the timer (4) enable interrupts. Such a guest would validly not expect to see any interrupts on real hardware, but will see interrupts on KVM. This patch fixes this shortcoming through the following series of changes. First, we change the flow of the timer/vgic sync/flush operations. Now the timer is always flushed/synced before the vgic, because the vgic samples the state of the timer output. This has the implication that we move the timer operations into non-preemptible sections, but that is fine after the previous commit getting rid of hrtimer schedules on every entry/exit. Second, we change the internal behavior of the timer, letting the timer keep track of its previous output state, and only lower/raise the line to the vgic when the state changes. Note that in theory this could have been accomplished more simply by signalling the vgic every time the state *potentially* changed, but we don't want to be hitting the vgic more often than necessary.
Third, we get rid of the use of the map->active field in the vgic and instead simply set the interrupt as active on the physical distributor whenever the input to the GIC is asserted and conversely clear the physical active state when the input to the GIC is deasserted. Fourth, and finally, we now initialize the timer PPIs (and all the other unused PPIs for now), to be level-triggered, and modify the sync code to sample the line state on HW sync and re-inject a new interrupt if it is still pending at that time. Signed-off-by: Christoffer Dall --- arch/arm/kvm/arm.c | 11 +++-- include/kvm/arm_arch_timer.h | 2 +- include/kvm/arm_vgic.h | 3 -- virt/kvm/arm/arch_timer.c | 81 +++++++++++++++++++++++----------- virt/kvm/arm/vgic.c | 102 +++++++++++-------------------------------- 5 files changed, 91 insertions(+), 108 deletions(-) (limited to 'virt') diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 7ed4d475d83a..59125f48c707 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -561,9 +561,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) { local_irq_enable(); + kvm_timer_sync_hwstate(vcpu); kvm_vgic_sync_hwstate(vcpu); preempt_enable(); - kvm_timer_sync_hwstate(vcpu); continue; } @@ -608,12 +608,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_guest_exit(); trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); + /* + * We must sync the timer state before the vgic state so that + * the vgic can properly sample the updated state of the + * interrupt line. 
+ */ + kvm_timer_sync_hwstate(vcpu); + kvm_vgic_sync_hwstate(vcpu); preempt_enable(); - kvm_timer_sync_hwstate(vcpu); - ret = handle_exit(vcpu, run, ret); } diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index ef14cc1f1f26..1800227af9d6 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -51,7 +51,7 @@ struct arch_timer_cpu { bool armed; /* Timer IRQ */ - const struct kvm_irq_level *irq; + struct kvm_irq_level irq; /* VGIC mapping */ struct irq_phys_map *map; diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 4e14dac282bb..7bc5d0224ab0 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -159,7 +159,6 @@ struct irq_phys_map { u32 virt_irq; u32 phys_irq; u32 irq; - bool active; }; struct irq_phys_map_entry { @@ -354,8 +353,6 @@ int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu); struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, int virt_irq, int irq); int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map); -bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map); -void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active); #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) #define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus)) diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 32095fbb5d7c..523816d8c402 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -59,18 +59,6 @@ static void timer_disarm(struct arch_timer_cpu *timer) } } -static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu) -{ - int ret; - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; - - kvm_vgic_set_phys_irq_active(timer->map, true); - ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id, - timer->map, - timer->irq->level); - WARN_ON(ret); -} - static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) { struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; @@ -116,8 +104,7 @@ static bool 
kvm_timer_irq_can_fire(struct kvm_vcpu *vcpu) struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; return !(timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) && - (timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE) && - !kvm_vgic_get_phys_irq_active(timer->map); + (timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE); } bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) @@ -134,6 +121,41 @@ bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) return cval <= now; } +static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level) +{ + int ret; + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + + BUG_ON(!vgic_initialized(vcpu->kvm)); + + timer->irq.level = new_level; + ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id, + timer->map, + timer->irq.level); + WARN_ON(ret); +} + +/* + * Check if there was a change in the timer state (should we raise or lower + * the line level to the GIC). + */ +static void kvm_timer_update_state(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + + /* + * If userspace modified the timer registers via SET_ONE_REG before + * the vgic was initialized, we mustn't set the timer->irq.level value + * because the guest would never see the interrupt. Instead wait + * until we call this function from kvm_timer_flush_hwstate. 
+ */ + if (!vgic_initialized(vcpu->kvm)) + return; + + if (kvm_timer_should_fire(vcpu) != timer->irq.level) + kvm_timer_update_irq(vcpu, !timer->irq.level); +} + /* * Schedule the background timer before calling kvm_vcpu_block, so that this * thread is removed from its waitqueue and made runnable when there's a timer @@ -192,17 +214,20 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) bool phys_active; int ret; - if (kvm_timer_should_fire(vcpu)) - kvm_timer_inject_irq(vcpu); + kvm_timer_update_state(vcpu); /* - * We keep track of whether the edge-triggered interrupt has been - * signalled to the vgic/guest, and if so, we mask the interrupt and - * the physical distributor to prevent the timer from raising a - * physical interrupt whenever we run a guest, preventing forward - * VCPU progress. + * If we enter the guest with the virtual input level to the VGIC + * asserted, then we have already told the VGIC what we need to, and + * we don't need to exit from the guest until the guest deactivates + * the already injected interrupt, so therefore we should set the + * hardware active state to prevent unnecessary exits from the guest. + * + * Conversely, if the virtual input level is deasserted, then always + * clear the hardware active state to ensure that hardware interrupts + * from the timer triggers a guest exit. */ - if (kvm_vgic_get_phys_irq_active(timer->map)) + if (timer->irq.level) phys_active = true; else phys_active = false; @@ -226,8 +251,11 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) BUG_ON(timer_is_armed(timer)); - if (kvm_timer_should_fire(vcpu)) - kvm_timer_inject_irq(vcpu); + /* + * The guest could have modified the timer registers or the timer + * could have expired, update the timer state. + */ + kvm_timer_update_state(vcpu); } int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, @@ -242,7 +270,7 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, * kvm_vcpu_set_target(). 
To handle this, we determine * vcpu timer irq number when the vcpu is reset. */ - timer->irq = irq; + timer->irq.irq = irq->irq; /* * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8 @@ -251,6 +279,7 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, * the ARMv7 architecture. */ timer->cntv_ctl = 0; + kvm_timer_update_state(vcpu); /* * Tell the VGIC that the virtual interrupt is tied to a @@ -295,6 +324,8 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) default: return -1; } + + kvm_timer_update_state(vcpu); return 0; } diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index a44ecf9eca4e..3c2909c1bda3 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -537,34 +537,6 @@ bool vgic_handle_set_pending_reg(struct kvm *kvm, return false; } -/* - * If a mapped interrupt's state has been modified by the guest such that it - * is no longer active or pending, without it have gone through the sync path, - * then the map->active field must be cleared so the interrupt can be taken - * again. 
- */ -static void vgic_handle_clear_mapped_irq(struct kvm_vcpu *vcpu) -{ - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - struct list_head *root; - struct irq_phys_map_entry *entry; - struct irq_phys_map *map; - - rcu_read_lock(); - - /* Check for PPIs */ - root = &vgic_cpu->irq_phys_map_list; - list_for_each_entry_rcu(entry, root, entry) { - map = &entry->map; - - if (!vgic_dist_irq_is_pending(vcpu, map->virt_irq) && - !vgic_irq_is_active(vcpu, map->virt_irq)) - map->active = false; - } - - rcu_read_unlock(); -} - bool vgic_handle_clear_pending_reg(struct kvm *kvm, struct kvm_exit_mmio *mmio, phys_addr_t offset, int vcpu_id) @@ -595,7 +567,6 @@ bool vgic_handle_clear_pending_reg(struct kvm *kvm, vcpu_id, offset); vgic_reg_access(mmio, reg, offset, mode); - vgic_handle_clear_mapped_irq(kvm_get_vcpu(kvm, vcpu_id)); vgic_update_state(kvm); return true; } @@ -633,7 +604,6 @@ bool vgic_handle_clear_active_reg(struct kvm *kvm, ACCESS_READ_VALUE | ACCESS_WRITE_CLEARBIT); if (mmio->is_write) { - vgic_handle_clear_mapped_irq(kvm_get_vcpu(kvm, vcpu_id)); vgic_update_state(kvm); return true; } @@ -1443,29 +1413,37 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) /* * Save the physical active state, and reset it to inactive. * - * Return 1 if HW interrupt went from active to inactive, and 0 otherwise. + * Return true if there's a pending level triggered interrupt line to queue. 
*/ -static int vgic_sync_hwirq(struct kvm_vcpu *vcpu, struct vgic_lr vlr) +static bool vgic_sync_hwirq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr) { + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; struct irq_phys_map *map; + bool phys_active; + bool level_pending; int ret; if (!(vlr.state & LR_HW)) - return 0; + return false; map = vgic_irq_map_search(vcpu, vlr.irq); BUG_ON(!map); ret = irq_get_irqchip_state(map->irq, IRQCHIP_STATE_ACTIVE, - &map->active); + &phys_active); WARN_ON(ret); - if (map->active) + if (phys_active) return 0; - return 1; + /* Mapped edge-triggered interrupts not yet supported. */ + WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq)); + spin_lock(&dist->lock); + level_pending = process_level_irq(vcpu, lr, vlr); + spin_unlock(&dist->lock); + return level_pending; } /* Sync back the VGIC state after a guest run */ @@ -1490,18 +1468,8 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) continue; vlr = vgic_get_lr(vcpu, lr); - if (vgic_sync_hwirq(vcpu, vlr)) { - /* - * So this is a HW interrupt that the guest - * EOI-ed. Clean the LR state and allow the - * interrupt to be sampled again. - */ - vlr.state = 0; - vlr.hwirq = 0; - vgic_set_lr(vcpu, lr, vlr); - vgic_irq_clear_queued(vcpu, vlr.irq); - set_bit(lr, elrsr_ptr); - } + if (vgic_sync_hwirq(vcpu, lr, vlr)) + level_pending = true; if (!test_bit(lr, elrsr_ptr)) continue; @@ -1880,30 +1848,6 @@ static void vgic_free_phys_irq_map_rcu(struct rcu_head *rcu) kfree(entry); } -/** - * kvm_vgic_get_phys_irq_active - Return the active state of a mapped IRQ - * - * Return the logical active state of a mapped interrupt. This doesn't - * necessarily reflects the current HW state. - */ -bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map) -{ - BUG_ON(!map); - return map->active; -} - -/** - * kvm_vgic_set_phys_irq_active - Set the active state of a mapped IRQ - * - * Set the logical active state of a mapped interrupt. This doesn't - * immediately affects the HW state. 
- */ -void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active) -{ - BUG_ON(!map); - map->active = active; -} - /** * kvm_vgic_unmap_phys_irq - Remove a virtual to physical IRQ mapping * @vcpu: The VCPU pointer @@ -2129,17 +2073,23 @@ int vgic_init(struct kvm *kvm) } /* - * Enable all SGIs and configure all private IRQs as - * edge-triggered. + * Enable and configure all SGIs to be edge-triggere and + * configure all PPIs as level-triggered. */ for (i = 0; i < VGIC_NR_PRIVATE_IRQS; i++) { - if (i < VGIC_NR_SGIS) + if (i < VGIC_NR_SGIS) { + /* SGIs */ vgic_bitmap_set_irq_val(&dist->irq_enabled, vcpu->vcpu_id, i, 1); - if (i < VGIC_NR_PRIVATE_IRQS) vgic_bitmap_set_irq_val(&dist->irq_cfg, vcpu->vcpu_id, i, VGIC_CFG_EDGE); + } else if (i < VGIC_NR_PRIVATE_IRQS) { + /* PPIs */ + vgic_bitmap_set_irq_val(&dist->irq_cfg, + vcpu->vcpu_id, i, + VGIC_CFG_LEVEL); + } } vgic_enable(vcpu); -- cgit v1.2.3 From 8fe2f19e6e6015911bdd4cfcdb23a32e146ba570 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Fri, 4 Sep 2015 21:25:12 +0200 Subject: arm/arm64: KVM: Support edge-triggered forwarded interrupts We mark edge-triggered interrupts with the HW bit set as queued to prevent the VGIC code from injecting LRs with both the Active and Pending bits set at the same time while also setting the HW bit, because the hardware does not support this. However, this means that we must also clear the queued flag when we sync back a LR where the state on the physical distributor went from active to inactive because the guest deactivated the interrupt. At this point we must also check if the interrupt is pending on the distributor, and tell the VGIC to queue it again if it is. Since these actions on the sync path are extremely close to those for level-triggered interrupts, rename process_level_irq to process_queued_irq, allowing it to cater for both cases. 
Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 3c2909c1bda3..84abc6f38c1d 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1313,13 +1313,10 @@ epilog: } } -static int process_level_irq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr) +static int process_queued_irq(struct kvm_vcpu *vcpu, + int lr, struct vgic_lr vlr) { - int level_pending = 0; - - vlr.state = 0; - vlr.hwirq = 0; - vgic_set_lr(vcpu, lr, vlr); + int pending = 0; /* * If the IRQ was EOIed (called from vgic_process_maintenance) or it @@ -1335,26 +1332,35 @@ static int process_level_irq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr) vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq); /* - * Tell the gic to start sampling the line of this interrupt again. + * Tell the gic to start sampling this interrupt again. */ vgic_irq_clear_queued(vcpu, vlr.irq); /* Any additional pending interrupt? */ - if (vgic_dist_irq_get_level(vcpu, vlr.irq)) { - vgic_cpu_irq_set(vcpu, vlr.irq); - level_pending = 1; + if (vgic_irq_is_edge(vcpu, vlr.irq)) { + BUG_ON(!(vlr.state & LR_HW)); + pending = vgic_dist_irq_is_pending(vcpu, vlr.irq); } else { - vgic_dist_irq_clear_pending(vcpu, vlr.irq); - vgic_cpu_irq_clear(vcpu, vlr.irq); + if (vgic_dist_irq_get_level(vcpu, vlr.irq)) { + vgic_cpu_irq_set(vcpu, vlr.irq); + pending = 1; + } else { + vgic_dist_irq_clear_pending(vcpu, vlr.irq); + vgic_cpu_irq_clear(vcpu, vlr.irq); + } } /* * Despite being EOIed, the LR may not have * been marked as empty. 
*/ + vlr.state = 0; + vlr.hwirq = 0; + vgic_set_lr(vcpu, lr, vlr); + vgic_sync_lr_elrsr(vcpu, lr, vlr); - return level_pending; + return pending; } static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) @@ -1391,7 +1397,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) vlr.irq - VGIC_NR_PRIVATE_IRQS); spin_lock(&dist->lock); - level_pending |= process_level_irq(vcpu, lr, vlr); + level_pending |= process_queued_irq(vcpu, lr, vlr); spin_unlock(&dist->lock); } } @@ -1413,7 +1419,7 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) /* * Save the physical active state, and reset it to inactive. * - * Return true if there's a pending level triggered interrupt line to queue. + * Return true if there's a pending forwarded interrupt to queue. */ static bool vgic_sync_hwirq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr) { @@ -1438,10 +1444,8 @@ static bool vgic_sync_hwirq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr) if (phys_active) return 0; - /* Mapped edge-triggered interrupts not yet supported. */ - WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq)); spin_lock(&dist->lock); - level_pending = process_level_irq(vcpu, lr, vlr); + level_pending = process_queued_irq(vcpu, lr, vlr); spin_unlock(&dist->lock); return level_pending; } -- cgit v1.2.3 From e21f09108754dfdfbb30e547f4edbd3b6884eedb Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sun, 30 Aug 2015 13:57:20 +0200 Subject: arm/arm64: KVM: Add tracepoints for vgic and timer The VGIC and timer code for KVM arm/arm64 doesn't have any tracepoints or tracepoint infrastructure defined. Rewriting some of the timer code handling showed me how much we need this, so let's add these simple trace points once and for all and we can easily expand with additional trace points in these files as we go along. 
Cc: Wei Huang Signed-off-by: Christoffer Dall --- virt/kvm/arm/arch_timer.c | 4 +++ virt/kvm/arm/trace.h | 63 +++++++++++++++++++++++++++++++++++++++++++++++ virt/kvm/arm/vgic.c | 5 ++++ 3 files changed, 72 insertions(+) create mode 100644 virt/kvm/arm/trace.h (limited to 'virt') diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 523816d8c402..21a0ab2d8919 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -28,6 +28,8 @@ #include #include +#include "trace.h" + static struct timecounter *timecounter; static struct workqueue_struct *wqueue; static unsigned int host_vtimer_irq; @@ -129,6 +131,8 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level) BUG_ON(!vgic_initialized(vcpu->kvm)); timer->irq.level = new_level; + trace_kvm_timer_update_irq(vcpu->vcpu_id, timer->map->virt_irq, + timer->irq.level); ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id, timer->map, timer->irq.level); diff --git a/virt/kvm/arm/trace.h b/virt/kvm/arm/trace.h new file mode 100644 index 000000000000..37d8b98867d5 --- /dev/null +++ b/virt/kvm/arm/trace.h @@ -0,0 +1,63 @@ +#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_KVM_H + +#include + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kvm + +/* + * Tracepoints for vgic + */ +TRACE_EVENT(vgic_update_irq_pending, + TP_PROTO(unsigned long vcpu_id, __u32 irq, bool level), + TP_ARGS(vcpu_id, irq, level), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_id ) + __field( __u32, irq ) + __field( bool, level ) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->irq = irq; + __entry->level = level; + ), + + TP_printk("VCPU: %ld, IRQ %d, level: %d", + __entry->vcpu_id, __entry->irq, __entry->level) +); + +/* + * Tracepoints for arch_timer + */ +TRACE_EVENT(kvm_timer_update_irq, + TP_PROTO(unsigned long vcpu_id, __u32 irq, int level), + TP_ARGS(vcpu_id, irq, level), + + TP_STRUCT__entry( + __field( unsigned long, vcpu_id ) + __field( 
__u32, irq ) + __field( int, level ) + ), + + TP_fast_assign( + __entry->vcpu_id = vcpu_id; + __entry->irq = irq; + __entry->level = level; + ), + + TP_printk("VCPU: %ld, IRQ %d, level %d", + __entry->vcpu_id, __entry->irq, __entry->level) +); + +#endif /* _TRACE_KVM_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ../../../virt/kvm/arm +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE trace + +/* This part must be outside protection */ +#include diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 84abc6f38c1d..d4669eb29b77 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -34,6 +34,9 @@ #include #include +#define CREATE_TRACE_POINTS +#include "trace.h" + /* * How the whole thing works (courtesy of Christoffer Dall): * @@ -1574,6 +1577,8 @@ static int vgic_update_irq_pending(struct kvm *kvm, int cpuid, int enabled; bool ret = true, can_inject = true; + trace_vgic_update_irq_pending(cpuid, irq_num, level); + if (irq_num >= min(kvm->arch.vgic.nr_irqs, 1020)) return -EINVAL; -- cgit v1.2.3 From c4cd4c168b81dad53e659d18cdae653bc0ec2384 Mon Sep 17 00:00:00 2001 From: Pavel Fedin Date: Tue, 27 Oct 2015 11:37:29 +0300 Subject: KVM: arm/arm64: Optimize away redundant LR tracking Currently we use vgic_irq_lr_map in order to track which LRs hold which IRQs, and lr_used bitmap in order to track which LRs are used or free. vgic_irq_lr_map is actually used only for piggy-back optimization, and can be easily replaced by iteration over lr_used. This is good because in future, when LPI support is introduced, number of IRQs will grow up to at least 16384, while numbers from 1024 to 8192 are never going to be used. This would be a huge memory waste. In its turn, lr_used is also completely redundant since ae705930fca6322600690df9dc1c7d0516145a93 ("arm/arm64: KVM: Keep elrsr/aisr in sync with software model"), because together with lr_used we also update elrsr. 
This makes it easy to replace lr_used with elrsr, inverting all conditions (because in elrsr '1' means 'free'). Signed-off-by: Pavel Fedin Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 6 ------ virt/kvm/arm/vgic-v2.c | 1 + virt/kvm/arm/vgic-v3.c | 1 + virt/kvm/arm/vgic.c | 53 ++++++++++++++------------------------------------ 4 files changed, 17 insertions(+), 44 deletions(-) (limited to 'virt') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 8065801a1847..3936bf802e1d 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -295,9 +295,6 @@ struct vgic_v3_cpu_if { }; struct vgic_cpu { - /* per IRQ to LR mapping */ - u8 *vgic_irq_lr_map; - /* Pending/active/both interrupts on this VCPU */ DECLARE_BITMAP(pending_percpu, VGIC_NR_PRIVATE_IRQS); DECLARE_BITMAP(active_percpu, VGIC_NR_PRIVATE_IRQS); @@ -308,9 +305,6 @@ struct vgic_cpu { unsigned long *active_shared; unsigned long *pend_act_shared; - /* Bitmap of used/free list registers */ - DECLARE_BITMAP(lr_used, VGIC_V2_MAX_LRS); - /* Number of list registers on this CPU */ int nr_lr; diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c index 8d7b04db8471..c0f5d7fad9ea 100644 --- a/virt/kvm/arm/vgic-v2.c +++ b/virt/kvm/arm/vgic-v2.c @@ -158,6 +158,7 @@ static void vgic_v2_enable(struct kvm_vcpu *vcpu) * anyway. */ vcpu->arch.vgic_cpu.vgic_v2.vgic_vmcr = 0; + vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr = ~0; /* Get the show on the road... */ vcpu->arch.vgic_cpu.vgic_v2.vgic_hcr = GICH_HCR_EN; diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c index 7dd5d62f10a1..92003cb61a0a 100644 --- a/virt/kvm/arm/vgic-v3.c +++ b/virt/kvm/arm/vgic-v3.c @@ -193,6 +193,7 @@ static void vgic_v3_enable(struct kvm_vcpu *vcpu) * anyway.
*/ vgic_v3->vgic_vmcr = 0; + vgic_v3->vgic_elrsr = ~0; /* * If we are emulating a GICv3, we do it in an non-GICv2-compatible diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index d4669eb29b77..265a41035728 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -108,6 +108,7 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu); static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); +static u64 vgic_get_elrsr(struct kvm_vcpu *vcpu); static struct irq_phys_map *vgic_irq_map_search(struct kvm_vcpu *vcpu, int virt_irq); static int compute_pending_for_cpu(struct kvm_vcpu *vcpu); @@ -691,9 +692,11 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio, void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + u64 elrsr = vgic_get_elrsr(vcpu); + unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr); int i; - for_each_set_bit(i, vgic_cpu->lr_used, vgic_cpu->nr_lr) { + for_each_clear_bit(i, elrsr_ptr, vgic_cpu->nr_lr) { struct vgic_lr lr = vgic_get_lr(vcpu, i); /* @@ -1098,7 +1101,6 @@ static inline void vgic_enable(struct kvm_vcpu *vcpu) static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) { - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr); /* @@ -1112,8 +1114,6 @@ static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) vlr.state = 0; vgic_set_lr(vcpu, lr_nr, vlr); - clear_bit(lr_nr, vgic_cpu->lr_used); - vgic_cpu->vgic_irq_lr_map[irq] = LR_EMPTY; vgic_sync_lr_elrsr(vcpu, lr_nr, vlr); } @@ -1128,10 +1128,11 @@ static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) */ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) { - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; + u64 elrsr = vgic_get_elrsr(vcpu); + unsigned long *elrsr_ptr = 
u64_to_bitmask(&elrsr); int lr; - for_each_set_bit(lr, vgic_cpu->lr_used, vgic->nr_lr) { + for_each_clear_bit(lr, elrsr_ptr, vgic->nr_lr) { struct vgic_lr vlr = vgic_get_lr(vcpu, lr); if (!vgic_irq_is_enabled(vcpu, vlr.irq)) { @@ -1188,8 +1189,9 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq, */ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) { - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + u64 elrsr = vgic_get_elrsr(vcpu); + unsigned long *elrsr_ptr = u64_to_bitmask(&elrsr); struct vgic_lr vlr; int lr; @@ -1200,28 +1202,22 @@ bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) kvm_debug("Queue IRQ%d\n", irq); - lr = vgic_cpu->vgic_irq_lr_map[irq]; - /* Do we have an active interrupt for the same CPUID? */ - if (lr != LR_EMPTY) { + for_each_clear_bit(lr, elrsr_ptr, vgic->nr_lr) { vlr = vgic_get_lr(vcpu, lr); - if (vlr.source == sgi_source_id) { + if (vlr.irq == irq && vlr.source == sgi_source_id) { kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq); - BUG_ON(!test_bit(lr, vgic_cpu->lr_used)); vgic_queue_irq_to_lr(vcpu, irq, lr, vlr); return true; } } /* Try to use another LR for this interrupt */ - lr = find_first_zero_bit((unsigned long *)vgic_cpu->lr_used, - vgic->nr_lr); + lr = find_first_bit(elrsr_ptr, vgic->nr_lr); if (lr >= vgic->nr_lr) return false; kvm_debug("LR%d allocated for IRQ%d %x\n", lr, irq, sgi_source_id); - vgic_cpu->vgic_irq_lr_map[irq] = lr; - set_bit(lr, vgic_cpu->lr_used); vlr.irq = irq; vlr.source = sgi_source_id; @@ -1456,7 +1452,6 @@ static bool vgic_sync_hwirq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr) /* Sync back the VGIC state after a guest run */ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { - struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; struct vgic_dist *dist = &vcpu->kvm->arch.vgic; u64 elrsr; unsigned long *elrsr_ptr; @@ -1469,22 +1464,10 @@ static void __kvm_vgic_sync_hwstate(struct 
kvm_vcpu *vcpu) /* Deal with HW interrupts, and clear mappings for empty LRs */ for (lr = 0; lr < vgic->nr_lr; lr++) { - struct vgic_lr vlr; - - if (!test_bit(lr, vgic_cpu->lr_used)) - continue; - - vlr = vgic_get_lr(vcpu, lr); - if (vgic_sync_hwirq(vcpu, lr, vlr)) - level_pending = true; - - if (!test_bit(lr, elrsr_ptr)) - continue; - - clear_bit(lr, vgic_cpu->lr_used); + struct vgic_lr vlr = vgic_get_lr(vcpu, lr); + level_pending |= vgic_sync_hwirq(vcpu, lr, vlr); BUG_ON(vlr.irq >= dist->nr_irqs); - vgic_cpu->vgic_irq_lr_map[vlr.irq] = LR_EMPTY; } /* Check if we still have something up our sleeve... */ @@ -1912,12 +1895,10 @@ void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu) kfree(vgic_cpu->pending_shared); kfree(vgic_cpu->active_shared); kfree(vgic_cpu->pend_act_shared); - kfree(vgic_cpu->vgic_irq_lr_map); vgic_destroy_irq_phys_map(vcpu->kvm, &vgic_cpu->irq_phys_map_list); vgic_cpu->pending_shared = NULL; vgic_cpu->active_shared = NULL; vgic_cpu->pend_act_shared = NULL; - vgic_cpu->vgic_irq_lr_map = NULL; } static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) @@ -1928,18 +1909,14 @@ static int vgic_vcpu_init_maps(struct kvm_vcpu *vcpu, int nr_irqs) vgic_cpu->pending_shared = kzalloc(sz, GFP_KERNEL); vgic_cpu->active_shared = kzalloc(sz, GFP_KERNEL); vgic_cpu->pend_act_shared = kzalloc(sz, GFP_KERNEL); - vgic_cpu->vgic_irq_lr_map = kmalloc(nr_irqs, GFP_KERNEL); if (!vgic_cpu->pending_shared || !vgic_cpu->active_shared - || !vgic_cpu->pend_act_shared - || !vgic_cpu->vgic_irq_lr_map) { + || !vgic_cpu->pend_act_shared) { kvm_vgic_vcpu_destroy(vcpu); return -ENOMEM; } - memset(vgic_cpu->vgic_irq_lr_map, LR_EMPTY, nr_irqs); - /* * Store the number of LRs per vcpu, so we don't have to go * all the way to the distributor structure to find out. 
Only -- cgit v1.2.3 From 212c76545dde8370ebde2a170e4f8e1ed8441dc0 Mon Sep 17 00:00:00 2001 From: Pavel Fedin Date: Tue, 27 Oct 2015 11:37:30 +0300 Subject: KVM: arm/arm64: Clean up vgic_retire_lr() and surroundings 1. Remove unnecessary 'irq' argument, because irq number can be retrieved from the LR. 2. Since cff9211eb1a1f58ce7f5a2d596b617928fd4be0e ("arm/arm64: KVM: Fix arch timer behavior for disabled interrupts ") LR_STATE_PENDING is queued back by vgic_retire_lr() itself. Also, it clears vlr.state itself. Therefore, we remove the same, now duplicated, check with all accompanying bit manipulations from vgic_unqueue_irqs(). 3. vgic_retire_lr() is always accompanied by vgic_irq_clear_queued(). Since it already does more than just clearing the LR, move vgic_irq_clear_queued() inside of it. Signed-off-by: Pavel Fedin Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic.c | 37 ++++++++++--------------------------- 1 file changed, 10 insertions(+), 27 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 265a41035728..96e45f3da534 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -105,7 +105,7 @@ #include "vgic.h" static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu); -static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu); +static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu); static struct vgic_lr vgic_get_lr(const struct kvm_vcpu *vcpu, int lr); static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, struct vgic_lr lr_desc); static u64 vgic_get_elrsr(struct kvm_vcpu *vcpu); @@ -717,30 +717,14 @@ void vgic_unqueue_irqs(struct kvm_vcpu *vcpu) * interrupt then move the active state to the * distributor tracking bit. */ - if (lr.state & LR_STATE_ACTIVE) { + if (lr.state & LR_STATE_ACTIVE) vgic_irq_set_active(vcpu, lr.irq); - lr.state &= ~LR_STATE_ACTIVE; - } /* * Reestablish the pending state on the distributor and the - * CPU interface. 
It may have already been pending, but that - * is fine, then we are only setting a few bits that were - * already set. + * CPU interface and mark the LR as free for other use. */ - if (lr.state & LR_STATE_PENDING) { - vgic_dist_irq_set_pending(vcpu, lr.irq); - lr.state &= ~LR_STATE_PENDING; - } - - vgic_set_lr(vcpu, i, lr); - - /* - * Mark the LR as free for other use. - */ - BUG_ON(lr.state & LR_STATE_MASK); - vgic_retire_lr(i, lr.irq, vcpu); - vgic_irq_clear_queued(vcpu, lr.irq); + vgic_retire_lr(i, vcpu); /* Finally update the VGIC state. */ vgic_update_state(vcpu->kvm); @@ -1099,16 +1083,18 @@ static inline void vgic_enable(struct kvm_vcpu *vcpu) vgic_ops->enable(vcpu); } -static void vgic_retire_lr(int lr_nr, int irq, struct kvm_vcpu *vcpu) +static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu) { struct vgic_lr vlr = vgic_get_lr(vcpu, lr_nr); + vgic_irq_clear_queued(vcpu, vlr.irq); + /* * We must transfer the pending state back to the distributor before * retiring the LR, otherwise we may loose edge-triggered interrupts. */ if (vlr.state & LR_STATE_PENDING) { - vgic_dist_irq_set_pending(vcpu, irq); + vgic_dist_irq_set_pending(vcpu, vlr.irq); vlr.hwirq = 0; } @@ -1135,11 +1121,8 @@ static void vgic_retire_disabled_irqs(struct kvm_vcpu *vcpu) for_each_clear_bit(lr, elrsr_ptr, vgic->nr_lr) { struct vgic_lr vlr = vgic_get_lr(vcpu, lr); - if (!vgic_irq_is_enabled(vcpu, vlr.irq)) { - vgic_retire_lr(lr, vlr.irq, vcpu); - if (vgic_irq_is_queued(vcpu, vlr.irq)) - vgic_irq_clear_queued(vcpu, vlr.irq); - } + if (!vgic_irq_is_enabled(vcpu, vlr.irq)) + vgic_retire_lr(lr, vcpu); } } -- cgit v1.2.3 From 26caea7693cb99833fe4ecc544c842289d6b3f69 Mon Sep 17 00:00:00 2001 From: Pavel Fedin Date: Tue, 27 Oct 2015 11:37:31 +0300 Subject: KVM: arm/arm64: Merge vgic_set_lr() and vgic_sync_lr_elrsr() Now we see that vgic_set_lr() and vgic_sync_lr_elrsr() are always used together. Merge them into one function, saving from second vgic_ops dereferencing every time. 
Signed-off-by: Pavel Fedin Signed-off-by: Christoffer Dall --- include/kvm/arm_vgic.h | 1 - virt/kvm/arm/vgic-v2.c | 5 ----- virt/kvm/arm/vgic-v3.c | 5 ----- virt/kvm/arm/vgic.c | 14 ++------------ 4 files changed, 2 insertions(+), 23 deletions(-) (limited to 'virt') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 3936bf802e1d..f62addc17dcf 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -112,7 +112,6 @@ struct vgic_vmcr { struct vgic_ops { struct vgic_lr (*get_lr)(const struct kvm_vcpu *, int); void (*set_lr)(struct kvm_vcpu *, int, struct vgic_lr); - void (*sync_lr_elrsr)(struct kvm_vcpu *, int, struct vgic_lr); u64 (*get_elrsr)(const struct kvm_vcpu *vcpu); u64 (*get_eisr)(const struct kvm_vcpu *vcpu); void (*clear_eisr)(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/arm/vgic-v2.c b/virt/kvm/arm/vgic-v2.c index c0f5d7fad9ea..ff02f08df74d 100644 --- a/virt/kvm/arm/vgic-v2.c +++ b/virt/kvm/arm/vgic-v2.c @@ -79,11 +79,7 @@ static void vgic_v2_set_lr(struct kvm_vcpu *vcpu, int lr, lr_val |= (lr_desc.source << GICH_LR_PHYSID_CPUID_SHIFT); vcpu->arch.vgic_cpu.vgic_v2.vgic_lr[lr] = lr_val; -} -static void vgic_v2_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, - struct vgic_lr lr_desc) -{ if (!(lr_desc.state & LR_STATE_MASK)) vcpu->arch.vgic_cpu.vgic_v2.vgic_elrsr |= (1ULL << lr); else @@ -167,7 +163,6 @@ static void vgic_v2_enable(struct kvm_vcpu *vcpu) static const struct vgic_ops vgic_v2_ops = { .get_lr = vgic_v2_get_lr, .set_lr = vgic_v2_set_lr, - .sync_lr_elrsr = vgic_v2_sync_lr_elrsr, .get_elrsr = vgic_v2_get_elrsr, .get_eisr = vgic_v2_get_eisr, .clear_eisr = vgic_v2_clear_eisr, diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c index 92003cb61a0a..487d6357b7e7 100644 --- a/virt/kvm/arm/vgic-v3.c +++ b/virt/kvm/arm/vgic-v3.c @@ -112,11 +112,7 @@ static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr, } vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[LR_INDEX(lr)] = lr_val; -} -static void vgic_v3_sync_lr_elrsr(struct kvm_vcpu 
*vcpu, int lr, - struct vgic_lr lr_desc) -{ if (!(lr_desc.state & LR_STATE_MASK)) vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr); else @@ -212,7 +208,6 @@ static void vgic_v3_enable(struct kvm_vcpu *vcpu) static const struct vgic_ops vgic_v3_ops = { .get_lr = vgic_v3_get_lr, .set_lr = vgic_v3_set_lr, - .sync_lr_elrsr = vgic_v3_sync_lr_elrsr, .get_elrsr = vgic_v3_get_elrsr, .get_eisr = vgic_v3_get_eisr, .clear_eisr = vgic_v3_clear_eisr, diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 96e45f3da534..fe451d4885ae 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1032,12 +1032,6 @@ static void vgic_set_lr(struct kvm_vcpu *vcpu, int lr, vgic_ops->set_lr(vcpu, lr, vlr); } -static void vgic_sync_lr_elrsr(struct kvm_vcpu *vcpu, int lr, - struct vgic_lr vlr) -{ - vgic_ops->sync_lr_elrsr(vcpu, lr, vlr); -} - static inline u64 vgic_get_elrsr(struct kvm_vcpu *vcpu) { return vgic_ops->get_elrsr(vcpu); @@ -1100,7 +1094,6 @@ static void vgic_retire_lr(int lr_nr, struct kvm_vcpu *vcpu) vlr.state = 0; vgic_set_lr(vcpu, lr_nr, vlr); - vgic_sync_lr_elrsr(vcpu, lr_nr, vlr); } /* @@ -1162,7 +1155,6 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq, } vgic_set_lr(vcpu, lr_nr, vlr); - vgic_sync_lr_elrsr(vcpu, lr_nr, vlr); } /* @@ -1340,8 +1332,6 @@ static int process_queued_irq(struct kvm_vcpu *vcpu, vlr.hwirq = 0; vgic_set_lr(vcpu, lr, vlr); - vgic_sync_lr_elrsr(vcpu, lr, vlr); - return pending; } @@ -1442,8 +1432,6 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) bool level_pending; level_pending = vgic_process_maintenance(vcpu); - elrsr = vgic_get_elrsr(vcpu); - elrsr_ptr = u64_to_bitmask(&elrsr); /* Deal with HW interrupts, and clear mappings for empty LRs */ for (lr = 0; lr < vgic->nr_lr; lr++) { @@ -1454,6 +1442,8 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) } /* Check if we still have something up our sleeve... 
*/ + elrsr = vgic_get_elrsr(vcpu); + elrsr_ptr = u64_to_bitmask(&elrsr); pending = find_first_zero_bit(elrsr_ptr, vgic->nr_lr); if (level_pending || pending < vgic->nr_lr) set_bit(vcpu->vcpu_id, dist->irq_pending_on_cpu); -- cgit v1.2.3 From 6956d8946d5d1cb2ac913caa8d4259a4d0e00c48 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 19 Oct 2015 04:37:18 -0600 Subject: KVM: don't pointlessly leave KVM_COMPAT=y in non-KVM configs The symbol was missing a KVM dependency. Signed-off-by: Jan Beulich Signed-off-by: Paolo Bonzini --- virt/kvm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 9f8014dda2cf..7a79b6853583 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -46,7 +46,7 @@ config KVM_GENERIC_DIRTYLOG_READ_PROTECT config KVM_COMPAT def_bool y - depends on COMPAT && !S390 + depends on KVM && COMPAT && !S390 config HAVE_KVM_IRQ_BYPASS bool -- cgit v1.2.3 From b97e6de9c96cefaa02a6a7464731ea504b45e150 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 28 Oct 2015 19:16:47 +0100 Subject: KVM: x86: merge kvm_arch_set_irq with kvm_set_msi_inatomic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We do not want to do too much work in atomic context, in particular not walking all the VCPUs of the virtual machine. So we want to distinguish the architecture-specific injection function for irqfd from kvm_set_msi. Since it's still empty, reuse the newly added kvm_arch_set_irq and rename it to kvm_arch_set_irq_inatomic. 
Reviewed-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/x86/kvm/irq_comm.c | 14 ++++++++------ include/linux/kvm_host.h | 7 +++---- virt/kvm/eventfd.c | 11 ++++------- 3 files changed, 15 insertions(+), 17 deletions(-) (limited to 'virt') diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 8f4499c7ffc1..75dc633c48dc 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -124,12 +124,16 @@ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, } -static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e, - struct kvm *kvm) +int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, int level, + bool line_status) { struct kvm_lapic_irq irq; int r; + if (unlikely(e->type != KVM_IRQ_ROUTING_MSI)) + return -EWOULDBLOCK; + kvm_set_msi_irq(e, &irq); if (kvm_irq_delivery_to_apic_fast(kvm, NULL, &irq, &r, NULL)) @@ -165,10 +169,8 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) idx = srcu_read_lock(&kvm->irq_srcu); if (kvm_irq_map_gsi(kvm, entries, irq) > 0) { e = &entries[0]; - if (likely(e->type == KVM_IRQ_ROUTING_MSI)) - ret = kvm_set_msi_inatomic(e, kvm); - else - ret = -EWOULDBLOCK; + ret = kvm_arch_set_irq_inatomic(e, kvm, irq_source_id, + irq, level); } srcu_read_unlock(&kvm->irq_srcu, idx); return ret; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 87189a41d904..15c78f320678 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -830,10 +830,9 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level); int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm, int irq_source_id, int level, bool line_status); - -int kvm_arch_set_irq(struct kvm_kernel_irq_routing_entry *irq, struct kvm *kvm, - int irq_source_id, int level, bool line_status); - +int 
kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, + int level, bool line_status); bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin); void kvm_notify_acked_gsi(struct kvm *kvm, int gsi); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index e29fd2640709..46dbc0a7dfc1 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -171,7 +171,7 @@ irqfd_deactivate(struct kvm_kernel_irqfd *irqfd) queue_work(irqfd_cleanup_wq, &irqfd->shutdown); } -int __attribute__((weak)) kvm_arch_set_irq( +int __attribute__((weak)) kvm_arch_set_irq_inatomic( struct kvm_kernel_irq_routing_entry *irq, struct kvm *kvm, int irq_source_id, int level, @@ -201,12 +201,9 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) irq = irqfd->irq_entry; } while (read_seqcount_retry(&irqfd->irq_entry_sc, seq)); /* An event has been signaled, inject an interrupt */ - if (irq.type == KVM_IRQ_ROUTING_MSI) - kvm_set_msi(&irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, - false); - else if (kvm_arch_set_irq(&irq, kvm, - KVM_USERSPACE_IRQ_SOURCE_ID, 1, - false) == -EWOULDBLOCK) + if (kvm_arch_set_irq_inatomic(&irq, kvm, + KVM_USERSPACE_IRQ_SOURCE_ID, 1, + false) == -EWOULDBLOCK) schedule_work(&irqfd->inject); srcu_read_unlock(&kvm->irq_srcu, idx); } -- cgit v1.2.3