From e08b96371625aaa84cb03f51acc4c8e0be27403a Mon Sep 17 00:00:00 2001 From: Carsten Otte Date: Wed, 4 Jan 2012 10:25:20 +0100 Subject: KVM: s390: add parameter for KVM_CREATE_VM This patch introduces a new config option for user controlled kernel virtual machines. It introduces a parameter to KVM_CREATE_VM that allows to set bits that alter the capabilities of the newly created virtual machine. The parameter is passed to kvm_arch_init_vm for all architectures. The only valid modifier bit for now is KVM_VM_S390_UCONTROL. This requires CAP_SYS_ADMIN privileges and creates a user controlled virtual machine on s390 architectures. Signed-off-by: Carsten Otte Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/kvm_host.h') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 900c76337e8f..82375e145e64 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -520,7 +520,7 @@ static inline void kvm_arch_free_vm(struct kvm *kvm) } #endif -int kvm_arch_init_vm(struct kvm *kvm); +int kvm_arch_init_vm(struct kvm *kvm, unsigned long type); void kvm_arch_destroy_vm(struct kvm *kvm); void kvm_free_all_assigned_devices(struct kvm *kvm); void kvm_arch_sync_events(struct kvm *kvm); -- cgit v1.2.3 From 5b1c1493afe8d69909f9df3221bb2fffdf479f4a Mon Sep 17 00:00:00 2001 From: Carsten Otte Date: Wed, 4 Jan 2012 10:25:23 +0100 Subject: KVM: s390: ucontrol: export SIE control block to user This patch exports the s390 SIE hardware control block to userspace via the mapping of the vcpu file descriptor. In order to do so, a new arch callback named kvm_arch_vcpu_fault is introduced for all architectures. It allows to map architecture specific pages. Signed-off-by: Carsten Otte Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- Documentation/virtual/kvm/api.txt | 5 +++++ arch/ia64/kvm/kvm-ia64.c | 5 +++++ arch/powerpc/kvm/powerpc.c | 5 +++++ arch/s390/kvm/kvm-s390.c | 13 +++++++++++++ arch/x86/kvm/x86.c | 5 +++++ include/linux/kvm.h | 2 ++ include/linux/kvm_host.h | 1 + virt/kvm/kvm_main.c | 2 +- 8 files changed, 37 insertions(+), 1 deletion(-) (limited to 'include/linux/kvm_host.h') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 6e53ff51422f..5ebf47d99e56 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -218,6 +218,11 @@ allocation of vcpu ids. For example, if userspace wants single-threaded guest vcpus, it should make all vcpu ids be a multiple of the number of vcpus per vcore. +For virtual cpus that have been created with S390 user controlled virtual +machines, the resulting vcpu fd can be memory mapped at page offset +KVM_S390_SIE_PAGE_OFFSET in order to obtain a memory map of the virtual +cpu's hardware control block. + 4.8 KVM_GET_DIRTY_LOG (vm ioctl) Capability: basic diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index df6b14194051..8ca7261e7b3d 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1566,6 +1566,11 @@ out: return r; } +int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) +{ + return VM_FAULT_SIGBUS; +} + int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_memory_slot old, diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 83f244569874..a5671616af86 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -659,6 +659,11 @@ out: return r; } +int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) +{ + return VM_FAULT_SIGBUS; +} + static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo) { u32 inst_lis = 0x3c000000; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index af05328aca25..d6bc65aeb950 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -763,6 +763,19 @@ long kvm_arch_vcpu_ioctl(struct file *filp, return r; } +int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) +{ +#ifdef CONFIG_KVM_S390_UCONTROL + if ((vmf->pgoff == KVM_S390_SIE_PAGE_OFFSET) + && (kvm_is_ucontrol(vcpu->kvm))) { + vmf->page = virt_to_page(vcpu->arch.sie_block); + get_page(vmf->page); + return 0; + } +#endif + return VM_FAULT_SIGBUS; +} + /* Section: memory related */ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 06925b4bcc27..a3ce196d21fe 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2814,6 +2814,11 @@ out: return r; } +int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) +{ + return VM_FAULT_SIGBUS; +} + static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr) { int ret; diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 7f686f6708b0..8f888df206a2 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -440,6 +440,8 @@ struct kvm_ppc_pvinfo { /* machine type bits, to be used as argument to KVM_CREATE_VM */ #define KVM_VM_S390_UCONTROL 1 +#define KVM_S390_SIE_PAGE_OFFSET 1 + /* * ioctls for /dev/kvm fds: */ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 82375e145e64..d4d4d7092110 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -450,6 +450,7 @@ long kvm_arch_dev_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg); +int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf); int kvm_dev_ioctl_check_extension(long ext); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 32e3b048a6cf..64be836f3348 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1657,7 +1657,7 @@ static int kvm_vcpu_fault(struct vm_area_struct *vma, struct vm_fault *vmf) page = virt_to_page(vcpu->kvm->coalesced_mmio_ring); #endif else - return VM_FAULT_SIGBUS; + return kvm_arch_vcpu_fault(vcpu, vmf); get_page(page); vmf->page = page; return 0; -- cgit v1.2.3 From a355aa54f1d25dff83c0feef8863d83a76988fdb Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 12 Dec 2011 12:37:21 +0000 Subject: KVM: Add barriers to allow mmu_notifier_retry to be used locklessly This adds an smp_wmb in kvm_mmu_notifier_invalidate_range_end() and an smp_rmb in mmu_notifier_retry() so that mmu_notifier_retry() will give the correct answer when called without kvm->mmu_lock being held. PowerPC Book3S HV KVM wants to use a bitlock per guest page rather than a single global spinlock in order to improve the scalability of updates to the guest MMU hashed page table, and so needs this. Signed-off-by: Paul Mackerras Acked-by: Avi Kivity Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 14 +++++++++----- virt/kvm/kvm_main.c | 6 +++--- 2 files changed, 12 insertions(+), 8 deletions(-) (limited to 'include/linux/kvm_host.h') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d4d4d7092110..eada8e69fe58 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -702,12 +702,16 @@ static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_se if (unlikely(vcpu->kvm->mmu_notifier_count)) return 1; /* - * Both reads happen under the mmu_lock and both values are - * modified under mmu_lock, so there's no need of smb_rmb() - * here in between, otherwise mmu_notifier_count should be - * read before mmu_notifier_seq, see - * mmu_notifier_invalidate_range_end write side. + * Ensure the read of mmu_notifier_count happens before the read + * of mmu_notifier_seq. This interacts with the smp_wmb() in + * mmu_notifier_invalidate_range_end to make sure that the caller + * either sees the old (non-zero) value of mmu_notifier_count or + * the new (incremented) value of mmu_notifier_seq. + * PowerPC Book3s HV KVM calls this under a per-page lock + * rather than under kvm->mmu_lock, for scalability, so + * can't rely on kvm->mmu_lock to keep things ordered. */ + smp_rmb(); if (vcpu->kvm->mmu_notifier_seq != mmu_seq) return 1; return 0; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 64be836f3348..9f32bffd37c0 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -357,11 +357,11 @@ static void kvm_mmu_notifier_invalidate_range_end(struct mmu_notifier *mn, * been freed. */ kvm->mmu_notifier_seq++; + smp_wmb(); /* * The above sequence increase must be visible before the - * below count decrease but both values are read by the kvm - * page fault under mmu_lock spinlock so we don't need to add - * a smb_wmb() here in between the two. + * below count decrease, which is ensured by the smp_wmb above + * in conjunction with the smp_rmb in mmu_notifier_retry(). */ kvm->mmu_notifier_count--; spin_unlock(&kvm->mmu_lock); -- cgit v1.2.3 From 9d4cba7f93c52d4121ab9c6f289e582d368a6979 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Thu, 12 Jan 2012 20:09:51 +0000 Subject: KVM: Move gfn_to_memslot() to kvm_host.h This moves __gfn_to_memslot() and search_memslots() from kvm_main.c to kvm_host.h to reduce the code duplication caused by the need for non-modular code in arch/powerpc/kvm/book3s_hv_rm_mmu.c to call gfn_to_memslot() in real mode. Rather than putting gfn_to_memslot() itself in a header, which would lead to increased code size, this puts __gfn_to_memslot() in a header. Then, the non-modular uses of gfn_to_memslot() are changed to call __gfn_to_memslot() instead. This way there is only one place in the source code that needs to be changed should the gfn_to_memslot() implementation need to be modified. On powerpc, the Book3S HV style of KVM has code that is called from real mode which needs to call gfn_to_memslot() and thus needs this. (Module code is allocated in the vmalloc region, which can't be accessed in real mode.) With this, we can remove builtin_gfn_to_memslot() from book3s_hv_rm_mmu.c. Signed-off-by: Paul Mackerras Acked-by: Avi Kivity Signed-off-by: Alexander Graf Signed-off-by: Avi Kivity --- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 23 ++--------------------- include/linux/kvm_host.h | 25 +++++++++++++++++++++++++ virt/kvm/kvm_main.c | 21 +-------------------- 3 files changed, 28 insertions(+), 41 deletions(-) (limited to 'include/linux/kvm_host.h') diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 5f3c60b89faf..def880aea63a 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -21,25 +21,6 @@ #include #include -/* - * Since this file is built in even if KVM is a module, we need - * a local copy of this function for the case where kvm_main.c is - * modular. - */ -static struct kvm_memory_slot *builtin_gfn_to_memslot(struct kvm *kvm, - gfn_t gfn) -{ - struct kvm_memslots *slots; - struct kvm_memory_slot *memslot; - - slots = kvm_memslots(kvm); - kvm_for_each_memslot(memslot, slots) - if (gfn >= memslot->base_gfn && - gfn < memslot->base_gfn + memslot->npages) - return memslot; - return NULL; -} - /* Translate address of a vmalloc'd thing to a linear map address */ static void *real_vmalloc_addr(void *x) { @@ -99,7 +80,7 @@ static void remove_revmap_chain(struct kvm *kvm, long pte_index, rcbits = hpte_r & (HPTE_R_R | HPTE_R_C); ptel = rev->guest_rpte |= rcbits; gfn = hpte_rpn(ptel, hpte_page_size(hpte_v, ptel)); - memslot = builtin_gfn_to_memslot(kvm, gfn); + memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn); if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID)) return; @@ -181,7 +162,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, /* Find the memslot (if any) for this address */ gpa = (ptel & HPTE_R_RPN) & ~(psize - 1); gfn = gpa >> PAGE_SHIFT; - memslot = builtin_gfn_to_memslot(kvm, gfn); + memslot = __gfn_to_memslot(kvm_memslots(kvm), gfn); pa = 0; is_io = ~0ul; rmap = NULL; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index eada8e69fe58..9698080c902b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -651,6 +651,31 @@ static inline void kvm_guest_exit(void) current->flags &= ~PF_VCPU; } +/* + * search_memslots() and __gfn_to_memslot() are here because they are + * used in non-modular code in arch/powerpc/kvm/book3s_hv_rm_mmu.c. + * gfn_to_memslot() itself isn't here as an inline because that would + * bloat other code too much. + */ +static inline struct kvm_memory_slot * +search_memslots(struct kvm_memslots *slots, gfn_t gfn) +{ + struct kvm_memory_slot *memslot; + + kvm_for_each_memslot(memslot, slots) + if (gfn >= memslot->base_gfn && + gfn < memslot->base_gfn + memslot->npages) + return memslot; + + return NULL; +} + +static inline struct kvm_memory_slot * +__gfn_to_memslot(struct kvm_memslots *slots, gfn_t gfn) +{ + return search_memslots(slots, gfn); +} + static inline int memslot_id(struct kvm *kvm, gfn_t gfn) { return gfn_to_memslot(kvm, gfn)->id; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 9f32bffd37c0..470e30520fe8 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -640,19 +640,6 @@ static int kvm_create_dirty_bitmap(struct kvm_memory_slot *memslot) } #endif /* !CONFIG_S390 */ -static struct kvm_memory_slot * -search_memslots(struct kvm_memslots *slots, gfn_t gfn) -{ - struct kvm_memory_slot *memslot; - - kvm_for_each_memslot(memslot, slots) - if (gfn >= memslot->base_gfn && - gfn < memslot->base_gfn + memslot->npages) - return memslot; - - return NULL; -} - static int cmp_memslot(const void *slot1, const void *slot2) { struct kvm_memory_slot *s1, *s2; @@ -1031,12 +1018,6 @@ int kvm_is_error_hva(unsigned long addr) } EXPORT_SYMBOL_GPL(kvm_is_error_hva); -static struct kvm_memory_slot *__gfn_to_memslot(struct kvm_memslots *slots, - gfn_t gfn) -{ - return search_memslots(slots, gfn); -} - struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn) { return __gfn_to_memslot(kvm_memslots(kvm), gfn); @@ -1459,7 +1440,7 @@ int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, ghc->gpa = gpa; ghc->generation = slots->generation; - ghc->memslot = __gfn_to_memslot(slots, gfn); + ghc->memslot = gfn_to_memslot(kvm, gfn); ghc->hva = gfn_to_hva_many(ghc->memslot, gfn, NULL); if (!kvm_is_error_hva(ghc->hva)) ghc->hva += offset; -- cgit v1.2.3 From fb03cb6f44236f4bef62a0dda8e025ff5ca51417 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Wed, 8 Feb 2012 12:59:10 +0900 Subject: KVM: Introduce gfn_to_index() which returns the index for a given level This patch cleans up the code and removes the "(void)level;" warning suppressor. Note that we can also use this for PT_PAGE_TABLE_LEVEL to treat every level uniformly later. Signed-off-by: Takuya Yoshikawa Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/x86/kvm/mmu.c | 3 +-- include/linux/kvm_host.h | 7 +++++++ virt/kvm/kvm_main.c | 7 +------ 3 files changed, 9 insertions(+), 8 deletions(-) (limited to 'include/linux/kvm_host.h') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index ae76cc3392e1..37e7f100a0e0 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -688,8 +688,7 @@ static struct kvm_lpage_info *lpage_info_slot(gfn_t gfn, { unsigned long idx; - idx = (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - - (slot->base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); + idx = gfn_to_index(gfn, slot->base_gfn, level); return &slot->lpage_info[level - 2][idx]; } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9698080c902b..7a08496b974a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -681,6 +681,13 @@ static inline int memslot_id(struct kvm *kvm, gfn_t gfn) return gfn_to_memslot(kvm, gfn)->id; } +static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) +{ + /* KVM_HPAGE_GFN_SHIFT(PT_PAGE_TABLE_LEVEL) must be 0. */ + return (gfn >> KVM_HPAGE_GFN_SHIFT(level)) - + (base_gfn >> KVM_HPAGE_GFN_SHIFT(level)); +} + static inline unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn) { diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 470e30520fe8..415fe816fc15 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -784,15 +784,10 @@ int __kvm_set_memory_region(struct kvm *kvm, int lpages; int level = i + 2; - /* Avoid unused variable warning if no large pages */ - (void)level; - if (new.lpage_info[i]) continue; - lpages = 1 + ((base_gfn + npages - 1) - >> KVM_HPAGE_GFN_SHIFT(level)); - lpages -= base_gfn >> KVM_HPAGE_GFN_SHIFT(level); + lpages = gfn_to_index(base_gfn + npages - 1, base_gfn, level) + 1; new.lpage_info[i] = vzalloc(lpages * sizeof(*new.lpage_info[i])); -- cgit v1.2.3 From db3fe4eb45f3555d91a7124e18cf3a2f2a30eb90 Mon Sep 17 00:00:00 2001 From: Takuya Yoshikawa Date: Wed, 8 Feb 2012 13:02:18 +0900 Subject: KVM: Introduce kvm_memory_slot::arch and move lpage_info into it Some members of kvm_memory_slot are not used by every architecture. This patch is the first step to make this difference clear by introducing kvm_memory_slot::arch; lpage_info is moved into it. Signed-off-by: Takuya Yoshikawa Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- arch/ia64/include/asm/kvm_host.h | 3 ++ arch/ia64/kvm/kvm-ia64.c | 10 ++++++ arch/powerpc/include/asm/kvm_host.h | 3 ++ arch/powerpc/kvm/powerpc.c | 10 ++++++ arch/s390/include/asm/kvm_host.h | 3 ++ arch/s390/kvm/kvm-s390.c | 10 ++++++ arch/x86/include/asm/kvm_host.h | 9 +++++ arch/x86/kvm/mmu.c | 2 +- arch/x86/kvm/x86.c | 59 +++++++++++++++++++++++++++++++ include/linux/kvm_host.h | 11 +++--- virt/kvm/kvm_main.c | 70 +++++-------------------------------- 11 files changed, 122 insertions(+), 68 deletions(-) (limited to 'include/linux/kvm_host.h') diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index 2689ee54a1c9..e35b3a84a40b 100644 --- a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h @@ -459,6 +459,9 @@ struct kvm_sal_data { unsigned long boot_gp; }; +struct kvm_arch_memory_slot { +}; + struct kvm_arch { spinlock_t dirty_log_lock; diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 8ca7261e7b3d..d8ddbba6fe7d 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1571,6 +1571,16 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } +void kvm_arch_free_memslot(struct kvm_memory_slot *free, + struct kvm_memory_slot *dont) +{ +} + +int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +{ + return 0; +} + int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_memory_slot old, diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 1843d5d2a3be..52eb9c1f4fe0 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -213,6 +213,9 @@ struct revmap_entry { #define KVMPPC_PAGE_WRITETHRU HPTE_R_W /* 0x40 */ #define KVMPPC_GOT_PAGE 0x80 +struct kvm_arch_memory_slot { +}; + struct kvm_arch { #ifdef CONFIG_KVM_BOOK3S_64_HV unsigned long hpt_virt; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 0e21d155eea7..00d7e345b3fe 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -281,6 +281,16 @@ long kvm_arch_dev_ioctl(struct file *filp, return -EINVAL; } +void kvm_arch_free_memslot(struct kvm_memory_slot *free, + struct kvm_memory_slot *dont) +{ +} + +int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +{ + return 0; +} + int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_memory_slot old, diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index e6304268ea28..7343872890a2 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -245,6 +245,9 @@ struct kvm_vm_stat { u32 remote_tlb_flush; }; +struct kvm_arch_memory_slot { +}; + struct kvm_arch{ struct sca_block *sca; debug_info_t *dbf; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index cf3c0a91d046..17ad69d596fd 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -814,6 +814,16 @@ int kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) return VM_FAULT_SIGBUS; } +void kvm_arch_free_memslot(struct kvm_memory_slot *free, + struct kvm_memory_slot *dont) +{ +} + +int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +{ + return 0; +} + /* Section: memory related */ int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c24125cd0c63..74c9edf2bb18 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -483,6 +483,15 @@ struct kvm_vcpu_arch { } osvw; }; +struct kvm_lpage_info { + unsigned long rmap_pde; + int write_count; +}; + +struct kvm_arch_memory_slot { + struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; +}; + struct kvm_arch { unsigned int n_used_mmu_pages; unsigned int n_requested_mmu_pages; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 37e7f100a0e0..ff053ca32303 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -689,7 +689,7 @@ static struct kvm_lpage_info *lpage_info_slot(gfn_t gfn, unsigned long idx; idx = gfn_to_index(gfn, slot->base_gfn, level); - return &slot->lpage_info[level - 2][idx]; + return &slot->arch.lpage_info[level - 2][idx]; } static void account_shadowed(struct kvm *kvm, gfn_t gfn) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3df0b7a140b0..ca74c1dadf3a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6239,6 +6239,65 @@ void kvm_arch_destroy_vm(struct kvm *kvm) put_page(kvm->arch.ept_identity_pagetable); } +void kvm_arch_free_memslot(struct kvm_memory_slot *free, + struct kvm_memory_slot *dont) +{ + int i; + + for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { + if (!dont || free->arch.lpage_info[i] != dont->arch.lpage_info[i]) { + vfree(free->arch.lpage_info[i]); + free->arch.lpage_info[i] = NULL; + } + } +} + +int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages) +{ + int i; + + for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { + unsigned long ugfn; + int lpages; + int level = i + 2; + + lpages = gfn_to_index(slot->base_gfn + npages - 1, + slot->base_gfn, level) + 1; + + slot->arch.lpage_info[i] = + vzalloc(lpages * sizeof(*slot->arch.lpage_info[i])); + if (!slot->arch.lpage_info[i]) + goto out_free; + + if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1)) + slot->arch.lpage_info[i][0].write_count = 1; + if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1)) + slot->arch.lpage_info[i][lpages - 1].write_count = 1; + ugfn = slot->userspace_addr >> PAGE_SHIFT; + /* + * If the gfn and userspace address are not aligned wrt each + * other, or if explicitly asked to, disable large page + * support for this slot + */ + if ((slot->base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) || + !kvm_largepages_enabled()) { + unsigned long j; + + for (j = 0; j < lpages; ++j) + slot->arch.lpage_info[i][j].write_count = 1; + } + } + + return 0; + +out_free: + for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { + vfree(slot->arch.lpage_info[i]); + slot->arch.lpage_info[i] = NULL; + } + return -ENOMEM; +} + int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_memory_slot old, diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7a08496b974a..355e44555c39 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -171,11 +171,6 @@ static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu) */ #define KVM_MEM_MAX_NR_PAGES ((1UL << 31) - 1) -struct kvm_lpage_info { - unsigned long rmap_pde; - int write_count; -}; - struct kvm_memory_slot { gfn_t base_gfn; unsigned long npages; @@ -184,7 +179,7 @@ struct kvm_memory_slot { unsigned long *dirty_bitmap; unsigned long *dirty_bitmap_head; unsigned long nr_dirty_pages; - struct kvm_lpage_info *lpage_info[KVM_NR_PAGE_SIZES - 1]; + struct kvm_arch_memory_slot arch; unsigned long userspace_addr; int user_alloc; int id; @@ -376,6 +371,9 @@ int kvm_set_memory_region(struct kvm *kvm, int __kvm_set_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, int user_alloc); +void kvm_arch_free_memslot(struct kvm_memory_slot *free, + struct kvm_memory_slot *dont); +int kvm_arch_create_memslot(struct kvm_memory_slot *slot, unsigned long npages); int kvm_arch_prepare_memory_region(struct kvm *kvm, struct kvm_memory_slot *memslot, struct kvm_memory_slot old, @@ -385,6 +383,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_userspace_memory_region *mem, struct kvm_memory_slot old, int user_alloc); +bool kvm_largepages_enabled(void); void kvm_disable_largepages(void); void kvm_arch_flush_shadow(struct kvm *kvm); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a30447c5eb4a..8340e0e62034 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -535,21 +535,13 @@ static void kvm_destroy_dirty_bitmap(struct kvm_memory_slot *memslot) static void kvm_free_physmem_slot(struct kvm_memory_slot *free, struct kvm_memory_slot *dont) { - int i; - if (!dont || free->rmap != dont->rmap) vfree(free->rmap); if (!dont || free->dirty_bitmap != dont->dirty_bitmap) kvm_destroy_dirty_bitmap(free); - - for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { - if (!dont || free->lpage_info[i] != dont->lpage_info[i]) { - vfree(free->lpage_info[i]); - free->lpage_info[i] = NULL; - } - } + kvm_arch_free_memslot(free, dont); free->npages = 0; free->rmap = NULL; @@ -685,53 +677,6 @@ void update_memslots(struct kvm_memslots *slots, struct kvm_memory_slot *new) slots->generation++; } -#ifndef CONFIG_S390 -static int create_lpage_info(struct kvm_memory_slot *slot, unsigned long npages) -{ - int i; - - for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { - unsigned long ugfn; - int lpages; - int level = i + 2; - - lpages = gfn_to_index(slot->base_gfn + npages - 1, - slot->base_gfn, level) + 1; - - slot->lpage_info[i] = vzalloc(lpages * sizeof(*slot->lpage_info[i])); - if (!slot->lpage_info[i]) - goto out_free; - - if (slot->base_gfn & (KVM_PAGES_PER_HPAGE(level) - 1)) - slot->lpage_info[i][0].write_count = 1; - if ((slot->base_gfn + npages) & (KVM_PAGES_PER_HPAGE(level) - 1)) - slot->lpage_info[i][lpages - 1].write_count = 1; - ugfn = slot->userspace_addr >> PAGE_SHIFT; - /* - * If the gfn and userspace address are not aligned wrt each - * other, or if explicitly asked to, disable large page - * support for this slot - */ - if ((slot->base_gfn ^ ugfn) & (KVM_PAGES_PER_HPAGE(level) - 1) || - !largepages_enabled) { - unsigned long j; - - for (j = 0; j < lpages; ++j) - slot->lpage_info[i][j].write_count = 1; - } - } - - return 0; - -out_free: - for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) { - vfree(slot->lpage_info[i]); - slot->lpage_info[i] = NULL; - } - return -ENOMEM; -} -#endif /* not defined CONFIG_S390 */ - /* * Allocate some memory and give it an address in the guest physical address * space. @@ -819,10 +764,9 @@ int __kvm_set_memory_region(struct kvm *kvm, new.rmap = vzalloc(npages * sizeof(*new.rmap)); if (!new.rmap) goto out_free; - - if (create_lpage_info(&new, npages)) - goto out_free; #endif /* not defined CONFIG_S390 */ + if (kvm_arch_create_memslot(&new, npages)) + goto out_free; } /* Allocate page dirty bitmap if needed */ @@ -880,8 +824,7 @@ int __kvm_set_memory_region(struct kvm *kvm, if (!npages) { new.rmap = NULL; new.dirty_bitmap = NULL; - for (i = 0; i < KVM_NR_PAGE_SIZES - 1; ++i) - new.lpage_info[i] = NULL; + memset(&new.arch, 0, sizeof(new.arch)); } update_memslots(slots, &new); @@ -968,6 +911,11 @@ out: return r; } +bool kvm_largepages_enabled(void) +{ + return largepages_enabled; +} + void kvm_disable_largepages(void) { largepages_enabled = false; -- cgit v1.2.3 From 3e515705a1f46beb1c942bb8043c16f8ac7b1e9e Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Mon, 5 Mar 2012 14:23:29 +0200 Subject: KVM: Ensure all vcpus are consistent with in-kernel irqchip settings If some vcpus are created before KVM_CREATE_IRQCHIP, then irqchip_in_kernel() and vcpu->arch.apic will be inconsistent, leading to potential NULL pointer dereferences. Fix by: - ensuring that no vcpus are installed when KVM_CREATE_IRQCHIP is called - ensuring that a vcpu has an apic if it is installed after KVM_CREATE_IRQCHIP This is somewhat long winded because vcpu->arch.apic is created without kvm->lock held. Based on earlier patch by Michael Ellerman. Signed-off-by: Michael Ellerman Signed-off-by: Avi Kivity --- arch/ia64/kvm/kvm-ia64.c | 5 +++++ arch/x86/kvm/x86.c | 8 ++++++++ include/linux/kvm_host.h | 7 +++++++ virt/kvm/kvm_main.c | 4 ++++ 4 files changed, 24 insertions(+) (limited to 'include/linux/kvm_host.h') diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index d8ddbba6fe7d..f5104b7c52cd 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -1172,6 +1172,11 @@ out: #define PALE_RESET_ENTRY 0x80000000ffffffb0UL +bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) +{ + return irqchip_in_kernel(vcpu->kcm) == (vcpu->arch.apic != NULL); +} + int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { struct kvm_vcpu *v; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 03a1fd47a6d3..9477dc6cccae 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3199,6 +3199,9 @@ long kvm_arch_vm_ioctl(struct file *filp, r = -EEXIST; if (kvm->arch.vpic) goto create_irqchip_unlock; + r = -EINVAL; + if (atomic_read(&kvm->online_vcpus)) + goto create_irqchip_unlock; r = -ENOMEM; vpic = kvm_create_pic(kvm); if (vpic) { @@ -6107,6 +6110,11 @@ void kvm_arch_check_processor_compat(void *rtn) kvm_x86_ops->check_processor_compatibility(rtn); } +bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) +{ + return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL); +} + int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { struct page *page; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 355e44555c39..e42d85ae8541 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -805,6 +805,13 @@ static inline bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu) { return vcpu->kvm->bsp_vcpu_id == vcpu->vcpu_id; } + +bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu); + +#else + +static inline bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) { return true; } + #endif #ifdef __KVM_HAVE_DEVICE_ASSIGNMENT diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index e4431ada5947..94e148e38719 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1651,6 +1651,10 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) goto vcpu_destroy; mutex_lock(&kvm->lock); + if (!kvm_vcpu_compatible(vcpu)) { + r = -EINVAL; + goto unlock_vcpu_destroy; + } if (atomic_read(&kvm->online_vcpus) == KVM_MAX_VCPUS) { r = -EINVAL; goto unlock_vcpu_destroy; -- cgit v1.2.3 From 07700a94b00a4fcbbfb07d1b72dc112a0e036735 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 28 Feb 2012 14:19:54 +0100 Subject: KVM: Allow host IRQ sharing for assigned PCI 2.3 devices PCI 2.3 allows to generically disable IRQ sources at device level. This enables us to share legacy IRQs of such devices with other host devices when passing them to a guest. The new IRQ sharing feature introduced here is optional, user space has to request it explicitly. Moreover, user space can inform us about its view of PCI_COMMAND_INTX_DISABLE so that we can avoid unmasking the interrupt and signaling it if the guest masked it via the virtualized PCI config space. Signed-off-by: Jan Kiszka Acked-by: Alex Williamson Acked-by: Michael S. Tsirkin Signed-off-by: Avi Kivity --- Documentation/virtual/kvm/api.txt | 41 ++++++++ arch/x86/kvm/x86.c | 1 + include/linux/kvm.h | 6 ++ include/linux/kvm_host.h | 2 + virt/kvm/assigned-dev.c | 209 ++++++++++++++++++++++++++++++++------ 5 files changed, 230 insertions(+), 29 deletions(-) (limited to 'include/linux/kvm_host.h') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 59a38264a0ed..6386f8c0482e 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1169,6 +1169,14 @@ following flags are specified: /* Depends on KVM_CAP_IOMMU */ #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) +/* The following two depend on KVM_CAP_PCI_2_3 */ +#define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) +#define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) + +If KVM_DEV_ASSIGN_PCI_2_3 is set, the kernel will manage legacy INTx interrupts +via the PCI-2.3-compliant device-level mask, thus enable IRQ sharing with other +assigned devices or host devices. KVM_DEV_ASSIGN_MASK_INTX specifies the +guest's view on the INTx mask, see KVM_ASSIGN_SET_INTX_MASK for details. The KVM_DEV_ASSIGN_ENABLE_IOMMU flag is a mandatory option to ensure isolation of the device. Usages not specifying this flag are deprecated. @@ -1441,6 +1449,39 @@ The "num_dirty" field is a performance hint for KVM to determine whether it should skip processing the bitmap and just invalidate everything. It must be set to the number of set bits in the bitmap. +4.60 KVM_ASSIGN_SET_INTX_MASK + +Capability: KVM_CAP_PCI_2_3 +Architectures: x86 +Type: vm ioctl +Parameters: struct kvm_assigned_pci_dev (in) +Returns: 0 on success, -1 on error + +Allows userspace to mask PCI INTx interrupts from the assigned device. The +kernel will not deliver INTx interrupts to the guest between setting and +clearing of KVM_ASSIGN_SET_INTX_MASK via this interface. This enables use of +and emulation of PCI 2.3 INTx disable command register behavior. + +This may be used for both PCI 2.3 devices supporting INTx disable natively and +older devices lacking this support. Userspace is responsible for emulating the +read value of the INTx disable bit in the guest visible PCI command register. +When modifying the INTx disable state, userspace should precede updating the +physical device command register by calling this ioctl to inform the kernel of +the new intended INTx mask state. + +Note that the kernel uses the device INTx disable bit to internally manage the +device interrupt state for PCI 2.3 devices. Reads of this register may +therefore not match the expected value. Writes should always use the guest +intended INTx disable value rather than attempting to read-copy-update the +current physical device state. Races between user and kernel updates to the +INTx disable bit are handled lazily in the kernel. It's possible the device +may generate unintended interrupts, but they will not be injected into the +guest. + +See KVM_ASSIGN_DEV_IRQ for the data structure. The target device is specified +by assigned_dev_id. In the flags field, only KVM_DEV_ASSIGN_MASK_INTX is +evaluated. + 4.62 KVM_CREATE_SPAPR_TCE Capability: KVM_CAP_SPAPR_TCE diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 9477dc6cccae..6866083a48c1 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2143,6 +2143,7 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_XSAVE: case KVM_CAP_ASYNC_PF: case KVM_CAP_GET_TSC_KHZ: + case KVM_CAP_PCI_2_3: r = 1; break; case KVM_CAP_COALESCED_MMIO: diff --git a/include/linux/kvm.h b/include/linux/kvm.h index acbe42939089..6c322a90b92f 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -588,6 +588,7 @@ struct kvm_ppc_pvinfo { #define KVM_CAP_TSC_DEADLINE_TIMER 72 #define KVM_CAP_S390_UCONTROL 73 #define KVM_CAP_SYNC_REGS 74 +#define KVM_CAP_PCI_2_3 75 #ifdef KVM_CAP_IRQ_ROUTING @@ -784,6 +785,9 @@ struct kvm_s390_ucas_mapping { /* Available with KVM_CAP_TSC_CONTROL */ #define KVM_SET_TSC_KHZ _IO(KVMIO, 0xa2) #define KVM_GET_TSC_KHZ _IO(KVMIO, 0xa3) +/* Available with KVM_CAP_PCI_2_3 */ +#define KVM_ASSIGN_SET_INTX_MASK _IOW(KVMIO, 0xa4, \ + struct kvm_assigned_pci_dev) /* * ioctls for vcpu fds @@ -857,6 +861,8 @@ struct kvm_s390_ucas_mapping { #define KVM_SET_ONE_REG _IOW(KVMIO, 0xac, struct kvm_one_reg) #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) +#define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1) +#define KVM_DEV_ASSIGN_MASK_INTX (1 << 2) struct kvm_assigned_pci_dev { __u32 assigned_dev_id; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index e42d85ae8541..ec171c1d0878 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -546,6 +546,7 @@ struct kvm_assigned_dev_kernel { unsigned int entries_nr; int host_irq; bool host_irq_disabled; + bool pci_2_3; struct msix_entry *host_msix_entries; int guest_irq; struct msix_entry *guest_msix_entries; @@ -555,6 +556,7 @@ struct kvm_assigned_dev_kernel { struct pci_dev *dev; struct kvm *kvm; spinlock_t intx_lock; + struct mutex intx_mask_lock; char irq_name[32]; struct pci_saved_state *pci_saved_state; }; diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index ece80612b594..08e05715df72 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c @@ -55,22 +55,66 @@ static int find_index_from_host_irq(struct kvm_assigned_dev_kernel return index; } -static irqreturn_t kvm_assigned_dev_thread(int irq, void *dev_id) +static irqreturn_t kvm_assigned_dev_intx(int irq, void *dev_id) { struct kvm_assigned_dev_kernel *assigned_dev = dev_id; + int ret; - if (assigned_dev->irq_requested_type & KVM_DEV_IRQ_HOST_INTX) { - spin_lock(&assigned_dev->intx_lock); + spin_lock(&assigned_dev->intx_lock); + if (pci_check_and_mask_intx(assigned_dev->dev)) { + assigned_dev->host_irq_disabled = true; + ret = IRQ_WAKE_THREAD; + } else + ret = IRQ_NONE; + spin_unlock(&assigned_dev->intx_lock); + + return ret; +} + +static void +kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev, + int vector) +{ + if (unlikely(assigned_dev->irq_requested_type & + KVM_DEV_IRQ_GUEST_INTX)) { + mutex_lock(&assigned_dev->intx_mask_lock); + if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) + kvm_set_irq(assigned_dev->kvm, + assigned_dev->irq_source_id, vector, 1); + mutex_unlock(&assigned_dev->intx_mask_lock); + } else + kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, + vector, 1); +} + +static irqreturn_t kvm_assigned_dev_thread_intx(int irq, void *dev_id) +{ + struct kvm_assigned_dev_kernel *assigned_dev = dev_id; + + if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) { + spin_lock_irq(&assigned_dev->intx_lock); disable_irq_nosync(irq); assigned_dev->host_irq_disabled = true; - spin_unlock(&assigned_dev->intx_lock); + spin_unlock_irq(&assigned_dev->intx_lock); } - kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, - assigned_dev->guest_irq, 1); + kvm_assigned_dev_raise_guest_irq(assigned_dev, + assigned_dev->guest_irq); + + return IRQ_HANDLED; +} + +#ifdef __KVM_HAVE_MSI +static irqreturn_t kvm_assigned_dev_thread_msi(int irq, void *dev_id) +{ + struct kvm_assigned_dev_kernel *assigned_dev = dev_id; + + kvm_assigned_dev_raise_guest_irq(assigned_dev, + assigned_dev->guest_irq); return IRQ_HANDLED; } +#endif #ifdef __KVM_HAVE_MSIX static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id) @@ -81,8 +125,7 @@ static irqreturn_t kvm_assigned_dev_thread_msix(int irq, void *dev_id) if (index >= 0) { vector = assigned_dev->guest_msix_entries[index].vector; - kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, - vector, 1); + kvm_assigned_dev_raise_guest_irq(assigned_dev, vector); } return IRQ_HANDLED; @@ -98,15 +141,31 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); - /* The guest irq may be shared so this ack may be - * from another device. - */ - spin_lock(&dev->intx_lock); - if (dev->host_irq_disabled) { - enable_irq(dev->host_irq); - dev->host_irq_disabled = false; + mutex_lock(&dev->intx_mask_lock); + + if (!(dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) { + bool reassert = false; + + spin_lock_irq(&dev->intx_lock); + /* + * The guest IRQ may be shared so this ack can come from an + * IRQ for another guest device. + */ + if (dev->host_irq_disabled) { + if (!(dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) + enable_irq(dev->host_irq); + else if (!pci_check_and_unmask_intx(dev->dev)) + reassert = true; + dev->host_irq_disabled = reassert; + } + spin_unlock_irq(&dev->intx_lock); + + if (reassert) + kvm_set_irq(dev->kvm, dev->irq_source_id, + dev->guest_irq, 1); } - spin_unlock(&dev->intx_lock); + + mutex_unlock(&dev->intx_mask_lock); } static void deassign_guest_irq(struct kvm *kvm, @@ -154,7 +213,15 @@ static void deassign_host_irq(struct kvm *kvm, pci_disable_msix(assigned_dev->dev); } else { /* Deal with MSI and INTx */ - disable_irq(assigned_dev->host_irq); + if ((assigned_dev->irq_requested_type & + KVM_DEV_IRQ_HOST_INTX) && + (assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) { + spin_lock_irq(&assigned_dev->intx_lock); + pci_intx(assigned_dev->dev, false); + spin_unlock_irq(&assigned_dev->intx_lock); + synchronize_irq(assigned_dev->host_irq); + } else + disable_irq(assigned_dev->host_irq); free_irq(assigned_dev->host_irq, assigned_dev); @@ -235,15 +302,34 @@ void kvm_free_all_assigned_devices(struct kvm *kvm) static int assigned_device_enable_host_intx(struct kvm *kvm, struct kvm_assigned_dev_kernel *dev) { + irq_handler_t irq_handler; + unsigned long flags; + dev->host_irq = dev->dev->irq; - /* Even though this is PCI, we don't want to use shared - * interrupts. Sharing host devices with guest-assigned devices - * on the same interrupt line is not a happy situation: there - * are going to be long delays in accepting, acking, etc. + + /* + * We can only share the IRQ line with other host devices if we are + * able to disable the IRQ source at device-level - independently of + * the guest driver. Otherwise host devices may suffer from unbounded + * IRQ latencies when the guest keeps the line asserted. */ - if (request_threaded_irq(dev->host_irq, NULL, kvm_assigned_dev_thread, - IRQF_ONESHOT, dev->irq_name, dev)) + if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) { + irq_handler = kvm_assigned_dev_intx; + flags = IRQF_SHARED; + } else { + irq_handler = NULL; + flags = IRQF_ONESHOT; + } + if (request_threaded_irq(dev->host_irq, irq_handler, + kvm_assigned_dev_thread_intx, flags, + dev->irq_name, dev)) return -EIO; + + if (dev->flags & KVM_DEV_ASSIGN_PCI_2_3) { + spin_lock_irq(&dev->intx_lock); + pci_intx(dev->dev, true); + spin_unlock_irq(&dev->intx_lock); + } return 0; } @@ -260,8 +346,9 @@ static int assigned_device_enable_host_msi(struct kvm *kvm, } dev->host_irq = dev->dev->irq; - if (request_threaded_irq(dev->host_irq, NULL, kvm_assigned_dev_thread, - 0, dev->irq_name, dev)) { + if (request_threaded_irq(dev->host_irq, NULL, + kvm_assigned_dev_thread_msi, 0, + dev->irq_name, dev)) { pci_disable_msi(dev->dev); return -EIO; } @@ -319,7 +406,6 @@ static int assigned_device_enable_guest_msi(struct kvm *kvm, { dev->guest_irq = irq->guest_irq; dev->ack_notifier.gsi = -1; - dev->host_irq_disabled = false; return 0; } #endif @@ -331,7 +417,6 @@ static int assigned_device_enable_guest_msix(struct kvm *kvm, { dev->guest_irq = irq->guest_irq; dev->ack_notifier.gsi = -1; - dev->host_irq_disabled = false; return 0; } #endif @@ -365,6 +450,7 @@ static int assign_host_irq(struct kvm *kvm, default: r = -EINVAL; } + dev->host_irq_disabled = false; if (!r) dev->irq_requested_type |= host_irq_type; @@ -466,6 +552,7 @@ static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm, { int r = -ENODEV; struct kvm_assigned_dev_kernel *match; + unsigned long irq_type; mutex_lock(&kvm->lock); @@ -474,7 +561,9 @@ static int kvm_vm_ioctl_deassign_dev_irq(struct kvm *kvm, if (!match) goto out; - r = kvm_deassign_irq(kvm, match, assigned_irq->flags); + irq_type = assigned_irq->flags & (KVM_DEV_IRQ_HOST_MASK | + KVM_DEV_IRQ_GUEST_MASK); + r = kvm_deassign_irq(kvm, match, irq_type); out: mutex_unlock(&kvm->lock); return r; @@ -607,6 +696,10 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, if (!match->pci_saved_state) printk(KERN_DEBUG "%s: Couldn't store %s saved state\n", __func__, dev_name(&dev->dev)); + + if (!pci_intx_mask_supported(dev)) + assigned_dev->flags &= ~KVM_DEV_ASSIGN_PCI_2_3; + match->assigned_dev_id = assigned_dev->assigned_dev_id; match->host_segnr = assigned_dev->segnr; match->host_busnr = assigned_dev->busnr; @@ -614,6 +707,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, match->flags = assigned_dev->flags; match->dev = dev; spin_lock_init(&match->intx_lock); + mutex_init(&match->intx_mask_lock); match->irq_source_id = -1; match->kvm = kvm; match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; @@ -759,6 +853,55 @@ msix_entry_out: } #endif +static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm, + struct kvm_assigned_pci_dev *assigned_dev) +{ + int r = 0; + struct kvm_assigned_dev_kernel *match; + + mutex_lock(&kvm->lock); + + match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head, + assigned_dev->assigned_dev_id); + if (!match) { + r = -ENODEV; + goto out; + } + + mutex_lock(&match->intx_mask_lock); + + match->flags &= ~KVM_DEV_ASSIGN_MASK_INTX; + match->flags |= assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX; + + if (match->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX) { + if (assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX) { + kvm_set_irq(match->kvm, match->irq_source_id, + match->guest_irq, 0); + /* + * Masking at hardware-level is performed on demand, + * i.e. when an IRQ actually arrives at the host. + */ + } else if (!(assigned_dev->flags & KVM_DEV_ASSIGN_PCI_2_3)) { + /* + * Unmask the IRQ line if required. Unmasking at + * device level will be performed by user space. + */ + spin_lock_irq(&match->intx_lock); + if (match->host_irq_disabled) { + enable_irq(match->host_irq); + match->host_irq_disabled = false; + } + spin_unlock_irq(&match->intx_lock); + } + } + + mutex_unlock(&match->intx_mask_lock); + +out: + mutex_unlock(&kvm->lock); + return r; +} + long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, unsigned long arg) { @@ -866,6 +1009,15 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, break; } #endif + case KVM_ASSIGN_SET_INTX_MASK: { + struct kvm_assigned_pci_dev assigned_dev; + + r = -EFAULT; + if (copy_from_user(&assigned_dev, argp, sizeof assigned_dev)) + goto out; + r = kvm_vm_ioctl_set_pci_irq_mask(kvm, &assigned_dev); + break; + } default: r = -ENOTTY; break; @@ -873,4 +1025,3 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, out: return r; } - -- cgit v1.2.3 From cf9eeac46350b8b43730b7dc5e999757bed089a4 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 14 Mar 2012 11:02:11 +0100 Subject: KVM: Convert intx_mask_lock to spin lock As kvm_notify_acked_irq calls kvm_assigned_dev_ack_irq under rcu_read_lock, we cannot use a mutex in the latter function. Switch to a spin lock to address this. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti Signed-off-by: Avi Kivity --- include/linux/kvm_host.h | 2 +- virt/kvm/assigned-dev.c | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux/kvm_host.h') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ec171c1d0878..40bb1c661a6e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -556,7 +556,7 @@ struct kvm_assigned_dev_kernel { struct pci_dev *dev; struct kvm *kvm; spinlock_t intx_lock; - struct mutex intx_mask_lock; + spinlock_t intx_mask_lock; char irq_name[32]; struct pci_saved_state *pci_saved_state; }; diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index 08e05715df72..01f572c10c71 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c @@ -77,11 +77,11 @@ kvm_assigned_dev_raise_guest_irq(struct kvm_assigned_dev_kernel *assigned_dev, { if (unlikely(assigned_dev->irq_requested_type & KVM_DEV_IRQ_GUEST_INTX)) { - mutex_lock(&assigned_dev->intx_mask_lock); + spin_lock(&assigned_dev->intx_mask_lock); if (!(assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, vector, 1); - mutex_unlock(&assigned_dev->intx_mask_lock); + spin_unlock(&assigned_dev->intx_mask_lock); } else kvm_set_irq(assigned_dev->kvm, assigned_dev->irq_source_id, vector, 1); @@ -141,7 +141,7 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); - mutex_lock(&dev->intx_mask_lock); + spin_lock(&dev->intx_mask_lock); if (!(dev->flags & KVM_DEV_ASSIGN_MASK_INTX)) { bool reassert = false; @@ -165,7 +165,7 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) dev->guest_irq, 1); } - mutex_unlock(&dev->intx_mask_lock); + spin_unlock(&dev->intx_mask_lock); } static void deassign_guest_irq(struct kvm *kvm, @@ -707,7 +707,7 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, match->flags = assigned_dev->flags; match->dev = dev; spin_lock_init(&match->intx_lock); - mutex_init(&match->intx_mask_lock); + spin_lock_init(&match->intx_mask_lock); match->irq_source_id = -1; match->kvm = kvm; match->ack_notifier.irq_acked = kvm_assigned_dev_ack_irq; @@ -868,7 +868,7 @@ static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm, goto out; } - mutex_lock(&match->intx_mask_lock); + spin_lock(&match->intx_mask_lock); match->flags &= ~KVM_DEV_ASSIGN_MASK_INTX; match->flags |= assigned_dev->flags & KVM_DEV_ASSIGN_MASK_INTX; @@ -895,7 +895,7 @@ static int kvm_vm_ioctl_set_pci_irq_mask(struct kvm *kvm, } } - mutex_unlock(&match->intx_mask_lock); + spin_unlock(&match->intx_mask_lock); out: mutex_unlock(&kvm->lock); -- cgit v1.2.3