From 2c07ded06427dd3339278487a1413d5e478f05f9 Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Mon, 4 Jan 2021 09:17:49 -0600 Subject: KVM/SVM: add support for SEV attestation command The SEV FW version >= 0.23 added a new command that can be used to query the attestation report containing the SHA-256 digest of the guest memory encrypted through the KVM_SEV_LAUNCH_UPDATE_{DATA, VMSA} commands and sign the report with the Platform Endorsement Key (PEK). See the SEV FW API spec section 6.8 for more details. Note there already exist a command (KVM_SEV_LAUNCH_MEASURE) that can be used to get the SHA-256 digest. The main difference between the KVM_SEV_LAUNCH_MEASURE and KVM_SEV_ATTESTATION_REPORT is that the latter can be called while the guest is running and the measurement value is signed with PEK. Cc: James Bottomley Cc: Tom Lendacky Cc: David Rientjes Cc: Paolo Bonzini Cc: Sean Christopherson Cc: Borislav Petkov Cc: John Allen Cc: Herbert Xu Cc: linux-crypto@vger.kernel.org Reviewed-by: Tom Lendacky Acked-by: David Rientjes Tested-by: James Bottomley Signed-off-by: Brijesh Singh Message-Id: <20210104151749.30248-1-brijesh.singh@amd.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/amd-memory-encryption.rst | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'Documentation/virt') diff --git a/Documentation/virt/kvm/amd-memory-encryption.rst b/Documentation/virt/kvm/amd-memory-encryption.rst index 09a8f2a34e39..469a6308765b 100644 --- a/Documentation/virt/kvm/amd-memory-encryption.rst +++ b/Documentation/virt/kvm/amd-memory-encryption.rst @@ -263,6 +263,27 @@ Returns: 0 on success, -negative on error __u32 trans_len; }; +10. KVM_SEV_GET_ATTESTATION_REPORT +---------------------------------- + +The KVM_SEV_GET_ATTESTATION_REPORT command can be used by the hypervisor to query the attestation +report containing the SHA-256 digest of the guest memory and VMSA passed through the KVM_SEV_LAUNCH +commands and signed with the PEK. The digest returned by the command should match the digest +used by the guest owner with the KVM_SEV_LAUNCH_MEASURE. + +Parameters (in): struct kvm_sev_attestation + +Returns: 0 on success, -negative on error + +:: + + struct kvm_sev_attestation_report { + __u8 mnonce[16]; /* A random mnonce that will be placed in the report */ + + __u64 uaddr; /* userspace address where the report should be copied */ + __u32 len; + }; + References ========== -- cgit v1.2.3 From c32b1b896d2ab30ac30bc39194bac47a09f7f497 Mon Sep 17 00:00:00 2001 From: Chenyi Qiang Date: Fri, 6 Nov 2020 17:03:15 +0800 Subject: KVM: X86: Add the Document for KVM_CAP_X86_BUS_LOCK_EXIT Introduce a new capability named KVM_CAP_X86_BUS_LOCK_EXIT, which is used to handle bus locks detected in guest. It allows the userspace to do custom throttling policies to mitigate the 'noisy neighbour' problem. Signed-off-by: Chenyi Qiang Message-Id: <20201106090315.18606-5-chenyi.qiang@intel.com> Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 45 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 42 insertions(+), 3 deletions(-) (limited to 'Documentation/virt') diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 99ceb978c8b0..effcc08733fd 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -4893,9 +4893,11 @@ local APIC is not used. __u16 flags; More architecture-specific flags detailing state of the VCPU that may -affect the device's behavior. 
The only currently defined flag is
-KVM_RUN_X86_SMM, which is valid on x86 machines and is set if the
-VCPU is in system management mode.
+affect the device's behavior. Currently defined flags:
+  /* x86, set if the VCPU is in system management mode */
+  #define KVM_RUN_X86_SMM     (1 << 0)
+  /* x86, set if a bus lock was detected in the VM */
+  #define KVM_RUN_BUS_LOCK    (1 << 1)

 ::

@@ -6038,6 +6040,43 @@ KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR exit notifications which user space
 can then handle to implement model specific MSR handling and/or user
 notifications to inform a user that an MSR was not handled.

+7.22 KVM_CAP_X86_BUS_LOCK_EXIT
+-------------------------------
+
+:Architectures: x86
+:Target: VM
+:Parameters: args[0] defines the policy used when a bus lock is detected in the guest
+:Returns: 0 on success, -EINVAL when args[0] contains invalid bits
+
+Valid bits in args[0] are::
+
+ #define KVM_BUS_LOCK_DETECTION_OFF             (1 << 0)
+ #define KVM_BUS_LOCK_DETECTION_EXIT            (1 << 1)
+
+Enabling this capability on a VM provides userspace with a way to select
+a policy for handling bus locks detected in the guest. Userspace can obtain
+the supported modes from the result of KVM_CHECK_EXTENSION and select a mode
+through KVM_ENABLE_CAP.
+
+KVM_BUS_LOCK_DETECTION_OFF and KVM_BUS_LOCK_DETECTION_EXIT are currently
+supported and are mutually exclusive with each other. More bits can be added
+in the future.
+
+With KVM_BUS_LOCK_DETECTION_OFF set, bus locks in the guest do not cause VM
+exits, so no additional action is needed. This is the default mode.
+
+With KVM_BUS_LOCK_DETECTION_EXIT set, a VM exit happens whenever a bus lock is
+detected in the guest. KVM simply exits to userspace when handling it. Userspace
+can enforce its own throttling or other policy-based mitigations.
+
+This capability is aimed at addressing the threat that a VM can exploit bus
+locks to degrade the performance of the whole system. Once userspace enables
+this capability and selects the KVM_BUS_LOCK_DETECTION_EXIT mode, KVM will set
+the KVM_RUN_BUS_LOCK flag in the vcpu->run->flags field and exit to userspace.
+Because a bus lock VM exit can be preempted by a higher priority VM exit, the
+exit notification to userspace can be KVM_EXIT_BUS_LOCK or another reason; the
+KVM_RUN_BUS_LOCK flag is used to distinguish between them.
+
 8. Other capabilities.
 ======================

-- 
cgit v1.2.3


From 9a77daacc87dee9fd63e31243f21894132ed8407 Mon Sep 17 00:00:00 2001
From: Ben Gardon
Date: Tue, 2 Feb 2021 10:57:26 -0800
Subject: KVM: x86/mmu: Use atomic ops to set SPTEs in TDP MMU map

To prepare for handling page faults in parallel, change the TDP MMU
page fault handler to use atomic operations to set SPTEs so that changes
are not lost if multiple threads attempt to modify the same SPTE.

Reviewed-by: Peter Feiner
Signed-off-by: Ben Gardon
Message-Id: <20210202185734.1680553-21-bgardon@google.com>
[Document new locking rules. - Paolo]
Signed-off-by: Paolo Bonzini
---
 Documentation/virt/kvm/locking.rst |   9 ++-
 arch/x86/include/asm/kvm_host.h    |  13 ++++
 arch/x86/kvm/mmu/tdp_mmu.c         | 142 ++++++++++++++++++++++++++++---------
 3 files changed, 130 insertions(+), 34 deletions(-)

(limited to 'Documentation/virt')

diff --git a/Documentation/virt/kvm/locking.rst b/Documentation/virt/kvm/locking.rst
index b21a34c34a21..0aa4817b466d 100644
--- a/Documentation/virt/kvm/locking.rst
+++ b/Documentation/virt/kvm/locking.rst
@@ -16,7 +16,14 @@ The acquisition orders for mutexes are as follows:
 - kvm->slots_lock is taken outside kvm->irq_lock, though acquiring
   them together is quite rare.
-On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock. +On x86: + +- vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock + +- kvm->arch.mmu_lock is an rwlock. kvm->arch.tdp_mmu_pages_lock is + taken inside kvm->arch.mmu_lock, and cannot be taken without already + holding kvm->arch.mmu_lock (typically with ``read_lock``, otherwise + there's no need to take kvm->arch.tdp_mmu_pages_lock at all). Everything else is a leaf: no other lock is taken inside the critical sections. diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c445a51244d3..bcbb32ef9f00 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1039,6 +1039,19 @@ struct kvm_arch { * tdp_mmu_page set and a root_count of 0. */ struct list_head tdp_mmu_pages; + + /* + * Protects accesses to the following fields when the MMU lock + * is held in read mode: + * - tdp_mmu_pages (above) + * - the link field of struct kvm_mmu_pages used by the TDP MMU + * - lpage_disallowed_mmu_pages + * - the lpage_disallowed_link field of struct kvm_mmu_pages used + * by the TDP MMU + * It is acceptable, but not necessary, to acquire this lock when + * the thread holds the MMU lock in write mode. + */ + spinlock_t tdp_mmu_pages_lock; }; struct kvm_vm_stat { diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 5a9e964e0178..0b5a9339ac55 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -7,6 +7,7 @@ #include "tdp_mmu.h" #include "spte.h" +#include #include #ifdef CONFIG_X86_64 @@ -33,6 +34,7 @@ void kvm_mmu_init_tdp_mmu(struct kvm *kvm) kvm->arch.tdp_mmu_enabled = true; INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots); + spin_lock_init(&kvm->arch.tdp_mmu_pages_lock); INIT_LIST_HEAD(&kvm->arch.tdp_mmu_pages); } @@ -225,7 +227,8 @@ static void tdp_mmu_free_sp_rcu_callback(struct rcu_head *head) } static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, - u64 old_spte, u64 new_spte, int level); + u64 old_spte, u64 new_spte, int level, + bool shared); static int kvm_mmu_page_as_id(struct kvm_mmu_page *sp) { @@ -267,17 +270,26 @@ static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn, * * @kvm: kvm instance * @sp: the new page + * @shared: This operation may not be running under the exclusive use of + * the MMU lock and the operation must synchronize with other + * threads that might be adding or removing pages. * @account_nx: This page replaces a NX large page and should be marked for * eventual reclaim. */ static void tdp_mmu_link_page(struct kvm *kvm, struct kvm_mmu_page *sp, - bool account_nx) + bool shared, bool account_nx) { - lockdep_assert_held_write(&kvm->mmu_lock); + if (shared) + spin_lock(&kvm->arch.tdp_mmu_pages_lock); + else + lockdep_assert_held_write(&kvm->mmu_lock); list_add(&sp->link, &kvm->arch.tdp_mmu_pages); if (account_nx) account_huge_nx_page(kvm, sp); + + if (shared) + spin_unlock(&kvm->arch.tdp_mmu_pages_lock); } /** @@ -285,14 +297,24 @@ static void tdp_mmu_link_page(struct kvm *kvm, struct kvm_mmu_page *sp, * * @kvm: kvm instance * @sp: the page to be removed + * @shared: This operation may not be running under the exclusive use of + * the MMU lock and the operation must synchronize with other + * threads that might be adding or removing pages. 
*/ -static void tdp_mmu_unlink_page(struct kvm *kvm, struct kvm_mmu_page *sp) +static void tdp_mmu_unlink_page(struct kvm *kvm, struct kvm_mmu_page *sp, + bool shared) { - lockdep_assert_held_write(&kvm->mmu_lock); + if (shared) + spin_lock(&kvm->arch.tdp_mmu_pages_lock); + else + lockdep_assert_held_write(&kvm->mmu_lock); list_del(&sp->link); if (sp->lpage_disallowed) unaccount_huge_nx_page(kvm, sp); + + if (shared) + spin_unlock(&kvm->arch.tdp_mmu_pages_lock); } /** @@ -300,28 +322,39 @@ static void tdp_mmu_unlink_page(struct kvm *kvm, struct kvm_mmu_page *sp) * * @kvm: kvm instance * @pt: the page removed from the paging structure + * @shared: This operation may not be running under the exclusive use + * of the MMU lock and the operation must synchronize with other + * threads that might be modifying SPTEs. * * Given a page table that has been removed from the TDP paging structure, * iterates through the page table to clear SPTEs and free child page tables. */ -static void handle_removed_tdp_mmu_page(struct kvm *kvm, u64 *pt) +static void handle_removed_tdp_mmu_page(struct kvm *kvm, u64 *pt, + bool shared) { struct kvm_mmu_page *sp = sptep_to_sp(pt); int level = sp->role.level; gfn_t gfn = sp->gfn; u64 old_child_spte; + u64 *sptep; int i; trace_kvm_mmu_prepare_zap_page(sp); - tdp_mmu_unlink_page(kvm, sp); + tdp_mmu_unlink_page(kvm, sp, shared); for (i = 0; i < PT64_ENT_PER_PAGE; i++) { - old_child_spte = READ_ONCE(*(pt + i)); - WRITE_ONCE(*(pt + i), 0); + sptep = pt + i; + + if (shared) { + old_child_spte = xchg(sptep, 0); + } else { + old_child_spte = READ_ONCE(*sptep); + WRITE_ONCE(*sptep, 0); + } handle_changed_spte(kvm, kvm_mmu_page_as_id(sp), gfn + (i * KVM_PAGES_PER_HPAGE(level - 1)), - old_child_spte, 0, level - 1); + old_child_spte, 0, level - 1, shared); } kvm_flush_remote_tlbs_with_address(kvm, gfn, @@ -338,12 +371,16 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, u64 *pt) * @old_spte: The value of the SPTE before the change * @new_spte: The value of the SPTE after the change * @level: the level of the PT the SPTE is part of in the paging structure + * @shared: This operation may not be running under the exclusive use of + * the MMU lock and the operation must synchronize with other + * threads that might be modifying SPTEs. * * Handle bookkeeping that might result from the modification of a SPTE. * This function must be called for all TDP SPTE modifications. 
*/ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, - u64 old_spte, u64 new_spte, int level) + u64 old_spte, u64 new_spte, int level, + bool shared) { bool was_present = is_shadow_present_pte(old_spte); bool is_present = is_shadow_present_pte(new_spte); @@ -415,18 +452,51 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, */ if (was_present && !was_leaf && (pfn_changed || !is_present)) handle_removed_tdp_mmu_page(kvm, - spte_to_child_pt(old_spte, level)); + spte_to_child_pt(old_spte, level), shared); } static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn, - u64 old_spte, u64 new_spte, int level) + u64 old_spte, u64 new_spte, int level, + bool shared) { - __handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, level); + __handle_changed_spte(kvm, as_id, gfn, old_spte, new_spte, level, + shared); handle_changed_spte_acc_track(old_spte, new_spte, level); handle_changed_spte_dirty_log(kvm, as_id, gfn, old_spte, new_spte, level); } +/* + * tdp_mmu_set_spte_atomic - Set a TDP MMU SPTE atomically and handle the + * associated bookkeeping + * + * @kvm: kvm instance + * @iter: a tdp_iter instance currently on the SPTE that should be set + * @new_spte: The value the SPTE should be set to + * Returns: true if the SPTE was set, false if it was not. If false is returned, + * this function will have no side-effects. + */ +static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm, + struct tdp_iter *iter, + u64 new_spte) +{ + u64 *root_pt = tdp_iter_root_pt(iter); + struct kvm_mmu_page *root = sptep_to_sp(root_pt); + int as_id = kvm_mmu_page_as_id(root); + + lockdep_assert_held_read(&kvm->mmu_lock); + + if (cmpxchg64(rcu_dereference(iter->sptep), iter->old_spte, + new_spte) != iter->old_spte) + return false; + + handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte, + iter->level, true); + + return true; +} + + /* * __tdp_mmu_set_spte - Set a TDP MMU SPTE and handle the associated bookkeeping * @kvm: kvm instance @@ -456,7 +526,7 @@ static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter, WRITE_ONCE(*rcu_dereference(iter->sptep), new_spte); __handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte, - iter->level); + iter->level, false); if (record_acc_track) handle_changed_spte_acc_track(iter->old_spte, new_spte, iter->level); @@ -630,23 +700,18 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, int write, int ret = 0; int make_spte_ret = 0; - if (unlikely(is_noslot_pfn(pfn))) { + if (unlikely(is_noslot_pfn(pfn))) new_spte = make_mmio_spte(vcpu, iter->gfn, ACC_ALL); - trace_mark_mmio_spte(rcu_dereference(iter->sptep), iter->gfn, - new_spte); - } else { + else make_spte_ret = make_spte(vcpu, ACC_ALL, iter->level, iter->gfn, pfn, iter->old_spte, prefault, true, map_writable, !shadow_accessed_mask, &new_spte); - trace_kvm_mmu_set_spte(iter->level, iter->gfn, - rcu_dereference(iter->sptep)); - } if (new_spte == iter->old_spte) ret = RET_PF_SPURIOUS; - else - tdp_mmu_set_spte(vcpu->kvm, iter, new_spte); + else if (!tdp_mmu_set_spte_atomic(vcpu->kvm, iter, new_spte)) + return RET_PF_RETRY; /* * If the page fault was caused by a write but the page is write @@ -660,8 +725,13 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, int write, } /* If a MMIO SPTE is installed, the MMIO will need to be emulated. 
*/ - if (unlikely(is_mmio_spte(new_spte))) + if (unlikely(is_mmio_spte(new_spte))) { + trace_mark_mmio_spte(rcu_dereference(iter->sptep), iter->gfn, + new_spte); ret = RET_PF_EMULATE; + } else + trace_kvm_mmu_set_spte(iter->level, iter->gfn, + rcu_dereference(iter->sptep)); trace_kvm_mmu_set_spte(iter->level, iter->gfn, rcu_dereference(iter->sptep)); @@ -720,7 +790,8 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, */ if (is_shadow_present_pte(iter.old_spte) && is_large_pte(iter.old_spte)) { - tdp_mmu_set_spte(vcpu->kvm, &iter, 0); + if (!tdp_mmu_set_spte_atomic(vcpu->kvm, &iter, 0)) + break; kvm_flush_remote_tlbs_with_address(vcpu->kvm, iter.gfn, KVM_PAGES_PER_HPAGE(iter.level)); @@ -737,19 +808,24 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code, sp = alloc_tdp_mmu_page(vcpu, iter.gfn, iter.level); child_pt = sp->spt; - tdp_mmu_link_page(vcpu->kvm, sp, - huge_page_disallowed && - req_level >= iter.level); - new_spte = make_nonleaf_spte(child_pt, !shadow_accessed_mask); - trace_kvm_mmu_get_page(sp, true); - tdp_mmu_set_spte(vcpu->kvm, &iter, new_spte); + if (tdp_mmu_set_spte_atomic(vcpu->kvm, &iter, + new_spte)) { + tdp_mmu_link_page(vcpu->kvm, sp, true, + huge_page_disallowed && + req_level >= iter.level); + + trace_kvm_mmu_get_page(sp, true); + } else { + tdp_mmu_free_sp(sp); + break; + } } } - if (WARN_ON(iter.level != level)) { + if (iter.level != level) { rcu_read_unlock(); return RET_PF_RETRY; } -- cgit v1.2.3 From e1f68169a4f89e49f33bf52df29aeb57cb8b1144 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 4 Dec 2020 09:03:56 +0000 Subject: KVM: Add documentation for Xen hypercall and shared_info updates Signed-off-by: David Woodhouse --- Documentation/virt/kvm/api.rst | 171 +++++++++++++++++++++++++++++++++++++++-- 1 file changed, 166 insertions(+), 5 deletions(-) (limited to 'Documentation/virt') diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index effcc08733fd..5919e9a94f25 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -960,6 +960,14 @@ memory. __u8 pad2[30]; }; +If the KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL flag is returned from the +KVM_CAP_XEN_HVM check, it may be set in the flags field of this ioctl. +This requests KVM to generate the contents of the hypercall page +automatically; hypercalls will be intercepted and passed to userspace +through KVM_EXIT_XEN. In this case, all of the blob size and address +fields must be zero. + +No other flags are currently valid in the struct kvm_xen_hvm_config. 4.29 KVM_GET_CLOCK ------------------ @@ -4831,6 +4839,101 @@ into user space. If a vCPU is in running state while this ioctl is invoked, the vCPU may experience inconsistent filtering behavior on MSR accesses. +4.127 KVM_XEN_HVM_SET_ATTR +-------------------------- + +:Capability: KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO +:Architectures: x86 +:Type: vm ioctl +:Parameters: struct kvm_xen_hvm_attr +:Returns: 0 on success, < 0 on error + +:: + + struct kvm_xen_hvm_attr { + __u16 type; + __u16 pad[3]; + union { + __u8 long_mode; + __u8 vector; + struct { + __u64 gfn; + } shared_info; + __u64 pad[4]; + } u; + }; + +type values: + +KVM_XEN_ATTR_TYPE_LONG_MODE + Sets the ABI mode of the VM to 32-bit or 64-bit (long mode). This + determines the layout of the shared info pages exposed to the VM. + +KVM_XEN_ATTR_TYPE_SHARED_INFO + Sets the guest physical frame number at which the Xen "shared info" + page resides. 
Note that although Xen places vcpu_info for the first + 32 vCPUs in the shared_info page, KVM does not automatically do so + and instead requires that KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO be used + explicitly even when the vcpu_info for a given vCPU resides at the + "default" location in the shared_info page. This is because KVM is + not aware of the Xen CPU id which is used as the index into the + vcpu_info[] array, so cannot know the correct default location. + +KVM_XEN_ATTR_TYPE_UPCALL_VECTOR + Sets the exception vector used to deliver Xen event channel upcalls. + +4.128 KVM_XEN_HVM_GET_ATTR +-------------------------- + +:Capability: KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO +:Architectures: x86 +:Type: vm ioctl +:Parameters: struct kvm_xen_hvm_attr +:Returns: 0 on success, < 0 on error + +Allows Xen VM attributes to be read. For the structure and types, +see KVM_XEN_HVM_SET_ATTR above. + +4.129 KVM_XEN_VCPU_SET_ATTR +--------------------------- + +:Capability: KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO +:Architectures: x86 +:Type: vcpu ioctl +:Parameters: struct kvm_xen_vcpu_attr +:Returns: 0 on success, < 0 on error + +:: + + struct kvm_xen_vcpu_attr { + __u16 type; + __u16 pad[3]; + union { + __u64 gpa; + __u64 pad[4]; + } u; + }; + +type values: + +KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO + Sets the guest physical address of the vcpu_info for a given vCPU. + +KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO + Sets the guest physical address of an additional pvclock structure + for a given vCPU. This is typically used for guest vsyscall support. + +4.130 KVM_XEN_VCPU_GET_ATTR +-------------------------- + +:Capability: KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO +:Architectures: x86 +:Type: vcpu ioctl +:Parameters: struct kvm_xen_vcpu_attr +:Returns: 0 on success, < 0 on error + +Allows Xen vCPU attributes to be read. For the structure and types, +see KVM_XEN_VCPU_SET_ATTR above. 5. The kvm_run structure ======================== @@ -4998,13 +5101,18 @@ to the byte array. .. note:: - For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_PAPR, + For KVM_EXIT_IO, KVM_EXIT_MMIO, KVM_EXIT_OSI, KVM_EXIT_PAPR, KVM_EXIT_XEN, KVM_EXIT_EPR, KVM_EXIT_X86_RDMSR and KVM_EXIT_X86_WRMSR the corresponding operations are complete (and guest state is consistent) only after userspace has re-entered the kernel with KVM_RUN. The kernel side will first finish - incomplete operations and then check for pending signals. Userspace - can re-enter the guest with an unmasked signal pending to complete - pending operations. + incomplete operations and then check for pending signals. + + The pending state of the operation is not preserved in state which is + visible to userspace, thus userspace should ensure that the operation is + completed before performing a live migration. Userspace can re-enter the + guest with an unmasked signal pending or with the immediate_exit field set + to complete pending operations without allowing any further instructions + to be executed. :: @@ -5329,6 +5437,34 @@ wants to write. Once finished processing the event, user space must continue vCPU execution. If the MSR write was unsuccessful, user space also sets the "error" field to "1". +:: + + + struct kvm_xen_exit { + #define KVM_EXIT_XEN_HCALL 1 + __u32 type; + union { + struct { + __u32 longmode; + __u32 cpl; + __u64 input; + __u64 result; + __u64 params[6]; + } hcall; + } u; + }; + /* KVM_EXIT_XEN */ + struct kvm_hyperv_exit xen; + +Indicates that the VCPU exits into userspace to process some tasks +related to Xen emulation. 
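A minimal sketch of how a VMM run loop might consume this exit is shown below;
it is illustrative only and not part of this patch (the vcpu_fd/run variables
and the emulate_xen_hcall() helper are hypothetical). The valid 'type' values
are listed right after the sketch. ::

    #include <linux/kvm.h>
    #include <sys/ioctl.h>

    /* Hypothetical hypercall emulator provided by the VMM */
    extern __u64 emulate_xen_hcall(__u64 input, __u64 *params, __u32 longmode);

    /* vcpu_fd is an open vCPU fd, run is that vCPU's mmap'ed struct kvm_run */
    static void vcpu_loop(int vcpu_fd, struct kvm_run *run)
    {
        while (ioctl(vcpu_fd, KVM_RUN, 0) == 0) {
            if (run->exit_reason == KVM_EXIT_XEN &&
                run->xen.type == KVM_EXIT_XEN_HCALL) {
                /*
                 * Emulate the hypercall, then hand the return value
                 * back to the guest before the next KVM_RUN.
                 */
                run->xen.u.hcall.result =
                    emulate_xen_hcall(run->xen.u.hcall.input,
                                      run->xen.u.hcall.params,
                                      run->xen.u.hcall.longmode);
                continue;
            }
            /* ... handle other exit reasons ... */
        }
    }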
+ +Valid values for 'type' are: + + - KVM_EXIT_XEN_HCALL -- synchronously notify user-space about Xen hypercall. + Userspace is expected to place the hypercall result into the appropriate + field before invoking KVM_RUN again. + :: /* Fix the size of the union. */ @@ -6454,7 +6590,6 @@ guest according to the bits in the KVM_CPUID_FEATURES CPUID leaf (0x40000001). Otherwise, a guest may use the paravirtual features regardless of what has actually been exposed through the CPUID leaf. - 8.29 KVM_CAP_DIRTY_LOG_RING --------------------------- @@ -6541,3 +6676,29 @@ KVM_GET_DIRTY_LOG and KVM_CLEAR_DIRTY_LOG. After enabling KVM_CAP_DIRTY_LOG_RING with an acceptable dirty ring size, the virtual machine will switch to ring-buffer dirty page tracking and further KVM_GET_DIRTY_LOG or KVM_CLEAR_DIRTY_LOG ioctls will fail. + +8.30 KVM_CAP_XEN_HVM +-------------------- + +:Architectures: x86 + +This capability indicates the features that Xen supports for hosting Xen +PVHVM guests. Valid flags are:: + + #define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR (1 << 0) + #define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL (1 << 1) + #define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2) + +The KVM_XEN_HVM_CONFIG_HYPERCALL_MSR flag indicates that the KVM_XEN_HVM_CONFIG +ioctl is available, for the guest to set its hypercall page. + +If KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL is also set, the same flag may also be +provided in the flags to KVM_XEN_HVM_CONFIG, without providing hypercall page +contents, to request that KVM generate hypercall page content automatically +and also enable interception of guest hypercalls with KVM_EXIT_XEN. + +The KVM_XEN_HVM_CONFIG_SHARED_INFO flag indicates the availability of the +KVM_XEN_HVM_SET_ATTR, KVM_XEN_HVM_GET_ATTR, KVM_XEN_VCPU_SET_ATTR and +KVM_XEN_VCPU_GET_ATTR ioctls, as well as the delivery of exception vectors +for event channel upcalls when the evtchn_upcall_pending field of a vcpu's +vcpu_info is set. -- cgit v1.2.3 From 9294b8a12585f8b4ccb9c060b54bab0bd13f24b9 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 9 Feb 2021 05:07:45 -0500 Subject: Documentation: kvm: fix warning Documentation/virt/kvm/api.rst:4927: WARNING: Title underline too short. 4.130 KVM_XEN_VCPU_GET_ATTR -------------------------- Fixes: e1f68169a4f8 ("KVM: Add documentation for Xen hypercall and shared_info updates") Reported-by: Stephen Rothwell Signed-off-by: Paolo Bonzini --- Documentation/virt/kvm/api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation/virt') diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 5919e9a94f25..482508ec7cc4 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -4924,7 +4924,7 @@ KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO for a given vCPU. This is typically used for guest vsyscall support. 4.130 KVM_XEN_VCPU_GET_ATTR --------------------------- +--------------------------- :Capability: KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO :Architectures: x86 -- cgit v1.2.3 From bd1de1a0e6eff4bde5ceae969673b85b8446fd6a Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Wed, 16 Dec 2020 16:12:18 +0530 Subject: KVM: PPC: Book3S HV: Add infrastructure to support 2nd DAWR KVM code assumes single DAWR everywhere. Add code to support 2nd DAWR. DAWR is a hypervisor resource and thus H_SET_MODE hcall is used to set/ unset it. Introduce new case H_SET_MODE_RESOURCE_SET_DAWR1 for 2nd DAWR. Also, KVM will support 2nd DAWR only if CPU_FTR_DAWR1 is set. 
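For illustration only (not part of this patch), a pseries guest could program
the second DAWR through the existing plpar_set_mode() wrapper once the
hypervisor accepts the new resource; the helper name below is hypothetical:

    #include <asm/hvcall.h>
    #include <asm/plpar_wrappers.h>

    /*
     * Program DAWR1/DAWRX1 via H_SET_MODE. mflags must be 0 and the
     * hypervisor-match bit must be clear in dawrx, otherwise KVM fails
     * the call (H_UNSUPPORTED_FLAG_START / H_P4), as in
     * kvmppc_h_set_mode() above. The call also returns H_FUNCTION until
     * the VMM has enabled the feature (see the following patch).
     */
    static long set_second_dawr(unsigned long dawr, unsigned long dawrx)
    {
        return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR1,
                              dawr, dawrx);
    }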
Signed-off-by: Ravi Bangoria Signed-off-by: Paul Mackerras --- Documentation/virt/kvm/api.rst | 2 ++ arch/powerpc/include/asm/hvcall.h | 8 +++++- arch/powerpc/include/asm/kvm_host.h | 3 +++ arch/powerpc/include/uapi/asm/kvm.h | 2 ++ arch/powerpc/kernel/asm-offsets.c | 2 ++ arch/powerpc/kvm/book3s_hv.c | 43 +++++++++++++++++++++++++++++++ arch/powerpc/kvm/book3s_hv_nested.c | 7 +++++ arch/powerpc/kvm/book3s_hv_rmhandlers.S | 23 +++++++++++++++++ tools/arch/powerpc/include/uapi/asm/kvm.h | 2 ++ 9 files changed, 91 insertions(+), 1 deletion(-) (limited to 'Documentation/virt') diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index 482508ec7cc4..a783a8da818b 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -2276,6 +2276,8 @@ registers, find a list below: PPC KVM_REG_PPC_PSSCR 64 PPC KVM_REG_PPC_DEC_EXPIRY 64 PPC KVM_REG_PPC_PTCR 64 + PPC KVM_REG_PPC_DAWR1 64 + PPC KVM_REG_PPC_DAWRX1 64 PPC KVM_REG_PPC_TM_GPR0 64 ... PPC KVM_REG_PPC_TM_GPR31 64 diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index cb4d07e2c0cc..ed6086d57b22 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -569,16 +569,22 @@ struct hv_guest_state { u64 pidr; u64 cfar; u64 ppr; + /* Version 1 ends here */ + u64 dawr1; + u64 dawrx1; + /* Version 2 ends here */ }; /* Latest version of hv_guest_state structure */ -#define HV_GUEST_STATE_VERSION 1 +#define HV_GUEST_STATE_VERSION 2 static inline int hv_guest_state_size(unsigned int version) { switch (version) { case 1: return offsetofend(struct hv_guest_state, ppr); + case 2: + return offsetofend(struct hv_guest_state, dawrx1); default: return -1; } diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index cf52081c9282..05fb00d37609 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -306,6 +306,7 @@ struct kvm_arch { u8 svm_enabled; bool threads_indep; bool nested_enable; + bool dawr1_enabled; pgd_t *pgtable; u64 process_table; struct dentry *debugfs_dir; @@ -585,6 +586,8 @@ struct kvm_vcpu_arch { ulong dabr; ulong dawr0; ulong dawrx0; + ulong dawr1; + ulong dawrx1; ulong ciabr; ulong cfar; ulong ppr; diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index c3af3f324c5a..9f18fa090f1f 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -644,6 +644,8 @@ struct kvm_ppc_cpu_char { #define KVM_REG_PPC_MMCR3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc1) #define KVM_REG_PPC_SIER2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc2) #define KVM_REG_PPC_SIER3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3) +#define KVM_REG_PPC_DAWR1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc4) +#define KVM_REG_PPC_DAWRX1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc5) /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 410b8c47443f..4771a8511216 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -528,6 +528,8 @@ int main(void) OFFSET(VCPU_DABRX, kvm_vcpu, arch.dabrx); OFFSET(VCPU_DAWR0, kvm_vcpu, arch.dawr0); OFFSET(VCPU_DAWRX0, kvm_vcpu, arch.dawrx0); + OFFSET(VCPU_DAWR1, kvm_vcpu, arch.dawr1); + OFFSET(VCPU_DAWRX1, kvm_vcpu, arch.dawrx1); OFFSET(VCPU_CIABR, kvm_vcpu, arch.ciabr); OFFSET(VCPU_HFLAGS, kvm_vcpu, arch.hflags); OFFSET(VCPU_DEC, kvm_vcpu, arch.dec); diff --git 
a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 07531ac6ba5d..b37bddfead1b 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -785,6 +785,22 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags, vcpu->arch.dawr0 = value1; vcpu->arch.dawrx0 = value2; return H_SUCCESS; + case H_SET_MODE_RESOURCE_SET_DAWR1: + if (!kvmppc_power8_compatible(vcpu)) + return H_P2; + if (!ppc_breakpoint_available()) + return H_P2; + if (!cpu_has_feature(CPU_FTR_DAWR1)) + return H_P2; + if (!vcpu->kvm->arch.dawr1_enabled) + return H_FUNCTION; + if (mflags) + return H_UNSUPPORTED_FLAG_START; + if (value2 & DABRX_HYP) + return H_P4; + vcpu->arch.dawr1 = value1; + vcpu->arch.dawrx1 = value2; + return H_SUCCESS; case H_SET_MODE_RESOURCE_ADDR_TRANS_MODE: /* KVM does not support mflags=2 (AIL=2) */ if (mflags != 0 && mflags != 3) @@ -1764,6 +1780,12 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_DAWRX: *val = get_reg_val(id, vcpu->arch.dawrx0); break; + case KVM_REG_PPC_DAWR1: + *val = get_reg_val(id, vcpu->arch.dawr1); + break; + case KVM_REG_PPC_DAWRX1: + *val = get_reg_val(id, vcpu->arch.dawrx1); + break; case KVM_REG_PPC_CIABR: *val = get_reg_val(id, vcpu->arch.ciabr); break; @@ -1996,6 +2018,12 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id, case KVM_REG_PPC_DAWRX: vcpu->arch.dawrx0 = set_reg_val(id, *val) & ~DAWRX_HYP; break; + case KVM_REG_PPC_DAWR1: + vcpu->arch.dawr1 = set_reg_val(id, *val); + break; + case KVM_REG_PPC_DAWRX1: + vcpu->arch.dawrx1 = set_reg_val(id, *val) & ~DAWRX_HYP; + break; case KVM_REG_PPC_CIABR: vcpu->arch.ciabr = set_reg_val(id, *val); /* Don't allow setting breakpoints in hypervisor code */ @@ -3453,6 +3481,13 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long host_dawrx0 = mfspr(SPRN_DAWRX0); unsigned long host_psscr = mfspr(SPRN_PSSCR); unsigned long host_pidr = mfspr(SPRN_PID); + unsigned long host_dawr1 = 0; + unsigned long host_dawrx1 = 0; + + if (cpu_has_feature(CPU_FTR_DAWR1)) { + host_dawr1 = mfspr(SPRN_DAWR1); + host_dawrx1 = mfspr(SPRN_DAWRX1); + } /* * P8 and P9 suppress the HDEC exception when LPCR[HDICE] = 0, @@ -3491,6 +3526,10 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit, if (dawr_enabled()) { mtspr(SPRN_DAWR0, vcpu->arch.dawr0); mtspr(SPRN_DAWRX0, vcpu->arch.dawrx0); + if (cpu_has_feature(CPU_FTR_DAWR1)) { + mtspr(SPRN_DAWR1, vcpu->arch.dawr1); + mtspr(SPRN_DAWRX1, vcpu->arch.dawrx1); + } } mtspr(SPRN_CIABR, vcpu->arch.ciabr); mtspr(SPRN_IC, vcpu->arch.ic); @@ -3544,6 +3583,10 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit, mtspr(SPRN_CIABR, host_ciabr); mtspr(SPRN_DAWR0, host_dawr0); mtspr(SPRN_DAWRX0, host_dawrx0); + if (cpu_has_feature(CPU_FTR_DAWR1)) { + mtspr(SPRN_DAWR1, host_dawr1); + mtspr(SPRN_DAWRX1, host_dawrx1); + } mtspr(SPRN_PID, host_pidr); /* diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index d22e45b97f88..0cd0e7aad588 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -49,6 +49,8 @@ void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr) hr->pidr = vcpu->arch.pid; hr->cfar = vcpu->arch.cfar; hr->ppr = vcpu->arch.ppr; + hr->dawr1 = vcpu->arch.dawr1; + hr->dawrx1 = vcpu->arch.dawrx1; } static void byteswap_pt_regs(struct pt_regs *regs) @@ -91,6 +93,8 @@ static void byteswap_hv_regs(struct hv_guest_state *hr) hr->pidr = 
swab64(hr->pidr); hr->cfar = swab64(hr->cfar); hr->ppr = swab64(hr->ppr); + hr->dawr1 = swab64(hr->dawr1); + hr->dawrx1 = swab64(hr->dawrx1); } static void save_hv_return_state(struct kvm_vcpu *vcpu, int trap, @@ -138,6 +142,7 @@ static void sanitise_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr) /* Don't let data address watchpoint match in hypervisor state */ hr->dawrx0 &= ~DAWRX_HYP; + hr->dawrx1 &= ~DAWRX_HYP; /* Don't let completed instruction address breakpt match in HV state */ if ((hr->ciabr & CIABR_PRIV) == CIABR_PRIV_HYPER) @@ -167,6 +172,8 @@ static void restore_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr) vcpu->arch.pid = hr->pidr; vcpu->arch.cfar = hr->cfar; vcpu->arch.ppr = hr->ppr; + vcpu->arch.dawr1 = hr->dawr1; + vcpu->arch.dawrx1 = hr->dawrx1; } void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu, diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 75804062f2c5..65161c062dc5 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -57,6 +57,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) #define STACK_SLOT_HFSCR (SFS-72) #define STACK_SLOT_AMR (SFS-80) #define STACK_SLOT_UAMOR (SFS-88) +#define STACK_SLOT_DAWR1 (SFS-96) +#define STACK_SLOT_DAWRX1 (SFS-104) /* the following is used by the P9 short path */ #define STACK_SLOT_NVGPRS (SFS-152) /* 18 gprs */ @@ -715,6 +717,12 @@ BEGIN_FTR_SECTION std r7, STACK_SLOT_DAWRX0(r1) std r8, STACK_SLOT_IAMR(r1) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) +BEGIN_FTR_SECTION + mfspr r6, SPRN_DAWR1 + mfspr r7, SPRN_DAWRX1 + std r6, STACK_SLOT_DAWR1(r1) + std r7, STACK_SLOT_DAWRX1(r1) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S | CPU_FTR_DAWR1) mfspr r5, SPRN_AMR std r5, STACK_SLOT_AMR(r1) @@ -805,6 +813,12 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) ld r6, VCPU_DAWRX0(r4) mtspr SPRN_DAWR0, r5 mtspr SPRN_DAWRX0, r6 +BEGIN_FTR_SECTION + ld r5, VCPU_DAWR1(r4) + ld r6, VCPU_DAWRX1(r4) + mtspr SPRN_DAWR1, r5 + mtspr SPRN_DAWRX1, r6 +END_FTR_SECTION_IFSET(CPU_FTR_DAWR1) 1: ld r7, VCPU_CIABR(r4) ld r8, VCPU_TAR(r4) @@ -1769,6 +1783,12 @@ BEGIN_FTR_SECTION mtspr SPRN_DAWR0, r6 mtspr SPRN_DAWRX0, r7 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) +BEGIN_FTR_SECTION + ld r6, STACK_SLOT_DAWR1(r1) + ld r7, STACK_SLOT_DAWRX1(r1) + mtspr SPRN_DAWR1, r6 + mtspr SPRN_DAWRX1, r7 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S | CPU_FTR_DAWR1) BEGIN_FTR_SECTION ld r5, STACK_SLOT_TID(r1) ld r6, STACK_SLOT_PSSCR(r1) @@ -3343,6 +3363,9 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) mtspr SPRN_IAMR, r0 mtspr SPRN_CIABR, r0 mtspr SPRN_DAWRX0, r0 +BEGIN_FTR_SECTION + mtspr SPRN_DAWRX1, r0 +END_FTR_SECTION_IFSET(CPU_FTR_DAWR1) BEGIN_MMU_FTR_SECTION b 4f diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h index c3af3f324c5a..9f18fa090f1f 100644 --- a/tools/arch/powerpc/include/uapi/asm/kvm.h +++ b/tools/arch/powerpc/include/uapi/asm/kvm.h @@ -644,6 +644,8 @@ struct kvm_ppc_cpu_char { #define KVM_REG_PPC_MMCR3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc1) #define KVM_REG_PPC_SIER2 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc2) #define KVM_REG_PPC_SIER3 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3) +#define KVM_REG_PPC_DAWR1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc4) +#define KVM_REG_PPC_DAWRX1 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc5) /* Transactional Memory checkpointed state: * This is all GPRs, all VSX regs and a subset of SPRs -- cgit v1.2.3 From d9a47edabc4f948102753fa9d41f2dc1dbeb28be Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Wed, 
16 Dec 2020 16:12:19 +0530 Subject: KVM: PPC: Book3S HV: Introduce new capability for 2nd DAWR Introduce KVM_CAP_PPC_DAWR1 which can be used by QEMU to query whether KVM supports 2nd DAWR or not. The capability is by default disabled even when the underlying CPU supports 2nd DAWR. QEMU needs to check and enable it manually to use the feature. Signed-off-by: Ravi Bangoria Signed-off-by: Paul Mackerras --- Documentation/virt/kvm/api.rst | 10 ++++++++++ arch/powerpc/include/asm/kvm_ppc.h | 1 + arch/powerpc/kvm/book3s_hv.c | 12 ++++++++++++ arch/powerpc/kvm/powerpc.c | 10 ++++++++++ include/uapi/linux/kvm.h | 1 + tools/include/uapi/linux/kvm.h | 1 + 6 files changed, 35 insertions(+) (limited to 'Documentation/virt') diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst index a783a8da818b..45fd862ac128 100644 --- a/Documentation/virt/kvm/api.rst +++ b/Documentation/virt/kvm/api.rst @@ -6215,6 +6215,16 @@ the bus lock vm exit can be preempted by a higher priority VM exit, the exit notifications to userspace can be KVM_EXIT_BUS_LOCK or other reasons. KVM_RUN_BUS_LOCK flag is used to distinguish between them. +7.22 KVM_CAP_PPC_DAWR1 +---------------------- + +:Architectures: ppc +:Parameters: none +:Returns: 0 on success, -EINVAL when CPU doesn't support 2nd DAWR + +This capability can be used to check / enable 2nd DAWR feature provided +by POWER10 processor. + 8. Other capabilities. ====================== diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 0a056c64c317..13c39d24dda5 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -314,6 +314,7 @@ struct kvmppc_ops { int size); int (*enable_svm)(struct kvm *kvm); int (*svm_off)(struct kvm *kvm); + int (*enable_dawr1)(struct kvm *kvm); }; extern struct kvmppc_ops *kvmppc_hv_ops; diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index b37bddfead1b..dffbfd1ab47b 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -5642,6 +5642,17 @@ out: return ret; } +static int kvmhv_enable_dawr1(struct kvm *kvm) +{ + if (!cpu_has_feature(CPU_FTR_DAWR1)) + return -ENODEV; + + /* kvm == NULL means the caller is testing if the capability exists */ + if (kvm) + kvm->arch.dawr1_enabled = true; + return 0; +} + static struct kvmppc_ops kvm_ops_hv = { .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv, .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv, @@ -5685,6 +5696,7 @@ static struct kvmppc_ops kvm_ops_hv = { .store_to_eaddr = kvmhv_store_to_eaddr, .enable_svm = kvmhv_enable_svm, .svm_off = kvmhv_svm_off, + .enable_dawr1 = kvmhv_enable_dawr1, }; static int kvm_init_subcore_bitmap(void) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index cf52d26f49cd..5914c32d9ef3 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -678,6 +678,10 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) r = hv_enabled && kvmppc_hv_ops->enable_svm && !kvmppc_hv_ops->enable_svm(NULL); break; + case KVM_CAP_PPC_DAWR1: + r = !!(hv_enabled && kvmppc_hv_ops->enable_dawr1 && + !kvmppc_hv_ops->enable_dawr1(NULL)); + break; #endif default: r = 0; @@ -2187,6 +2191,12 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm, break; r = kvm->arch.kvm_ops->enable_svm(kvm); break; + case KVM_CAP_PPC_DAWR1: + r = -EINVAL; + if (!is_kvmppc_hv_enabled(kvm) || !kvm->arch.kvm_ops->enable_dawr1) + break; + r = kvm->arch.kvm_ops->enable_dawr1(kvm); + break; #endif default: r = -EINVAL; diff --git 
a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 63f8f6e95648..8b281f722e5b 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1077,6 +1077,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_SYS_HYPERV_CPUID 191 #define KVM_CAP_DIRTY_LOG_RING 192 #define KVM_CAP_X86_BUS_LOCK_EXIT 193 +#define KVM_CAP_PPC_DAWR1 194 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 886802b8ffba..9c284a9c3ac5 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -1056,6 +1056,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190 #define KVM_CAP_SYS_HYPERV_CPUID 191 #define KVM_CAP_DIRTY_LOG_RING 192 +#define KVM_CAP_PPC_DAWR1 194 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3
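Taken together, the two DAWR1 patches above leave the second DAWR disabled
until the VMM opts in. A minimal userspace sketch of that flow (hypothetical
helper name, assuming vm_fd is an open KVM VM file descriptor):

    #include <linux/kvm.h>
    #include <sys/ioctl.h>
    #include <string.h>

    static int enable_second_dawr(int vm_fd)
    {
        struct kvm_enable_cap cap;

        /* KVM advertises the capability only when CPU_FTR_DAWR1 is set */
        if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_PPC_DAWR1) <= 0)
            return -1;

        memset(&cap, 0, sizeof(cap));
        cap.cap = KVM_CAP_PPC_DAWR1;

        /*
         * Sets kvm->arch.dawr1_enabled so the guest's
         * H_SET_MODE_RESOURCE_SET_DAWR1 hcall is allowed to succeed.
         */
        return ioctl(vm_fd, KVM_ENABLE_CAP, &cap);
    }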