diff options
Diffstat (limited to 'arch/x86/kvm/mmu/tdp_mmu.c')
-rw-r--r-- | arch/x86/kvm/mmu/tdp_mmu.c | 114 |
1 files changed, 57 insertions, 57 deletions
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c index 672f0432d777..771210ce5181 100644 --- a/arch/x86/kvm/mmu/tdp_mmu.c +++ b/arch/x86/kvm/mmu/tdp_mmu.c @@ -29,7 +29,6 @@ int kvm_mmu_init_tdp_mmu(struct kvm *kvm) kvm->arch.tdp_mmu_enabled = true; INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots); spin_lock_init(&kvm->arch.tdp_mmu_pages_lock); - INIT_LIST_HEAD(&kvm->arch.tdp_mmu_pages); kvm->arch.tdp_mmu_zap_wq = wq; return 1; } @@ -54,7 +53,7 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm) /* Also waits for any queued work items. */ destroy_workqueue(kvm->arch.tdp_mmu_zap_wq); - WARN_ON(!list_empty(&kvm->arch.tdp_mmu_pages)); + WARN_ON(atomic64_read(&kvm->arch.tdp_mmu_pages)); WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots)); /* @@ -284,6 +283,8 @@ static struct kvm_mmu_page *tdp_mmu_alloc_sp(struct kvm_vcpu *vcpu) static void tdp_mmu_init_sp(struct kvm_mmu_page *sp, tdp_ptep_t sptep, gfn_t gfn, union kvm_mmu_page_role role) { + INIT_LIST_HEAD(&sp->possible_nx_huge_page_link); + set_page_private(virt_to_page(sp->spt), (unsigned long)sp); sp->role = role; @@ -375,11 +376,13 @@ static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn, static void tdp_account_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp) { kvm_account_pgtable_pages((void *)sp->spt, +1); + atomic64_inc(&kvm->arch.tdp_mmu_pages); } static void tdp_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp) { kvm_account_pgtable_pages((void *)sp->spt, -1); + atomic64_dec(&kvm->arch.tdp_mmu_pages); } /** @@ -395,14 +398,17 @@ static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp, bool shared) { tdp_unaccount_mmu_page(kvm, sp); + + if (!sp->nx_huge_page_disallowed) + return; + if (shared) spin_lock(&kvm->arch.tdp_mmu_pages_lock); else lockdep_assert_held_write(&kvm->mmu_lock); - list_del(&sp->link); - if (sp->lpage_disallowed) - unaccount_huge_nx_page(kvm, sp); + sp->nx_huge_page_disallowed = false; + untrack_possible_nx_huge_page(kvm, sp); if (shared) spin_unlock(&kvm->arch.tdp_mmu_pages_lock); @@ -1116,16 +1122,13 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu, * @kvm: kvm instance * @iter: a tdp_iter instance currently on the SPTE that should be set * @sp: The new TDP page table to install. - * @account_nx: True if this page table is being installed to split a - * non-executable huge page. * @shared: This operation is running under the MMU lock in read mode. * * Returns: 0 if the new page table was installed. Non-0 if the page table * could not be installed (e.g. the atomic compare-exchange failed). */ static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter, - struct kvm_mmu_page *sp, bool account_nx, - bool shared) + struct kvm_mmu_page *sp, bool shared) { u64 spte = make_nonleaf_spte(sp->spt, !kvm_ad_enabled()); int ret = 0; @@ -1138,16 +1141,14 @@ static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter, tdp_mmu_set_spte(kvm, iter, spte); } - spin_lock(&kvm->arch.tdp_mmu_pages_lock); - list_add(&sp->link, &kvm->arch.tdp_mmu_pages); - if (account_nx) - account_huge_nx_page(kvm, sp); - spin_unlock(&kvm->arch.tdp_mmu_pages_lock); tdp_account_mmu_page(kvm, sp); return 0; } +static int tdp_mmu_split_huge_page(struct kvm *kvm, struct tdp_iter *iter, + struct kvm_mmu_page *sp, bool shared); + /* * Handle a TDP page fault (NPT/EPT violation/misconfiguration) by installing * page tables and SPTEs to translate the faulting guest physical address. @@ -1155,9 +1156,10 @@ static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter, int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) { struct kvm_mmu *mmu = vcpu->arch.mmu; + struct kvm *kvm = vcpu->kvm; struct tdp_iter iter; struct kvm_mmu_page *sp; - int ret; + int ret = RET_PF_RETRY; kvm_mmu_hugepage_adjust(vcpu, fault); @@ -1166,6 +1168,8 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) rcu_read_lock(); tdp_mmu_for_each_pte(iter, mmu, fault->gfn, fault->gfn + 1) { + int r; + if (fault->nx_huge_page_workaround_enabled) disallowed_hugepage_adjust(fault, iter.old_spte, iter.level); @@ -1173,57 +1177,52 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault) break; /* - * If there is an SPTE mapping a large page at a higher level - * than the target, that SPTE must be cleared and replaced - * with a non-leaf SPTE. + * If SPTE has been frozen by another thread, just give up and + * retry, avoiding unnecessary page table allocation and free. */ - if (is_shadow_present_pte(iter.old_spte) && - is_large_pte(iter.old_spte)) { - if (tdp_mmu_zap_spte_atomic(vcpu->kvm, &iter)) - break; + if (is_removed_spte(iter.old_spte)) + goto retry; - /* - * The iter must explicitly re-read the spte here - * because the new value informs the !present - * path below. - */ - iter.old_spte = kvm_tdp_mmu_read_spte(iter.sptep); - } + /* Step down into the lower level page table if it exists. */ + if (is_shadow_present_pte(iter.old_spte) && + !is_large_pte(iter.old_spte)) + continue; - if (!is_shadow_present_pte(iter.old_spte)) { - bool account_nx = fault->huge_page_disallowed && - fault->req_level >= iter.level; + /* + * The SPTE is either non-present or points to a huge page that + * needs to be split. + */ + sp = tdp_mmu_alloc_sp(vcpu); + tdp_mmu_init_child_sp(sp, &iter); - /* - * If SPTE has been frozen by another thread, just - * give up and retry, avoiding unnecessary page table - * allocation and free. - */ - if (is_removed_spte(iter.old_spte)) - break; + sp->nx_huge_page_disallowed = fault->huge_page_disallowed; - sp = tdp_mmu_alloc_sp(vcpu); - tdp_mmu_init_child_sp(sp, &iter); + if (is_shadow_present_pte(iter.old_spte)) + r = tdp_mmu_split_huge_page(kvm, &iter, sp, true); + else + r = tdp_mmu_link_sp(kvm, &iter, sp, true); - if (tdp_mmu_link_sp(vcpu->kvm, &iter, sp, account_nx, true)) { - tdp_mmu_free_sp(sp); - break; - } + /* + * Also force the guest to retry the access if the upper level SPTEs + * aren't in place. + */ + if (r) { + tdp_mmu_free_sp(sp); + goto retry; } - } - /* - * Force the guest to retry the access if the upper level SPTEs aren't - * in place, or if the target leaf SPTE is frozen by another CPU. - */ - if (iter.level != fault->goal_level || is_removed_spte(iter.old_spte)) { - rcu_read_unlock(); - return RET_PF_RETRY; + if (fault->huge_page_disallowed && + fault->req_level >= iter.level) { + spin_lock(&kvm->arch.tdp_mmu_pages_lock); + track_possible_nx_huge_page(kvm, sp); + spin_unlock(&kvm->arch.tdp_mmu_pages_lock); + } } ret = tdp_mmu_map_handle_target_level(vcpu, fault, &iter); - rcu_read_unlock(); +retry: + rcu_read_unlock(); return ret; } @@ -1472,6 +1471,7 @@ static struct kvm_mmu_page *tdp_mmu_alloc_sp_for_split(struct kvm *kvm, return sp; } +/* Note, the caller is responsible for initializing @sp. */ static int tdp_mmu_split_huge_page(struct kvm *kvm, struct tdp_iter *iter, struct kvm_mmu_page *sp, bool shared) { @@ -1479,8 +1479,6 @@ static int tdp_mmu_split_huge_page(struct kvm *kvm, struct tdp_iter *iter, const int level = iter->level; int ret, i; - tdp_mmu_init_child_sp(sp, iter); - /* * No need for atomics when writing to sp->spt since the page table has * not been linked in yet and thus is not reachable from any other CPU. @@ -1496,7 +1494,7 @@ static int tdp_mmu_split_huge_page(struct kvm *kvm, struct tdp_iter *iter, * correctness standpoint since the translation will be the same either * way. */ - ret = tdp_mmu_link_sp(kvm, iter, sp, false, shared); + ret = tdp_mmu_link_sp(kvm, iter, sp, shared); if (ret) goto out; @@ -1556,6 +1554,8 @@ retry: continue; } + tdp_mmu_init_child_sp(sp, &iter); + if (tdp_mmu_split_huge_page(kvm, &iter, sp, shared)) goto retry; |