summaryrefslogtreecommitdiff
path: root/arch/x86/kvm/mmu/tdp_mmu.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/mmu/tdp_mmu.c')
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c114
1 files changed, 57 insertions, 57 deletions
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 672f0432d777..771210ce5181 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -29,7 +29,6 @@ int kvm_mmu_init_tdp_mmu(struct kvm *kvm)
kvm->arch.tdp_mmu_enabled = true;
INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots);
spin_lock_init(&kvm->arch.tdp_mmu_pages_lock);
- INIT_LIST_HEAD(&kvm->arch.tdp_mmu_pages);
kvm->arch.tdp_mmu_zap_wq = wq;
return 1;
}
@@ -54,7 +53,7 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
/* Also waits for any queued work items. */
destroy_workqueue(kvm->arch.tdp_mmu_zap_wq);
- WARN_ON(!list_empty(&kvm->arch.tdp_mmu_pages));
+ WARN_ON(atomic64_read(&kvm->arch.tdp_mmu_pages));
WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots));
/*
@@ -284,6 +283,8 @@ static struct kvm_mmu_page *tdp_mmu_alloc_sp(struct kvm_vcpu *vcpu)
static void tdp_mmu_init_sp(struct kvm_mmu_page *sp, tdp_ptep_t sptep,
gfn_t gfn, union kvm_mmu_page_role role)
{
+ INIT_LIST_HEAD(&sp->possible_nx_huge_page_link);
+
set_page_private(virt_to_page(sp->spt), (unsigned long)sp);
sp->role = role;
@@ -375,11 +376,13 @@ static void handle_changed_spte_dirty_log(struct kvm *kvm, int as_id, gfn_t gfn,
static void tdp_account_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{
kvm_account_pgtable_pages((void *)sp->spt, +1);
+ atomic64_inc(&kvm->arch.tdp_mmu_pages);
}
static void tdp_unaccount_mmu_page(struct kvm *kvm, struct kvm_mmu_page *sp)
{
kvm_account_pgtable_pages((void *)sp->spt, -1);
+ atomic64_dec(&kvm->arch.tdp_mmu_pages);
}
/**
@@ -395,14 +398,17 @@ static void tdp_mmu_unlink_sp(struct kvm *kvm, struct kvm_mmu_page *sp,
bool shared)
{
tdp_unaccount_mmu_page(kvm, sp);
+
+ if (!sp->nx_huge_page_disallowed)
+ return;
+
if (shared)
spin_lock(&kvm->arch.tdp_mmu_pages_lock);
else
lockdep_assert_held_write(&kvm->mmu_lock);
- list_del(&sp->link);
- if (sp->lpage_disallowed)
- unaccount_huge_nx_page(kvm, sp);
+ sp->nx_huge_page_disallowed = false;
+ untrack_possible_nx_huge_page(kvm, sp);
if (shared)
spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
@@ -1116,16 +1122,13 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
* @kvm: kvm instance
* @iter: a tdp_iter instance currently on the SPTE that should be set
* @sp: The new TDP page table to install.
- * @account_nx: True if this page table is being installed to split a
- * non-executable huge page.
* @shared: This operation is running under the MMU lock in read mode.
*
* Returns: 0 if the new page table was installed. Non-0 if the page table
* could not be installed (e.g. the atomic compare-exchange failed).
*/
static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter,
- struct kvm_mmu_page *sp, bool account_nx,
- bool shared)
+ struct kvm_mmu_page *sp, bool shared)
{
u64 spte = make_nonleaf_spte(sp->spt, !kvm_ad_enabled());
int ret = 0;
@@ -1138,16 +1141,14 @@ static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter,
tdp_mmu_set_spte(kvm, iter, spte);
}
- spin_lock(&kvm->arch.tdp_mmu_pages_lock);
- list_add(&sp->link, &kvm->arch.tdp_mmu_pages);
- if (account_nx)
- account_huge_nx_page(kvm, sp);
- spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
tdp_account_mmu_page(kvm, sp);
return 0;
}
+static int tdp_mmu_split_huge_page(struct kvm *kvm, struct tdp_iter *iter,
+ struct kvm_mmu_page *sp, bool shared);
+
/*
* Handle a TDP page fault (NPT/EPT violation/misconfiguration) by installing
* page tables and SPTEs to translate the faulting guest physical address.
@@ -1155,9 +1156,10 @@ static int tdp_mmu_link_sp(struct kvm *kvm, struct tdp_iter *iter,
int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
{
struct kvm_mmu *mmu = vcpu->arch.mmu;
+ struct kvm *kvm = vcpu->kvm;
struct tdp_iter iter;
struct kvm_mmu_page *sp;
- int ret;
+ int ret = RET_PF_RETRY;
kvm_mmu_hugepage_adjust(vcpu, fault);
@@ -1166,6 +1168,8 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
rcu_read_lock();
tdp_mmu_for_each_pte(iter, mmu, fault->gfn, fault->gfn + 1) {
+ int r;
+
if (fault->nx_huge_page_workaround_enabled)
disallowed_hugepage_adjust(fault, iter.old_spte, iter.level);
@@ -1173,57 +1177,52 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, struct kvm_page_fault *fault)
break;
/*
- * If there is an SPTE mapping a large page at a higher level
- * than the target, that SPTE must be cleared and replaced
- * with a non-leaf SPTE.
+ * If SPTE has been frozen by another thread, just give up and
+ * retry, avoiding unnecessary page table allocation and free.
*/
- if (is_shadow_present_pte(iter.old_spte) &&
- is_large_pte(iter.old_spte)) {
- if (tdp_mmu_zap_spte_atomic(vcpu->kvm, &iter))
- break;
+ if (is_removed_spte(iter.old_spte))
+ goto retry;
- /*
- * The iter must explicitly re-read the spte here
- * because the new value informs the !present
- * path below.
- */
- iter.old_spte = kvm_tdp_mmu_read_spte(iter.sptep);
- }
+ /* Step down into the lower level page table if it exists. */
+ if (is_shadow_present_pte(iter.old_spte) &&
+ !is_large_pte(iter.old_spte))
+ continue;
- if (!is_shadow_present_pte(iter.old_spte)) {
- bool account_nx = fault->huge_page_disallowed &&
- fault->req_level >= iter.level;
+ /*
+ * The SPTE is either non-present or points to a huge page that
+ * needs to be split.
+ */
+ sp = tdp_mmu_alloc_sp(vcpu);
+ tdp_mmu_init_child_sp(sp, &iter);
- /*
- * If SPTE has been frozen by another thread, just
- * give up and retry, avoiding unnecessary page table
- * allocation and free.
- */
- if (is_removed_spte(iter.old_spte))
- break;
+ sp->nx_huge_page_disallowed = fault->huge_page_disallowed;
- sp = tdp_mmu_alloc_sp(vcpu);
- tdp_mmu_init_child_sp(sp, &iter);
+ if (is_shadow_present_pte(iter.old_spte))
+ r = tdp_mmu_split_huge_page(kvm, &iter, sp, true);
+ else
+ r = tdp_mmu_link_sp(kvm, &iter, sp, true);
- if (tdp_mmu_link_sp(vcpu->kvm, &iter, sp, account_nx, true)) {
- tdp_mmu_free_sp(sp);
- break;
- }
+ /*
+ * Also force the guest to retry the access if the upper level SPTEs
+ * aren't in place.
+ */
+ if (r) {
+ tdp_mmu_free_sp(sp);
+ goto retry;
}
- }
- /*
- * Force the guest to retry the access if the upper level SPTEs aren't
- * in place, or if the target leaf SPTE is frozen by another CPU.
- */
- if (iter.level != fault->goal_level || is_removed_spte(iter.old_spte)) {
- rcu_read_unlock();
- return RET_PF_RETRY;
+ if (fault->huge_page_disallowed &&
+ fault->req_level >= iter.level) {
+ spin_lock(&kvm->arch.tdp_mmu_pages_lock);
+ track_possible_nx_huge_page(kvm, sp);
+ spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
+ }
}
ret = tdp_mmu_map_handle_target_level(vcpu, fault, &iter);
- rcu_read_unlock();
+retry:
+ rcu_read_unlock();
return ret;
}
@@ -1472,6 +1471,7 @@ static struct kvm_mmu_page *tdp_mmu_alloc_sp_for_split(struct kvm *kvm,
return sp;
}
+/* Note, the caller is responsible for initializing @sp. */
static int tdp_mmu_split_huge_page(struct kvm *kvm, struct tdp_iter *iter,
struct kvm_mmu_page *sp, bool shared)
{
@@ -1479,8 +1479,6 @@ static int tdp_mmu_split_huge_page(struct kvm *kvm, struct tdp_iter *iter,
const int level = iter->level;
int ret, i;
- tdp_mmu_init_child_sp(sp, iter);
-
/*
* No need for atomics when writing to sp->spt since the page table has
* not been linked in yet and thus is not reachable from any other CPU.
@@ -1496,7 +1494,7 @@ static int tdp_mmu_split_huge_page(struct kvm *kvm, struct tdp_iter *iter,
* correctness standpoint since the translation will be the same either
* way.
*/
- ret = tdp_mmu_link_sp(kvm, iter, sp, false, shared);
+ ret = tdp_mmu_link_sp(kvm, iter, sp, shared);
if (ret)
goto out;
@@ -1556,6 +1554,8 @@ retry:
continue;
}
+ tdp_mmu_init_child_sp(sp, &iter);
+
if (tdp_mmu_split_huge_page(kvm, &iter, sp, shared))
goto retry;