summaryrefslogtreecommitdiff
path: root/arch/x86/kvm/mmu/tdp_mmu.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kvm/mmu/tdp_mmu.c')
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c189
1 files changed, 86 insertions, 103 deletions
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 512163d52194..6cd4dd631a2f 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -12,18 +12,10 @@
#include <trace/events/kvm.h>
/* Initializes the TDP MMU for the VM, if enabled. */
-int kvm_mmu_init_tdp_mmu(struct kvm *kvm)
+void kvm_mmu_init_tdp_mmu(struct kvm *kvm)
{
- struct workqueue_struct *wq;
-
- wq = alloc_workqueue("kvm", WQ_UNBOUND|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE, 0);
- if (!wq)
- return -ENOMEM;
-
INIT_LIST_HEAD(&kvm->arch.tdp_mmu_roots);
spin_lock_init(&kvm->arch.tdp_mmu_pages_lock);
- kvm->arch.tdp_mmu_zap_wq = wq;
- return 1;
}
/* Arbitrarily returns true so that this may be used in if statements. */
@@ -46,20 +38,15 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
* ultimately frees all roots.
*/
kvm_tdp_mmu_invalidate_all_roots(kvm);
-
- /*
- * Destroying a workqueue also first flushes the workqueue, i.e. no
- * need to invoke kvm_tdp_mmu_zap_invalidated_roots().
- */
- destroy_workqueue(kvm->arch.tdp_mmu_zap_wq);
+ kvm_tdp_mmu_zap_invalidated_roots(kvm);
WARN_ON(atomic64_read(&kvm->arch.tdp_mmu_pages));
WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots));
/*
* Ensure that all the outstanding RCU callbacks to free shadow pages
- * can run before the VM is torn down. Work items on tdp_mmu_zap_wq
- * can call kvm_tdp_mmu_put_root and create new callbacks.
+ * can run before the VM is torn down. Putting the last reference to
+ * zapped roots will create new callbacks.
*/
rcu_barrier();
}
@@ -86,46 +73,6 @@ static void tdp_mmu_free_sp_rcu_callback(struct rcu_head *head)
tdp_mmu_free_sp(sp);
}
-static void tdp_mmu_zap_root(struct kvm *kvm, struct kvm_mmu_page *root,
- bool shared);
-
-static void tdp_mmu_zap_root_work(struct work_struct *work)
-{
- struct kvm_mmu_page *root = container_of(work, struct kvm_mmu_page,
- tdp_mmu_async_work);
- struct kvm *kvm = root->tdp_mmu_async_data;
-
- read_lock(&kvm->mmu_lock);
-
- /*
- * A TLB flush is not necessary as KVM performs a local TLB flush when
- * allocating a new root (see kvm_mmu_load()), and when migrating vCPU
- * to a different pCPU. Note, the local TLB flush on reuse also
- * invalidates any paging-structure-cache entries, i.e. TLB entries for
- * intermediate paging structures, that may be zapped, as such entries
- * are associated with the ASID on both VMX and SVM.
- */
- tdp_mmu_zap_root(kvm, root, true);
-
- /*
- * Drop the refcount using kvm_tdp_mmu_put_root() to test its logic for
- * avoiding an infinite loop. By design, the root is reachable while
- * it's being asynchronously zapped, thus a different task can put its
- * last reference, i.e. flowing through kvm_tdp_mmu_put_root() for an
- * asynchronously zapped root is unavoidable.
- */
- kvm_tdp_mmu_put_root(kvm, root, true);
-
- read_unlock(&kvm->mmu_lock);
-}
-
-static void tdp_mmu_schedule_zap_root(struct kvm *kvm, struct kvm_mmu_page *root)
-{
- root->tdp_mmu_async_data = kvm;
- INIT_WORK(&root->tdp_mmu_async_work, tdp_mmu_zap_root_work);
- queue_work(kvm->arch.tdp_mmu_zap_wq, &root->tdp_mmu_async_work);
-}
-
void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
bool shared)
{
@@ -211,8 +158,12 @@ static struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
#define for_each_valid_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared) \
__for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, _shared, true)
-#define for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id) \
- __for_each_tdp_mmu_root_yield_safe(_kvm, _root, _as_id, false, false)
+#define for_each_tdp_mmu_root_yield_safe(_kvm, _root, _shared) \
+ for (_root = tdp_mmu_next_root(_kvm, NULL, _shared, false); \
+ _root; \
+ _root = tdp_mmu_next_root(_kvm, _root, _shared, false)) \
+ if (!kvm_lockdep_assert_mmu_lock_held(_kvm, _shared)) { \
+ } else
/*
* Iterate over all TDP MMU roots. Requires that mmu_lock be held for write,
@@ -292,7 +243,7 @@ hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu)
* by a memslot update or by the destruction of the VM. Initialize the
* refcount to two; one reference for the vCPU, and one reference for
* the TDP MMU itself, which is held until the root is invalidated and
- * is ultimately put by tdp_mmu_zap_root_work().
+ * is ultimately put by kvm_tdp_mmu_zap_invalidated_roots().
*/
refcount_set(&root->tdp_mmu_root_count, 2);
@@ -475,9 +426,9 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
bool is_leaf = is_present && is_last_spte(new_spte, level);
bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte);
- WARN_ON(level > PT64_ROOT_MAX_LEVEL);
- WARN_ON(level < PG_LEVEL_4K);
- WARN_ON(gfn & (KVM_PAGES_PER_HPAGE(level) - 1));
+ WARN_ON_ONCE(level > PT64_ROOT_MAX_LEVEL);
+ WARN_ON_ONCE(level < PG_LEVEL_4K);
+ WARN_ON_ONCE(gfn & (KVM_PAGES_PER_HPAGE(level) - 1));
/*
* If this warning were to trigger it would indicate that there was a
@@ -522,9 +473,9 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
* impact the guest since both the former and current SPTEs
* are nonpresent.
*/
- if (WARN_ON(!is_mmio_spte(old_spte) &&
- !is_mmio_spte(new_spte) &&
- !is_removed_spte(new_spte)))
+ if (WARN_ON_ONCE(!is_mmio_spte(old_spte) &&
+ !is_mmio_spte(new_spte) &&
+ !is_removed_spte(new_spte)))
pr_err("Unexpected SPTE change! Nonpresent SPTEs\n"
"should not be replaced with another,\n"
"different nonpresent SPTE, unless one or both\n"
@@ -661,7 +612,7 @@ static u64 tdp_mmu_set_spte(struct kvm *kvm, int as_id, tdp_ptep_t sptep,
* should be used. If operating under the MMU lock in write mode, the
* use of the removed SPTE should not be necessary.
*/
- WARN_ON(is_removed_spte(old_spte) || is_removed_spte(new_spte));
+ WARN_ON_ONCE(is_removed_spte(old_spte) || is_removed_spte(new_spte));
old_spte = kvm_tdp_mmu_write_spte(sptep, old_spte, new_spte, level);
@@ -689,7 +640,7 @@ static inline void tdp_mmu_iter_set_spte(struct kvm *kvm, struct tdp_iter *iter,
else
#define tdp_mmu_for_each_pte(_iter, _mmu, _start, _end) \
- for_each_tdp_pte(_iter, to_shadow_page(_mmu->root.hpa), _start, _end)
+ for_each_tdp_pte(_iter, root_to_sp(_mmu->root.hpa), _start, _end)
/*
* Yield if the MMU lock is contended or this thread needs to return control
@@ -709,7 +660,7 @@ static inline bool __must_check tdp_mmu_iter_cond_resched(struct kvm *kvm,
struct tdp_iter *iter,
bool flush, bool shared)
{
- WARN_ON(iter->yielded);
+ WARN_ON_ONCE(iter->yielded);
/* Ensure forward progress has been made before yielding. */
if (iter->next_last_level_gfn == iter->yielded_gfn)
@@ -728,7 +679,7 @@ static inline bool __must_check tdp_mmu_iter_cond_resched(struct kvm *kvm,
rcu_read_lock();
- WARN_ON(iter->gfn > iter->next_last_level_gfn);
+ WARN_ON_ONCE(iter->gfn > iter->next_last_level_gfn);
iter->yielded = true;
}
@@ -877,13 +828,12 @@ static bool tdp_mmu_zap_leafs(struct kvm *kvm, struct kvm_mmu_page *root,
* true if a TLB flush is needed before releasing the MMU lock, i.e. if one or
* more SPTEs were zapped since the MMU lock was last acquired.
*/
-bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, int as_id, gfn_t start, gfn_t end,
- bool can_yield, bool flush)
+bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, gfn_t start, gfn_t end, bool flush)
{
struct kvm_mmu_page *root;
- for_each_tdp_mmu_root_yield_safe(kvm, root, as_id)
- flush = tdp_mmu_zap_leafs(kvm, root, start, end, can_yield, flush);
+ for_each_tdp_mmu_root_yield_safe(kvm, root, false)
+ flush = tdp_mmu_zap_leafs(kvm, root, start, end, true, flush);
return flush;
}
@@ -891,7 +841,6 @@ bool kvm_tdp_mmu_zap_leafs(struct kvm *kvm, int as_id, gfn_t start, gfn_t end,
void kvm_tdp_mmu_zap_all(struct kvm *kvm)
{
struct kvm_mmu_page *root;
- int i;
/*
* Zap all roots, including invalid roots, as all SPTEs must be dropped
@@ -905,10 +854,8 @@ void kvm_tdp_mmu_zap_all(struct kvm *kvm)
* is being destroyed or the userspace VMM has exited. In both cases,
* KVM_RUN is unreachable, i.e. no vCPUs will ever service the request.
*/
- for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) {
- for_each_tdp_mmu_root_yield_safe(kvm, root, i)
- tdp_mmu_zap_root(kvm, root, false);
- }
+ for_each_tdp_mmu_root_yield_safe(kvm, root, false)
+ tdp_mmu_zap_root(kvm, root, false);
}
/*
@@ -917,18 +864,47 @@ void kvm_tdp_mmu_zap_all(struct kvm *kvm)
*/
void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm)
{
- flush_workqueue(kvm->arch.tdp_mmu_zap_wq);
+ struct kvm_mmu_page *root;
+
+ read_lock(&kvm->mmu_lock);
+
+ for_each_tdp_mmu_root_yield_safe(kvm, root, true) {
+ if (!root->tdp_mmu_scheduled_root_to_zap)
+ continue;
+
+ root->tdp_mmu_scheduled_root_to_zap = false;
+ KVM_BUG_ON(!root->role.invalid, kvm);
+
+ /*
+ * A TLB flush is not necessary as KVM performs a local TLB
+ * flush when allocating a new root (see kvm_mmu_load()), and
+ * when migrating a vCPU to a different pCPU. Note, the local
+ * TLB flush on reuse also invalidates paging-structure-cache
+ * entries, i.e. TLB entries for intermediate paging structures,
+ * that may be zapped, as such entries are associated with the
+ * ASID on both VMX and SVM.
+ */
+ tdp_mmu_zap_root(kvm, root, true);
+
+ /*
+ * The referenced needs to be put *after* zapping the root, as
+ * the root must be reachable by mmu_notifiers while it's being
+ * zapped
+ */
+ kvm_tdp_mmu_put_root(kvm, root, true);
+ }
+
+ read_unlock(&kvm->mmu_lock);
}
/*
* Mark each TDP MMU root as invalid to prevent vCPUs from reusing a root that
* is about to be zapped, e.g. in response to a memslots update. The actual
- * zapping is performed asynchronously. Using a separate workqueue makes it
- * easy to ensure that the destruction is performed before the "fast zap"
- * completes, without keeping a separate list of invalidated roots; the list is
- * effectively the list of work items in the workqueue.
+ * zapping is done separately so that it happens with mmu_lock with read,
+ * whereas invalidating roots must be done with mmu_lock held for write (unless
+ * the VM is being destroyed).
*
- * Note, the asynchronous worker is gifted the TDP MMU's reference.
+ * Note, kvm_tdp_mmu_zap_invalidated_roots() is gifted the TDP MMU's reference.
* See kvm_tdp_mmu_get_vcpu_root_hpa().
*/
void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm)
@@ -953,19 +929,20 @@ void kvm_tdp_mmu_invalidate_all_roots(struct kvm *kvm)
/*
* As above, mmu_lock isn't held when destroying the VM! There can't
* be other references to @kvm, i.e. nothing else can invalidate roots
- * or be consuming roots, but walking the list of roots does need to be
- * guarded against roots being deleted by the asynchronous zap worker.
+ * or get/put references to roots.
*/
- rcu_read_lock();
-
- list_for_each_entry_rcu(root, &kvm->arch.tdp_mmu_roots, link) {
+ list_for_each_entry(root, &kvm->arch.tdp_mmu_roots, link) {
+ /*
+ * Note, invalid roots can outlive a memslot update! Invalid
+ * roots must be *zapped* before the memslot update completes,
+ * but a different task can acquire a reference and keep the
+ * root alive after its been zapped.
+ */
if (!root->role.invalid) {
+ root->tdp_mmu_scheduled_root_to_zap = true;
root->role.invalid = true;
- tdp_mmu_schedule_zap_root(kvm, root);
}
}
-
- rcu_read_unlock();
}
/*
@@ -1146,8 +1123,13 @@ retry:
bool kvm_tdp_mmu_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range,
bool flush)
{
- return kvm_tdp_mmu_zap_leafs(kvm, range->slot->as_id, range->start,
- range->end, range->may_block, flush);
+ struct kvm_mmu_page *root;
+
+ __for_each_tdp_mmu_root_yield_safe(kvm, root, range->slot->as_id, false, false)
+ flush = tdp_mmu_zap_leafs(kvm, root, range->start, range->end,
+ range->may_block, flush);
+
+ return flush;
}
typedef bool (*tdp_handler_t)(struct kvm *kvm, struct tdp_iter *iter,
@@ -1241,7 +1223,7 @@ static bool set_spte_gfn(struct kvm *kvm, struct tdp_iter *iter,
u64 new_spte;
/* Huge pages aren't expected to be modified without first being zapped. */
- WARN_ON(pte_huge(range->pte) || range->start + 1 != range->end);
+ WARN_ON_ONCE(pte_huge(range->arg.pte) || range->start + 1 != range->end);
if (iter->level != PG_LEVEL_4K ||
!is_shadow_present_pte(iter->old_spte))
@@ -1255,9 +1237,9 @@ static bool set_spte_gfn(struct kvm *kvm, struct tdp_iter *iter,
*/
tdp_mmu_iter_set_spte(kvm, iter, 0);
- if (!pte_write(range->pte)) {
+ if (!pte_write(range->arg.pte)) {
new_spte = kvm_mmu_changed_pte_notifier_make_spte(iter->old_spte,
- pte_pfn(range->pte));
+ pte_pfn(range->arg.pte));
tdp_mmu_iter_set_spte(kvm, iter, new_spte);
}
@@ -1548,8 +1530,8 @@ retry:
if (!is_shadow_present_pte(iter.old_spte))
continue;
- MMU_WARN_ON(kvm_ad_enabled() &&
- spte_ad_need_write_protect(iter.old_spte));
+ KVM_MMU_WARN_ON(kvm_ad_enabled() &&
+ spte_ad_need_write_protect(iter.old_spte));
if (!(iter.old_spte & dbit))
continue;
@@ -1600,6 +1582,8 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
shadow_dirty_mask;
struct tdp_iter iter;
+ lockdep_assert_held_write(&kvm->mmu_lock);
+
rcu_read_lock();
tdp_root_for_each_leaf_pte(iter, root, gfn + __ffs(mask),
@@ -1607,8 +1591,8 @@ static void clear_dirty_pt_masked(struct kvm *kvm, struct kvm_mmu_page *root,
if (!mask)
break;
- MMU_WARN_ON(kvm_ad_enabled() &&
- spte_ad_need_write_protect(iter.old_spte));
+ KVM_MMU_WARN_ON(kvm_ad_enabled() &&
+ spte_ad_need_write_protect(iter.old_spte));
if (iter.level > PG_LEVEL_4K ||
!(mask & (1UL << (iter.gfn - gfn))))
@@ -1646,7 +1630,6 @@ void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
{
struct kvm_mmu_page *root;
- lockdep_assert_held_write(&kvm->mmu_lock);
for_each_tdp_mmu_root(kvm, root, slot->as_id)
clear_dirty_pt_masked(kvm, root, gfn, mask, wrprot);
}