summaryrefslogtreecommitdiff
path: root/arch/arm64/kvm/hyp/pgtable.c
diff options
context:
space:
mode:
authorOliver Upton <oliver.upton@linux.dev>2022-11-08 00:56:37 +0300
committerMarc Zyngier <maz@kernel.org>2022-11-10 17:43:46 +0300
commit5c359cca1faf6d7671537fe1c240e8668467864d (patch)
tree06a518bb738a9da9588f1486d84d753c790c514e /arch/arm64/kvm/hyp/pgtable.c
parent6b91b8f95cadd3441c056182daf9024475ac4a91 (diff)
downloadlinux-5c359cca1faf6d7671537fe1c240e8668467864d.tar.xz
KVM: arm64: Tear down unlinked stage-2 subtree after break-before-make
The break-before-make sequence is a bit annoying as it opens a window wherein memory is unmapped from the guest. KVM should replace the PTE as quickly as possible and avoid unnecessary work in between. Presently, the stage-2 map walker tears down a removed table before installing a block mapping when coalescing a table into a block. As the removed table is no longer visible to hardware walkers after the DSB+TLBI, it is possible to move the remaining cleanup to happen after installing the new PTE. Reshuffle the stage-2 map walker to install the new block entry in the pre-order callback. Unwire all of the teardown logic and replace it with a call to kvm_pgtable_stage2_free_removed() after fixing the PTE. The post-order visitor is now completely unnecessary, so drop it. Finally, touch up the comments to better represent the now simplified map walker. Note that the call to tear down the unlinked stage-2 is indirected as a subsequent change will use an RCU callback to trigger tear down. RCU is not available to pKVM, so there is a need to use different implementations on pKVM and non-pKVM VMs. Signed-off-by: Oliver Upton <oliver.upton@linux.dev> Reviewed-by: Ben Gardon <bgardon@google.com> Signed-off-by: Marc Zyngier <maz@kernel.org> Link: https://lore.kernel.org/r/20221107215644.1895162-8-oliver.upton@linux.dev
Diffstat (limited to 'arch/arm64/kvm/hyp/pgtable.c')
-rw-r--r--arch/arm64/kvm/hyp/pgtable.c85
1 files changed, 22 insertions, 63 deletions
diff --git a/arch/arm64/kvm/hyp/pgtable.c b/arch/arm64/kvm/hyp/pgtable.c
index 7511494537e5..7c9782347570 100644
--- a/arch/arm64/kvm/hyp/pgtable.c
+++ b/arch/arm64/kvm/hyp/pgtable.c
@@ -750,13 +750,13 @@ static int stage2_map_walker_try_leaf(const struct kvm_pgtable_visit_ctx *ctx,
static int stage2_map_walk_table_pre(const struct kvm_pgtable_visit_ctx *ctx,
struct stage2_map_data *data)
{
- if (data->anchor)
- return 0;
+ struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
+ kvm_pte_t *childp = kvm_pte_follow(ctx->old, mm_ops);
+ int ret;
if (!stage2_leaf_mapping_allowed(ctx, data))
return 0;
- data->childp = kvm_pte_follow(ctx->old, ctx->mm_ops);
kvm_clear_pte(ctx->ptep);
/*
@@ -765,8 +765,13 @@ static int stage2_map_walk_table_pre(const struct kvm_pgtable_visit_ctx *ctx,
* individually.
*/
kvm_call_hyp(__kvm_tlb_flush_vmid, data->mmu);
- data->anchor = ctx->ptep;
- return 0;
+
+ ret = stage2_map_walker_try_leaf(ctx, data);
+
+ mm_ops->put_page(ctx->ptep);
+ mm_ops->free_removed_table(childp, ctx->level);
+
+ return ret;
}
static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx,
@@ -776,13 +781,6 @@ static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx,
kvm_pte_t *childp;
int ret;
- if (data->anchor) {
- if (stage2_pte_is_counted(ctx->old))
- mm_ops->put_page(ctx->ptep);
-
- return 0;
- }
-
ret = stage2_map_walker_try_leaf(ctx, data);
if (ret != -E2BIG)
return ret;
@@ -811,49 +809,14 @@ static int stage2_map_walk_leaf(const struct kvm_pgtable_visit_ctx *ctx,
return 0;
}
-static int stage2_map_walk_table_post(const struct kvm_pgtable_visit_ctx *ctx,
- struct stage2_map_data *data)
-{
- struct kvm_pgtable_mm_ops *mm_ops = ctx->mm_ops;
- kvm_pte_t *childp;
- int ret = 0;
-
- if (!data->anchor)
- return 0;
-
- if (data->anchor == ctx->ptep) {
- childp = data->childp;
- data->anchor = NULL;
- data->childp = NULL;
- ret = stage2_map_walk_leaf(ctx, data);
- } else {
- childp = kvm_pte_follow(ctx->old, mm_ops);
- }
-
- mm_ops->put_page(childp);
- mm_ops->put_page(ctx->ptep);
-
- return ret;
-}
-
/*
- * This is a little fiddly, as we use all three of the walk flags. The idea
- * is that the TABLE_PRE callback runs for table entries on the way down,
- * looking for table entries which we could conceivably replace with a
- * block entry for this mapping. If it finds one, then it sets the 'anchor'
- * field in 'struct stage2_map_data' to point at the table entry, before
- * clearing the entry to zero and descending into the now detached table.
- *
- * The behaviour of the LEAF callback then depends on whether or not the
- * anchor has been set. If not, then we're not using a block mapping higher
- * up the table and we perform the mapping at the existing leaves instead.
- * If, on the other hand, the anchor _is_ set, then we drop references to
- * all valid leaves so that the pages beneath the anchor can be freed.
+ * The TABLE_PRE callback runs for table entries on the way down, looking
+ * for table entries which we could conceivably replace with a block entry
+ * for this mapping. If it finds one it replaces the entry and calls
+ * kvm_pgtable_mm_ops::free_removed_table() to tear down the detached table.
*
- * Finally, the TABLE_POST callback does nothing if the anchor has not
- * been set, but otherwise frees the page-table pages while walking back up
- * the page-table, installing the block entry when it revisits the anchor
- * pointer and clearing the anchor to NULL.
+ * Otherwise, the LEAF callback performs the mapping at the existing leaves
+ * instead.
*/
static int stage2_map_walker(const struct kvm_pgtable_visit_ctx *ctx,
enum kvm_pgtable_walk_flags visit)
@@ -865,11 +828,9 @@ static int stage2_map_walker(const struct kvm_pgtable_visit_ctx *ctx,
return stage2_map_walk_table_pre(ctx, data);
case KVM_PGTABLE_WALK_LEAF:
return stage2_map_walk_leaf(ctx, data);
- case KVM_PGTABLE_WALK_TABLE_POST:
- return stage2_map_walk_table_post(ctx, data);
+ default:
+ return -EINVAL;
}
-
- return -EINVAL;
}
int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
@@ -886,8 +847,7 @@ int kvm_pgtable_stage2_map(struct kvm_pgtable *pgt, u64 addr, u64 size,
struct kvm_pgtable_walker walker = {
.cb = stage2_map_walker,
.flags = KVM_PGTABLE_WALK_TABLE_PRE |
- KVM_PGTABLE_WALK_LEAF |
- KVM_PGTABLE_WALK_TABLE_POST,
+ KVM_PGTABLE_WALK_LEAF,
.arg = &map_data,
};
@@ -917,8 +877,7 @@ int kvm_pgtable_stage2_set_owner(struct kvm_pgtable *pgt, u64 addr, u64 size,
struct kvm_pgtable_walker walker = {
.cb = stage2_map_walker,
.flags = KVM_PGTABLE_WALK_TABLE_PRE |
- KVM_PGTABLE_WALK_LEAF |
- KVM_PGTABLE_WALK_TABLE_POST,
+ KVM_PGTABLE_WALK_LEAF,
.arg = &map_data,
};
@@ -1207,7 +1166,7 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt)
void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level)
{
- kvm_pte_t *ptep = (kvm_pte_t *)pgtable;
+ kvm_pteref_t ptep = (kvm_pteref_t)pgtable;
struct kvm_pgtable_walker walker = {
.cb = stage2_free_walker,
.flags = KVM_PGTABLE_WALK_LEAF |
@@ -1225,5 +1184,5 @@ void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pg
.end = kvm_granule_size(level),
};
- WARN_ON(__kvm_pgtable_walk(&data, mm_ops, ptep, level));
+ WARN_ON(__kvm_pgtable_walk(&data, mm_ops, ptep, level + 1));
}