summaryrefslogtreecommitdiff
path: root/arch/arm64/include/asm/kvm_pgtable.h
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-07-04 01:32:22 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2023-07-04 01:32:22 +0300
commite8069f5a8e3bdb5fdeeff895780529388592ee7a (patch)
treece35ab85db9b66a7e488707fccdb33ce54f696dd /arch/arm64/include/asm/kvm_pgtable.h
parenteded37770c9f80ecd5ba842359c4f1058d9812c3 (diff)
parent255006adb3da71bb75c334453786df781b415f54 (diff)
downloadlinux-e8069f5a8e3bdb5fdeeff895780529388592ee7a.tar.xz
Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull kvm updates from Paolo Bonzini: "ARM64: - Eager page splitting optimization for dirty logging, optionally allowing for a VM to avoid the cost of hugepage splitting in the stage-2 fault path. - Arm FF-A proxy for pKVM, allowing a pKVM host to safely interact with services that live in the Secure world. pKVM intervenes on FF-A calls to guarantee the host doesn't misuse memory donated to the hyp or a pKVM guest. - Support for running the split hypervisor with VHE enabled, known as 'hVHE' mode. This is extremely useful for testing the split hypervisor on VHE-only systems, and paves the way for new use cases that depend on having two TTBRs available at EL2. - Generalized framework for configurable ID registers from userspace. KVM/arm64 currently prevents arbitrary CPU feature set configuration from userspace, but the intent is to relax this limitation and allow userspace to select a feature set consistent with the CPU. - Enable the use of Branch Target Identification (FEAT_BTI) in the hypervisor. - Use a separate set of pointer authentication keys for the hypervisor when running in protected mode, as the host is untrusted at runtime. - Ensure timer IRQs are consistently released in the init failure paths. - Avoid trapping CTR_EL0 on systems with Enhanced Virtualization Traps (FEAT_EVT), as it is a register commonly read from userspace. - Erratum workaround for the upcoming AmpereOne part, which has broken hardware A/D state management. RISC-V: - Redirect AMO load/store misaligned traps to KVM guest - Trap-n-emulate AIA in-kernel irqchip for KVM guest - Svnapot support for KVM Guest s390: - New uvdevice secret API - CMM selftest and fixes - fix racy access to target CPU for diag 9c x86: - Fix missing/incorrect #GP checks on ENCLS - Use standard mmu_notifier hooks for handling APIC access page - Drop now unnecessary TR/TSS load after VM-Exit on AMD - Print more descriptive information about the status of SEV and SEV-ES during module load - Add a test for splitting and reconstituting hugepages during and after dirty logging - Add support for CPU pinning in demand paging test - Add support for AMD PerfMonV2, with a variety of cleanups and minor fixes included along the way - Add a "nx_huge_pages=never" option to effectively avoid creating NX hugepage recovery threads (because nx_huge_pages=off can be toggled at runtime) - Move handling of PAT out of MTRR code and dedup SVM+VMX code - Fix output of PIC poll command emulation when there's an interrupt - Add a maintainer's handbook to document KVM x86 processes, preferred coding style, testing expectations, etc. - Misc cleanups, fixes and comments Generic: - Miscellaneous bugfixes and cleanups Selftests: - Generate dependency files so that partial rebuilds work as expected" * tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (153 commits) Documentation/process: Add a maintainer handbook for KVM x86 Documentation/process: Add a label for the tip tree handbook's coding style KVM: arm64: Fix misuse of KVM_ARM_VCPU_POWER_OFF bit index RISC-V: KVM: Remove unneeded semicolon RISC-V: KVM: Allow Svnapot extension for Guest/VM riscv: kvm: define vcpu_sbi_ext_pmu in header RISC-V: KVM: Expose IMSIC registers as attributes of AIA irqchip RISC-V: KVM: Add in-kernel virtualization of AIA IMSIC RISC-V: KVM: Expose APLIC registers as attributes of AIA irqchip RISC-V: KVM: Add in-kernel emulation of AIA APLIC RISC-V: KVM: Implement device interface for AIA irqchip RISC-V: KVM: Skeletal in-kernel AIA irqchip support RISC-V: KVM: Set kvm_riscv_aia_nr_hgei to zero RISC-V: KVM: Add APLIC related defines RISC-V: KVM: Add IMSIC related defines RISC-V: KVM: Implement guest external interrupt line management KVM: x86: Remove PRIx* definitions as they are solely for user space s390/uv: Update query for secret-UVCs s390/uv: replace scnprintf with sysfs_emit s390/uvdevice: Add 'Lock Secret Store' UVC ...
Diffstat (limited to 'arch/arm64/include/asm/kvm_pgtable.h')
-rw-r--r--arch/arm64/include/asm/kvm_pgtable.h79
1 files changed, 75 insertions, 4 deletions
diff --git a/arch/arm64/include/asm/kvm_pgtable.h b/arch/arm64/include/asm/kvm_pgtable.h
index 93bd0975b15f..8294a9a7e566 100644
--- a/arch/arm64/include/asm/kvm_pgtable.h
+++ b/arch/arm64/include/asm/kvm_pgtable.h
@@ -92,6 +92,24 @@ static inline bool kvm_level_supports_block_mapping(u32 level)
return level >= KVM_PGTABLE_MIN_BLOCK_LEVEL;
}
+static inline u32 kvm_supported_block_sizes(void)
+{
+ u32 level = KVM_PGTABLE_MIN_BLOCK_LEVEL;
+ u32 r = 0;
+
+ for (; level < KVM_PGTABLE_MAX_LEVELS; level++)
+ r |= BIT(kvm_granule_shift(level));
+
+ return r;
+}
+
+static inline bool kvm_is_block_size_supported(u64 size)
+{
+ bool is_power_of_two = IS_ALIGNED(size, size);
+
+ return is_power_of_two && (size & kvm_supported_block_sizes());
+}
+
/**
* struct kvm_pgtable_mm_ops - Memory management callbacks.
* @zalloc_page: Allocate a single zeroed memory page.
@@ -104,7 +122,7 @@ static inline bool kvm_level_supports_block_mapping(u32 level)
* allocation is physically contiguous.
* @free_pages_exact: Free an exact number of memory pages previously
* allocated by zalloc_pages_exact.
- * @free_removed_table: Free a removed paging structure by unlinking and
+ * @free_unlinked_table: Free an unlinked paging structure by unlinking and
* dropping references.
* @get_page: Increment the refcount on a page.
* @put_page: Decrement the refcount on a page. When the
@@ -124,7 +142,7 @@ struct kvm_pgtable_mm_ops {
void* (*zalloc_page)(void *arg);
void* (*zalloc_pages_exact)(size_t size);
void (*free_pages_exact)(void *addr, size_t size);
- void (*free_removed_table)(void *addr, u32 level);
+ void (*free_unlinked_table)(void *addr, u32 level);
void (*get_page)(void *addr);
void (*put_page)(void *addr);
int (*page_count)(void *addr);
@@ -195,6 +213,12 @@ typedef bool (*kvm_pgtable_force_pte_cb_t)(u64 addr, u64 end,
* with other software walkers.
* @KVM_PGTABLE_WALK_HANDLE_FAULT: Indicates the page-table walk was
* invoked from a fault handler.
+ * @KVM_PGTABLE_WALK_SKIP_BBM_TLBI: Visit and update table entries
+ * without Break-before-make's
+ * TLB invalidation.
+ * @KVM_PGTABLE_WALK_SKIP_CMO: Visit and update table entries
+ * without Cache maintenance
+ * operations required.
*/
enum kvm_pgtable_walk_flags {
KVM_PGTABLE_WALK_LEAF = BIT(0),
@@ -202,6 +226,8 @@ enum kvm_pgtable_walk_flags {
KVM_PGTABLE_WALK_TABLE_POST = BIT(2),
KVM_PGTABLE_WALK_SHARED = BIT(3),
KVM_PGTABLE_WALK_HANDLE_FAULT = BIT(4),
+ KVM_PGTABLE_WALK_SKIP_BBM_TLBI = BIT(5),
+ KVM_PGTABLE_WALK_SKIP_CMO = BIT(6),
};
struct kvm_pgtable_visit_ctx {
@@ -441,7 +467,7 @@ int __kvm_pgtable_stage2_init(struct kvm_pgtable *pgt, struct kvm_s2_mmu *mmu,
void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
/**
- * kvm_pgtable_stage2_free_removed() - Free a removed stage-2 paging structure.
+ * kvm_pgtable_stage2_free_unlinked() - Free an unlinked stage-2 paging structure.
* @mm_ops: Memory management callbacks.
* @pgtable: Unlinked stage-2 paging structure to be freed.
* @level: Level of the stage-2 paging structure to be freed.
@@ -449,7 +475,33 @@ void kvm_pgtable_stage2_destroy(struct kvm_pgtable *pgt);
* The page-table is assumed to be unreachable by any hardware walkers prior to
* freeing and therefore no TLB invalidation is performed.
*/
-void kvm_pgtable_stage2_free_removed(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level);
+void kvm_pgtable_stage2_free_unlinked(struct kvm_pgtable_mm_ops *mm_ops, void *pgtable, u32 level);
+
+/**
+ * kvm_pgtable_stage2_create_unlinked() - Create an unlinked stage-2 paging structure.
+ * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init*().
+ * @phys: Physical address of the memory to map.
+ * @level: Starting level of the stage-2 paging structure to be created.
+ * @prot: Permissions and attributes for the mapping.
+ * @mc: Cache of pre-allocated and zeroed memory from which to allocate
+ * page-table pages.
+ * @force_pte: Force mappings to PAGE_SIZE granularity.
+ *
+ * Returns an unlinked page-table tree. This new page-table tree is
+ * not reachable (i.e., it is unlinked) from the root pgd and it's
+ * therefore unreachableby the hardware page-table walker. No TLB
+ * invalidation or CMOs are performed.
+ *
+ * If device attributes are not explicitly requested in @prot, then the
+ * mapping will be normal, cacheable.
+ *
+ * Return: The fully populated (unlinked) stage-2 paging structure, or
+ * an ERR_PTR(error) on failure.
+ */
+kvm_pte_t *kvm_pgtable_stage2_create_unlinked(struct kvm_pgtable *pgt,
+ u64 phys, u32 level,
+ enum kvm_pgtable_prot prot,
+ void *mc, bool force_pte);
/**
* kvm_pgtable_stage2_map() - Install a mapping in a guest stage-2 page-table.
@@ -621,6 +673,25 @@ bool kvm_pgtable_stage2_is_young(struct kvm_pgtable *pgt, u64 addr);
int kvm_pgtable_stage2_flush(struct kvm_pgtable *pgt, u64 addr, u64 size);
/**
+ * kvm_pgtable_stage2_split() - Split a range of huge pages into leaf PTEs pointing
+ * to PAGE_SIZE guest pages.
+ * @pgt: Page-table structure initialised by kvm_pgtable_stage2_init().
+ * @addr: Intermediate physical address from which to split.
+ * @size: Size of the range.
+ * @mc: Cache of pre-allocated and zeroed memory from which to allocate
+ * page-table pages.
+ *
+ * The function tries to split any level 1 or 2 entry that overlaps
+ * with the input range (given by @addr and @size).
+ *
+ * Return: 0 on success, negative error code on failure. Note that
+ * kvm_pgtable_stage2_split() is best effort: it tries to break as many
+ * blocks in the input range as allowed by @mc_capacity.
+ */
+int kvm_pgtable_stage2_split(struct kvm_pgtable *pgt, u64 addr, u64 size,
+ struct kvm_mmu_memory_cache *mc);
+
+/**
* kvm_pgtable_walk() - Walk a page-table.
* @pgt: Page-table structure initialised by kvm_pgtable_*_init().
* @addr: Input address for the start of the walk.