summaryrefslogtreecommitdiff
path: root/arch/arm64/kvm/hyp/nvhe/pkvm.c
diff options
context:
space:
mode:
authorFuad Tabba <tabba@google.com>2022-11-10 22:02:45 +0300
committerMarc Zyngier <maz@kernel.org>2022-11-11 20:16:05 +0300
commita1ec5c70d3f63d8a143fb83cd7f53bd8ff2f72c8 (patch)
treea5402ab73434c6fc7703af7b4900f11c7b0dca04 /arch/arm64/kvm/hyp/nvhe/pkvm.c
parent5304002dc3754a5663d75c977bfa2d9e3c08906d (diff)
downloadlinux-a1ec5c70d3f63d8a143fb83cd7f53bd8ff2f72c8.tar.xz
KVM: arm64: Add infrastructure to create and track pKVM instances at EL2
Introduce a global table (and lock) to track pKVM instances at EL2, and provide hypercalls that can be used by the untrusted host to create and destroy pKVM VMs and their vCPUs. pKVM VM/vCPU state is directly accessible only by the trusted hypervisor (EL2). Each pKVM VM is directly associated with an untrusted host KVM instance, and is referenced by the host using an opaque handle. Future patches will provide hypercalls to allow the host to initialize/set/get pKVM VM/vCPU state using the opaque handle. Tested-by: Vincent Donnefort <vdonnefort@google.com> Signed-off-by: Fuad Tabba <tabba@google.com> Co-developed-by: Will Deacon <will@kernel.org> Signed-off-by: Will Deacon <will@kernel.org> [maz: silence warning on unmap_donated_memory_noclear()] Signed-off-by: Marc Zyngier <maz@kernel.org> Link: https://lore.kernel.org/r/20221110190259.26861-13-will@kernel.org
Diffstat (limited to 'arch/arm64/kvm/hyp/nvhe/pkvm.c')
-rw-r--r--arch/arm64/kvm/hyp/nvhe/pkvm.c380
1 files changed, 380 insertions, 0 deletions
diff --git a/arch/arm64/kvm/hyp/nvhe/pkvm.c b/arch/arm64/kvm/hyp/nvhe/pkvm.c
index 85d3b7ae720f..135c9a095eca 100644
--- a/arch/arm64/kvm/hyp/nvhe/pkvm.c
+++ b/arch/arm64/kvm/hyp/nvhe/pkvm.c
@@ -7,6 +7,9 @@
#include <linux/kvm_host.h>
#include <linux/mm.h>
#include <nvhe/fixed_config.h>
+#include <nvhe/mem_protect.h>
+#include <nvhe/memory.h>
+#include <nvhe/pkvm.h>
#include <nvhe/trap_handler.h>
/*
@@ -183,3 +186,380 @@ void __pkvm_vcpu_init_traps(struct kvm_vcpu *vcpu)
pvm_init_traps_aa64mmfr0(vcpu);
pvm_init_traps_aa64mmfr1(vcpu);
}
+
+/*
+ * Start the VM table handle at the offset defined instead of at 0.
+ * Mainly for sanity checking and debugging.
+ */
+#define HANDLE_OFFSET 0x1000
+
+static unsigned int vm_handle_to_idx(pkvm_handle_t handle)
+{
+ return handle - HANDLE_OFFSET;
+}
+
+static pkvm_handle_t idx_to_vm_handle(unsigned int idx)
+{
+ return idx + HANDLE_OFFSET;
+}
+
+/*
+ * Spinlock for protecting state related to the VM table. Protects writes
+ * to 'vm_table' and 'nr_table_entries' as well as reads and writes to
+ * 'last_hyp_vcpu_lookup'.
+ */
+static DEFINE_HYP_SPINLOCK(vm_table_lock);
+
+/*
+ * The table of VM entries for protected VMs in hyp.
+ * Allocated at hyp initialization and setup.
+ */
+static struct pkvm_hyp_vm **vm_table;
+
+void pkvm_hyp_vm_table_init(void *tbl)
+{
+ WARN_ON(vm_table);
+ vm_table = tbl;
+}
+
+/*
+ * Return the hyp vm structure corresponding to the handle.
+ */
+static struct pkvm_hyp_vm *get_vm_by_handle(pkvm_handle_t handle)
+{
+ unsigned int idx = vm_handle_to_idx(handle);
+
+ if (unlikely(idx >= KVM_MAX_PVMS))
+ return NULL;
+
+ return vm_table[idx];
+}
+
+static void unpin_host_vcpu(struct kvm_vcpu *host_vcpu)
+{
+ if (host_vcpu)
+ hyp_unpin_shared_mem(host_vcpu, host_vcpu + 1);
+}
+
+static void unpin_host_vcpus(struct pkvm_hyp_vcpu *hyp_vcpus[],
+ unsigned int nr_vcpus)
+{
+ int i;
+
+ for (i = 0; i < nr_vcpus; i++)
+ unpin_host_vcpu(hyp_vcpus[i]->host_vcpu);
+}
+
+static void init_pkvm_hyp_vm(struct kvm *host_kvm, struct pkvm_hyp_vm *hyp_vm,
+ unsigned int nr_vcpus)
+{
+ hyp_vm->host_kvm = host_kvm;
+ hyp_vm->kvm.created_vcpus = nr_vcpus;
+ hyp_vm->kvm.arch.vtcr = host_mmu.arch.vtcr;
+}
+
+static int init_pkvm_hyp_vcpu(struct pkvm_hyp_vcpu *hyp_vcpu,
+ struct pkvm_hyp_vm *hyp_vm,
+ struct kvm_vcpu *host_vcpu,
+ unsigned int vcpu_idx)
+{
+ int ret = 0;
+
+ if (hyp_pin_shared_mem(host_vcpu, host_vcpu + 1))
+ return -EBUSY;
+
+ if (host_vcpu->vcpu_idx != vcpu_idx) {
+ ret = -EINVAL;
+ goto done;
+ }
+
+ hyp_vcpu->host_vcpu = host_vcpu;
+
+ hyp_vcpu->vcpu.kvm = &hyp_vm->kvm;
+ hyp_vcpu->vcpu.vcpu_id = READ_ONCE(host_vcpu->vcpu_id);
+ hyp_vcpu->vcpu.vcpu_idx = vcpu_idx;
+
+ hyp_vcpu->vcpu.arch.hw_mmu = &hyp_vm->kvm.arch.mmu;
+done:
+ if (ret)
+ unpin_host_vcpu(host_vcpu);
+ return ret;
+}
+
+static int find_free_vm_table_entry(struct kvm *host_kvm)
+{
+ int i;
+
+ for (i = 0; i < KVM_MAX_PVMS; ++i) {
+ if (!vm_table[i])
+ return i;
+ }
+
+ return -ENOMEM;
+}
+
+/*
+ * Allocate a VM table entry and insert a pointer to the new vm.
+ *
+ * Return a unique handle to the protected VM on success,
+ * negative error code on failure.
+ */
+static pkvm_handle_t insert_vm_table_entry(struct kvm *host_kvm,
+ struct pkvm_hyp_vm *hyp_vm)
+{
+ struct kvm_s2_mmu *mmu = &hyp_vm->kvm.arch.mmu;
+ int idx;
+
+ hyp_assert_lock_held(&vm_table_lock);
+
+ /*
+ * Initializing protected state might have failed, yet a malicious
+ * host could trigger this function. Thus, ensure that 'vm_table'
+ * exists.
+ */
+ if (unlikely(!vm_table))
+ return -EINVAL;
+
+ idx = find_free_vm_table_entry(host_kvm);
+ if (idx < 0)
+ return idx;
+
+ hyp_vm->kvm.arch.pkvm_handle = idx_to_vm_handle(idx);
+
+ /* VMID 0 is reserved for the host */
+ atomic64_set(&mmu->vmid.id, idx + 1);
+
+ mmu->arch = &hyp_vm->kvm.arch;
+ mmu->pgt = &hyp_vm->pgt;
+
+ vm_table[idx] = hyp_vm;
+ return hyp_vm->kvm.arch.pkvm_handle;
+}
+
+/*
+ * Deallocate and remove the VM table entry corresponding to the handle.
+ */
+static void remove_vm_table_entry(pkvm_handle_t handle)
+{
+ hyp_assert_lock_held(&vm_table_lock);
+ vm_table[vm_handle_to_idx(handle)] = NULL;
+}
+
+static size_t pkvm_get_hyp_vm_size(unsigned int nr_vcpus)
+{
+ return size_add(sizeof(struct pkvm_hyp_vm),
+ size_mul(sizeof(struct pkvm_hyp_vcpu *), nr_vcpus));
+}
+
+static void *map_donated_memory_noclear(unsigned long host_va, size_t size)
+{
+ void *va = (void *)kern_hyp_va(host_va);
+
+ if (!PAGE_ALIGNED(va))
+ return NULL;
+
+ if (__pkvm_host_donate_hyp(hyp_virt_to_pfn(va),
+ PAGE_ALIGN(size) >> PAGE_SHIFT))
+ return NULL;
+
+ return va;
+}
+
+static void *map_donated_memory(unsigned long host_va, size_t size)
+{
+ void *va = map_donated_memory_noclear(host_va, size);
+
+ if (va)
+ memset(va, 0, size);
+
+ return va;
+}
+
+static void __unmap_donated_memory(void *va, size_t size)
+{
+ WARN_ON(__pkvm_hyp_donate_host(hyp_virt_to_pfn(va),
+ PAGE_ALIGN(size) >> PAGE_SHIFT));
+}
+
+static void unmap_donated_memory(void *va, size_t size)
+{
+ if (!va)
+ return;
+
+ memset(va, 0, size);
+ __unmap_donated_memory(va, size);
+}
+
+static void __maybe_unused unmap_donated_memory_noclear(void *va, size_t size)
+{
+ if (!va)
+ return;
+
+ __unmap_donated_memory(va, size);
+}
+
+/*
+ * Initialize the hypervisor copy of the protected VM state using the
+ * memory donated by the host.
+ *
+ * Unmaps the donated memory from the host at stage 2.
+ *
+ * host_kvm: A pointer to the host's struct kvm.
+ * vm_hva: The host va of the area being donated for the VM state.
+ * Must be page aligned.
+ * pgd_hva: The host va of the area being donated for the stage-2 PGD for
+ * the VM. Must be page aligned. Its size is implied by the VM's
+ * VTCR.
+ *
+ * Return a unique handle to the protected VM on success,
+ * negative error code on failure.
+ */
+int __pkvm_init_vm(struct kvm *host_kvm, unsigned long vm_hva,
+ unsigned long pgd_hva)
+{
+ struct pkvm_hyp_vm *hyp_vm = NULL;
+ size_t vm_size, pgd_size;
+ unsigned int nr_vcpus;
+ void *pgd = NULL;
+ int ret;
+
+ ret = hyp_pin_shared_mem(host_kvm, host_kvm + 1);
+ if (ret)
+ return ret;
+
+ nr_vcpus = READ_ONCE(host_kvm->created_vcpus);
+ if (nr_vcpus < 1) {
+ ret = -EINVAL;
+ goto err_unpin_kvm;
+ }
+
+ vm_size = pkvm_get_hyp_vm_size(nr_vcpus);
+ pgd_size = kvm_pgtable_stage2_pgd_size(host_mmu.arch.vtcr);
+
+ ret = -ENOMEM;
+
+ hyp_vm = map_donated_memory(vm_hva, vm_size);
+ if (!hyp_vm)
+ goto err_remove_mappings;
+
+ pgd = map_donated_memory_noclear(pgd_hva, pgd_size);
+ if (!pgd)
+ goto err_remove_mappings;
+
+ init_pkvm_hyp_vm(host_kvm, hyp_vm, nr_vcpus);
+
+ hyp_spin_lock(&vm_table_lock);
+ ret = insert_vm_table_entry(host_kvm, hyp_vm);
+ if (ret < 0)
+ goto err_unlock;
+
+ ret = kvm_guest_prepare_stage2(hyp_vm, pgd);
+ if (ret)
+ goto err_remove_vm_table_entry;
+ hyp_spin_unlock(&vm_table_lock);
+
+ return hyp_vm->kvm.arch.pkvm_handle;
+
+err_remove_vm_table_entry:
+ remove_vm_table_entry(hyp_vm->kvm.arch.pkvm_handle);
+err_unlock:
+ hyp_spin_unlock(&vm_table_lock);
+err_remove_mappings:
+ unmap_donated_memory(hyp_vm, vm_size);
+ unmap_donated_memory(pgd, pgd_size);
+err_unpin_kvm:
+ hyp_unpin_shared_mem(host_kvm, host_kvm + 1);
+ return ret;
+}
+
+/*
+ * Initialize the hypervisor copy of the protected vCPU state using the
+ * memory donated by the host.
+ *
+ * handle: The handle for the protected vm.
+ * host_vcpu: A pointer to the corresponding host vcpu.
+ * vcpu_hva: The host va of the area being donated for the vcpu state.
+ * Must be page aligned. The size of the area must be equal to
+ * the page-aligned size of 'struct pkvm_hyp_vcpu'.
+ * Return 0 on success, negative error code on failure.
+ */
+int __pkvm_init_vcpu(pkvm_handle_t handle, struct kvm_vcpu *host_vcpu,
+ unsigned long vcpu_hva)
+{
+ struct pkvm_hyp_vcpu *hyp_vcpu;
+ struct pkvm_hyp_vm *hyp_vm;
+ unsigned int idx;
+ int ret;
+
+ hyp_vcpu = map_donated_memory(vcpu_hva, sizeof(*hyp_vcpu));
+ if (!hyp_vcpu)
+ return -ENOMEM;
+
+ hyp_spin_lock(&vm_table_lock);
+
+ hyp_vm = get_vm_by_handle(handle);
+ if (!hyp_vm) {
+ ret = -ENOENT;
+ goto unlock;
+ }
+
+ idx = hyp_vm->nr_vcpus;
+ if (idx >= hyp_vm->kvm.created_vcpus) {
+ ret = -EINVAL;
+ goto unlock;
+ }
+
+ ret = init_pkvm_hyp_vcpu(hyp_vcpu, hyp_vm, host_vcpu, idx);
+ if (ret)
+ goto unlock;
+
+ hyp_vm->vcpus[idx] = hyp_vcpu;
+ hyp_vm->nr_vcpus++;
+unlock:
+ hyp_spin_unlock(&vm_table_lock);
+
+ if (ret)
+ unmap_donated_memory(hyp_vcpu, sizeof(*hyp_vcpu));
+
+ return ret;
+}
+
+int __pkvm_teardown_vm(pkvm_handle_t handle)
+{
+ struct pkvm_hyp_vm *hyp_vm;
+ struct kvm *host_kvm;
+ size_t vm_size;
+ int err;
+
+ hyp_spin_lock(&vm_table_lock);
+ hyp_vm = get_vm_by_handle(handle);
+ if (!hyp_vm) {
+ err = -ENOENT;
+ goto err_unlock;
+ }
+
+ if (WARN_ON(hyp_page_count(hyp_vm))) {
+ err = -EBUSY;
+ goto err_unlock;
+ }
+
+ /* Ensure the VMID is clean before it can be reallocated */
+ __kvm_tlb_flush_vmid(&hyp_vm->kvm.arch.mmu);
+ remove_vm_table_entry(handle);
+ hyp_spin_unlock(&vm_table_lock);
+
+ /* Reclaim guest pages (including page-table pages) */
+ reclaim_guest_pages(hyp_vm);
+ unpin_host_vcpus(hyp_vm->vcpus, hyp_vm->nr_vcpus);
+
+ /* Return the metadata pages to the host */
+ host_kvm = hyp_vm->host_kvm;
+ vm_size = pkvm_get_hyp_vm_size(hyp_vm->kvm.created_vcpus);
+ unmap_donated_memory(hyp_vm, vm_size);
+ hyp_unpin_shared_mem(host_kvm, host_kvm + 1);
+ return 0;
+
+err_unlock:
+ hyp_spin_unlock(&vm_table_lock);
+ return err;
+}