From fe0ef00304639cae82df7c9ad6a15286bd5f876e Mon Sep 17 00:00:00 2001 From: Nico Boehr Date: Thu, 20 Oct 2022 16:31:57 +0200 Subject: KVM: s390: sort out physical vs virtual pointers usage Fix virtual vs physical address confusion (which currently are the same). Signed-off-by: Nico Boehr Reviewed-by: Claudio Imbrenda Link: https://lore.kernel.org/r/20221020143159.294605-4-nrb@linux.ibm.com Message-Id: <20221020143159.294605-4-nrb@linux.ibm.com> Signed-off-by: Janosch Frank --- arch/s390/kvm/intercept.c | 2 +- arch/s390/kvm/kvm-s390.c | 44 +++++++++++++++++++++++++------------------- arch/s390/kvm/kvm-s390.h | 5 +++-- 3 files changed, 29 insertions(+), 22 deletions(-) (limited to 'arch/s390/kvm') diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 88112065d941..b703b5202f25 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -217,7 +217,7 @@ static int handle_itdb(struct kvm_vcpu *vcpu) return 0; if (current->thread.per_flags & PER_FLAG_NO_TE) return 0; - itdb = (struct kvm_s390_itdb *)vcpu->arch.sie_block->itdba; + itdb = phys_to_virt(vcpu->arch.sie_block->itdba); rc = write_guest_lc(vcpu, __LC_PGM_TDB, itdb, sizeof(*itdb)); if (rc) return rc; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 45d4b8182b07..0f7ff0c9019f 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -3329,28 +3329,30 @@ static void sca_del_vcpu(struct kvm_vcpu *vcpu) static void sca_add_vcpu(struct kvm_vcpu *vcpu) { if (!kvm_s390_use_sca_entries()) { - struct bsca_block *sca = vcpu->kvm->arch.sca; + phys_addr_t sca_phys = virt_to_phys(vcpu->kvm->arch.sca); /* we still need the basic sca for the ipte control */ - vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); - vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; + vcpu->arch.sie_block->scaoh = sca_phys >> 32; + vcpu->arch.sie_block->scaol = sca_phys; return; } read_lock(&vcpu->kvm->arch.sca_lock); if (vcpu->kvm->arch.use_esca) { struct esca_block *sca = vcpu->kvm->arch.sca; + phys_addr_t sca_phys = virt_to_phys(sca); - sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; - vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); - vcpu->arch.sie_block->scaol = (__u32)(__u64)sca & ~0x3fU; + sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block); + vcpu->arch.sie_block->scaoh = sca_phys >> 32; + vcpu->arch.sie_block->scaol = sca_phys & ESCA_SCAOL_MASK; vcpu->arch.sie_block->ecb2 |= ECB2_ESCA; set_bit_inv(vcpu->vcpu_id, (unsigned long *) sca->mcn); } else { struct bsca_block *sca = vcpu->kvm->arch.sca; + phys_addr_t sca_phys = virt_to_phys(sca); - sca->cpu[vcpu->vcpu_id].sda = (__u64) vcpu->arch.sie_block; - vcpu->arch.sie_block->scaoh = (__u32)(((__u64)sca) >> 32); - vcpu->arch.sie_block->scaol = (__u32)(__u64)sca; + sca->cpu[vcpu->vcpu_id].sda = virt_to_phys(vcpu->arch.sie_block); + vcpu->arch.sie_block->scaoh = sca_phys >> 32; + vcpu->arch.sie_block->scaol = sca_phys; set_bit_inv(vcpu->vcpu_id, (unsigned long *) &sca->mcn); } read_unlock(&vcpu->kvm->arch.sca_lock); @@ -3381,6 +3383,7 @@ static int sca_switch_to_extended(struct kvm *kvm) struct kvm_vcpu *vcpu; unsigned long vcpu_idx; u32 scaol, scaoh; + phys_addr_t new_sca_phys; if (kvm->arch.use_esca) return 0; @@ -3389,8 +3392,9 @@ static int sca_switch_to_extended(struct kvm *kvm) if (!new_sca) return -ENOMEM; - scaoh = (u32)((u64)(new_sca) >> 32); - scaol = (u32)(u64)(new_sca) & ~0x3fU; + new_sca_phys = virt_to_phys(new_sca); + scaoh = new_sca_phys >> 32; + scaol = new_sca_phys & ESCA_SCAOL_MASK; kvm_s390_vcpu_block_all(kvm); write_lock(&kvm->arch.sca_lock); @@ -3610,15 +3614,18 @@ static void kvm_s390_vcpu_crypto_setup(struct kvm_vcpu *vcpu) void kvm_s390_vcpu_unsetup_cmma(struct kvm_vcpu *vcpu) { - free_page(vcpu->arch.sie_block->cbrlo); + free_page((unsigned long)phys_to_virt(vcpu->arch.sie_block->cbrlo)); vcpu->arch.sie_block->cbrlo = 0; } int kvm_s390_vcpu_setup_cmma(struct kvm_vcpu *vcpu) { - vcpu->arch.sie_block->cbrlo = get_zeroed_page(GFP_KERNEL_ACCOUNT); - if (!vcpu->arch.sie_block->cbrlo) + void *cbrlo_page = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT); + + if (!cbrlo_page) return -ENOMEM; + + vcpu->arch.sie_block->cbrlo = virt_to_phys(cbrlo_page); return 0; } @@ -3628,7 +3635,7 @@ static void kvm_s390_vcpu_setup_model(struct kvm_vcpu *vcpu) vcpu->arch.sie_block->ibc = model->ibc; if (test_kvm_facility(vcpu->kvm, 7)) - vcpu->arch.sie_block->fac = (u32)(u64) model->fac_list; + vcpu->arch.sie_block->fac = virt_to_phys(model->fac_list); } static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu) @@ -3685,9 +3692,8 @@ static int kvm_s390_vcpu_setup(struct kvm_vcpu *vcpu) VCPU_EVENT(vcpu, 3, "AIV gisa format-%u enabled for cpu %03u", vcpu->arch.sie_block->gd & 0x3, vcpu->vcpu_id); } - vcpu->arch.sie_block->sdnxo = ((unsigned long) &vcpu->run->s.regs.sdnx) - | SDNXC; - vcpu->arch.sie_block->riccbd = (unsigned long) &vcpu->run->s.regs.riccb; + vcpu->arch.sie_block->sdnxo = virt_to_phys(&vcpu->run->s.regs.sdnx) | SDNXC; + vcpu->arch.sie_block->riccbd = virt_to_phys(&vcpu->run->s.regs.riccb); if (sclp.has_kss) kvm_s390_set_cpuflags(vcpu, CPUSTAT_KSS); @@ -3737,7 +3743,7 @@ int kvm_arch_vcpu_create(struct kvm_vcpu *vcpu) return -ENOMEM; vcpu->arch.sie_block = &sie_page->sie_block; - vcpu->arch.sie_block->itdba = (unsigned long) &sie_page->itdb; + vcpu->arch.sie_block->itdba = virt_to_phys(&sie_page->itdb); /* the real guest size will always be smaller than msl */ vcpu->arch.sie_block->mso = 0; diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index f6fd668f887e..a60d1e5c44cd 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -23,7 +23,8 @@ /* Transactional Memory Execution related macros */ #define IS_TE_ENABLED(vcpu) ((vcpu->arch.sie_block->ecb & ECB_TE)) #define TDB_FORMAT1 1 -#define IS_ITDB_VALID(vcpu) ((*(char *)vcpu->arch.sie_block->itdba == TDB_FORMAT1)) +#define IS_ITDB_VALID(vcpu) \ + ((*(char *)phys_to_virt((vcpu)->arch.sie_block->itdba) == TDB_FORMAT1)) extern debug_info_t *kvm_s390_dbf; extern debug_info_t *kvm_s390_dbf_uv; @@ -233,7 +234,7 @@ static inline unsigned long kvm_s390_get_gfn_end(struct kvm_memslots *slots) static inline u32 kvm_s390_get_gisa_desc(struct kvm *kvm) { - u32 gd = (u32)(u64)kvm->arch.gisa_int.origin; + u32 gd = virt_to_phys(kvm->arch.gisa_int.origin); if (gd && sclp.has_gisaf) gd |= GISA_FORMAT1; -- cgit v1.2.3 From b99f4512197acc10f63b5fb462c088c2f62b5120 Mon Sep 17 00:00:00 2001 From: Nico Boehr Date: Thu, 20 Oct 2022 16:31:58 +0200 Subject: KVM: s390: sida: sort out physical vs virtual pointers usage All callers of the sida_origin() macro actually expected a virtual address, so rename it to sida_addr() and hand out a virtual address. At some places, the macro wasn't used, potentially creating problems if the sida size ever becomes nonzero (not currently the case), so let's start using it everywhere now while at it. Signed-off-by: Nico Boehr Reviewed-by: Claudio Imbrenda Link: https://lore.kernel.org/r/20221020143159.294605-5-nrb@linux.ibm.com Message-Id: <20221020143159.294605-5-nrb@linux.ibm.com> Signed-off-by: Janosch Frank --- arch/s390/include/asm/kvm_host.h | 3 +-- arch/s390/kvm/intercept.c | 7 +++---- arch/s390/kvm/kvm-s390.c | 9 +++++---- arch/s390/kvm/priv.c | 3 +-- arch/s390/kvm/pv.c | 8 +++++--- 5 files changed, 15 insertions(+), 15 deletions(-) (limited to 'arch/s390/kvm') diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 931f97875899..21f1339a4197 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -142,8 +142,7 @@ struct mcck_volatile_info { CR14_EXTERNAL_DAMAGE_SUBMASK) #define SIDAD_SIZE_MASK 0xff -#define sida_origin(sie_block) \ - ((sie_block)->sidad & PAGE_MASK) +#define sida_addr(sie_block) phys_to_virt((sie_block)->sidad & PAGE_MASK) #define sida_size(sie_block) \ ((((sie_block)->sidad & SIDAD_SIZE_MASK) + 1) * PAGE_SIZE) diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index b703b5202f25..0ee02dae14b2 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -409,8 +409,7 @@ int handle_sthyi(struct kvm_vcpu *vcpu) out: if (!cc) { if (kvm_s390_pv_cpu_is_protected(vcpu)) { - memcpy((void *)(sida_origin(vcpu->arch.sie_block)), - sctns, PAGE_SIZE); + memcpy(sida_addr(vcpu->arch.sie_block), sctns, PAGE_SIZE); } else { r = write_guest(vcpu, addr, reg2, sctns, PAGE_SIZE); if (r) { @@ -464,7 +463,7 @@ static int handle_operexc(struct kvm_vcpu *vcpu) static int handle_pv_spx(struct kvm_vcpu *vcpu) { - u32 pref = *(u32 *)vcpu->arch.sie_block->sidad; + u32 pref = *(u32 *)sida_addr(vcpu->arch.sie_block); kvm_s390_set_prefix(vcpu, pref); trace_kvm_s390_handle_prefix(vcpu, 1, pref); @@ -497,7 +496,7 @@ static int handle_pv_sclp(struct kvm_vcpu *vcpu) static int handle_pv_uvc(struct kvm_vcpu *vcpu) { - struct uv_cb_share *guest_uvcb = (void *)vcpu->arch.sie_block->sidad; + struct uv_cb_share *guest_uvcb = sida_addr(vcpu->arch.sie_block); struct uv_cb_cts uvcb = { .header.cmd = UVC_CMD_UNPIN_PAGE_SHARED, .header.len = sizeof(uvcb), diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 0f7ff0c9019f..bd6e0201bfe5 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -5167,6 +5167,7 @@ static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu, struct kvm_s390_mem_op *mop) { void __user *uaddr = (void __user *)mop->buf; + void *sida_addr; int r = 0; if (mop->flags || !mop->size) @@ -5178,16 +5179,16 @@ static long kvm_s390_vcpu_sida_op(struct kvm_vcpu *vcpu, if (!kvm_s390_pv_cpu_is_protected(vcpu)) return -EINVAL; + sida_addr = (char *)sida_addr(vcpu->arch.sie_block) + mop->sida_offset; + switch (mop->op) { case KVM_S390_MEMOP_SIDA_READ: - if (copy_to_user(uaddr, (void *)(sida_origin(vcpu->arch.sie_block) + - mop->sida_offset), mop->size)) + if (copy_to_user(uaddr, sida_addr, mop->size)) r = -EFAULT; break; case KVM_S390_MEMOP_SIDA_WRITE: - if (copy_from_user((void *)(sida_origin(vcpu->arch.sie_block) + - mop->sida_offset), uaddr, mop->size)) + if (copy_from_user(sida_addr, uaddr, mop->size)) r = -EFAULT; break; } diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 3335fa09b6f1..9f8a192bd750 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -924,8 +924,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu) return -EREMOTE; } if (kvm_s390_pv_cpu_is_protected(vcpu)) { - memcpy((void *)sida_origin(vcpu->arch.sie_block), (void *)mem, - PAGE_SIZE); + memcpy(sida_addr(vcpu->arch.sie_block), (void *)mem, PAGE_SIZE); rc = 0; } else { rc = write_guest(vcpu, operand2, ar, (void *)mem, PAGE_SIZE); diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index 7cb7799a0acb..c7435c37cdfe 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -44,7 +44,7 @@ int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc) free_pages(vcpu->arch.pv.stor_base, get_order(uv_info.guest_cpu_stor_len)); - free_page(sida_origin(vcpu->arch.sie_block)); + free_page((unsigned long)sida_addr(vcpu->arch.sie_block)); vcpu->arch.sie_block->pv_handle_cpu = 0; vcpu->arch.sie_block->pv_handle_config = 0; memset(&vcpu->arch.pv, 0, sizeof(vcpu->arch.pv)); @@ -66,6 +66,7 @@ int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc) .header.cmd = UVC_CMD_CREATE_SEC_CPU, .header.len = sizeof(uvcb), }; + void *sida_addr; int cc; if (kvm_s390_pv_cpu_get_handle(vcpu)) @@ -83,12 +84,13 @@ int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc) uvcb.stor_origin = (u64)vcpu->arch.pv.stor_base; /* Alloc Secure Instruction Data Area Designation */ - vcpu->arch.sie_block->sidad = __get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); - if (!vcpu->arch.sie_block->sidad) { + sida_addr = (void *)__get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); + if (!sida_addr) { free_pages(vcpu->arch.pv.stor_base, get_order(uv_info.guest_cpu_stor_len)); return -ENOMEM; } + vcpu->arch.sie_block->sidad = virt_to_phys(sida_addr); cc = uv_call(0, (u64)&uvcb); *rc = uvcb.header.rc; -- cgit v1.2.3 From 4435b79a366495a5cb43b792d9e7d69d489428cd Mon Sep 17 00:00:00 2001 From: Nico Boehr Date: Thu, 20 Oct 2022 16:31:59 +0200 Subject: KVM: s390: pv: sort out physical vs virtual pointers usage Fix virtual vs physical address confusion (which currently are the same). Signed-off-by: Nico Boehr Reviewed-by: Claudio Imbrenda Link: https://lore.kernel.org/r/20221020143159.294605-6-nrb@linux.ibm.com Message-Id: <20221020143159.294605-6-nrb@linux.ibm.com> Signed-off-by: Janosch Frank --- arch/s390/kvm/pv.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/s390/kvm') diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index c7435c37cdfe..48c4f57d5d76 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -80,8 +80,8 @@ int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc) /* Input */ uvcb.guest_handle = kvm_s390_pv_get_handle(vcpu->kvm); uvcb.num = vcpu->arch.sie_block->icpua; - uvcb.state_origin = (u64)vcpu->arch.sie_block; - uvcb.stor_origin = (u64)vcpu->arch.pv.stor_base; + uvcb.state_origin = virt_to_phys(vcpu->arch.sie_block); + uvcb.stor_origin = virt_to_phys((void *)vcpu->arch.pv.stor_base); /* Alloc Secure Instruction Data Area Designation */ sida_addr = (void *)__get_free_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); @@ -228,8 +228,9 @@ int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc) uvcb.guest_stor_origin = 0; /* MSO is 0 for KVM */ uvcb.guest_stor_len = kvm->arch.pv.guest_len; uvcb.guest_asce = kvm->arch.gmap->asce; - uvcb.guest_sca = (unsigned long)kvm->arch.sca; - uvcb.conf_base_stor_origin = (u64)kvm->arch.pv.stor_base; + uvcb.guest_sca = virt_to_phys(kvm->arch.sca); + uvcb.conf_base_stor_origin = + virt_to_phys((void *)kvm->arch.pv.stor_base); uvcb.conf_virt_stor_origin = (u64)kvm->arch.pv.stor_var; cc = uv_call_sched(0, (u64)&uvcb); -- cgit v1.2.3 From 77b533411595668659ce5aaade4ca36c7aa2c488 Mon Sep 17 00:00:00 2001 From: Nico Boehr Date: Tue, 25 Oct 2022 10:20:39 +0200 Subject: KVM: s390: VSIE: sort out virtual/physical address in pin_guest_page pin_guest_page() used page_to_virt() to calculate the hpa of the pinned page. This currently works, because virtual and physical addresses are the same. Use page_to_phys() instead to resolve the virtual-real address confusion. One caller of pin_guest_page() actually expected the hpa to be a hva, so add the missing phys_to_virt() conversion here. Signed-off-by: Nico Boehr Reviewed-by: Claudio Imbrenda Acked-by: David Hildenbrand Reviewed-by: Christian Borntraeger Link: https://lore.kernel.org/r/20221025082039.117372-2-nrb@linux.ibm.com Message-Id: <20221025082039.117372-2-nrb@linux.ibm.com> Signed-off-by: Janosch Frank --- arch/s390/kvm/vsie.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/s390/kvm') diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 94138f8f0c1c..0e9d020d7093 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -654,7 +654,7 @@ static int pin_guest_page(struct kvm *kvm, gpa_t gpa, hpa_t *hpa) page = gfn_to_page(kvm, gpa_to_gfn(gpa)); if (is_error_page(page)) return -EINVAL; - *hpa = (hpa_t) page_to_virt(page) + (gpa & ~PAGE_MASK); + *hpa = (hpa_t)page_to_phys(page) + (gpa & ~PAGE_MASK); return 0; } @@ -869,7 +869,7 @@ static int pin_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page, WARN_ON_ONCE(rc); return 1; } - vsie_page->scb_o = (struct kvm_s390_sie_block *) hpa; + vsie_page->scb_o = phys_to_virt(hpa); return 0; } -- cgit v1.2.3 From fb491d5500a7ca551e49bc32d9b19d226023f68d Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Fri, 11 Nov 2022 18:06:27 +0100 Subject: KVM: s390: pv: asynchronous destroy for reboot Until now, destroying a protected guest was an entirely synchronous operation that could potentially take a very long time, depending on the size of the guest, due to the time needed to clean up the address space from protected pages. This patch implements an asynchronous destroy mechanism, that allows a protected guest to reboot significantly faster than previously. This is achieved by clearing the pages of the old guest in background. In case of reboot, the new guest will be able to run in the same address space almost immediately. The old protected guest is then only destroyed when all of its memory has been destroyed or otherwise made non protected. Two new PV commands are added for the KVM_S390_PV_COMMAND ioctl: KVM_PV_ASYNC_CLEANUP_PREPARE: set aside the current protected VM for later asynchronous teardown. The current KVM VM will then continue immediately as non-protected. If a protected VM had already been set aside for asynchronous teardown, but without starting the teardown process, this call will fail. There can be at most one VM set aside at any time. Once it is set aside, the protected VM only exists in the context of the Ultravisor, it is not associated with the KVM VM anymore. Its protected CPUs have already been destroyed, but not its memory. This command can be issued again immediately after starting KVM_PV_ASYNC_CLEANUP_PERFORM, without having to wait for completion. KVM_PV_ASYNC_CLEANUP_PERFORM: tears down the protected VM previously set aside using KVM_PV_ASYNC_CLEANUP_PREPARE. Ideally the KVM_PV_ASYNC_CLEANUP_PERFORM PV command should be issued by userspace from a separate thread. If a fatal signal is received (or if the process terminates naturally), the command will terminate immediately without completing. All protected VMs whose teardown was interrupted will be put in the need_cleanup list. The rest of the normal KVM teardown process will take care of properly cleaning up all remaining protected VMs, including the ones on the need_cleanup list. Signed-off-by: Claudio Imbrenda Reviewed-by: Nico Boehr Reviewed-by: Janosch Frank Reviewed-by: Steffen Eiden Link: https://lore.kernel.org/r/20221111170632.77622-2-imbrenda@linux.ibm.com Message-Id: <20221111170632.77622-2-imbrenda@linux.ibm.com> Signed-off-by: Janosch Frank --- arch/s390/include/asm/kvm_host.h | 2 + arch/s390/kvm/kvm-s390.c | 49 +++++-- arch/s390/kvm/kvm-s390.h | 3 + arch/s390/kvm/pv.c | 295 +++++++++++++++++++++++++++++++++++++-- include/uapi/linux/kvm.h | 2 + 5 files changed, 333 insertions(+), 18 deletions(-) (limited to 'arch/s390/kvm') diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 21f1339a4197..d67ce719d16a 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -942,6 +942,8 @@ struct kvm_s390_pv { unsigned long stor_base; void *stor_var; bool dumping; + void *set_aside; + struct list_head need_cleanup; struct mmu_notifier mmu_notifier; }; diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index bd6e0201bfe5..f0abaaf7eea4 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -209,6 +209,8 @@ unsigned int diag9c_forwarding_hz; module_param(diag9c_forwarding_hz, uint, 0644); MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off"); +static int async_destroy; + /* * For now we handle at most 16 double words as this is what the s390 base * kernel handles and stores in the prefix page. If we ever need to go beyond @@ -2504,9 +2506,13 @@ static int kvm_s390_pv_dmp(struct kvm *kvm, struct kvm_pv_cmd *cmd, static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) { + const bool need_lock = (cmd->cmd != KVM_PV_ASYNC_CLEANUP_PERFORM); + void __user *argp = (void __user *)cmd->data; int r = 0; u16 dummy; - void __user *argp = (void __user *)cmd->data; + + if (need_lock) + mutex_lock(&kvm->lock); switch (cmd->cmd) { case KVM_PV_ENABLE: { @@ -2540,6 +2546,31 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) set_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); break; } + case KVM_PV_ASYNC_CLEANUP_PREPARE: + r = -EINVAL; + if (!kvm_s390_pv_is_protected(kvm) || !async_destroy) + break; + + r = kvm_s390_cpus_from_pv(kvm, &cmd->rc, &cmd->rrc); + /* + * If a CPU could not be destroyed, destroy VM will also fail. + * There is no point in trying to destroy it. Instead return + * the rc and rrc from the first CPU that failed destroying. + */ + if (r) + break; + r = kvm_s390_pv_set_aside(kvm, &cmd->rc, &cmd->rrc); + + /* no need to block service interrupts any more */ + clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); + break; + case KVM_PV_ASYNC_CLEANUP_PERFORM: + r = -EINVAL; + if (!async_destroy) + break; + /* kvm->lock must not be held; this is asserted inside the function. */ + r = kvm_s390_pv_deinit_aside_vm(kvm, &cmd->rc, &cmd->rrc); + break; case KVM_PV_DISABLE: { r = -EINVAL; if (!kvm_s390_pv_is_protected(kvm)) @@ -2553,7 +2584,7 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) */ if (r) break; - r = kvm_s390_pv_deinit_vm(kvm, &cmd->rc, &cmd->rrc); + r = kvm_s390_pv_deinit_cleanup_all(kvm, &cmd->rc, &cmd->rrc); /* no need to block service interrupts any more */ clear_bit(IRQ_PEND_EXT_SERVICE, &kvm->arch.float_int.masked_irqs); @@ -2703,6 +2734,9 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd) default: r = -ENOTTY; } + if (need_lock) + mutex_unlock(&kvm->lock); + return r; } @@ -2907,9 +2941,8 @@ long kvm_arch_vm_ioctl(struct file *filp, r = -EINVAL; break; } - mutex_lock(&kvm->lock); + /* must be called without kvm->lock */ r = kvm_s390_handle_pv(kvm, &args); - mutex_unlock(&kvm->lock); if (copy_to_user(argp, &args, sizeof(args))) { r = -EFAULT; break; @@ -3228,6 +3261,8 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) kvm_s390_vsie_init(kvm); if (use_gisa) kvm_s390_gisa_init(kvm); + INIT_LIST_HEAD(&kvm->arch.pv.need_cleanup); + kvm->arch.pv.set_aside = NULL; KVM_EVENT(3, "vm 0x%pK created by pid %u", kvm, current->pid); return 0; @@ -3272,11 +3307,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm) /* * We are already at the end of life and kvm->lock is not taken. * This is ok as the file descriptor is closed by now and nobody - * can mess with the pv state. To avoid lockdep_assert_held from - * complaining we do not use kvm_s390_pv_is_protected. + * can mess with the pv state. */ - if (kvm_s390_pv_get_handle(kvm)) - kvm_s390_pv_deinit_vm(kvm, &rc, &rrc); + kvm_s390_pv_deinit_cleanup_all(kvm, &rc, &rrc); /* * Remove the mmu notifier only when the whole KVM VM is torn down, * and only if one was registered to begin with. If the VM is diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index a60d1e5c44cd..826754937ae4 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -244,6 +244,9 @@ static inline u32 kvm_s390_get_gisa_desc(struct kvm *kvm) /* implemented in pv.c */ int kvm_s390_pv_destroy_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc); int kvm_s390_pv_create_cpu(struct kvm_vcpu *vcpu, u16 *rc, u16 *rrc); +int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc); +int kvm_s390_pv_deinit_aside_vm(struct kvm *kvm, u16 *rc, u16 *rrc); +int kvm_s390_pv_deinit_cleanup_all(struct kvm *kvm, u16 *rc, u16 *rrc); int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc); int kvm_s390_pv_init_vm(struct kvm *kvm, u16 *rc, u16 *rrc); int kvm_s390_pv_set_sec_parms(struct kvm *kvm, void *hdr, u64 length, u16 *rc, diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index 48c4f57d5d76..5f958fcf6283 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -18,6 +18,29 @@ #include #include "kvm-s390.h" +/** + * struct pv_vm_to_be_destroyed - Represents a protected VM that needs to + * be destroyed + * + * @list: list head for the list of leftover VMs + * @old_gmap_table: the gmap table of the leftover protected VM + * @handle: the handle of the leftover protected VM + * @stor_var: pointer to the variable storage of the leftover protected VM + * @stor_base: address of the base storage of the leftover protected VM + * + * Represents a protected VM that is still registered with the Ultravisor, + * but which does not correspond any longer to an active KVM VM. It should + * be destroyed at some point later, either asynchronously or when the + * process terminates. + */ +struct pv_vm_to_be_destroyed { + struct list_head list; + unsigned long old_gmap_table; + u64 handle; + void *stor_var; + unsigned long stor_base; +}; + static void kvm_s390_clear_pv_state(struct kvm *kvm) { kvm->arch.pv.handle = 0; @@ -161,23 +184,150 @@ out_err: return -ENOMEM; } -/* this should not fail, but if it does, we must not free the donated memory */ -int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc) +/** + * kvm_s390_pv_dispose_one_leftover - Clean up one leftover protected VM. + * @kvm: the KVM that was associated with this leftover protected VM + * @leftover: details about the leftover protected VM that needs a clean up + * @rc: the RC code of the Destroy Secure Configuration UVC + * @rrc: the RRC code of the Destroy Secure Configuration UVC + * + * Destroy one leftover protected VM. + * On success, kvm->mm->context.protected_count will be decremented atomically + * and all other resources used by the VM will be freed. + * + * Return: 0 in case of success, otherwise 1 + */ +static int kvm_s390_pv_dispose_one_leftover(struct kvm *kvm, + struct pv_vm_to_be_destroyed *leftover, + u16 *rc, u16 *rrc) { int cc; - cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), - UVC_CMD_DESTROY_SEC_CONF, rc, rrc); - WRITE_ONCE(kvm->arch.gmap->guest_handle, 0); + cc = uv_cmd_nodata(leftover->handle, UVC_CMD_DESTROY_SEC_CONF, rc, rrc); + KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY LEFTOVER VM: rc %x rrc %x", *rc, *rrc); + WARN_ONCE(cc, "protvirt destroy leftover vm failed rc %x rrc %x", *rc, *rrc); + if (cc) + return cc; /* - * if the mm still has a mapping, make all its pages accessible - * before destroying the guest + * Intentionally leak unusable memory. If the UVC fails, the memory + * used for the VM and its metadata is permanently unusable. + * This can only happen in case of a serious KVM or hardware bug; it + * is not expected to happen in normal operation. */ - if (mmget_not_zero(kvm->mm)) { - s390_uv_destroy_range(kvm->mm, 0, TASK_SIZE); - mmput(kvm->mm); + free_pages(leftover->stor_base, get_order(uv_info.guest_base_stor_len)); + free_pages(leftover->old_gmap_table, CRST_ALLOC_ORDER); + vfree(leftover->stor_var); + atomic_dec(&kvm->mm->context.protected_count); + return 0; +} + +/** + * kvm_s390_destroy_lower_2g - Destroy the first 2GB of protected guest memory. + * @kvm: the VM whose memory is to be cleared. + * + * Destroy the first 2GB of guest memory, to avoid prefix issues after reboot. + * The CPUs of the protected VM need to be destroyed beforehand. + */ +static void kvm_s390_destroy_lower_2g(struct kvm *kvm) +{ + const unsigned long pages_2g = SZ_2G / PAGE_SIZE; + struct kvm_memory_slot *slot; + unsigned long len; + int srcu_idx; + + srcu_idx = srcu_read_lock(&kvm->srcu); + + /* Take the memslot containing guest absolute address 0 */ + slot = gfn_to_memslot(kvm, 0); + /* Clear all slots or parts thereof that are below 2GB */ + while (slot && slot->base_gfn < pages_2g) { + len = min_t(u64, slot->npages, pages_2g - slot->base_gfn) * PAGE_SIZE; + s390_uv_destroy_range(kvm->mm, slot->userspace_addr, slot->userspace_addr + len); + /* Take the next memslot */ + slot = gfn_to_memslot(kvm, slot->base_gfn + slot->npages); + } + + srcu_read_unlock(&kvm->srcu, srcu_idx); +} + +/** + * kvm_s390_pv_set_aside - Set aside a protected VM for later teardown. + * @kvm: the VM + * @rc: return value for the RC field of the UVCB + * @rrc: return value for the RRC field of the UVCB + * + * Set aside the protected VM for a subsequent teardown. The VM will be able + * to continue immediately as a non-secure VM, and the information needed to + * properly tear down the protected VM is set aside. If another protected VM + * was already set aside without starting its teardown, this function will + * fail. + * The CPUs of the protected VM need to be destroyed beforehand. + * + * Context: kvm->lock needs to be held + * + * Return: 0 in case of success, -EINVAL if another protected VM was already set + * aside, -ENOMEM if the system ran out of memory. + */ +int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc) +{ + struct pv_vm_to_be_destroyed *priv; + + lockdep_assert_held(&kvm->lock); + /* + * If another protected VM was already prepared for teardown, refuse. + * A normal deinitialization has to be performed instead. + */ + if (kvm->arch.pv.set_aside) + return -EINVAL; + priv = kzalloc(sizeof(*priv), GFP_KERNEL); + if (!priv) + return -ENOMEM; + + priv->stor_var = kvm->arch.pv.stor_var; + priv->stor_base = kvm->arch.pv.stor_base; + priv->handle = kvm_s390_pv_get_handle(kvm); + priv->old_gmap_table = (unsigned long)kvm->arch.gmap->table; + WRITE_ONCE(kvm->arch.gmap->guest_handle, 0); + if (s390_replace_asce(kvm->arch.gmap)) { + kfree(priv); + return -ENOMEM; } + kvm_s390_destroy_lower_2g(kvm); + kvm_s390_clear_pv_state(kvm); + kvm->arch.pv.set_aside = priv; + + *rc = UVC_RC_EXECUTED; + *rrc = 42; + return 0; +} + +/** + * kvm_s390_pv_deinit_vm - Deinitialize the current protected VM + * @kvm: the KVM whose protected VM needs to be deinitialized + * @rc: the RC code of the UVC + * @rrc: the RRC code of the UVC + * + * Deinitialize the current protected VM. This function will destroy and + * cleanup the current protected VM, but it will not cleanup the guest + * memory. This function should only be called when the protected VM has + * just been created and therefore does not have any guest memory, or when + * the caller cleans up the guest memory separately. + * + * This function should not fail, but if it does, the donated memory must + * not be freed. + * + * Context: kvm->lock needs to be held + * + * Return: 0 in case of success, otherwise -EIO + */ +int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc) +{ + int cc; + + cc = uv_cmd_nodata(kvm_s390_pv_get_handle(kvm), + UVC_CMD_DESTROY_SEC_CONF, rc, rrc); + WRITE_ONCE(kvm->arch.gmap->guest_handle, 0); if (!cc) { atomic_dec(&kvm->mm->context.protected_count); kvm_s390_pv_dealloc_vm(kvm); @@ -191,6 +341,131 @@ int kvm_s390_pv_deinit_vm(struct kvm *kvm, u16 *rc, u16 *rrc) return cc ? -EIO : 0; } +/** + * kvm_s390_pv_deinit_cleanup_all - Clean up all protected VMs associated + * with a specific KVM. + * @kvm: the KVM to be cleaned up + * @rc: the RC code of the first failing UVC + * @rrc: the RRC code of the first failing UVC + * + * This function will clean up all protected VMs associated with a KVM. + * This includes the active one, the one prepared for deinitialization with + * kvm_s390_pv_set_aside, and any still pending in the need_cleanup list. + * + * Context: kvm->lock needs to be held unless being called from + * kvm_arch_destroy_vm. + * + * Return: 0 if all VMs are successfully cleaned up, otherwise -EIO + */ +int kvm_s390_pv_deinit_cleanup_all(struct kvm *kvm, u16 *rc, u16 *rrc) +{ + struct pv_vm_to_be_destroyed *cur; + bool need_zap = false; + u16 _rc, _rrc; + int cc = 0; + + /* Make sure the counter does not reach 0 before calling s390_uv_destroy_range */ + atomic_inc(&kvm->mm->context.protected_count); + + *rc = 1; + /* If the current VM is protected, destroy it */ + if (kvm_s390_pv_get_handle(kvm)) { + cc = kvm_s390_pv_deinit_vm(kvm, rc, rrc); + need_zap = true; + } + + /* If a previous protected VM was set aside, put it in the need_cleanup list */ + if (kvm->arch.pv.set_aside) { + list_add(kvm->arch.pv.set_aside, &kvm->arch.pv.need_cleanup); + kvm->arch.pv.set_aside = NULL; + } + + /* Cleanup all protected VMs in the need_cleanup list */ + while (!list_empty(&kvm->arch.pv.need_cleanup)) { + cur = list_first_entry(&kvm->arch.pv.need_cleanup, typeof(*cur), list); + need_zap = true; + if (kvm_s390_pv_dispose_one_leftover(kvm, cur, &_rc, &_rrc)) { + cc = 1; + /* + * Only return the first error rc and rrc, so make + * sure it is not overwritten. All destroys will + * additionally be reported via KVM_UV_EVENT(). + */ + if (*rc == UVC_RC_EXECUTED) { + *rc = _rc; + *rrc = _rrc; + } + } + list_del(&cur->list); + kfree(cur); + } + + /* + * If the mm still has a mapping, try to mark all its pages as + * accessible. The counter should not reach zero before this + * cleanup has been performed. + */ + if (need_zap && mmget_not_zero(kvm->mm)) { + s390_uv_destroy_range(kvm->mm, 0, TASK_SIZE); + mmput(kvm->mm); + } + + /* Now the counter can safely reach 0 */ + atomic_dec(&kvm->mm->context.protected_count); + return cc ? -EIO : 0; +} + +/** + * kvm_s390_pv_deinit_aside_vm - Teardown a previously set aside protected VM. + * @kvm: the VM previously associated with the protected VM + * @rc: return value for the RC field of the UVCB + * @rrc: return value for the RRC field of the UVCB + * + * Tear down the protected VM that had been previously prepared for teardown + * using kvm_s390_pv_set_aside_vm. Ideally this should be called by + * userspace asynchronously from a separate thread. + * + * Context: kvm->lock must not be held. + * + * Return: 0 in case of success, -EINVAL if no protected VM had been + * prepared for asynchronous teardowm, -EIO in case of other errors. + */ +int kvm_s390_pv_deinit_aside_vm(struct kvm *kvm, u16 *rc, u16 *rrc) +{ + struct pv_vm_to_be_destroyed *p; + int ret = 0; + + lockdep_assert_not_held(&kvm->lock); + mutex_lock(&kvm->lock); + p = kvm->arch.pv.set_aside; + kvm->arch.pv.set_aside = NULL; + mutex_unlock(&kvm->lock); + if (!p) + return -EINVAL; + + /* When a fatal signal is received, stop immediately */ + if (s390_uv_destroy_range_interruptible(kvm->mm, 0, TASK_SIZE_MAX)) + goto done; + if (kvm_s390_pv_dispose_one_leftover(kvm, p, rc, rrc)) + ret = -EIO; + kfree(p); + p = NULL; +done: + /* + * p is not NULL if we aborted because of a fatal signal, in which + * case queue the leftover for later cleanup. + */ + if (p) { + mutex_lock(&kvm->lock); + list_add(&p->list, &kvm->arch.pv.need_cleanup); + mutex_unlock(&kvm->lock); + /* Did not finish, but pretend things went well */ + *rc = UVC_RC_EXECUTED; + *rrc = 42; + } + return ret; +} + static void kvm_s390_pv_mmu_notifier_release(struct mmu_notifier *subscription, struct mm_struct *mm) { diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 0d5d4419139a..b3701b23ca18 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1740,6 +1740,8 @@ enum pv_cmd_id { KVM_PV_UNSHARE_ALL, KVM_PV_INFO, KVM_PV_DUMP, + KVM_PV_ASYNC_CLEANUP_PREPARE, + KVM_PV_ASYNC_CLEANUP_PERFORM, }; struct kvm_pv_cmd { -- cgit v1.2.3 From 8c516b25d6e9c70e6d76627932b14b0ef03a82c4 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Fri, 11 Nov 2022 18:06:29 +0100 Subject: KVM: s390: pv: add KVM_CAP_S390_PROTECTED_ASYNC_DISABLE Add KVM_CAP_S390_PROTECTED_ASYNC_DISABLE to signal that the KVM_PV_ASYNC_DISABLE and KVM_PV_ASYNC_DISABLE_PREPARE commands for the KVM_S390_PV_COMMAND ioctl are available. Signed-off-by: Claudio Imbrenda Reviewed-by: Nico Boehr Reviewed-by: Steffen Eiden Reviewed-by: Janosch Frank Link: https://lore.kernel.org/r/20221111170632.77622-4-imbrenda@linux.ibm.com Message-Id: <20221111170632.77622-4-imbrenda@linux.ibm.com> Signed-off-by: Janosch Frank --- arch/s390/kvm/kvm-s390.c | 3 +++ include/uapi/linux/kvm.h | 1 + 2 files changed, 4 insertions(+) (limited to 'arch/s390/kvm') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index f0abaaf7eea4..b6cc7d2935c0 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -618,6 +618,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_BPB: r = test_facility(82); break; + case KVM_CAP_S390_PROTECTED_ASYNC_DISABLE: + r = async_destroy && is_prot_virt_host(); + break; case KVM_CAP_S390_PROTECTED: r = is_prot_virt_host(); break; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index b3701b23ca18..d3f86a280858 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -1178,6 +1178,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_S390_ZPCI_OP 221 #define KVM_CAP_S390_CPU_TOPOLOGY 222 #define KVM_CAP_DIRTY_LOG_RING_ACQ_REL 223 +#define KVM_CAP_S390_PROTECTED_ASYNC_DISABLE 224 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From f7866f582b1c9d80d1a3bd0953170185668c52ca Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Fri, 11 Nov 2022 18:06:31 +0100 Subject: KVM: s390: pv: support for Destroy fast UVC Add support for the Destroy Secure Configuration Fast Ultravisor call, and take advantage of it for asynchronous destroy. When supported, the protected guest is destroyed immediately using the new UVC, leaving only the memory to be cleaned up asynchronously. Signed-off-by: Claudio Imbrenda Reviewed-by: Nico Boehr Reviewed-by: Janosch Frank Reviewed-by: Steffen Eiden Link: https://lore.kernel.org/r/20221111170632.77622-6-imbrenda@linux.ibm.com Message-Id: <20221111170632.77622-6-imbrenda@linux.ibm.com> Signed-off-by: Janosch Frank --- arch/s390/include/asm/uv.h | 10 ++++++++ arch/s390/kvm/pv.c | 61 ++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 63 insertions(+), 8 deletions(-) (limited to 'arch/s390/kvm') diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h index be3ef9dd6972..28a9ad57b6f1 100644 --- a/arch/s390/include/asm/uv.h +++ b/arch/s390/include/asm/uv.h @@ -34,6 +34,7 @@ #define UVC_CMD_INIT_UV 0x000f #define UVC_CMD_CREATE_SEC_CONF 0x0100 #define UVC_CMD_DESTROY_SEC_CONF 0x0101 +#define UVC_CMD_DESTROY_SEC_CONF_FAST 0x0102 #define UVC_CMD_CREATE_SEC_CPU 0x0120 #define UVC_CMD_DESTROY_SEC_CPU 0x0121 #define UVC_CMD_CONV_TO_SEC_STOR 0x0200 @@ -81,6 +82,7 @@ enum uv_cmds_inst { BIT_UVC_CMD_UNSHARE_ALL = 20, BIT_UVC_CMD_PIN_PAGE_SHARED = 21, BIT_UVC_CMD_UNPIN_PAGE_SHARED = 22, + BIT_UVC_CMD_DESTROY_SEC_CONF_FAST = 23, BIT_UVC_CMD_DUMP_INIT = 24, BIT_UVC_CMD_DUMP_CONFIG_STOR_STATE = 25, BIT_UVC_CMD_DUMP_CPU = 26, @@ -230,6 +232,14 @@ struct uv_cb_nodata { u64 reserved20[4]; } __packed __aligned(8); +/* Destroy Configuration Fast */ +struct uv_cb_destroy_fast { + struct uv_cb_header header; + u64 reserved08[2]; + u64 handle; + u64 reserved20[5]; +} __packed __aligned(8); + /* Set Shared Access */ struct uv_cb_share { struct uv_cb_header header; diff --git a/arch/s390/kvm/pv.c b/arch/s390/kvm/pv.c index 5f958fcf6283..e032ebbf51b9 100644 --- a/arch/s390/kvm/pv.c +++ b/arch/s390/kvm/pv.c @@ -203,6 +203,9 @@ static int kvm_s390_pv_dispose_one_leftover(struct kvm *kvm, { int cc; + /* It used the destroy-fast UVC, nothing left to do here */ + if (!leftover->handle) + goto done_fast; cc = uv_cmd_nodata(leftover->handle, UVC_CMD_DESTROY_SEC_CONF, rc, rrc); KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY LEFTOVER VM: rc %x rrc %x", *rc, *rrc); WARN_ONCE(cc, "protvirt destroy leftover vm failed rc %x rrc %x", *rc, *rrc); @@ -217,6 +220,7 @@ static int kvm_s390_pv_dispose_one_leftover(struct kvm *kvm, free_pages(leftover->stor_base, get_order(uv_info.guest_base_stor_len)); free_pages(leftover->old_gmap_table, CRST_ALLOC_ORDER); vfree(leftover->stor_var); +done_fast: atomic_dec(&kvm->mm->context.protected_count); return 0; } @@ -250,6 +254,36 @@ static void kvm_s390_destroy_lower_2g(struct kvm *kvm) srcu_read_unlock(&kvm->srcu, srcu_idx); } +static int kvm_s390_pv_deinit_vm_fast(struct kvm *kvm, u16 *rc, u16 *rrc) +{ + struct uv_cb_destroy_fast uvcb = { + .header.cmd = UVC_CMD_DESTROY_SEC_CONF_FAST, + .header.len = sizeof(uvcb), + .handle = kvm_s390_pv_get_handle(kvm), + }; + int cc; + + cc = uv_call_sched(0, (u64)&uvcb); + if (rc) + *rc = uvcb.header.rc; + if (rrc) + *rrc = uvcb.header.rrc; + WRITE_ONCE(kvm->arch.gmap->guest_handle, 0); + KVM_UV_EVENT(kvm, 3, "PROTVIRT DESTROY VM FAST: rc %x rrc %x", + uvcb.header.rc, uvcb.header.rrc); + WARN_ONCE(cc, "protvirt destroy vm fast failed handle %llx rc %x rrc %x", + kvm_s390_pv_get_handle(kvm), uvcb.header.rc, uvcb.header.rrc); + /* Inteded memory leak on "impossible" error */ + if (!cc) + kvm_s390_pv_dealloc_vm(kvm); + return cc ? -EIO : 0; +} + +static inline bool is_destroy_fast_available(void) +{ + return test_bit_inv(BIT_UVC_CMD_DESTROY_SEC_CONF_FAST, uv_info.inst_calls_list); +} + /** * kvm_s390_pv_set_aside - Set aside a protected VM for later teardown. * @kvm: the VM @@ -271,6 +305,7 @@ static void kvm_s390_destroy_lower_2g(struct kvm *kvm) int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc) { struct pv_vm_to_be_destroyed *priv; + int res = 0; lockdep_assert_held(&kvm->lock); /* @@ -283,14 +318,21 @@ int kvm_s390_pv_set_aside(struct kvm *kvm, u16 *rc, u16 *rrc) if (!priv) return -ENOMEM; - priv->stor_var = kvm->arch.pv.stor_var; - priv->stor_base = kvm->arch.pv.stor_base; - priv->handle = kvm_s390_pv_get_handle(kvm); - priv->old_gmap_table = (unsigned long)kvm->arch.gmap->table; - WRITE_ONCE(kvm->arch.gmap->guest_handle, 0); - if (s390_replace_asce(kvm->arch.gmap)) { + if (is_destroy_fast_available()) { + res = kvm_s390_pv_deinit_vm_fast(kvm, rc, rrc); + } else { + priv->stor_var = kvm->arch.pv.stor_var; + priv->stor_base = kvm->arch.pv.stor_base; + priv->handle = kvm_s390_pv_get_handle(kvm); + priv->old_gmap_table = (unsigned long)kvm->arch.gmap->table; + WRITE_ONCE(kvm->arch.gmap->guest_handle, 0); + if (s390_replace_asce(kvm->arch.gmap)) + res = -ENOMEM; + } + + if (res) { kfree(priv); - return -ENOMEM; + return res; } kvm_s390_destroy_lower_2g(kvm); @@ -471,6 +513,7 @@ static void kvm_s390_pv_mmu_notifier_release(struct mmu_notifier *subscription, { struct kvm *kvm = container_of(subscription, struct kvm, arch.pv.mmu_notifier); u16 dummy; + int r; /* * No locking is needed since this is the last thread of the last user of this @@ -479,7 +522,9 @@ static void kvm_s390_pv_mmu_notifier_release(struct mmu_notifier *subscription, * unregistered. This means that if this notifier runs, then the * struct kvm is still valid. */ - kvm_s390_cpus_from_pv(kvm, &dummy, &dummy); + r = kvm_s390_cpus_from_pv(kvm, &dummy, &dummy); + if (!r && is_destroy_fast_available() && kvm_s390_pv_get_handle(kvm)) + kvm_s390_pv_deinit_vm_fast(kvm, &dummy, &dummy); } static const struct mmu_notifier_ops kvm_s390_pv_mmu_notifier_ops = { -- cgit v1.2.3 From cc726886079febfa2384d77486d7f3a11f951ea9 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Fri, 11 Nov 2022 18:06:32 +0100 Subject: KVM: s390: pv: module parameter to fence asynchronous destroy Add the module parameter "async_destroy", to allow the asynchronous destroy mechanism to be switched off. This might be useful for debugging purposes. The parameter is enabled by default since the feature is opt-in anyway. Signed-off-by: Claudio Imbrenda Reviewed-by: Janosch Frank Reviewed-by: Steffen Eiden Reviewed-by: Nico Boehr Link: https://lore.kernel.org/r/20221111170632.77622-7-imbrenda@linux.ibm.com Message-Id: <20221111170632.77622-7-imbrenda@linux.ibm.com> Signed-off-by: Janosch Frank --- arch/s390/kvm/kvm-s390.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/s390/kvm') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index b6cc7d2935c0..8a0c884bf737 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -209,7 +209,13 @@ unsigned int diag9c_forwarding_hz; module_param(diag9c_forwarding_hz, uint, 0644); MODULE_PARM_DESC(diag9c_forwarding_hz, "Maximum diag9c forwarding per second, 0 to turn off"); -static int async_destroy; +/* + * allow asynchronous deinit for protected guests; enable by default since + * the feature is opt-in anyway + */ +static int async_destroy = 1; +module_param(async_destroy, int, 0444); +MODULE_PARM_DESC(async_destroy, "Asynchronous destroy for protected guests"); /* * For now we handle at most 16 double words as this is what the s390 base -- cgit v1.2.3 From 99b63f55dc514a357c2ecf25e9aab149879329f0 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Fri, 18 Nov 2022 16:11:33 +0100 Subject: KVM: s390: remove unused gisa_clear_ipm_gisc() function clang warns about an unused function: arch/s390/kvm/interrupt.c:317:20: error: unused function 'gisa_clear_ipm_gisc' [-Werror,-Wunused-function] static inline void gisa_clear_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc) Remove gisa_clear_ipm_gisc(), since it is unused and get rid of this warning. Signed-off-by: Heiko Carstens Reviewed-by: Thomas Huth Link: https://lore.kernel.org/r/20221118151133.2974602-1-hca@linux.ibm.com Signed-off-by: Christian Borntraeger Signed-off-by: Janosch Frank --- arch/s390/kvm/interrupt.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch/s390/kvm') diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c index ab569faf0df2..1dae78deddf2 100644 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@ -314,11 +314,6 @@ static inline u8 gisa_get_ipm(struct kvm_s390_gisa *gisa) return READ_ONCE(gisa->ipm); } -static inline void gisa_clear_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc) -{ - clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa); -} - static inline int gisa_tac_ipm_gisc(struct kvm_s390_gisa *gisa, u32 gisc) { return test_and_clear_bit_inv(IPM_BIT_OFFSET + gisc, (unsigned long *) gisa); -- cgit v1.2.3