From a679c547d19ded9b7d1a88e17cf5e5b69ac619b6 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 15 Dec 2016 15:58:14 +0100 Subject: KVM: s390: gaccess: add ESOP2 handling When we access guest memory and run into a protection exception, we need to pass the exception data to the guest. ESOP2 provides detailed information about all protection exceptions which ESOP1 only partially provided. The gaccess changes make sure, that the guest always gets all available information. Signed-off-by: Janosch Frank Reviewed-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/gaccess.c | 12 +++++++++--- arch/s390/tools/gen_facilities.c | 1 + 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 4aa8a7e2a1da..6e94705efd4e 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -465,7 +465,9 @@ static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, ar_t ar, struct trans_exc_code_bits { unsigned long addr : 52; /* Translation-exception Address */ unsigned long fsi : 2; /* Access Exception Fetch/Store Indication */ - unsigned long : 6; + unsigned long : 2; + unsigned long b56 : 1; + unsigned long : 3; unsigned long b60 : 1; unsigned long b61 : 1; unsigned long as : 2; /* ASCE Identifier */ @@ -497,14 +499,18 @@ static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva, switch (code) { case PGM_PROTECTION: switch (prot) { + case PROT_TYPE_LA: + tec->b56 = 1; + break; + case PROT_TYPE_KEYC: + tec->b60 = 1; + break; case PROT_TYPE_ALC: tec->b60 = 1; /* FALL THROUGH */ case PROT_TYPE_DAT: tec->b61 = 1; break; - default: /* LA and KEYC set b61 to 0, other params undefined */ - return code; } /* FALL THROUGH */ case PGM_ASCE_TYPE: diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c index 8cc53b1e6d03..b6ac8dfee001 100644 --- a/arch/s390/tools/gen_facilities.c +++ b/arch/s390/tools/gen_facilities.c @@ -80,6 +80,7 @@ static struct facility_def facility_defs[] = { 76, /* msa extension 3 */ 77, /* msa extension 4 */ 78, /* enhanced-DAT 2 */ + 131, /* enhanced-SOP 2 and side-effect */ -1 /* END */ } }, -- cgit v1.2.3 From cd1836f583d78bdd15ef748f4d85bf007569c7ad Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Thu, 4 Aug 2016 09:57:36 +0200 Subject: KVM: s390: instruction-execution-protection support The new Instruction Execution Protection needs to be enabled before the guest can use it. Therefore we pass the IEP facility bit to the guest and enable IEP interpretation. Signed-off-by: Janosch Frank Reviewed-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 2 ++ arch/s390/kvm/vsie.c | 3 +++ arch/s390/tools/gen_facilities.c | 1 + 3 files changed, 6 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index bec71e902be3..e5130818fb03 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -1938,6 +1938,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu) if (test_kvm_facility(vcpu->kvm, 8) && sclp.has_pfmfi) vcpu->arch.sie_block->ecb2 |= 0x08; + if (test_kvm_facility(vcpu->kvm, 130)) + vcpu->arch.sie_block->ecb2 |= 0x20; vcpu->arch.sie_block->eca = 0x1002000U; if (sclp.has_cei) vcpu->arch.sie_block->eca |= 0x80000000U; diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index d8673e243f13..ed62c6d57d93 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -324,6 +324,9 @@ static int shadow_scb(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) /* Run-time-Instrumentation */ if (test_kvm_facility(vcpu->kvm, 64)) scb_s->ecb3 |= scb_o->ecb3 & 0x01U; + /* Instruction Execution Prevention */ + if (test_kvm_facility(vcpu->kvm, 130)) + scb_s->ecb2 |= scb_o->ecb2 & 0x20U; if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_SIIF)) scb_s->eca |= scb_o->eca & 0x00000001U; if (test_kvm_cpu_feat(vcpu->kvm, KVM_S390_VM_CPU_FEAT_IB)) diff --git a/arch/s390/tools/gen_facilities.c b/arch/s390/tools/gen_facilities.c index b6ac8dfee001..0cf802de52a1 100644 --- a/arch/s390/tools/gen_facilities.c +++ b/arch/s390/tools/gen_facilities.c @@ -80,6 +80,7 @@ static struct facility_def facility_defs[] = { 76, /* msa extension 3 */ 77, /* msa extension 4 */ 78, /* enhanced-DAT 2 */ + 130, /* instruction-execution-protection */ 131, /* enhanced-SOP 2 and side-effect */ -1 /* END */ } -- cgit v1.2.3 From d051ae531324fb5130366d47e05bf8eadeb95535 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 13 Dec 2016 14:25:32 +0100 Subject: KVM: s390: get rid of bogus cc initialization The plo inline assembly has a cc output operand that is always written to and is also as such an operand declared. Therefore the compiler is free to omit the rather pointless and misleading initialization. Get rid of this. Signed-off-by: Heiko Carstens Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index e5130818fb03..4f74511015b8 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -217,7 +217,7 @@ static void allow_cpu_feat(unsigned long nr) static inline int plo_test_bit(unsigned char nr) { register unsigned long r0 asm("0") = (unsigned long) nr | 0x100; - int cc = 3; /* subfunction not available */ + int cc; asm volatile( /* Parameter registers are ignored for "test bit" */ -- cgit v1.2.3 From 27f67f8727843fbbbcd05a003183af79693759e9 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 9 Dec 2016 12:44:40 +0100 Subject: KVM: s390: Get rid of ar_t sparse with __CHECK_ENDIAN__ shows that ar_t was never properly used across KVM on s390. We can now: - fix all places - do not make ar_t special Since ar_t is just used as a register number (no endianness issues for u8), and all other register numbers are also just plain int variables, let's just use u8, which matches the __u8 in the userspace ABI for the memop ioctl. Signed-off-by: Christian Borntraeger Acked-by: Janosch Frank Reviewed-by: Cornelia Huck --- arch/s390/kvm/gaccess.c | 14 +++++++------- arch/s390/kvm/gaccess.h | 10 +++++----- arch/s390/kvm/kvm-s390.h | 10 ++++------ arch/s390/kvm/priv.c | 30 +++++++++++++++--------------- 4 files changed, 31 insertions(+), 33 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 6e94705efd4e..4492c9363178 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -373,7 +373,7 @@ void ipte_unlock(struct kvm_vcpu *vcpu) ipte_unlock_simple(vcpu); } -static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, ar_t ar, +static int ar_translation(struct kvm_vcpu *vcpu, union asce *asce, u8 ar, enum gacc_mode mode) { union alet alet; @@ -487,7 +487,7 @@ enum prot_type { }; static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva, - ar_t ar, enum gacc_mode mode, enum prot_type prot) + u8 ar, enum gacc_mode mode, enum prot_type prot) { struct kvm_s390_pgm_info *pgm = &vcpu->arch.pgm; struct trans_exc_code_bits *tec; @@ -545,7 +545,7 @@ static int trans_exc(struct kvm_vcpu *vcpu, int code, unsigned long gva, } static int get_vcpu_asce(struct kvm_vcpu *vcpu, union asce *asce, - unsigned long ga, ar_t ar, enum gacc_mode mode) + unsigned long ga, u8 ar, enum gacc_mode mode) { int rc; struct psw_bits psw = psw_bits(vcpu->arch.sie_block->gpsw); @@ -777,7 +777,7 @@ static int low_address_protection_enabled(struct kvm_vcpu *vcpu, return 1; } -static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, +static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, unsigned long *pages, unsigned long nr_pages, const union asce asce, enum gacc_mode mode) { @@ -809,7 +809,7 @@ static int guest_page_range(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, return 0; } -int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, +int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, unsigned long len, enum gacc_mode mode) { psw_t *psw = &vcpu->arch.sie_block->gpsw; @@ -883,7 +883,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, * Note: The IPTE lock is not taken during this function, so the caller * has to take care of this. */ -int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, +int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, unsigned long *gpa, enum gacc_mode mode) { psw_t *psw = &vcpu->arch.sie_block->gpsw; @@ -916,7 +916,7 @@ int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, /** * check_gva_range - test a range of guest virtual addresses for accessibility */ -int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, +int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, unsigned long length, enum gacc_mode mode) { unsigned long gpa; diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index 8756569ad938..5c9cc18f3b4a 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -162,11 +162,11 @@ enum gacc_mode { }; int guest_translate_address(struct kvm_vcpu *vcpu, unsigned long gva, - ar_t ar, unsigned long *gpa, enum gacc_mode mode); -int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, ar_t ar, + u8 ar, unsigned long *gpa, enum gacc_mode mode); +int check_gva_range(struct kvm_vcpu *vcpu, unsigned long gva, u8 ar, unsigned long length, enum gacc_mode mode); -int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, +int access_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, unsigned long len, enum gacc_mode mode); int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, @@ -218,7 +218,7 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra, * if data has been changed in guest space in case of an exception. */ static inline __must_check -int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, +int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, unsigned long len) { return access_guest(vcpu, ga, ar, data, len, GACC_STORE); @@ -238,7 +238,7 @@ int write_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, * data will be copied from guest space to kernel space. */ static inline __must_check -int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, ar_t ar, void *data, +int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, unsigned long len) { return access_guest(vcpu, ga, ar, data, len, GACC_FETCH); diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 3a4e97f1a9e6..22a0a7ceffad 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -86,9 +86,7 @@ static inline void kvm_s390_set_prefix(struct kvm_vcpu *vcpu, u32 prefix) kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu); } -typedef u8 __bitwise ar_t; - -static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu, ar_t *ar) +static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu, u8 *ar) { u32 base2 = vcpu->arch.sie_block->ipb >> 28; u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); @@ -101,7 +99,7 @@ static inline u64 kvm_s390_get_base_disp_s(struct kvm_vcpu *vcpu, ar_t *ar) static inline void kvm_s390_get_base_disp_sse(struct kvm_vcpu *vcpu, u64 *address1, u64 *address2, - ar_t *ar_b1, ar_t *ar_b2) + u8 *ar_b1, u8 *ar_b2) { u32 base1 = (vcpu->arch.sie_block->ipb & 0xf0000000) >> 28; u32 disp1 = (vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16; @@ -125,7 +123,7 @@ static inline void kvm_s390_get_regs_rre(struct kvm_vcpu *vcpu, int *r1, int *r2 *r2 = (vcpu->arch.sie_block->ipb & 0x000f0000) >> 16; } -static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu, ar_t *ar) +static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu, u8 *ar) { u32 base2 = vcpu->arch.sie_block->ipb >> 28; u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16) + @@ -140,7 +138,7 @@ static inline u64 kvm_s390_get_base_disp_rsy(struct kvm_vcpu *vcpu, ar_t *ar) return (base2 ? vcpu->run->s.regs.gprs[base2] : 0) + (long)(int)disp2; } -static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu, ar_t *ar) +static inline u64 kvm_s390_get_base_disp_rs(struct kvm_vcpu *vcpu, u8 *ar) { u32 base2 = vcpu->arch.sie_block->ipb >> 28; u32 disp2 = ((vcpu->arch.sie_block->ipb & 0x0fff0000) >> 16); diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index e18435355c16..1ecc1cffdf7c 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -54,7 +54,7 @@ int kvm_s390_handle_aa(struct kvm_vcpu *vcpu) static int handle_set_clock(struct kvm_vcpu *vcpu) { int rc; - ar_t ar; + u8 ar; u64 op2, val; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) @@ -79,7 +79,7 @@ static int handle_set_prefix(struct kvm_vcpu *vcpu) u64 operand2; u32 address; int rc; - ar_t ar; + u8 ar; vcpu->stat.instruction_spx++; @@ -117,7 +117,7 @@ static int handle_store_prefix(struct kvm_vcpu *vcpu) u64 operand2; u32 address; int rc; - ar_t ar; + u8 ar; vcpu->stat.instruction_stpx++; @@ -147,7 +147,7 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu) u16 vcpu_id = vcpu->vcpu_id; u64 ga; int rc; - ar_t ar; + u8 ar; vcpu->stat.instruction_stap++; @@ -380,7 +380,7 @@ static int handle_tpi(struct kvm_vcpu *vcpu) u32 tpi_data[3]; int rc; u64 addr; - ar_t ar; + u8 ar; addr = kvm_s390_get_base_disp_s(vcpu, &ar); if (addr & 3) @@ -548,7 +548,7 @@ int kvm_s390_handle_lpsw(struct kvm_vcpu *vcpu) psw_compat_t new_psw; u64 addr; int rc; - ar_t ar; + u8 ar; if (gpsw->mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); @@ -575,7 +575,7 @@ static int handle_lpswe(struct kvm_vcpu *vcpu) psw_t new_psw; u64 addr; int rc; - ar_t ar; + u8 ar; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP); @@ -597,7 +597,7 @@ static int handle_stidp(struct kvm_vcpu *vcpu) u64 stidp_data = vcpu->kvm->arch.model.cpuid; u64 operand2; int rc; - ar_t ar; + u8 ar; vcpu->stat.instruction_stidp++; @@ -644,7 +644,7 @@ static void handle_stsi_3_2_2(struct kvm_vcpu *vcpu, struct sysinfo_3_2_2 *mem) ASCEBC(mem->vm[0].cpi, 16); } -static void insert_stsi_usr_data(struct kvm_vcpu *vcpu, u64 addr, ar_t ar, +static void insert_stsi_usr_data(struct kvm_vcpu *vcpu, u64 addr, u8 ar, u8 fc, u8 sel1, u16 sel2) { vcpu->run->exit_reason = KVM_EXIT_S390_STSI; @@ -663,7 +663,7 @@ static int handle_stsi(struct kvm_vcpu *vcpu) unsigned long mem = 0; u64 operand2; int rc = 0; - ar_t ar; + u8 ar; vcpu->stat.instruction_stsi++; VCPU_EVENT(vcpu, 3, "STSI: fc: %u sel1: %u sel2: %u", fc, sel1, sel2); @@ -970,7 +970,7 @@ int kvm_s390_handle_lctl(struct kvm_vcpu *vcpu) int reg, rc, nr_regs; u32 ctl_array[16]; u64 ga; - ar_t ar; + u8 ar; vcpu->stat.instruction_lctl++; @@ -1009,7 +1009,7 @@ int kvm_s390_handle_stctl(struct kvm_vcpu *vcpu) int reg, rc, nr_regs; u32 ctl_array[16]; u64 ga; - ar_t ar; + u8 ar; vcpu->stat.instruction_stctl++; @@ -1043,7 +1043,7 @@ static int handle_lctlg(struct kvm_vcpu *vcpu) int reg, rc, nr_regs; u64 ctl_array[16]; u64 ga; - ar_t ar; + u8 ar; vcpu->stat.instruction_lctlg++; @@ -1081,7 +1081,7 @@ static int handle_stctg(struct kvm_vcpu *vcpu) int reg, rc, nr_regs; u64 ctl_array[16]; u64 ga; - ar_t ar; + u8 ar; vcpu->stat.instruction_stctg++; @@ -1132,7 +1132,7 @@ static int handle_tprot(struct kvm_vcpu *vcpu) unsigned long hva, gpa; int ret = 0, cc = 0; bool writable; - ar_t ar; + u8 ar; vcpu->stat.instruction_tprot++; -- cgit v1.2.3 From 53743aa7f14671dea6f3567ddca2f7d97454f3fe Mon Sep 17 00:00:00 2001 From: Maxim Samoylov Date: Wed, 10 Feb 2016 10:31:23 +0100 Subject: KVM: s390: Introduce Vector Enhancements facility 1 to the guest We can directly forward the vector enhancement facility 1 to the guest if available and VX is requested by user space. Please note that user space will have to take care of the final state of the facility bit when migrating to older machines. Reviewed-by: David Hildenbrand Signed-off-by: Maxim Samoylov Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 4f74511015b8..1fd4b854efdc 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -505,6 +505,10 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) } else if (MACHINE_HAS_VX) { set_kvm_facility(kvm->arch.model.fac_mask, 129); set_kvm_facility(kvm->arch.model.fac_list, 129); + if (test_facility(135)) { + set_kvm_facility(kvm->arch.model.fac_mask, 135); + set_kvm_facility(kvm->arch.model.fac_list, 135); + } r = 0; } else r = -EINVAL; -- cgit v1.2.3 From 2f87d942be9d0f86e44fbcbd473264c26c7f1809 Mon Sep 17 00:00:00 2001 From: Guenther Hutzl Date: Fri, 3 Jun 2016 14:37:17 +0200 Subject: KVM: s390: Introduce BCD Vector Instructions to the guest We can directly forward the vector BCD instructions to the guest if available and VX is requested by user space. Please note that user space will have to take care of the final state of the facility bit when migrating to older machines. Signed-off-by: Guenther Hutzl Reviewed-by: Christian Borntraeger Reviewed-by: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 1fd4b854efdc..69401b8d4521 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -505,6 +505,10 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, struct kvm_enable_cap *cap) } else if (MACHINE_HAS_VX) { set_kvm_facility(kvm->arch.model.fac_mask, 129); set_kvm_facility(kvm->arch.model.fac_list, 129); + if (test_facility(134)) { + set_kvm_facility(kvm->arch.model.fac_mask, 134); + set_kvm_facility(kvm->arch.model.fac_list, 134); + } if (test_facility(135)) { set_kvm_facility(kvm->arch.model.fac_mask, 135); set_kvm_facility(kvm->arch.model.fac_list, 135); -- cgit v1.2.3 From f41711788c9c281a61c8cf3222dca8a0e74a4fb3 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 24 May 2016 12:33:52 +0200 Subject: KVM: s390: guestdbg: filter i-fetch events on icpts We already filter PER events reported via icpt code 8. For icpt code 4 and 56, this is still missing. So let's properly detect if we have a debugging event and if we have to inject a PER i-fetch event into the guest at all. Signed-off-by: David Hildenbrand Reviewed-by: Christian Borntraeger Cc: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/guestdbg.c | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c index d7c6a7f53ced..a2077833ab01 100644 --- a/arch/s390/kvm/guestdbg.c +++ b/arch/s390/kvm/guestdbg.c @@ -388,14 +388,13 @@ void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu) #define per_write_wp_event(code) \ (code & (PER_CODE_STORE | PER_CODE_STORE_REAL)) -static int debug_exit_required(struct kvm_vcpu *vcpu) +static int debug_exit_required(struct kvm_vcpu *vcpu, u8 perc, + unsigned long peraddr) { - u8 perc = vcpu->arch.sie_block->perc; struct kvm_debug_exit_arch *debug_exit = &vcpu->run->debug.arch; struct kvm_hw_wp_info_arch *wp_info = NULL; struct kvm_hw_bp_info_arch *bp_info = NULL; unsigned long addr = vcpu->arch.sie_block->gpsw.addr; - unsigned long peraddr = vcpu->arch.sie_block->peraddr; if (guestdbg_hw_bp_enabled(vcpu)) { if (per_write_wp_event(perc) && @@ -442,6 +441,8 @@ exit_required: int kvm_s390_handle_per_ifetch_icpt(struct kvm_vcpu *vcpu) { + const u64 cr10 = vcpu->arch.sie_block->gcr[10]; + const u64 cr11 = vcpu->arch.sie_block->gcr[11]; const u8 ilen = kvm_s390_get_ilen(vcpu); struct kvm_s390_pgm_info pgm_info = { .code = PGM_PER, @@ -454,7 +455,19 @@ int kvm_s390_handle_per_ifetch_icpt(struct kvm_vcpu *vcpu) * instruction generated a PER i-fetch event. PER address therefore * points at the previous PSW address (could be an EXECUTE function). */ - return kvm_s390_inject_prog_irq(vcpu, &pgm_info); + if (!guestdbg_enabled(vcpu)) + return kvm_s390_inject_prog_irq(vcpu, &pgm_info); + + if (debug_exit_required(vcpu, pgm_info.per_code, pgm_info.per_address)) + vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING; + + if (!guest_per_enabled(vcpu) || + !(vcpu->arch.sie_block->gcr[9] & PER_EVENT_IFETCH)) + return 0; + + if (in_addr_range(pgm_info.per_address, cr10, cr11)) + return kvm_s390_inject_prog_irq(vcpu, &pgm_info); + return 0; } static void filter_guest_per_event(struct kvm_vcpu *vcpu) @@ -500,7 +513,8 @@ void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu) { int new_as; - if (debug_exit_required(vcpu)) + if (debug_exit_required(vcpu, vcpu->arch.sie_block->perc, + vcpu->arch.sie_block->peraddr)) vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING; filter_guest_per_event(vcpu); -- cgit v1.2.3 From 3fa8cad7402cfe982a60d746609e89aafe15d131 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 24 May 2016 12:00:49 +0200 Subject: KVM: s390: prepare to read random guest instructions We will have to read instructions not residing at the current PSW address. Reviewed-by: Eric Farman Signed-off-by: David Hildenbrand Cc: David Hildenbrand Signed-off-by: Christian Borntraeger --- arch/s390/kvm/gaccess.h | 9 +++++---- arch/s390/kvm/kvm-s390.c | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h index 5c9cc18f3b4a..7ce47fd36f28 100644 --- a/arch/s390/kvm/gaccess.h +++ b/arch/s390/kvm/gaccess.h @@ -247,10 +247,11 @@ int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, /** * read_guest_instr - copy instruction data from guest space to kernel space * @vcpu: virtual cpu + * @ga: guest address * @data: destination address in kernel space * @len: number of bytes to copy * - * Copy @len bytes from the current psw address (guest space) to @data (kernel + * Copy @len bytes from the given address (guest space) to @data (kernel * space). * * The behaviour of read_guest_instr is identical to read_guest, except that @@ -258,10 +259,10 @@ int read_guest(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar, void *data, * address-space mode. */ static inline __must_check -int read_guest_instr(struct kvm_vcpu *vcpu, void *data, unsigned long len) +int read_guest_instr(struct kvm_vcpu *vcpu, unsigned long ga, void *data, + unsigned long len) { - return access_guest(vcpu, vcpu->arch.sie_block->gpsw.addr, 0, data, len, - GACC_IFETCH); + return access_guest(vcpu, ga, 0, data, len, GACC_IFETCH); } /** diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 69401b8d4521..66e73f4ed64b 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2588,7 +2588,7 @@ static int vcpu_post_run_fault_in_sie(struct kvm_vcpu *vcpu) * to look up the current opcode to get the length of the instruction * to be able to forward the PSW. */ - rc = read_guest_instr(vcpu, &opcode, 1); + rc = read_guest_instr(vcpu, vcpu->arch.sie_block->gpsw.addr, &opcode, 1); ilen = insn_length(opcode); if (rc < 0) { return rc; -- cgit v1.2.3 From a69cbe81b2f38437113c05019a134a4731a3aa78 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 24 May 2016 12:40:11 +0200 Subject: KVM: s390: guestdbg: filter PER i-fetch on EXECUTE properly When we get a PER i-fetch event on an EXECUTE or EXECUTE RELATIVE LONG instruction, because the executed instruction generated a PER i-fetch event, then the PER address points at the EXECUTE function, not the fetched one. Therefore, when filtering PER events, we have to take care of the really fetched instruction, which we can only get by reading in guest virtual memory. For icpt code 4 and 56, we directly have additional information about an EXECUTE instruction at hand. For icpt code 8, we always have to read in guest virtual memory. Signed-off-by: David Hildenbrand Reviewed-by: Christian Borntraeger Signed-off-by: Christian Borntraeger [small fixes] --- arch/s390/kvm/guestdbg.c | 98 ++++++++++++++++++++++++++++++++++++++++++----- arch/s390/kvm/intercept.c | 4 +- arch/s390/kvm/kvm-s390.h | 2 +- 3 files changed, 93 insertions(+), 11 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/kvm/guestdbg.c b/arch/s390/kvm/guestdbg.c index a2077833ab01..23d9a4e12da1 100644 --- a/arch/s390/kvm/guestdbg.c +++ b/arch/s390/kvm/guestdbg.c @@ -436,6 +436,64 @@ exit_required: return 1; } +static int per_fetched_addr(struct kvm_vcpu *vcpu, unsigned long *addr) +{ + u8 exec_ilen = 0; + u16 opcode[3]; + int rc; + + if (vcpu->arch.sie_block->icptcode == ICPT_PROGI) { + /* PER address references the fetched or the execute instr */ + *addr = vcpu->arch.sie_block->peraddr; + /* + * Manually detect if we have an EXECUTE instruction. As + * instructions are always 2 byte aligned we can read the + * first two bytes unconditionally + */ + rc = read_guest_instr(vcpu, *addr, &opcode, 2); + if (rc) + return rc; + if (opcode[0] >> 8 == 0x44) + exec_ilen = 4; + if ((opcode[0] & 0xff0f) == 0xc600) + exec_ilen = 6; + } else { + /* instr was suppressed, calculate the responsible instr */ + *addr = __rewind_psw(vcpu->arch.sie_block->gpsw, + kvm_s390_get_ilen(vcpu)); + if (vcpu->arch.sie_block->icptstatus & 0x01) { + exec_ilen = (vcpu->arch.sie_block->icptstatus & 0x60) >> 4; + if (!exec_ilen) + exec_ilen = 4; + } + } + + if (exec_ilen) { + /* read the complete EXECUTE instr to detect the fetched addr */ + rc = read_guest_instr(vcpu, *addr, &opcode, exec_ilen); + if (rc) + return rc; + if (exec_ilen == 6) { + /* EXECUTE RELATIVE LONG - RIL-b format */ + s32 rl = *((s32 *) (opcode + 1)); + + /* rl is a _signed_ 32 bit value specifying halfwords */ + *addr += (u64)(s64) rl * 2; + } else { + /* EXECUTE - RX-a format */ + u32 base = (opcode[1] & 0xf000) >> 12; + u32 disp = opcode[1] & 0x0fff; + u32 index = opcode[0] & 0x000f; + + *addr = base ? vcpu->run->s.regs.gprs[base] : 0; + *addr += index ? vcpu->run->s.regs.gprs[index] : 0; + *addr += disp; + } + *addr = kvm_s390_logical_to_effective(vcpu, *addr); + } + return 0; +} + #define guest_per_enabled(vcpu) \ (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PER) @@ -449,6 +507,8 @@ int kvm_s390_handle_per_ifetch_icpt(struct kvm_vcpu *vcpu) .per_code = PER_CODE_IFETCH, .per_address = __rewind_psw(vcpu->arch.sie_block->gpsw, ilen), }; + unsigned long fetched_addr; + int rc; /* * The PSW points to the next instruction, therefore the intercepted @@ -465,21 +525,29 @@ int kvm_s390_handle_per_ifetch_icpt(struct kvm_vcpu *vcpu) !(vcpu->arch.sie_block->gcr[9] & PER_EVENT_IFETCH)) return 0; - if (in_addr_range(pgm_info.per_address, cr10, cr11)) + rc = per_fetched_addr(vcpu, &fetched_addr); + if (rc < 0) + return rc; + if (rc) + /* instruction-fetching exceptions */ + return kvm_s390_inject_program_int(vcpu, PGM_ADDRESSING); + + if (in_addr_range(fetched_addr, cr10, cr11)) return kvm_s390_inject_prog_irq(vcpu, &pgm_info); return 0; } -static void filter_guest_per_event(struct kvm_vcpu *vcpu) +static int filter_guest_per_event(struct kvm_vcpu *vcpu) { const u8 perc = vcpu->arch.sie_block->perc; - u64 peraddr = vcpu->arch.sie_block->peraddr; u64 addr = vcpu->arch.sie_block->gpsw.addr; u64 cr9 = vcpu->arch.sie_block->gcr[9]; u64 cr10 = vcpu->arch.sie_block->gcr[10]; u64 cr11 = vcpu->arch.sie_block->gcr[11]; /* filter all events, demanded by the guest */ u8 guest_perc = perc & (cr9 >> 24) & PER_CODE_MASK; + unsigned long fetched_addr; + int rc; if (!guest_per_enabled(vcpu)) guest_perc = 0; @@ -491,9 +559,17 @@ static void filter_guest_per_event(struct kvm_vcpu *vcpu) guest_perc &= ~PER_CODE_BRANCH; /* filter "instruction-fetching" events */ - if (guest_perc & PER_CODE_IFETCH && - !in_addr_range(peraddr, cr10, cr11)) - guest_perc &= ~PER_CODE_IFETCH; + if (guest_perc & PER_CODE_IFETCH) { + rc = per_fetched_addr(vcpu, &fetched_addr); + if (rc < 0) + return rc; + /* + * Don't inject an irq on exceptions. This would make handling + * on icpt code 8 very complex (as PSW was already rewound). + */ + if (rc || !in_addr_range(fetched_addr, cr10, cr11)) + guest_perc &= ~PER_CODE_IFETCH; + } /* All other PER events will be given to the guest */ /* TODO: Check altered address/address space */ @@ -502,6 +578,7 @@ static void filter_guest_per_event(struct kvm_vcpu *vcpu) if (!guest_perc) vcpu->arch.sie_block->iprcc &= ~PGM_PER; + return 0; } #define pssec(vcpu) (vcpu->arch.sie_block->gcr[1] & _ASCE_SPACE_SWITCH) @@ -509,15 +586,17 @@ static void filter_guest_per_event(struct kvm_vcpu *vcpu) #define old_ssec(vcpu) ((vcpu->arch.sie_block->tecmc >> 31) & 0x1) #define old_as_is_home(vcpu) !(vcpu->arch.sie_block->tecmc & 0xffff) -void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu) +int kvm_s390_handle_per_event(struct kvm_vcpu *vcpu) { - int new_as; + int rc, new_as; if (debug_exit_required(vcpu, vcpu->arch.sie_block->perc, vcpu->arch.sie_block->peraddr)) vcpu->guest_debug |= KVM_GUESTDBG_EXIT_PENDING; - filter_guest_per_event(vcpu); + rc = filter_guest_per_event(vcpu); + if (rc) + return rc; /* * Only RP, SAC, SACF, PT, PTI, PR, PC instructions can trigger @@ -546,4 +625,5 @@ void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu) (pssec(vcpu) || old_ssec(vcpu))) vcpu->arch.sie_block->iprcc = PGM_SPACE_SWITCH; } + return 0; } diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 7a27eebab28a..8b13f7098c61 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -238,7 +238,9 @@ static int handle_prog(struct kvm_vcpu *vcpu) vcpu->stat.exit_program_interruption++; if (guestdbg_enabled(vcpu) && per_event(vcpu)) { - kvm_s390_handle_per_event(vcpu); + rc = kvm_s390_handle_per_event(vcpu); + if (rc) + return rc; /* the interrupt might have been filtered out completely */ if (vcpu->arch.sie_block->iprcc == 0) return 0; diff --git a/arch/s390/kvm/kvm-s390.h b/arch/s390/kvm/kvm-s390.h index 22a0a7ceffad..af9fa91a0c91 100644 --- a/arch/s390/kvm/kvm-s390.h +++ b/arch/s390/kvm/kvm-s390.h @@ -377,7 +377,7 @@ int kvm_s390_import_bp_data(struct kvm_vcpu *vcpu, void kvm_s390_clear_bp_data(struct kvm_vcpu *vcpu); void kvm_s390_prepare_debug_exit(struct kvm_vcpu *vcpu); int kvm_s390_handle_per_ifetch_icpt(struct kvm_vcpu *vcpu); -void kvm_s390_handle_per_event(struct kvm_vcpu *vcpu); +int kvm_s390_handle_per_event(struct kvm_vcpu *vcpu); /* support for Basic/Extended SCA handling */ static inline union ipte_control *kvm_s390_get_ipte_control(struct kvm *kvm) -- cgit v1.2.3 From 4bead2a423ea5268b0ab3cba058e215c65ee2cbd Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Fri, 27 Jan 2017 10:23:59 +0100 Subject: KVM: s390: Fix RRBE return code not being CC reset_guest_reference_bit needs to return the CC, so we can set it in the guest PSW when emulating RRBE. Right now it only returns 0. Let's fix that. Signed-off-by: Janosch Frank Reviewed-by: Christian Borntraeger Signed-off-by: Christian Borntraeger --- arch/s390/mm/pgtable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390') diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 7a1897c51c54..f70db837ddc4 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -741,7 +741,7 @@ int reset_guest_reference_bit(struct mm_struct *mm, unsigned long addr) pgste_set_unlock(ptep, new); pte_unmap_unlock(ptep, ptl); - return 0; + return cc; } EXPORT_SYMBOL(reset_guest_reference_bit); -- cgit v1.2.3 From a8c39dd77cb9fad0d0e5c5e6581851bdcbc1e6f6 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 18 Jan 2017 16:01:02 +0100 Subject: KVM: s390: Add debug logging to basic cpu model interface Let's log something for changes in facilities, cpuid and ibc now that we have a cpu model in QEMU. All of these calls are pretty seldom, so we will not spill the log, the they will help to understand pontential guest issues, for example if some instructions are fenced off. As the s390 debug feature has a limited amount of parameters and strings must not go away we limit the facility printing to 3 double words, instead of building that list dynamically. This should be enough for several years. If we ever exceed 3 double words then the logging will be incomplete but no functional impact will happen. Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 4078ba630689..dabd3b15bf11 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -829,6 +829,13 @@ static int kvm_s390_set_processor(struct kvm *kvm, struct kvm_device_attr *attr) } memcpy(kvm->arch.model.fac_list, proc->fac_list, S390_ARCH_FAC_LIST_SIZE_BYTE); + VM_EVENT(kvm, 3, "SET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx", + kvm->arch.model.ibc, + kvm->arch.model.cpuid); + VM_EVENT(kvm, 3, "SET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx", + kvm->arch.model.fac_list[0], + kvm->arch.model.fac_list[1], + kvm->arch.model.fac_list[2]); } else ret = -EFAULT; kfree(proc); @@ -902,6 +909,13 @@ static int kvm_s390_get_processor(struct kvm *kvm, struct kvm_device_attr *attr) proc->ibc = kvm->arch.model.ibc; memcpy(&proc->fac_list, kvm->arch.model.fac_list, S390_ARCH_FAC_LIST_SIZE_BYTE); + VM_EVENT(kvm, 3, "GET: guest ibc: 0x%4.4x, guest cpuid: 0x%16.16llx", + kvm->arch.model.ibc, + kvm->arch.model.cpuid); + VM_EVENT(kvm, 3, "GET: guest faclist: 0x%16.16llx.%16.16llx.%16.16llx", + kvm->arch.model.fac_list[0], + kvm->arch.model.fac_list[1], + kvm->arch.model.fac_list[2]); if (copy_to_user((void __user *)attr->addr, proc, sizeof(*proc))) ret = -EFAULT; kfree(proc); @@ -925,6 +939,17 @@ static int kvm_s390_get_machine(struct kvm *kvm, struct kvm_device_attr *attr) S390_ARCH_FAC_LIST_SIZE_BYTE); memcpy((unsigned long *)&mach->fac_list, S390_lowcore.stfle_fac_list, sizeof(S390_lowcore.stfle_fac_list)); + VM_EVENT(kvm, 3, "GET: host ibc: 0x%4.4x, host cpuid: 0x%16.16llx", + kvm->arch.model.ibc, + kvm->arch.model.cpuid); + VM_EVENT(kvm, 3, "GET: host facmask: 0x%16.16llx.%16.16llx.%16.16llx", + mach->fac_mask[0], + mach->fac_mask[1], + mach->fac_mask[2]); + VM_EVENT(kvm, 3, "GET: host faclist: 0x%16.16llx.%16.16llx.%16.16llx", + mach->fac_list[0], + mach->fac_list[1], + mach->fac_list[2]); if (copy_to_user((void __user *)attr->addr, mach, sizeof(*mach))) ret = -EFAULT; kfree(mach); -- cgit v1.2.3 From e1e8a9624f7ba8ead4f056ff558ed070e86fa747 Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Thu, 2 Feb 2017 16:39:31 +0100 Subject: KVM: s390: Disable dirty log retrieval for UCONTROL guests User controlled KVM guests do not support the dirty log, as they have no single gmap that we can check for changes. As they have no single gmap, kvm->arch.gmap is NULL and all further referencing to it for dirty checking will result in a NULL dereference. Let's return -EINVAL if a caller tries to sync dirty logs for a UCONTROL guest. Fixes: 15f36eb ("KVM: s390: Add proper dirty bitmap support to S390 kvm.") Cc: # 3.16+ Signed-off-by: Janosch Frank Reported-by: Martin Schwidefsky Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- arch/s390/kvm/kvm-s390.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index dabd3b15bf11..502de74ea984 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -442,6 +442,9 @@ int kvm_vm_ioctl_get_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot; int is_dirty = 0; + if (kvm_is_ucontrol(kvm)) + return -EINVAL; + mutex_lock(&kvm->slots_lock); r = -EINVAL; -- cgit v1.2.3 From fb7dc1d4ddce744c8d8e1aca19d4982102cf72e1 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 26 Jan 2017 20:45:33 +0100 Subject: KVM: s390: detect some program check loops Sometimes (e.g. early boot) a guest is broken in such ways that it loops 100% delivering operation exceptions (illegal operation) but the pgm new PSW is not set properly. This will result in code being read from address zero, which usually contains another illegal op. Let's detect this case and return to userspace. Instead of only detecting this for address zero apply a heuristic that will work for any program check new psw. We do not want guest problem state to be able to trigger a guest panic, e.g. by faulting on an address that is the same as the program check new PSW, so we check for the problem state bit being off. With proper handling in userspace we a: get rid of CPU consumption of such broken guests b: keep the program old PSW. This allows to find out the original illegal operation - making debugging such early boot issues much easier than with single stepping Signed-off-by: Christian Borntraeger Reviewed-by: Cornelia Huck --- arch/s390/kvm/intercept.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'arch/s390') diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 8b13f7098c61..59920f96ebc0 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -361,6 +361,9 @@ static int handle_partial_execution(struct kvm_vcpu *vcpu) static int handle_operexc(struct kvm_vcpu *vcpu) { + psw_t oldpsw, newpsw; + int rc; + vcpu->stat.exit_operation_exception++; trace_kvm_s390_handle_operexc(vcpu, vcpu->arch.sie_block->ipa, vcpu->arch.sie_block->ipb); @@ -371,6 +374,24 @@ static int handle_operexc(struct kvm_vcpu *vcpu) if (vcpu->arch.sie_block->ipa == 0 && vcpu->kvm->arch.user_instr0) return -EOPNOTSUPP; + rc = read_guest_lc(vcpu, __LC_PGM_NEW_PSW, &newpsw, sizeof(psw_t)); + if (rc) + return rc; + /* + * Avoid endless loops of operation exceptions, if the pgm new + * PSW will cause a new operation exception. + * The heuristic checks if the pgm new psw is within 6 bytes before + * the faulting psw address (with same DAT, AS settings) and the + * new psw is not a wait psw and the fault was not triggered by + * problem state. + */ + oldpsw = vcpu->arch.sie_block->gpsw; + if (oldpsw.addr - newpsw.addr <= 6 && + !(newpsw.mask & PSW_MASK_WAIT) && + !(oldpsw.mask & PSW_MASK_PSTATE) && + (newpsw.mask & PSW_MASK_ASC) == (oldpsw.mask & PSW_MASK_ASC) && + (newpsw.mask & PSW_MASK_DAT) == (oldpsw.mask & PSW_MASK_DAT)) + return -EOPNOTSUPP; return kvm_s390_inject_program_int(vcpu, PGM_OPERATION); } -- cgit v1.2.3 From 460df4c1fc7c00829050c08d6368dc6e6beef307 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 8 Feb 2017 11:50:15 +0100 Subject: KVM: race-free exit from KVM_RUN without POSIX signals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The purpose of the KVM_SET_SIGNAL_MASK API is to let userspace "kick" a VCPU out of KVM_RUN through a POSIX signal. A signal is attached to a dummy signal handler; by blocking the signal outside KVM_RUN and unblocking it inside, this possible race is closed: VCPU thread service thread -------------------------------------------------------------- check flag set flag raise signal (signal handler does nothing) KVM_RUN However, one issue with KVM_SET_SIGNAL_MASK is that it has to take tsk->sighand->siglock on every KVM_RUN. This lock is often on a remote NUMA node, because it is on the node of a thread's creator. Taking this lock can be very expensive if there are many userspace exits (as is the case for SMP Windows VMs without Hyper-V reference time counter). As an alternative, we can put the flag directly in kvm_run so that KVM can see it: VCPU thread service thread -------------------------------------------------------------- raise signal signal handler set run->immediate_exit KVM_RUN check run->immediate_exit Reviewed-by: Radim Krčmář Reviewed-by: David Hildenbrand Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 13 ++++++++++++- arch/arm/kvm/arm.c | 4 ++++ arch/mips/kvm/mips.c | 7 ++++++- arch/powerpc/kvm/powerpc.c | 6 +++++- arch/s390/kvm/kvm-s390.c | 4 ++++ arch/x86/kvm/x86.c | 6 +++++- include/uapi/linux/kvm.h | 4 +++- 7 files changed, 39 insertions(+), 5 deletions(-) (limited to 'arch/s390') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index e4f2cdcf78eb..069450938b79 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -3389,7 +3389,18 @@ struct kvm_run { Request that KVM_RUN return when it becomes possible to inject external interrupts into the guest. Useful in conjunction with KVM_INTERRUPT. - __u8 padding1[7]; + __u8 immediate_exit; + +This field is polled once when KVM_RUN starts; if non-zero, KVM_RUN +exits immediately, returning -EINTR. In the common scenario where a +signal is used to "kick" a VCPU out of KVM_RUN, this field can be used +to avoid usage of KVM_SET_SIGNAL_MASK, which has worse scalability. +Rather than blocking the signal outside KVM_RUN, userspace can set up +a signal handler that sets run->immediate_exit to a non-zero value. + +This field is ignored if KVM_CAP_IMMEDIATE_EXIT is not available. + + __u8 padding1[6]; /* out */ __u32 exit_reason; diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 21c493a9e5c9..c9a2103faeb9 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -206,6 +206,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ARM_PSCI_0_2: case KVM_CAP_READONLY_MEM: case KVM_CAP_MP_STATE: + case KVM_CAP_IMMEDIATE_EXIT: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -604,6 +605,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) return ret; } + if (run->immediate_exit) + return -EINTR; + if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 31ee5ee0010b..ed81e5ac1426 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -397,7 +397,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) { - int r = 0; + int r = -EINTR; sigset_t sigsaved; if (vcpu->sigset_active) @@ -409,6 +409,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) vcpu->mmio_needed = 0; } + if (run->immediate_exit) + goto out; + lose_fpu(1); local_irq_disable(); @@ -429,6 +432,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) guest_exit_irqoff(); local_irq_enable(); +out: if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &sigsaved, NULL); @@ -1021,6 +1025,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ENABLE_CAP: case KVM_CAP_READONLY_MEM: case KVM_CAP_SYNC_MMU: + case KVM_CAP_IMMEDIATE_EXIT: r = 1; break; case KVM_CAP_COALESCED_MMIO: diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 2b3e4e620078..1fe1391ba2c2 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -511,6 +511,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ONE_REG: case KVM_CAP_IOEVENTFD: case KVM_CAP_DEVICE_CTRL: + case KVM_CAP_IMMEDIATE_EXIT: r = 1; break; case KVM_CAP_PPC_PAIRED_SINGLES: @@ -1117,7 +1118,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) #endif } - r = kvmppc_vcpu_run(run, vcpu); + if (run->immediate_exit) + r = -EINTR; + else + r = kvmppc_vcpu_run(run, vcpu); if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &sigsaved, NULL); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 502de74ea984..99e35fe0dea8 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -370,6 +370,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_IRQCHIP: case KVM_CAP_VM_ATTRIBUTES: case KVM_CAP_MP_STATE: + case KVM_CAP_IMMEDIATE_EXIT: case KVM_CAP_S390_INJECT_IRQ: case KVM_CAP_S390_USER_SIGP: case KVM_CAP_S390_USER_STSI: @@ -2798,6 +2799,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) int rc; sigset_t sigsaved; + if (kvm_run->immediate_exit) + return -EINTR; + if (guestdbg_exit_pending(vcpu)) { kvm_s390_prepare_debug_exit(vcpu); return 0; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0aa8db229e0a..8d3047c8cce7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2672,6 +2672,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_DISABLE_QUIRKS: case KVM_CAP_SET_BOOT_CPU_ID: case KVM_CAP_SPLIT_IRQCHIP: + case KVM_CAP_IMMEDIATE_EXIT: #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT case KVM_CAP_ASSIGN_DEV_IRQ: case KVM_CAP_PCI_2_3: @@ -7202,7 +7203,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } else WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed); - r = vcpu_run(vcpu); + if (kvm_run->immediate_exit) + r = -EINTR; + else + r = vcpu_run(vcpu); out: post_kvm_run_save(vcpu); diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 7964b970b9ad..f51d5082a377 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -218,7 +218,8 @@ struct kvm_hyperv_exit { struct kvm_run { /* in */ __u8 request_interrupt_window; - __u8 padding1[7]; + __u8 immediate_exit; + __u8 padding1[6]; /* out */ __u32 exit_reason; @@ -881,6 +882,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_SPAPR_RESIZE_HPT 133 #define KVM_CAP_PPC_MMU_RADIX 134 #define KVM_CAP_PPC_MMU_HASH_V3 135 +#define KVM_CAP_IMMEDIATE_EXIT 136 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3