summaryrefslogtreecommitdiff
path: root/arch/s390/kvm
diff options
context:
space:
mode:
authorPaolo Bonzini <pbonzini@redhat.com>2023-02-15 20:35:26 +0300
committerPaolo Bonzini <pbonzini@redhat.com>2023-02-15 20:35:26 +0300
commite4922088f8e90ea163281ba8e69b98f34a7a1b83 (patch)
tree1db574412a5365f285754dd0364d249581535ad8 /arch/s390/kvm
parent33436335e93a1788a58443fc99c5ab320ce4b9d9 (diff)
parent5fc5b94a273655128159186c87662105db8afeb5 (diff)
downloadlinux-e4922088f8e90ea163281ba8e69b98f34a7a1b83.tar.xz
Merge tag 'kvm-s390-next-6.3-1' of https://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into HEAD
* Two more V!=R patches * The last part of the cmpxchg patches * A few fixes
Diffstat (limited to 'arch/s390/kvm')
-rw-r--r--arch/s390/kvm/gaccess.c109
-rw-r--r--arch/s390/kvm/gaccess.h3
-rw-r--r--arch/s390/kvm/interrupt.c11
-rw-r--r--arch/s390/kvm/kvm-s390.c264
4 files changed, 285 insertions, 102 deletions
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 0243b6e38d36..3eb85f254881 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -1162,6 +1162,115 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
}
/**
+ * cmpxchg_guest_abs_with_key() - Perform cmpxchg on guest absolute address.
+ * @kvm: Virtual machine instance.
+ * @gpa: Absolute guest address of the location to be changed.
+ * @len: Operand length of the cmpxchg, required: 1 <= len <= 16. Providing a
+ * non power of two will result in failure.
+ * @old_addr: Pointer to old value. If the location at @gpa contains this value,
+ * the exchange will succeed. After calling cmpxchg_guest_abs_with_key()
+ * *@old_addr contains the value at @gpa before the attempt to
+ * exchange the value.
+ * @new: The value to place at @gpa.
+ * @access_key: The access key to use for the guest access.
+ * @success: output value indicating if an exchange occurred.
+ *
+ * Atomically exchange the value at @gpa by @new, if it contains *@old.
+ * Honors storage keys.
+ *
+ * Return: * 0: successful exchange
+ * * >0: a program interruption code indicating the reason cmpxchg could
+ * not be attempted
+ * * -EINVAL: address misaligned or len not power of two
+ * * -EAGAIN: transient failure (len 1 or 2)
+ * * -EOPNOTSUPP: read-only memslot (should never occur)
+ */
+int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len,
+ __uint128_t *old_addr, __uint128_t new,
+ u8 access_key, bool *success)
+{
+ gfn_t gfn = gpa_to_gfn(gpa);
+ struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
+ bool writable;
+ hva_t hva;
+ int ret;
+
+ if (!IS_ALIGNED(gpa, len))
+ return -EINVAL;
+
+ hva = gfn_to_hva_memslot_prot(slot, gfn, &writable);
+ if (kvm_is_error_hva(hva))
+ return PGM_ADDRESSING;
+ /*
+ * Check if it's a read-only memslot, even though that cannot occur
+ * since those are unsupported.
+ * Don't try to actually handle that case.
+ */
+ if (!writable)
+ return -EOPNOTSUPP;
+
+ hva += offset_in_page(gpa);
+ /*
+ * The cmpxchg_user_key macro depends on the type of "old", so we need
+ * a case for each valid length and get some code duplication as long
+ * as we don't introduce a new macro.
+ */
+ switch (len) {
+ case 1: {
+ u8 old;
+
+ ret = cmpxchg_user_key((u8 __user *)hva, &old, *old_addr, new, access_key);
+ *success = !ret && old == *old_addr;
+ *old_addr = old;
+ break;
+ }
+ case 2: {
+ u16 old;
+
+ ret = cmpxchg_user_key((u16 __user *)hva, &old, *old_addr, new, access_key);
+ *success = !ret && old == *old_addr;
+ *old_addr = old;
+ break;
+ }
+ case 4: {
+ u32 old;
+
+ ret = cmpxchg_user_key((u32 __user *)hva, &old, *old_addr, new, access_key);
+ *success = !ret && old == *old_addr;
+ *old_addr = old;
+ break;
+ }
+ case 8: {
+ u64 old;
+
+ ret = cmpxchg_user_key((u64 __user *)hva, &old, *old_addr, new, access_key);
+ *success = !ret && old == *old_addr;
+ *old_addr = old;
+ break;
+ }
+ case 16: {
+ __uint128_t old;
+
+ ret = cmpxchg_user_key((__uint128_t __user *)hva, &old, *old_addr, new, access_key);
+ *success = !ret && old == *old_addr;
+ *old_addr = old;
+ break;
+ }
+ default:
+ return -EINVAL;
+ }
+ if (*success)
+ mark_page_dirty_in_slot(kvm, slot, gfn);
+ /*
+ * Assume that the fault is caused by protection, either key protection
+ * or user page write protection.
+ */
+ if (ret == -EFAULT)
+ ret = PGM_PROTECTION;
+ return ret;
+}
+
+/**
* guest_translate_address_with_key - translate guest logical into guest absolute address
* @vcpu: virtual cpu
* @gva: Guest virtual address
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index 9408d6cc8e2c..b320d12aa049 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -206,6 +206,9 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
void *data, unsigned long len, enum gacc_mode mode);
+int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len, __uint128_t *old,
+ __uint128_t new, u8 access_key, bool *success);
+
/**
* write_guest_with_key - copy data from kernel space to guest space
* @vcpu: virtual cpu
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 8edb3bee74b6..9250fde1f97d 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -3103,9 +3103,9 @@ static enum hrtimer_restart gisa_vcpu_kicker(struct hrtimer *timer)
static void process_gib_alert_list(void)
{
struct kvm_s390_gisa_interrupt *gi;
+ u32 final, gisa_phys, origin = 0UL;
struct kvm_s390_gisa *gisa;
struct kvm *kvm;
- u32 final, origin = 0UL;
do {
/*
@@ -3131,9 +3131,10 @@ static void process_gib_alert_list(void)
* interruptions asap.
*/
while (origin & GISA_ADDR_MASK) {
- gisa = (struct kvm_s390_gisa *)(u64)origin;
+ gisa_phys = origin;
+ gisa = phys_to_virt(gisa_phys);
origin = gisa->next_alert;
- gisa->next_alert = (u32)(u64)gisa;
+ gisa->next_alert = gisa_phys;
kvm = container_of(gisa, struct sie_page2, gisa)->kvm;
gi = &kvm->arch.gisa_int;
if (hrtimer_active(&gi->timer))
@@ -3417,6 +3418,7 @@ void kvm_s390_gib_destroy(void)
int __init kvm_s390_gib_init(u8 nisc)
{
+ u32 gib_origin;
int rc = 0;
if (!css_general_characteristics.aiv) {
@@ -3438,7 +3440,8 @@ int __init kvm_s390_gib_init(u8 nisc)
}
gib->nisc = nisc;
- if (chsc_sgib((u32)(u64)gib)) {
+ gib_origin = virt_to_phys(gib);
+ if (chsc_sgib(gib_origin)) {
pr_err("Associating the GIB with the AIV facility failed\n");
free_page((unsigned long)gib);
gib = NULL;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index bd25076aa19b..39b36562c043 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -573,7 +573,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_VCPU_RESETS:
case KVM_CAP_SET_GUEST_DEBUG:
case KVM_CAP_S390_DIAG318:
- case KVM_CAP_S390_MEM_OP_EXTENSION:
r = 1;
break;
case KVM_CAP_SET_GUEST_DEBUG2:
@@ -587,6 +586,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_MEM_OP:
r = MEM_OP_MAX_SIZE;
break;
+ case KVM_CAP_S390_MEM_OP_EXTENSION:
+ /*
+ * Flag bits indicating which extensions are supported.
+ * If r > 0, the base extension must also be supported/indicated,
+ * in order to maintain backwards compatibility.
+ */
+ r = KVM_S390_MEMOP_EXTENSION_CAP_BASE |
+ KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG;
+ break;
case KVM_CAP_NR_VCPUS:
case KVM_CAP_MAX_VCPUS:
case KVM_CAP_MAX_VCPU_ID:
@@ -2753,41 +2761,33 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
return r;
}
-static bool access_key_invalid(u8 access_key)
-{
- return access_key > 0xf;
-}
-
-static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
+static int mem_op_validate_common(struct kvm_s390_mem_op *mop, u64 supported_flags)
{
- void __user *uaddr = (void __user *)mop->buf;
- u64 supported_flags;
- void *tmpbuf = NULL;
- int r, srcu_idx;
-
- supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION
- | KVM_S390_MEMOP_F_CHECK_ONLY;
if (mop->flags & ~supported_flags || !mop->size)
return -EINVAL;
if (mop->size > MEM_OP_MAX_SIZE)
return -E2BIG;
- /*
- * This is technically a heuristic only, if the kvm->lock is not
- * taken, it is not guaranteed that the vm is/remains non-protected.
- * This is ok from a kernel perspective, wrongdoing is detected
- * on the access, -EFAULT is returned and the vm may crash the
- * next time it accesses the memory in question.
- * There is no sane usecase to do switching and a memop on two
- * different CPUs at the same time.
- */
- if (kvm_s390_pv_get_handle(kvm))
- return -EINVAL;
if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
- if (access_key_invalid(mop->key))
+ if (mop->key > 0xf)
return -EINVAL;
} else {
mop->key = 0;
}
+ return 0;
+}
+
+static int kvm_s390_vm_mem_op_abs(struct kvm *kvm, struct kvm_s390_mem_op *mop)
+{
+ void __user *uaddr = (void __user *)mop->buf;
+ enum gacc_mode acc_mode;
+ void *tmpbuf = NULL;
+ int r, srcu_idx;
+
+ r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_SKEY_PROTECTION |
+ KVM_S390_MEMOP_F_CHECK_ONLY);
+ if (r)
+ return r;
+
if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
tmpbuf = vmalloc(mop->size);
if (!tmpbuf)
@@ -2801,35 +2801,25 @@ static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
goto out_unlock;
}
- switch (mop->op) {
- case KVM_S390_MEMOP_ABSOLUTE_READ: {
- if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
- r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_FETCH, mop->key);
- } else {
- r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
- mop->size, GACC_FETCH, mop->key);
- if (r == 0) {
- if (copy_to_user(uaddr, tmpbuf, mop->size))
- r = -EFAULT;
- }
- }
- break;
+ acc_mode = mop->op == KVM_S390_MEMOP_ABSOLUTE_READ ? GACC_FETCH : GACC_STORE;
+ if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
+ r = check_gpa_range(kvm, mop->gaddr, mop->size, acc_mode, mop->key);
+ goto out_unlock;
}
- case KVM_S390_MEMOP_ABSOLUTE_WRITE: {
- if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
- r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key);
- } else {
- if (copy_from_user(tmpbuf, uaddr, mop->size)) {
- r = -EFAULT;
- break;
- }
- r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
- mop->size, GACC_STORE, mop->key);
+ if (acc_mode == GACC_FETCH) {
+ r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
+ mop->size, GACC_FETCH, mop->key);
+ if (r)
+ goto out_unlock;
+ if (copy_to_user(uaddr, tmpbuf, mop->size))
+ r = -EFAULT;
+ } else {
+ if (copy_from_user(tmpbuf, uaddr, mop->size)) {
+ r = -EFAULT;
+ goto out_unlock;
}
- break;
- }
- default:
- r = -EINVAL;
+ r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
+ mop->size, GACC_STORE, mop->key);
}
out_unlock:
@@ -2839,6 +2829,75 @@ out_unlock:
return r;
}
+static int kvm_s390_vm_mem_op_cmpxchg(struct kvm *kvm, struct kvm_s390_mem_op *mop)
+{
+ void __user *uaddr = (void __user *)mop->buf;
+ void __user *old_addr = (void __user *)mop->old_addr;
+ union {
+ __uint128_t quad;
+ char raw[sizeof(__uint128_t)];
+ } old = { .quad = 0}, new = { .quad = 0 };
+ unsigned int off_in_quad = sizeof(new) - mop->size;
+ int r, srcu_idx;
+ bool success;
+
+ r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_SKEY_PROTECTION);
+ if (r)
+ return r;
+ /*
+ * This validates off_in_quad. Checking that size is a power
+ * of two is not necessary, as cmpxchg_guest_abs_with_key
+ * takes care of that
+ */
+ if (mop->size > sizeof(new))
+ return -EINVAL;
+ if (copy_from_user(&new.raw[off_in_quad], uaddr, mop->size))
+ return -EFAULT;
+ if (copy_from_user(&old.raw[off_in_quad], old_addr, mop->size))
+ return -EFAULT;
+
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+
+ if (kvm_is_error_gpa(kvm, mop->gaddr)) {
+ r = PGM_ADDRESSING;
+ goto out_unlock;
+ }
+
+ r = cmpxchg_guest_abs_with_key(kvm, mop->gaddr, mop->size, &old.quad,
+ new.quad, mop->key, &success);
+ if (!success && copy_to_user(old_addr, &old.raw[off_in_quad], mop->size))
+ r = -EFAULT;
+
+out_unlock:
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ return r;
+}
+
+static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
+{
+ /*
+ * This is technically a heuristic only, if the kvm->lock is not
+ * taken, it is not guaranteed that the vm is/remains non-protected.
+ * This is ok from a kernel perspective, wrongdoing is detected
+ * on the access, -EFAULT is returned and the vm may crash the
+ * next time it accesses the memory in question.
+ * There is no sane usecase to do switching and a memop on two
+ * different CPUs at the same time.
+ */
+ if (kvm_s390_pv_get_handle(kvm))
+ return -EINVAL;
+
+ switch (mop->op) {
+ case KVM_S390_MEMOP_ABSOLUTE_READ:
+ case KVM_S390_MEMOP_ABSOLUTE_WRITE:
+ return kvm_s390_vm_mem_op_abs(kvm, mop);
+ case KVM_S390_MEMOP_ABSOLUTE_CMPXCHG:
+ return kvm_s390_vm_mem_op_cmpxchg(kvm, mop);
+ default:
+ return -EINVAL;
+ }
+}
+
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -5238,62 +5297,54 @@ static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu,
struct kvm_s390_mem_op *mop)
{
void __user *uaddr = (void __user *)mop->buf;
+ enum gacc_mode acc_mode;
void *tmpbuf = NULL;
- int r = 0;
- const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
- | KVM_S390_MEMOP_F_CHECK_ONLY
- | KVM_S390_MEMOP_F_SKEY_PROTECTION;
+ int r;
- if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
+ r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_INJECT_EXCEPTION |
+ KVM_S390_MEMOP_F_CHECK_ONLY |
+ KVM_S390_MEMOP_F_SKEY_PROTECTION);
+ if (r)
+ return r;
+ if (mop->ar >= NUM_ACRS)
return -EINVAL;
- if (mop->size > MEM_OP_MAX_SIZE)
- return -E2BIG;
if (kvm_s390_pv_cpu_is_protected(vcpu))
return -EINVAL;
- if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
- if (access_key_invalid(mop->key))
- return -EINVAL;
- } else {
- mop->key = 0;
- }
if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
tmpbuf = vmalloc(mop->size);
if (!tmpbuf)
return -ENOMEM;
}
- switch (mop->op) {
- case KVM_S390_MEMOP_LOGICAL_READ:
- if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
- r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
- GACC_FETCH, mop->key);
- break;
- }
+ acc_mode = mop->op == KVM_S390_MEMOP_LOGICAL_READ ? GACC_FETCH : GACC_STORE;
+ if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
+ r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
+ acc_mode, mop->key);
+ goto out_inject;
+ }
+ if (acc_mode == GACC_FETCH) {
r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
mop->size, mop->key);
- if (r == 0) {
- if (copy_to_user(uaddr, tmpbuf, mop->size))
- r = -EFAULT;
- }
- break;
- case KVM_S390_MEMOP_LOGICAL_WRITE:
- if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
- r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
- GACC_STORE, mop->key);
- break;
+ if (r)
+ goto out_inject;
+ if (copy_to_user(uaddr, tmpbuf, mop->size)) {
+ r = -EFAULT;
+ goto out_free;
}
+ } else {
if (copy_from_user(tmpbuf, uaddr, mop->size)) {
r = -EFAULT;
- break;
+ goto out_free;
}
r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
mop->size, mop->key);
- break;
}
+out_inject:
if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
+out_free:
vfree(tmpbuf);
return r;
}
@@ -5622,23 +5673,40 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
if (kvm_s390_pv_get_handle(kvm))
return -EINVAL;
- if (change == KVM_MR_DELETE || change == KVM_MR_FLAGS_ONLY)
- return 0;
+ if (change != KVM_MR_DELETE && change != KVM_MR_FLAGS_ONLY) {
+ /*
+ * A few sanity checks. We can have memory slots which have to be
+ * located/ended at a segment boundary (1MB). The memory in userland is
+ * ok to be fragmented into various different vmas. It is okay to mmap()
+ * and munmap() stuff in this slot after doing this call at any time
+ */
- /* A few sanity checks. We can have memory slots which have to be
- located/ended at a segment boundary (1MB). The memory in userland is
- ok to be fragmented into various different vmas. It is okay to mmap()
- and munmap() stuff in this slot after doing this call at any time */
+ if (new->userspace_addr & 0xffffful)
+ return -EINVAL;
- if (new->userspace_addr & 0xffffful)
- return -EINVAL;
+ size = new->npages * PAGE_SIZE;
+ if (size & 0xffffful)
+ return -EINVAL;
- size = new->npages * PAGE_SIZE;
- if (size & 0xffffful)
- return -EINVAL;
+ if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
+ return -EINVAL;
+ }
- if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
- return -EINVAL;
+ if (!kvm->arch.migration_mode)
+ return 0;
+
+ /*
+ * Turn off migration mode when:
+ * - userspace creates a new memslot with dirty logging off,
+ * - userspace modifies an existing memslot (MOVE or FLAGS_ONLY) and
+ * dirty logging is turned off.
+ * Migration mode expects dirty page logging being enabled to store
+ * its dirty bitmap.
+ */
+ if (change != KVM_MR_DELETE &&
+ !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
+ WARN(kvm_s390_vm_stop_migration(kvm),
+ "Failed to stop migration mode");
return 0;
}