summaryrefslogtreecommitdiff
path: root/arch/s390
diff options
context:
space:
mode:
authorPaolo Bonzini <pbonzini@redhat.com>2023-02-15 20:35:26 +0300
committerPaolo Bonzini <pbonzini@redhat.com>2023-02-15 20:35:26 +0300
commite4922088f8e90ea163281ba8e69b98f34a7a1b83 (patch)
tree1db574412a5365f285754dd0364d249581535ad8 /arch/s390
parent33436335e93a1788a58443fc99c5ab320ce4b9d9 (diff)
parent5fc5b94a273655128159186c87662105db8afeb5 (diff)
downloadlinux-e4922088f8e90ea163281ba8e69b98f34a7a1b83.tar.xz
Merge tag 'kvm-s390-next-6.3-1' of https://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux into HEAD
* Two more V!=R patches * The last part of the cmpxchg patches * A few fixes
Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/include/asm/asm-extable.h4
-rw-r--r--arch/s390/include/asm/cmpxchg.h109
-rw-r--r--arch/s390/include/asm/uaccess.h208
-rw-r--r--arch/s390/kvm/gaccess.c109
-rw-r--r--arch/s390/kvm/gaccess.h3
-rw-r--r--arch/s390/kvm/interrupt.c11
-rw-r--r--arch/s390/kvm/kvm-s390.c264
-rw-r--r--arch/s390/mm/extable.c9
8 files changed, 570 insertions, 147 deletions
diff --git a/arch/s390/include/asm/asm-extable.h b/arch/s390/include/asm/asm-extable.h
index b74f1070ddb2..55a02a153dfc 100644
--- a/arch/s390/include/asm/asm-extable.h
+++ b/arch/s390/include/asm/asm-extable.h
@@ -12,6 +12,7 @@
#define EX_TYPE_UA_STORE 3
#define EX_TYPE_UA_LOAD_MEM 4
#define EX_TYPE_UA_LOAD_REG 5
+#define EX_TYPE_UA_LOAD_REGPAIR 6
#define EX_DATA_REG_ERR_SHIFT 0
#define EX_DATA_REG_ERR GENMASK(3, 0)
@@ -85,4 +86,7 @@
#define EX_TABLE_UA_LOAD_REG(_fault, _target, _regerr, _regzero) \
__EX_TABLE_UA(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_REG, _regerr, _regzero, 0)
+#define EX_TABLE_UA_LOAD_REGPAIR(_fault, _target, _regerr, _regzero) \
+ __EX_TABLE_UA(__ex_table, _fault, _target, EX_TYPE_UA_LOAD_REGPAIR, _regerr, _regzero, 0)
+
#endif /* __ASM_EXTABLE_H */
diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h
index 84c3f0d576c5..3f26416c2ad8 100644
--- a/arch/s390/include/asm/cmpxchg.h
+++ b/arch/s390/include/asm/cmpxchg.h
@@ -88,67 +88,90 @@ static __always_inline unsigned long __cmpxchg(unsigned long address,
unsigned long old,
unsigned long new, int size)
{
- unsigned long prev, tmp;
- int shift;
-
switch (size) {
- case 1:
+ case 1: {
+ unsigned int prev, shift, mask;
+
shift = (3 ^ (address & 3)) << 3;
address ^= address & 3;
+ old = (old & 0xff) << shift;
+ new = (new & 0xff) << shift;
+ mask = ~(0xff << shift);
asm volatile(
- " l %0,%2\n"
- "0: nr %0,%5\n"
- " lr %1,%0\n"
- " or %0,%3\n"
- " or %1,%4\n"
- " cs %0,%1,%2\n"
- " jnl 1f\n"
- " xr %1,%0\n"
- " nr %1,%5\n"
- " jnz 0b\n"
+ " l %[prev],%[address]\n"
+ " nr %[prev],%[mask]\n"
+ " xilf %[mask],0xffffffff\n"
+ " or %[new],%[prev]\n"
+ " or %[prev],%[tmp]\n"
+ "0: lr %[tmp],%[prev]\n"
+ " cs %[prev],%[new],%[address]\n"
+ " jnl 1f\n"
+ " xr %[tmp],%[prev]\n"
+ " xr %[new],%[tmp]\n"
+ " nr %[tmp],%[mask]\n"
+ " jz 0b\n"
"1:"
- : "=&d" (prev), "=&d" (tmp), "+Q" (*(int *) address)
- : "d" ((old & 0xff) << shift),
- "d" ((new & 0xff) << shift),
- "d" (~(0xff << shift))
- : "memory", "cc");
+ : [prev] "=&d" (prev),
+ [address] "+Q" (*(int *)address),
+ [tmp] "+&d" (old),
+ [new] "+&d" (new),
+ [mask] "+&d" (mask)
+ :: "memory", "cc");
return prev >> shift;
- case 2:
+ }
+ case 2: {
+ unsigned int prev, shift, mask;
+
shift = (2 ^ (address & 2)) << 3;
address ^= address & 2;
+ old = (old & 0xffff) << shift;
+ new = (new & 0xffff) << shift;
+ mask = ~(0xffff << shift);
asm volatile(
- " l %0,%2\n"
- "0: nr %0,%5\n"
- " lr %1,%0\n"
- " or %0,%3\n"
- " or %1,%4\n"
- " cs %0,%1,%2\n"
- " jnl 1f\n"
- " xr %1,%0\n"
- " nr %1,%5\n"
- " jnz 0b\n"
+ " l %[prev],%[address]\n"
+ " nr %[prev],%[mask]\n"
+ " xilf %[mask],0xffffffff\n"
+ " or %[new],%[prev]\n"
+ " or %[prev],%[tmp]\n"
+ "0: lr %[tmp],%[prev]\n"
+ " cs %[prev],%[new],%[address]\n"
+ " jnl 1f\n"
+ " xr %[tmp],%[prev]\n"
+ " xr %[new],%[tmp]\n"
+ " nr %[tmp],%[mask]\n"
+ " jz 0b\n"
"1:"
- : "=&d" (prev), "=&d" (tmp), "+Q" (*(int *) address)
- : "d" ((old & 0xffff) << shift),
- "d" ((new & 0xffff) << shift),
- "d" (~(0xffff << shift))
- : "memory", "cc");
+ : [prev] "=&d" (prev),
+ [address] "+Q" (*(int *)address),
+ [tmp] "+&d" (old),
+ [new] "+&d" (new),
+ [mask] "+&d" (mask)
+ :: "memory", "cc");
return prev >> shift;
- case 4:
+ }
+ case 4: {
+ unsigned int prev = old;
+
asm volatile(
- " cs %0,%3,%1\n"
- : "=&d" (prev), "+Q" (*(int *) address)
- : "0" (old), "d" (new)
+ " cs %[prev],%[new],%[address]\n"
+ : [prev] "+&d" (prev),
+ [address] "+Q" (*(int *)address)
+ : [new] "d" (new)
: "memory", "cc");
return prev;
- case 8:
+ }
+ case 8: {
+ unsigned long prev = old;
+
asm volatile(
- " csg %0,%3,%1\n"
- : "=&d" (prev), "+QS" (*(long *) address)
- : "0" (old), "d" (new)
+ " csg %[prev],%[new],%[address]\n"
+ : [prev] "+&d" (prev),
+ [address] "+QS" (*(long *)address)
+ : [new] "d" (new)
: "memory", "cc");
return prev;
}
+ }
__cmpxchg_called_with_bad_pointer();
return old;
}
diff --git a/arch/s390/include/asm/uaccess.h b/arch/s390/include/asm/uaccess.h
index f7038b800cc3..8a8c64a678c4 100644
--- a/arch/s390/include/asm/uaccess.h
+++ b/arch/s390/include/asm/uaccess.h
@@ -390,4 +390,212 @@ do { \
goto err_label; \
} while (0)
+void __cmpxchg_user_key_called_with_bad_pointer(void);
+
+#define CMPXCHG_USER_KEY_MAX_LOOPS 128
+
+static __always_inline int __cmpxchg_user_key(unsigned long address, void *uval,
+ __uint128_t old, __uint128_t new,
+ unsigned long key, int size)
+{
+ int rc = 0;
+
+ switch (size) {
+ case 1: {
+ unsigned int prev, shift, mask, _old, _new;
+ unsigned long count;
+
+ shift = (3 ^ (address & 3)) << 3;
+ address ^= address & 3;
+ _old = ((unsigned int)old & 0xff) << shift;
+ _new = ((unsigned int)new & 0xff) << shift;
+ mask = ~(0xff << shift);
+ asm volatile(
+ " spka 0(%[key])\n"
+ " sacf 256\n"
+ " llill %[count],%[max_loops]\n"
+ "0: l %[prev],%[address]\n"
+ "1: nr %[prev],%[mask]\n"
+ " xilf %[mask],0xffffffff\n"
+ " or %[new],%[prev]\n"
+ " or %[prev],%[tmp]\n"
+ "2: lr %[tmp],%[prev]\n"
+ "3: cs %[prev],%[new],%[address]\n"
+ "4: jnl 5f\n"
+ " xr %[tmp],%[prev]\n"
+ " xr %[new],%[tmp]\n"
+ " nr %[tmp],%[mask]\n"
+ " jnz 5f\n"
+ " brct %[count],2b\n"
+ "5: sacf 768\n"
+ " spka %[default_key]\n"
+ EX_TABLE_UA_LOAD_REG(0b, 5b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(1b, 5b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(3b, 5b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(4b, 5b, %[rc], %[prev])
+ : [rc] "+&d" (rc),
+ [prev] "=&d" (prev),
+ [address] "+Q" (*(int *)address),
+ [tmp] "+&d" (_old),
+ [new] "+&d" (_new),
+ [mask] "+&d" (mask),
+ [count] "=a" (count)
+ : [key] "%[count]" (key << 4),
+ [default_key] "J" (PAGE_DEFAULT_KEY),
+ [max_loops] "J" (CMPXCHG_USER_KEY_MAX_LOOPS)
+ : "memory", "cc");
+ *(unsigned char *)uval = prev >> shift;
+ if (!count)
+ rc = -EAGAIN;
+ return rc;
+ }
+ case 2: {
+ unsigned int prev, shift, mask, _old, _new;
+ unsigned long count;
+
+ shift = (2 ^ (address & 2)) << 3;
+ address ^= address & 2;
+ _old = ((unsigned int)old & 0xffff) << shift;
+ _new = ((unsigned int)new & 0xffff) << shift;
+ mask = ~(0xffff << shift);
+ asm volatile(
+ " spka 0(%[key])\n"
+ " sacf 256\n"
+ " llill %[count],%[max_loops]\n"
+ "0: l %[prev],%[address]\n"
+ "1: nr %[prev],%[mask]\n"
+ " xilf %[mask],0xffffffff\n"
+ " or %[new],%[prev]\n"
+ " or %[prev],%[tmp]\n"
+ "2: lr %[tmp],%[prev]\n"
+ "3: cs %[prev],%[new],%[address]\n"
+ "4: jnl 5f\n"
+ " xr %[tmp],%[prev]\n"
+ " xr %[new],%[tmp]\n"
+ " nr %[tmp],%[mask]\n"
+ " jnz 5f\n"
+ " brct %[count],2b\n"
+ "5: sacf 768\n"
+ " spka %[default_key]\n"
+ EX_TABLE_UA_LOAD_REG(0b, 5b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(1b, 5b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(3b, 5b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(4b, 5b, %[rc], %[prev])
+ : [rc] "+&d" (rc),
+ [prev] "=&d" (prev),
+ [address] "+Q" (*(int *)address),
+ [tmp] "+&d" (_old),
+ [new] "+&d" (_new),
+ [mask] "+&d" (mask),
+ [count] "=a" (count)
+ : [key] "%[count]" (key << 4),
+ [default_key] "J" (PAGE_DEFAULT_KEY),
+ [max_loops] "J" (CMPXCHG_USER_KEY_MAX_LOOPS)
+ : "memory", "cc");
+ *(unsigned short *)uval = prev >> shift;
+ if (!count)
+ rc = -EAGAIN;
+ return rc;
+ }
+ case 4: {
+ unsigned int prev = old;
+
+ asm volatile(
+ " spka 0(%[key])\n"
+ " sacf 256\n"
+ "0: cs %[prev],%[new],%[address]\n"
+ "1: sacf 768\n"
+ " spka %[default_key]\n"
+ EX_TABLE_UA_LOAD_REG(0b, 1b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(1b, 1b, %[rc], %[prev])
+ : [rc] "+&d" (rc),
+ [prev] "+&d" (prev),
+ [address] "+Q" (*(int *)address)
+ : [new] "d" ((unsigned int)new),
+ [key] "a" (key << 4),
+ [default_key] "J" (PAGE_DEFAULT_KEY)
+ : "memory", "cc");
+ *(unsigned int *)uval = prev;
+ return rc;
+ }
+ case 8: {
+ unsigned long prev = old;
+
+ asm volatile(
+ " spka 0(%[key])\n"
+ " sacf 256\n"
+ "0: csg %[prev],%[new],%[address]\n"
+ "1: sacf 768\n"
+ " spka %[default_key]\n"
+ EX_TABLE_UA_LOAD_REG(0b, 1b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REG(1b, 1b, %[rc], %[prev])
+ : [rc] "+&d" (rc),
+ [prev] "+&d" (prev),
+ [address] "+QS" (*(long *)address)
+ : [new] "d" ((unsigned long)new),
+ [key] "a" (key << 4),
+ [default_key] "J" (PAGE_DEFAULT_KEY)
+ : "memory", "cc");
+ *(unsigned long *)uval = prev;
+ return rc;
+ }
+ case 16: {
+ __uint128_t prev = old;
+
+ asm volatile(
+ " spka 0(%[key])\n"
+ " sacf 256\n"
+ "0: cdsg %[prev],%[new],%[address]\n"
+ "1: sacf 768\n"
+ " spka %[default_key]\n"
+ EX_TABLE_UA_LOAD_REGPAIR(0b, 1b, %[rc], %[prev])
+ EX_TABLE_UA_LOAD_REGPAIR(1b, 1b, %[rc], %[prev])
+ : [rc] "+&d" (rc),
+ [prev] "+&d" (prev),
+ [address] "+QS" (*(__int128_t *)address)
+ : [new] "d" (new),
+ [key] "a" (key << 4),
+ [default_key] "J" (PAGE_DEFAULT_KEY)
+ : "memory", "cc");
+ *(__uint128_t *)uval = prev;
+ return rc;
+ }
+ }
+ __cmpxchg_user_key_called_with_bad_pointer();
+ return rc;
+}
+
+/**
+ * cmpxchg_user_key() - cmpxchg with user space target, honoring storage keys
+ * @ptr: User space address of value to compare to @old and exchange with
+ * @new. Must be aligned to sizeof(*@ptr).
+ * @uval: Address where the old value of *@ptr is written to.
+ * @old: Old value. Compared to the content pointed to by @ptr in order to
+ * determine if the exchange occurs. The old value read from *@ptr is
+ * written to *@uval.
+ * @new: New value to place at *@ptr.
+ * @key: Access key to use for checking storage key protection.
+ *
+ * Perform a cmpxchg on a user space target, honoring storage key protection.
+ * @key alone determines how key checking is performed, neither
+ * storage-protection-override nor fetch-protection-override apply.
+ * The caller must compare *@uval and @old to determine if values have been
+ * exchanged. In case of an exception *@uval is set to zero.
+ *
+ * Return: 0: cmpxchg executed
+ * -EFAULT: an exception happened when trying to access *@ptr
+ * -EAGAIN: maxed out number of retries (byte and short only)
+ */
+#define cmpxchg_user_key(ptr, uval, old, new, key) \
+({ \
+ __typeof__(ptr) __ptr = (ptr); \
+ __typeof__(uval) __uval = (uval); \
+ \
+ BUILD_BUG_ON(sizeof(*(__ptr)) != sizeof(*(__uval))); \
+ might_fault(); \
+ __chk_user_ptr(__ptr); \
+ __cmpxchg_user_key((unsigned long)(__ptr), (void *)(__uval), \
+ (old), (new), (key), sizeof(*(__ptr))); \
+})
+
#endif /* __S390_UACCESS_H */
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index 0243b6e38d36..3eb85f254881 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -1162,6 +1162,115 @@ int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
}
/**
+ * cmpxchg_guest_abs_with_key() - Perform cmpxchg on guest absolute address.
+ * @kvm: Virtual machine instance.
+ * @gpa: Absolute guest address of the location to be changed.
+ * @len: Operand length of the cmpxchg, required: 1 <= len <= 16. Providing a
+ * non power of two will result in failure.
+ * @old_addr: Pointer to old value. If the location at @gpa contains this value,
+ * the exchange will succeed. After calling cmpxchg_guest_abs_with_key()
+ * *@old_addr contains the value at @gpa before the attempt to
+ * exchange the value.
+ * @new: The value to place at @gpa.
+ * @access_key: The access key to use for the guest access.
+ * @success: output value indicating if an exchange occurred.
+ *
+ * Atomically exchange the value at @gpa by @new, if it contains *@old.
+ * Honors storage keys.
+ *
+ * Return: * 0: successful exchange
+ * * >0: a program interruption code indicating the reason cmpxchg could
+ * not be attempted
+ * * -EINVAL: address misaligned or len not power of two
+ * * -EAGAIN: transient failure (len 1 or 2)
+ * * -EOPNOTSUPP: read-only memslot (should never occur)
+ */
+int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len,
+ __uint128_t *old_addr, __uint128_t new,
+ u8 access_key, bool *success)
+{
+ gfn_t gfn = gpa_to_gfn(gpa);
+ struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn);
+ bool writable;
+ hva_t hva;
+ int ret;
+
+ if (!IS_ALIGNED(gpa, len))
+ return -EINVAL;
+
+ hva = gfn_to_hva_memslot_prot(slot, gfn, &writable);
+ if (kvm_is_error_hva(hva))
+ return PGM_ADDRESSING;
+ /*
+ * Check if it's a read-only memslot, even though that cannot occur
+ * since those are unsupported.
+ * Don't try to actually handle that case.
+ */
+ if (!writable)
+ return -EOPNOTSUPP;
+
+ hva += offset_in_page(gpa);
+ /*
+ * The cmpxchg_user_key macro depends on the type of "old", so we need
+ * a case for each valid length and get some code duplication as long
+ * as we don't introduce a new macro.
+ */
+ switch (len) {
+ case 1: {
+ u8 old;
+
+ ret = cmpxchg_user_key((u8 __user *)hva, &old, *old_addr, new, access_key);
+ *success = !ret && old == *old_addr;
+ *old_addr = old;
+ break;
+ }
+ case 2: {
+ u16 old;
+
+ ret = cmpxchg_user_key((u16 __user *)hva, &old, *old_addr, new, access_key);
+ *success = !ret && old == *old_addr;
+ *old_addr = old;
+ break;
+ }
+ case 4: {
+ u32 old;
+
+ ret = cmpxchg_user_key((u32 __user *)hva, &old, *old_addr, new, access_key);
+ *success = !ret && old == *old_addr;
+ *old_addr = old;
+ break;
+ }
+ case 8: {
+ u64 old;
+
+ ret = cmpxchg_user_key((u64 __user *)hva, &old, *old_addr, new, access_key);
+ *success = !ret && old == *old_addr;
+ *old_addr = old;
+ break;
+ }
+ case 16: {
+ __uint128_t old;
+
+ ret = cmpxchg_user_key((__uint128_t __user *)hva, &old, *old_addr, new, access_key);
+ *success = !ret && old == *old_addr;
+ *old_addr = old;
+ break;
+ }
+ default:
+ return -EINVAL;
+ }
+ if (*success)
+ mark_page_dirty_in_slot(kvm, slot, gfn);
+ /*
+ * Assume that the fault is caused by protection, either key protection
+ * or user page write protection.
+ */
+ if (ret == -EFAULT)
+ ret = PGM_PROTECTION;
+ return ret;
+}
+
+/**
* guest_translate_address_with_key - translate guest logical into guest absolute address
* @vcpu: virtual cpu
* @gva: Guest virtual address
diff --git a/arch/s390/kvm/gaccess.h b/arch/s390/kvm/gaccess.h
index 9408d6cc8e2c..b320d12aa049 100644
--- a/arch/s390/kvm/gaccess.h
+++ b/arch/s390/kvm/gaccess.h
@@ -206,6 +206,9 @@ int access_guest_with_key(struct kvm_vcpu *vcpu, unsigned long ga, u8 ar,
int access_guest_real(struct kvm_vcpu *vcpu, unsigned long gra,
void *data, unsigned long len, enum gacc_mode mode);
+int cmpxchg_guest_abs_with_key(struct kvm *kvm, gpa_t gpa, int len, __uint128_t *old,
+ __uint128_t new, u8 access_key, bool *success);
+
/**
* write_guest_with_key - copy data from kernel space to guest space
* @vcpu: virtual cpu
diff --git a/arch/s390/kvm/interrupt.c b/arch/s390/kvm/interrupt.c
index 8edb3bee74b6..9250fde1f97d 100644
--- a/arch/s390/kvm/interrupt.c
+++ b/arch/s390/kvm/interrupt.c
@@ -3103,9 +3103,9 @@ static enum hrtimer_restart gisa_vcpu_kicker(struct hrtimer *timer)
static void process_gib_alert_list(void)
{
struct kvm_s390_gisa_interrupt *gi;
+ u32 final, gisa_phys, origin = 0UL;
struct kvm_s390_gisa *gisa;
struct kvm *kvm;
- u32 final, origin = 0UL;
do {
/*
@@ -3131,9 +3131,10 @@ static void process_gib_alert_list(void)
* interruptions asap.
*/
while (origin & GISA_ADDR_MASK) {
- gisa = (struct kvm_s390_gisa *)(u64)origin;
+ gisa_phys = origin;
+ gisa = phys_to_virt(gisa_phys);
origin = gisa->next_alert;
- gisa->next_alert = (u32)(u64)gisa;
+ gisa->next_alert = gisa_phys;
kvm = container_of(gisa, struct sie_page2, gisa)->kvm;
gi = &kvm->arch.gisa_int;
if (hrtimer_active(&gi->timer))
@@ -3417,6 +3418,7 @@ void kvm_s390_gib_destroy(void)
int __init kvm_s390_gib_init(u8 nisc)
{
+ u32 gib_origin;
int rc = 0;
if (!css_general_characteristics.aiv) {
@@ -3438,7 +3440,8 @@ int __init kvm_s390_gib_init(u8 nisc)
}
gib->nisc = nisc;
- if (chsc_sgib((u32)(u64)gib)) {
+ gib_origin = virt_to_phys(gib);
+ if (chsc_sgib(gib_origin)) {
pr_err("Associating the GIB with the AIV facility failed\n");
free_page((unsigned long)gib);
gib = NULL;
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index bd25076aa19b..39b36562c043 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -573,7 +573,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_VCPU_RESETS:
case KVM_CAP_SET_GUEST_DEBUG:
case KVM_CAP_S390_DIAG318:
- case KVM_CAP_S390_MEM_OP_EXTENSION:
r = 1;
break;
case KVM_CAP_SET_GUEST_DEBUG2:
@@ -587,6 +586,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_S390_MEM_OP:
r = MEM_OP_MAX_SIZE;
break;
+ case KVM_CAP_S390_MEM_OP_EXTENSION:
+ /*
+ * Flag bits indicating which extensions are supported.
+ * If r > 0, the base extension must also be supported/indicated,
+ * in order to maintain backwards compatibility.
+ */
+ r = KVM_S390_MEMOP_EXTENSION_CAP_BASE |
+ KVM_S390_MEMOP_EXTENSION_CAP_CMPXCHG;
+ break;
case KVM_CAP_NR_VCPUS:
case KVM_CAP_MAX_VCPUS:
case KVM_CAP_MAX_VCPU_ID:
@@ -2753,41 +2761,33 @@ static int kvm_s390_handle_pv(struct kvm *kvm, struct kvm_pv_cmd *cmd)
return r;
}
-static bool access_key_invalid(u8 access_key)
-{
- return access_key > 0xf;
-}
-
-static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
+static int mem_op_validate_common(struct kvm_s390_mem_op *mop, u64 supported_flags)
{
- void __user *uaddr = (void __user *)mop->buf;
- u64 supported_flags;
- void *tmpbuf = NULL;
- int r, srcu_idx;
-
- supported_flags = KVM_S390_MEMOP_F_SKEY_PROTECTION
- | KVM_S390_MEMOP_F_CHECK_ONLY;
if (mop->flags & ~supported_flags || !mop->size)
return -EINVAL;
if (mop->size > MEM_OP_MAX_SIZE)
return -E2BIG;
- /*
- * This is technically a heuristic only, if the kvm->lock is not
- * taken, it is not guaranteed that the vm is/remains non-protected.
- * This is ok from a kernel perspective, wrongdoing is detected
- * on the access, -EFAULT is returned and the vm may crash the
- * next time it accesses the memory in question.
- * There is no sane usecase to do switching and a memop on two
- * different CPUs at the same time.
- */
- if (kvm_s390_pv_get_handle(kvm))
- return -EINVAL;
if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
- if (access_key_invalid(mop->key))
+ if (mop->key > 0xf)
return -EINVAL;
} else {
mop->key = 0;
}
+ return 0;
+}
+
+static int kvm_s390_vm_mem_op_abs(struct kvm *kvm, struct kvm_s390_mem_op *mop)
+{
+ void __user *uaddr = (void __user *)mop->buf;
+ enum gacc_mode acc_mode;
+ void *tmpbuf = NULL;
+ int r, srcu_idx;
+
+ r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_SKEY_PROTECTION |
+ KVM_S390_MEMOP_F_CHECK_ONLY);
+ if (r)
+ return r;
+
if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
tmpbuf = vmalloc(mop->size);
if (!tmpbuf)
@@ -2801,35 +2801,25 @@ static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
goto out_unlock;
}
- switch (mop->op) {
- case KVM_S390_MEMOP_ABSOLUTE_READ: {
- if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
- r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_FETCH, mop->key);
- } else {
- r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
- mop->size, GACC_FETCH, mop->key);
- if (r == 0) {
- if (copy_to_user(uaddr, tmpbuf, mop->size))
- r = -EFAULT;
- }
- }
- break;
+ acc_mode = mop->op == KVM_S390_MEMOP_ABSOLUTE_READ ? GACC_FETCH : GACC_STORE;
+ if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
+ r = check_gpa_range(kvm, mop->gaddr, mop->size, acc_mode, mop->key);
+ goto out_unlock;
}
- case KVM_S390_MEMOP_ABSOLUTE_WRITE: {
- if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
- r = check_gpa_range(kvm, mop->gaddr, mop->size, GACC_STORE, mop->key);
- } else {
- if (copy_from_user(tmpbuf, uaddr, mop->size)) {
- r = -EFAULT;
- break;
- }
- r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
- mop->size, GACC_STORE, mop->key);
+ if (acc_mode == GACC_FETCH) {
+ r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
+ mop->size, GACC_FETCH, mop->key);
+ if (r)
+ goto out_unlock;
+ if (copy_to_user(uaddr, tmpbuf, mop->size))
+ r = -EFAULT;
+ } else {
+ if (copy_from_user(tmpbuf, uaddr, mop->size)) {
+ r = -EFAULT;
+ goto out_unlock;
}
- break;
- }
- default:
- r = -EINVAL;
+ r = access_guest_abs_with_key(kvm, mop->gaddr, tmpbuf,
+ mop->size, GACC_STORE, mop->key);
}
out_unlock:
@@ -2839,6 +2829,75 @@ out_unlock:
return r;
}
+static int kvm_s390_vm_mem_op_cmpxchg(struct kvm *kvm, struct kvm_s390_mem_op *mop)
+{
+ void __user *uaddr = (void __user *)mop->buf;
+ void __user *old_addr = (void __user *)mop->old_addr;
+ union {
+ __uint128_t quad;
+ char raw[sizeof(__uint128_t)];
+ } old = { .quad = 0}, new = { .quad = 0 };
+ unsigned int off_in_quad = sizeof(new) - mop->size;
+ int r, srcu_idx;
+ bool success;
+
+ r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_SKEY_PROTECTION);
+ if (r)
+ return r;
+ /*
+ * This validates off_in_quad. Checking that size is a power
+ * of two is not necessary, as cmpxchg_guest_abs_with_key
+ * takes care of that
+ */
+ if (mop->size > sizeof(new))
+ return -EINVAL;
+ if (copy_from_user(&new.raw[off_in_quad], uaddr, mop->size))
+ return -EFAULT;
+ if (copy_from_user(&old.raw[off_in_quad], old_addr, mop->size))
+ return -EFAULT;
+
+ srcu_idx = srcu_read_lock(&kvm->srcu);
+
+ if (kvm_is_error_gpa(kvm, mop->gaddr)) {
+ r = PGM_ADDRESSING;
+ goto out_unlock;
+ }
+
+ r = cmpxchg_guest_abs_with_key(kvm, mop->gaddr, mop->size, &old.quad,
+ new.quad, mop->key, &success);
+ if (!success && copy_to_user(old_addr, &old.raw[off_in_quad], mop->size))
+ r = -EFAULT;
+
+out_unlock:
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+ return r;
+}
+
+static int kvm_s390_vm_mem_op(struct kvm *kvm, struct kvm_s390_mem_op *mop)
+{
+ /*
+ * This is technically a heuristic only, if the kvm->lock is not
+ * taken, it is not guaranteed that the vm is/remains non-protected.
+ * This is ok from a kernel perspective, wrongdoing is detected
+ * on the access, -EFAULT is returned and the vm may crash the
+ * next time it accesses the memory in question.
+ * There is no sane usecase to do switching and a memop on two
+ * different CPUs at the same time.
+ */
+ if (kvm_s390_pv_get_handle(kvm))
+ return -EINVAL;
+
+ switch (mop->op) {
+ case KVM_S390_MEMOP_ABSOLUTE_READ:
+ case KVM_S390_MEMOP_ABSOLUTE_WRITE:
+ return kvm_s390_vm_mem_op_abs(kvm, mop);
+ case KVM_S390_MEMOP_ABSOLUTE_CMPXCHG:
+ return kvm_s390_vm_mem_op_cmpxchg(kvm, mop);
+ default:
+ return -EINVAL;
+ }
+}
+
long kvm_arch_vm_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg)
{
@@ -5238,62 +5297,54 @@ static long kvm_s390_vcpu_mem_op(struct kvm_vcpu *vcpu,
struct kvm_s390_mem_op *mop)
{
void __user *uaddr = (void __user *)mop->buf;
+ enum gacc_mode acc_mode;
void *tmpbuf = NULL;
- int r = 0;
- const u64 supported_flags = KVM_S390_MEMOP_F_INJECT_EXCEPTION
- | KVM_S390_MEMOP_F_CHECK_ONLY
- | KVM_S390_MEMOP_F_SKEY_PROTECTION;
+ int r;
- if (mop->flags & ~supported_flags || mop->ar >= NUM_ACRS || !mop->size)
+ r = mem_op_validate_common(mop, KVM_S390_MEMOP_F_INJECT_EXCEPTION |
+ KVM_S390_MEMOP_F_CHECK_ONLY |
+ KVM_S390_MEMOP_F_SKEY_PROTECTION);
+ if (r)
+ return r;
+ if (mop->ar >= NUM_ACRS)
return -EINVAL;
- if (mop->size > MEM_OP_MAX_SIZE)
- return -E2BIG;
if (kvm_s390_pv_cpu_is_protected(vcpu))
return -EINVAL;
- if (mop->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION) {
- if (access_key_invalid(mop->key))
- return -EINVAL;
- } else {
- mop->key = 0;
- }
if (!(mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY)) {
tmpbuf = vmalloc(mop->size);
if (!tmpbuf)
return -ENOMEM;
}
- switch (mop->op) {
- case KVM_S390_MEMOP_LOGICAL_READ:
- if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
- r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
- GACC_FETCH, mop->key);
- break;
- }
+ acc_mode = mop->op == KVM_S390_MEMOP_LOGICAL_READ ? GACC_FETCH : GACC_STORE;
+ if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
+ r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
+ acc_mode, mop->key);
+ goto out_inject;
+ }
+ if (acc_mode == GACC_FETCH) {
r = read_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
mop->size, mop->key);
- if (r == 0) {
- if (copy_to_user(uaddr, tmpbuf, mop->size))
- r = -EFAULT;
- }
- break;
- case KVM_S390_MEMOP_LOGICAL_WRITE:
- if (mop->flags & KVM_S390_MEMOP_F_CHECK_ONLY) {
- r = check_gva_range(vcpu, mop->gaddr, mop->ar, mop->size,
- GACC_STORE, mop->key);
- break;
+ if (r)
+ goto out_inject;
+ if (copy_to_user(uaddr, tmpbuf, mop->size)) {
+ r = -EFAULT;
+ goto out_free;
}
+ } else {
if (copy_from_user(tmpbuf, uaddr, mop->size)) {
r = -EFAULT;
- break;
+ goto out_free;
}
r = write_guest_with_key(vcpu, mop->gaddr, mop->ar, tmpbuf,
mop->size, mop->key);
- break;
}
+out_inject:
if (r > 0 && (mop->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION) != 0)
kvm_s390_inject_prog_irq(vcpu, &vcpu->arch.pgm);
+out_free:
vfree(tmpbuf);
return r;
}
@@ -5622,23 +5673,40 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
if (kvm_s390_pv_get_handle(kvm))
return -EINVAL;
- if (change == KVM_MR_DELETE || change == KVM_MR_FLAGS_ONLY)
- return 0;
+ if (change != KVM_MR_DELETE && change != KVM_MR_FLAGS_ONLY) {
+ /*
+ * A few sanity checks. We can have memory slots which have to be
+ * located/ended at a segment boundary (1MB). The memory in userland is
+ * ok to be fragmented into various different vmas. It is okay to mmap()
+ * and munmap() stuff in this slot after doing this call at any time
+ */
- /* A few sanity checks. We can have memory slots which have to be
- located/ended at a segment boundary (1MB). The memory in userland is
- ok to be fragmented into various different vmas. It is okay to mmap()
- and munmap() stuff in this slot after doing this call at any time */
+ if (new->userspace_addr & 0xffffful)
+ return -EINVAL;
- if (new->userspace_addr & 0xffffful)
- return -EINVAL;
+ size = new->npages * PAGE_SIZE;
+ if (size & 0xffffful)
+ return -EINVAL;
- size = new->npages * PAGE_SIZE;
- if (size & 0xffffful)
- return -EINVAL;
+ if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
+ return -EINVAL;
+ }
- if ((new->base_gfn * PAGE_SIZE) + size > kvm->arch.mem_limit)
- return -EINVAL;
+ if (!kvm->arch.migration_mode)
+ return 0;
+
+ /*
+ * Turn off migration mode when:
+ * - userspace creates a new memslot with dirty logging off,
+ * - userspace modifies an existing memslot (MOVE or FLAGS_ONLY) and
+ * dirty logging is turned off.
+ * Migration mode expects dirty page logging being enabled to store
+ * its dirty bitmap.
+ */
+ if (change != KVM_MR_DELETE &&
+ !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
+ WARN(kvm_s390_vm_stop_migration(kvm),
+ "Failed to stop migration mode");
return 0;
}
diff --git a/arch/s390/mm/extable.c b/arch/s390/mm/extable.c
index 1e4d2187541a..fe87291df95d 100644
--- a/arch/s390/mm/extable.c
+++ b/arch/s390/mm/extable.c
@@ -47,13 +47,16 @@ static bool ex_handler_ua_load_mem(const struct exception_table_entry *ex, struc
return true;
}
-static bool ex_handler_ua_load_reg(const struct exception_table_entry *ex, struct pt_regs *regs)
+static bool ex_handler_ua_load_reg(const struct exception_table_entry *ex,
+ bool pair, struct pt_regs *regs)
{
unsigned int reg_zero = FIELD_GET(EX_DATA_REG_ADDR, ex->data);
unsigned int reg_err = FIELD_GET(EX_DATA_REG_ERR, ex->data);
regs->gprs[reg_err] = -EFAULT;
regs->gprs[reg_zero] = 0;
+ if (pair)
+ regs->gprs[reg_zero + 1] = 0;
regs->psw.addr = extable_fixup(ex);
return true;
}
@@ -75,7 +78,9 @@ bool fixup_exception(struct pt_regs *regs)
case EX_TYPE_UA_LOAD_MEM:
return ex_handler_ua_load_mem(ex, regs);
case EX_TYPE_UA_LOAD_REG:
- return ex_handler_ua_load_reg(ex, regs);
+ return ex_handler_ua_load_reg(ex, false, regs);
+ case EX_TYPE_UA_LOAD_REGPAIR:
+ return ex_handler_ua_load_reg(ex, true, regs);
}
panic("invalid exception table entry");
}