summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/s390/include/asm/lowcore.h5
-rw-r--r--arch/s390/include/asm/spinlock.h18
-rw-r--r--arch/s390/kernel/setup.c2
-rw-r--r--arch/s390/kernel/smp.c4
-rw-r--r--arch/s390/lib/spinlock.c194
5 files changed, 180 insertions, 43 deletions
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index a6870ea6ea8b..62943af36ac6 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -133,8 +133,9 @@ struct lowcore {
__u8 pad_0x03b4[0x03b8-0x03b4]; /* 0x03b4 */
__u64 gmap; /* 0x03b8 */
__u32 spinlock_lockval; /* 0x03c0 */
- __u32 fpu_flags; /* 0x03c4 */
- __u8 pad_0x03c8[0x0400-0x03c8]; /* 0x03c8 */
+ __u32 spinlock_index; /* 0x03c4 */
+ __u32 fpu_flags; /* 0x03c8 */
+ __u8 pad_0x03cc[0x0400-0x03cc]; /* 0x03cc */
/* Per cpu primary space access list */
__u32 paste[16]; /* 0x0400 */
diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h
index 6727cc30d59b..2da4a6d13f54 100644
--- a/arch/s390/include/asm/spinlock.h
+++ b/arch/s390/include/asm/spinlock.h
@@ -36,15 +36,11 @@ bool arch_vcpu_is_preempted(int cpu);
*/
void arch_lock_relax(int cpu);
+void arch_spin_relax(arch_spinlock_t *lock);
void arch_spin_lock_wait(arch_spinlock_t *);
int arch_spin_trylock_retry(arch_spinlock_t *);
-void arch_spin_lock_wait_flags(arch_spinlock_t *, unsigned long flags);
-
-static inline void arch_spin_relax(arch_spinlock_t *lock)
-{
- arch_lock_relax(lock->lock);
-}
+void arch_spin_lock_setup(int cpu);
static inline u32 arch_spin_lockval(int cpu)
{
@@ -64,8 +60,7 @@ static inline int arch_spin_is_locked(arch_spinlock_t *lp)
static inline int arch_spin_trylock_once(arch_spinlock_t *lp)
{
barrier();
- return likely(arch_spin_value_unlocked(*lp) &&
- __atomic_cmpxchg_bool(&lp->lock, 0, SPINLOCK_LOCKVAL));
+ return likely(__atomic_cmpxchg_bool(&lp->lock, 0, SPINLOCK_LOCKVAL));
}
static inline void arch_spin_lock(arch_spinlock_t *lp)
@@ -78,7 +73,7 @@ static inline void arch_spin_lock_flags(arch_spinlock_t *lp,
unsigned long flags)
{
if (!arch_spin_trylock_once(lp))
- arch_spin_lock_wait_flags(lp, flags);
+ arch_spin_lock_wait(lp);
}
static inline int arch_spin_trylock(arch_spinlock_t *lp)
@@ -95,8 +90,9 @@ static inline void arch_spin_unlock(arch_spinlock_t *lp)
#ifdef CONFIG_HAVE_MARCH_ZEC12_FEATURES
" .long 0xb2fa0070\n" /* NIAI 7 */
#endif
- " st %1,%0\n"
- : "=Q" (lp->lock) : "d" (0) : "cc", "memory");
+ " sth %1,%0\n"
+ : "=Q" (((unsigned short *) &lp->lock)[1])
+ : "d" (0) : "cc", "memory");
}
/*
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 164a1e16b53e..b2c9af9b88d5 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -380,6 +380,8 @@ static void __init setup_lowcore(void)
#ifdef CONFIG_SMP
lc->spinlock_lockval = arch_spin_lockval(0);
+ lc->spinlock_index = 0;
+ arch_spin_lock_setup(0);
#endif
set_prefix((u32)(unsigned long) lc);
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index b9fc9b00d845..cc04b74fbb84 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -226,6 +226,7 @@ static int pcpu_alloc_lowcore(struct pcpu *pcpu, int cpu)
lc->mcesad = mcesa_origin | mcesa_bits;
lc->cpu_nr = cpu;
lc->spinlock_lockval = arch_spin_lockval(cpu);
+ lc->spinlock_index = 0;
if (vdso_alloc_per_cpu(lc))
goto out;
lowcore_ptr[cpu] = lc;
@@ -273,6 +274,7 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
cpumask_set_cpu(cpu, mm_cpumask(&init_mm));
lc->cpu_nr = cpu;
lc->spinlock_lockval = arch_spin_lockval(cpu);
+ lc->spinlock_index = 0;
lc->percpu_offset = __per_cpu_offset[cpu];
lc->kernel_asce = S390_lowcore.kernel_asce;
lc->machine_flags = S390_lowcore.machine_flags;
@@ -281,6 +283,7 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
save_access_regs((unsigned int *) lc->access_regs_save_area);
memcpy(lc->stfle_fac_list, S390_lowcore.stfle_fac_list,
MAX_FACILITY_BIT/8);
+ arch_spin_lock_setup(cpu);
}
static void pcpu_attach_task(struct pcpu *pcpu, struct task_struct *tsk)
@@ -967,6 +970,7 @@ void __init smp_setup_processor_id(void)
pcpu_devices[0].address = stap();
S390_lowcore.cpu_nr = 0;
S390_lowcore.spinlock_lockval = arch_spin_lockval(0);
+ S390_lowcore.spinlock_index = 0;
}
/*
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index ee73bcca7e6f..6747134227cd 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -8,8 +8,10 @@
#include <linux/types.h>
#include <linux/export.h>
#include <linux/spinlock.h>
+#include <linux/jiffies.h>
#include <linux/init.h>
#include <linux/smp.h>
+#include <linux/percpu.h>
#include <asm/io.h>
int spin_retry = -1;
@@ -32,6 +34,40 @@ static int __init spin_retry_setup(char *str)
}
__setup("spin_retry=", spin_retry_setup);
+struct spin_wait {
+ struct spin_wait *next, *prev;
+ int node_id;
+} __aligned(32);
+
+static DEFINE_PER_CPU_ALIGNED(struct spin_wait, spin_wait[4]);
+
+#define _Q_LOCK_CPU_OFFSET 0
+#define _Q_LOCK_STEAL_OFFSET 16
+#define _Q_TAIL_IDX_OFFSET 18
+#define _Q_TAIL_CPU_OFFSET 20
+
+#define _Q_LOCK_CPU_MASK 0x0000ffff
+#define _Q_LOCK_STEAL_ADD 0x00010000
+#define _Q_LOCK_STEAL_MASK 0x00030000
+#define _Q_TAIL_IDX_MASK 0x000c0000
+#define _Q_TAIL_CPU_MASK 0xfff00000
+
+#define _Q_LOCK_MASK (_Q_LOCK_CPU_MASK | _Q_LOCK_STEAL_MASK)
+#define _Q_TAIL_MASK (_Q_TAIL_IDX_MASK | _Q_TAIL_CPU_MASK)
+
+void arch_spin_lock_setup(int cpu)
+{
+ struct spin_wait *node;
+ int ix;
+
+ node = per_cpu_ptr(&spin_wait[0], cpu);
+ for (ix = 0; ix < 4; ix++, node++) {
+ memset(node, 0, sizeof(*node));
+ node->node_id = ((cpu + 1) << _Q_TAIL_CPU_OFFSET) +
+ (ix << _Q_TAIL_IDX_OFFSET);
+ }
+}
+
static inline int arch_load_niai4(int *lock)
{
int owner;
@@ -60,75 +96,160 @@ static inline int arch_cmpxchg_niai8(int *lock, int old, int new)
return expected == old;
}
-void arch_spin_lock_wait(arch_spinlock_t *lp)
+static inline struct spin_wait *arch_spin_decode_tail(int lock)
{
- int cpu = SPINLOCK_LOCKVAL;
- int owner, count;
+ int ix, cpu;
+
+ ix = (lock & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET;
+ cpu = (lock & _Q_TAIL_CPU_MASK) >> _Q_TAIL_CPU_OFFSET;
+ return per_cpu_ptr(&spin_wait[ix], cpu - 1);
+}
+
+static inline int arch_spin_yield_target(int lock, struct spin_wait *node)
+{
+ if (lock & _Q_LOCK_CPU_MASK)
+ return lock & _Q_LOCK_CPU_MASK;
+ if (node == NULL || node->prev == NULL)
+ return 0; /* 0 -> no target cpu */
+ while (node->prev)
+ node = node->prev;
+ return node->node_id >> _Q_TAIL_CPU_OFFSET;
+}
+
+static inline void arch_spin_lock_queued(arch_spinlock_t *lp)
+{
+ struct spin_wait *node, *next;
+ int lockval, ix, node_id, tail_id, old, new, owner, count;
+
+ ix = S390_lowcore.spinlock_index++;
+ barrier();
+ lockval = SPINLOCK_LOCKVAL; /* cpu + 1 */
+ node = this_cpu_ptr(&spin_wait[ix]);
+ node->prev = node->next = NULL;
+ node_id = node->node_id;
+
+ /* Enqueue the node for this CPU in the spinlock wait queue */
+ while (1) {
+ old = READ_ONCE(lp->lock);
+ if ((old & _Q_LOCK_CPU_MASK) == 0 &&
+ (old & _Q_LOCK_STEAL_MASK) != _Q_LOCK_STEAL_MASK) {
+ /*
+ * The lock is free but there may be waiters.
+ * With no waiters simply take the lock, if there
+ * are waiters try to steal the lock. The lock may
+ * be stolen three times before the next queued
+ * waiter will get the lock.
+ */
+ new = (old ? (old + _Q_LOCK_STEAL_ADD) : 0) | lockval;
+ if (__atomic_cmpxchg_bool(&lp->lock, old, new))
+ /* Got the lock */
+ goto out;
+ /* lock passing in progress */
+ continue;
+ }
+ /* Make the node of this CPU the new tail. */
+ new = node_id | (old & _Q_LOCK_MASK);
+ if (__atomic_cmpxchg_bool(&lp->lock, old, new))
+ break;
+ }
+ /* Set the 'next' pointer of the tail node in the queue */
+ tail_id = old & _Q_TAIL_MASK;
+ if (tail_id != 0) {
+ node->prev = arch_spin_decode_tail(tail_id);
+ WRITE_ONCE(node->prev->next, node);
+ }
/* Pass the virtual CPU to the lock holder if it is not running */
- owner = arch_load_niai4(&lp->lock);
+ owner = arch_spin_yield_target(old, node);
if (owner && arch_vcpu_is_preempted(owner - 1))
smp_yield_cpu(owner - 1);
+ /* Spin on the CPU local node->prev pointer */
+ if (tail_id != 0) {
+ count = spin_retry;
+ while (READ_ONCE(node->prev) != NULL) {
+ if (count-- >= 0)
+ continue;
+ count = spin_retry;
+ /* Query running state of lock holder again. */
+ owner = arch_spin_yield_target(old, node);
+ if (owner && arch_vcpu_is_preempted(owner - 1))
+ smp_yield_cpu(owner - 1);
+ }
+ }
+
+ /* Spin on the lock value in the spinlock_t */
count = spin_retry;
while (1) {
- owner = arch_load_niai4(&lp->lock);
- /* Try to get the lock if it is free. */
+ old = READ_ONCE(lp->lock);
+ owner = old & _Q_LOCK_CPU_MASK;
if (!owner) {
- if (arch_cmpxchg_niai8(&lp->lock, 0, cpu))
- return;
+ tail_id = old & _Q_TAIL_MASK;
+ new = ((tail_id != node_id) ? tail_id : 0) | lockval;
+ if (__atomic_cmpxchg_bool(&lp->lock, old, new))
+ /* Got the lock */
+ break;
continue;
}
if (count-- >= 0)
continue;
count = spin_retry;
- /*
- * For multiple layers of hypervisors, e.g. z/VM + LPAR
- * yield the CPU unconditionally. For LPAR rely on the
- * sense running status.
- */
if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(owner - 1))
smp_yield_cpu(owner - 1);
}
+
+ /* Pass lock_spin job to next CPU in the queue */
+ if (node_id && tail_id != node_id) {
+ /* Wait until the next CPU has set up the 'next' pointer */
+ while ((next = READ_ONCE(node->next)) == NULL)
+ ;
+ next->prev = NULL;
+ }
+
+ out:
+ S390_lowcore.spinlock_index--;
}
-EXPORT_SYMBOL(arch_spin_lock_wait);
-void arch_spin_lock_wait_flags(arch_spinlock_t *lp, unsigned long flags)
+static inline void arch_spin_lock_classic(arch_spinlock_t *lp)
{
- int cpu = SPINLOCK_LOCKVAL;
- int owner, count;
+ int lockval, old, new, owner, count;
- local_irq_restore(flags);
+ lockval = SPINLOCK_LOCKVAL; /* cpu + 1 */
/* Pass the virtual CPU to the lock holder if it is not running */
- owner = arch_load_niai4(&lp->lock);
+ owner = arch_spin_yield_target(ACCESS_ONCE(lp->lock), NULL);
if (owner && arch_vcpu_is_preempted(owner - 1))
smp_yield_cpu(owner - 1);
count = spin_retry;
while (1) {
- owner = arch_load_niai4(&lp->lock);
+ old = arch_load_niai4(&lp->lock);
+ owner = old & _Q_LOCK_CPU_MASK;
/* Try to get the lock if it is free. */
if (!owner) {
- local_irq_disable();
- if (arch_cmpxchg_niai8(&lp->lock, 0, cpu))
- return;
- local_irq_restore(flags);
+ new = (old & _Q_TAIL_MASK) | lockval;
+ if (arch_cmpxchg_niai8(&lp->lock, old, new))
+ /* Got the lock */
+ return;
continue;
}
if (count-- >= 0)
continue;
count = spin_retry;
- /*
- * For multiple layers of hypervisors, e.g. z/VM + LPAR
- * yield the CPU unconditionally. For LPAR rely on the
- * sense running status.
- */
if (!MACHINE_IS_LPAR || arch_vcpu_is_preempted(owner - 1))
smp_yield_cpu(owner - 1);
}
}
-EXPORT_SYMBOL(arch_spin_lock_wait_flags);
+
+void arch_spin_lock_wait(arch_spinlock_t *lp)
+{
+ /* Use classic spinlocks + niai if the steal time is >= 10% */
+ if (test_cpu_flag(CIF_DEDICATED_CPU))
+ arch_spin_lock_queued(lp);
+ else
+ arch_spin_lock_classic(lp);
+}
+EXPORT_SYMBOL(arch_spin_lock_wait);
int arch_spin_trylock_retry(arch_spinlock_t *lp)
{
@@ -270,3 +391,16 @@ void arch_lock_relax(int cpu)
smp_yield_cpu(cpu - 1);
}
EXPORT_SYMBOL(arch_lock_relax);
+
+void arch_spin_relax(arch_spinlock_t *lp)
+{
+ int cpu;
+
+ cpu = READ_ONCE(lp->lock) & _Q_LOCK_CPU_MASK;
+ if (!cpu)
+ return;
+ if (MACHINE_IS_LPAR && !arch_vcpu_is_preempted(cpu - 1))
+ return;
+ smp_yield_cpu(cpu - 1);
+}
+EXPORT_SYMBOL(arch_spin_relax);