Diffstat (limited to 'arch/powerpc/lib')
-rw-r--r--  arch/powerpc/lib/code-patching.c | 146
-rw-r--r--  arch/powerpc/lib/qspinlock.c     | 122
2 files changed, 191 insertions(+), 77 deletions(-)
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index b00112d7ad46..c6ab46156cda 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -38,6 +38,7 @@ static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 *patch_addr
return 0;
failed:
+ mb(); /* sync */
return -EPERM;
}
@@ -204,9 +205,6 @@ void __init poking_init(void)
{
int ret;
- if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
- return;
-
if (mm_patch_enabled())
ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
"powerpc/text_poke_mm:online",
@@ -309,10 +307,6 @@ static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr)
err = __patch_instruction(addr, instr, patch_addr);
- /* hwsync performed by __patch_instruction (sync) if successful */
- if (err)
- mb(); /* sync */
-
/* context synchronisation performed by __patch_instruction (isync or exception) */
stop_using_temp_mm(patching_mm, orig_mm);
@@ -378,6 +372,144 @@ int patch_instruction(u32 *addr, ppc_inst_t instr)
}
NOKPROBE_SYMBOL(patch_instruction);
+static int __patch_instructions(u32 *patch_addr, u32 *code, size_t len, bool repeat_instr)
+{
+ unsigned long start = (unsigned long)patch_addr;
+
+ /* Repeat instruction */
+ if (repeat_instr) {
+ ppc_inst_t instr = ppc_inst_read(code);
+
+ if (ppc_inst_prefixed(instr)) {
+ u64 val = ppc_inst_as_ulong(instr);
+
+ memset64((u64 *)patch_addr, val, len / 8);
+ } else {
+ u32 val = ppc_inst_val(instr);
+
+ memset32(patch_addr, val, len / 4);
+ }
+ } else {
+ memcpy(patch_addr, code, len);
+ }
+
+ smp_wmb(); /* smp write barrier */
+ flush_icache_range(start, start + len);
+ return 0;
+}
+
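As a concrete illustration of the repeat path above (hypothetical buffer and values, not part of this diff): a plain 4-byte instruction is replicated len/4 times via memset32(), a prefixed 8-byte instruction len/8 times via memset64().

	/* Hypothetical sketch: fill 16 bytes with nop (0x60000000) */
	u32 patch_area[4];

	memset32(patch_area, 0x60000000, sizeof(patch_area) / sizeof(u32));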
+/*
+ * A page is mapped and instructions that fit the page are patched.
+ * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below.
+ */
+static int __do_patch_instructions_mm(u32 *addr, u32 *code, size_t len, bool repeat_instr)
+{
+ struct mm_struct *patching_mm, *orig_mm;
+ unsigned long pfn = get_patch_pfn(addr);
+ unsigned long text_poke_addr;
+ spinlock_t *ptl;
+ u32 *patch_addr;
+ pte_t *pte;
+ int err;
+
+ patching_mm = __this_cpu_read(cpu_patching_context.mm);
+ text_poke_addr = __this_cpu_read(cpu_patching_context.addr);
+ patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
+
+ pte = get_locked_pte(patching_mm, text_poke_addr, &ptl);
+ if (!pte)
+ return -ENOMEM;
+
+ __set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
+
+ /* order PTE update before use, also serves as the hwsync */
+ asm volatile("ptesync" ::: "memory");
+
+ /* order context switch after arbitrary prior code */
+ isync();
+
+ orig_mm = start_using_temp_mm(patching_mm);
+
+ err = __patch_instructions(patch_addr, code, len, repeat_instr);
+
+ /* context synchronisation performed by __patch_instructions */
+ stop_using_temp_mm(patching_mm, orig_mm);
+
+ pte_clear(patching_mm, text_poke_addr, pte);
+ /*
+ * ptesync to order PTE update before TLB invalidation done
+ * by radix__local_flush_tlb_page_psize (in _tlbiel_va)
+ */
+ local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize);
+
+ pte_unmap_unlock(pte, ptl);
+
+ return err;
+}
+
+/*
+ * A page is mapped and instructions that fit the page are patched.
+ * Assumes 'len' to be (PAGE_SIZE - offset_in_page(addr)) or below.
+ */
+static int __do_patch_instructions(u32 *addr, u32 *code, size_t len, bool repeat_instr)
+{
+ unsigned long pfn = get_patch_pfn(addr);
+ unsigned long text_poke_addr;
+ u32 *patch_addr;
+ pte_t *pte;
+ int err;
+
+ text_poke_addr = (unsigned long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK;
+ patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
+
+ pte = __this_cpu_read(cpu_patching_context.pte);
+ __set_pte_at(&init_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL), 0);
+ /* See ptesync comment in radix__set_pte_at() */
+ if (radix_enabled())
+ asm volatile("ptesync" ::: "memory");
+
+ err = __patch_instructions(patch_addr, code, len, repeat_instr);
+
+ pte_clear(&init_mm, text_poke_addr, pte);
+ flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);
+
+ return err;
+}
+
+/*
+ * Patch 'addr' with 'len' bytes of instructions from 'code'.
+ *
+ * If repeat_instr is true, the same instruction is filled for
+ * 'len' bytes.
+ */
+int patch_instructions(u32 *addr, u32 *code, size_t len, bool repeat_instr)
+{
+ while (len > 0) {
+ unsigned long flags;
+ size_t plen;
+ int err;
+
+ plen = min_t(size_t, PAGE_SIZE - offset_in_page(addr), len);
+
+ local_irq_save(flags);
+ if (mm_patch_enabled())
+ err = __do_patch_instructions_mm(addr, code, plen, repeat_instr);
+ else
+ err = __do_patch_instructions(addr, code, plen, repeat_instr);
+ local_irq_restore(flags);
+ if (err)
+ return err;
+
+ len -= plen;
+ addr = (u32 *)((unsigned long)addr + plen);
+ if (!repeat_instr)
+ code = (u32 *)((unsigned long)code + plen);
+ }
+
+ return 0;
+}
+NOKPROBE_SYMBOL(patch_instructions);
+
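A minimal caller sketch (hypothetical, not part of this diff) showing both modes of the new interface: copying a prepared buffer, and filling a region with one repeated instruction such as a trap.

	/* Hypothetical: copy 'size' bytes of generated code into 'dst' */
	err = patch_instructions(dst, buf, size, false);

	/* Hypothetical: overwrite 'size' bytes at 'dst' with traps */
	u32 trap = PPC_RAW_TRAP();

	err = patch_instructions(dst, &trap, size, true);

PPC_RAW_TRAP() is from asm/ppc-opcode.h; any single instruction works as the fill value provided 'size' is a multiple of the instruction length.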
int patch_branch(u32 *addr, unsigned long target, int flags)
{
ppc_inst_t instr;
diff --git a/arch/powerpc/lib/qspinlock.c b/arch/powerpc/lib/qspinlock.c
index 6dd2f46bd3ef..5de4dd549f6e 100644
--- a/arch/powerpc/lib/qspinlock.c
+++ b/arch/powerpc/lib/qspinlock.c
@@ -16,7 +16,8 @@ struct qnode {
struct qnode *next;
struct qspinlock *lock;
int cpu;
- int yield_cpu;
+ u8 sleepy; /* 1 if the previous vCPU was preempted or
+ * if the previous node was sleepy */
u8 locked; /* 1 if lock acquired */
};
@@ -43,7 +44,7 @@ static bool pv_sleepy_lock_sticky __read_mostly = false;
static u64 pv_sleepy_lock_interval_ns __read_mostly = 0;
static int pv_sleepy_lock_factor __read_mostly = 256;
static bool pv_yield_prev __read_mostly = true;
-static bool pv_yield_propagate_owner __read_mostly = true;
+static bool pv_yield_sleepy_owner __read_mostly = true;
static bool pv_prod_head __read_mostly = false;
static DEFINE_PER_CPU_ALIGNED(struct qnodes, qnodes);
@@ -247,22 +248,18 @@ static __always_inline void seen_sleepy_lock(void)
this_cpu_write(sleepy_lock_seen_clock, sched_clock());
}
-static __always_inline void seen_sleepy_node(struct qspinlock *lock, u32 val)
+static __always_inline void seen_sleepy_node(void)
{
if (pv_sleepy_lock) {
if (pv_sleepy_lock_interval_ns)
this_cpu_write(sleepy_lock_seen_clock, sched_clock());
- if (val & _Q_LOCKED_VAL) {
- if (!(val & _Q_SLEEPY_VAL))
- try_set_sleepy(lock, val);
- }
+ /* Don't set sleepy because we likely have a stale val */
}
}
-static struct qnode *get_tail_qnode(struct qspinlock *lock, u32 val)
+static struct qnode *get_tail_qnode(struct qspinlock *lock, int prev_cpu)
{
- int cpu = decode_tail_cpu(val);
- struct qnodes *qnodesp = per_cpu_ptr(&qnodes, cpu);
+ struct qnodes *qnodesp = per_cpu_ptr(&qnodes, prev_cpu);
int idx;
/*
@@ -353,77 +350,66 @@ static __always_inline bool yield_head_to_locked_owner(struct qspinlock *lock, u
return __yield_to_locked_owner(lock, val, paravirt, mustq);
}
-static __always_inline void propagate_yield_cpu(struct qnode *node, u32 val, int *set_yield_cpu, bool paravirt)
+static __always_inline void propagate_sleepy(struct qnode *node, u32 val, bool paravirt)
{
struct qnode *next;
int owner;
if (!paravirt)
return;
- if (!pv_yield_propagate_owner)
- return;
-
- owner = get_owner_cpu(val);
- if (*set_yield_cpu == owner)
+ if (!pv_yield_sleepy_owner)
return;
next = READ_ONCE(node->next);
if (!next)
return;
- if (vcpu_is_preempted(owner)) {
- next->yield_cpu = owner;
- *set_yield_cpu = owner;
- } else if (*set_yield_cpu != -1) {
- next->yield_cpu = owner;
- *set_yield_cpu = owner;
- }
+ if (next->sleepy)
+ return;
+
+ owner = get_owner_cpu(val);
+ if (vcpu_is_preempted(owner))
+ next->sleepy = 1;
}
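Reassembled from the hunk above, the resulting helper reads:

	static __always_inline void propagate_sleepy(struct qnode *node, u32 val, bool paravirt)
	{
		struct qnode *next;
		int owner;

		if (!paravirt)
			return;
		if (!pv_yield_sleepy_owner)
			return;

		next = READ_ONCE(node->next);
		if (!next)
			return;
		if (next->sleepy)
			return;

		/* mark the next waiter sleepy only while the owner is preempted */
		owner = get_owner_cpu(val);
		if (vcpu_is_preempted(owner))
			next->sleepy = 1;
	}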
/* Called inside spin_begin() */
-static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, u32 val, bool paravirt)
+static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node, int prev_cpu, bool paravirt)
{
- int prev_cpu = decode_tail_cpu(val);
u32 yield_count;
- int yield_cpu;
bool preempted = false;
if (!paravirt)
goto relax;
- if (!pv_yield_propagate_owner)
- goto yield_prev;
-
- yield_cpu = READ_ONCE(node->yield_cpu);
- if (yield_cpu == -1) {
- /* Propagate back the -1 CPU */
- if (node->next && node->next->yield_cpu != -1)
- node->next->yield_cpu = yield_cpu;
+ if (!pv_yield_sleepy_owner)
goto yield_prev;
- }
-
- yield_count = yield_count_of(yield_cpu);
- if ((yield_count & 1) == 0)
- goto yield_prev; /* owner vcpu is running */
-
- if (get_owner_cpu(READ_ONCE(lock->val)) != yield_cpu)
- goto yield_prev; /* re-sample lock owner */
- spin_end();
-
- preempted = true;
- seen_sleepy_node(lock, val);
+ /*
+ * If the previous waiter was preempted it might not be able to
+ * propagate sleepy to us, so check the lock in that case too.
+ */
+ if (node->sleepy || vcpu_is_preempted(prev_cpu)) {
+ u32 val = READ_ONCE(lock->val);
- smp_rmb();
+ if (val & _Q_LOCKED_VAL) {
+ if (node->next && !node->next->sleepy) {
+ /*
+ * Propagate sleepy to next waiter. Only if
+ * owner is preempted, which allows the queue
+ * to become "non-sleepy" if vCPU preemption
+ * ceases to occur, even if the lock remains
+ * highly contended.
+ */
+ if (vcpu_is_preempted(get_owner_cpu(val)))
+ node->next->sleepy = 1;
+ }
- if (yield_cpu == node->yield_cpu) {
- if (node->next && node->next->yield_cpu != yield_cpu)
- node->next->yield_cpu = yield_cpu;
- yield_to_preempted(yield_cpu, yield_count);
- spin_begin();
- return preempted;
+ preempted = yield_to_locked_owner(lock, val, paravirt);
+ if (preempted)
+ return preempted;
+ }
+ node->sleepy = false;
}
- spin_begin();
yield_prev:
if (!pv_yield_prev)
@@ -436,7 +422,7 @@ yield_prev:
spin_end();
preempted = true;
- seen_sleepy_node(lock, val);
+ seen_sleepy_node();
smp_rmb(); /* See __yield_to_locked_owner comment */
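Reassembled from the hunks above, the rewritten head of yield_to_prev() reads as follows (the unchanged tail that yields to prev_cpu is elided):

	/* Called inside spin_begin() */
	static __always_inline bool yield_to_prev(struct qspinlock *lock, struct qnode *node,
						  int prev_cpu, bool paravirt)
	{
		u32 yield_count;
		bool preempted = false;

		if (!paravirt)
			goto relax;

		if (!pv_yield_sleepy_owner)
			goto yield_prev;

		/*
		 * If the previous waiter was preempted it might not be able to
		 * propagate sleepy to us, so check the lock in that case too.
		 */
		if (node->sleepy || vcpu_is_preempted(prev_cpu)) {
			u32 val = READ_ONCE(lock->val);

			if (val & _Q_LOCKED_VAL) {
				if (node->next && !node->next->sleepy) {
					/* propagate sleepy only while the owner is preempted */
					if (vcpu_is_preempted(get_owner_cpu(val)))
						node->next->sleepy = 1;
				}

				preempted = yield_to_locked_owner(lock, val, paravirt);
				if (preempted)
					return preempted;
			}
			node->sleepy = false;
		}

	yield_prev:
		if (!pv_yield_prev)
			goto relax;
		/* ... yield to prev_cpu, seen_sleepy_node(), smp_rmb(), relax: ... */
	}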
@@ -546,7 +532,6 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
bool sleepy = false;
bool mustq = false;
int idx;
- int set_yield_cpu = -1;
int iters = 0;
BUILD_BUG_ON(CONFIG_NR_CPUS >= (1U << _Q_TAIL_CPU_BITS));
@@ -570,7 +555,7 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
node->next = NULL;
node->lock = lock;
node->cpu = smp_processor_id();
- node->yield_cpu = -1;
+ node->sleepy = 0;
node->locked = 0;
tail = encode_tail_cpu(node->cpu);
@@ -587,7 +572,8 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
* head of the waitqueue.
*/
if (old & _Q_TAIL_CPU_MASK) {
- struct qnode *prev = get_tail_qnode(lock, old);
+ int prev_cpu = decode_tail_cpu(old);
+ struct qnode *prev = get_tail_qnode(lock, prev_cpu);
/* Link @node into the waitqueue. */
WRITE_ONCE(prev->next, node);
@@ -597,16 +583,12 @@ static __always_inline void queued_spin_lock_mcs_queue(struct qspinlock *lock, b
while (!READ_ONCE(node->locked)) {
spec_barrier();
- if (yield_to_prev(lock, node, old, paravirt))
+ if (yield_to_prev(lock, node, prev_cpu, paravirt))
seen_preempted = true;
}
spec_barrier();
spin_end();
- /* Clear out stale propagated yield_cpu */
- if (paravirt && pv_yield_propagate_owner && node->yield_cpu != -1)
- node->yield_cpu = -1;
-
smp_rmb(); /* acquire barrier for the mcs lock */
/*
@@ -648,7 +630,7 @@ again:
}
}
- propagate_yield_cpu(node, val, &set_yield_cpu, paravirt);
+ propagate_sleepy(node, val, paravirt);
preempted = yield_head_to_locked_owner(lock, val, paravirt);
if (!maybe_stealers)
continue;
@@ -952,21 +934,21 @@ static int pv_yield_prev_get(void *data, u64 *val)
DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_prev, pv_yield_prev_get, pv_yield_prev_set, "%llu\n");
-static int pv_yield_propagate_owner_set(void *data, u64 val)
+static int pv_yield_sleepy_owner_set(void *data, u64 val)
{
- pv_yield_propagate_owner = !!val;
+ pv_yield_sleepy_owner = !!val;
return 0;
}
-static int pv_yield_propagate_owner_get(void *data, u64 *val)
+static int pv_yield_sleepy_owner_get(void *data, u64 *val)
{
- *val = pv_yield_propagate_owner;
+ *val = pv_yield_sleepy_owner;
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_propagate_owner, pv_yield_propagate_owner_get, pv_yield_propagate_owner_set, "%llu\n");
+DEFINE_SIMPLE_ATTRIBUTE(fops_pv_yield_sleepy_owner, pv_yield_sleepy_owner_get, pv_yield_sleepy_owner_set, "%llu\n");
static int pv_prod_head_set(void *data, u64 val)
{
@@ -998,7 +980,7 @@ static __init int spinlock_debugfs_init(void)
debugfs_create_file("qspl_pv_sleepy_lock_interval_ns", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_interval_ns);
debugfs_create_file("qspl_pv_sleepy_lock_factor", 0600, arch_debugfs_dir, NULL, &fops_pv_sleepy_lock_factor);
debugfs_create_file("qspl_pv_yield_prev", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_prev);
- debugfs_create_file("qspl_pv_yield_propagate_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_propagate_owner);
+ debugfs_create_file("qspl_pv_yield_sleepy_owner", 0600, arch_debugfs_dir, NULL, &fops_pv_yield_sleepy_owner);
debugfs_create_file("qspl_pv_prod_head", 0600, arch_debugfs_dir, NULL, &fops_pv_prod_head);
}