From 0c89649a70bed679fd408c1eb82fa99dbe1354a0 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 2 Apr 2020 22:12:12 +1000 Subject: powerpc/64s: Fix doorbell wakeup msgclr optimisation Commit 3282a3da25bd ("powerpc/64: Implement soft interrupt replay in C") broke the doorbell wakeup optimisation introduced by commit a9af97aa0a12 ("powerpc/64s: msgclr when handling doorbell exceptions from system reset"). This patch restores the msgclr, in C code. It's now done in the system reset wakeup path rather than doorbell interrupt replay where it used to be, because it is always the right thing to do in the wakeup case, but it may be rarely of use in other interrupt replay situations in which case it's wasted work - we would have to run measurements to see if that was a worthwhile optimisation, and I suspect it would not be. The results are similar to those in the original commit, test on POWER8 of context_switch selftests benchmark with polling idle disabled (e.g., always nap, giving cross-CPU IPIs) gives the following results: broken patched Different threads, same core: 317k/s 375k/s +18.7% Different cores: 280k/s 282k/s +1.0% Fixes: 3282a3da25bd ("powerpc/64: Implement soft interrupt replay in C") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200402121212.1118218-1-npiggin@gmail.com --- arch/powerpc/kernel/exceptions-64s.S | 19 ------------------- arch/powerpc/kernel/irq.c | 13 +++++++++++++ 2 files changed, 13 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 18bbce143084..728ccb0f560c 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -3121,22 +3121,3 @@ handle_dabr_fault: li r5,SIGSEGV bl bad_page_fault b interrupt_return - -/* - * When doorbell is triggered from system reset wakeup, the message is - * not cleared, so it would fire again when EE is enabled. - * - * When coming from local_irq_enable, there may be the same problem if - * we were hard disabled. - * - * Execute msgclr to clear pending exceptions before handling it. - */ -h_doorbell_common_msgclr: - LOAD_REG_IMMEDIATE(r3, PPC_DBELL_MSGTYPE << (63-36)) - PPC_MSGCLR(3) - b h_doorbell_common_virt - -doorbell_super_common_msgclr: - LOAD_REG_IMMEDIATE(r3, PPC_DBELL_MSGTYPE << (63-36)) - PPC_MSGCLRP(3) - b doorbell_super_common_virt diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index a25ed47087ee..1f1169856dc8 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -527,6 +527,19 @@ void irq_set_pending_from_srr1(unsigned long srr1) return; } + if (reason == PACA_IRQ_DBELL) { + /* + * When doorbell triggers a system reset wakeup, the message + * is not cleared, so if the doorbell interrupt is replayed + * and the IPI handled, the doorbell interrupt would still + * fire when EE is enabled. + * + * To avoid taking the superfluous doorbell interrupt, + * execute a msgclr here before the interrupt is replayed. + */ + ppc_msgclr(PPC_DBELL_MSGTYPE); + } + /* * The 0 index (SRR1[42:45]=b0000) must always evaluate to 0, * so this can be called unconditionally with the SRR1 wake -- cgit v1.2.3 From a95a0a1654f16366360399574e10efd87e867b39 Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Fri, 20 Mar 2020 16:31:19 +0530 Subject: powerpc/pseries: Fix MCE handling on pseries MCE handling on pSeries platform fails as recent rework to use common code for pSeries and PowerNV in machine check error handling tries to access per-cpu variables in realmode. The per-cpu variables may be outside the RMO region on pSeries platform and needs translation to be enabled for access. Just moving these per-cpu variable into RMO region did'nt help because we queue some work to workqueues in real mode, which again tries to touch per-cpu variables. Also fwnmi_release_errinfo() cannot be called when translation is not enabled. This patch fixes this by enabling translation in the exception handler when all required real mode handling is done. This change only affects the pSeries platform. Without this fix below kernel crash is seen on injecting SLB multihit: BUG: Unable to handle kernel data access on read at 0xc00000027b205950 Faulting instruction address: 0xc00000000003b7e0 Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries Modules linked in: mcetest_slb(OE+) af_packet(E) xt_tcpudp(E) ip6t_rpfilter(E) ip6t_REJECT(E) ipt_REJECT(E) xt_conntrack(E) ip_set(E) nfnetlink(E) ebtable_nat(E) ebtable_broute(E) ip6table_nat(E) ip6table_mangle(E) ip6table_raw(E) ip6table_security(E) iptable_nat(E) nf_nat(E) nf_conntrack(E) nf_defrag_ipv6(E) nf_defrag_ipv4(E) iptable_mangle(E) iptable_raw(E) iptable_security(E) ebtable_filter(E) ebtables(E) ip6table_filter(E) ip6_tables(E) iptable_filter(E) ip_tables(E) x_tables(E) xfs(E) ibmveth(E) vmx_crypto(E) gf128mul(E) uio_pdrv_genirq(E) uio(E) crct10dif_vpmsum(E) rtc_generic(E) btrfs(E) libcrc32c(E) xor(E) zstd_decompress(E) zstd_compress(E) raid6_pq(E) sr_mod(E) sd_mod(E) cdrom(E) ibmvscsi(E) scsi_transport_srp(E) crc32c_vpmsum(E) dm_mod(E) sg(E) scsi_mod(E) CPU: 34 PID: 8154 Comm: insmod Kdump: loaded Tainted: G OE 5.5.0-mahesh #1 NIP: c00000000003b7e0 LR: c0000000000f2218 CTR: 0000000000000000 REGS: c000000007dcb960 TRAP: 0300 Tainted: G OE (5.5.0-mahesh) MSR: 8000000000001003 CR: 28002428 XER: 20040000 CFAR: c0000000000f2214 DAR: c00000027b205950 DSISR: 40000000 IRQMASK: 0 GPR00: c0000000000f2218 c000000007dcbbf0 c000000001544800 c000000007dcbd70 GPR04: 0000000000000001 c000000007dcbc98 c008000000d00258 c0080000011c0000 GPR08: 0000000000000000 0000000300000003 c000000001035950 0000000003000048 GPR12: 000000027a1d0000 c000000007f9c000 0000000000000558 0000000000000000 GPR16: 0000000000000540 c008000001110000 c008000001110540 0000000000000000 GPR20: c00000000022af10 c00000025480fd70 c008000001280000 c00000004bfbb300 GPR24: c000000001442330 c00800000800000d c008000008000000 4009287a77000510 GPR28: 0000000000000000 0000000000000002 c000000001033d30 0000000000000001 NIP [c00000000003b7e0] save_mce_event+0x30/0x240 LR [c0000000000f2218] pseries_machine_check_realmode+0x2c8/0x4f0 Call Trace: Instruction dump: 3c4c0151 38429050 7c0802a6 60000000 fbc1fff0 fbe1fff8 f821ffd1 3d42ffaf 3fc2ffaf e98d0030 394a1150 3bdef530 <7d6a62aa> 1d2b0048 2f8b0063 380b0001 ---[ end trace 46fd63f36bbdd940 ]--- Fixes: 9ca766f9891d ("powerpc/64s/pseries: machine check convert to use common event code") Reviewed-by: Mahesh Salgaonkar Reviewed-by: Nicholas Piggin Signed-off-by: Ganesh Goudar Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200320110119.10207-1-ganeshgr@linux.ibm.com --- arch/powerpc/platforms/pseries/ras.c | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index aa6208c8d4f0..1d1da639b8b7 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -686,6 +686,17 @@ static int mce_handle_error(struct pt_regs *regs, struct rtas_error_log *errp) #endif out: + /* + * Enable translation as we will be accessing per-cpu variables + * in save_mce_event() which may fall outside RMO region, also + * leave it enabled because subsequently we will be queuing work + * to workqueues where again per-cpu variables accessed, besides + * fwnmi_release_errinfo() crashes when called in realmode on + * pseries. + * Note: All the realmode handling like flushing SLB entries for + * SLB multihit is done by now. + */ + mtmsr(mfmsr() | MSR_IR | MSR_DR); save_mce_event(regs, disposition == RTAS_DISP_FULLY_RECOVERED, &mce_err, regs->nip, eaddr, paddr); -- cgit v1.2.3 From a36e8ba60b991d563677227f172db69e030797e6 Mon Sep 17 00:00:00 2001 From: Anju T Sudhakar Date: Fri, 13 Mar 2020 11:22:37 +0530 Subject: powerpc/perf: Implement a global lock to avoid races between trace, core and thread imc events. IMC(In-memory Collection Counters) does performance monitoring in two different modes, i.e accumulation mode(core-imc and thread-imc events), and trace mode(trace-imc events). A cpu thread can either be in accumulation-mode or trace-mode at a time and this is done via the LDBAR register in POWER architecture. The current design does not address the races between thread-imc and trace-imc events. Patch implements a global id and lock to avoid the races between core, trace and thread imc events. With this global id-lock implementation, the system can either run core, thread or trace imc events at a time. i.e. to run any core-imc events, thread/trace imc events should not be enabled/monitored. Signed-off-by: Anju T Sudhakar Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200313055238.8656-1-anju@linux.vnet.ibm.com --- arch/powerpc/perf/imc-pmu.c | 173 ++++++++++++++++++++++++++++++++++++++------ 1 file changed, 149 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c index cb50a9e1fd2d..eb82dda884e5 100644 --- a/arch/powerpc/perf/imc-pmu.c +++ b/arch/powerpc/perf/imc-pmu.c @@ -44,6 +44,16 @@ static DEFINE_PER_CPU(u64 *, trace_imc_mem); static struct imc_pmu_ref *trace_imc_refc; static int trace_imc_mem_size; +/* + * Global data structure used to avoid races between thread, + * core and trace-imc + */ +static struct imc_pmu_ref imc_global_refc = { + .lock = __MUTEX_INITIALIZER(imc_global_refc.lock), + .id = 0, + .refc = 0, +}; + static struct imc_pmu *imc_event_to_pmu(struct perf_event *event) { return container_of(event->pmu, struct imc_pmu, pmu); @@ -698,6 +708,16 @@ static int ppc_core_imc_cpu_offline(unsigned int cpu) return -EINVAL; ref->refc = 0; + /* + * Reduce the global reference count, if this is the + * last cpu in this core and core-imc event running + * in this cpu. + */ + mutex_lock(&imc_global_refc.lock); + if (imc_global_refc.id == IMC_DOMAIN_CORE) + imc_global_refc.refc--; + + mutex_unlock(&imc_global_refc.lock); } return 0; } @@ -710,6 +730,23 @@ static int core_imc_pmu_cpumask_init(void) ppc_core_imc_cpu_offline); } +static void reset_global_refc(struct perf_event *event) +{ + mutex_lock(&imc_global_refc.lock); + imc_global_refc.refc--; + + /* + * If no other thread is running any + * event for this domain(thread/core/trace), + * set the global id to zero. + */ + if (imc_global_refc.refc <= 0) { + imc_global_refc.refc = 0; + imc_global_refc.id = 0; + } + mutex_unlock(&imc_global_refc.lock); +} + static void core_imc_counters_release(struct perf_event *event) { int rc, core_id; @@ -759,6 +796,8 @@ static void core_imc_counters_release(struct perf_event *event) ref->refc = 0; } mutex_unlock(&ref->lock); + + reset_global_refc(event); } static int core_imc_event_init(struct perf_event *event) @@ -819,6 +858,29 @@ static int core_imc_event_init(struct perf_event *event) ++ref->refc; mutex_unlock(&ref->lock); + /* + * Since the system can run either in accumulation or trace-mode + * of IMC at a time, core-imc events are allowed only if no other + * trace/thread imc events are enabled/monitored. + * + * Take the global lock, and check the refc.id + * to know whether any other trace/thread imc + * events are running. + */ + mutex_lock(&imc_global_refc.lock); + if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_CORE) { + /* + * No other trace/thread imc events are running in + * the system, so set the refc.id to core-imc. + */ + imc_global_refc.id = IMC_DOMAIN_CORE; + imc_global_refc.refc++; + } else { + mutex_unlock(&imc_global_refc.lock); + return -EBUSY; + } + mutex_unlock(&imc_global_refc.lock); + event->hw.event_base = (u64)pcmi->vbase + (config & IMC_EVENT_OFFSET_MASK); event->destroy = core_imc_counters_release; return 0; @@ -877,7 +939,23 @@ static int ppc_thread_imc_cpu_online(unsigned int cpu) static int ppc_thread_imc_cpu_offline(unsigned int cpu) { - mtspr(SPRN_LDBAR, 0); + /* + * Set the bit 0 of LDBAR to zero. + * + * If bit 0 of LDBAR is unset, it will stop posting + * the counter data to memory. + * For thread-imc, bit 0 of LDBAR will be set to 1 in the + * event_add function. So reset this bit here, to stop the updates + * to memory in the cpu_offline path. + */ + mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63)))); + + /* Reduce the refc if thread-imc event running on this cpu */ + mutex_lock(&imc_global_refc.lock); + if (imc_global_refc.id == IMC_DOMAIN_THREAD) + imc_global_refc.refc--; + mutex_unlock(&imc_global_refc.lock); + return 0; } @@ -916,7 +994,22 @@ static int thread_imc_event_init(struct perf_event *event) if (!target) return -EINVAL; + mutex_lock(&imc_global_refc.lock); + /* + * Check if any other trace/core imc events are running in the + * system, if not set the global id to thread-imc. + */ + if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_THREAD) { + imc_global_refc.id = IMC_DOMAIN_THREAD; + imc_global_refc.refc++; + } else { + mutex_unlock(&imc_global_refc.lock); + return -EBUSY; + } + mutex_unlock(&imc_global_refc.lock); + event->pmu->task_ctx_nr = perf_sw_context; + event->destroy = reset_global_refc; return 0; } @@ -1063,10 +1156,12 @@ static void thread_imc_event_del(struct perf_event *event, int flags) int core_id; struct imc_pmu_ref *ref; - mtspr(SPRN_LDBAR, 0); - core_id = smp_processor_id() / threads_per_core; ref = &core_imc_refc[core_id]; + if (!ref) { + pr_debug("imc: Failed to get event reference count\n"); + return; + } mutex_lock(&ref->lock); ref->refc--; @@ -1082,6 +1177,10 @@ static void thread_imc_event_del(struct perf_event *event, int flags) ref->refc = 0; } mutex_unlock(&ref->lock); + + /* Set bit 0 of LDBAR to zero, to stop posting updates to memory */ + mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63)))); + /* * Take a snapshot and calculate the delta and update * the event counter values. @@ -1133,7 +1232,18 @@ static int ppc_trace_imc_cpu_online(unsigned int cpu) static int ppc_trace_imc_cpu_offline(unsigned int cpu) { - mtspr(SPRN_LDBAR, 0); + /* + * No need to set bit 0 of LDBAR to zero, as + * it is set to zero for imc trace-mode + * + * Reduce the refc if any trace-imc event running + * on this cpu. + */ + mutex_lock(&imc_global_refc.lock); + if (imc_global_refc.id == IMC_DOMAIN_TRACE) + imc_global_refc.refc--; + mutex_unlock(&imc_global_refc.lock); + return 0; } @@ -1226,15 +1336,14 @@ static int trace_imc_event_add(struct perf_event *event, int flags) local_mem = get_trace_imc_event_base_addr(); ldbar_value = ((u64)local_mem & THREAD_IMC_LDBAR_MASK) | TRACE_IMC_ENABLE; - if (core_imc_refc) - ref = &core_imc_refc[core_id]; + /* trace-imc reference count */ + if (trace_imc_refc) + ref = &trace_imc_refc[core_id]; if (!ref) { - /* If core-imc is not enabled, use trace-imc reference count */ - if (trace_imc_refc) - ref = &trace_imc_refc[core_id]; - if (!ref) - return -EINVAL; + pr_debug("imc: Failed to get the event reference count\n"); + return -EINVAL; } + mtspr(SPRN_LDBAR, ldbar_value); mutex_lock(&ref->lock); if (ref->refc == 0) { @@ -1242,13 +1351,11 @@ static int trace_imc_event_add(struct perf_event *event, int flags) get_hard_smp_processor_id(smp_processor_id()))) { mutex_unlock(&ref->lock); pr_err("trace-imc: Unable to start the counters for core %d\n", core_id); - mtspr(SPRN_LDBAR, 0); return -EINVAL; } } ++ref->refc; mutex_unlock(&ref->lock); - return 0; } @@ -1274,16 +1381,13 @@ static void trace_imc_event_del(struct perf_event *event, int flags) int core_id = smp_processor_id() / threads_per_core; struct imc_pmu_ref *ref = NULL; - if (core_imc_refc) - ref = &core_imc_refc[core_id]; + if (trace_imc_refc) + ref = &trace_imc_refc[core_id]; if (!ref) { - /* If core-imc is not enabled, use trace-imc reference count */ - if (trace_imc_refc) - ref = &trace_imc_refc[core_id]; - if (!ref) - return; + pr_debug("imc: Failed to get event reference count\n"); + return; } - mtspr(SPRN_LDBAR, 0); + mutex_lock(&ref->lock); ref->refc--; if (ref->refc == 0) { @@ -1297,6 +1401,7 @@ static void trace_imc_event_del(struct perf_event *event, int flags) ref->refc = 0; } mutex_unlock(&ref->lock); + trace_imc_event_stop(event, flags); } @@ -1314,10 +1419,30 @@ static int trace_imc_event_init(struct perf_event *event) if (event->attr.sample_period == 0) return -ENOENT; + /* + * Take the global lock, and make sure + * no other thread is running any core/thread imc + * events + */ + mutex_lock(&imc_global_refc.lock); + if (imc_global_refc.id == 0 || imc_global_refc.id == IMC_DOMAIN_TRACE) { + /* + * No core/thread imc events are running in the + * system, so set the refc.id to trace-imc. + */ + imc_global_refc.id = IMC_DOMAIN_TRACE; + imc_global_refc.refc++; + } else { + mutex_unlock(&imc_global_refc.lock); + return -EBUSY; + } + mutex_unlock(&imc_global_refc.lock); + event->hw.idx = -1; target = event->hw.target; event->pmu->task_ctx_nr = perf_hw_context; + event->destroy = reset_global_refc; return 0; } @@ -1429,10 +1554,10 @@ static void cleanup_all_core_imc_memory(void) static void thread_imc_ldbar_disable(void *dummy) { /* - * By Zeroing LDBAR, we disable thread-imc - * updates. + * By setting 0th bit of LDBAR to zero, we disable thread-imc + * updates to memory. */ - mtspr(SPRN_LDBAR, 0); + mtspr(SPRN_LDBAR, (mfspr(SPRN_LDBAR) & (~(1UL << 63)))); } void thread_imc_disable(void) -- cgit v1.2.3 From 4bdd39460b5f57d4f165c4a05b22e66eb3a490ae Mon Sep 17 00:00:00 2001 From: Anju T Sudhakar Date: Fri, 13 Mar 2020 11:22:38 +0530 Subject: powerpc/powernv: Re-enable imc trace-mode in kernel commit <249fad734a25> ""powerpc/perf: Disable trace_imc pmu" disables IMC(In-Memory Collection) trace-mode in kernel, since frequent mode switching between accumulation mode and trace mode via the spr LDBAR in the hardware can trigger a checkstop(system crash). Patch to re-enable imc-trace mode in kernel. The previous patch(1/2) in this series will address the mode switching issue by implementing a global lock, and will restrict the usage of accumulation and trace-mode at a time. Signed-off-by: Anju T Sudhakar Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200313055238.8656-2-anju@linux.vnet.ibm.com --- arch/powerpc/platforms/powernv/opal-imc.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/opal-imc.c b/arch/powerpc/platforms/powernv/opal-imc.c index 968b9a4d1cd9..7824cc364bc4 100644 --- a/arch/powerpc/platforms/powernv/opal-imc.c +++ b/arch/powerpc/platforms/powernv/opal-imc.c @@ -268,14 +268,7 @@ static int opal_imc_counters_probe(struct platform_device *pdev) domain = IMC_DOMAIN_THREAD; break; case IMC_TYPE_TRACE: - /* - * FIXME. Using trace_imc events to monitor application - * or KVM thread performance can cause a checkstop - * (system crash). - * Disable it for now. - */ - pr_info_once("IMC: disabling trace_imc PMU\n"); - domain = -1; + domain = IMC_DOMAIN_TRACE; break; default: pr_warn("IMC Unknown Device type \n"); -- cgit v1.2.3 From 7ee417497a29028502cf952f419ab2635f563d51 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Fri, 27 Mar 2020 20:26:23 +0000 Subject: powerpc/ps3: Remove duplicate error message Remove a duplicate memory allocation failure error message. Signed-off-by: Markus Elfring Signed-off-by: Geoff Levand Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/1bc5a16a22c487c478a204ebb7b80a22d2ad9cd0.1585340156.git.geoff@infradead.org --- arch/powerpc/platforms/ps3/os-area.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/ps3/os-area.c b/arch/powerpc/platforms/ps3/os-area.c index cbddd63caf2d..e8530371aed6 100644 --- a/arch/powerpc/platforms/ps3/os-area.c +++ b/arch/powerpc/platforms/ps3/os-area.c @@ -613,10 +613,8 @@ static int update_flash_db(void) /* Read in header and db from flash. */ header = kmalloc(buf_len, GFP_KERNEL); - if (!header) { - pr_debug("%s: kmalloc failed\n", __func__); + if (!header) return -ENOMEM; - } count = os_area_flash_read(header, buf_len, 0); if (count < 0) { -- cgit v1.2.3 From d3883fa0784832bb65df019ae6a291a87d146fb1 Mon Sep 17 00:00:00 2001 From: Geoff Levand Date: Fri, 27 Mar 2020 20:26:23 +0000 Subject: powerpc/ps3: Set CONFIG_UEVENT_HELPER=y in ps3_defconfig Set CONFIG_UEVENT_HELPER=y in ps3_defconfig. commit 1be01d4a57142ded23bdb9e0c8d9369e693b26cc (driver: base: Disable CONFIG_UEVENT_HELPER by default) disabled the CONFIG_UEVENT_HELPER option that is needed for hotplug and module loading by most older 32bit powerpc distributions that users typically install on the PS3. Signed-off-by: Geoff Levand Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/410cda9aa1a6e04434dfe1f9aa2103d0694f706c.1585340156.git.geoff@infradead.org --- arch/powerpc/configs/ps3_defconfig | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig index 4db51719342a..81b55c880fc3 100644 --- a/arch/powerpc/configs/ps3_defconfig +++ b/arch/powerpc/configs/ps3_defconfig @@ -60,6 +60,8 @@ CONFIG_CFG80211=m CONFIG_CFG80211_WEXT=y CONFIG_MAC80211=m # CONFIG_MAC80211_RC_MINSTREL is not set +CONFIG_UEVENT_HELPER=y +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_BLK_DEV_LOOP=y CONFIG_BLK_DEV_RAM=y CONFIG_BLK_DEV_RAM_SIZE=65535 -- cgit v1.2.3 From 9e62ccec3ba0a17c8050ea78500dfdd0e4c5c0cc Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Fri, 20 Mar 2020 11:20:12 +0100 Subject: powerpc: Add back __ARCH_WANT_SYS_LLSEEK macro This partially reverts commit caf6f9c8a326 ("asm-generic: Remove unneeded __ARCH_WANT_SYS_LLSEEK macro") When CONFIG_COMPAT is disabled on ppc64 the kernel does not build. There is resistance to both removing the llseek syscall from the 64bit syscall tables and building the llseek interface unconditionally. Signed-off-by: Michal Suchanek Reviewed-by: Arnd Bergmann Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/lkml/20190828151552.GA16855@infradead.org/ Link: https://lore.kernel.org/lkml/20190829214319.498c7de2@naga/ Link: https://lore.kernel.org/r/dd4575c51e31766e87f7e7fa121d099ab78d3290.1584699455.git.msuchanek@suse.de --- arch/powerpc/include/asm/unistd.h | 1 + fs/read_write.c | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index b0720c7c3fcf..700fcdac2e3c 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -31,6 +31,7 @@ #define __ARCH_WANT_SYS_SOCKETCALL #define __ARCH_WANT_SYS_FADVISE64 #define __ARCH_WANT_SYS_GETPGRP +#define __ARCH_WANT_SYS_LLSEEK #define __ARCH_WANT_SYS_NICE #define __ARCH_WANT_SYS_OLD_GETRLIMIT #define __ARCH_WANT_SYS_OLD_UNAME diff --git a/fs/read_write.c b/fs/read_write.c index 59d819c5b92e..bbfa9b12b15e 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -331,7 +331,8 @@ COMPAT_SYSCALL_DEFINE3(lseek, unsigned int, fd, compat_off_t, offset, unsigned i } #endif -#if !defined(CONFIG_64BIT) || defined(CONFIG_COMPAT) +#if !defined(CONFIG_64BIT) || defined(CONFIG_COMPAT) || \ + defined(__ARCH_WANT_SYS_LLSEEK) SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, unsigned long, offset_low, loff_t __user *, result, unsigned int, whence) -- cgit v1.2.3 From 3dd4eb83a9c08ed6af482a5417323a6c8f4fc7a7 Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Fri, 20 Mar 2020 11:20:13 +0100 Subject: powerpc: move common register copy functions from signal_32.c to signal.c These functions are required for 64bit as well. Signed-off-by: Michal Suchanek Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/9fd6d9b7c5e91fab21159fe23534a2f16b4962d3.1584699455.git.msuchanek@suse.de --- arch/powerpc/kernel/signal.c | 141 ++++++++++++++++++++++++++++++++++++++++ arch/powerpc/kernel/signal_32.c | 140 --------------------------------------- 2 files changed, 141 insertions(+), 140 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index d215f9554553..4b0152108f61 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -18,12 +18,153 @@ #include #include #include +#include #include #include #include #include "signal.h" +#ifdef CONFIG_VSX +unsigned long copy_fpr_to_user(void __user *to, + struct task_struct *task) +{ + u64 buf[ELF_NFPREG]; + int i; + + /* save FPR copy to local buffer then write to the thread_struct */ + for (i = 0; i < (ELF_NFPREG - 1) ; i++) + buf[i] = task->thread.TS_FPR(i); + buf[i] = task->thread.fp_state.fpscr; + return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double)); +} + +unsigned long copy_fpr_from_user(struct task_struct *task, + void __user *from) +{ + u64 buf[ELF_NFPREG]; + int i; + + if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double))) + return 1; + for (i = 0; i < (ELF_NFPREG - 1) ; i++) + task->thread.TS_FPR(i) = buf[i]; + task->thread.fp_state.fpscr = buf[i]; + + return 0; +} + +unsigned long copy_vsx_to_user(void __user *to, + struct task_struct *task) +{ + u64 buf[ELF_NVSRHALFREG]; + int i; + + /* save FPR copy to local buffer then write to the thread_struct */ + for (i = 0; i < ELF_NVSRHALFREG; i++) + buf[i] = task->thread.fp_state.fpr[i][TS_VSRLOWOFFSET]; + return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double)); +} + +unsigned long copy_vsx_from_user(struct task_struct *task, + void __user *from) +{ + u64 buf[ELF_NVSRHALFREG]; + int i; + + if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double))) + return 1; + for (i = 0; i < ELF_NVSRHALFREG ; i++) + task->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i]; + return 0; +} + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +unsigned long copy_ckfpr_to_user(void __user *to, + struct task_struct *task) +{ + u64 buf[ELF_NFPREG]; + int i; + + /* save FPR copy to local buffer then write to the thread_struct */ + for (i = 0; i < (ELF_NFPREG - 1) ; i++) + buf[i] = task->thread.TS_CKFPR(i); + buf[i] = task->thread.ckfp_state.fpscr; + return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double)); +} + +unsigned long copy_ckfpr_from_user(struct task_struct *task, + void __user *from) +{ + u64 buf[ELF_NFPREG]; + int i; + + if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double))) + return 1; + for (i = 0; i < (ELF_NFPREG - 1) ; i++) + task->thread.TS_CKFPR(i) = buf[i]; + task->thread.ckfp_state.fpscr = buf[i]; + + return 0; +} + +unsigned long copy_ckvsx_to_user(void __user *to, + struct task_struct *task) +{ + u64 buf[ELF_NVSRHALFREG]; + int i; + + /* save FPR copy to local buffer then write to the thread_struct */ + for (i = 0; i < ELF_NVSRHALFREG; i++) + buf[i] = task->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET]; + return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double)); +} + +unsigned long copy_ckvsx_from_user(struct task_struct *task, + void __user *from) +{ + u64 buf[ELF_NVSRHALFREG]; + int i; + + if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double))) + return 1; + for (i = 0; i < ELF_NVSRHALFREG ; i++) + task->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i]; + return 0; +} +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ +#else +inline unsigned long copy_fpr_to_user(void __user *to, + struct task_struct *task) +{ + return __copy_to_user(to, task->thread.fp_state.fpr, + ELF_NFPREG * sizeof(double)); +} + +inline unsigned long copy_fpr_from_user(struct task_struct *task, + void __user *from) +{ + return __copy_from_user(task->thread.fp_state.fpr, from, + ELF_NFPREG * sizeof(double)); +} + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +inline unsigned long copy_ckfpr_to_user(void __user *to, + struct task_struct *task) +{ + return __copy_to_user(to, task->thread.ckfp_state.fpr, + ELF_NFPREG * sizeof(double)); +} + +inline unsigned long copy_ckfpr_from_user(struct task_struct *task, + void __user *from) +{ + return __copy_from_user(task->thread.ckfp_state.fpr, from, + ELF_NFPREG * sizeof(double)); +} +#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ +#endif + /* Log an error when sending an unhandled signal to a process. Controlled * through debug.exception-trace sysctl. */ diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 1b090a76b444..4f96d29a22bf 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -235,146 +235,6 @@ struct rt_sigframe { int abigap[56]; }; -#ifdef CONFIG_VSX -unsigned long copy_fpr_to_user(void __user *to, - struct task_struct *task) -{ - u64 buf[ELF_NFPREG]; - int i; - - /* save FPR copy to local buffer then write to the thread_struct */ - for (i = 0; i < (ELF_NFPREG - 1) ; i++) - buf[i] = task->thread.TS_FPR(i); - buf[i] = task->thread.fp_state.fpscr; - return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double)); -} - -unsigned long copy_fpr_from_user(struct task_struct *task, - void __user *from) -{ - u64 buf[ELF_NFPREG]; - int i; - - if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double))) - return 1; - for (i = 0; i < (ELF_NFPREG - 1) ; i++) - task->thread.TS_FPR(i) = buf[i]; - task->thread.fp_state.fpscr = buf[i]; - - return 0; -} - -unsigned long copy_vsx_to_user(void __user *to, - struct task_struct *task) -{ - u64 buf[ELF_NVSRHALFREG]; - int i; - - /* save FPR copy to local buffer then write to the thread_struct */ - for (i = 0; i < ELF_NVSRHALFREG; i++) - buf[i] = task->thread.fp_state.fpr[i][TS_VSRLOWOFFSET]; - return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double)); -} - -unsigned long copy_vsx_from_user(struct task_struct *task, - void __user *from) -{ - u64 buf[ELF_NVSRHALFREG]; - int i; - - if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double))) - return 1; - for (i = 0; i < ELF_NVSRHALFREG ; i++) - task->thread.fp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i]; - return 0; -} - -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -unsigned long copy_ckfpr_to_user(void __user *to, - struct task_struct *task) -{ - u64 buf[ELF_NFPREG]; - int i; - - /* save FPR copy to local buffer then write to the thread_struct */ - for (i = 0; i < (ELF_NFPREG - 1) ; i++) - buf[i] = task->thread.TS_CKFPR(i); - buf[i] = task->thread.ckfp_state.fpscr; - return __copy_to_user(to, buf, ELF_NFPREG * sizeof(double)); -} - -unsigned long copy_ckfpr_from_user(struct task_struct *task, - void __user *from) -{ - u64 buf[ELF_NFPREG]; - int i; - - if (__copy_from_user(buf, from, ELF_NFPREG * sizeof(double))) - return 1; - for (i = 0; i < (ELF_NFPREG - 1) ; i++) - task->thread.TS_CKFPR(i) = buf[i]; - task->thread.ckfp_state.fpscr = buf[i]; - - return 0; -} - -unsigned long copy_ckvsx_to_user(void __user *to, - struct task_struct *task) -{ - u64 buf[ELF_NVSRHALFREG]; - int i; - - /* save FPR copy to local buffer then write to the thread_struct */ - for (i = 0; i < ELF_NVSRHALFREG; i++) - buf[i] = task->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET]; - return __copy_to_user(to, buf, ELF_NVSRHALFREG * sizeof(double)); -} - -unsigned long copy_ckvsx_from_user(struct task_struct *task, - void __user *from) -{ - u64 buf[ELF_NVSRHALFREG]; - int i; - - if (__copy_from_user(buf, from, ELF_NVSRHALFREG * sizeof(double))) - return 1; - for (i = 0; i < ELF_NVSRHALFREG ; i++) - task->thread.ckfp_state.fpr[i][TS_VSRLOWOFFSET] = buf[i]; - return 0; -} -#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ -#else -inline unsigned long copy_fpr_to_user(void __user *to, - struct task_struct *task) -{ - return __copy_to_user(to, task->thread.fp_state.fpr, - ELF_NFPREG * sizeof(double)); -} - -inline unsigned long copy_fpr_from_user(struct task_struct *task, - void __user *from) -{ - return __copy_from_user(task->thread.fp_state.fpr, from, - ELF_NFPREG * sizeof(double)); -} - -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM -inline unsigned long copy_ckfpr_to_user(void __user *to, - struct task_struct *task) -{ - return __copy_to_user(to, task->thread.ckfp_state.fpr, - ELF_NFPREG * sizeof(double)); -} - -inline unsigned long copy_ckfpr_from_user(struct task_struct *task, - void __user *from) -{ - return __copy_from_user(task->thread.ckfp_state.fpr, from, - ELF_NFPREG * sizeof(double)); -} -#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ -#endif - /* * Save the current user registers on the user stack. * We only save the altivec/spe registers if the process has used -- cgit v1.2.3 From d6c19bdee2ba3d9426d31cb82d036212c3b2fb47 Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Fri, 20 Mar 2020 11:20:14 +0100 Subject: powerpc/perf: consolidate read_user_stack_32 There are two almost identical copies for 32bit and 64bit. The function is used only in 32bit code which will be split out in next patch so consolidate to one function. Signed-off-by: Michal Suchanek Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/0c21c919ed1296420199c78f7c3cfd29d3c7e909.1584699455.git.msuchanek@suse.de --- arch/powerpc/perf/callchain.c | 48 +++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c index cbc251981209..c9a78c6e4361 100644 --- a/arch/powerpc/perf/callchain.c +++ b/arch/powerpc/perf/callchain.c @@ -161,18 +161,6 @@ static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret) return read_user_stack_slow(ptr, ret, 8); } -static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) -{ - if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) || - ((unsigned long)ptr & 3)) - return -EFAULT; - - if (!probe_user_read(ret, ptr, sizeof(*ret))) - return 0; - - return read_user_stack_slow(ptr, ret, 4); -} - static inline int valid_user_sp(unsigned long sp, int is_64) { if (!sp || (sp & 7) || sp > (is_64 ? TASK_SIZE : 0x100000000UL) - 32) @@ -277,19 +265,9 @@ static void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry, } #else /* CONFIG_PPC64 */ -/* - * On 32-bit we just access the address and let hash_page create a - * HPTE if necessary, so there is no need to fall back to reading - * the page tables. Since this is called at interrupt level, - * do_page_fault() won't treat a DSI as a page fault. - */ -static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) +static int read_user_stack_slow(void __user *ptr, void *buf, int nb) { - if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) || - ((unsigned long)ptr & 3)) - return -EFAULT; - - return probe_user_read(ret, ptr, sizeof(*ret)); + return 0; } static inline void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry, @@ -312,6 +290,28 @@ static inline int valid_user_sp(unsigned long sp, int is_64) #endif /* CONFIG_PPC64 */ +/* + * On 32-bit we just access the address and let hash_page create a + * HPTE if necessary, so there is no need to fall back to reading + * the page tables. Since this is called at interrupt level, + * do_page_fault() won't treat a DSI as a page fault. + */ +static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) +{ + int rc; + + if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) || + ((unsigned long)ptr & 3)) + return -EFAULT; + + rc = probe_user_read(ret, ptr, sizeof(*ret)); + + if (IS_ENABLED(CONFIG_PPC64) && rc) + return read_user_stack_slow(ptr, ret, 4); + + return rc; +} + /* * Layout for non-RT signal frames */ -- cgit v1.2.3 From 2910428106ebf23a9a2176cb751749edb2ce57e2 Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Fri, 20 Mar 2020 11:20:15 +0100 Subject: powerpc/perf: consolidate valid_user_sp -> invalid_user_sp Merge the 32bit and 64bit version. Halve the check constants on 32bit. Use STACK_TOP since it is defined. Passing is_64 is now redundant since is_32bit_task() is used to determine which callchain variant should be used. Use STACK_TOP and is_32bit_task() directly. This removes a page from the valid 32bit area on 64bit: #define TASK_SIZE_USER32 (0x0000000100000000UL - (1 * PAGE_SIZE)) #define STACK_TOP_USER32 TASK_SIZE_USER32 Change return value to bool. It is inverted by users anyway. Change to invalid_user_sp to avoid inverting the return value twice. Signed-off-by: Michal Suchanek Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/be8e40fc0737fb28ad08b198552dee7cac1c5ce2.1584699455.git.msuchanek@suse.de --- arch/powerpc/perf/callchain.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c index c9a78c6e4361..001d0473a61f 100644 --- a/arch/powerpc/perf/callchain.c +++ b/arch/powerpc/perf/callchain.c @@ -102,6 +102,14 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re } } +static inline bool invalid_user_sp(unsigned long sp) +{ + unsigned long mask = is_32bit_task() ? 3 : 7; + unsigned long top = STACK_TOP - (is_32bit_task() ? 16 : 32); + + return (!sp || (sp & mask) || (sp > top)); +} + #ifdef CONFIG_PPC64 /* * On 64-bit we don't want to invoke hash_page on user addresses from @@ -161,13 +169,6 @@ static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret) return read_user_stack_slow(ptr, ret, 8); } -static inline int valid_user_sp(unsigned long sp, int is_64) -{ - if (!sp || (sp & 7) || sp > (is_64 ? TASK_SIZE : 0x100000000UL) - 32) - return 0; - return 1; -} - /* * 64-bit user processes use the same stack frame for RT and non-RT signals. */ @@ -226,7 +227,7 @@ static void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry, while (entry->nr < entry->max_stack) { fp = (unsigned long __user *) sp; - if (!valid_user_sp(sp, 1) || read_user_stack_64(fp, &next_sp)) + if (invalid_user_sp(sp) || read_user_stack_64(fp, &next_sp)) return; if (level > 0 && read_user_stack_64(&fp[2], &next_ip)) return; @@ -275,13 +276,6 @@ static inline void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry { } -static inline int valid_user_sp(unsigned long sp, int is_64) -{ - if (!sp || (sp & 7) || sp > TASK_SIZE - 32) - return 0; - return 1; -} - #define __SIGNAL_FRAMESIZE32 __SIGNAL_FRAMESIZE #define sigcontext32 sigcontext #define mcontext32 mcontext @@ -423,7 +417,7 @@ static void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry, while (entry->nr < entry->max_stack) { fp = (unsigned int __user *) (unsigned long) sp; - if (!valid_user_sp(sp, 0) || read_user_stack_32(fp, &next_sp)) + if (invalid_user_sp(sp) || read_user_stack_32(fp, &next_sp)) return; if (level > 0 && read_user_stack_32(&fp[1], &next_ip)) return; -- cgit v1.2.3 From 0a7601b6ffddec11d7cc0bc3264daf0159f5e1a6 Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Fri, 20 Mar 2020 11:20:16 +0100 Subject: powerpc/64: make buildable without CONFIG_COMPAT There are numerous references to 32bit functions in generic and 64bit code so ifdef them out. Signed-off-by: Michal Suchanek Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/e5619617020ef3a1f54f0c076e7d74cb9ec9f3bf.1584699455.git.msuchanek@suse.de --- arch/powerpc/include/asm/thread_info.h | 4 ++-- arch/powerpc/kernel/Makefile | 5 +++-- arch/powerpc/kernel/entry_64.S | 2 ++ arch/powerpc/kernel/ptrace/Makefile | 2 +- arch/powerpc/kernel/signal.c | 3 +-- arch/powerpc/kernel/syscall_64.c | 6 ++---- arch/powerpc/kernel/vdso.c | 3 ++- arch/powerpc/perf/callchain.c | 8 +++++++- 8 files changed, 20 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/thread_info.h b/arch/powerpc/include/asm/thread_info.h index a2270749b282..ca6c97025704 100644 --- a/arch/powerpc/include/asm/thread_info.h +++ b/arch/powerpc/include/asm/thread_info.h @@ -162,10 +162,10 @@ static inline bool test_thread_local_flags(unsigned int flags) return (ti->local_flags & flags) != 0; } -#ifdef CONFIG_PPC64 +#ifdef CONFIG_COMPAT #define is_32bit_task() (test_thread_flag(TIF_32BIT)) #else -#define is_32bit_task() (1) +#define is_32bit_task() (IS_ENABLED(CONFIG_PPC32)) #endif #if defined(CONFIG_PPC64) diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 570660efbb3d..1c4385852d3d 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -40,16 +40,17 @@ CFLAGS_btext.o += -DDISABLE_BRANCH_PROFILING endif obj-y := cputable.o syscalls.o \ - irq.o align.o signal_32.o pmc.o vdso.o \ + irq.o align.o signal_$(BITS).o pmc.o vdso.o \ process.o systbl.o idle.o \ signal.o sysfs.o cacheinfo.o time.o \ prom.o traps.o setup-common.o \ udbg.o misc.o io.o misc_$(BITS).o \ of_platform.o prom_parse.o obj-y += ptrace/ -obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o signal_64.o \ +obj-$(CONFIG_PPC64) += setup_64.o \ paca.o nvram_64.o firmware.o note.o \ syscall_64.o +obj-$(CONFIG_COMPAT) += sys_ppc32.o signal_32.o obj-$(CONFIG_VDSO32) += vdso32/ obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 63f0a4414618..9a1e5d636dea 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -52,8 +52,10 @@ SYS_CALL_TABLE: .tc sys_call_table[TC],sys_call_table +#ifdef CONFIG_COMPAT COMPAT_SYS_CALL_TABLE: .tc compat_sys_call_table[TC],compat_sys_call_table +#endif /* This value is used to mark exception frames on the stack. */ exception_marker: diff --git a/arch/powerpc/kernel/ptrace/Makefile b/arch/powerpc/kernel/ptrace/Makefile index e9d97c2d063e..c2f2402ebc8c 100644 --- a/arch/powerpc/kernel/ptrace/Makefile +++ b/arch/powerpc/kernel/ptrace/Makefile @@ -6,7 +6,7 @@ CFLAGS_ptrace-view.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' obj-y += ptrace.o ptrace-view.o -obj-$(CONFIG_PPC64) += ptrace32.o +obj-$(CONFIG_COMPAT) += ptrace32.o obj-$(CONFIG_VSX) += ptrace-vsx.o ifneq ($(CONFIG_VSX),y) obj-y += ptrace-novsx.o diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index 4b0152108f61..a264989626fd 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -247,7 +247,6 @@ static void do_signal(struct task_struct *tsk) sigset_t *oldset = sigmask_to_save(); struct ksignal ksig = { .sig = 0 }; int ret; - int is32 = is_32bit_task(); BUG_ON(tsk != current); @@ -277,7 +276,7 @@ static void do_signal(struct task_struct *tsk) rseq_signal_deliver(&ksig, tsk->thread.regs); - if (is32) { + if (is_32bit_task()) { if (ksig.ka.sa.sa_flags & SA_SIGINFO) ret = handle_rt_signal32(&ksig, oldset, tsk); else diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c index cf06eb443a80..c74295a7765b 100644 --- a/arch/powerpc/kernel/syscall_64.c +++ b/arch/powerpc/kernel/syscall_64.c @@ -22,7 +22,6 @@ notrace long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8, unsigned long r0, struct pt_regs *regs) { - unsigned long ti_flags; syscall_fn f; if (IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG)) @@ -60,8 +59,7 @@ notrace long system_call_exception(long r3, long r4, long r5, local_irq_enable(); - ti_flags = current_thread_info()->flags; - if (unlikely(ti_flags & _TIF_SYSCALL_DOTRACE)) { + if (unlikely(current_thread_info()->flags & _TIF_SYSCALL_DOTRACE)) { /* * We use the return value of do_syscall_trace_enter() as the * syscall number. If the syscall was rejected for any reason @@ -86,7 +84,7 @@ notrace long system_call_exception(long r3, long r4, long r5, /* May be faster to do array_index_nospec? */ barrier_nospec(); - if (unlikely(ti_flags & _TIF_32BIT)) { + if (unlikely(is_32bit_task())) { f = (void *)compat_sys_call_table[r0]; r3 &= 0x00000000ffffffffULL; diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index d3b77c15f9ce..f38f26e844b6 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -651,7 +651,8 @@ static void __init vdso_setup_syscall_map(void) if (sys_call_table[i] != sys_ni_syscall) vdso_data->syscall_map_64[i >> 5] |= 0x80000000UL >> (i & 0x1f); - if (compat_sys_call_table[i] != sys_ni_syscall) + if (IS_ENABLED(CONFIG_COMPAT) && + compat_sys_call_table[i] != sys_ni_syscall) vdso_data->syscall_map_32[i >> 5] |= 0x80000000UL >> (i & 0x1f); #else /* CONFIG_PPC64 */ diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c index 001d0473a61f..b5afd0bec4f8 100644 --- a/arch/powerpc/perf/callchain.c +++ b/arch/powerpc/perf/callchain.c @@ -15,7 +15,7 @@ #include #include #include -#ifdef CONFIG_PPC64 +#ifdef CONFIG_COMPAT #include "../kernel/ppc32.h" #endif #include @@ -284,6 +284,7 @@ static inline void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry #endif /* CONFIG_PPC64 */ +#if defined(CONFIG_PPC32) || defined(CONFIG_COMPAT) /* * On 32-bit we just access the address and let hash_page create a * HPTE if necessary, so there is no need to fall back to reading @@ -447,6 +448,11 @@ static void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry, sp = next_sp; } } +#else /* 32bit */ +static void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs) +{} +#endif /* 32bit */ void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) -- cgit v1.2.3 From 6e944aed8859d3b2de32ddb86748db9aefa43667 Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Fri, 20 Mar 2020 11:20:17 +0100 Subject: powerpc/64: Make COMPAT user-selectable disabled on littleendian by default. On bigendian ppc64 it is common to have 32bit legacy binaries but much less so on littleendian. Signed-off-by: Michal Suchanek Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/41393d6e895b0d3a47ee62f8f51e1cf888ad6226.1584699455.git.msuchanek@suse.de --- arch/powerpc/Kconfig | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 497b7d0b2d7e..29d00b3959b9 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -264,8 +264,9 @@ config PANIC_TIMEOUT default 180 config COMPAT - bool - default y if PPC64 + bool "Enable support for 32bit binaries" + depends on PPC64 + default y if !CPU_LITTLE_ENDIAN select COMPAT_BINFMT_ELF select ARCH_WANT_OLD_COMPAT_IPC select COMPAT_OLD_SIGACTION -- cgit v1.2.3 From 7c0eda1a04340a1de09bdf6521853e3bc0637c3b Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Fri, 20 Mar 2020 11:20:18 +0100 Subject: powerpc/perf: split callchain.c by bitness Building callchain.c with !COMPAT proved quite ugly with all the defines. Splitting out the 32bit and 64bit parts looks better. No code change intended. Signed-off-by: Michal Suchanek Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/a20027bf1074935a7934ee2a6757c99ea047e70d.1584699455.git.msuchanek@suse.de --- arch/powerpc/perf/Makefile | 5 +- arch/powerpc/perf/callchain.c | 356 +-------------------------------------- arch/powerpc/perf/callchain.h | 19 +++ arch/powerpc/perf/callchain_32.c | 196 +++++++++++++++++++++ arch/powerpc/perf/callchain_64.c | 174 +++++++++++++++++++ 5 files changed, 394 insertions(+), 356 deletions(-) create mode 100644 arch/powerpc/perf/callchain.h create mode 100644 arch/powerpc/perf/callchain_32.c create mode 100644 arch/powerpc/perf/callchain_64.c (limited to 'arch') diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile index c155dcbb8691..53d614e98537 100644 --- a/arch/powerpc/perf/Makefile +++ b/arch/powerpc/perf/Makefile @@ -1,6 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_PERF_EVENTS) += callchain.o perf_regs.o +obj-$(CONFIG_PERF_EVENTS) += callchain.o callchain_$(BITS).o perf_regs.o +ifdef CONFIG_COMPAT +obj-$(CONFIG_PERF_EVENTS) += callchain_32.o +endif obj-$(CONFIG_PPC_PERF_CTRS) += core-book3s.o bhrb.o obj64-$(CONFIG_PPC_PERF_CTRS) += ppc970-pmu.o power5-pmu.o \ diff --git a/arch/powerpc/perf/callchain.c b/arch/powerpc/perf/callchain.c index b5afd0bec4f8..dd5051015008 100644 --- a/arch/powerpc/perf/callchain.c +++ b/arch/powerpc/perf/callchain.c @@ -15,11 +15,9 @@ #include #include #include -#ifdef CONFIG_COMPAT -#include "../kernel/ppc32.h" -#endif #include +#include "callchain.h" /* * Is sp valid as the address of the next kernel stack frame after prev_sp? @@ -102,358 +100,6 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *re } } -static inline bool invalid_user_sp(unsigned long sp) -{ - unsigned long mask = is_32bit_task() ? 3 : 7; - unsigned long top = STACK_TOP - (is_32bit_task() ? 16 : 32); - - return (!sp || (sp & mask) || (sp > top)); -} - -#ifdef CONFIG_PPC64 -/* - * On 64-bit we don't want to invoke hash_page on user addresses from - * interrupt context, so if the access faults, we read the page tables - * to find which page (if any) is mapped and access it directly. - */ -static int read_user_stack_slow(void __user *ptr, void *buf, int nb) -{ - int ret = -EFAULT; - pgd_t *pgdir; - pte_t *ptep, pte; - unsigned shift; - unsigned long addr = (unsigned long) ptr; - unsigned long offset; - unsigned long pfn, flags; - void *kaddr; - - pgdir = current->mm->pgd; - if (!pgdir) - return -EFAULT; - - local_irq_save(flags); - ptep = find_current_mm_pte(pgdir, addr, NULL, &shift); - if (!ptep) - goto err_out; - if (!shift) - shift = PAGE_SHIFT; - - /* align address to page boundary */ - offset = addr & ((1UL << shift) - 1); - - pte = READ_ONCE(*ptep); - if (!pte_present(pte) || !pte_user(pte)) - goto err_out; - pfn = pte_pfn(pte); - if (!page_is_ram(pfn)) - goto err_out; - - /* no highmem to worry about here */ - kaddr = pfn_to_kaddr(pfn); - memcpy(buf, kaddr + offset, nb); - ret = 0; -err_out: - local_irq_restore(flags); - return ret; -} - -static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret) -{ - if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned long) || - ((unsigned long)ptr & 7)) - return -EFAULT; - - if (!probe_user_read(ret, ptr, sizeof(*ret))) - return 0; - - return read_user_stack_slow(ptr, ret, 8); -} - -/* - * 64-bit user processes use the same stack frame for RT and non-RT signals. - */ -struct signal_frame_64 { - char dummy[__SIGNAL_FRAMESIZE]; - struct ucontext uc; - unsigned long unused[2]; - unsigned int tramp[6]; - struct siginfo *pinfo; - void *puc; - struct siginfo info; - char abigap[288]; -}; - -static int is_sigreturn_64_address(unsigned long nip, unsigned long fp) -{ - if (nip == fp + offsetof(struct signal_frame_64, tramp)) - return 1; - if (vdso64_rt_sigtramp && current->mm->context.vdso_base && - nip == current->mm->context.vdso_base + vdso64_rt_sigtramp) - return 1; - return 0; -} - -/* - * Do some sanity checking on the signal frame pointed to by sp. - * We check the pinfo and puc pointers in the frame. - */ -static int sane_signal_64_frame(unsigned long sp) -{ - struct signal_frame_64 __user *sf; - unsigned long pinfo, puc; - - sf = (struct signal_frame_64 __user *) sp; - if (read_user_stack_64((unsigned long __user *) &sf->pinfo, &pinfo) || - read_user_stack_64((unsigned long __user *) &sf->puc, &puc)) - return 0; - return pinfo == (unsigned long) &sf->info && - puc == (unsigned long) &sf->uc; -} - -static void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry, - struct pt_regs *regs) -{ - unsigned long sp, next_sp; - unsigned long next_ip; - unsigned long lr; - long level = 0; - struct signal_frame_64 __user *sigframe; - unsigned long __user *fp, *uregs; - - next_ip = perf_instruction_pointer(regs); - lr = regs->link; - sp = regs->gpr[1]; - perf_callchain_store(entry, next_ip); - - while (entry->nr < entry->max_stack) { - fp = (unsigned long __user *) sp; - if (invalid_user_sp(sp) || read_user_stack_64(fp, &next_sp)) - return; - if (level > 0 && read_user_stack_64(&fp[2], &next_ip)) - return; - - /* - * Note: the next_sp - sp >= signal frame size check - * is true when next_sp < sp, which can happen when - * transitioning from an alternate signal stack to the - * normal stack. - */ - if (next_sp - sp >= sizeof(struct signal_frame_64) && - (is_sigreturn_64_address(next_ip, sp) || - (level <= 1 && is_sigreturn_64_address(lr, sp))) && - sane_signal_64_frame(sp)) { - /* - * This looks like an signal frame - */ - sigframe = (struct signal_frame_64 __user *) sp; - uregs = sigframe->uc.uc_mcontext.gp_regs; - if (read_user_stack_64(&uregs[PT_NIP], &next_ip) || - read_user_stack_64(&uregs[PT_LNK], &lr) || - read_user_stack_64(&uregs[PT_R1], &sp)) - return; - level = 0; - perf_callchain_store_context(entry, PERF_CONTEXT_USER); - perf_callchain_store(entry, next_ip); - continue; - } - - if (level == 0) - next_ip = lr; - perf_callchain_store(entry, next_ip); - ++level; - sp = next_sp; - } -} - -#else /* CONFIG_PPC64 */ -static int read_user_stack_slow(void __user *ptr, void *buf, int nb) -{ - return 0; -} - -static inline void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry, - struct pt_regs *regs) -{ -} - -#define __SIGNAL_FRAMESIZE32 __SIGNAL_FRAMESIZE -#define sigcontext32 sigcontext -#define mcontext32 mcontext -#define ucontext32 ucontext -#define compat_siginfo_t struct siginfo - -#endif /* CONFIG_PPC64 */ - -#if defined(CONFIG_PPC32) || defined(CONFIG_COMPAT) -/* - * On 32-bit we just access the address and let hash_page create a - * HPTE if necessary, so there is no need to fall back to reading - * the page tables. Since this is called at interrupt level, - * do_page_fault() won't treat a DSI as a page fault. - */ -static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) -{ - int rc; - - if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) || - ((unsigned long)ptr & 3)) - return -EFAULT; - - rc = probe_user_read(ret, ptr, sizeof(*ret)); - - if (IS_ENABLED(CONFIG_PPC64) && rc) - return read_user_stack_slow(ptr, ret, 4); - - return rc; -} - -/* - * Layout for non-RT signal frames - */ -struct signal_frame_32 { - char dummy[__SIGNAL_FRAMESIZE32]; - struct sigcontext32 sctx; - struct mcontext32 mctx; - int abigap[56]; -}; - -/* - * Layout for RT signal frames - */ -struct rt_signal_frame_32 { - char dummy[__SIGNAL_FRAMESIZE32 + 16]; - compat_siginfo_t info; - struct ucontext32 uc; - int abigap[56]; -}; - -static int is_sigreturn_32_address(unsigned int nip, unsigned int fp) -{ - if (nip == fp + offsetof(struct signal_frame_32, mctx.mc_pad)) - return 1; - if (vdso32_sigtramp && current->mm->context.vdso_base && - nip == current->mm->context.vdso_base + vdso32_sigtramp) - return 1; - return 0; -} - -static int is_rt_sigreturn_32_address(unsigned int nip, unsigned int fp) -{ - if (nip == fp + offsetof(struct rt_signal_frame_32, - uc.uc_mcontext.mc_pad)) - return 1; - if (vdso32_rt_sigtramp && current->mm->context.vdso_base && - nip == current->mm->context.vdso_base + vdso32_rt_sigtramp) - return 1; - return 0; -} - -static int sane_signal_32_frame(unsigned int sp) -{ - struct signal_frame_32 __user *sf; - unsigned int regs; - - sf = (struct signal_frame_32 __user *) (unsigned long) sp; - if (read_user_stack_32((unsigned int __user *) &sf->sctx.regs, ®s)) - return 0; - return regs == (unsigned long) &sf->mctx; -} - -static int sane_rt_signal_32_frame(unsigned int sp) -{ - struct rt_signal_frame_32 __user *sf; - unsigned int regs; - - sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp; - if (read_user_stack_32((unsigned int __user *) &sf->uc.uc_regs, ®s)) - return 0; - return regs == (unsigned long) &sf->uc.uc_mcontext; -} - -static unsigned int __user *signal_frame_32_regs(unsigned int sp, - unsigned int next_sp, unsigned int next_ip) -{ - struct mcontext32 __user *mctx = NULL; - struct signal_frame_32 __user *sf; - struct rt_signal_frame_32 __user *rt_sf; - - /* - * Note: the next_sp - sp >= signal frame size check - * is true when next_sp < sp, for example, when - * transitioning from an alternate signal stack to the - * normal stack. - */ - if (next_sp - sp >= sizeof(struct signal_frame_32) && - is_sigreturn_32_address(next_ip, sp) && - sane_signal_32_frame(sp)) { - sf = (struct signal_frame_32 __user *) (unsigned long) sp; - mctx = &sf->mctx; - } - - if (!mctx && next_sp - sp >= sizeof(struct rt_signal_frame_32) && - is_rt_sigreturn_32_address(next_ip, sp) && - sane_rt_signal_32_frame(sp)) { - rt_sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp; - mctx = &rt_sf->uc.uc_mcontext; - } - - if (!mctx) - return NULL; - return mctx->mc_gregs; -} - -static void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry, - struct pt_regs *regs) -{ - unsigned int sp, next_sp; - unsigned int next_ip; - unsigned int lr; - long level = 0; - unsigned int __user *fp, *uregs; - - next_ip = perf_instruction_pointer(regs); - lr = regs->link; - sp = regs->gpr[1]; - perf_callchain_store(entry, next_ip); - - while (entry->nr < entry->max_stack) { - fp = (unsigned int __user *) (unsigned long) sp; - if (invalid_user_sp(sp) || read_user_stack_32(fp, &next_sp)) - return; - if (level > 0 && read_user_stack_32(&fp[1], &next_ip)) - return; - - uregs = signal_frame_32_regs(sp, next_sp, next_ip); - if (!uregs && level <= 1) - uregs = signal_frame_32_regs(sp, next_sp, lr); - if (uregs) { - /* - * This looks like an signal frame, so restart - * the stack trace with the values in it. - */ - if (read_user_stack_32(&uregs[PT_NIP], &next_ip) || - read_user_stack_32(&uregs[PT_LNK], &lr) || - read_user_stack_32(&uregs[PT_R1], &sp)) - return; - level = 0; - perf_callchain_store_context(entry, PERF_CONTEXT_USER); - perf_callchain_store(entry, next_ip); - continue; - } - - if (level == 0) - next_ip = lr; - perf_callchain_store(entry, next_ip); - ++level; - sp = next_sp; - } -} -#else /* 32bit */ -static void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry, - struct pt_regs *regs) -{} -#endif /* 32bit */ - void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs) { diff --git a/arch/powerpc/perf/callchain.h b/arch/powerpc/perf/callchain.h new file mode 100644 index 000000000000..7a2cb9e1181a --- /dev/null +++ b/arch/powerpc/perf/callchain.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _POWERPC_PERF_CALLCHAIN_H +#define _POWERPC_PERF_CALLCHAIN_H + +int read_user_stack_slow(void __user *ptr, void *buf, int nb); +void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs); +void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs); + +static inline bool invalid_user_sp(unsigned long sp) +{ + unsigned long mask = is_32bit_task() ? 3 : 7; + unsigned long top = STACK_TOP - (is_32bit_task() ? 16 : 32); + + return (!sp || (sp & mask) || (sp > top)); +} + +#endif /* _POWERPC_PERF_CALLCHAIN_H */ diff --git a/arch/powerpc/perf/callchain_32.c b/arch/powerpc/perf/callchain_32.c new file mode 100644 index 000000000000..8aa951003141 --- /dev/null +++ b/arch/powerpc/perf/callchain_32.c @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Performance counter callchain support - powerpc architecture code + * + * Copyright © 2009 Paul Mackerras, IBM Corporation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "callchain.h" + +#ifdef CONFIG_PPC64 +#include "../kernel/ppc32.h" +#else /* CONFIG_PPC64 */ + +#define __SIGNAL_FRAMESIZE32 __SIGNAL_FRAMESIZE +#define sigcontext32 sigcontext +#define mcontext32 mcontext +#define ucontext32 ucontext +#define compat_siginfo_t struct siginfo + +#endif /* CONFIG_PPC64 */ + +/* + * On 32-bit we just access the address and let hash_page create a + * HPTE if necessary, so there is no need to fall back to reading + * the page tables. Since this is called at interrupt level, + * do_page_fault() won't treat a DSI as a page fault. + */ +static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret) +{ + int rc; + + if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) || + ((unsigned long)ptr & 3)) + return -EFAULT; + + rc = probe_user_read(ret, ptr, sizeof(*ret)); + + if (IS_ENABLED(CONFIG_PPC64) && rc) + return read_user_stack_slow(ptr, ret, 4); + + return rc; +} + +/* + * Layout for non-RT signal frames + */ +struct signal_frame_32 { + char dummy[__SIGNAL_FRAMESIZE32]; + struct sigcontext32 sctx; + struct mcontext32 mctx; + int abigap[56]; +}; + +/* + * Layout for RT signal frames + */ +struct rt_signal_frame_32 { + char dummy[__SIGNAL_FRAMESIZE32 + 16]; + compat_siginfo_t info; + struct ucontext32 uc; + int abigap[56]; +}; + +static int is_sigreturn_32_address(unsigned int nip, unsigned int fp) +{ + if (nip == fp + offsetof(struct signal_frame_32, mctx.mc_pad)) + return 1; + if (vdso32_sigtramp && current->mm->context.vdso_base && + nip == current->mm->context.vdso_base + vdso32_sigtramp) + return 1; + return 0; +} + +static int is_rt_sigreturn_32_address(unsigned int nip, unsigned int fp) +{ + if (nip == fp + offsetof(struct rt_signal_frame_32, + uc.uc_mcontext.mc_pad)) + return 1; + if (vdso32_rt_sigtramp && current->mm->context.vdso_base && + nip == current->mm->context.vdso_base + vdso32_rt_sigtramp) + return 1; + return 0; +} + +static int sane_signal_32_frame(unsigned int sp) +{ + struct signal_frame_32 __user *sf; + unsigned int regs; + + sf = (struct signal_frame_32 __user *) (unsigned long) sp; + if (read_user_stack_32((unsigned int __user *) &sf->sctx.regs, ®s)) + return 0; + return regs == (unsigned long) &sf->mctx; +} + +static int sane_rt_signal_32_frame(unsigned int sp) +{ + struct rt_signal_frame_32 __user *sf; + unsigned int regs; + + sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp; + if (read_user_stack_32((unsigned int __user *) &sf->uc.uc_regs, ®s)) + return 0; + return regs == (unsigned long) &sf->uc.uc_mcontext; +} + +static unsigned int __user *signal_frame_32_regs(unsigned int sp, + unsigned int next_sp, unsigned int next_ip) +{ + struct mcontext32 __user *mctx = NULL; + struct signal_frame_32 __user *sf; + struct rt_signal_frame_32 __user *rt_sf; + + /* + * Note: the next_sp - sp >= signal frame size check + * is true when next_sp < sp, for example, when + * transitioning from an alternate signal stack to the + * normal stack. + */ + if (next_sp - sp >= sizeof(struct signal_frame_32) && + is_sigreturn_32_address(next_ip, sp) && + sane_signal_32_frame(sp)) { + sf = (struct signal_frame_32 __user *) (unsigned long) sp; + mctx = &sf->mctx; + } + + if (!mctx && next_sp - sp >= sizeof(struct rt_signal_frame_32) && + is_rt_sigreturn_32_address(next_ip, sp) && + sane_rt_signal_32_frame(sp)) { + rt_sf = (struct rt_signal_frame_32 __user *) (unsigned long) sp; + mctx = &rt_sf->uc.uc_mcontext; + } + + if (!mctx) + return NULL; + return mctx->mc_gregs; +} + +void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs) +{ + unsigned int sp, next_sp; + unsigned int next_ip; + unsigned int lr; + long level = 0; + unsigned int __user *fp, *uregs; + + next_ip = perf_instruction_pointer(regs); + lr = regs->link; + sp = regs->gpr[1]; + perf_callchain_store(entry, next_ip); + + while (entry->nr < entry->max_stack) { + fp = (unsigned int __user *) (unsigned long) sp; + if (invalid_user_sp(sp) || read_user_stack_32(fp, &next_sp)) + return; + if (level > 0 && read_user_stack_32(&fp[1], &next_ip)) + return; + + uregs = signal_frame_32_regs(sp, next_sp, next_ip); + if (!uregs && level <= 1) + uregs = signal_frame_32_regs(sp, next_sp, lr); + if (uregs) { + /* + * This looks like an signal frame, so restart + * the stack trace with the values in it. + */ + if (read_user_stack_32(&uregs[PT_NIP], &next_ip) || + read_user_stack_32(&uregs[PT_LNK], &lr) || + read_user_stack_32(&uregs[PT_R1], &sp)) + return; + level = 0; + perf_callchain_store_context(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, next_ip); + continue; + } + + if (level == 0) + next_ip = lr; + perf_callchain_store(entry, next_ip); + ++level; + sp = next_sp; + } +} diff --git a/arch/powerpc/perf/callchain_64.c b/arch/powerpc/perf/callchain_64.c new file mode 100644 index 000000000000..df1ffd8b20f2 --- /dev/null +++ b/arch/powerpc/perf/callchain_64.c @@ -0,0 +1,174 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Performance counter callchain support - powerpc architecture code + * + * Copyright © 2009 Paul Mackerras, IBM Corporation. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "callchain.h" + +/* + * On 64-bit we don't want to invoke hash_page on user addresses from + * interrupt context, so if the access faults, we read the page tables + * to find which page (if any) is mapped and access it directly. + */ +int read_user_stack_slow(void __user *ptr, void *buf, int nb) +{ + int ret = -EFAULT; + pgd_t *pgdir; + pte_t *ptep, pte; + unsigned int shift; + unsigned long addr = (unsigned long) ptr; + unsigned long offset; + unsigned long pfn, flags; + void *kaddr; + + pgdir = current->mm->pgd; + if (!pgdir) + return -EFAULT; + + local_irq_save(flags); + ptep = find_current_mm_pte(pgdir, addr, NULL, &shift); + if (!ptep) + goto err_out; + if (!shift) + shift = PAGE_SHIFT; + + /* align address to page boundary */ + offset = addr & ((1UL << shift) - 1); + + pte = READ_ONCE(*ptep); + if (!pte_present(pte) || !pte_user(pte)) + goto err_out; + pfn = pte_pfn(pte); + if (!page_is_ram(pfn)) + goto err_out; + + /* no highmem to worry about here */ + kaddr = pfn_to_kaddr(pfn); + memcpy(buf, kaddr + offset, nb); + ret = 0; +err_out: + local_irq_restore(flags); + return ret; +} + +static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret) +{ + if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned long) || + ((unsigned long)ptr & 7)) + return -EFAULT; + + if (!probe_user_read(ret, ptr, sizeof(*ret))) + return 0; + + return read_user_stack_slow(ptr, ret, 8); +} + +/* + * 64-bit user processes use the same stack frame for RT and non-RT signals. + */ +struct signal_frame_64 { + char dummy[__SIGNAL_FRAMESIZE]; + struct ucontext uc; + unsigned long unused[2]; + unsigned int tramp[6]; + struct siginfo *pinfo; + void *puc; + struct siginfo info; + char abigap[288]; +}; + +static int is_sigreturn_64_address(unsigned long nip, unsigned long fp) +{ + if (nip == fp + offsetof(struct signal_frame_64, tramp)) + return 1; + if (vdso64_rt_sigtramp && current->mm->context.vdso_base && + nip == current->mm->context.vdso_base + vdso64_rt_sigtramp) + return 1; + return 0; +} + +/* + * Do some sanity checking on the signal frame pointed to by sp. + * We check the pinfo and puc pointers in the frame. + */ +static int sane_signal_64_frame(unsigned long sp) +{ + struct signal_frame_64 __user *sf; + unsigned long pinfo, puc; + + sf = (struct signal_frame_64 __user *) sp; + if (read_user_stack_64((unsigned long __user *) &sf->pinfo, &pinfo) || + read_user_stack_64((unsigned long __user *) &sf->puc, &puc)) + return 0; + return pinfo == (unsigned long) &sf->info && + puc == (unsigned long) &sf->uc; +} + +void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry, + struct pt_regs *regs) +{ + unsigned long sp, next_sp; + unsigned long next_ip; + unsigned long lr; + long level = 0; + struct signal_frame_64 __user *sigframe; + unsigned long __user *fp, *uregs; + + next_ip = perf_instruction_pointer(regs); + lr = regs->link; + sp = regs->gpr[1]; + perf_callchain_store(entry, next_ip); + + while (entry->nr < entry->max_stack) { + fp = (unsigned long __user *) sp; + if (invalid_user_sp(sp) || read_user_stack_64(fp, &next_sp)) + return; + if (level > 0 && read_user_stack_64(&fp[2], &next_ip)) + return; + + /* + * Note: the next_sp - sp >= signal frame size check + * is true when next_sp < sp, which can happen when + * transitioning from an alternate signal stack to the + * normal stack. + */ + if (next_sp - sp >= sizeof(struct signal_frame_64) && + (is_sigreturn_64_address(next_ip, sp) || + (level <= 1 && is_sigreturn_64_address(lr, sp))) && + sane_signal_64_frame(sp)) { + /* + * This looks like an signal frame + */ + sigframe = (struct signal_frame_64 __user *) sp; + uregs = sigframe->uc.uc_mcontext.gp_regs; + if (read_user_stack_64(&uregs[PT_NIP], &next_ip) || + read_user_stack_64(&uregs[PT_LNK], &lr) || + read_user_stack_64(&uregs[PT_R1], &sp)) + return; + level = 0; + perf_callchain_store_context(entry, PERF_CONTEXT_USER); + perf_callchain_store(entry, next_ip); + continue; + } + + if (level == 0) + next_ip = lr; + perf_callchain_store(entry, next_ip); + ++level; + sp = next_sp; + } +} -- cgit v1.2.3 From 54fc3c681ded9437e4548e2501dc1136b23cfa9a Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Tue, 31 Mar 2020 12:23:38 +1100 Subject: powerpc/pseries/ddw: Extend upper limit for huge DMA window for persistent memory Unlike normal memory ("memory" compatible type in the FDT), the persistent memory ("ibm,pmemory" in the FDT) can be mapped anywhere in the guest physical space and it can be used for DMA. In order to maintain 1:1 mapping via the huge DMA window, we need to know the maximum physical address at the time of the window setup. So far we've been looking at "memory" nodes but "ibm,pmemory" does not have fixed addresses and the persistent memory may be mapped afterwards. Since the persistent memory is still backed with page structs, use MAX_PHYSMEM_BITS as the upper limit. This effectively disables huge DMA window in LPAR under pHyp if persistent memory is present but this is the best we can do for the moment. Signed-off-by: Alexey Kardashevskiy Tested-by: Wen Xiong Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200331012338.23773-1-aik@ozlabs.ru --- arch/powerpc/platforms/pseries/iommu.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index 2e0a8eab5588..6d47b4a3ce39 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -945,6 +945,15 @@ static phys_addr_t ddw_memory_hotplug_max(void) phys_addr_t max_addr = memory_hotplug_max(); struct device_node *memory; + /* + * The "ibm,pmemory" can appear anywhere in the address space. + * Assuming it is still backed by page structs, set the upper limit + * for the huge DMA window as MAX_PHYSMEM_BITS. + */ + if (of_find_node_by_type(NULL, "ibm,pmemory")) + return (sizeof(phys_addr_t) * 8 <= MAX_PHYSMEM_BITS) ? + (phys_addr_t) -1 : (1ULL << MAX_PHYSMEM_BITS); + for_each_node_by_type(memory, "memory") { unsigned long start, size; int n_mem_addr_cells, n_mem_size_cells, len; -- cgit v1.2.3 From 60083063b755e29685902609e024ecd0c4a1a7d9 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 13 Feb 2020 09:38:04 +0100 Subject: powerpc/time: Replace by The PowerPC time code is not a clock provider, and just needs to call of_clk_init(). Hence it can include instead of . Remove the #ifdef protecting the of_clk_init() call, as a stub is available for the !CONFIG_COMMON_CLK case. Signed-off-by: Geert Uytterhoeven Reviewed-by: Stephen Boyd Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200213083804.24315-1-geert+renesas@glider.be --- arch/powerpc/kernel/time.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index bda9cb4a0a5f..2d9d3a3c61d6 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -50,7 +50,7 @@ #include #include #include -#include +#include #include #include #include @@ -1149,9 +1149,7 @@ void __init time_init(void) init_decrementer_clockevent(); tick_setup_hrtimer_broadcast(); -#ifdef CONFIG_COMMON_CLK of_clk_init(NULL); -#endif } /* -- cgit v1.2.3 From abc3fce76adbdfa8f87272c784b388cd20b46049 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 2 Apr 2020 22:04:01 +1000 Subject: Revert "powerpc/64: irq_work avoid interrupt when called with hardware irqs enabled" This reverts commit ebb37cf3ffd39fdb6ec5b07111f8bb2f11d92c5f. That commit does not play well with soft-masked irq state manipulations in idle, interrupt replay, and possibly others due to tracing code sometimes using irq_work_queue (e.g., in trace_hardirqs_on()). That can cause PACA_IRQ_DEC to become set when it is not expected, and be ignored or cleared or cause warnings. The net result seems to be missing an irq_work until the next timer interrupt in the worst case which is usually not going to be noticed, however it could be a long time if the tick is disabled, which is against the spirit of irq_work and might cause real problems. The idea is still solid, but it would need more work. It's not really clear if it would be worth added complexity, so revert this for now (not a straight revert, but replace with a comment explaining why we might see interrupts happening, and gives git blame something to find). Fixes: ebb37cf3ffd3 ("powerpc/64: irq_work avoid interrupt when called with hardware irqs enabled") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200402120401.1115883-1-npiggin@gmail.com --- arch/powerpc/kernel/time.c | 44 +++++++++++++------------------------------- 1 file changed, 13 insertions(+), 31 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 2d9d3a3c61d6..6fcae436ae51 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -522,35 +522,6 @@ static inline void clear_irq_work_pending(void) "i" (offsetof(struct paca_struct, irq_work_pending))); } -void arch_irq_work_raise(void) -{ - preempt_disable(); - set_irq_work_pending_flag(); - /* - * Non-nmi code running with interrupts disabled will replay - * irq_happened before it re-enables interrupts, so setthe - * decrementer there instead of causing a hardware exception - * which would immediately hit the masked interrupt handler - * and have the net effect of setting the decrementer in - * irq_happened. - * - * NMI interrupts can not check this when they return, so the - * decrementer hardware exception is raised, which will fire - * when interrupts are next enabled. - * - * BookE does not support this yet, it must audit all NMI - * interrupt handlers to ensure they call nmi_enter() so this - * check would be correct. - */ - if (IS_ENABLED(CONFIG_BOOKE) || !irqs_disabled() || in_nmi()) { - set_dec(1); - } else { - hard_irq_disable(); - local_paca->irq_happened |= PACA_IRQ_DEC; - } - preempt_enable(); -} - #else /* 32-bit */ DEFINE_PER_CPU(u8, irq_work_pending); @@ -559,16 +530,27 @@ DEFINE_PER_CPU(u8, irq_work_pending); #define test_irq_work_pending() __this_cpu_read(irq_work_pending) #define clear_irq_work_pending() __this_cpu_write(irq_work_pending, 0) +#endif /* 32 vs 64 bit */ + void arch_irq_work_raise(void) { + /* + * 64-bit code that uses irq soft-mask can just cause an immediate + * interrupt here that gets soft masked, if this is called under + * local_irq_disable(). It might be possible to prevent that happening + * by noticing interrupts are disabled and setting decrementer pending + * to be replayed when irqs are enabled. The problem there is that + * tracing can call irq_work_raise, including in code that does low + * level manipulations of irq soft-mask state (e.g., trace_hardirqs_on) + * which could get tangled up if we're messing with the same state + * here. + */ preempt_disable(); set_irq_work_pending_flag(); set_dec(1); preempt_enable(); } -#endif /* 32 vs 64 bit */ - #else /* CONFIG_IRQ_WORK */ #define test_irq_work_pending() 0 -- cgit v1.2.3 From d16a58f8854b194c964a4bbe8156ec624ebfdbd2 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Fri, 3 Apr 2020 23:10:05 +1000 Subject: powerpc: Improve ppc_save_regs() Make ppc_save_regs() a bit more useful: - Set NIP to our caller rather rather than the caller's caller (which is what we save to LR in the stack frame). - Set SOFTE to the current irq soft-mask state rather than uninitialised. - Zero CFAR rather than leave it uninitialised. In qemu, injecting a nmi to an idle CPU gives a nicer stack trace (note NIP, IRQMASK, CFAR). Oops: System Reset, sig: 6 [#1] LE PAGE_SIZE=64K MMU=Hash PREEMPT SMP NR_CPUS=2048 NUMA PowerNV Modules linked in: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.6.0-rc2-00429-ga76e38fd80bf #1277 NIP: c0000000000b6e5c LR: c0000000000b6e5c CTR: c000000000b06270 REGS: c00000000173fb08 TRAP: 0100 Not tainted MSR: 9000000000001033 CR: 28000224 XER: 00000000 CFAR: c0000000016a2128 IRQMASK: c00000000173fc80 GPR00: c0000000000b6e5c c00000000173fc80 c000000001743400 c00000000173fb08 GPR04: 0000000000000000 0000000000000000 0000000000000008 0000000000000001 GPR08: 00000001fea80000 0000000000000000 0000000000000000 ffffffffffffffff GPR12: c000000000b06270 c000000001930000 00000000300026c0 0000000000000000 GPR16: 0000000000000000 0000000000000000 0000000000000003 c0000000016a2128 GPR20: c0000001ffc97148 0000000000000001 c000000000f289a8 0000000000080000 GPR24: c0000000016e1480 000000011dc870ba 0000000000000000 0000000000000003 GPR28: c0000000016a2128 c0000001ffc97148 c0000000016a2260 0000000000000003 NIP [c0000000000b6e5c] power9_idle_type+0x5c/0x70 LR [c0000000000b6e5c] power9_idle_type+0x5c/0x70 Call Trace: [c00000000173fc80] [c0000000000b6e5c] power9_idle_type+0x5c/0x70 (unreliable) [c00000000173fcb0] [c000000000b062b0] stop_loop+0x40/0x60 [c00000000173fce0] [c000000000b022d8] cpuidle_enter_state+0xa8/0x660 [c00000000173fd60] [c000000000b0292c] cpuidle_enter+0x4c/0x70 [c00000000173fda0] [c00000000017624c] call_cpuidle+0x4c/0x90 [c00000000173fdc0] [c000000000176768] do_idle+0x338/0x460 [c00000000173fe60] [c000000000176b3c] cpu_startup_entry+0x3c/0x40 [c00000000173fe90] [c0000000000126b4] rest_init+0x124/0x140 [c00000000173fed0] [c0000000010948d4] start_kernel+0x938/0x988 [c00000000173ff90] [c00000000000cdcc] start_here_common+0x1c/0x20 Oops: System Reset, sig: 6 [#1] LE PAGE_SIZE=64K MMU=Hash PREEMPT SMP NR_CPUS=2048 NUMA PowerNV Modules linked in: CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.6.0-rc2-00430-gddce91b8712f #1278 NIP: c00000000001d150 LR: c0000000000b6e5c CTR: c000000000b06270 REGS: c00000000173fb08 TRAP: 0100 Not tainted MSR: 9000000000001033 CR: 28000224 XER: 00000000 CFAR: 0000000000000000 IRQMASK: 1 GPR00: c0000000000b6e5c c00000000173fc80 c000000001743400 c00000000173fb08 GPR04: 0000000000000000 0000000000000000 0000000000000008 0000000000000001 GPR08: 00000001fea80000 0000000000000000 0000000000000000 ffffffffffffffff GPR12: c000000000b06270 c000000001930000 00000000300026c0 0000000000000000 GPR16: 0000000000000000 0000000000000000 0000000000000003 c0000000016a2128 GPR20: c0000001ffc97148 0000000000000001 c000000000f289a8 0000000000080000 GPR24: c0000000016e1480 00000000b68db8ce 0000000000000000 0000000000000003 GPR28: c0000000016a2128 c0000001ffc97148 c0000000016a2260 0000000000000003 NIP [c00000000001d150] replay_system_reset+0x30/0xa0 LR [c0000000000b6e5c] power9_idle_type+0x5c/0x70 Call Trace: [c00000000173fc80] [c0000000000b6e5c] power9_idle_type+0x5c/0x70 (unreliable) [c00000000173fcb0] [c000000000b062b0] stop_loop+0x40/0x60 [c00000000173fce0] [c000000000b022d8] cpuidle_enter_state+0xa8/0x660 [c00000000173fd60] [c000000000b0292c] cpuidle_enter+0x4c/0x70 [c00000000173fda0] [c00000000017624c] call_cpuidle+0x4c/0x90 [c00000000173fdc0] [c000000000176768] do_idle+0x338/0x460 [c00000000173fe60] [c000000000176b38] cpu_startup_entry+0x38/0x40 [c00000000173fe90] [c0000000000126b4] rest_init+0x124/0x140 [c00000000173fed0] [c0000000010948d4] start_kernel+0x938/0x988 [c00000000173ff90] [c00000000000cdcc] start_here_common+0x1c/0x20 Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200403131006.123243-1-npiggin@gmail.com --- arch/powerpc/kernel/ppc_save_regs.S | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/ppc_save_regs.S b/arch/powerpc/kernel/ppc_save_regs.S index f3bd0bbf2ae8..2d4d21bb46a9 100644 --- a/arch/powerpc/kernel/ppc_save_regs.S +++ b/arch/powerpc/kernel/ppc_save_regs.S @@ -55,14 +55,17 @@ _GLOBAL(ppc_save_regs) PPC_STL r29,29*SZL(r3) PPC_STL r30,30*SZL(r3) PPC_STL r31,31*SZL(r3) + lbz r0,PACAIRQSOFTMASK(r13) + PPC_STL r0,SOFTE-STACK_FRAME_OVERHEAD(r3) #endif /* go up one stack frame for SP */ PPC_LL r4,0(r1) PPC_STL r4,1*SZL(r3) /* get caller's LR */ PPC_LL r0,LRSAVE(r4) - PPC_STL r0,_NIP-STACK_FRAME_OVERHEAD(r3) PPC_STL r0,_LINK-STACK_FRAME_OVERHEAD(r3) + mflr r0 + PPC_STL r0,_NIP-STACK_FRAME_OVERHEAD(r3) mfmsr r0 PPC_STL r0,_MSR-STACK_FRAME_OVERHEAD(r3) mfctr r0 @@ -73,4 +76,5 @@ _GLOBAL(ppc_save_regs) PPC_STL r0,_CCR-STACK_FRAME_OVERHEAD(r3) li r0,0 PPC_STL r0,_TRAP-STACK_FRAME_OVERHEAD(r3) + PPC_STL r0,ORIG_GPR3-STACK_FRAME_OVERHEAD(r3) blr -- cgit v1.2.3