From 11f9c1d2fb497f69f83d4fab6fb7fc8a6884eded Mon Sep 17 00:00:00 2001 From: Cédric Le Goater Date: Mon, 4 Jan 2021 15:31:56 +0100 Subject: powerpc/mm: Move hpte_insert_repeating() prototype MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It fixes this W=1 compile error : ../arch/powerpc/mm/book3s64/hash_utils.c:1867:6: error: no previous prototype for ‘hpte_insert_repeating’ [-Werror=missing-prototypes] 1867 | long hpte_insert_repeating(unsigned long hash, unsigned long vpn, | ^~~~~~~~~~~~~~~~~~~~~ Signed-off-by: Cédric Le Goater Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210104143206.695198-14-clg@kaod.org --- arch/powerpc/mm/book3s64/hash_hugetlbpage.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/powerpc/mm/book3s64') diff --git a/arch/powerpc/mm/book3s64/hash_hugetlbpage.c b/arch/powerpc/mm/book3s64/hash_hugetlbpage.c index b5e9fff8c217..a688e1324ae5 100644 --- a/arch/powerpc/mm/book3s64/hash_hugetlbpage.c +++ b/arch/powerpc/mm/book3s64/hash_hugetlbpage.c @@ -16,10 +16,6 @@ unsigned int hpage_shift; EXPORT_SYMBOL(hpage_shift); -extern long hpte_insert_repeating(unsigned long hash, unsigned long vpn, - unsigned long pa, unsigned long rlags, - unsigned long vflags, int psize, int ssize); - int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, unsigned long flags, int ssize, unsigned int shift, unsigned int mmu_psize) -- cgit v1.2.3 From b5952f8125ae512420d5fc569adce591bea73bf5 Mon Sep 17 00:00:00 2001 From: Qian Cai Date: Sun, 10 May 2020 01:15:59 -0400 Subject: powerpc/mm/book3s64/iommu: fix some RCU-list locks It is safe to traverse mm->context.iommu_group_mem_list with either mem_list_mutex or the RCU read lock held. Silence a few RCU-list false positive warnings and fix a few missing RCU read locks. arch/powerpc/mm/book3s64/iommu_api.c:330 RCU-list traversed in non-reader section!! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 2 locks held by qemu-kvm/4305: #0: c000000bc3fe4d68 (&container->lock){+.+.}-{3:3}, at: tce_iommu_ioctl.part.9+0xc7c/0x1870 [vfio_iommu_spapr_tce] #1: c000000001501910 (mem_list_mutex){+.+.}-{3:3}, at: mm_iommu_get+0x50/0x190 ==== arch/powerpc/mm/book3s64/iommu_api.c:132 RCU-list traversed in non-reader section!! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 2 locks held by qemu-kvm/4305: #0: c000000bc3fe4d68 (&container->lock){+.+.}-{3:3}, at: tce_iommu_ioctl.part.9+0xc7c/0x1870 [vfio_iommu_spapr_tce] #1: c000000001501910 (mem_list_mutex){+.+.}-{3:3}, at: mm_iommu_do_alloc+0x120/0x5f0 ==== arch/powerpc/mm/book3s64/iommu_api.c:292 RCU-list traversed in non-reader section!! other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 2 locks held by qemu-kvm/4312: #0: c000000ecafe23c8 (&vcpu->mutex){+.+.}-{3:3}, at: kvm_vcpu_ioctl+0xdc/0x950 [kvm] #1: c000000045e6c468 (&kvm->srcu){....}-{0:0}, at: kvmppc_h_put_tce+0x88/0x340 [kvm] ==== arch/powerpc/mm/book3s64/iommu_api.c:424 RCU-list traversed in non-reader section!! 
other info that might help us debug this: rcu_scheduler_active = 2, debug_locks = 1 2 locks held by qemu-kvm/4312: #0: c000000ecafe23c8 (&vcpu->mutex){+.+.}-{3:3}, at: kvm_vcpu_ioctl+0xdc/0x950 [kvm] #1: c000000045e6c468 (&kvm->srcu){....}-{0:0}, at: kvmppc_h_put_tce+0x88/0x340 [kvm] Signed-off-by: Qian Cai Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200510051559.1959-1-cai@lca.pw --- arch/powerpc/mm/book3s64/iommu_api.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/mm/book3s64') diff --git a/arch/powerpc/mm/book3s64/iommu_api.c b/arch/powerpc/mm/book3s64/iommu_api.c index 685d7bb3d26f..cd18e94d0843 100644 --- a/arch/powerpc/mm/book3s64/iommu_api.c +++ b/arch/powerpc/mm/book3s64/iommu_api.c @@ -129,7 +129,8 @@ good_exit: mutex_lock(&mem_list_mutex); - list_for_each_entry_rcu(mem2, &mm->context.iommu_group_mem_list, next) { + list_for_each_entry_rcu(mem2, &mm->context.iommu_group_mem_list, next, + lockdep_is_held(&mem_list_mutex)) { /* Overlap? */ if ((mem2->ua < (ua + (entries << PAGE_SHIFT))) && (ua < (mem2->ua + @@ -289,6 +290,7 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm, { struct mm_iommu_table_group_mem_t *mem, *ret = NULL; + rcu_read_lock(); list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) { if ((mem->ua <= ua) && (ua + size <= mem->ua + @@ -297,6 +299,7 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm, break; } } + rcu_read_unlock(); return ret; } @@ -327,7 +330,8 @@ struct mm_iommu_table_group_mem_t *mm_iommu_get(struct mm_struct *mm, mutex_lock(&mem_list_mutex); - list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) { + list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next, + lockdep_is_held(&mem_list_mutex)) { if ((mem->ua == ua) && (mem->entries == entries)) { ret = mem; ++mem->used; @@ -421,6 +425,7 @@ bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa, struct mm_iommu_table_group_mem_t *mem; unsigned long end; + rcu_read_lock(); list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) { if (mem->dev_hpa == MM_IOMMU_TABLE_INVALID_HPA) continue; @@ -437,6 +442,7 @@ bool mm_iommu_is_devmem(struct mm_struct *mm, unsigned long hpa, return true; } } + rcu_read_unlock(); return false; } -- cgit v1.2.3 From a4922f5442e7e6ce85da304e224d940edec2f1fb Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:15 +1000 Subject: powerpc/64s: move the hash fault handling logic to C The fault handling still has some complex logic particularly around hash table handling, in asm. Implement most of this in C. 
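The C-side flow that replaces the old do_hash_page assembler boils down to roughly the following. This is a condensed sketch of the do_hash_fault() added in the hunk below, not the kernel code itself; mm_for(), access_for(), flags_for() and report_hash_insert_failure() are illustrative stand-ins for logic that is open-coded in the real function.

/* Condensed sketch -- helper names are illustrative, not kernel APIs. */
int do_hash_fault_sketch(struct pt_regs *regs, unsigned long ea,
			 unsigned long dsisr)
{
	int err;

	/* Weird error bits: skip hashing, go straight to the page fault path. */
	if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT)))
		return do_page_fault(regs, ea, dsisr);

	/*
	 * In an "NMI" (e.g. a perf interrupt while soft-disabled), hashing
	 * could recurse on H_PAGE_BUSY, so fail the access instead.
	 */
	if (unlikely(in_nmi())) {
		bad_page_fault(regs, ea, SIGSEGV);
		return 0;
	}

	err = hash_page_mm(mm_for(ea), ea, access_for(regs, ea),
			   regs->trap, flags_for(dsisr));
	if (unlikely(err < 0)) {
		/* The hypervisor refused the HPTE insert: signal or die. */
		report_hash_insert_failure(regs, ea, err);
		return 0;
	}
	if (err)	/* No Linux PTE backing it: take a real page fault. */
		err = do_page_fault(regs, ea, dsisr);
	return err;
}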
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-6-npiggin@gmail.com --- arch/powerpc/include/asm/book3s/64/mmu-hash.h | 1 + arch/powerpc/kernel/exceptions-64s.S | 127 ++++++-------------------- arch/powerpc/mm/book3s64/hash_utils.c | 77 ++++++++++------ 3 files changed, 78 insertions(+), 127 deletions(-) (limited to 'arch/powerpc/mm/book3s64') diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index f911bdb68d8b..1aa68094ad29 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -456,6 +456,7 @@ static inline unsigned long hpt_hash(unsigned long vpn, long hpte_insert_repeating(unsigned long hash, unsigned long vpn, unsigned long pa, unsigned long rlags, unsigned long vflags, int psize, int ssize); +int do_hash_fault(struct pt_regs *regs, unsigned long ea, unsigned long dsisr); extern int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, unsigned long flags, int ssize, int subpage_prot); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index f697fd00ac1c..91381fbdef42 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1401,14 +1401,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) * * Handling: * - Hash MMU - * Go to do_hash_page first to see if the HPT can be filled from an entry in - * the Linux page table. Hash faults can hit in kernel mode in a fairly + * Go to do_hash_fault, which attempts to fill the HPT from an entry in the + * Linux page table. Hash faults can hit in kernel mode in a fairly * arbitrary state (e.g., interrupts disabled, locks held) when accessing * "non-bolted" regions, e.g., vmalloc space. However these should always be - * backed by Linux page tables. + * backed by Linux page table entries. * - * If none is found, do a Linux page fault. Linux page faults can happen in - * kernel mode due to user copy operations of course. + * If no entry is found the Linux page fault handler is invoked (by + * do_hash_fault). Linux page faults can happen in kernel mode due to user + * copy operations of course. * * KVM: The KVM HDSI handler may perform a load with MSR[DR]=1 in guest * MMU context, which may cause a DSI in the host, which must go to the @@ -1439,27 +1440,29 @@ EXC_COMMON_BEGIN(data_access_common) GEN_COMMON data_access ld r4,_DAR(r1) ld r5,_DSISR(r1) + addi r3,r1,STACK_FRAME_OVERHEAD andis. r0,r5,DSISR_DABRMATCH@h bne- 1f BEGIN_MMU_FTR_SECTION - ld r6,_MSR(r1) - li r3,0x300 - b do_hash_page /* Try to handle as hpte fault */ + bl do_hash_fault MMU_FTR_SECTION_ELSE - b handle_page_fault + bl do_page_fault ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) + cmpdi r3,0 + beq+ interrupt_return + mr r5,r3 + addi r3,r1,STACK_FRAME_OVERHEAD + ld r4,_DAR(r1) + bl __bad_page_fault + b interrupt_return -1: /* We have a data breakpoint exception - handle it */ - ld r4,_DAR(r1) - ld r5,_DSISR(r1) - addi r3,r1,STACK_FRAME_OVERHEAD - bl do_break +1: bl do_break /* * do_break() may have changed the NV GPRS while handling a breakpoint. * If so, we need to restore them with their updated values. 
*/ REST_NVGPRS(r1) - b interrupt_return + b interrupt_return GEN_KVM data_access @@ -1554,13 +1557,19 @@ EXC_COMMON_BEGIN(instruction_access_common) GEN_COMMON instruction_access ld r4,_DAR(r1) ld r5,_DSISR(r1) + addi r3,r1,STACK_FRAME_OVERHEAD BEGIN_MMU_FTR_SECTION - ld r6,_MSR(r1) - li r3,0x400 - b do_hash_page /* Try to handle as hpte fault */ + bl do_hash_fault MMU_FTR_SECTION_ELSE - b handle_page_fault + bl do_page_fault ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) + cmpdi r3,0 + beq+ interrupt_return + mr r5,r3 + addi r3,r1,STACK_FRAME_OVERHEAD + ld r4,_DAR(r1) + bl __bad_page_fault + b interrupt_return GEN_KVM instruction_access @@ -3216,83 +3225,3 @@ disable_machine_check: RFI_TO_KERNEL 1: mtlr r0 blr - -/* - * Hash table stuff - */ - .balign IFETCH_ALIGN_BYTES -do_hash_page: -#ifdef CONFIG_PPC_BOOK3S_64 - lis r0,(DSISR_BAD_FAULT_64S | DSISR_KEYFAULT)@h - ori r0,r0,DSISR_BAD_FAULT_64S@l - and. r0,r5,r0 /* weird error? */ - bne- handle_page_fault /* if not, try to insert a HPTE */ - - /* - * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then - * don't call hash_page, just fail the fault. This is required to - * prevent re-entrancy problems in the hash code, namely perf - * interrupts hitting while something holds H_PAGE_BUSY, and taking a - * hash fault. See the comment in hash_preload(). - */ - ld r11, PACA_THREAD_INFO(r13) - lwz r0,TI_PREEMPT(r11) - andis. r0,r0,NMI_MASK@h - bne 77f - - /* - * r3 contains the trap number - * r4 contains the faulting address - * r5 contains dsisr - * r6 msr - * - * at return r3 = 0 for success, 1 for page fault, negative for error - */ - bl __hash_page /* build HPTE if possible */ - cmpdi r3,0 /* see if __hash_page succeeded */ - - /* Success */ - beq interrupt_return /* Return from exception on success */ - - /* Error */ - blt- 13f - - /* Reload DAR/DSISR into r4/r5 for handle_page_fault */ - ld r4,_DAR(r1) - ld r5,_DSISR(r1) -#endif /* CONFIG_PPC_BOOK3S_64 */ - -/* Here we have a page fault that hash_page can't handle. */ -handle_page_fault: - addi r3,r1,STACK_FRAME_OVERHEAD - bl do_page_fault - cmpdi r3,0 - beq+ interrupt_return - mr r5,r3 - addi r3,r1,STACK_FRAME_OVERHEAD - ld r4,_DAR(r1) - bl __bad_page_fault - b interrupt_return - -#ifdef CONFIG_PPC_BOOK3S_64 -/* We have a page fault that hash_page could handle but HV refused - * the PTE insertion - */ -13: mr r5,r3 - addi r3,r1,STACK_FRAME_OVERHEAD - ld r4,_DAR(r1) - bl low_hash_fault - b interrupt_return -#endif - -/* - * We come here as a result of a DSI at a point where we don't want - * to call hash_page, such as when we are accessing memory (possibly - * user memory) inside a PMU interrupt that occurred while interrupts - * were soft-disabled. We want to invoke the exception handler for - * the access, or panic if there isn't a handler. 
- */ -77: addi r3,r1,STACK_FRAME_OVERHEAD - li r5,SIGSEGV - bl bad_page_fault - b interrupt_return diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 73b06adb6eeb..e866cae57e2f 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1512,16 +1512,40 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap, } EXPORT_SYMBOL_GPL(hash_page); -int __hash_page(unsigned long trap, unsigned long ea, unsigned long dsisr, - unsigned long msr) +int do_hash_fault(struct pt_regs *regs, unsigned long ea, unsigned long dsisr) { unsigned long access = _PAGE_PRESENT | _PAGE_READ; unsigned long flags = 0; - struct mm_struct *mm = current->mm; - unsigned int region_id = get_region_id(ea); + struct mm_struct *mm; + unsigned int region_id; + int err; + + if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT))) + goto page_fault; + + /* + * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then + * don't call hash_page, just fail the fault. This is required to + * prevent re-entrancy problems in the hash code, namely perf + * interrupts hitting while something holds H_PAGE_BUSY, and taking a + * hash fault. See the comment in hash_preload(). + * + * We come here as a result of a DSI at a point where we don't want + * to call hash_page, such as when we are accessing memory (possibly + * user memory) inside a PMU interrupt that occurred while interrupts + * were soft-disabled. We want to invoke the exception handler for + * the access, or panic if there isn't a handler. + */ + if (unlikely(in_nmi())) { + bad_page_fault(regs, ea, SIGSEGV); + return 0; + } + region_id = get_region_id(ea); if ((region_id == VMALLOC_REGION_ID) || (region_id == IO_REGION_ID)) mm = &init_mm; + else + mm = current->mm; if (dsisr & DSISR_NOHPTE) flags |= HPTE_NOHPTE_UPDATE; @@ -1537,13 +1561,31 @@ int __hash_page(unsigned long trap, unsigned long ea, unsigned long dsisr, * 2) user space access kernel space. 
*/ access |= _PAGE_PRIVILEGED; - if ((msr & MSR_PR) || (region_id == USER_REGION_ID)) + if (user_mode(regs) || (region_id == USER_REGION_ID)) access &= ~_PAGE_PRIVILEGED; - if (trap == 0x400) + if (regs->trap == 0x400) access |= _PAGE_EXEC; - return hash_page_mm(mm, ea, access, trap, flags); + err = hash_page_mm(mm, ea, access, regs->trap, flags); + if (unlikely(err < 0)) { + // failed to instert a hash PTE due to an hypervisor error + if (user_mode(regs)) { + if (IS_ENABLED(CONFIG_PPC_SUBPAGE_PROT) && err == -2) + _exception(SIGSEGV, regs, SEGV_ACCERR, ea); + else + _exception(SIGBUS, regs, BUS_ADRERR, ea); + } else { + bad_page_fault(regs, ea, SIGBUS); + } + err = 0; + + } else if (err) { +page_fault: + err = do_page_fault(regs, ea, dsisr); + } + + return err; } #ifdef CONFIG_PPC_MM_SLICES @@ -1843,27 +1885,6 @@ void flush_hash_range(unsigned long number, int local) } } -/* - * low_hash_fault is called when we the low level hash code failed - * to instert a PTE due to an hypervisor error - */ -void low_hash_fault(struct pt_regs *regs, unsigned long address, int rc) -{ - enum ctx_state prev_state = exception_enter(); - - if (user_mode(regs)) { -#ifdef CONFIG_PPC_SUBPAGE_PROT - if (rc == -2) - _exception(SIGSEGV, regs, SEGV_ACCERR, address); - else -#endif - _exception(SIGBUS, regs, BUS_ADRERR, address); - } else - bad_page_fault(regs, address, SIGBUS); - - exception_exit(prev_state); -} - long hpte_insert_repeating(unsigned long hash, unsigned long vpn, unsigned long pa, unsigned long rflags, unsigned long vflags, int psize, int ssize) -- cgit v1.2.3 From a01a3f2ddbcda83e8572787c0ec1dcbeba86915a Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:16 +1000 Subject: powerpc: remove arguments from fault handler functions Make mm fault handlers all just take the pt_regs * argument and load DAR/DSISR from that. Make those that return a value return long. This is done to make the function signatures match other handlers, which will help with a future patch to add wrappers. Explicit arguments could be added for performance but that would require more wrapper macro variants. 
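The shape of the conversion, condensed from the fault.c hunk below (the exception_enter()/exception_exit() bracketing and the exception-table fixup are omitted here for brevity):

/* Before: int do_page_fault(struct pt_regs *regs, unsigned long address,
 *                           unsigned long error_code);
 * the asm entry code loaded DAR/DSISR into the argument registers.
 */

/* After: a uniform pt_regs-only signature, returning long. */
long do_page_fault(struct pt_regs *regs)
{
	/* The exception entry code already saved the SPRs into the frame. */
	return __do_page_fault(regs, regs->dar, regs->dsisr);
}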
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-7-npiggin@gmail.com --- arch/powerpc/include/asm/asm-prototypes.h | 4 ++-- arch/powerpc/include/asm/book3s/64/mmu-hash.h | 2 +- arch/powerpc/include/asm/bug.h | 2 +- arch/powerpc/kernel/exceptions-64e.S | 2 -- arch/powerpc/kernel/exceptions-64s.S | 17 ++++------------- arch/powerpc/kernel/head_40x.S | 10 +++++----- arch/powerpc/kernel/head_8xx.S | 6 +++--- arch/powerpc/kernel/head_book3s_32.S | 5 ++--- arch/powerpc/kernel/head_booke.h | 4 +--- arch/powerpc/mm/book3s64/hash_utils.c | 8 +++++--- arch/powerpc/mm/book3s64/slb.c | 11 +++++++---- arch/powerpc/mm/fault.c | 5 ++--- 12 files changed, 33 insertions(+), 43 deletions(-) (limited to 'arch/powerpc/mm/book3s64') diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index d0b832cbbec8..22c9d08fa3a4 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -82,8 +82,8 @@ void kernel_bad_stack(struct pt_regs *regs); void system_reset_exception(struct pt_regs *regs); void machine_check_exception(struct pt_regs *regs); void emulation_assist_interrupt(struct pt_regs *regs); -long do_slb_fault(struct pt_regs *regs, unsigned long ea); -void do_bad_slb_fault(struct pt_regs *regs, unsigned long ea, long err); +long do_slb_fault(struct pt_regs *regs); +void do_bad_slb_fault(struct pt_regs *regs); /* signals, syscalls and interrupts */ long sys_swapcontext(struct ucontext __user *old_ctx, diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 1aa68094ad29..36884bc0bcd7 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -456,7 +456,7 @@ static inline unsigned long hpt_hash(unsigned long vpn, long hpte_insert_repeating(unsigned long hash, unsigned long vpn, unsigned long pa, unsigned long rlags, unsigned long vflags, int psize, int ssize); -int do_hash_fault(struct pt_regs *regs, unsigned long ea, unsigned long dsisr); +long do_hash_fault(struct pt_regs *regs); extern int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, unsigned long flags, int ssize, int subpage_prot); diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index 464f8ca8a5c9..f7827e993196 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -111,7 +111,7 @@ #ifndef __ASSEMBLY__ struct pt_regs; -extern int do_page_fault(struct pt_regs *, unsigned long, unsigned long); +long do_page_fault(struct pt_regs *); extern void bad_page_fault(struct pt_regs *, unsigned long, int); void __bad_page_fault(struct pt_regs *regs, unsigned long address, int sig); extern void _exception(int, struct pt_regs *, int, unsigned long); diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 74d07dc0bb48..43e71d86dcbf 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -1011,8 +1011,6 @@ storage_fault_common: std r14,_DAR(r1) std r15,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD - mr r4,r14 - mr r5,r15 ld r14,PACA_EXGEN+EX_R14(r13) ld r15,PACA_EXGEN+EX_R15(r13) bl do_page_fault diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 91381fbdef42..33f88a1dcd08 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ 
-1438,10 +1438,9 @@ EXC_VIRT_BEGIN(data_access, 0x4300, 0x80) EXC_VIRT_END(data_access, 0x4300, 0x80) EXC_COMMON_BEGIN(data_access_common) GEN_COMMON data_access - ld r4,_DAR(r1) - ld r5,_DSISR(r1) + ld r4,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD - andis. r0,r5,DSISR_DABRMATCH@h + andis. r0,r4,DSISR_DABRMATCH@h bne- 1f BEGIN_MMU_FTR_SECTION bl do_hash_fault @@ -1504,10 +1503,9 @@ EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80) EXC_VIRT_END(data_access_slb, 0x4380, 0x80) EXC_COMMON_BEGIN(data_access_slb_common) GEN_COMMON data_access_slb - ld r4,_DAR(r1) - addi r3,r1,STACK_FRAME_OVERHEAD BEGIN_MMU_FTR_SECTION /* HPT case, do SLB fault */ + addi r3,r1,STACK_FRAME_OVERHEAD bl do_slb_fault cmpdi r3,0 bne- 1f @@ -1519,8 +1517,6 @@ MMU_FTR_SECTION_ELSE ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) std r3,RESULT(r1) RECONCILE_IRQ_STATE(r10, r11) - ld r4,_DAR(r1) - ld r5,RESULT(r1) addi r3,r1,STACK_FRAME_OVERHEAD bl do_bad_slb_fault b interrupt_return @@ -1555,8 +1551,6 @@ EXC_VIRT_BEGIN(instruction_access, 0x4400, 0x80) EXC_VIRT_END(instruction_access, 0x4400, 0x80) EXC_COMMON_BEGIN(instruction_access_common) GEN_COMMON instruction_access - ld r4,_DAR(r1) - ld r5,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD BEGIN_MMU_FTR_SECTION bl do_hash_fault @@ -1602,10 +1596,9 @@ EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80) EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80) EXC_COMMON_BEGIN(instruction_access_slb_common) GEN_COMMON instruction_access_slb - ld r4,_DAR(r1) - addi r3,r1,STACK_FRAME_OVERHEAD BEGIN_MMU_FTR_SECTION /* HPT case, do SLB fault */ + addi r3,r1,STACK_FRAME_OVERHEAD bl do_slb_fault cmpdi r3,0 bne- 1f @@ -1617,8 +1610,6 @@ MMU_FTR_SECTION_ELSE ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) std r3,RESULT(r1) RECONCILE_IRQ_STATE(r10, r11) - ld r4,_DAR(r1) - ld r5,RESULT(r1) addi r3,r1,STACK_FRAME_OVERHEAD bl do_bad_slb_fault b interrupt_return diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S index a1ae00689e0f..3c5577ac4dc8 100644 --- a/arch/powerpc/kernel/head_40x.S +++ b/arch/powerpc/kernel/head_40x.S @@ -179,9 +179,9 @@ _ENTRY(saved_ksp_limit) */ START_EXCEPTION(0x0300, DataStorage) EXCEPTION_PROLOG - mfspr r5, SPRN_ESR /* Grab the ESR, save it, pass arg3 */ + mfspr r5, SPRN_ESR /* Grab the ESR, save it */ stw r5, _ESR(r11) - mfspr r4, SPRN_DEAR /* Grab the DEAR, save it, pass arg2 */ + mfspr r4, SPRN_DEAR /* Grab the DEAR, save it */ stw r4, _DEAR(r11) EXC_XFER_LITE(0x300, handle_page_fault) @@ -191,9 +191,9 @@ _ENTRY(saved_ksp_limit) */ START_EXCEPTION(0x0400, InstructionAccess) EXCEPTION_PROLOG - mr r4,r12 /* Pass SRR0 as arg2 */ - stw r4, _DEAR(r11) - li r5,0 /* Pass zero as arg3 */ + li r5,0 + stw r5, _ESR(r11) /* Zero ESR */ + stw r12, _DEAR(r11) /* SRR0 as DEAR */ EXC_XFER_LITE(0x400, handle_page_fault) /* 0x0500 - External Interrupt Exception */ diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S index 52702f3db6df..0b2c247cfdff 100644 --- a/arch/powerpc/kernel/head_8xx.S +++ b/arch/powerpc/kernel/head_8xx.S @@ -312,14 +312,14 @@ DataStoreTLBMiss: . = 0x1300 InstructionTLBError: EXCEPTION_PROLOG - mr r4,r12 andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */ andis. r10,r9,SRR1_ISI_NOPT@h beq+ .Litlbie - tlbie r4 + tlbie r12 /* 0x400 is InstructionAccess exception, needed by bad_page_fault() */ .Litlbie: - stw r4, _DAR(r11) + stw r12, _DAR(r11) + stw r5, _DSISR(r11) EXC_XFER_LITE(0x400, handle_page_fault) /* This is the data TLB error on the MPC8xx. 
This could be due to diff --git a/arch/powerpc/kernel/head_book3s_32.S b/arch/powerpc/kernel/head_book3s_32.S index 0c4fcb6e1a35..61f862fa9655 100644 --- a/arch/powerpc/kernel/head_book3s_32.S +++ b/arch/powerpc/kernel/head_book3s_32.S @@ -362,9 +362,9 @@ BEGIN_MMU_FTR_SECTION END_MMU_FTR_SECTION_IFSET(MMU_FTR_HPTE_TABLE) #endif #endif /* CONFIG_VMAP_STACK */ -1: mr r4,r12 andis. r5,r9,DSISR_SRR1_MATCH_32S@h /* Filter relevant SRR1 bits */ - stw r4, _DAR(r11) + stw r5, _DSISR(r11) + stw r12, _DAR(r11) EXC_XFER_LITE(0x400, handle_page_fault) /* External interrupt */ @@ -686,7 +686,6 @@ handle_page_fault_tramp_1: #ifdef CONFIG_VMAP_STACK EXCEPTION_PROLOG_2 handle_dar_dsisr=1 #endif - lwz r4, _DAR(r11) lwz r5, _DSISR(r11) /* fall through */ handle_page_fault_tramp_2: diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h index 74e230c200fb..0fbdacc7fab7 100644 --- a/arch/powerpc/kernel/head_booke.h +++ b/arch/powerpc/kernel/head_booke.h @@ -476,9 +476,7 @@ label: NORMAL_EXCEPTION_PROLOG(INST_STORAGE); \ mfspr r5,SPRN_ESR; /* Grab the ESR and save it */ \ stw r5,_ESR(r11); \ - mr r4,r12; /* Pass SRR0 as arg2 */ \ - stw r4, _DEAR(r11); \ - li r5,0; /* Pass zero as arg3 */ \ + stw r12, _DEAR(r11); /* Pass SRR0 as arg2 */ \ EXC_XFER_LITE(0x0400, handle_page_fault) #define ALIGNMENT_EXCEPTION \ diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index e866cae57e2f..9a499af3eebf 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1512,13 +1512,15 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap, } EXPORT_SYMBOL_GPL(hash_page); -int do_hash_fault(struct pt_regs *regs, unsigned long ea, unsigned long dsisr) +long do_hash_fault(struct pt_regs *regs) { + unsigned long ea = regs->dar; + unsigned long dsisr = regs->dsisr; unsigned long access = _PAGE_PRESENT | _PAGE_READ; unsigned long flags = 0; struct mm_struct *mm; unsigned int region_id; - int err; + long err; if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT))) goto page_fault; @@ -1582,7 +1584,7 @@ int do_hash_fault(struct pt_regs *regs, unsigned long ea, unsigned long dsisr) } else if (err) { page_fault: - err = do_page_fault(regs, ea, dsisr); + err = do_page_fault(regs); } return err; diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c index 584567970c11..985902ce0272 100644 --- a/arch/powerpc/mm/book3s64/slb.c +++ b/arch/powerpc/mm/book3s64/slb.c @@ -813,8 +813,9 @@ static long slb_allocate_user(struct mm_struct *mm, unsigned long ea) return slb_insert_entry(ea, context, flags, ssize, false); } -long do_slb_fault(struct pt_regs *regs, unsigned long ea) +long do_slb_fault(struct pt_regs *regs) { + unsigned long ea = regs->dar; unsigned long id = get_region_id(ea); /* IRQs are not reconciled here, so can't check irqs_disabled */ @@ -865,13 +866,15 @@ long do_slb_fault(struct pt_regs *regs, unsigned long ea) } } -void do_bad_slb_fault(struct pt_regs *regs, unsigned long ea, long err) +void do_bad_slb_fault(struct pt_regs *regs) { + int err = regs->result; + if (err == -EFAULT) { if (user_mode(regs)) - _exception(SIGSEGV, regs, SEGV_BNDERR, ea); + _exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar); else - bad_page_fault(regs, ea, SIGSEGV); + bad_page_fault(regs, regs->dar, SIGSEGV); } else if (err == -EINVAL) { unrecoverable_exception(regs); } else { diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 8961b44f350c..273ff845eccf 100644 --- a/arch/powerpc/mm/fault.c 
+++ b/arch/powerpc/mm/fault.c @@ -542,12 +542,11 @@ retry: } NOKPROBE_SYMBOL(__do_page_fault); -int do_page_fault(struct pt_regs *regs, unsigned long address, - unsigned long error_code) +long do_page_fault(struct pt_regs *regs) { const struct exception_table_entry *entry; enum ctx_state prev_state = exception_enter(); - int rc = __do_page_fault(regs, address, error_code); + int rc = __do_page_fault(regs, regs->dar, regs->dsisr); exception_exit(prev_state); if (likely(!rc)) return 0; -- cgit v1.2.3 From 8458c628a53ba4311b2df12370be1a6f1870ff37 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:21 +1000 Subject: powerpc: bad_page_fault get registers from regs Similar to the previous patch this makes interrupt handler function types more regular so they can be wrapped with the next patch. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-12-npiggin@gmail.com --- arch/powerpc/include/asm/bug.h | 4 ++-- arch/powerpc/kernel/entry_32.S | 3 +-- arch/powerpc/kernel/exceptions-64e.S | 3 +-- arch/powerpc/kernel/traps.c | 2 +- arch/powerpc/mm/book3s64/hash_utils.c | 4 ++-- arch/powerpc/mm/book3s64/slb.c | 2 +- arch/powerpc/mm/fault.c | 6 +++--- arch/powerpc/platforms/8xx/machine_check.c | 2 +- 8 files changed, 12 insertions(+), 14 deletions(-) (limited to 'arch/powerpc/mm/book3s64') diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index f7827e993196..4220789b9a97 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -112,8 +112,8 @@ struct pt_regs; long do_page_fault(struct pt_regs *); -extern void bad_page_fault(struct pt_regs *, unsigned long, int); -void __bad_page_fault(struct pt_regs *regs, unsigned long address, int sig); +void bad_page_fault(struct pt_regs *, int); +void __bad_page_fault(struct pt_regs *regs, int sig); extern void _exception(int, struct pt_regs *, int, unsigned long); extern void _exception_pkey(struct pt_regs *, unsigned long, int); extern void die(const char *, struct pt_regs *, long); diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index d6ea3f2d6cc0..b102b40c4988 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -672,9 +672,8 @@ handle_page_fault: lwz r0,_TRAP(r1) clrrwi r0,r0,1 stw r0,_TRAP(r1) - mr r5,r3 + mr r4,r3 /* err arg for bad_page_fault */ addi r3,r1,STACK_FRAME_OVERHEAD - lwz r4,_DAR(r1) bl __bad_page_fault b ret_from_except_full diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 1a7291daadfd..003999c7836c 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -1016,9 +1016,8 @@ storage_fault_common: bne- 1f b ret_from_except_lite 1: bl save_nvgprs - mr r5,r3 + mr r4,r3 addi r3,r1,STACK_FRAME_OVERHEAD - ld r4,_DAR(r1) bl __bad_page_fault b ret_from_except diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index dfb0d3325f4b..f7370145be19 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1612,7 +1612,7 @@ bad: if (user_mode(regs)) _exception(sig, regs, code, regs->dar); else - bad_page_fault(regs, regs->dar, sig); + bad_page_fault(regs, sig); bail: exception_exit(prev_state); diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 9a499af3eebf..1a270cc37d97 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1539,7 +1539,7 @@ long 
do_hash_fault(struct pt_regs *regs) * the access, or panic if there isn't a handler. */ if (unlikely(in_nmi())) { - bad_page_fault(regs, ea, SIGSEGV); + bad_page_fault(regs, SIGSEGV); return 0; } @@ -1578,7 +1578,7 @@ long do_hash_fault(struct pt_regs *regs) else _exception(SIGBUS, regs, BUS_ADRERR, ea); } else { - bad_page_fault(regs, ea, SIGBUS); + bad_page_fault(regs, SIGBUS); } err = 0; diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c index 985902ce0272..c581548b533f 100644 --- a/arch/powerpc/mm/book3s64/slb.c +++ b/arch/powerpc/mm/book3s64/slb.c @@ -874,7 +874,7 @@ void do_bad_slb_fault(struct pt_regs *regs) if (user_mode(regs)) _exception(SIGSEGV, regs, SEGV_BNDERR, regs->dar); else - bad_page_fault(regs, regs->dar, SIGSEGV); + bad_page_fault(regs, SIGSEGV); } else if (err == -EINVAL) { unrecoverable_exception(regs); } else { diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 273ff845eccf..5dd3248b47c7 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -566,7 +566,7 @@ NOKPROBE_SYMBOL(do_page_fault); * It is called from the DSI and ISI handlers in head.S and from some * of the procedures in traps.c. */ -void __bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) +void __bad_page_fault(struct pt_regs *regs, int sig) { int is_write = page_fault_is_write(regs->dsisr); @@ -604,7 +604,7 @@ void __bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) die("Kernel access of bad area", regs, sig); } -void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) +void bad_page_fault(struct pt_regs *regs, int sig) { const struct exception_table_entry *entry; @@ -613,5 +613,5 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) if (entry) instruction_pointer_set(regs, extable_fixup(entry)); else - __bad_page_fault(regs, address, sig); + __bad_page_fault(regs, sig); } diff --git a/arch/powerpc/platforms/8xx/machine_check.c b/arch/powerpc/platforms/8xx/machine_check.c index 88dedf38eccd..656365975895 100644 --- a/arch/powerpc/platforms/8xx/machine_check.c +++ b/arch/powerpc/platforms/8xx/machine_check.c @@ -26,7 +26,7 @@ int machine_check_8xx(struct pt_regs *regs) * to deal with that than having a wart in the mcheck handler. * -- BenH */ - bad_page_fault(regs, regs->dar, SIGBUS); + bad_page_fault(regs, SIGBUS); return 1; #else return 0; -- cgit v1.2.3 From bf0e2374aa7b4f8b01fd59fcb0746a9b6b05326a Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:25 +1000 Subject: powerpc/64s: split do_hash_fault This is required for subsequent interrupt wrapper implementation. 
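The resulting structure, condensed from the hunk below: the quick rejects that must run before any hashing stay in the outer handler, while the hashing proper moves to __do_hash_fault(), which a later patch can wrap with the interrupt-entry macros (the helper's body is elided in this sketch):

/*
 * Hashing proper: region/mm selection, access flags, hash_page_mm(),
 * and the "hypervisor refused the insert" reporting. Returns nonzero
 * when a normal Linux page fault is still needed.
 */
static long __do_hash_fault(struct pt_regs *regs);

long do_hash_fault(struct pt_regs *regs)
{
	if (unlikely(regs->dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT)))
		return do_page_fault(regs);	/* weird error bits set */

	if (unlikely(in_nmi())) {	/* e.g. perf: don't touch the hash */
		bad_page_fault(regs, SIGSEGV);
		return 0;
	}

	if (__do_hash_fault(regs))	/* no HPTE inserted */
		return do_page_fault(regs);
	return 0;
}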
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-16-npiggin@gmail.com --- arch/powerpc/mm/book3s64/hash_utils.c | 56 +++++++++++++++++++++-------------- 1 file changed, 33 insertions(+), 23 deletions(-) (limited to 'arch/powerpc/mm/book3s64') diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 1a270cc37d97..d7d3a80a51d4 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1512,7 +1512,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap, } EXPORT_SYMBOL_GPL(hash_page); -long do_hash_fault(struct pt_regs *regs) +static long __do_hash_fault(struct pt_regs *regs) { unsigned long ea = regs->dar; unsigned long dsisr = regs->dsisr; @@ -1522,27 +1522,6 @@ long do_hash_fault(struct pt_regs *regs) unsigned int region_id; long err; - if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT))) - goto page_fault; - - /* - * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then - * don't call hash_page, just fail the fault. This is required to - * prevent re-entrancy problems in the hash code, namely perf - * interrupts hitting while something holds H_PAGE_BUSY, and taking a - * hash fault. See the comment in hash_preload(). - * - * We come here as a result of a DSI at a point where we don't want - * to call hash_page, such as when we are accessing memory (possibly - * user memory) inside a PMU interrupt that occurred while interrupts - * were soft-disabled. We want to invoke the exception handler for - * the access, or panic if there isn't a handler. - */ - if (unlikely(in_nmi())) { - bad_page_fault(regs, SIGSEGV); - return 0; - } - region_id = get_region_id(ea); if ((region_id == VMALLOC_REGION_ID) || (region_id == IO_REGION_ID)) mm = &init_mm; @@ -1581,8 +1560,39 @@ long do_hash_fault(struct pt_regs *regs) bad_page_fault(regs, SIGBUS); } err = 0; + } + + return err; +} + +long do_hash_fault(struct pt_regs *regs) +{ + unsigned long dsisr = regs->dsisr; + long err; + + if (unlikely(dsisr & (DSISR_BAD_FAULT_64S | DSISR_KEYFAULT))) + goto page_fault; + + /* + * If we are in an "NMI" (e.g., an interrupt when soft-disabled), then + * don't call hash_page, just fail the fault. This is required to + * prevent re-entrancy problems in the hash code, namely perf + * interrupts hitting while something holds H_PAGE_BUSY, and taking a + * hash fault. See the comment in hash_preload(). + * + * We come here as a result of a DSI at a point where we don't want + * to call hash_page, such as when we are accessing memory (possibly + * user memory) inside a PMU interrupt that occurred while interrupts + * were soft-disabled. We want to invoke the exception handler for + * the access, or panic if there isn't a handler. + */ + if (unlikely(in_nmi())) { + bad_page_fault(regs, SIGSEGV); + return 0; + } - } else if (err) { + err = __do_hash_fault(regs); + if (err) { page_fault: err = do_page_fault(regs); } -- cgit v1.2.3 From e44370abb2e99299678ec6b209f8aad574fa5f36 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:27 +1000 Subject: powerpc/64s: slb comment update This makes a small improvement to the description of the SLB interrupt environment. Move the memory access restrictions into one paragraph, and the interrupt restrictions into the next rather than mix them. 
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-18-npiggin@gmail.com --- arch/powerpc/mm/book3s64/slb.c | 28 +++++++++++++++------------- 1 file changed, 15 insertions(+), 13 deletions(-) (limited to 'arch/powerpc/mm/book3s64') diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c index c581548b533f..14c62b685f0c 100644 --- a/arch/powerpc/mm/book3s64/slb.c +++ b/arch/powerpc/mm/book3s64/slb.c @@ -825,19 +825,21 @@ long do_slb_fault(struct pt_regs *regs) return -EINVAL; /* - * SLB kernel faults must be very careful not to touch anything - * that is not bolted. E.g., PACA and global variables are okay, - * mm->context stuff is not. - * - * SLB user faults can access all of kernel memory, but must be - * careful not to touch things like IRQ state because it is not - * "reconciled" here. The difficulty is that we must use - * fast_exception_return to return from kernel SLB faults without - * looking at possible non-bolted memory. We could test user vs - * kernel faults in the interrupt handler asm and do a full fault, - * reconcile, ret_from_except for user faults which would make them - * first class kernel code. But for performance it's probably nicer - * if they go via fast_exception_return too. + * SLB kernel faults must be very careful not to touch anything that is + * not bolted. E.g., PACA and global variables are okay, mm->context + * stuff is not. SLB user faults may access all of memory (and induce + * one recursive SLB kernel fault), so the kernel fault must not + * trample on the user fault state at those points. + */ + + /* + * The interrupt state is not reconciled, for performance, so that + * fast_interrupt_return can be used. The handler must not touch local + * irq state, or schedule. We could test for usermode and upgrade to a + * normal process context (synchronous) interrupt for those, which + * would make them first-class kernel code and able to be traced and + * instrumented, although performance would suffer a bit, it would + * probably be a good tradeoff. 
*/ if (id >= LINEAR_MAP_REGION_ID) { long err; -- cgit v1.2.3 From 3a96570ffceb15c6ed9cc6f990f172dcdc8ac279 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:38 +1000 Subject: powerpc: convert interrupt handlers to use wrappers Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-29-npiggin@gmail.com --- arch/powerpc/include/asm/asm-prototypes.h | 29 --------- arch/powerpc/include/asm/book3s/64/mmu-hash.h | 1 - arch/powerpc/include/asm/debug.h | 3 - arch/powerpc/include/asm/hw_irq.h | 9 --- arch/powerpc/include/asm/interrupt.h | 66 ++++++++++++++++++++ arch/powerpc/include/asm/nmi.h | 2 +- arch/powerpc/kernel/dbell.c | 6 +- arch/powerpc/kernel/irq.c | 3 +- arch/powerpc/kernel/mce.c | 5 +- arch/powerpc/kernel/process.c | 3 +- arch/powerpc/kernel/syscall_64.c | 1 + arch/powerpc/kernel/tau_6xx.c | 3 +- arch/powerpc/kernel/time.c | 3 +- arch/powerpc/kernel/traps.c | 86 +++++++++++++++++---------- arch/powerpc/kernel/watchdog.c | 7 ++- arch/powerpc/kvm/book3s_hv.c | 1 + arch/powerpc/kvm/book3s_hv_builtin.c | 1 + arch/powerpc/kvm/booke.c | 1 + arch/powerpc/mm/book3s64/hash_utils.c | 12 +++- arch/powerpc/mm/book3s64/slb.c | 7 ++- arch/powerpc/mm/fault.c | 5 +- arch/powerpc/platforms/cell/ras.c | 6 +- arch/powerpc/platforms/cell/ras.h | 9 ++- arch/powerpc/platforms/powernv/idle.c | 1 + 24 files changed, 169 insertions(+), 101 deletions(-) (limited to 'arch/powerpc/mm/book3s64') diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h index 22c9d08fa3a4..939f3c94c8f3 100644 --- a/arch/powerpc/include/asm/asm-prototypes.h +++ b/arch/powerpc/include/asm/asm-prototypes.h @@ -56,35 +56,6 @@ int exit_vmx_usercopy(void); int enter_vmx_ops(void); void *exit_vmx_ops(void *dest); -/* Traps */ -long machine_check_early(struct pt_regs *regs); -long hmi_exception_realmode(struct pt_regs *regs); -void SMIException(struct pt_regs *regs); -void handle_hmi_exception(struct pt_regs *regs); -void instruction_breakpoint_exception(struct pt_regs *regs); -void RunModeException(struct pt_regs *regs); -void single_step_exception(struct pt_regs *regs); -void program_check_exception(struct pt_regs *regs); -void alignment_exception(struct pt_regs *regs); -void StackOverflow(struct pt_regs *regs); -void stack_overflow_exception(struct pt_regs *regs); -void kernel_fp_unavailable_exception(struct pt_regs *regs); -void altivec_unavailable_exception(struct pt_regs *regs); -void vsx_unavailable_exception(struct pt_regs *regs); -void fp_unavailable_tm(struct pt_regs *regs); -void altivec_unavailable_tm(struct pt_regs *regs); -void vsx_unavailable_tm(struct pt_regs *regs); -void facility_unavailable_exception(struct pt_regs *regs); -void TAUException(struct pt_regs *regs); -void altivec_assist_exception(struct pt_regs *regs); -void unrecoverable_exception(struct pt_regs *regs); -void kernel_bad_stack(struct pt_regs *regs); -void system_reset_exception(struct pt_regs *regs); -void machine_check_exception(struct pt_regs *regs); -void emulation_assist_interrupt(struct pt_regs *regs); -long do_slb_fault(struct pt_regs *regs); -void do_bad_slb_fault(struct pt_regs *regs); - /* signals, syscalls and interrupts */ long sys_swapcontext(struct ucontext __user *old_ctx, struct ucontext __user *new_ctx, diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h index 36884bc0bcd7..f911bdb68d8b 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h +++ 
b/arch/powerpc/include/asm/book3s/64/mmu-hash.h @@ -456,7 +456,6 @@ static inline unsigned long hpt_hash(unsigned long vpn, long hpte_insert_repeating(unsigned long hash, unsigned long vpn, unsigned long pa, unsigned long rlags, unsigned long vflags, int psize, int ssize); -long do_hash_fault(struct pt_regs *regs); extern int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, unsigned long flags, int ssize, int subpage_prot); diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h index 0550eceab3ca..86a14736c76c 100644 --- a/arch/powerpc/include/asm/debug.h +++ b/arch/powerpc/include/asm/debug.h @@ -50,9 +50,6 @@ bool ppc_breakpoint_available(void); #ifdef CONFIG_PPC_ADV_DEBUG_REGS extern void do_send_trap(struct pt_regs *regs, unsigned long address, unsigned long error_code, int brkpt); -#else - -void do_break(struct pt_regs *regs); #endif #endif /* _ASM_POWERPC_DEBUG_H */ diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 75c2b137fc00..614957f74cee 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -50,15 +50,6 @@ #ifndef __ASSEMBLY__ -extern void replay_system_reset(void); -extern void replay_soft_interrupts(void); - -extern void timer_interrupt(struct pt_regs *); -extern void performance_monitor_exception(struct pt_regs *regs); -extern void WatchdogException(struct pt_regs *regs); -extern void unknown_exception(struct pt_regs *regs); -void unknown_async_exception(struct pt_regs *regs); - #ifdef CONFIG_PPC64 #include diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 3df1921cfda3..4ffbd3d75324 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -232,4 +232,70 @@ __visible noinstr long func(struct pt_regs *regs) \ \ static __always_inline long ____##func(struct pt_regs *regs) + +/* Interrupt handlers */ +/* kernel/traps.c */ +DECLARE_INTERRUPT_HANDLER_NMI(system_reset_exception); +#ifdef CONFIG_PPC_BOOK3S_64 +DECLARE_INTERRUPT_HANDLER_ASYNC(machine_check_exception); +#else +DECLARE_INTERRUPT_HANDLER_NMI(machine_check_exception); +#endif +DECLARE_INTERRUPT_HANDLER(SMIException); +DECLARE_INTERRUPT_HANDLER(handle_hmi_exception); +DECLARE_INTERRUPT_HANDLER(unknown_exception); +DECLARE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception); +DECLARE_INTERRUPT_HANDLER(instruction_breakpoint_exception); +DECLARE_INTERRUPT_HANDLER(RunModeException); +DECLARE_INTERRUPT_HANDLER(single_step_exception); +DECLARE_INTERRUPT_HANDLER(program_check_exception); +DECLARE_INTERRUPT_HANDLER(emulation_assist_interrupt); +DECLARE_INTERRUPT_HANDLER(alignment_exception); +DECLARE_INTERRUPT_HANDLER(StackOverflow); +DECLARE_INTERRUPT_HANDLER(stack_overflow_exception); +DECLARE_INTERRUPT_HANDLER(kernel_fp_unavailable_exception); +DECLARE_INTERRUPT_HANDLER(altivec_unavailable_exception); +DECLARE_INTERRUPT_HANDLER(vsx_unavailable_exception); +DECLARE_INTERRUPT_HANDLER(facility_unavailable_exception); +DECLARE_INTERRUPT_HANDLER(fp_unavailable_tm); +DECLARE_INTERRUPT_HANDLER(altivec_unavailable_tm); +DECLARE_INTERRUPT_HANDLER(vsx_unavailable_tm); +DECLARE_INTERRUPT_HANDLER_NMI(performance_monitor_exception_nmi); +DECLARE_INTERRUPT_HANDLER_ASYNC(performance_monitor_exception_async); +DECLARE_INTERRUPT_HANDLER_RAW(performance_monitor_exception); +DECLARE_INTERRUPT_HANDLER(DebugException); +DECLARE_INTERRUPT_HANDLER(altivec_assist_exception); 
+DECLARE_INTERRUPT_HANDLER(CacheLockingException); +DECLARE_INTERRUPT_HANDLER(SPEFloatingPointException); +DECLARE_INTERRUPT_HANDLER(SPEFloatingPointRoundException); +DECLARE_INTERRUPT_HANDLER(unrecoverable_exception); +DECLARE_INTERRUPT_HANDLER(WatchdogException); +DECLARE_INTERRUPT_HANDLER(kernel_bad_stack); + +/* slb.c */ +DECLARE_INTERRUPT_HANDLER_RAW(do_slb_fault); +DECLARE_INTERRUPT_HANDLER(do_bad_slb_fault); + +/* hash_utils.c */ +DECLARE_INTERRUPT_HANDLER_RAW(do_hash_fault); + +/* fault.c */ +DECLARE_INTERRUPT_HANDLER_RET(do_page_fault); +DECLARE_INTERRUPT_HANDLER(do_bad_page_fault_segv); + +/* process.c */ +DECLARE_INTERRUPT_HANDLER(do_break); + +/* time.c */ +DECLARE_INTERRUPT_HANDLER_ASYNC(timer_interrupt); + +/* mce.c */ +DECLARE_INTERRUPT_HANDLER_NMI(machine_check_early); +DECLARE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode); + +DECLARE_INTERRUPT_HANDLER_ASYNC(TAUException); + +void replay_system_reset(void); +void replay_soft_interrupts(void); + #endif /* _ASM_POWERPC_INTERRUPT_H */ diff --git a/arch/powerpc/include/asm/nmi.h b/arch/powerpc/include/asm/nmi.h index 63eccea93796..160abcb8e9fa 100644 --- a/arch/powerpc/include/asm/nmi.h +++ b/arch/powerpc/include/asm/nmi.h @@ -4,7 +4,7 @@ #ifdef CONFIG_PPC_WATCHDOG extern void arch_touch_nmi_watchdog(void); -void soft_nmi_interrupt(struct pt_regs *regs); +long soft_nmi_interrupt(struct pt_regs *regs); #else static inline void arch_touch_nmi_watchdog(void) {} #endif diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c index 52680cf07c9d..6a7ecfca5c3b 100644 --- a/arch/powerpc/kernel/dbell.c +++ b/arch/powerpc/kernel/dbell.c @@ -12,13 +12,14 @@ #include #include +#include #include #include #include #ifdef CONFIG_SMP -void doorbell_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_ASYNC(doorbell_exception) { struct pt_regs *old_regs = set_irq_regs(regs); @@ -39,9 +40,8 @@ void doorbell_exception(struct pt_regs *regs) set_irq_regs(old_regs); } #else /* CONFIG_SMP */ -void doorbell_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_ASYNC(doorbell_exception) { printk(KERN_WARNING "Received doorbell on non-smp system\n"); } #endif /* CONFIG_SMP */ - diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 6b1eca53e36c..2055d204d08e 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -54,6 +54,7 @@ #include #include +#include #include #include #include @@ -665,7 +666,7 @@ void __do_irq(struct pt_regs *regs) irq_exit(); } -void do_IRQ(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_ASYNC(do_IRQ) { struct pt_regs *old_regs = set_irq_regs(regs); void *cursp, *irqsp, *sirqsp; diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c index 3e3a84127c0e..cfe96cd3f48b 100644 --- a/arch/powerpc/kernel/mce.c +++ b/arch/powerpc/kernel/mce.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -583,7 +584,7 @@ EXPORT_SYMBOL_GPL(machine_check_print_event_info); * * regs->nip and regs->msr contains srr0 and ssr1. 
*/ -long notrace machine_check_early(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_NMI(machine_check_early) { long handled = 0; u8 ftrace_enabled = this_cpu_get_ftrace_enabled(); @@ -717,7 +718,7 @@ long hmi_handle_debugtrig(struct pt_regs *regs) /* * Return values: */ -long hmi_exception_realmode(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode) { int ret; diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 4f0f81e9420b..8520ed5ae144 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -41,6 +41,7 @@ #include #include +#include #include #include #include @@ -659,7 +660,7 @@ static void do_break_handler(struct pt_regs *regs) } } -void do_break(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(do_break) { current->thread.trap_nr = TRAP_HWBKPT; if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, regs->dsisr, diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c index e0eb2a502db3..316c3ba16554 100644 --- a/arch/powerpc/kernel/syscall_64.c +++ b/arch/powerpc/kernel/syscall_64.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/kernel/tau_6xx.c b/arch/powerpc/kernel/tau_6xx.c index 0b4694b8d248..3f300eccc09e 100644 --- a/arch/powerpc/kernel/tau_6xx.c +++ b/arch/powerpc/kernel/tau_6xx.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -100,7 +101,7 @@ static void TAUupdate(int cpu) * with interrupts disabled */ -void TAUException(struct pt_regs * regs) +DEFINE_INTERRUPT_HANDLER_ASYNC(TAUException) { int cpu = smp_processor_id(); diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 83633a24ce78..7636773b028f 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -57,6 +57,7 @@ #include #include +#include #include #include #include @@ -571,7 +572,7 @@ void arch_irq_work_raise(void) * timer_interrupt - gets called when the decrementer overflows, * with interrupts disabled. */ -void timer_interrupt(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_ASYNC(timer_interrupt) { struct clock_event_device *evt = this_cpu_ptr(&decrementers); u64 *next_tb = this_cpu_ptr(&decrementers_next_tb); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 2c5986109412..51e56b7fceb7 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -430,8 +431,7 @@ nonrecoverable: regs->msr &= ~MSR_RI; #endif } - -void system_reset_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_NMI(system_reset_exception) { unsigned long hsrr0, hsrr1; bool saved_hsrrs = false; @@ -519,6 +519,8 @@ out: this_cpu_set_ftrace_enabled(ftrace_enabled); /* What should we do here? We could issue a shutdown or hard reset. 
*/ + + return 0; } NOKPROBE_SYMBOL(system_reset_exception); @@ -805,7 +807,11 @@ void die_mce(const char *str, struct pt_regs *regs, long err) } NOKPROBE_SYMBOL(die_mce); -void machine_check_exception(struct pt_regs *regs) +#ifdef CONFIG_PPC_BOOK3S_64 +DEFINE_INTERRUPT_HANDLER_ASYNC(machine_check_exception) +#else +DEFINE_INTERRUPT_HANDLER_NMI(machine_check_exception) +#endif { int recover = 0; @@ -855,10 +861,16 @@ bail: die_mce("Unrecoverable Machine check", regs, SIGBUS); if (nmi) nmi_exit(); + +#ifdef CONFIG_PPC_BOOK3S_64 + return; +#else + return 0; +#endif } NOKPROBE_SYMBOL(machine_check_exception); -void SMIException(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(SMIException) /* async? */ { die("System Management Interrupt", regs, SIGABRT); } @@ -1044,7 +1056,7 @@ static void p9_hmi_special_emu(struct pt_regs *regs) } #endif /* CONFIG_VSX */ -void handle_hmi_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_ASYNC(handle_hmi_exception) { struct pt_regs *old_regs; @@ -1073,7 +1085,7 @@ void handle_hmi_exception(struct pt_regs *regs) set_irq_regs(old_regs); } -void unknown_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(unknown_exception) { enum ctx_state prev_state = exception_enter(); @@ -1085,7 +1097,7 @@ void unknown_exception(struct pt_regs *regs) exception_exit(prev_state); } -void unknown_async_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception) { enum ctx_state prev_state = exception_enter(); @@ -1097,7 +1109,7 @@ void unknown_async_exception(struct pt_regs *regs) exception_exit(prev_state); } -void instruction_breakpoint_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(instruction_breakpoint_exception) { enum ctx_state prev_state = exception_enter(); @@ -1112,12 +1124,12 @@ bail: exception_exit(prev_state); } -void RunModeException(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(RunModeException) { _exception(SIGTRAP, regs, TRAP_UNK, 0); } -void single_step_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(single_step_exception) { enum ctx_state prev_state = exception_enter(); @@ -1579,7 +1591,7 @@ sigill: } -void program_check_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(program_check_exception) { enum ctx_state prev_state = exception_enter(); @@ -1593,7 +1605,7 @@ NOKPROBE_SYMBOL(program_check_exception); * This occurs when running in hypervisor mode on POWER6 or later * and an illegal instruction is encountered. 
*/ -void emulation_assist_interrupt(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(emulation_assist_interrupt) { enum ctx_state prev_state = exception_enter(); @@ -1604,7 +1616,7 @@ void emulation_assist_interrupt(struct pt_regs *regs) } NOKPROBE_SYMBOL(emulation_assist_interrupt); -void alignment_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(alignment_exception) { enum ctx_state prev_state = exception_enter(); int sig, code, fixed = 0; @@ -1654,7 +1666,7 @@ bail: exception_exit(prev_state); } -void StackOverflow(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(StackOverflow) { pr_crit("Kernel stack overflow in process %s[%d], r1=%lx\n", current->comm, task_pid_nr(current), regs->gpr[1]); @@ -1663,7 +1675,7 @@ void StackOverflow(struct pt_regs *regs) panic("kernel stack overflow"); } -void stack_overflow_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(stack_overflow_exception) { enum ctx_state prev_state = exception_enter(); @@ -1672,7 +1684,7 @@ void stack_overflow_exception(struct pt_regs *regs) exception_exit(prev_state); } -void kernel_fp_unavailable_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(kernel_fp_unavailable_exception) { enum ctx_state prev_state = exception_enter(); @@ -1683,7 +1695,7 @@ void kernel_fp_unavailable_exception(struct pt_regs *regs) exception_exit(prev_state); } -void altivec_unavailable_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(altivec_unavailable_exception) { enum ctx_state prev_state = exception_enter(); @@ -1702,7 +1714,7 @@ bail: exception_exit(prev_state); } -void vsx_unavailable_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(vsx_unavailable_exception) { if (user_mode(regs)) { /* A user program has executed an vsx instruction, @@ -1733,7 +1745,7 @@ static void tm_unavailable(struct pt_regs *regs) die("Unrecoverable TM Unavailable Exception", regs, SIGABRT); } -void facility_unavailable_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(facility_unavailable_exception) { static char *facility_strings[] = { [FSCR_FP_LG] = "FPU", @@ -1853,7 +1865,7 @@ out: #ifdef CONFIG_PPC_TRANSACTIONAL_MEM -void fp_unavailable_tm(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(fp_unavailable_tm) { /* Note: This does not handle any kind of FP laziness. */ @@ -1886,7 +1898,7 @@ void fp_unavailable_tm(struct pt_regs *regs) tm_recheckpoint(¤t->thread); } -void altivec_unavailable_tm(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(altivec_unavailable_tm) { /* See the comments in fp_unavailable_tm(). This function operates * the same way. @@ -1901,7 +1913,7 @@ void altivec_unavailable_tm(struct pt_regs *regs) current->thread.used_vr = 1; } -void vsx_unavailable_tm(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(vsx_unavailable_tm) { /* See the comments in fp_unavailable_tm(). This works similarly, * though we're loading both FP and VEC registers in here. 
@@ -1926,7 +1938,9 @@ void vsx_unavailable_tm(struct pt_regs *regs) } #endif /* CONFIG_PPC_TRANSACTIONAL_MEM */ -static void performance_monitor_exception_nmi(struct pt_regs *regs) +#ifdef CONFIG_PPC64 +DECLARE_INTERRUPT_HANDLER_NMI(performance_monitor_exception_nmi); +DEFINE_INTERRUPT_HANDLER_NMI(performance_monitor_exception_nmi) { nmi_enter(); @@ -1935,9 +1949,13 @@ static void performance_monitor_exception_nmi(struct pt_regs *regs) perf_irq(regs); nmi_exit(); + + return 0; } +#endif -static void performance_monitor_exception_async(struct pt_regs *regs) +DECLARE_INTERRUPT_HANDLER_ASYNC(performance_monitor_exception_async); +DEFINE_INTERRUPT_HANDLER_ASYNC(performance_monitor_exception_async) { irq_enter(); @@ -1948,7 +1966,7 @@ static void performance_monitor_exception_async(struct pt_regs *regs) irq_exit(); } -void performance_monitor_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_RAW(performance_monitor_exception) { /* * On 64-bit, if perf interrupts hit in a local_irq_disable @@ -1960,6 +1978,8 @@ void performance_monitor_exception(struct pt_regs *regs) performance_monitor_exception_nmi(regs); else performance_monitor_exception_async(regs); + + return 0; } #ifdef CONFIG_PPC_ADV_DEBUG_REGS @@ -2022,7 +2042,7 @@ static void handle_debug(struct pt_regs *regs, unsigned long debug_status) mtspr(SPRN_DBCR0, current->thread.debug.dbcr0); } -void DebugException(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(DebugException) { unsigned long debug_status = regs->dsisr; @@ -2095,7 +2115,7 @@ NOKPROBE_SYMBOL(DebugException); #endif /* CONFIG_PPC_ADV_DEBUG_REGS */ #ifdef CONFIG_ALTIVEC -void altivec_assist_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(altivec_assist_exception) { int err; @@ -2129,7 +2149,7 @@ void altivec_assist_exception(struct pt_regs *regs) #endif /* CONFIG_ALTIVEC */ #ifdef CONFIG_FSL_BOOKE -void CacheLockingException(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(CacheLockingException) { unsigned long error_code = regs->dsisr; @@ -2144,7 +2164,7 @@ void CacheLockingException(struct pt_regs *regs) #endif /* CONFIG_FSL_BOOKE */ #ifdef CONFIG_SPE -void SPEFloatingPointException(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(SPEFloatingPointException) { extern int do_spe_mathemu(struct pt_regs *regs); unsigned long spefscr; @@ -2196,7 +2216,7 @@ void SPEFloatingPointException(struct pt_regs *regs) return; } -void SPEFloatingPointRoundException(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(SPEFloatingPointRoundException) { extern int speround_handler(struct pt_regs *regs); int err; @@ -2238,7 +2258,7 @@ void SPEFloatingPointRoundException(struct pt_regs *regs) * in the MSR is 0. This indicates that SRR0/1 are live, and that * we therefore lost state by taking this exception. */ -void unrecoverable_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(unrecoverable_exception) { pr_emerg("Unrecoverable exception %lx at %lx (msr=%lx)\n", regs->trap, regs->nip, regs->msr); @@ -2258,7 +2278,7 @@ void __attribute__ ((weak)) WatchdogHandler(struct pt_regs *regs) return; } -void WatchdogException(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(WatchdogException) /* XXX NMI? async? */ { printk (KERN_EMERG "PowerPC Book-E Watchdog Exception\n"); WatchdogHandler(regs); @@ -2269,7 +2289,7 @@ void WatchdogException(struct pt_regs *regs) * We enter here if we discover during exception entry that we are * running in supervisor mode with a userspace value in the stack pointer. 
*/ -void kernel_bad_stack(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(kernel_bad_stack) { printk(KERN_EMERG "Bad kernel stack pointer %lx at %lx\n", regs->gpr[1], regs->nip); diff --git a/arch/powerpc/kernel/watchdog.c b/arch/powerpc/kernel/watchdog.c index 3ae13c2a10cf..be6b22838555 100644 --- a/arch/powerpc/kernel/watchdog.c +++ b/arch/powerpc/kernel/watchdog.c @@ -26,6 +26,7 @@ #include #include +#include #include #include @@ -248,14 +249,14 @@ static void watchdog_timer_interrupt(int cpu) watchdog_smp_panic(cpu, tb); } -void soft_nmi_interrupt(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_NMI(soft_nmi_interrupt) { unsigned long flags; int cpu = raw_smp_processor_id(); u64 tb; if (!cpumask_test_cpu(cpu, &wd_cpus_enabled)) - return; + return 0; nmi_enter(); @@ -292,6 +293,8 @@ void soft_nmi_interrupt(struct pt_regs *regs) out: nmi_exit(); + + return 0; } static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index d348e77cee20..43e64e60ee33 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -53,6 +53,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 8053efdf7ea7..10fc274bea65 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -17,6 +17,7 @@ #include #include +#include #include #include #include diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 288a9820ec01..bd2bb73021d8 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -20,6 +20,7 @@ #include #include +#include #include #include #include diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index d7d3a80a51d4..cf167f6d825d 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -38,6 +38,7 @@ #include #include +#include #include #include #include @@ -1512,7 +1513,8 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap, } EXPORT_SYMBOL_GPL(hash_page); -static long __do_hash_fault(struct pt_regs *regs) +DECLARE_INTERRUPT_HANDLER_RET(__do_hash_fault); +DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault) { unsigned long ea = regs->dar; unsigned long dsisr = regs->dsisr; @@ -1565,7 +1567,11 @@ static long __do_hash_fault(struct pt_regs *regs) return err; } -long do_hash_fault(struct pt_regs *regs) +/* + * The _RAW interrupt entry checks for the in_nmi() case before + * running the full handler. + */ +DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault) { unsigned long dsisr = regs->dsisr; long err; @@ -1587,7 +1593,7 @@ long do_hash_fault(struct pt_regs *regs) * the access, or panic if there isn't a handler. 
*/ if (unlikely(in_nmi())) { - bad_page_fault(regs, SIGSEGV); + do_bad_page_fault_segv(regs); return 0; } diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c index 14c62b685f0c..c91bd85eb90e 100644 --- a/arch/powerpc/mm/book3s64/slb.c +++ b/arch/powerpc/mm/book3s64/slb.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include @@ -813,7 +814,7 @@ static long slb_allocate_user(struct mm_struct *mm, unsigned long ea) return slb_insert_entry(ea, context, flags, ssize, false); } -long do_slb_fault(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_RAW(do_slb_fault) { unsigned long ea = regs->dar; unsigned long id = get_region_id(ea); @@ -833,7 +834,7 @@ long do_slb_fault(struct pt_regs *regs) */ /* - * The interrupt state is not reconciled, for performance, so that + * This is a raw interrupt handler, for performance, so that * fast_interrupt_return can be used. The handler must not touch local * irq state, or schedule. We could test for usermode and upgrade to a * normal process context (synchronous) interrupt for those, which @@ -868,7 +869,7 @@ long do_slb_fault(struct pt_regs *regs) } } -void do_bad_slb_fault(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(do_bad_slb_fault) { int err = regs->result; diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index fef92efad733..f8eb42aaafab 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -34,6 +34,7 @@ #include #include +#include #include #include #include @@ -540,7 +541,7 @@ retry: } NOKPROBE_SYMBOL(__do_page_fault); -long do_page_fault(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER_RET(do_page_fault) { const struct exception_table_entry *entry; enum ctx_state prev_state; @@ -624,7 +625,7 @@ void bad_page_fault(struct pt_regs *regs, int sig) } #ifdef CONFIG_PPC_BOOK3S_64 -void do_bad_page_fault_segv(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(do_bad_page_fault_segv) { bad_page_fault(regs, SIGSEGV); } diff --git a/arch/powerpc/platforms/cell/ras.c b/arch/powerpc/platforms/cell/ras.c index 6ea480539419..4325c05bedd9 100644 --- a/arch/powerpc/platforms/cell/ras.c +++ b/arch/powerpc/platforms/cell/ras.c @@ -49,7 +49,7 @@ static void dump_fir(int cpu) } -void cbe_system_error_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(cbe_system_error_exception) { int cpu = smp_processor_id(); @@ -58,7 +58,7 @@ void cbe_system_error_exception(struct pt_regs *regs) dump_stack(); } -void cbe_maintenance_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(cbe_maintenance_exception) { int cpu = smp_processor_id(); @@ -70,7 +70,7 @@ void cbe_maintenance_exception(struct pt_regs *regs) dump_stack(); } -void cbe_thermal_exception(struct pt_regs *regs) +DEFINE_INTERRUPT_HANDLER(cbe_thermal_exception) { int cpu = smp_processor_id(); diff --git a/arch/powerpc/platforms/cell/ras.h b/arch/powerpc/platforms/cell/ras.h index 6c2e6bc0062e..226dbd48efad 100644 --- a/arch/powerpc/platforms/cell/ras.h +++ b/arch/powerpc/platforms/cell/ras.h @@ -2,9 +2,12 @@ #ifndef RAS_H #define RAS_H -extern void cbe_system_error_exception(struct pt_regs *regs); -extern void cbe_maintenance_exception(struct pt_regs *regs); -extern void cbe_thermal_exception(struct pt_regs *regs); +#include + +DECLARE_INTERRUPT_HANDLER(cbe_system_error_exception); +DECLARE_INTERRUPT_HANDLER(cbe_maintenance_exception); +DECLARE_INTERRUPT_HANDLER(cbe_thermal_exception); + extern void cbe_ras_init(void); #endif /* RAS_H */ diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c index 
e6f461812856..999997d9e9a9 100644 --- a/arch/powerpc/platforms/powernv/idle.c +++ b/arch/powerpc/platforms/powernv/idle.c @@ -14,6 +14,7 @@ #include #include +#include #include #include #include -- cgit v1.2.3 From a008f8f9fd67ffb13d906ef4ea6235a3d62dfdb6 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:41 +1000 Subject: powerpc/64s/hash: improve context tracking of hash faults This moves the 64s/hash context tracking from hash_page_mm() to __do_hash_fault(), so it is no longer done by the OCXL / SPU accelerator callers. That was certainly the wrong thing to be doing: those callers are not low level interrupt handlers, so they should already have entered kernel context tracking. Then remain in kernel context for the duration of the fault, rather than doing an enter/exit for the hash fault and another enter/exit for the page fault, which is pointless. Even so, calling exception_enter/exit in __do_hash_fault seems questionable, because that touches per-cpu variables, tracing, etc., which might have been interrupted by this hash fault or might themselves cause hash faults. But maybe I am missing something, because hash_page_mm() very deliberately calls trace_hash_fault() too, for example. So for now go with it; it is no worse than before in this regard. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-32-npiggin@gmail.com --- arch/powerpc/include/asm/bug.h | 1 + arch/powerpc/mm/book3s64/hash_utils.c | 7 ++++--- arch/powerpc/mm/fault.c | 39 +++++++++++++++++++++++++---------- 3 files changed, 33 insertions(+), 14 deletions(-) (limited to 'arch/powerpc/mm/book3s64') diff --git a/arch/powerpc/include/asm/bug.h b/arch/powerpc/include/asm/bug.h index c10ae0a9bbaf..d1635ffbb179 100644 --- a/arch/powerpc/include/asm/bug.h +++ b/arch/powerpc/include/asm/bug.h @@ -112,6 +112,7 @@ struct pt_regs; long do_page_fault(struct pt_regs *); +long hash__do_page_fault(struct pt_regs *); void bad_page_fault(struct pt_regs *, int); void __bad_page_fault(struct pt_regs *regs, int sig); void do_bad_page_fault_segv(struct pt_regs *regs); diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index cf167f6d825d..d681dc5a7b1c 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1289,7 +1289,6 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long flags) { bool is_thp; - enum ctx_state prev_state = exception_enter(); pgd_t *pgdir; unsigned long vsid; pte_t *ptep; @@ -1491,7 +1490,6 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, DBG_LOW(" -> rc=%d\n", rc); bail: - exception_exit(prev_state); return rc; } EXPORT_SYMBOL_GPL(hash_page_mm); @@ -1516,6 +1514,7 @@ EXPORT_SYMBOL_GPL(hash_page); DECLARE_INTERRUPT_HANDLER_RET(__do_hash_fault); DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault) { + enum ctx_state prev_state = exception_enter(); unsigned long ea = regs->dar; unsigned long dsisr = regs->dsisr; unsigned long access = _PAGE_PRESENT | _PAGE_READ; @@ -1564,6 +1563,8 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault) err = 0; } + exception_exit(prev_state); + return err; } @@ -1600,7 +1601,7 @@ DEFINE_INTERRUPT_HANDLER_RAW(do_hash_fault) err = __do_hash_fault(regs); if (err) { page_fault: - err = do_page_fault(regs); + err = hash__do_page_fault(regs); } return err; diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 8552ab6c008b..9c4220efc20f 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -387,7 +387,7 @@ static
void sanity_check_fault(bool is_write, bool is_user, * The return value is 0 if the fault was handled, or the signal * number if this is a kernel fault that can't be handled here. */ -static int __do_page_fault(struct pt_regs *regs, unsigned long address, +static int ___do_page_fault(struct pt_regs *regs, unsigned long address, unsigned long error_code) { struct vm_area_struct * vma; @@ -537,36 +537,53 @@ retry: return 0; } -NOKPROBE_SYMBOL(__do_page_fault); +NOKPROBE_SYMBOL(___do_page_fault); -DEFINE_INTERRUPT_HANDLER_RET(do_page_fault) +static long __do_page_fault(struct pt_regs *regs) { const struct exception_table_entry *entry; - enum ctx_state prev_state; long err; - prev_state = exception_enter(); - err = __do_page_fault(regs, regs->dar, regs->dsisr); + err = ___do_page_fault(regs, regs->dar, regs->dsisr); if (likely(!err)) - goto out; + return err; entry = search_exception_tables(regs->nip); if (likely(entry)) { instruction_pointer_set(regs, extable_fixup(entry)); - err = 0; + return 0; } else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64)) { - /* 32 and 64e handle this in asm */ __bad_page_fault(regs, err); - err = 0; + return 0; + } else { + /* 32 and 64e handle the bad page fault in asm */ + return err; } +} +NOKPROBE_SYMBOL(__do_page_fault); + +DEFINE_INTERRUPT_HANDLER_RET(do_page_fault) +{ + enum ctx_state prev_state = exception_enter(); + long err; + + err = __do_page_fault(regs); -out: exception_exit(prev_state); return err; } NOKPROBE_SYMBOL(do_page_fault); +#ifdef CONFIG_PPC_BOOK3S_64 +/* Same as do_page_fault but interrupt entry has already run in do_hash_fault */ +long hash__do_page_fault(struct pt_regs *regs) +{ + return __do_page_fault(regs); +} +NOKPROBE_SYMBOL(hash__do_page_fault); +#endif + /* * bad_page_fault is called when we have a bad access from the kernel. * It is called from the DSI and ISI handlers in head.S and from some -- cgit v1.2.3 From 540d4d34bef4ec58aba12b159030492616d6f54e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sat, 30 Jan 2021 23:08:42 +1000 Subject: powerpc/64: context tracking move to interrupt wrappers This moves exception_enter/exit calls to wrapper functions for synchronous interrupts. More interrupt handlers are covered by this than previously. 
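As a rough illustration, the DEFINE_INTERRUPT_HANDLER wrapper used throughout this series has the following shape (a simplified sketch only; the real macros in arch/powerpc/include/asm/interrupt.h carry more variants and per-arch state):

#define DEFINE_INTERRUPT_HANDLER(func)					\
static void ____##func(struct pt_regs *regs);				\
									\
void func(struct pt_regs *regs)						\
{									\
	struct interrupt_state state;					\
									\
	/* on 64-bit this now does exception_enter() */		\
	interrupt_enter_prepare(regs, &state);				\
	____##func(regs);						\
	/* on 64-bit this now does exception_exit() */			\
	interrupt_exit_prepare(regs, &state);				\
}									\
									\
static void ____##func(struct pt_regs *regs)

With the bookkeeping hoisted into the wrapper, the per-handler exception_enter()/exception_exit() pairs and their bail: labels can simply be deleted, which is what the traps.c hunks below do.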
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210130130852.2952424-33-npiggin@gmail.com --- arch/powerpc/include/asm/interrupt.h | 9 ++++++ arch/powerpc/kernel/traps.c | 58 +++++------------------------------ arch/powerpc/mm/book3s64/hash_utils.c | 3 -- arch/powerpc/mm/fault.c | 9 +----- 4 files changed, 17 insertions(+), 62 deletions(-) (limited to 'arch/powerpc/mm/book3s64') diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 488bdd5bd922..e65ce3e2b071 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -7,10 +7,16 @@ #include struct interrupt_state { +#ifdef CONFIG_PPC64 + enum ctx_state ctx_state; +#endif }; static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrupt_state *state) { +#ifdef CONFIG_PPC64 + state->ctx_state = exception_enter(); +#endif } /* @@ -29,6 +35,9 @@ static inline void interrupt_enter_prepare(struct pt_regs *regs, struct interrup */ static inline void interrupt_exit_prepare(struct pt_regs *regs, struct interrupt_state *state) { +#ifdef CONFIG_PPC64 + exception_exit(state->ctx_state); +#endif } static inline void interrupt_async_enter_prepare(struct pt_regs *regs, struct interrupt_state *state) diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 20c90a3548f6..9c26fb41b275 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1087,41 +1087,28 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(handle_hmi_exception) DEFINE_INTERRUPT_HANDLER(unknown_exception) { - enum ctx_state prev_state = exception_enter(); - printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n", regs->nip, regs->msr, regs->trap); _exception(SIGTRAP, regs, TRAP_UNK, 0); - - exception_exit(prev_state); } DEFINE_INTERRUPT_HANDLER_ASYNC(unknown_async_exception) { - enum ctx_state prev_state = exception_enter(); - printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n", regs->nip, regs->msr, regs->trap); _exception(SIGTRAP, regs, TRAP_UNK, 0); - - exception_exit(prev_state); } DEFINE_INTERRUPT_HANDLER(instruction_breakpoint_exception) { - enum ctx_state prev_state = exception_enter(); - if (notify_die(DIE_IABR_MATCH, "iabr_match", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) - goto bail; + return; if (debugger_iabr_match(regs)) - goto bail; + return; _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip); - -bail: - exception_exit(prev_state); } DEFINE_INTERRUPT_HANDLER(RunModeException) @@ -1131,8 +1118,6 @@ DEFINE_INTERRUPT_HANDLER(RunModeException) DEFINE_INTERRUPT_HANDLER(single_step_exception) { - enum ctx_state prev_state = exception_enter(); - clear_single_step(regs); clear_br_trace(regs); @@ -1141,14 +1126,11 @@ DEFINE_INTERRUPT_HANDLER(single_step_exception) if (notify_die(DIE_SSTEP, "single_step", regs, 5, 5, SIGTRAP) == NOTIFY_STOP) - goto bail; + return; if (debugger_sstep(regs)) - goto bail; + return; _exception(SIGTRAP, regs, TRAP_TRACE, regs->nip); - -bail: - exception_exit(prev_state); } NOKPROBE_SYMBOL(single_step_exception); @@ -1591,11 +1573,7 @@ sigill: DEFINE_INTERRUPT_HANDLER(program_check_exception) { - enum ctx_state prev_state = exception_enter(); - do_program_check(regs); - - exception_exit(prev_state); } NOKPROBE_SYMBOL(program_check_exception); @@ -1605,25 +1583,19 @@ NOKPROBE_SYMBOL(program_check_exception); */ DEFINE_INTERRUPT_HANDLER(emulation_assist_interrupt) { - enum ctx_state prev_state = exception_enter(); - regs->msr |= REASON_ILLEGAL; do_program_check(regs); - - exception_exit(prev_state); } 
NOKPROBE_SYMBOL(emulation_assist_interrupt); DEFINE_INTERRUPT_HANDLER(alignment_exception) { - enum ctx_state prev_state = exception_enter(); int sig, code, fixed = 0; unsigned long reason; interrupt_cond_local_irq_enable(regs); reason = get_reason(regs); - if (reason & REASON_BOUNDARY) { sig = SIGBUS; code = BUS_ADRALN; @@ -1631,7 +1603,7 @@ DEFINE_INTERRUPT_HANDLER(alignment_exception) } if (tm_abort_check(regs, TM_CAUSE_ALIGNMENT | TM_CAUSE_PERSISTENT)) - goto bail; + return; /* we don't implement logging of alignment exceptions */ if (!(current->thread.align_ctl & PR_UNALIGN_SIGBUS)) @@ -1641,7 +1613,7 @@ DEFINE_INTERRUPT_HANDLER(alignment_exception) /* skip over emulated instruction */ regs->nip += inst_length(reason); emulate_single_step(regs); - goto bail; + return; } /* Operand address was bad */ @@ -1657,9 +1629,6 @@ bad: _exception(sig, regs, code, regs->dar); else bad_page_fault(regs, sig); - -bail: - exception_exit(prev_state); } DEFINE_INTERRUPT_HANDLER(StackOverflow) @@ -1673,41 +1642,28 @@ DEFINE_INTERRUPT_HANDLER(StackOverflow) DEFINE_INTERRUPT_HANDLER(stack_overflow_exception) { - enum ctx_state prev_state = exception_enter(); - die("Kernel stack overflow", regs, SIGSEGV); - - exception_exit(prev_state); } DEFINE_INTERRUPT_HANDLER(kernel_fp_unavailable_exception) { - enum ctx_state prev_state = exception_enter(); - printk(KERN_EMERG "Unrecoverable FP Unavailable Exception " "%lx at %lx\n", regs->trap, regs->nip); die("Unrecoverable FP Unavailable Exception", regs, SIGABRT); - - exception_exit(prev_state); } DEFINE_INTERRUPT_HANDLER(altivec_unavailable_exception) { - enum ctx_state prev_state = exception_enter(); - if (user_mode(regs)) { /* A user program has executed an altivec instruction, but this kernel doesn't support altivec. */ _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); - goto bail; + return; } printk(KERN_EMERG "Unrecoverable VMX/Altivec Unavailable Exception " "%lx at %lx\n", regs->trap, regs->nip); die("Unrecoverable VMX/Altivec Unavailable Exception", regs, SIGABRT); - -bail: - exception_exit(prev_state); } DEFINE_INTERRUPT_HANDLER(vsx_unavailable_exception) diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index d681dc5a7b1c..fb7c10524bcd 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1514,7 +1514,6 @@ EXPORT_SYMBOL_GPL(hash_page); DECLARE_INTERRUPT_HANDLER_RET(__do_hash_fault); DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault) { - enum ctx_state prev_state = exception_enter(); unsigned long ea = regs->dar; unsigned long dsisr = regs->dsisr; unsigned long access = _PAGE_PRESENT | _PAGE_READ; @@ -1563,8 +1562,6 @@ DEFINE_INTERRUPT_HANDLER_RET(__do_hash_fault) err = 0; } - exception_exit(prev_state); - return err; } diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 9c4220efc20f..b26a7643fc6e 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -564,14 +564,7 @@ NOKPROBE_SYMBOL(__do_page_fault); DEFINE_INTERRUPT_HANDLER_RET(do_page_fault) { - enum ctx_state prev_state = exception_enter(); - long err; - - err = __do_page_fault(regs); - - exception_exit(prev_state); - - return err; + return __do_page_fault(regs); } NOKPROBE_SYMBOL(do_page_fault); -- cgit v1.2.3 From a2496049f1f1006178d0db706a8451dd03bd3ec6 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 17 Dec 2020 23:47:25 +1000 Subject: powerpc/64s/radix: add warning and comments in mm_cpumask trim Add a comment explaining part of the logic for mm_cpumask trimming, and add a 
(hopefully graceful) check and warning in case something gets it wrong. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201217134731.488135-2-npiggin@gmail.com --- arch/powerpc/mm/book3s64/radix_tlb.c | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) (limited to 'arch/powerpc/mm/book3s64') diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index fb66d154b26c..40f496a45cd8 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -653,13 +653,14 @@ static void do_exit_flush_lazy_tlb(void *arg) { struct mm_struct *mm = arg; unsigned long pid = mm->context.id; + int cpu = smp_processor_id(); /* * A kthread could have done a mmget_not_zero() after the flushing CPU - * checked mm_is_singlethreaded, and be in the process of - * kthread_use_mm when interrupted here. In that case, current->mm will - * be set to mm, because kthread_use_mm() setting ->mm and switching to - * the mm is done with interrupts off. + * checked mm_cpumask, and be in the process of kthread_use_mm when + * interrupted here. In that case, current->mm will be set to mm, + * because kthread_use_mm() setting ->mm and switching to the mm is + * done with interrupts off. */ if (current->mm == mm) goto out_flush; @@ -673,8 +674,22 @@ static void do_exit_flush_lazy_tlb(void *arg) mmdrop(mm); } - atomic_dec(&mm->context.active_cpus); - cpumask_clear_cpu(smp_processor_id(), mm_cpumask(mm)); + /* + * This IPI is only initiated from a CPU which is running mm which + * is a single-threaded process, so there will not be another racing + * IPI coming in where we would find our cpumask already clear. + * + * Nothing else clears our bit in the cpumask except CPU offlining, + * in which case we should not be taking IPIs here. However check + * this just in case the logic is wrong somewhere, and don't underflow + * the active_cpus count. + */ + if (cpumask_test_cpu(cpu, mm_cpumask(mm))) { + atomic_dec(&mm->context.active_cpus); + cpumask_clear_cpu(cpu, mm_cpumask(mm)); + } else { + WARN_ON_ONCE(1); + } out_flush: _tlbiel_pid(pid, RIC_FLUSH_ALL); -- cgit v1.2.3 From 26418b36a11f2eaf2556aa8cefe86132907e311f Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 17 Dec 2020 23:47:26 +1000 Subject: powerpc/64s/radix: refactor TLB flush type selection The logic that decides what kind of TLB flush is required (local, global, or IPI) is currently duplicated across the several kinds of TLB flush. Move it all into a single function, which may issue IPIs if necessary and returns the flush type to be used.
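Each flush routine then reduces to a dispatch on the returned type. Schematically, the pattern the hunks below install looks like this (a simplified sketch using the PID flush as the example; the real global path also selects between tlbie, multicast tlbiel, and hypervisor RPT invalidation):

	enum tlb_flush_type type;

	preempt_disable();
	smp_mb();	/* order mm_cpumask loads vs prior PTE clears */
	type = flush_type_needed(mm, false);
	if (type == FLUSH_TYPE_GLOBAL)
		_tlbie_pid(pid, RIC_FLUSH_TLB);		/* broadcast invalidate */
	else
		_tlbiel_pid(pid, RIC_FLUSH_TLB);	/* CPU-local invalidate */
	preempt_enable();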
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201217134731.488135-3-npiggin@gmail.com --- arch/powerpc/mm/book3s64/radix_tlb.c | 176 +++++++++++++++++++---------------- 1 file changed, 94 insertions(+), 82 deletions(-) (limited to 'arch/powerpc/mm/book3s64') diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index 40f496a45cd8..c7c4552f0cc2 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -627,15 +627,6 @@ void radix__local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmadd } EXPORT_SYMBOL(radix__local_flush_tlb_page); -static bool mm_is_singlethreaded(struct mm_struct *mm) -{ - if (atomic_read(&mm->context.copros) > 0) - return false; - if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm) - return true; - return false; -} - static bool mm_needs_flush_escalation(struct mm_struct *mm) { /* @@ -707,10 +698,58 @@ static void exit_flush_lazy_tlbs(struct mm_struct *mm) smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb, (void *)mm, 1); } +#else /* CONFIG_SMP */ +static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { } +#endif /* CONFIG_SMP */ + +enum tlb_flush_type { + FLUSH_TYPE_LOCAL, + FLUSH_TYPE_GLOBAL, +}; + +static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm) +{ + if (mm_is_thread_local(mm)) + return FLUSH_TYPE_LOCAL; + + /* Coprocessors require TLBIE to invalidate nMMU. */ + if (atomic_read(&mm->context.copros) > 0) + return FLUSH_TYPE_GLOBAL; + + /* + * In the fullmm case there's no point doing the exit_flush_lazy_tlbs + * because the mm is being taken down anyway, and a TLBIE tends to + * be faster than an IPI+TLBIEL. + */ + if (fullmm) + return FLUSH_TYPE_GLOBAL; + + /* + * If we are running the only thread of a single-threaded process, + * then we should almost always be able to trim off the rest of the + * CPU mask (except in the case of use_mm() races), so always try + * trimming the mask. + */ + if (atomic_read(&mm->mm_users) <= 1 && current->mm == mm) { + exit_flush_lazy_tlbs(mm); + /* + * use_mm() race could prevent IPIs from being able to clear + * the cpumask here, however those users are established + * after our first check (and so after the PTEs are removed), + * and the TLB still gets flushed by the IPI, so this CPU + * will only require a local flush. + */ + return FLUSH_TYPE_LOCAL; + } + + return FLUSH_TYPE_GLOBAL; +} +#ifdef CONFIG_SMP void radix__flush_tlb_mm(struct mm_struct *mm) { unsigned long pid; + enum tlb_flush_type type; pid = mm->context.id; if (unlikely(pid == MMU_NO_CONTEXT)) @@ -718,16 +757,13 @@ void radix__flush_tlb_mm(struct mm_struct *mm) preempt_disable(); /* - * Order loads of mm_cpumask vs previous stores to clear ptes before - * the invalidate. See barrier in switch_mm_irqs_off + * Order loads of mm_cpumask (in flush_type_needed) vs previous + * stores to clear ptes before the invalidate. 
See barrier in + * switch_mm_irqs_off */ smp_mb(); - if (!mm_is_thread_local(mm)) { - if (unlikely(mm_is_singlethreaded(mm))) { - exit_flush_lazy_tlbs(mm); - goto local; - } - + type = flush_type_needed(mm, false); + if (type == FLUSH_TYPE_GLOBAL) { if (!mmu_has_feature(MMU_FTR_GTSE)) { unsigned long tgt = H_RPTI_TARGET_CMMU; @@ -744,7 +780,6 @@ void radix__flush_tlb_mm(struct mm_struct *mm) _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB); } } else { -local: _tlbiel_pid(pid, RIC_FLUSH_TLB); } preempt_enable(); @@ -754,6 +789,7 @@ EXPORT_SYMBOL(radix__flush_tlb_mm); static void __flush_all_mm(struct mm_struct *mm, bool fullmm) { unsigned long pid; + enum tlb_flush_type type; pid = mm->context.id; if (unlikely(pid == MMU_NO_CONTEXT)) @@ -761,13 +797,8 @@ static void __flush_all_mm(struct mm_struct *mm, bool fullmm) preempt_disable(); smp_mb(); /* see radix__flush_tlb_mm */ - if (!mm_is_thread_local(mm)) { - if (unlikely(mm_is_singlethreaded(mm))) { - if (!fullmm) { - exit_flush_lazy_tlbs(mm); - goto local; - } - } + type = flush_type_needed(mm, fullmm); + if (type == FLUSH_TYPE_GLOBAL) { if (!mmu_has_feature(MMU_FTR_GTSE)) { unsigned long tgt = H_RPTI_TARGET_CMMU; unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | @@ -782,7 +813,6 @@ static void __flush_all_mm(struct mm_struct *mm, bool fullmm) else _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL); } else { -local: _tlbiel_pid(pid, RIC_FLUSH_ALL); } preempt_enable(); @@ -798,6 +828,7 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, int psize) { unsigned long pid; + enum tlb_flush_type type; pid = mm->context.id; if (unlikely(pid == MMU_NO_CONTEXT)) @@ -805,11 +836,8 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, preempt_disable(); smp_mb(); /* see radix__flush_tlb_mm */ - if (!mm_is_thread_local(mm)) { - if (unlikely(mm_is_singlethreaded(mm))) { - exit_flush_lazy_tlbs(mm); - goto local; - } + type = flush_type_needed(mm, false); + if (type == FLUSH_TYPE_GLOBAL) { if (!mmu_has_feature(MMU_FTR_GTSE)) { unsigned long tgt, pg_sizes, size; @@ -827,7 +855,6 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, else _tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB); } else { -local: _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB); } preempt_enable(); @@ -843,8 +870,6 @@ void radix__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) } EXPORT_SYMBOL(radix__flush_tlb_page); -#else /* CONFIG_SMP */ -static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { } #endif /* CONFIG_SMP */ static void do_tlbiel_kernel(void *info) @@ -908,7 +933,9 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm, unsigned int page_shift = mmu_psize_defs[mmu_virtual_psize].shift; unsigned long page_size = 1UL << page_shift; unsigned long nr_pages = (end - start) >> page_shift; - bool local, full; + bool fullmm = (end == TLB_FLUSH_ALL); + bool flush_pid; + enum tlb_flush_type type; pid = mm->context.id; if (unlikely(pid == MMU_NO_CONTEXT)) @@ -916,24 +943,16 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm, preempt_disable(); smp_mb(); /* see radix__flush_tlb_mm */ - if (!mm_is_thread_local(mm)) { - if (unlikely(mm_is_singlethreaded(mm))) { - if (end != TLB_FLUSH_ALL) { - exit_flush_lazy_tlbs(mm); - goto is_local; - } - } - local = false; - full = (end == TLB_FLUSH_ALL || - nr_pages > tlb_single_page_flush_ceiling); - } else { -is_local: - local = true; - full = (end == TLB_FLUSH_ALL || - nr_pages > 
tlb_local_single_page_flush_ceiling); - } + type = flush_type_needed(mm, fullmm); + + if (fullmm) + flush_pid = true; + else if (type == FLUSH_TYPE_GLOBAL) + flush_pid = nr_pages > tlb_single_page_flush_ceiling; + else + flush_pid = nr_pages > tlb_local_single_page_flush_ceiling; - if (!mmu_has_feature(MMU_FTR_GTSE) && !local) { + if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) { unsigned long tgt = H_RPTI_TARGET_CMMU; unsigned long pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize); @@ -943,8 +962,8 @@ is_local: tgt |= H_RPTI_TARGET_NMMU; pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, pg_sizes, start, end); - } else if (full) { - if (local) { + } else if (flush_pid) { + if (type == FLUSH_TYPE_LOCAL) { _tlbiel_pid(pid, RIC_FLUSH_TLB); } else { if (cputlb_use_tlbie()) { @@ -967,7 +986,7 @@ is_local: hflush = true; } - if (local) { + if (type == FLUSH_TYPE_LOCAL) { asm volatile("ptesync": : :"memory"); __tlbiel_va_range(start, end, pid, page_size, mmu_virtual_psize); if (hflush) @@ -1100,32 +1119,28 @@ static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned int page_shift = mmu_psize_defs[psize].shift; unsigned long page_size = 1UL << page_shift; unsigned long nr_pages = (end - start) >> page_shift; - bool local, full; + bool fullmm = (end == TLB_FLUSH_ALL); + bool flush_pid; + enum tlb_flush_type type; pid = mm->context.id; if (unlikely(pid == MMU_NO_CONTEXT)) return; + fullmm = (end == TLB_FLUSH_ALL); + preempt_disable(); smp_mb(); /* see radix__flush_tlb_mm */ - if (!mm_is_thread_local(mm)) { - if (unlikely(mm_is_singlethreaded(mm))) { - if (end != TLB_FLUSH_ALL) { - exit_flush_lazy_tlbs(mm); - goto is_local; - } - } - local = false; - full = (end == TLB_FLUSH_ALL || - nr_pages > tlb_single_page_flush_ceiling); - } else { -is_local: - local = true; - full = (end == TLB_FLUSH_ALL || - nr_pages > tlb_local_single_page_flush_ceiling); - } + type = flush_type_needed(mm, fullmm); - if (!mmu_has_feature(MMU_FTR_GTSE) && !local) { + if (fullmm) + flush_pid = true; + else if (type == FLUSH_TYPE_GLOBAL) + flush_pid = nr_pages > tlb_single_page_flush_ceiling; + else + flush_pid = nr_pages > tlb_local_single_page_flush_ceiling; + + if (!mmu_has_feature(MMU_FTR_GTSE) && type == FLUSH_TYPE_GLOBAL) { unsigned long tgt = H_RPTI_TARGET_CMMU; unsigned long type = H_RPTI_TYPE_TLB; unsigned long pg_sizes = psize_to_rpti_pgsize(psize); @@ -1135,8 +1150,8 @@ is_local: if (atomic_read(&mm->context.copros) > 0) tgt |= H_RPTI_TARGET_NMMU; pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end); - } else if (full) { - if (local) { + } else if (flush_pid) { + if (type == FLUSH_TYPE_LOCAL) { _tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB); } else { if (cputlb_use_tlbie()) { @@ -1152,7 +1167,7 @@ is_local: } } else { - if (local) + if (type == FLUSH_TYPE_LOCAL) _tlbiel_va_range(start, end, pid, page_size, psize, also_pwc); else if (cputlb_use_tlbie()) _tlbie_va_range(start, end, pid, page_size, psize, also_pwc); @@ -1179,6 +1194,7 @@ static void radix__flush_tlb_pwc_range_psize(struct mm_struct *mm, unsigned long void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr) { unsigned long pid, end; + enum tlb_flush_type type; pid = mm->context.id; if (unlikely(pid == MMU_NO_CONTEXT)) @@ -1195,11 +1211,8 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr) /* Otherwise first do the PWC, then iterate the pages. 
*/ preempt_disable(); smp_mb(); /* see radix__flush_tlb_mm */ - if (!mm_is_thread_local(mm)) { - if (unlikely(mm_is_singlethreaded(mm))) { - exit_flush_lazy_tlbs(mm); - goto local; - } + type = flush_type_needed(mm, false); + if (type == FLUSH_TYPE_GLOBAL) { if (!mmu_has_feature(MMU_FTR_GTSE)) { unsigned long tgt, type, pg_sizes; @@ -1218,7 +1231,6 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr) _tlbiel_va_range_multicast(mm, addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); } else { -local: _tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); } -- cgit v1.2.3 From 54bb503345b81399575e2b7a3a6497ae212ad827 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 17 Dec 2020 23:47:27 +1000 Subject: powerpc/64s/radix: Check for no TLB flush required If there are no CPUs in mm_cpumask, no TLB flush is required at all. This patch adds a check for this case. Currently this is not tested for; in fact mm_is_thread_local() returns false if the current CPU is not in mm_cpumask, so the case is treated as a global flush. This can come up in some cases like exec failure before the new mm has ever been switched to. This patch reduces TLBIE instructions required to build a kernel from about 120,000 to 45,000. Another situation it could help is page reclaim, KSM, THP, etc. (i.e., async operations external to the process), where the process is sleeping and has all TLBs flushed out of all CPUs. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201217134731.488135-4-npiggin@gmail.com --- arch/powerpc/mm/book3s64/radix_tlb.c | 38 ++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 13 deletions(-) (limited to 'arch/powerpc/mm/book3s64') diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index c7c4552f0cc2..3d8b494c39f6 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -703,13 +703,19 @@ static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { } #endif /* CONFIG_SMP */ enum tlb_flush_type { + FLUSH_TYPE_NONE, FLUSH_TYPE_LOCAL, FLUSH_TYPE_GLOBAL, }; static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm) { - if (mm_is_thread_local(mm)) + int active_cpus = atomic_read(&mm->context.active_cpus); + int cpu = smp_processor_id(); + + if (active_cpus == 0) + return FLUSH_TYPE_NONE; + if (active_cpus == 1 && cpumask_test_cpu(cpu, mm_cpumask(mm))) return FLUSH_TYPE_LOCAL; /* Coprocessors require TLBIE to invalidate nMMU.
*/ @@ -763,7 +769,9 @@ void radix__flush_tlb_mm(struct mm_struct *mm) */ smp_mb(); type = flush_type_needed(mm, false); - if (type == FLUSH_TYPE_GLOBAL) { + if (type == FLUSH_TYPE_LOCAL) { + _tlbiel_pid(pid, RIC_FLUSH_TLB); + } else if (type == FLUSH_TYPE_GLOBAL) { if (!mmu_has_feature(MMU_FTR_GTSE)) { unsigned long tgt = H_RPTI_TARGET_CMMU; @@ -779,8 +787,6 @@ void radix__flush_tlb_mm(struct mm_struct *mm) } else { _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_TLB); } - } else { - _tlbiel_pid(pid, RIC_FLUSH_TLB); } preempt_enable(); } @@ -798,7 +804,9 @@ static void __flush_all_mm(struct mm_struct *mm, bool fullmm) preempt_disable(); smp_mb(); /* see radix__flush_tlb_mm */ type = flush_type_needed(mm, fullmm); - if (type == FLUSH_TYPE_GLOBAL) { + if (type == FLUSH_TYPE_LOCAL) { + _tlbiel_pid(pid, RIC_FLUSH_ALL); + } else if (type == FLUSH_TYPE_GLOBAL) { if (!mmu_has_feature(MMU_FTR_GTSE)) { unsigned long tgt = H_RPTI_TARGET_CMMU; unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | @@ -812,8 +820,6 @@ static void __flush_all_mm(struct mm_struct *mm, bool fullmm) _tlbie_pid(pid, RIC_FLUSH_ALL); else _tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL); - } else { - _tlbiel_pid(pid, RIC_FLUSH_ALL); } preempt_enable(); } @@ -837,7 +843,9 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, preempt_disable(); smp_mb(); /* see radix__flush_tlb_mm */ type = flush_type_needed(mm, false); - if (type == FLUSH_TYPE_GLOBAL) { + if (type == FLUSH_TYPE_LOCAL) { + _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB); + } else if (type == FLUSH_TYPE_GLOBAL) { if (!mmu_has_feature(MMU_FTR_GTSE)) { unsigned long tgt, pg_sizes, size; @@ -854,8 +862,6 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, _tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB); else _tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB); - } else { - _tlbiel_va(vmaddr, pid, psize, RIC_FLUSH_TLB); } preempt_enable(); } @@ -944,6 +950,8 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm, preempt_disable(); smp_mb(); /* see radix__flush_tlb_mm */ type = flush_type_needed(mm, fullmm); + if (type == FLUSH_TYPE_NONE) + goto out; if (fullmm) flush_pid = true; @@ -1008,6 +1016,7 @@ static inline void __radix__flush_tlb_range(struct mm_struct *mm, hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, false); } } +out: preempt_enable(); } @@ -1132,6 +1141,8 @@ static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm, preempt_disable(); smp_mb(); /* see radix__flush_tlb_mm */ type = flush_type_needed(mm, fullmm); + if (type == FLUSH_TYPE_NONE) + goto out; if (fullmm) flush_pid = true; @@ -1175,6 +1186,7 @@ static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm, _tlbiel_va_range_multicast(mm, start, end, pid, page_size, psize, also_pwc); } +out: preempt_enable(); } @@ -1212,7 +1224,9 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr) preempt_disable(); smp_mb(); /* see radix__flush_tlb_mm */ type = flush_type_needed(mm, false); - if (type == FLUSH_TYPE_GLOBAL) { + if (type == FLUSH_TYPE_LOCAL) { + _tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); + } else if (type == FLUSH_TYPE_GLOBAL) { if (!mmu_has_feature(MMU_FTR_GTSE)) { unsigned long tgt, type, pg_sizes; @@ -1230,8 +1244,6 @@ void radix__flush_tlb_collapsed_pmd(struct mm_struct *mm, unsigned long addr) else _tlbiel_va_range_multicast(mm, addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true); - } else { - _tlbiel_va_range(addr, end, pid, PAGE_SIZE, 
mmu_virtual_psize, true); } -- cgit v1.2.3 From 780de40601aabeca41bc9aa717a329a77aa85e1a Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 17 Dec 2020 23:47:28 +1000 Subject: powerpc/64s/radix: Allow mm_cpumask trimming from external sources mm_cpumask trimming is currently restricted to be issued by the current thread of a single-threaded mm. This patch relaxes that and allows the mask to be trimmed from any context. Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201217134731.488135-5-npiggin@gmail.com --- arch/powerpc/mm/book3s64/radix_tlb.c | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) (limited to 'arch/powerpc/mm/book3s64') diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index 3d8b494c39f6..c27ef2ff8950 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -666,20 +666,16 @@ static void do_exit_flush_lazy_tlb(void *arg) } /* - * This IPI is only initiated from a CPU which is running mm which - * is a single-threaded process, so there will not be another racing - * IPI coming in where we would find our cpumask already clear. - * - * Nothing else clears our bit in the cpumask except CPU offlining, - * in which case we should not be taking IPIs here. However check - * this just in case the logic is wrong somewhere, and don't underflow - * the active_cpus count. + * This IPI may be initiated from any source including those not + * running the mm, so there may be a racing IPI that comes after + * this one which finds the cpumask already clear. Check and avoid + * underflowing the active_cpus count in that case. The race should + * not otherwise be a problem, but the TLB must be flushed because + * that's what the caller expects. */ if (cpumask_test_cpu(cpu, mm_cpumask(mm))) { atomic_dec(&mm->context.active_cpus); cpumask_clear_cpu(cpu, mm_cpumask(mm)); - } else { - WARN_ON_ONCE(1); } out_flush: -- cgit v1.2.3 From 9393544842d6c85ebfc387c43a5059f8171d598f Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 17 Dec 2020 23:47:29 +1000 Subject: powerpc/64s/radix: occasionally attempt to trim mm_cpumask A single-threaded process that is flushing its own address space is so far the only case where the mm_cpumask is attempted to be trimmed. This patch expands that to flushes in other situations: multi-threaded processes and external sources. For now it's a relatively simple occasional trim attempt. The main aim is to add the mechanism; tweaking and tuning can come with more data.
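To give a sense of the cadence: flush_type_needed() ticks a per-CPU counter on each flush, and a trim is attempted once the counter reaches tlb_mm_cpumask_trim_timer (1073 in the diff below), so a CPU issuing around 10,000 flushes a second on behalf of remote mms would attempt a trim roughly 9 times a second. A minimal sketch of that per-CPU clock (illustrative names; it matches the shape of tick_and_test_trim_clock() below):

	static DEFINE_PER_CPU(unsigned int, trim_clock);

	/* Returns true once every 'interval' calls on this CPU. */
	static bool tick_and_test(unsigned int interval)
	{
		if (__this_cpu_inc_return(trim_clock) >= interval) {
			__this_cpu_write(trim_clock, 0);
			return true;
		}
		return false;
	}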
Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201217134731.488135-6-npiggin@gmail.com --- arch/powerpc/mm/book3s64/radix_tlb.c | 60 +++++++++++++++++++++++++++++++++--- 1 file changed, 56 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/mm/book3s64') diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index c27ef2ff8950..1ef5d4e1a50a 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -639,10 +639,8 @@ static bool mm_needs_flush_escalation(struct mm_struct *mm) return false; } -#ifdef CONFIG_SMP -static void do_exit_flush_lazy_tlb(void *arg) +static void exit_lazy_flush_tlb(struct mm_struct *mm) { - struct mm_struct *mm = arg; unsigned long pid = mm->context.id; int cpu = smp_processor_id(); @@ -682,6 +680,13 @@ out_flush: _tlbiel_pid(pid, RIC_FLUSH_ALL); } +#ifdef CONFIG_SMP +static void do_exit_flush_lazy_tlb(void *arg) +{ + struct mm_struct *mm = arg; + exit_lazy_flush_tlb(mm); +} + static void exit_flush_lazy_tlbs(struct mm_struct *mm) { /* @@ -694,10 +699,32 @@ static void exit_flush_lazy_tlbs(struct mm_struct *mm) smp_call_function_many(mm_cpumask(mm), do_exit_flush_lazy_tlb, (void *)mm, 1); } + #else /* CONFIG_SMP */ static inline void exit_flush_lazy_tlbs(struct mm_struct *mm) { } #endif /* CONFIG_SMP */ +static DEFINE_PER_CPU(unsigned int, mm_cpumask_trim_clock); + +/* + * Interval between flushes at which we send out IPIs to check whether the + * mm_cpumask can be trimmed for the case where it's not a single-threaded + * process flushing its own mm. The intent is to reduce the cost of later + * flushes. Don't want this to be so low that it adds noticable cost to TLB + * flushing, or so high that it doesn't help reduce global TLBIEs. + */ +static unsigned long tlb_mm_cpumask_trim_timer = 1073; + +static bool tick_and_test_trim_clock(void) +{ + if (__this_cpu_inc_return(mm_cpumask_trim_clock) == + tlb_mm_cpumask_trim_timer) { + __this_cpu_write(mm_cpumask_trim_clock, 0); + return true; + } + return false; +} + enum tlb_flush_type { FLUSH_TYPE_NONE, FLUSH_TYPE_LOCAL, @@ -711,8 +738,20 @@ static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm) if (active_cpus == 0) return FLUSH_TYPE_NONE; - if (active_cpus == 1 && cpumask_test_cpu(cpu, mm_cpumask(mm))) + if (active_cpus == 1 && cpumask_test_cpu(cpu, mm_cpumask(mm))) { + if (current->mm != mm) { + /* + * Asynchronous flush sources may trim down to nothing + * if the process is not running, so occasionally try + * to trim. + */ + if (tick_and_test_trim_clock()) { + exit_lazy_flush_tlb(mm); + return FLUSH_TYPE_NONE; + } + } return FLUSH_TYPE_LOCAL; + } /* Coprocessors require TLBIE to invalidate nMMU. */ if (atomic_read(&mm->context.copros) > 0) @@ -744,6 +783,19 @@ static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm) return FLUSH_TYPE_LOCAL; } + /* + * Occasionally try to trim down the cpumask. It's possible this can + * bring the mask to zero, which results in no flush. 
+ */ + if (tick_and_test_trim_clock()) { + exit_flush_lazy_tlbs(mm); + if (current->mm == mm) + return FLUSH_TYPE_LOCAL; + if (cpumask_test_cpu(cpu, mm_cpumask(mm))) + exit_lazy_flush_tlb(mm); + return FLUSH_TYPE_NONE; + } + return FLUSH_TYPE_GLOBAL; } -- cgit v1.2.3 From 032b7f08932c9b212952d6d585e45b2941b3e8be Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 17 Dec 2020 23:47:30 +1000 Subject: powerpc/64s/radix: serialize_against_pte_lookup IPIs trim mm_cpumask serialize_against_pte_lookup() performs IPIs to all CPUs in mm_cpumask. Take this opportunity to try to trim the CPU out of mm_cpumask. This can reduce the cost of future serialize_against_pte_lookup() and/or the cost of future TLB flushes. Reported-by: kernel test robot Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20201217134731.488135-7-npiggin@gmail.com --- arch/powerpc/mm/book3s64/pgtable.c | 13 ++++++++++--- arch/powerpc/mm/book3s64/radix_tlb.c | 20 +++++++++++++------- 2 files changed, 23 insertions(+), 10 deletions(-) (limited to 'arch/powerpc/mm/book3s64') diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 5b3a3bae21aa..78c492e86752 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -79,10 +79,17 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd)); } -static void do_nothing(void *unused) -{ +void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush); +static void do_serialize(void *arg) +{ + /* We've taken the IPI, so try to trim the mask while here */ + if (radix_enabled()) { + struct mm_struct *mm = arg; + exit_lazy_flush_tlb(mm, false); + } } + /* * Serialize against find_current_mm_pte which does lock-less * lookup in page tables with local interrupts disabled. For huge pages @@ -96,7 +103,7 @@ void serialize_against_pte_lookup(struct mm_struct *mm) { smp_mb(); - smp_call_function_many(mm_cpumask(mm), do_nothing, NULL, 1); + smp_call_function_many(mm_cpumask(mm), do_serialize, mm, 1); } /* diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index 1ef5d4e1a50a..d7f1a6bd08ef 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -639,7 +639,11 @@ static bool mm_needs_flush_escalation(struct mm_struct *mm) return false; } -static void exit_lazy_flush_tlb(struct mm_struct *mm) +/* + * If always_flush is true, then flush even if this CPU can't be removed + * from mm_cpumask. + */ +void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush) { unsigned long pid = mm->context.id; int cpu = smp_processor_id(); @@ -652,7 +656,7 @@ static void exit_lazy_flush_tlb(struct mm_struct *mm) * done with interrupts off.
*/ if (current->mm == mm) - goto out_flush; + goto out; if (current->active_mm == mm) { WARN_ON_ONCE(current->mm != NULL); @@ -674,17 +678,19 @@ static void exit_lazy_flush_tlb(struct mm_struct *mm) if (cpumask_test_cpu(cpu, mm_cpumask(mm))) { atomic_dec(&mm->context.active_cpus); cpumask_clear_cpu(cpu, mm_cpumask(mm)); + always_flush = true; } -out_flush: - _tlbiel_pid(pid, RIC_FLUSH_ALL); +out: + if (always_flush) + _tlbiel_pid(pid, RIC_FLUSH_ALL); } #ifdef CONFIG_SMP static void do_exit_flush_lazy_tlb(void *arg) { struct mm_struct *mm = arg; - exit_lazy_flush_tlb(mm); + exit_lazy_flush_tlb(mm, true); } static void exit_flush_lazy_tlbs(struct mm_struct *mm) @@ -746,7 +752,7 @@ static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm) * to trim. */ if (tick_and_test_trim_clock()) { - exit_lazy_flush_tlb(mm); + exit_lazy_flush_tlb(mm, true); return FLUSH_TYPE_NONE; } } @@ -792,7 +798,7 @@ static enum tlb_flush_type flush_type_needed(struct mm_struct *mm, bool fullmm) if (current->mm == mm) return FLUSH_TYPE_LOCAL; if (cpumask_test_cpu(cpu, mm_cpumask(mm))) - exit_lazy_flush_tlb(mm); + exit_lazy_flush_tlb(mm, true); return FLUSH_TYPE_NONE; } -- cgit v1.2.3 From 2bb421a3d93601aa81bc39af7aac7280303e0761 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 11 Feb 2021 00:08:03 +1100 Subject: powerpc/mm/64s: Fix no previous prototype warning As reported by lkp: arch/powerpc/mm/book3s64/radix_tlb.c:646:6: warning: no previous prototype for function 'exit_lazy_flush_tlb' Fix it by moving the prototype into the existing header. Fixes: 032b7f08932c ("powerpc/64s/radix: serialize_against_pte_lookup IPIs trim mm_cpumask") Reported-by: kernel test robot Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210210130804.3190952-2-mpe@ellerman.id.au --- arch/powerpc/mm/book3s64/internal.h | 2 ++ arch/powerpc/mm/book3s64/pgtable.c | 4 ++-- arch/powerpc/mm/book3s64/radix_tlb.c | 2 ++ 3 files changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/mm/book3s64') diff --git a/arch/powerpc/mm/book3s64/internal.h b/arch/powerpc/mm/book3s64/internal.h index c12d78ee42f5..5045048ce244 100644 --- a/arch/powerpc/mm/book3s64/internal.h +++ b/arch/powerpc/mm/book3s64/internal.h @@ -15,4 +15,6 @@ static inline bool stress_slb(void) void slb_setup_new_exec(void); +void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush); + #endif /* ARCH_POWERPC_MM_BOOK3S64_INTERNAL_H */ diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 78c492e86752..9ffa65074cb0 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -20,6 +20,8 @@ #include #include +#include "internal.h" + unsigned long __pmd_frag_nr; EXPORT_SYMBOL(__pmd_frag_nr); unsigned long __pmd_frag_size_shift; @@ -79,8 +81,6 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr, return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd)); } -void exit_lazy_flush_tlb(struct mm_struct *mm, bool always_flush); - static void do_serialize(void *arg) { /* We've taken the IPI, so try to trim the mask while here */ diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index d7f1a6bd08ef..409e61210789 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -18,6 +18,8 @@ #include #include +#include "internal.h" + #define RIC_FLUSH_TLB 0 #define RIC_FLUSH_PWC 1 #define RIC_FLUSH_ALL 2 -- cgit v1.2.3 From ec94b9b23d620d40ab2ced094a30c22bb8d69b9f Mon Sep 17 00:00:00 2001 
From: "Aneesh Kumar K.V" Date: Wed, 3 Feb 2021 10:28:11 +0530 Subject: powerpc/mm: Add PG_dcache_clean to indicate dcache clean state This just adds a better name for PG_arch_1. No functional change in this patch. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20210203045812.234439-2-aneesh.kumar@linux.ibm.com --- arch/powerpc/include/asm/cacheflush.h | 6 ++++++ arch/powerpc/include/asm/kvm_ppc.h | 4 ++-- arch/powerpc/mm/book3s64/hash_utils.c | 4 ++-- arch/powerpc/mm/mem.c | 4 ++-- arch/powerpc/mm/pgtable.c | 14 +++++++------- 5 files changed, 19 insertions(+), 13 deletions(-) (limited to 'arch/powerpc/mm/book3s64') diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h index 138e46d8c04e..f63495109f63 100644 --- a/arch/powerpc/include/asm/cacheflush.h +++ b/arch/powerpc/include/asm/cacheflush.h @@ -8,6 +8,12 @@ #include #include +/* + * This flag is used to indicate that the page pointed to by a pte is clean + * and does not require cleaning before returning it to the user. + */ +#define PG_dcache_clean PG_arch_1 + #ifdef CONFIG_PPC_BOOK3S_64 /* * Book3s has no ptesync after setting a pte, so without this ptesync it's diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 259ba4ce9ad3..46dcc40617ed 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -881,9 +881,9 @@ static inline void kvmppc_mmu_flush_icache(kvm_pfn_t pfn) /* Clear i-cache for new pages */ page = pfn_to_page(pfn); - if (!test_bit(PG_arch_1, &page->flags)) { + if (!test_bit(PG_dcache_clean, &page->flags)) { flush_dcache_icache_page(page); - set_bit(PG_arch_1, &page->flags); + set_bit(PG_dcache_clean, &page->flags); } } diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index fb7c10524bcd..581b20a2feaf 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -1144,10 +1144,10 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap) page = pte_page(pte); /* page is dirty */ - if (!test_bit(PG_arch_1, &page->flags) && !PageReserved(page)) { + if (!test_bit(PG_dcache_clean, &page->flags) && !PageReserved(page)) { if (trap == 0x400) { flush_dcache_icache_page(page); - set_bit(PG_arch_1, &page->flags); + set_bit(PG_dcache_clean, &page->flags); } else pp |= HPTE_R_N; } diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index ed64ca80d5fd..883e67d37bbc 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -489,8 +489,8 @@ void flush_dcache_page(struct page *page) if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) return; /* avoid an atomic op if possible */ - if (test_bit(PG_arch_1, &page->flags)) - clear_bit(PG_arch_1, &page->flags); + if (test_bit(PG_dcache_clean, &page->flags)) + clear_bit(PG_dcache_clean, &page->flags); } EXPORT_SYMBOL(flush_dcache_page); diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c index 3a41545e5c07..354611940118 100644 --- a/arch/powerpc/mm/pgtable.c +++ b/arch/powerpc/mm/pgtable.c @@ -82,9 +82,9 @@ static pte_t set_pte_filter_hash(pte_t pte) struct page *pg = maybe_pte_to_page(pte); if (!pg) return pte; - if (!test_bit(PG_arch_1, &pg->flags)) { + if (!test_bit(PG_dcache_clean, &pg->flags)) { flush_dcache_icache_page(pg); - set_bit(PG_arch_1, &pg->flags); + set_bit(PG_dcache_clean, &pg->flags); } } return pte; @@ -117,13 +117,13 @@ static inline pte_t set_pte_filter(pte_t pte) return pte; /* If the page clean, we
move on */ - if (test_bit(PG_arch_1, &pg->flags)) + if (test_bit(PG_dcache_clean, &pg->flags)) return pte; /* If it's an exec fault, we flush the cache and make it clean */ if (is_exec_fault()) { flush_dcache_icache_page(pg); - set_bit(PG_arch_1, &pg->flags); + set_bit(PG_dcache_clean, &pg->flags); return pte; } @@ -162,12 +162,12 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma, goto bail; /* If the page is already clean, we move on */ - if (test_bit(PG_arch_1, &pg->flags)) + if (test_bit(PG_dcache_clean, &pg->flags)) goto bail; - /* Clean the page and set PG_arch_1 */ + /* Clean the page and set PG_dcache_clean */ flush_dcache_icache_page(pg); - set_bit(PG_arch_1, &pg->flags); + set_bit(PG_dcache_clean, &pg->flags); bail: return pte_mkexec(pte); -- cgit v1.2.3
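For reference, the renamed bit supports the usual lazy icache-coherency idiom, roughly (a sketch of the pattern the kvm_ppc.h and pgtable.c hunks above implement):

	/* Flush the d-cache/i-cache pair only on a page's first executable
	 * use; PG_dcache_clean records that the flush has been done. */
	if (!test_bit(PG_dcache_clean, &page->flags)) {
		flush_dcache_icache_page(page);
		set_bit(PG_dcache_clean, &page->flags);
	}

flush_dcache_page() clears the bit again when the page contents may have changed, so the next executable use redoes the flush.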