From 33c45ef8adc8a7cf781b2566d50e6ea8e97b3596 Mon Sep 17 00:00:00 2001 From: Gregory CLEMENT Date: Mon, 19 Sep 2016 12:02:50 +0200 Subject: ARM: mvebu: Select corediv clk for all mvebu v7 SoC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the commit bd3677ff31a3 ("clk: mvebu: Remove corediv clock from Armada XP"), the corediv clk is no longer selected for Armada XP, even though this clock is used on Armada XP via the armada-370-corediv-clock compatible. While Armada 38x and Armada 375 got corediv support back with commit 1594d568c6e3 ("clk: mvebu: Move corediv config to mvebu config"), not only Armada XP was missed but also Armada 39x. In fact, all the SoCs selecting the MVEBU_V7 config need this clock: git grep "\-corediv-clock" arch/arm/boot/dts arch/arm/boot/dts/armada-370-xp.dtsi: compatible = "marvell,armada-370-corediv-clock"; arch/arm/boot/dts/armada-375.dtsi: compatible = "marvell,armada-375-corediv-clock"; arch/arm/boot/dts/armada-38x.dtsi: compatible = "marvell,armada-380-corediv-clock"; arch/arm/boot/dts/armada-39x.dtsi: compatible = "marvell,armada-390-corediv-clock" This commit fixes the behavior by letting MVEBU_V7 select MVEBU_CLK_COREDIV. Fixes: bd3677ff31a3 ("clk: mvebu: Remove corediv clock from Armada XP") Cc: stable@vger.kernel.org Reported-by: Uwe Kleine-König Acked-by: Uwe Kleine-König Signed-off-by: Gregory CLEMENT --- arch/arm/mach-mvebu/Kconfig | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-mvebu/Kconfig b/arch/arm/mach-mvebu/Kconfig index f9b6bd306cfe..541647f57192 100644 --- a/arch/arm/mach-mvebu/Kconfig +++ b/arch/arm/mach-mvebu/Kconfig @@ -23,6 +23,7 @@ config MACH_MVEBU_V7 select CACHE_L2X0 select ARM_CPU_SUSPEND select MACH_MVEBU_ANY + select MVEBU_CLK_COREDIV config MACH_ARMADA_370 bool "Marvell Armada 370 boards" @@ -32,7 +33,6 @@ config MACH_ARMADA_370 select CPU_PJ4B select MACH_MVEBU_V7 select PINCTRL_ARMADA_370 - select MVEBU_CLK_COREDIV help Say 'Y' here if you want your kernel to support boards based on the Marvell Armada 370 SoC with device tree. @@ -50,7 +50,6 @@ config MACH_ARMADA_375 select HAVE_SMP select MACH_MVEBU_V7 select PINCTRL_ARMADA_375 - select MVEBU_CLK_COREDIV help Say 'Y' here if you want your kernel to support boards based on the Marvell Armada 375 SoC with device tree. @@ -68,7 +67,6 @@ config MACH_ARMADA_38X select HAVE_SMP select MACH_MVEBU_V7 select PINCTRL_ARMADA_38X - select MVEBU_CLK_COREDIV help Say 'Y' here if you want your kernel to support boards based on the Marvell Armada 380/385 SoC with device tree. -- cgit v1.2.3 From 51227bf52008bd4c4c50da4b749bbc6e7bbbca52 Mon Sep 17 00:00:00 2001 From: Marcin Wojtas Date: Tue, 6 Sep 2016 19:41:11 +0200 Subject: arm64: dts: marvell: fix clocksource for CP110 master SPI0 I2C and SPI interfaces share common clock trees within the CP110 HW block. It turned out that the SPI0 interface had a wrong clock assignment in the device tree; this commit fixes it to the proper value.
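For context on how such a wrong specifier goes unnoticed: the consumer driver never names the clock cells itself, it simply asks the clock framework for "its" clock and derives its baud-rate divisors from whatever rate comes back. A minimal, hypothetical consumer sketch (example_spi_probe and its message are illustrative, not the actual Orion SPI driver):

  #include <linux/clk.h>
  #include <linux/platform_device.h>

  static int example_spi_probe(struct platform_device *pdev)
  {
          /* Resolves whatever the "clocks" DT cells point at. */
          struct clk *clk = devm_clk_get(&pdev->dev, NULL);
          int ret;

          if (IS_ERR(clk))
                  return PTR_ERR(clk);
          ret = clk_prepare_enable(clk);
          if (ret)
                  return ret;
          /* A wrong DT specifier silently yields a wrong base rate here,
           * and thus wrong SPI bus speeds. */
          dev_info(&pdev->dev, "base clock: %lu Hz\n", clk_get_rate(clk));
          return 0;
  }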
Fixes: 728dacc7f4dd ("arm64: dts: marvell: initial DT description of ...") Signed-off-by: Marcin Wojtas CC: v4.7+ Signed-off-by: Gregory CLEMENT --- arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi index da31bbbbb59e..399271853aad 100644 --- a/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi @@ -131,7 +131,7 @@ #address-cells = <0x1>; #size-cells = <0x0>; cell-index = <1>; - clocks = <&cpm_syscon0 0 3>; + clocks = <&cpm_syscon0 1 21>; status = "disabled"; }; -- cgit v1.2.3 From 0362fcc9d6834034bc0cdb1d9b02a1b9baf96a2a Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Thu, 22 Sep 2016 12:02:19 +0800 Subject: arm64: dts: rockchip: remove always-on and boot-on from vcc_sd Don't add these for vcc_sd; the mmc core/driver will control it. Otherwise it will waste energy even without an sd card in the slot. Moreover, it causes a bug: If we insert/remove sd card, we could see [9.337271] mmc0: new ultra high speed SDR25 SDHC card at address 0007 [9.345144] mmcblk0: mmc0:0007 SD32G 29.3 GiB This is okay for a normal sd insert/remove test, but while debugging some sdmmc issues I did an unbind/bind test, and there is an interesting phenomenon when we bind the driver again: [58.314069] mmc0: new high speed SDHC card at address 0007 [58.320282] mmcblk0: mmc0:0007 SD32G 29.3 GiB So the sd card can only support high speed without a power cycle, since vcc_sd is always on, which makes the sd card fail to reinit its internal ocr mask. Signed-off-by: Shawn Lin Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3368-geekbox.dts | 2 -- arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts | 2 -- 2 files changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/rockchip/rk3368-geekbox.dts b/arch/arm64/boot/dts/rockchip/rk3368-geekbox.dts index 46cdddfcea6c..353314ca7426 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368-geekbox.dts +++ b/arch/arm64/boot/dts/rockchip/rk3368-geekbox.dts @@ -258,8 +258,6 @@ }; vcc_sd: SWITCH_REG1 { - regulator-always-on; - regulator-boot-on; regulator-name = "vcc_sd"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts b/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts index 5797933ef80e..13b7f1edad10 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts +++ b/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts @@ -152,8 +152,6 @@ gpio = <&gpio3 11 GPIO_ACTIVE_LOW>; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <3300000>; - regulator-always-on; - regulator-boot-on; vin-supply = <&vcc_io>; }; -- cgit v1.2.3 From 3ce0fefc51bd56381b1b9a92835cf8f9db3f2ef8 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 29 Sep 2016 10:00:14 -0700 Subject: ARCv2: intc: untangle SMP, MCIP and IDU The IDU intc is technically part of MCIP (Multi-core IP), hence historically it was only available in an SMP hardware build (and thus only in an SMP kernel build). Now that the hardware restriction has been lifted, a UP kernel needs to support it as well. This requires breaking mcip.c into parts which are strictly SMP (inter-core interrupts) and IDU, which in reality is just another intc and thus has no bearing on SMP.
This change allows IDU in UP builds; with a suitable device tree, we can have the cascaded intc system ARCv2 core intc <---> ARCv2 IDU intc <---> peripherals Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 17 +++++++++-------- arch/arc/include/asm/mcip.h | 16 ++++++++++++++++ arch/arc/kernel/mcip.c | 31 +++++++++++-------------------- 3 files changed, 36 insertions(+), 28 deletions(-) (limited to 'arch') diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index ecd12379e2cd..6f67895cd9c4 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -186,14 +186,6 @@ if SMP config ARC_HAS_COH_CACHES def_bool n -config ARC_MCIP - bool "ARConnect Multicore IP (MCIP) Support " - depends on ISA_ARCV2 - help - This IP block enables SMP in ARC-HS38 cores. - It provides for cross-core interrupts, multi-core debug - hardware semaphores, shared memory,.... - config NR_CPUS int "Maximum number of CPUs (2-4096)" range 2 4096 @@ -211,6 +203,15 @@ config ARC_SMP_HALT_ON_RESET endif #SMP +config ARC_MCIP + bool "ARConnect Multicore IP (MCIP) Support " + depends on ISA_ARCV2 + default y if SMP + help + This IP block enables SMP in ARC-HS38 cores. + It provides for cross-core interrupts, multi-core debug + hardware semaphores, shared memory,.... + menuconfig ARC_CACHE bool "Enable Cache Support" default y diff --git a/arch/arc/include/asm/mcip.h b/arch/arc/include/asm/mcip.h index 847e3bbe387f..c8fbe4114bad 100644 --- a/arch/arc/include/asm/mcip.h +++ b/arch/arc/include/asm/mcip.h @@ -55,6 +55,22 @@ struct mcip_cmd { #define IDU_M_DISTRI_DEST 0x2 }; +struct mcip_bcr { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad3:8, + idu:1, llm:1, num_cores:6, + iocoh:1, gfrc:1, dbg:1, pad2:1, + msg:1, sem:1, ipi:1, pad:1, + ver:8; +#else + unsigned int ver:8, + pad:1, ipi:1, sem:1, msg:1, + pad2:1, dbg:1, gfrc:1, iocoh:1, + num_cores:6, llm:1, idu:1, + pad3:8; +#endif +}; + /* * MCIP programming model * diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c index 72f9179b1a24..c424d5abc318 100644 --- a/arch/arc/kernel/mcip.c +++ b/arch/arc/kernel/mcip.c @@ -15,11 +15,12 @@ #include #include -static char smp_cpuinfo_buf[128]; -static int idu_detected; - static DEFINE_RAW_SPINLOCK(mcip_lock); +#ifdef CONFIG_SMP + +static char smp_cpuinfo_buf[128]; + static void mcip_setup_per_cpu(int cpu) { smp_ipi_irq_setup(cpu, IPI_IRQ); @@ -86,21 +87,7 @@ static void mcip_ipi_clear(int irq) static void mcip_probe_n_setup(void) { - struct mcip_bcr { -#ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int pad3:8, - idu:1, llm:1, num_cores:6, - iocoh:1, gfrc:1, dbg:1, pad2:1, - msg:1, sem:1, ipi:1, pad:1, - ver:8; -#else - unsigned int ver:8, - pad:1, ipi:1, sem:1, msg:1, - pad2:1, dbg:1, gfrc:1, iocoh:1, - num_cores:6, llm:1, idu:1, - pad3:8; -#endif - } mp; + struct mcip_bcr mp; READ_BCR(ARC_REG_MCIP_BCR, mp); @@ -114,7 +101,6 @@ static void mcip_probe_n_setup(void) IS_AVAIL1(mp.gfrc, "GFRC")); cpuinfo_arc700[0].extn.gfrc = mp.gfrc; - idu_detected = mp.idu; if (mp.dbg) { __mcip_cmd_data(CMD_DEBUG_SET_SELECT, 0, 0xf); @@ -130,6 +116,8 @@ struct plat_smp_ops plat_smp_ops = { .ipi_clear = mcip_ipi_clear, }; +#endif + /*************************************************************************** * ARCv2 Interrupt Distribution Unit (IDU) * @@ -295,8 +283,11 @@ idu_of_init(struct device_node *intc, struct device_node *parent) /* Read IDU BCR to confirm nr_irqs */ int nr_irqs = of_irq_count(intc); int i, irq; + struct mcip_bcr mp; + + READ_BCR(ARC_REG_MCIP_BCR, mp); - if (!idu_detected) + if (!mp.idu) panic("IDU not detected, but DeviceTree using 
it"); pr_info("MCIP: IDU referenced from Devicetree %d irqs\n", nr_irqs); -- cgit v1.2.3 From 27f3d2a3b59f573a398c9acc810c16ebca07be78 Mon Sep 17 00:00:00 2001 From: Daniel Mentz Date: Tue, 4 Oct 2016 16:34:27 -0700 Subject: ARC: [build] Support gz, lzma compressed uImage Add support for lzma compressed uImage. Support for gzip was already available but could not be enabled because we were missing CONFIG_HAVE_KERNEL_GZIP in arch/arc/Kconfig. Signed-off-by: Daniel Mentz Cc: linux-snps-arc@lists.infradead.org Cc: Vineet Gupta Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 2 ++ arch/arc/boot/Makefile | 16 ++++++++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 6f67895cd9c4..ac0b309aced5 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -41,6 +41,8 @@ config ARC select PERF_USE_VMALLOC select HAVE_DEBUG_STACKOVERFLOW select HAVE_GENERIC_DMA_COHERENT + select HAVE_KERNEL_GZIP + select HAVE_KERNEL_LZMA config MIGHT_HAVE_PCI bool diff --git a/arch/arc/boot/Makefile b/arch/arc/boot/Makefile index e597cb34c16a..f94cf151e06a 100644 --- a/arch/arc/boot/Makefile +++ b/arch/arc/boot/Makefile @@ -14,9 +14,15 @@ UIMAGE_ENTRYADDR = $(LINUX_START_TEXT) suffix-y := bin suffix-$(CONFIG_KERNEL_GZIP) := gz +suffix-$(CONFIG_KERNEL_LZMA) := lzma -targets += uImage uImage.bin uImage.gz -extra-y += vmlinux.bin vmlinux.bin.gz +targets += uImage +targets += uImage.bin +targets += uImage.gz +targets += uImage.lzma +extra-y += vmlinux.bin +extra-y += vmlinux.bin.gz +extra-y += vmlinux.bin.lzma $(obj)/vmlinux.bin: vmlinux FORCE $(call if_changed,objcopy) @@ -24,12 +30,18 @@ $(obj)/vmlinux.bin: vmlinux FORCE $(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE $(call if_changed,gzip) +$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE + $(call if_changed,lzma) + $(obj)/uImage.bin: $(obj)/vmlinux.bin FORCE $(call if_changed,uimage,none) $(obj)/uImage.gz: $(obj)/vmlinux.bin.gz FORCE $(call if_changed,uimage,gzip) +$(obj)/uImage.lzma: $(obj)/vmlinux.bin.lzma FORCE + $(call if_changed,uimage,lzma) + $(obj)/uImage: $(obj)/uImage.$(suffix-y) @ln -sf $(notdir $<) $@ @echo ' Image $@ is ready' -- cgit v1.2.3 From 12e721964e7feb555c3ee499a3f85c194d3d36ea Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 12 Oct 2016 13:43:38 +0200 Subject: s390: ignore pkey system calls Ignore the pkey systems calls since they don't make any sense on s390. In addition any user could trigger a warning if issueing the pkey_free system call, if it would be wired up on a system without pkey support. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/unistd.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h index 02613bad8bbb..3066031a73fe 100644 --- a/arch/s390/include/asm/unistd.h +++ b/arch/s390/include/asm/unistd.h @@ -9,6 +9,9 @@ #include #define __IGNORE_time +#define __IGNORE_pkey_mprotect +#define __IGNORE_pkey_alloc +#define __IGNORE_pkey_free #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_SYS_ALARM -- cgit v1.2.3 From b5003b5f0a19b6b37ab32b1f0c6b5da2cb3f0903 Mon Sep 17 00:00:00 2001 From: Shyam Saini Date: Thu, 13 Oct 2016 21:50:07 +0530 Subject: s390/mm: use hugetlb_bad_size() Update setup_hugepagesz() to call hugetlb_bad_size() when unsupported hugepage size is found. 
Signed-off-by: Shyam Saini Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/mm/hugetlbpage.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index cd404aa3931c..4a0c5bce3552 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -217,6 +217,7 @@ static __init int setup_hugepagesz(char *opt) } else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) { hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); } else { + hugetlb_bad_size(); pr_err("hugepagesz= specifies an unsupported page size %s\n", string); return 0; -- cgit v1.2.3 From d0208639dbc6fe97a25054df44faa2d19aca9380 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 17 Oct 2016 11:08:31 +0200 Subject: s390/dumpstack: restore reliable indicator for call traces Before merging all different stack tracers the call traces printed had an indicator of whether an entry can be considered reliable or not. Unreliable entries were put in braces; reliable ones were not. Currently all lines contain these extra braces. This patch restores the old behaviour by adding an extra "reliable" parameter to the callback functions. Only show_trace currently makes use of it. Before: [ 0.804751] Call Trace: [ 0.804753] ([<000000000017d0e0>] try_to_wake_up+0x318/0x5e0) [ 0.804756] ([<0000000000161d64>] create_worker+0x174/0x1c0) After: [ 0.804751] Call Trace: [ 0.804753] ([<000000000017d0e0>] try_to_wake_up+0x318/0x5e0) [ 0.804756] [<0000000000161d64>] create_worker+0x174/0x1c0 Fixes: 758d39ebd3d5 ("s390/dumpstack: merge all four stack tracers") Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/processor.h | 2 +- arch/s390/kernel/dumpstack.c | 17 +++++++++++------ arch/s390/kernel/perf_event.c | 2 +- arch/s390/kernel/stacktrace.c | 4 ++-- arch/s390/oprofile/init.c | 2 +- 5 files changed, 16 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 03323175de30..602af692efdc 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -192,7 +192,7 @@ struct task_struct; struct mm_struct; struct seq_file; -typedef int (*dump_trace_func_t)(void *data, unsigned long address); +typedef int (*dump_trace_func_t)(void *data, unsigned long address, int reliable); void dump_trace(dump_trace_func_t func, void *data, struct task_struct *task, unsigned long sp); diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 6693383bc01b..518f615ad0a2 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -38,10 +38,10 @@ __dump_trace(dump_trace_func_t func, void *data, unsigned long sp, if (sp < low || sp > high - sizeof(*sf)) return sp; sf = (struct stack_frame *) sp; + if (func(data, sf->gprs[8], 0)) + return sp; /* Follow the backchain. */ while (1) { - if (func(data, sf->gprs[8])) - return sp; low = sp; sp = sf->back_chain; if (!sp) @@ -49,6 +49,8 @@ __dump_trace(dump_trace_func_t func, void *data, unsigned long sp, if (sp <= low || sp > high - sizeof(*sf)) return sp; sf = (struct stack_frame *) sp; + if (func(data, sf->gprs[8], 1)) + return sp; } /* Zero backchain detected, check for interrupt frame. 
*/ sp = (unsigned long) (sf + 1); @@ -56,7 +58,7 @@ __dump_trace(dump_trace_func_t func, void *data, unsigned long sp, return sp; regs = (struct pt_regs *) sp; if (!user_mode(regs)) { - if (func(data, regs->psw.addr)) + if (func(data, regs->psw.addr, 1)) return sp; } low = sp; @@ -90,7 +92,7 @@ struct return_address_data { int depth; }; -static int __return_address(void *data, unsigned long address) +static int __return_address(void *data, unsigned long address, int reliable) { struct return_address_data *rd = data; @@ -109,9 +111,12 @@ unsigned long return_address(int depth) } EXPORT_SYMBOL_GPL(return_address); -static int show_address(void *data, unsigned long address) +static int show_address(void *data, unsigned long address, int reliable) { - printk("([<%016lx>] %pSR)\n", address, (void *)address); + if (reliable) + printk(" [<%016lx>] %pSR \n", address, (void *)address); + else + printk("([<%016lx>] %pSR)\n", address, (void *)address); return 0; } diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index 17431f63de00..955a7b6fa0a4 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -222,7 +222,7 @@ static int __init service_level_perf_register(void) } arch_initcall(service_level_perf_register); -static int __perf_callchain_kernel(void *data, unsigned long address) +static int __perf_callchain_kernel(void *data, unsigned long address, int reliable) { struct perf_callchain_entry_ctx *entry = data; diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index 44f84b23d4e5..355db9db8210 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -27,12 +27,12 @@ static int __save_address(void *data, unsigned long address, int nosched) return 1; } -static int save_address(void *data, unsigned long address) +static int save_address(void *data, unsigned long address, int reliable) { return __save_address(data, address, 0); } -static int save_address_nosched(void *data, unsigned long address) +static int save_address_nosched(void *data, unsigned long address, int reliable) { return __save_address(data, address, 1); } diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c index 16f4c3960b87..9a4de4599c7b 100644 --- a/arch/s390/oprofile/init.c +++ b/arch/s390/oprofile/init.c @@ -13,7 +13,7 @@ #include #include -static int __s390_backtrace(void *data, unsigned long address) +static int __s390_backtrace(void *data, unsigned long address, int reliable) { unsigned int *depth = data; -- cgit v1.2.3 From a790634544f5f98364b0aafe9d7e669810d96360 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 17 Oct 2016 11:59:58 +0200 Subject: s390/dumpstack: use pr_cont where appropriate Use pr_cont instead of simple printk calls when lines will be continued. This fixes the kernel output of various lines printed on e.g. 
a warning: Before: [ 0.840604] Krnl PSW : 0404c00180000000 000000000017d1d2 [ 0.840606] (try_to_wake_up+0x382/0x5e0) [ 0.840610] R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 [ 0.840611] RI:0 EA:3 After: [ 0.831772] Krnl PSW : 0404c00180000000 000000000017d14a (try_to_wake_up+0x382/0x5e0) [ 0.831776] R:0 T:1 IO:0 EX:0 Key:0 M:1 W:0 P:0 AS:3 CC:0 PM:0 RI:0 EA:3 Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/dumpstack.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 518f615ad0a2..4bebe72b7780 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -168,13 +168,13 @@ void show_registers(struct pt_regs *regs) mode = user_mode(regs) ? "User" : "Krnl"; printk("%s PSW : %p %p", mode, (void *)regs->psw.mask, (void *)regs->psw.addr); if (!user_mode(regs)) - printk(" (%pSR)", (void *)regs->psw.addr); - printk("\n"); + pr_cont(" (%pSR)", (void *)regs->psw.addr); + pr_cont("\n"); printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x " "P:%x AS:%x CC:%x PM:%x", psw->r, psw->t, psw->i, psw->e, psw->key, psw->m, psw->w, psw->p, psw->as, psw->cc, psw->pm); - printk(" RI:%x EA:%x", psw->ri, psw->eaba); - printk("\n%s GPRS: %016lx %016lx %016lx %016lx\n", mode, + pr_cont(" RI:%x EA:%x\n", psw->ri, psw->eaba); + printk("%s GPRS: %016lx %016lx %016lx %016lx\n", mode, regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]); printk(" %016lx %016lx %016lx %016lx\n", regs->gprs[4], regs->gprs[5], regs->gprs[6], regs->gprs[7]); -- cgit v1.2.3 From 4d062487f3431f124e3a2420c0da0b7a2388dc80 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 17 Oct 2016 12:07:35 +0200 Subject: s390/disassambler: use pr_cont where appropriate Just like for dumpstack use pr_cont instead of simple printk calls to fix the output when disassembling a piece of code. Before: [ 0.840627] Krnl Code: 000000000017d1c6: a77400f7 brc 7,17d3b4 [ 0.840630] 000000000017d1ca: 92015000 mvi 0(%r5),1 [ 0.840634] #000000000017d1ce: a7f40001 brc 15,17d1d0 After: [ 0.831792] Krnl Code: 000000000017d13e: a77400f7 brc 7,17d32c 000000000017d142: 92015000 mvi 0(%r5),1 #000000000017d146: a7f40001 brc 15,17d148 Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/dis.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 43446fa2a4e5..c74c59236f44 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -2014,12 +2014,12 @@ void show_code(struct pt_regs *regs) *ptr++ = '\t'; ptr += print_insn(ptr, code + start, addr); start += opsize; - printk("%s", buffer); + pr_cont("%s", buffer); ptr = buffer; ptr += sprintf(ptr, "\n "); hops++; } - printk("\n"); + pr_cont("\n"); } void print_fn_code(unsigned char *code, unsigned long len) -- cgit v1.2.3 From dcddba96cdbc5d0e4d4a17bf22cfd9b2f038a4ca Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 17 Oct 2016 13:07:46 +0200 Subject: s390/dumpstack: get rid of return_address again With commit ef6000b4c670 ("Disable the __builtin_return_address() warning globally after all") the kernel does not warn at all again if __builtin_return_address(n) is called with n > 0. Besides the fact that this was a false warning on s390 anyway, due to the always present backchain, we can now revert commit 5606330627ab ("s390/dumpstack: implement and use return_address()") and simplify the code again.
After all I shouldn't have had return_address() implemented at all to work around this issue. So get rid of this again. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/ftrace.h | 4 +--- arch/s390/kernel/dumpstack.c | 24 ------------------------ 2 files changed, 1 insertion(+), 27 deletions(-) (limited to 'arch') diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index 64053d9ac3f2..836c56290499 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -12,9 +12,7 @@ #ifndef __ASSEMBLY__ -unsigned long return_address(int depth); - -#define ftrace_return_address(n) return_address(n) +#define ftrace_return_address(n) __builtin_return_address(n) void _mcount(void); void ftrace_caller(void); diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 4bebe72b7780..34345c0a3c46 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -87,30 +87,6 @@ void dump_trace(dump_trace_func_t func, void *data, struct task_struct *task, } EXPORT_SYMBOL_GPL(dump_trace); -struct return_address_data { - unsigned long address; - int depth; -}; - -static int __return_address(void *data, unsigned long address, int reliable) -{ - struct return_address_data *rd = data; - - if (rd->depth--) - return 0; - rd->address = address; - return 1; -} - -unsigned long return_address(int depth) -{ - struct return_address_data rd = { .depth = depth + 2 }; - - dump_trace(__return_address, &rd, NULL, current_stack_pointer()); - return rd.address; -} -EXPORT_SYMBOL_GPL(return_address); - static int show_address(void *data, unsigned long address, int reliable) { if (reliable) -- cgit v1.2.3 From 1b283eea6228880b765bc40fe4e555416437ce58 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 7 Oct 2016 10:52:17 +0200 Subject: ARM: dts: fix the SD card on the Snowball This fixes a very annoying regression on the Snowball SD card that has been around for a while. It turns out that the device tree does not configure the direction pins properly, nor does it set up the pins for the voltage converter properly at boot. Unless all things are correctly set up, the feedback clock will not work, and the driver spews messages on the console (but it works, very slowly): root@Ux500:/ mount /dev/mmcblk0p2 /mnt/ [ 9.953460] mmci-pl18x 80126000.sdi0_per1: error during DMA transfer! [ 9.960296] mmcblk0: error -110 sending status command, retrying [ 9.966461] mmcblk0: error -110 sending status command, retrying [ 9.972534] mmcblk0: error -110 sending status command, aborting Fix this by rectifying the device tree to correspond to that of the Ux500 HREF boards plus the DAT31DIR setting that is unique for the Snowball, and things start working smoothly. Add in the SDR12 and SDR25 modes which this host can do without any problems. I don't know if this has ever been correct, sadly. It works after this patch.
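As an aside on the cd-gpios property used in the diff below: the gpiod consumer API derives the connection id "cd" from the property name and folds the GPIO_ACTIVE_LOW flag in transparently. A hedged sketch of that mapping (not mmci's actual code, which goes through the mmc core's slot-gpio helpers):

  #include <linux/device.h>
  #include <linux/err.h>
  #include <linux/gpio/consumer.h>

  static bool example_card_present(struct device *dev)
  {
          /* "cd" matches the "cd-gpios" property on dev's DT node. */
          struct gpio_desc *cd = devm_gpiod_get(dev, "cd", GPIOD_IN);

          if (IS_ERR(cd))
                  return false;
          /* Returns 1 for "card present"; active-low from the DT
           * flags is already handled by gpiod. */
          return gpiod_get_value_cansleep(cd) == 1;
  }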
Cc: stable@vger.kernel.org Reported-by: Daniel Lezcano Cc: Ulf Hansson Signed-off-by: Linus Walleij Signed-off-by: Olof Johansson --- arch/arm/boot/dts/ste-snowball.dts | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/ste-snowball.dts b/arch/arm/boot/dts/ste-snowball.dts index b3df1c60d465..386eee6de232 100644 --- a/arch/arm/boot/dts/ste-snowball.dts +++ b/arch/arm/boot/dts/ste-snowball.dts @@ -239,14 +239,25 @@ arm,primecell-periphid = <0x10480180>; max-frequency = <100000000>; bus-width = <4>; + cap-sd-highspeed; cap-mmc-highspeed; + sd-uhs-sdr12; + sd-uhs-sdr25; + /* All direction control is used */ + st,sig-dir-cmd; + st,sig-dir-dat0; + st,sig-dir-dat2; + st,sig-dir-dat31; + st,sig-pin-fbclk; + full-pwr-cycle; vmmc-supply = <&ab8500_ldo_aux3_reg>; vqmmc-supply = <&vmmci>; pinctrl-names = "default", "sleep"; pinctrl-0 = <&sdi0_default_mode>; pinctrl-1 = <&sdi0_sleep_mode>; - cd-gpios = <&gpio6 26 GPIO_ACTIVE_LOW>; // 218 + /* GPIO218 MMC_CD */ + cd-gpios = <&gpio6 26 GPIO_ACTIVE_LOW>; status = "okay"; }; @@ -549,7 +560,7 @@ /* VMMCI level-shifter enable */ snowball_cfg3 { pins = "GPIO217_AH12"; - ste,config = <&gpio_out_lo>; + ste,config = <&gpio_out_hi>; }; /* VMMCI level-shifter voltage select */ snowball_cfg4 { -- cgit v1.2.3 From 2723605169d8f58ebc2881d2b8a59e6ee6fe074a Mon Sep 17 00:00:00 2001 From: Scott Branden Date: Sat, 8 Oct 2016 13:41:04 -0700 Subject: ARM: multi_v7_defconfig: Enable Intel e1000e driver Enable support for the Intel e1000e driver Signed-off-by: Ray Jui Signed-off-by: Scott Branden Signed-off-by: Olof Johansson --- arch/arm/configs/multi_v7_defconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 437d0740dec6..11f37ed1dbff 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -850,6 +850,7 @@ CONFIG_PWM_SUN4I=y CONFIG_PWM_TEGRA=y CONFIG_PWM_VT8500=y CONFIG_PHY_HIX5HD2_SATA=y +CONFIG_E1000E=y CONFIG_PWM_STI=y CONFIG_PWM_BCM2835=y CONFIG_PWM_BRCMSTB=m -- cgit v1.2.3 From 7c6273194445fe1316084d3096f9311c3dfa4da6 Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Tue, 18 Oct 2016 20:52:28 +0800 Subject: arm64: dts: rockchip: remove the abuse of keep-power-in-suspend It was invented for sdio only, and should not be used for sdmmc or emmc. Remove it. 
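What the property actually does, for context: the mmc core (via mmc_of_parse()) translates it into MMC_PM_KEEP_POWER, which keeps the card powered across suspend, something only SDIO cards with wakeup functionality need. A simplified sketch of that translation (illustrative, not the exact core code):

  #include <linux/mmc/host.h>
  #include <linux/property.h>

  static void example_parse_pm_caps(struct mmc_host *host)
  {
          /* Only SDIO cards that must stay powered for wake want this;
           * for sdmmc/emmc it merely defeats power cycling. */
          if (device_property_read_bool(host->parent, "keep-power-in-suspend"))
                  host->pm_caps |= MMC_PM_KEEP_POWER;
  }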
Signed-off-by: Shawn Lin Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3368-geekbox.dts | 1 - arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts | 2 -- 2 files changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/rockchip/rk3368-geekbox.dts b/arch/arm64/boot/dts/rockchip/rk3368-geekbox.dts index 353314ca7426..e5eeca2c2456 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368-geekbox.dts +++ b/arch/arm64/boot/dts/rockchip/rk3368-geekbox.dts @@ -116,7 +116,6 @@ cap-mmc-highspeed; clock-frequency = <150000000>; disable-wp; - keep-power-in-suspend; non-removable; num-slots = <1>; vmmc-supply = <&vcc_io>; diff --git a/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts b/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts index 13b7f1edad10..ea0a8eceefd4 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts +++ b/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts @@ -199,7 +199,6 @@ bus-width = <8>; cap-mmc-highspeed; disable-wp; - keep-power-in-suspend; mmc-pwrseq = <&emmc_pwrseq>; mmc-hs200-1_2v; mmc-hs200-1_8v; @@ -348,7 +347,6 @@ clock-freq-min-max = <400000 50000000>; cap-sd-highspeed; card-detect-delay = <200>; - keep-power-in-suspend; num-slots = <1>; pinctrl-names = "default"; pinctrl-0 = <&sdmmc_clk &sdmmc_cmd &sdmmc_cd &sdmmc_bus4>; -- cgit v1.2.3 From 04946fb60fb157faafa01658dff3131d49f49ccb Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 18 Oct 2016 10:24:49 +0100 Subject: ARM: fix oops when using older ARMv4T CPUs Alexander Shiyan reports that CLPS711x fails at boot time in the data exception handler due to a NULL pointer dereference. This is caused by the late-v4t abort handler overwriting R9 (which becomes zero). Fix this by making the abort handler save and restore R9. 
Unable to handle kernel NULL pointer dereference at virtual address 00000008 pgd = c3b58000 [00000008] *pgd=800000000, *pte=00000000, *ppte=feff4140 Internal error: Oops: 63c11817 [#1] PREEMPT ARM CPU: 0 PID: 448 Comm: ash Not tainted 4.8.1+ #1 Hardware name: Cirrus Logic CLPS711X (Device Tree Support) task: c39e03a0 ti: c3b4e000 task.ti: c3b4e000 PC is at __dabt_svc+0x4c/0x60 LR is at do_page_fault+0x144/0x2ac pc : [] lr : [] psr: 60000093 sp : c3b4fe6c ip : 00000001 fp : b6f1bf88 r10: c387a5a0 r9 : 00000000 r8 : e4e0e001 r7 : bee3ef83 r6 : 00100000 r5 : 80000013 r4 : c022fcf8 r3 : 00000000 r2 : 00000008 r1 : bf000000 r0 : 00000000 Flags: nZCv IRQs off FIQs on Mode SVC_32 ISA ARM Segment user Control: 0000217f Table: c3b58055 DAC: 00000055 Process ash (pid: 448, stack limit = 0xc3b4e190) Stack: (0xc3b4fe6c to 0xc3b50000) fe60: bee3ef83 c05168d1 ffffffff 00000000 c3adfe80 fe80: c3a03300 00000000 c3b4fed0 c3a03400 bee3ef83 c387a5a0 b6f1bf88 00000001 fea0: c3b4febc 00000076 c022fcf8 80000013 ffffffff 0000003f bf000000 bee3ef83 fec0: 00000004 00000000 c3adfe80 c00e432c 00000812 00000005 00000001 00000006 fee0: b6f1b000 00000000 00010000 0003c944 0004d000 0004d439 00010000 b6f1b000 ff00: 00000005 00000000 00015ecc c3b4fed0 0000000a 00000000 00000000 c00a1dc0 ff20: befff000 c3a03300 c3b4e000 c0507cd8 c0508024 fffffff8 c3a03300 00000000 ff40: c0516a58 c00a35bc c39e03a0 000001c0 bea84ce8 0004e008 c3b3a000 c00a3ac0 ff60: c3b40374 c3b3a000 bea84d11 00000000 c0500188 bea84d11 bea84ce8 00000001 ff80: 0000000b c000a304 c3b4e000 00000000 bea84ce4 c00a3cd0 00000000 bea84d11 ffa0: bea84ce8 c000a160 bea84d11 bea84ce8 bea84d11 bea84ce8 0004e008 0004d450 ffc0: bea84d11 bea84ce8 00000001 0000000b b6f45ee4 00000000 b6f5ff70 bea84ce4 ffe0: b6f2f130 bea84cb0 b6f2f194 b6ef29f4 a0000010 bea84d11 02c7cffa 02c7cffd [] (__dabt_svc) from [] (__copy_to_user_std+0xf8/0x330) [] (__copy_to_user_std) from [] +(load_elf_binary+0x920/0x107c) [] (load_elf_binary) from [] +(search_binary_handler+0x80/0x16c) [] (search_binary_handler) from [] +(do_execveat_common+0x418/0x600) [] (do_execveat_common) from [] (do_execve+0x28/0x30) [] (do_execve) from [] (ret_fast_syscall+0x0/0x30) Code: e1a0200d eb00136b e321f093 e59d104c (e5891008) ---[ end trace 4b4f8086ebef98c5 ]--- Fixes: e6978e4bf181 ("ARM: save and reset the address limit when entering an exception") Reported-by: Alexander Shiyan Tested-by: Alexander Shiyan Signed-off-by: Russell King --- arch/arm/mm/abort-lv4t.S | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm/mm/abort-lv4t.S b/arch/arm/mm/abort-lv4t.S index 6d8e8e3365d1..4cdfab31a0b6 100644 --- a/arch/arm/mm/abort-lv4t.S +++ b/arch/arm/mm/abort-lv4t.S @@ -7,7 +7,7 @@ * : r4 = aborted context pc * : r5 = aborted context psr * - * Returns : r4-r5, r10-r11, r13 preserved + * Returns : r4-r5, r9-r11, r13 preserved * * Purpose : obtain information about current aborted instruction. * Note: we read user space. This means we might cause a data @@ -48,7 +48,10 @@ ENTRY(v4t_late_abort) /* c */ b do_DataAbort @ ldc rd, [rn], #m @ Same as ldr rd, [rn], #m /* d */ b do_DataAbort @ ldc rd, [rn, #m] /* e */ b .data_unknown -/* f */ +/* f */ b .data_unknown + +.data_unknown_r9: + ldr r9, [sp], #4 .data_unknown: @ Part of jumptable mov r0, r4 mov r1, r8 @@ -57,6 +60,7 @@ ENTRY(v4t_late_abort) .data_arm_ldmstm: tst r8, #1 << 21 @ check writeback bit beq do_DataAbort @ no writeback -> no fixup + str r9, [sp, #-4]! 
mov r7, #0x11 orr r7, r7, #0x1100 and r6, r8, r7 @@ -75,12 +79,14 @@ ENTRY(v4t_late_abort) subne r7, r7, r6, lsl #2 @ Undo increment addeq r7, r7, r6, lsl #2 @ Undo decrement str r7, [r2, r9, lsr #14] @ Put register 'Rn' + ldr r9, [sp], #4 b do_DataAbort .data_arm_lateldrhpre: tst r8, #1 << 21 @ Check writeback bit beq do_DataAbort @ No writeback -> no fixup .data_arm_lateldrhpost: + str r9, [sp, #-4]! and r9, r8, #0x00f @ get Rm / low nibble of immediate value tst r8, #1 << 22 @ if (immediate offset) andne r6, r8, #0xf00 @ { immediate high nibble @@ -93,6 +99,7 @@ ENTRY(v4t_late_abort) subne r7, r7, r6 @ Undo incrmenet addeq r7, r7, r6 @ Undo decrement str r7, [r2, r9, lsr #14] @ Put register 'Rn' + ldr r9, [sp], #4 b do_DataAbort .data_arm_lateldrpreconst: @@ -101,12 +108,14 @@ ENTRY(v4t_late_abort) .data_arm_lateldrpostconst: movs r6, r8, lsl #20 @ Get offset beq do_DataAbort @ zero -> no fixup + str r9, [sp, #-4]! and r9, r8, #15 << 16 @ Extract 'n' from instruction ldr r7, [r2, r9, lsr #14] @ Get register 'Rn' tst r8, #1 << 23 @ Check U bit subne r7, r7, r6, lsr #20 @ Undo increment addeq r7, r7, r6, lsr #20 @ Undo decrement str r7, [r2, r9, lsr #14] @ Put register 'Rn' + ldr r9, [sp], #4 b do_DataAbort .data_arm_lateldrprereg: @@ -115,6 +124,7 @@ ENTRY(v4t_late_abort) .data_arm_lateldrpostreg: and r7, r8, #15 @ Extract 'm' from instruction ldr r6, [r2, r7, lsl #2] @ Get register 'Rm' + str r9, [sp, #-4]! mov r9, r8, lsr #7 @ get shift count ands r9, r9, #31 and r7, r8, #0x70 @ get shift type @@ -126,33 +136,33 @@ ENTRY(v4t_late_abort) b .data_arm_apply_r6_and_rn b .data_arm_apply_r6_and_rn @ 1: LSL #0 nop - b .data_unknown @ 2: MUL? + b .data_unknown_r9 @ 2: MUL? nop - b .data_unknown @ 3: MUL? + b .data_unknown_r9 @ 3: MUL? nop mov r6, r6, lsr r9 @ 4: LSR #!0 b .data_arm_apply_r6_and_rn mov r6, r6, lsr #32 @ 5: LSR #32 b .data_arm_apply_r6_and_rn - b .data_unknown @ 6: MUL? + b .data_unknown_r9 @ 6: MUL? nop - b .data_unknown @ 7: MUL? + b .data_unknown_r9 @ 7: MUL? nop mov r6, r6, asr r9 @ 8: ASR #!0 b .data_arm_apply_r6_and_rn mov r6, r6, asr #32 @ 9: ASR #32 b .data_arm_apply_r6_and_rn - b .data_unknown @ A: MUL? + b .data_unknown_r9 @ A: MUL? nop - b .data_unknown @ B: MUL? + b .data_unknown_r9 @ B: MUL? nop mov r6, r6, ror r9 @ C: ROR #!0 b .data_arm_apply_r6_and_rn mov r6, r6, rrx @ D: RRX b .data_arm_apply_r6_and_rn - b .data_unknown @ E: MUL? + b .data_unknown_r9 @ E: MUL? nop - b .data_unknown @ F: MUL? + b .data_unknown_r9 @ F: MUL? .data_thumb_abort: ldrh r8, [r4] @ read instruction @@ -190,6 +200,7 @@ ENTRY(v4t_late_abort) .data_thumb_pushpop: tst r8, #1 << 10 beq .data_unknown + str r9, [sp, #-4]! and r6, r8, #0x55 @ hweight8(r8) + R bit and r9, r8, #0xaa add r6, r6, r9, lsr #1 @@ -204,9 +215,11 @@ ENTRY(v4t_late_abort) addeq r7, r7, r6, lsl #2 @ increment SP if PUSH subne r7, r7, r6, lsl #2 @ decrement SP if POP str r7, [r2, #13 << 2] + ldr r9, [sp], #4 b do_DataAbort .data_thumb_ldmstm: + str r9, [sp, #-4]! and r6, r8, #0x55 @ hweight8(r8) and r9, r8, #0xaa add r6, r6, r9, lsr #1 @@ -219,4 +232,5 @@ ENTRY(v4t_late_abort) and r6, r6, #15 @ number of regs to transfer sub r7, r7, r6, lsl #2 @ always decrement str r7, [r2, r9, lsr #6] + ldr r9, [sp], #4 b do_DataAbort -- cgit v1.2.3 From 6127d124ee4eb9c39983813cc9803f3654ab7e16 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 18 Oct 2016 21:46:18 +0100 Subject: ARM: wire up new pkey syscalls Wire up the new pkey syscalls for ARM. 
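For completeness, here is how the new entries look from user space; a hedged sketch (the raw numbers follow this patch's layout, where the EABI __NR_SYSCALL_BASE is 0, and glibc wrappers did not exist yet, hence raw syscall(2)):

  #include <stdio.h>
  #include <unistd.h>
  #include <sys/syscall.h>

  #ifndef __NR_pkey_alloc
  #define __NR_pkey_alloc 395   /* __NR_SYSCALL_BASE + 395 on ARM EABI */
  #define __NR_pkey_free  396
  #endif

  int main(void)
  {
          long pkey = syscall(__NR_pkey_alloc, 0UL, 0UL);

          if (pkey < 0) {
                  /* Expected on hardware without protection keys. */
                  perror("pkey_alloc");
                  return 1;
          }
          return (int)syscall(__NR_pkey_free, (int)pkey);
  }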
Signed-off-by: Russell King --- arch/arm/include/asm/unistd.h | 2 +- arch/arm/include/uapi/asm/unistd.h | 3 +++ arch/arm/kernel/calls.S | 3 +++ 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index 194b69923389..ada0d29a660f 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -19,7 +19,7 @@ * This may need to be greater than __NR_last_syscall+1 in order to * account for the padding in the syscall table */ -#define __NR_syscalls (396) +#define __NR_syscalls (400) #define __ARCH_WANT_STAT64 #define __ARCH_WANT_SYS_GETHOSTNAME diff --git a/arch/arm/include/uapi/asm/unistd.h b/arch/arm/include/uapi/asm/unistd.h index 2cb9dc770e1d..314100a06ccb 100644 --- a/arch/arm/include/uapi/asm/unistd.h +++ b/arch/arm/include/uapi/asm/unistd.h @@ -420,6 +420,9 @@ #define __NR_copy_file_range (__NR_SYSCALL_BASE+391) #define __NR_preadv2 (__NR_SYSCALL_BASE+392) #define __NR_pwritev2 (__NR_SYSCALL_BASE+393) +#define __NR_pkey_mprotect (__NR_SYSCALL_BASE+394) +#define __NR_pkey_alloc (__NR_SYSCALL_BASE+395) +#define __NR_pkey_free (__NR_SYSCALL_BASE+396) /* * The following SWIs are ARM private. diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index 703fa0f3cd8f..08030b18f10a 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S @@ -403,6 +403,9 @@ CALL(sys_copy_file_range) CALL(sys_preadv2) CALL(sys_pwritev2) + CALL(sys_pkey_mprotect) +/* 395 */ CALL(sys_pkey_alloc) + CALL(sys_pkey_free) #ifndef syscalls_counted .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls #define syscalls_counted -- cgit v1.2.3 From 889882bce2a5f69242c1f3acd840983f467499b9 Mon Sep 17 00:00:00 2001 From: Lukasz Odzioba Date: Tue, 4 Oct 2016 18:26:26 +0200 Subject: perf/x86/intel/cstate: Add C-state residency events for Knights Landing Although KNL does support C1,C6,PC2,PC3,PC6 states, the patch only supports C6,PC2,PC3,PC6, because there is no counter for C1. C6 residency counter MSR on KNL has a different address than other platforms which is handled as a new quirk flag. Signed-off-by: Lukasz Odzioba Acked-by: Peter Zijlstra Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Rafael J. Wysocki Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: bp@suse.de Cc: dave.hansen@linux.intel.com Cc: kan.liang@intel.com Link: http://lkml.kernel.org/r/1475598386-19597-1-git-send-email-lukasz.odzioba@intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/cstate.c | 30 ++++++++++++++++++++++++++---- 1 file changed, 26 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 3ca87b5a8677..4f5ac726335f 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -48,7 +48,8 @@ * Scope: Core * MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter * perf code: 0x02 - * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL + * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW + * SKL,KNL * Scope: Core * MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter * perf code: 0x03 @@ -56,15 +57,16 @@ * Scope: Core * MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter. * perf code: 0x00 - * Available model: SNB,IVB,HSW,BDW,SKL + * Available model: SNB,IVB,HSW,BDW,SKL,KNL * Scope: Package (physical package) * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter. 
* perf code: 0x01 * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL * Scope: Package (physical package) * MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter. * perf code: 0x02 * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW + * SKL,KNL * Scope: Package (physical package) * MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter. * perf code: 0x03 @@ -118,6 +120,7 @@ struct cstate_model { /* Quirk flags */ #define SLM_PKG_C6_USE_C7_MSR (1UL << 0) +#define KNL_CORE_C6_MSR (1UL << 1) struct perf_cstate_msr { u64 msr; @@ -488,6 +491,18 @@ static const struct cstate_model slm_cstates __initconst = { .quirks = SLM_PKG_C6_USE_C7_MSR, }; + +static const struct cstate_model knl_cstates __initconst = { .core_events = BIT(PERF_CSTATE_CORE_C6_RES), + .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) | + BIT(PERF_CSTATE_PKG_C3_RES) | + BIT(PERF_CSTATE_PKG_C6_RES), + .quirks = KNL_CORE_C6_MSR, }; + + + #define X86_CSTATES_MODEL(model, states) \ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) } @@ -523,6 +538,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE, snb_cstates), X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates), + + X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNL, knl_cstates), { }, }; MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); @@ -558,6 +575,11 @@ static int __init cstate_probe(const struct cstate_model *cm) if (cm->quirks & SLM_PKG_C6_USE_C7_MSR) pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY; + /* KNL has different MSR for CORE C6 */ + if (cm->quirks & KNL_CORE_C6_MSR) + pkg_msr[PERF_CSTATE_CORE_C6_RES].msr = MSR_KNL_CORE_C6_RESIDENCY; + + has_cstate_core = cstate_probe_msr(cm->core_events, PERF_CSTATE_CORE_EVENT_MAX, core_msr, core_events_attrs); -- cgit v1.2.3 From 3da43104d3187184d7417569cb3360511229f761 Mon Sep 17 00:00:00 2001 From: Noam Camus Date: Wed, 19 Oct 2016 14:25:03 +0300 Subject: ARC: Adjust cpuinfo for non-continuous cpu ids num_possible_cpus() returns how many CPUs may be present on the system. However, we want the highest possible CPU number. The two may differ with a sparse possible-CPUs map; such a map is produced by OF for plat-eznps. For example, if we have: possible cpus mask 0,3 then num_possible_cpus() equals 2 while nr_cpu_ids equals 4. Only with the value 4 will c_start() provide correct cpuinfo at procfs. Signed-off-by: Noam Camus Signed-off-by: Vineet Gupta --- arch/arc/kernel/setup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 3df7f9c72f42..75e540972135 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -507,7 +507,7 @@ static void *c_start(struct seq_file *m, loff_t *pos) * way to pass it w/o having to kmalloc/free a 2 byte string. * Encode cpu-id as 0xFFcccc, which is decoded by show routine. */ - return *pos < num_possible_cpus() ? cpu_to_ptr(*pos) : NULL; + return *pos < nr_cpu_ids ? cpu_to_ptr(*pos) : NULL; } static void *c_next(struct seq_file *m, void *v, loff_t *pos) -- cgit v1.2.3 From 1dec78585328db00e33fb18dc1a6deed0e2095a5 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Wed, 19 Oct 2016 14:38:50 -0700 Subject: ARC: fix build warning in elf.h The cast is valid since TASK_SIZE * 2 will never actually cause overflow.
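A worked example of the warning quoted below (the TASK_SIZE value is illustrative only, standing in for the ARC definition):

  /* 0x60000000 fits in a 32-bit int, so the constant-folded
   * 2 * TASK_SIZE = 0xC0000000 overflows signed int: -Woverflow.
   * With 2UL the arithmetic is unsigned long and well-defined. */
  #include <stdio.h>

  #define TASK_SIZE 0x60000000

  int main(void)
  {
          unsigned long base = 2UL * TASK_SIZE / 3;

          printf("ELF_ET_DYN_BASE = 0x%lx\n", base);  /* 0x40000000 */
          return 0;
  }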
| CC fs/binfmt_elf.o | In file included from ../include/linux/elf.h:4:0, | from ../include/linux/module.h:15, | from ../fs/binfmt_elf.c:12: | ../fs/binfmt_elf.c: In function load_elf_binar: | ../arch/arc/include/asm/elf.h:57:29: warning: integer overflow in expression [-Woverflow] | #define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3) | ^ | ../fs/binfmt_elf.c:921:16: note: in expansion of macro ELF_ET_DYN_BASE | load_bias = ELF_ET_DYN_BASE - vaddr; Signed-off-by: Vineet Gupta --- arch/arc/include/asm/elf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arc/include/asm/elf.h b/arch/arc/include/asm/elf.h index 7096f97a1434..aa2d6da9d187 100644 --- a/arch/arc/include/asm/elf.h +++ b/arch/arc/include/asm/elf.h @@ -54,7 +54,7 @@ extern int elf_check_arch(const struct elf32_hdr *); * the loader. We need to make sure that it is out of the way of the program * that it will "exec", and that there is sufficient room for the brk. */ -#define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3) +#define ELF_ET_DYN_BASE (2UL * TASK_SIZE / 3) /* * When the program starts, a1 contains a pointer to a function to be -- cgit v1.2.3 From 6a34e0e6b49f50d2626c15ed75b76031f12bd637 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Thu, 22 Sep 2016 03:35:16 -0500 Subject: arm64: dts: Add timer erratum property for LS2080A and LS1043A Both the LS1043A and LS2080A platforms are affected by the Freescale A008585 erratum. Advertise it in their respective device trees. Signed-off-by: Scott Wood Acked-by: Marc Zyngier Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi | 1 + arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi | 1 + 2 files changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi index 220ac7057d12..97d331ec2500 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi @@ -123,6 +123,7 @@ <1 14 0xf08>, /* Physical Non-Secure PPI */ <1 11 0xf08>, /* Virtual PPI */ <1 10 0xf08>; /* Hypervisor PPI */ + fsl,erratum-a008585; }; pmu { diff --git a/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi index 337da90bd7da..7f0dc13b4087 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi @@ -195,6 +195,7 @@ <1 14 4>, /* Physical Non-Secure PPI, active-low */ <1 11 4>, /* Virtual PPI, active-low */ <1 10 4>; /* Hypervisor PPI, active-low */ + fsl,erratum-a008585; }; pmu { -- cgit v1.2.3 From 62623d5f918fb1c8ed86b03b9a86cc81f1cb1878 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 20 Oct 2016 13:32:55 +1100 Subject: KVM: PPC: Book3S HV: Fix build error when SMP=n Commit 5d375199ea96 ("KVM: PPC: Book3S HV: Set server for passed-through interrupts") broke the SMP=n build: arch/powerpc/kvm/book3s_hv_rm_xics.c:758:2: error: implicit declaration of function 'get_hard_smp_processor_id' That is because we lost the implicit include of asm/smp.h, so include it explicitly to get the definition for get_hard_smp_processor_id(). 
Fixes: 5d375199ea96 ("KVM: PPC: Book3S HV: Set server for passed-through interrupts") Signed-off-by: Michael Ellerman --- arch/powerpc/kvm/book3s_hv_rm_xics.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index 82ff5de8b1e7..a0ea63ac2b52 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "book3s_xics.h" -- cgit v1.2.3 From 80f23935cadb1c654e81951f5a8b7ceae0acc1b4 Mon Sep 17 00:00:00 2001 From: Segher Boessenkool Date: Thu, 6 Oct 2016 13:42:19 +0000 Subject: powerpc: Convert cmp to cmpd in idle enter sequence PowerPC's "cmp" instruction has four operands. Normally people write "cmpw" or "cmpd" for the second cmp operand 0 or 1. But, frequently people forget, and write "cmp" with just three operands. With older binutils this is silently accepted as if this was "cmpw", while often "cmpd" is wanted. With newer binutils GAS will complain about this for 64-bit code. For 32-bit code it still silently assumes "cmpw" is what is meant. In this instance the code comes directly from ISA v2.07, including the cmp, but cmpd is correct. Backport to stable so that new toolchains can build old kernels. Fixes: 948cf67c4726 ("powerpc: Add NAP mode support on Power7 in HV mode") Cc: stable@vger.kernel.org # v3.0 Reviewed-by: Vaidyanathan Srinivasan Signed-off-by: Segher Boessenkool Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cpuidle.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h index 01b8a13f0224..3919332965af 100644 --- a/arch/powerpc/include/asm/cpuidle.h +++ b/arch/powerpc/include/asm/cpuidle.h @@ -26,7 +26,7 @@ extern u64 pnv_first_deep_stop_state; std r0,0(r1); \ ptesync; \ ld r0,0(r1); \ -1: cmp cr0,r0,r0; \ +1: cmpd cr0,r0,r0; \ bne 1b; \ IDLE_INST; \ b . -- cgit v1.2.3 From bcf4f311e034dc661b5e641595ef1f50af27b5bf Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 20 Sep 2016 13:43:19 +0900 Subject: ARM: uniphier: select ARCH_HAS_RESET_CONTROLLER The UniPhier reset driver (drivers/reset/reset-uniphier.c) has been merged. Select ARCH_HAS_RESET_CONTROLLER from the SoC Kconfig. Signed-off-by: Masahiro Yamada Acked-by: Philipp Zabel --- arch/arm/mach-uniphier/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/mach-uniphier/Kconfig b/arch/arm/mach-uniphier/Kconfig index 82dddee3a469..3930fbba30b4 100644 --- a/arch/arm/mach-uniphier/Kconfig +++ b/arch/arm/mach-uniphier/Kconfig @@ -1,6 +1,7 @@ config ARCH_UNIPHIER bool "Socionext UniPhier SoCs" depends on ARCH_MULTI_V7 + select ARCH_HAS_RESET_CONTROLLER select ARM_AMBA select ARM_GLOBAL_TIMER select ARM_GIC -- cgit v1.2.3 From 75924903c51d0697b989035f6baebccb2a7367cd Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sat, 8 Oct 2016 11:25:34 +0900 Subject: arm64: uniphier: select ARCH_HAS_RESET_CONTROLLER The UniPhier reset driver (drivers/reset/reset-uniphier.c) has been merged. Select ARCH_HAS_RESET_CONTROLLER from the SoC Kconfig. 
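What selecting ARCH_HAS_RESET_CONTROLLER enables in practice: peripheral drivers on these SoCs can use the standard reset API against the newly merged driver. A minimal, hypothetical consumer sketch:

  #include <linux/platform_device.h>
  #include <linux/reset.h>

  static int example_probe(struct platform_device *pdev)
  {
          /* Looks up the "resets" phandle in the device's DT node. */
          struct reset_control *rst = devm_reset_control_get(&pdev->dev, NULL);

          if (IS_ERR(rst))
                  return PTR_ERR(rst);
          /* Release the block from reset before touching registers. */
          return reset_control_deassert(rst);
  }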
Signed-off-by: Masahiro Yamada --- arch/arm64/Kconfig.platforms | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index cfbdf02ef566..101794f5ce10 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -190,6 +190,7 @@ config ARCH_THUNDER config ARCH_UNIPHIER bool "Socionext UniPhier SoC Family" + select ARCH_HAS_RESET_CONTROLLER select PINCTRL help This enables support for Socionext UniPhier SoC family. -- cgit v1.2.3 From 1bdb60ef18655596d56b9b7268ad0bf5214e00e4 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 21 Oct 2016 17:27:57 +0900 Subject: ARM: dts: uniphier: change MIO node to SD control node I made a mistake because the Media I/O block is not implemented in these SoCs. Signed-off-by: Masahiro Yamada --- arch/arm/boot/dts/uniphier-pro5.dtsi | 4 ++-- arch/arm/boot/dts/uniphier-pxs2.dtsi | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/uniphier-pro5.dtsi b/arch/arm/boot/dts/uniphier-pro5.dtsi index 2c49c3614bda..5357ea9c14b1 100644 --- a/arch/arm/boot/dts/uniphier-pro5.dtsi +++ b/arch/arm/boot/dts/uniphier-pro5.dtsi @@ -184,11 +184,11 @@ }; &mio_clk { - compatible = "socionext,uniphier-pro5-mio-clock"; + compatible = "socionext,uniphier-pro5-sd-clock"; }; &mio_rst { - compatible = "socionext,uniphier-pro5-mio-reset"; + compatible = "socionext,uniphier-pro5-sd-reset"; }; &peri_clk { diff --git a/arch/arm/boot/dts/uniphier-pxs2.dtsi b/arch/arm/boot/dts/uniphier-pxs2.dtsi index 8789cd518933..950f07ba0337 100644 --- a/arch/arm/boot/dts/uniphier-pxs2.dtsi +++ b/arch/arm/boot/dts/uniphier-pxs2.dtsi @@ -197,11 +197,11 @@ }; &mio_clk { - compatible = "socionext,uniphier-pxs2-mio-clock"; + compatible = "socionext,uniphier-pxs2-sd-clock"; }; &mio_rst { - compatible = "socionext,uniphier-pxs2-mio-reset"; + compatible = "socionext,uniphier-pxs2-sd-reset"; }; &peri_clk { -- cgit v1.2.3 From 8e68c65d111a57a4cbe41dc886bb2a1e671e0b6e Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 21 Oct 2016 16:45:21 +0900 Subject: arm64: dts: uniphier: change MIO node to SD control node I made a mistake because the Media I/O block is not implemented in this SoC.
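The renamed compatible strings matter because the clock and reset drivers bind purely on them. A driver-side view, assembled from the compatibles in these MIO-to-SD renames (a sketch, not a verbatim match table from the UniPhier driver):

  #include <linux/of.h>

  static const struct of_device_id example_sd_clk_matches[] = {
          { .compatible = "socionext,uniphier-pro5-sd-clock" },
          { .compatible = "socionext,uniphier-pxs2-sd-clock" },
          { .compatible = "socionext,uniphier-ld20-sd-clock" },
          { /* sentinel */ }
  };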
Signed-off-by: Masahiro Yamada --- arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi b/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi index 08fd7cf7769c..56a1b2e92cf3 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi +++ b/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi @@ -257,18 +257,18 @@ reg = <0x59801000 0x400>; }; - mioctrl@59810000 { - compatible = "socionext,uniphier-mioctrl", + sdctrl@59810000 { + compatible = "socionext,uniphier-ld20-sdctrl", "simple-mfd", "syscon"; reg = <0x59810000 0x800>; - mio_clk: clock { - compatible = "socionext,uniphier-ld20-mio-clock"; + sd_clk: clock { + compatible = "socionext,uniphier-ld20-sd-clock"; #clock-cells = <1>; }; - mio_rst: reset { - compatible = "socionext,uniphier-ld20-mio-reset"; + sd_rst: reset { + compatible = "socionext,uniphier-ld20-sd-reset"; #reset-cells = <1>; }; }; -- cgit v1.2.3 From 963d790468a2f581abf039b45edac79af5e16e55 Mon Sep 17 00:00:00 2001 From: Ray Jui Date: Wed, 20 Jul 2016 14:53:51 -0700 Subject: arm64: dts: Updated NAND DT properties for NS2 SVK This patch adds NAND DT properties for NS2 SVK to configure the bus width and OOB sector size Signed-off-by: Prafulla Kota Signed-off-by: Ray Jui Signed-off-by: Florian Fainelli --- arch/arm64/boot/dts/broadcom/ns2-svk.dts | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm64/boot/dts/broadcom/ns2-svk.dts b/arch/arm64/boot/dts/broadcom/ns2-svk.dts index 2d7872a36b91..b09f3bc5c6c1 100644 --- a/arch/arm64/boot/dts/broadcom/ns2-svk.dts +++ b/arch/arm64/boot/dts/broadcom/ns2-svk.dts @@ -164,6 +164,8 @@ nand-ecc-mode = "hw"; nand-ecc-strength = <8>; nand-ecc-step-size = <512>; + nand-bus-width = <16>; + brcm,nand-oob-sector-size = <16>; #address-cells = <1>; #size-cells = <1>; }; -- cgit v1.2.3 From eef0b282bb586259d35548851cf6a4ce847bb804 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sat, 22 Oct 2016 10:20:55 -0200 Subject: ARM: imx: gpc: Initialize all power domains Since commit 0159ec670763dd ("PM / Domains: Verify the PM domain is present when adding a provider") the following regression is observed on imx6: imx-gpc: probe of 20dc000.gpc failed with error -22 The gpc probe fails because of_genpd_add_provider_onecell() now checks via the pm_genpd_present() function that all the domains are initialized, and it fails because not all the power domains are initialized. In order to fix this error, initialize all the power domains from imx_gpc_domains[], not only the imx6q_pu_domain.base one.
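The rule the fix observes, reduced to a sketch with hypothetical domain names: every generic_pm_domain handed to of_genpd_add_provider_onecell() must first go through pm_genpd_init(), otherwise the provider registration is rejected (the -22 above is -EINVAL).

  #include <linux/kernel.h>
  #include <linux/of.h>
  #include <linux/pm_domain.h>

  static struct generic_pm_domain gpd_a = { .name = "pd-a" };
  static struct generic_pm_domain gpd_b = { .name = "pd-b" };
  static struct generic_pm_domain *domains[] = { &gpd_a, &gpd_b };
  static struct genpd_onecell_data onecell = {
          .domains = domains,
          .num_domains = ARRAY_SIZE(domains),
  };

  static int example_register(struct device_node *np)
  {
          int i;

          /* Initialize every domain, not just one of them. */
          for (i = 0; i < ARRAY_SIZE(domains); i++)
                  pm_genpd_init(domains[i], NULL, false);
          return of_genpd_add_provider_onecell(np, &onecell);
  }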
Reported-by: Olof's autobooter Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/mach-imx/gpc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-imx/gpc.c b/arch/arm/mach-imx/gpc.c index 0df062d8b2c9..d0463e9d8a08 100644 --- a/arch/arm/mach-imx/gpc.c +++ b/arch/arm/mach-imx/gpc.c @@ -430,7 +430,8 @@ static int imx_gpc_genpd_init(struct device *dev, struct regulator *pu_reg) if (!IS_ENABLED(CONFIG_PM_GENERIC_DOMAINS)) return 0; - pm_genpd_init(&imx6q_pu_domain.base, NULL, false); + for (i = 0; i < ARRAY_SIZE(imx_gpc_domains); i++) + pm_genpd_init(imx_gpc_domains[i], NULL, false); return of_genpd_add_provider_onecell(dev->of_node, &imx_gpc_onecell_data); -- cgit v1.2.3 From f9d1f7a7ad919c93dfb708aae6e19d33c5437443 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sat, 22 Oct 2016 10:20:56 -0200 Subject: ARM: imx: gpc: Fix the imx_gpc_genpd_init() error path If of_genpd_add_provider_onecell() fails, the following kernel crash is observed on a kernel built with multi_v7_defconfig: [ 1.739301] [00000040] *pgd=00000000 [ 1.739310] Internal error: Oops: 5 [#1] PREEMPT ARM [ 1.739319] Modules linked in: [ 1.739328] CPU: 1 PID: 95 Comm: kworker/1:4 Not tainted 4.8.0-11897-g6b5e09a #1 [ 1.739331] Hardware name: Freescale i.MX6 Quad/DualLite (Device Tree) [ 1.739352] Workqueue: pm genpd_power_off_work_fn [ 1.739356] task: ee63d400 task.stack: ee70a000 [ 1.739365] PC is at mutex_lock+0xc/0x4c [ 1.739374] LR is at regulator_disable+0x2c/0x60 [ 1.739379] pc : [] lr : [] psr: 60000013 [ 1.739379] sp : ee70beb0 ip : 10624dd3 fp : ee6e6280 [ 1.739382] r10: eefb0900 r9 : 00000000 r8 : c1309918 [ 1.739385] r7 : 00000000 r6 : 00000040 r5 : 00000000 r4 : 00000040 [ 1.739390] r3 : 0000004c r2 : 7fffd540 r1 : 000001e4 r0 : 00000040 Instead of returning of_genpd_add_provider_onecell() directly, we should check its return value and in the case of error we should unwind the previously taken actions, which in this case are: - Call imx6q_pm_pu_power_off() - Set imx6q_pu_domain.reg back to NULL Setting imx6q_pu_domain.reg to NULL in the error case is important as it will prevent further operations on the pu_reg regulator. This kernel crash is not observed with imx_v6_v7_defconfig because it selects GPU and VPU drivers, which are consumers of the GPC block and thus change the refcount of the pu_reg regulator.
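The fix applies the usual reverse-order unwind idiom; a generic sketch with hypothetical helpers (register_provider() and power_off_domain() stand in for the real calls, whose labels appear in the diff that follows):

  #include <linux/clk.h>
  #include <linux/err.h>
  #include <linux/of.h>

  #define NUM_CLKS 3
  static struct clk *clks[NUM_CLKS];

  static int example_init(struct device *dev)
  {
          int i, ret;

          for (i = 0; i < NUM_CLKS; i++) {
                  clks[i] = of_clk_get(dev->of_node, i);
                  if (IS_ERR(clks[i]))
                          goto clk_err;
          }
          ret = register_provider(dev);   /* may fail */
          if (ret)
                  goto power_off;
          return 0;

  power_off:
          power_off_domain(dev);          /* undo the most recent step first */
  clk_err:
          while (i--)                     /* then release clocks in reverse */
                  clk_put(clks[i]);
          return -EINVAL;
  }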
Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/mach-imx/gpc.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-imx/gpc.c b/arch/arm/mach-imx/gpc.c index d0463e9d8a08..b54db47f6f32 100644 --- a/arch/arm/mach-imx/gpc.c +++ b/arch/arm/mach-imx/gpc.c @@ -408,7 +408,7 @@ static struct genpd_onecell_data imx_gpc_onecell_data = { static int imx_gpc_genpd_init(struct device *dev, struct regulator *pu_reg) { struct clk *clk; - int i; + int i, ret; imx6q_pu_domain.reg = pu_reg; @@ -432,12 +432,20 @@ static int imx_gpc_genpd_init(struct device *dev, struct regulator *pu_reg) for (i = 0; i < ARRAY_SIZE(imx_gpc_domains); i++) pm_genpd_init(imx_gpc_domains[i], NULL, false); - return of_genpd_add_provider_onecell(dev->of_node, + + ret = of_genpd_add_provider_onecell(dev->of_node, &imx_gpc_onecell_data); + if (ret) + goto power_off; + + return 0; +power_off: + imx6q_pm_pu_power_off(&imx6q_pu_domain.base); clk_err: while (i--) clk_put(imx6q_pu_domain.clk[i]); + imx6q_pu_domain.reg = NULL; return -EINVAL; } -- cgit v1.2.3 From 47ece7fef4e4206cdcee7c28ac3bca3ede0a1908 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 19 Oct 2016 13:42:55 +0200 Subject: s390/dumpstack: use pr_cont within show_stack and die Use pr_cont instead of printk calls also within show_stack and die in order to avoid extra line breaks. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/dumpstack.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 34345c0a3c46..55d4fe174fd9 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -119,14 +119,14 @@ void show_stack(struct task_struct *task, unsigned long *sp) else stack = (unsigned long *)task->thread.ksp; } + printk(KERN_DEFAULT "Stack:\n"); for (i = 0; i < 20; i++) { if (((addr_t) stack & (THREAD_SIZE-1)) == 0) break; - if ((i * sizeof(long) % 32) == 0) - printk("%s ", i == 0 ? "" : "\n"); - printk("%016lx ", *stack++); + if (i % 4 == 0) + printk(KERN_DEFAULT " "); + pr_cont("%016lx%c", *stack++, i % 4 == 3 ? '\n' : ' '); } - printk("\n"); show_trace(task, (unsigned long)sp); } @@ -186,14 +186,14 @@ void die(struct pt_regs *regs, const char *str) printk("%s: %04x ilc:%d [#%d] ", str, regs->int_code & 0xffff, regs->int_code >> 17, ++die_counter); #ifdef CONFIG_PREEMPT - printk("PREEMPT "); + pr_cont("PREEMPT "); #endif #ifdef CONFIG_SMP - printk("SMP "); + pr_cont("SMP "); #endif if (debug_pagealloc_enabled()) - printk("DEBUG_PAGEALLOC"); - printk("\n"); + pr_cont("DEBUG_PAGEALLOC"); + pr_cont("\n"); notify_die(DIE_OOPS, str, regs, 0, regs->int_code & 0xffff, SIGSEGV); print_modules(); show_regs(regs); -- cgit v1.2.3 From 4a65429457a5d271dd3b00598b3ec75fe8b5103c Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Tue, 18 Oct 2016 17:32:18 +0200 Subject: s390/mm: fix zone calculation in arch_add_memory() Standby (hotplug) memory should be added to ZONE_MOVABLE on s390. After commit 199071f1 "s390/mm: make arch_add_memory() NUMA aware", arch_add_memory() used memblock_end_of_DRAM() to find out the end of ZONE_NORMAL and the beginning of ZONE_MOVABLE. However, commit 7f36e3e5 "memory-hotplug: add hot-added memory ranges to memblock before allocate node_data for a node." 
moved the call of memblock_add_node() before the call of arch_add_memory() in add_memory_resource(), and thus changed the return value of memblock_end_of_DRAM() when called in arch_add_memory(). As a result, arch_add_memory() will think that all memory blocks should be added to ZONE_NORMAL. Fix this by changing the logic in arch_add_memory() so that it will manually iterate over all zones of a given node to find out which zone a memory block should be added to. Reviewed-by: Heiko Carstens Signed-off-by: Gerald Schaefer Signed-off-by: Martin Schwidefsky --- arch/s390/mm/init.c | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index f56a39bd8ba6..b3e9d18f2ec6 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -151,36 +151,40 @@ void __init free_initrd_mem(unsigned long start, unsigned long end) #ifdef CONFIG_MEMORY_HOTPLUG int arch_add_memory(int nid, u64 start, u64 size, bool for_device) { - unsigned long normal_end_pfn = PFN_DOWN(memblock_end_of_DRAM()); - unsigned long dma_end_pfn = PFN_DOWN(MAX_DMA_ADDRESS); + unsigned long zone_start_pfn, zone_end_pfn, nr_pages; unsigned long start_pfn = PFN_DOWN(start); unsigned long size_pages = PFN_DOWN(size); - unsigned long nr_pages; - int rc, zone_enum; + pg_data_t *pgdat = NODE_DATA(nid); + struct zone *zone; + int rc, i; rc = vmem_add_mapping(start, size); if (rc) return rc; - while (size_pages > 0) { - if (start_pfn < dma_end_pfn) { - nr_pages = (start_pfn + size_pages > dma_end_pfn) ? - dma_end_pfn - start_pfn : size_pages; - zone_enum = ZONE_DMA; - } else if (start_pfn < normal_end_pfn) { - nr_pages = (start_pfn + size_pages > normal_end_pfn) ? - normal_end_pfn - start_pfn : size_pages; - zone_enum = ZONE_NORMAL; + for (i = 0; i < MAX_NR_ZONES; i++) { + zone = pgdat->node_zones + i; + if (zone_idx(zone) != ZONE_MOVABLE) { + /* Add range within existing zone limits, if possible */ + zone_start_pfn = zone->zone_start_pfn; + zone_end_pfn = zone->zone_start_pfn + + zone->spanned_pages; } else { - nr_pages = size_pages; - zone_enum = ZONE_MOVABLE; + /* Add remaining range to ZONE_MOVABLE */ + zone_start_pfn = start_pfn; + zone_end_pfn = start_pfn + size_pages; } - rc = __add_pages(nid, NODE_DATA(nid)->node_zones + zone_enum, - start_pfn, size_pages); + if (start_pfn < zone_start_pfn || start_pfn >= zone_end_pfn) + continue; + nr_pages = (start_pfn + size_pages > zone_end_pfn) ? + zone_end_pfn - start_pfn : size_pages; + rc = __add_pages(nid, zone, start_pfn, nr_pages); if (rc) break; start_pfn += nr_pages; size_pages -= nr_pages; + if (!size_pages) + break; } if (rc) vmem_remove_mapping(start, size); -- cgit v1.2.3 From 56c46222af0d09149fadec2a3ce9d4889de01cc6 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 21 Oct 2016 20:03:05 +1100 Subject: powerpc/64: Re-fix race condition between going idle and entering guest Commit 8117ac6a6c2f ("powerpc/powernv: Switch off MMU before entering nap/sleep/rvwinkle mode", 2014-12-10) fixed a race condition where one thread entering a KVM guest could switch the MMU context to the guest while another thread was still in host kernel context with the MMU on. That commit moved the point where a thread entering a power-saving mode set its kvm_hstate.hwthread_state field in its PACA to KVM_HWTHREAD_IN_IDLE from a point where the MMU was on to after the MMU had been switched off. 
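The invariant being restored can be stated as a hedged C sketch (the real code is PowerPC assembly; mmu_off() and hwthread_state are stand-ins for the real-mode switch and the PACA field, not actual kernel symbols):

	#include <stdatomic.h>

	enum { KVM_HWTHREAD_IN_KERNEL, KVM_HWTHREAD_IN_IDLE };

	extern void mmu_off(void);		/* stand-in for entering real mode */
	extern _Atomic int hwthread_state;	/* stand-in for kvm_hstate.hwthread_state */

	void enter_idle(void)
	{
		mmu_off();
		/*
		 * Only once translation is off may this thread advertise itself
		 * as idle; as soon as the flag is visible, a sibling thread may
		 * switch the MMU to a guest context.
		 */
		atomic_store(&hwthread_state, KVM_HWTHREAD_IN_IDLE);
	}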
That commit also added a comment explaining that we have to switch to real mode before setting hwthread_state to avoid this race. Nevertheless, commit 4eae2c9ae54a ("powerpc/powernv: Make pnv_powersave_common more generic", 2016-07-08) subsequently moved the setting of hwthread_state back to a point where the MMU is on, thus reintroducing the race, despite the comment saying that this should not be done being included in full in the context lines of the patch that did it. This fixes the race again and adds a bigger and shoutier comment explaining the potential race condition. Fixes: 4eae2c9ae54a ("powerpc/powernv: Make pnv_powersave_common more generic") Cc: stable@vger.kernel.org # v4.8+ Signed-off-by: Paul Mackerras Reviewed-by: Shreyas B. Prabhu Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/idle_book3s.S | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index bd739fed26e3..0d8712a835d2 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -163,12 +163,6 @@ _GLOBAL(pnv_powersave_common) std r9,_MSR(r1) std r1,PACAR1(r13) -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - /* Tell KVM we're entering idle */ - li r4,KVM_HWTHREAD_IN_IDLE - stb r4,HSTATE_HWTHREAD_STATE(r13) -#endif - /* * Go to real mode to do the nap, as required by the architecture. * Also, we need to be in real mode before setting hwthread_state, @@ -185,6 +179,26 @@ _GLOBAL(pnv_powersave_common) .globl pnv_enter_arch207_idle_mode pnv_enter_arch207_idle_mode: +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* Tell KVM we're entering idle */ + li r4,KVM_HWTHREAD_IN_IDLE + /******************************************************/ + /* N O T E W E L L ! ! ! N O T E W E L L */ + /* The following store to HSTATE_HWTHREAD_STATE(r13) */ + /* MUST occur in real mode, i.e. with the MMU off, */ + /* and the MMU must stay off until we clear this flag */ + /* and test HSTATE_HWTHREAD_REQ(r13) in the system */ + /* reset interrupt vector in exceptions-64s.S. */ + /* The reason is that another thread can switch the */ + /* MMU to a guest context whenever this flag is set */ + /* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on, */ + /* that would potentially cause this thread to start */ + /* executing instructions from guest memory in */ + /* hypervisor mode, leading to a host crash or data */ + /* corruption, or worse. */ + /******************************************************/ + stb r4,HSTATE_HWTHREAD_STATE(r13) +#endif stb r3,PACA_THREAD_IDLE_STATE(r13) cmpwi cr3,r3,PNV_THREAD_SLEEP bge cr3,2f @@ -250,6 +264,12 @@ enter_winkle: * r3 - requested stop state */ power_enter_stop: +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* Tell KVM we're entering idle */ + li r4,KVM_HWTHREAD_IN_IDLE + /* DO THIS IN REAL MODE! See comment above. */ + stb r4,HSTATE_HWTHREAD_STATE(r13) +#endif /* * Check if the requested state is a deep idle state. */ -- cgit v1.2.3 From 09b7e37b18eecc1e347f4b1a3bc863f32801f634 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Fri, 21 Oct 2016 20:04:17 +1100 Subject: powerpc/64: Fix race condition in setting lock bit in idle/wakeup code This fixes a race condition where one thread that is entering or leaving a power-saving state can inadvertently ignore the lock bit that was set by another thread, and potentially also clear it. The core_idle_lock_held function is called when the lock bit is seen to be set. 
It polls the lock bit until it is clear, then does a lwarx to load the word containing the lock bit and thread idle bits so it can be updated. However, it is possible that the value loaded with the lwarx has the lock bit set, even though an immediately preceding lwz loaded a value with the lock bit clear. If this happens then we go ahead and update the word despite the lock bit being set, and when called from pnv_enter_arch207_idle_mode, we will subsequently clear the lock bit. No identifiable misbehaviour has been attributed to this race. This fixes it by checking the lock bit in the value loaded by the lwarx. If it is set then we just go back and keep on polling. Fixes: b32aadc1a8ed ("powerpc/powernv: Fix race in updating core_idle_state") Cc: stable@vger.kernel.org # v4.2+ Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/idle_book3s.S | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 0d8712a835d2..72dac0b58061 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -90,6 +90,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300) * Threads will spin in HMT_LOW until the lock bit is cleared. * r14 - pointer to core_idle_state * r15 - used to load contents of core_idle_state + * r9 - used as a temporary variable */ core_idle_lock_held: @@ -99,6 +100,8 @@ core_idle_lock_held: bne 3b HMT_MEDIUM lwarx r15,0,r14 + andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT + bne core_idle_lock_held blr /* -- cgit v1.2.3 From 44d524218c65e1f2e6d945b09165562852298015 Mon Sep 17 00:00:00 2001 From: Stefan Agner Date: Mon, 17 Oct 2016 18:51:27 -0700 Subject: ARM: dts: vf610: fix IRQ flag of global timer The global timer IRQ (PPI[0], PPI 11 in device tree terms) is a rising edge interrupt. The ARM Cortex-A5 MPCore TRM in Chapter 10.1.2. Interrupt types and sources says: "Interrupt is rising-edge sensitive." The bits seem to be read-only, hence this misconfiguration had no negative effect. However, with commit 992345a58e0c ("irqchip/gic: WARN if setting the interrupt type for a PPI fails") warnings such as this get printed: GIC: PPI11 is secure or misconfigured With this change the new configuration matches the default configuration and no warning is printed anymore. Signed-off-by: Stefan Agner Signed-off-by: Shawn Guo --- arch/arm/boot/dts/vf500.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/vf500.dtsi b/arch/arm/boot/dts/vf500.dtsi index a3824e61bd72..d7fdb2a7d97b 100644 --- a/arch/arm/boot/dts/vf500.dtsi +++ b/arch/arm/boot/dts/vf500.dtsi @@ -70,7 +70,7 @@ global_timer: timer@40002200 { compatible = "arm,cortex-a9-global-timer"; reg = <0x40002200 0x20>; - interrupts = <GIC_PPI 11 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_PPI 11 IRQ_TYPE_EDGE_RISING>; interrupt-parent = <&intc>; clocks = <&clks VF610_CLK_PLATFORM_BUS>; }; -- cgit v1.2.3 From f1caa61df2a3dc4c58316295c5dc5edba4c68d85 Mon Sep 17 00:00:00 2001 From: Sinan Kaya Date: Mon, 24 Oct 2016 00:31:31 -0400 Subject: ACPI/PCI: pci_link: penalize SCI correctly Ondrej reported that IRQs stopped working in v4.7 on several platforms. A typical scenario, from Ondrej's VT82C694X/694X, is: ACPI: Using PIC for interrupt routing ACPI: PCI Interrupt Link [LNKA] (IRQs 1 3 4 5 6 7 10 *11 12 14 15) ACPI: No IRQ available for PCI Interrupt Link [LNKA] 8139too 0000:00:0f.0: PCI INT A: no GSI We're using PIC routing, so acpi_irq_balance == 0, and LNKA is already active at IRQ 11.
In that case, acpi_pci_link_allocate() only tries to use the active IRQ (IRQ 11) which also happens to be the SCI. We should penalize the SCI by PIRQ_PENALTY_PCI_USING, but irq_get_trigger_type(11) returns something other than IRQ_TYPE_LEVEL_LOW, so we penalize it by PIRQ_PENALTY_ISA_ALWAYS instead, which makes acpi_pci_link_allocate() assume the IRQ isn't available and give up. Add acpi_penalize_sci_irq() so platforms can tell us the SCI IRQ, trigger, and polarity directly and we don't have to depend on irq_get_trigger_type(). Fixes: 103544d86976 (ACPI,PCI,IRQ: reduce resource requirements) Link: http://lkml.kernel.org/r/201609251512.05657.linux@rainbow-software.org Reported-by: Ondrej Zary Acked-by: Bjorn Helgaas Signed-off-by: Sinan Kaya Tested-by: Jonathan Liu Signed-off-by: Rafael J. Wysocki --- arch/x86/kernel/acpi/boot.c | 1 + drivers/acpi/pci_link.c | 30 +++++++++++++++--------------- include/linux/acpi.h | 1 + 3 files changed, 17 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 8a5abaa7d453..931ced8ca345 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -454,6 +454,7 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger, polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK; mp_override_legacy_irq(bus_irq, polarity, trigger, gsi); + acpi_penalize_sci_irq(bus_irq, trigger, polarity); /* * stash over-ride to indicate we've been here diff --git a/drivers/acpi/pci_link.c b/drivers/acpi/pci_link.c index 6229b022a5d4..74bf96efae95 100644 --- a/drivers/acpi/pci_link.c +++ b/drivers/acpi/pci_link.c @@ -87,6 +87,7 @@ struct acpi_pci_link { static LIST_HEAD(acpi_link_list); static DEFINE_MUTEX(acpi_link_lock); +static int sci_irq = -1, sci_penalty; /* -------------------------------------------------------------------------- PCI Link Device Management @@ -496,25 +497,13 @@ static int acpi_irq_get_penalty(int irq) { int penalty = 0; - /* - * Penalize IRQ used by ACPI SCI. If ACPI SCI pin attributes conflict - * with PCI IRQ attributes, mark ACPI SCI as ISA_ALWAYS so it won't be - * use for PCI IRQs. - */ - if (irq == acpi_gbl_FADT.sci_interrupt) { - u32 type = irq_get_trigger_type(irq) & IRQ_TYPE_SENSE_MASK; - - if (type != IRQ_TYPE_LEVEL_LOW) - penalty += PIRQ_PENALTY_ISA_ALWAYS; - else - penalty += PIRQ_PENALTY_PCI_USING; - } + if (irq == sci_irq) + penalty += sci_penalty; if (irq < ACPI_MAX_ISA_IRQS) return penalty + acpi_isa_irq_penalty[irq]; - penalty += acpi_irq_pci_sharing_penalty(irq); - return penalty; + return penalty + acpi_irq_pci_sharing_penalty(irq); } int __init acpi_irq_penalty_init(void) @@ -881,6 +870,17 @@ bool acpi_isa_irq_available(int irq) acpi_irq_get_penalty(irq) < PIRQ_PENALTY_ISA_ALWAYS); } +void acpi_penalize_sci_irq(int irq, int trigger, int polarity) +{ + sci_irq = irq; + + if (trigger == ACPI_MADT_TRIGGER_LEVEL && + polarity == ACPI_MADT_POLARITY_ACTIVE_LOW) + sci_penalty = PIRQ_PENALTY_PCI_USING; + else + sci_penalty = PIRQ_PENALTY_ISA_ALWAYS; +} + /* * Over-ride default table to reserve additional IRQs for use by ISA * e.g. 
acpi_irq_isa=5 diff --git a/include/linux/acpi.h b/include/linux/acpi.h index ddbeda6dbdc8..689a8b9b9c8f 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -326,6 +326,7 @@ struct pci_dev; int acpi_pci_irq_enable (struct pci_dev *dev); void acpi_penalize_isa_irq(int irq, int active); bool acpi_isa_irq_available(int irq); +void acpi_penalize_sci_irq(int irq, int trigger, int polarity); void acpi_pci_irq_disable (struct pci_dev *dev); extern int ec_read(u8 addr, u8 *val); -- cgit v1.2.3 From 4edd601c5a9c5094daa714e65063e623826f3bcc Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Mon, 24 Oct 2016 10:32:12 -0200 Subject: ARM: imx: mach-imx6q: Fix the PHY ID mask for AR8031 AR8031 and AR8035 have the same PHY ID mask of 0xffffffef. So fix it and make it match the PHY ID mask definition at drivers/net/phy/at803x.c. Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/mach-imx/mach-imx6q.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-imx/mach-imx6q.c b/arch/arm/mach-imx/mach-imx6q.c index 97fd25105e2c..45801b27ee5c 100644 --- a/arch/arm/mach-imx/mach-imx6q.c +++ b/arch/arm/mach-imx/mach-imx6q.c @@ -173,7 +173,7 @@ static void __init imx6q_enet_phy_init(void) ksz9021rn_phy_fixup); phy_register_fixup_for_uid(PHY_ID_KSZ9031, MICREL_PHY_ID_MASK, ksz9031rn_phy_fixup); - phy_register_fixup_for_uid(PHY_ID_AR8031, 0xffffffff, + phy_register_fixup_for_uid(PHY_ID_AR8031, 0xffffffef, ar8031_phy_fixup); phy_register_fixup_for_uid(PHY_ID_AR8035, 0xffffffef, ar8035_phy_fixup); -- cgit v1.2.3 From 91e040a79df73d371f70792f30380d4e44805250 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 20 Oct 2016 07:39:45 -0700 Subject: ARC: syscall for userspace cmpxchg assist Older ARC700 cores (ARC750 specifically) lack instructions to implement atomic r-m-w (read-modify-write). This is problematic for userspace libraries such as NPTL which need atomic primitives. So enable them by providing kernel assist. This is costly but really the only sane solution (other than tight spinning using the otherwise available atomic exchange EX instruction). The good thing is that there are only a few of these cores running Linux out in the wild. This only works on UP systems.
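For a sense of how userspace might sit on top of this assist, here is a hypothetical fragment (not from the patch), assuming an ARC toolchain whose uapi headers carry the __NR_arc_usr_cmpxchg number added below; the raw syscall returns 0 when the store happened and fails with EAGAIN when *lock no longer held the expected value:

	#include <errno.h>
	#include <unistd.h>
	#include <sys/syscall.h>

	/* Spin until *lock is atomically changed from 0 to 1. */
	static void lock_acquire(int *lock)
	{
		while (syscall(__NR_arc_usr_cmpxchg, lock, 0, 1) != 0) {
			if (errno != EAGAIN)
				break;	/* e.g. EFAULT: nothing sensible left to do */
		}
	}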
Reviewed-by: Colin Ian King Signed-off-by: Vineet Gupta --- arch/arc/include/asm/syscalls.h | 1 + arch/arc/include/uapi/asm/unistd.h | 9 +++++---- arch/arc/kernel/process.c | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/syscalls.h b/arch/arc/include/asm/syscalls.h index e56f9fcc5581..772b67ca56e7 100644 --- a/arch/arc/include/asm/syscalls.h +++ b/arch/arc/include/asm/syscalls.h @@ -17,6 +17,7 @@ int sys_clone_wrapper(int, int, int, int, int); int sys_cacheflush(uint32_t, uint32_t uint32_t); int sys_arc_settls(void *); int sys_arc_gettls(void); +int sys_arc_usr_cmpxchg(int *, int, int); #include <asm-generic/syscalls.h> diff --git a/arch/arc/include/uapi/asm/unistd.h b/arch/arc/include/uapi/asm/unistd.h index 41fa2ec9e02c..9a34136d84b2 100644 --- a/arch/arc/include/uapi/asm/unistd.h +++ b/arch/arc/include/uapi/asm/unistd.h @@ -27,18 +27,19 @@ #define NR_syscalls __NR_syscalls +/* Generic syscall (fs/filesystems.c - lost in asm-generic/unistd.h */ +#define __NR_sysfs (__NR_arch_specific_syscall + 3) /* ARC specific syscall */ #define __NR_cacheflush (__NR_arch_specific_syscall + 0) #define __NR_arc_settls (__NR_arch_specific_syscall + 1) #define __NR_arc_gettls (__NR_arch_specific_syscall + 2) +#define __NR_arc_usr_cmpxchg (__NR_arch_specific_syscall + 4) __SYSCALL(__NR_cacheflush, sys_cacheflush) __SYSCALL(__NR_arc_settls, sys_arc_settls) __SYSCALL(__NR_arc_gettls, sys_arc_gettls) - - -/* Generic syscall (fs/filesystems.c - lost in asm-generic/unistd.h */ -#define __NR_sysfs (__NR_arch_specific_syscall + 3) +__SYSCALL(__NR_arc_usr_cmpxchg, sys_arc_usr_cmpxchg) __SYSCALL(__NR_sysfs, sys_sysfs) #undef __SYSCALL diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index be1972bd2729..59aa43cb146e 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -41,6 +41,39 @@ SYSCALL_DEFINE0(arc_gettls) return task_thread_info(current)->thr_ptr; } +SYSCALL_DEFINE3(arc_usr_cmpxchg, int *, uaddr, int, expected, int, new) +{ + int uval; + int ret; + + /* + * This is only for old cores lacking LLOCK/SCOND, which by definition + * can't possibly be SMP. Thus doesn't need to be SMP safe. + * And this also helps reduce the overhead for serializing in + * the UP case + */ + WARN_ON_ONCE(IS_ENABLED(CONFIG_SMP)); + + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) + return -EFAULT; + + preempt_disable(); + + ret = __get_user(uval, uaddr); + if (ret) + goto done; + + if (uval != expected) + ret = -EAGAIN; + else + ret = __put_user(new, uaddr); + +done: + preempt_enable(); + + return ret; +} + void arch_cpu_idle(void) { /* sleep, but enable all interrupts before committing */ -- cgit v1.2.3 From cf986d470208fbdd68b6934a86ccd81c04408484 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 13 Oct 2016 15:58:59 -0700 Subject: ARCv2: IOC: use @ioc_enable not @ioc_exists where intended If the user disables IOC from the debugger at startup (by clearing @ioc_enable), @ioc_exists is cleared too. This means boot prints don't capture the fact that IOC was present but disabled, which could be misleading. So invert how we use @ioc_enable and @ioc_exists and make it more canonical. @ioc_exists represents whether the hardware is present or not and stays the same whether enabled or not. @ioc_enable is still user driven, but will be auto-disabled if IOC hardware is not present, i.e. if @ioc_exists=0. This is opposite to what we were doing before, but much clearer.
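Condensed into a sketch (simplified from the cache.c hunk below, not a verbatim quote; probe_ioc() and hw_present are illustrative names), the new relationship between the two flags is:

	static int ioc_exists;		/* hardware presence, probed once */
	int ioc_enable = 1;		/* user-driven toggle */

	static void probe_ioc(int hw_present)	/* hw_present ~ cbcr.c */
	{
		if (hw_present)
			ioc_exists = 1;
		else
			ioc_enable = 0;	/* toggle is auto-cleared without hardware */
	}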
This means @ioc_enable is now the "exported" toggle in the rest of the code, such as the DMA mapping API. Signed-off-by: Vineet Gupta --- arch/arc/include/asm/cache.h | 2 +- arch/arc/mm/cache.c | 10 ++++++---- arch/arc/mm/dma.c | 4 ++-- 3 files changed, 9 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index fb781e34f322..b3410ff6a62d 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -53,7 +53,7 @@ extern void arc_cache_init(void); extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len); extern void read_decode_cache_bcr(void); -extern int ioc_exists; +extern int ioc_enable; extern unsigned long perip_base, perip_end; #endif /* !__ASSEMBLY__ */ diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 97dddbefb86a..518ff76771f3 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -22,8 +22,8 @@ #include static int l2_line_sz; -int ioc_exists; -volatile int slc_enable = 1, ioc_enable = 1; +static int ioc_exists; +int slc_enable = 1, ioc_enable = 1; unsigned long perip_base = ARC_UNCACHED_ADDR_SPACE; /* legacy value for boot */ unsigned long perip_end = 0xFFFFFFFF; /* legacy value */ @@ -113,8 +113,10 @@ static void read_decode_cache_bcr_arcv2(int cpu) } READ_BCR(ARC_REG_CLUSTER_BCR, cbcr); - if (cbcr.c && ioc_enable) + if (cbcr.c) ioc_exists = 1; + else + ioc_enable = 0; /* HS 2.0 didn't have AUX_VOL */ if (cpuinfo_arc700[cpu].core.family > 0x51) { @@ -1002,7 +1004,7 @@ void arc_cache_init(void) read_aux_reg(ARC_REG_SLC_CTRL) | SLC_CTRL_DISABLE); } - if (is_isa_arcv2() && ioc_exists) { + if (is_isa_arcv2() && ioc_enable) { /* IO coherency base - 0x8z */ write_aux_reg(ARC_REG_IO_COH_AP0_BASE, 0x80000); /* IO coherency aperture size - 512Mb: 0x8z-0xAz */ diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index 20afc65e22dc..60aab5a7522b 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -45,7 +45,7 @@ static void *arc_dma_alloc(struct device *dev, size_t size, * -For coherent data, Read/Write to buffers terminate early in cache * (vs. always going to memory - thus are faster) */ - if ((is_isa_arcv2() && ioc_exists) || + if ((is_isa_arcv2() && ioc_enable) || (attrs & DMA_ATTR_NON_CONSISTENT)) need_coh = 0; @@ -97,7 +97,7 @@ static void arc_dma_free(struct device *dev, size_t size, void *vaddr, int is_non_coh = 1; is_non_coh = (attrs & DMA_ATTR_NON_CONSISTENT) || - (is_isa_arcv2() && ioc_exists); + (is_isa_arcv2() && ioc_enable); if (PageHighMem(page) || !is_non_coh) iounmap((void __force __iomem *)vaddr); -- cgit v1.2.3 From d624716b6c67e60681180786564b92ddb521148a Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Wed, 19 Oct 2016 18:33:29 -0600 Subject: sparc64: Setup a scheduling domain for highest level cache. Each scheduler domain should consist of a distinct hierarchy of cores sharing a similar property. Currently, no scheduler domain is defined separately for the cores that share the last level cache. As a result, the scheduler fails to take advantage of cache locality while migrating tasks during load balancing. Here are the cpu masks currently present for sparc that are/can be used in scheduler domain construction. cpu_core_map : set based on the cores that share L1 cache. cpu_core_sib_map : set based on the socket id. The prior SPARC notion of socket was defined as the highest level of shared cache. However, the MD record on T7 platforms now describes the CPUs that share the physical socket and this is no longer tied to shared cache.
That's why a separate cpu mask needs to be created that truly represent highest level of shared cache for all platforms. Signed-off-by: Atish Patra Reviewed-by: Chris Hyser Signed-off-by: David S. Miller --- arch/sparc/include/asm/cpudata_64.h | 5 ++-- arch/sparc/include/asm/topology_64.h | 8 ++++++- arch/sparc/kernel/mdesc.c | 46 ++++++++++++++++++++++-------------- arch/sparc/kernel/smp_64.c | 8 +++++++ 4 files changed, 46 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/sparc/include/asm/cpudata_64.h b/arch/sparc/include/asm/cpudata_64.h index a6cfdabb6054..5b0ed48e5b0c 100644 --- a/arch/sparc/include/asm/cpudata_64.h +++ b/arch/sparc/include/asm/cpudata_64.h @@ -24,9 +24,10 @@ typedef struct { unsigned int icache_line_size; unsigned int ecache_size; unsigned int ecache_line_size; - unsigned short sock_id; + unsigned short sock_id; /* physical package */ unsigned short core_id; - int proc_id; + unsigned short max_cache_id; /* groupings of highest shared cache */ + unsigned short proc_id; /* strand (aka HW thread) id */ } cpuinfo_sparc; DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data); diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h index bec481aaca16..7b4898a36eee 100644 --- a/arch/sparc/include/asm/topology_64.h +++ b/arch/sparc/include/asm/topology_64.h @@ -44,14 +44,20 @@ int __node_distance(int, int); #define topology_physical_package_id(cpu) (cpu_data(cpu).proc_id) #define topology_core_id(cpu) (cpu_data(cpu).core_id) #define topology_core_cpumask(cpu) (&cpu_core_sib_map[cpu]) +#define topology_core_cache_cpumask(cpu) (&cpu_core_sib_cache_map[cpu]) #define topology_sibling_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) #endif /* CONFIG_SMP */ extern cpumask_t cpu_core_map[NR_CPUS]; extern cpumask_t cpu_core_sib_map[NR_CPUS]; +extern cpumask_t cpu_core_sib_cache_map[NR_CPUS]; + +/** + * Return cores that shares the last level cache. 
+ */ static inline const struct cpumask *cpu_coregroup_mask(int cpu) { - return &cpu_core_map[cpu]; + return &cpu_core_sib_cache_map[cpu]; } #endif /* _ASM_SPARC64_TOPOLOGY_H */ diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c index 11228861d9b4..8a6982dfd733 100644 --- a/arch/sparc/kernel/mdesc.c +++ b/arch/sparc/kernel/mdesc.c @@ -645,13 +645,20 @@ static void __mark_core_id(struct mdesc_handle *hp, u64 node, cpu_data(*id).core_id = core_id; } -static void __mark_sock_id(struct mdesc_handle *hp, u64 node, - int sock_id) +static void __mark_max_cache_id(struct mdesc_handle *hp, u64 node, + int max_cache_id) { const u64 *id = mdesc_get_property(hp, node, "id", NULL); - if (*id < num_possible_cpus()) - cpu_data(*id).sock_id = sock_id; + if (*id < num_possible_cpus()) { + cpu_data(*id).max_cache_id = max_cache_id; + + /** + * On systems without explicit socket descriptions socket + * is max_cache_id + */ + cpu_data(*id).sock_id = max_cache_id; + } } static void mark_core_ids(struct mdesc_handle *hp, u64 mp, @@ -660,10 +667,11 @@ static void mark_core_ids(struct mdesc_handle *hp, u64 mp, find_back_node_value(hp, mp, "cpu", __mark_core_id, core_id, 10); } -static void mark_sock_ids(struct mdesc_handle *hp, u64 mp, - int sock_id) +static void mark_max_cache_ids(struct mdesc_handle *hp, u64 mp, + int max_cache_id) { - find_back_node_value(hp, mp, "cpu", __mark_sock_id, sock_id, 10); + find_back_node_value(hp, mp, "cpu", __mark_max_cache_id, + max_cache_id, 10); } static void set_core_ids(struct mdesc_handle *hp) @@ -694,14 +702,15 @@ static void set_core_ids(struct mdesc_handle *hp) } } -static int set_sock_ids_by_cache(struct mdesc_handle *hp, int level) +static int set_max_cache_ids_by_cache(struct mdesc_handle *hp, int level) { u64 mp; int idx = 1; int fnd = 0; - /* Identify unique sockets by looking for cpus backpointed to by - * shared level n caches. + /** + * Identify unique highest level of shared cache by looking for cpus + * backpointed to by shared level N caches. */ mdesc_for_each_node_by_name(hp, mp, "cache") { const u64 *cur_lvl; @@ -709,8 +718,7 @@ static int set_sock_ids_by_cache(struct mdesc_handle *hp, int level) cur_lvl = mdesc_get_property(hp, mp, "level", NULL); if (*cur_lvl != level) continue; - - mark_sock_ids(hp, mp, idx); + mark_max_cache_ids(hp, mp, idx); idx++; fnd = 1; } @@ -745,15 +753,17 @@ static void set_sock_ids(struct mdesc_handle *hp) { u64 mp; - /* If machine description exposes sockets data use it. - * Otherwise fallback to use shared L3 or L2 caches. + /** + * Find the highest level of shared cache which pre-T7 is also + * the socket. */ + if (!set_max_cache_ids_by_cache(hp, 3)) + set_max_cache_ids_by_cache(hp, 2); + + /* If machine description exposes sockets data use it.*/ mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "sockets"); if (mp != MDESC_NODE_NULL) - return set_sock_ids_by_socket(hp, mp); - - if (!set_sock_ids_by_cache(hp, 3)) - set_sock_ids_by_cache(hp, 2); + set_sock_ids_by_socket(hp, mp); } static void mark_proc_ids(struct mdesc_handle *hp, u64 mp, int proc_id) diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index d3035ba6cd31..8182f7caf5b1 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -63,9 +63,13 @@ cpumask_t cpu_core_map[NR_CPUS] __read_mostly = cpumask_t cpu_core_sib_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = CPU_MASK_NONE }; +cpumask_t cpu_core_sib_cache_map[NR_CPUS] __read_mostly = { + [0 ... 
NR_CPUS - 1] = CPU_MASK_NONE }; + EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); EXPORT_SYMBOL(cpu_core_map); EXPORT_SYMBOL(cpu_core_sib_map); +EXPORT_SYMBOL(cpu_core_sib_cache_map); static cpumask_t smp_commenced_mask; @@ -1265,6 +1269,10 @@ void smp_fill_in_sib_core_maps(void) unsigned int j; for_each_present_cpu(j) { + if (cpu_data(i).max_cache_id == + cpu_data(j).max_cache_id) + cpumask_set_cpu(j, &cpu_core_sib_cache_map[i]); + if (cpu_data(i).sock_id == cpu_data(j).sock_id) cpumask_set_cpu(j, &cpu_core_sib_map[i]); } -- cgit v1.2.3 From 844bdf1b2a2f1790eba9e679bda1c632ee744d4e Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Fri, 21 Oct 2016 15:25:54 +0200 Subject: sparc64: Fix old style declaration GCC warnings Fix [-Wold-style-declaration] GCC warnings by moving the inline keyword before the return type. Signed-off-by: Tobias Klauser Signed-off-by: David S. Miller --- arch/sparc/include/asm/spinlock_64.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/sparc/include/asm/spinlock_64.h b/arch/sparc/include/asm/spinlock_64.h index 87990b7c6b0d..07c9f2e9bf57 100644 --- a/arch/sparc/include/asm/spinlock_64.h +++ b/arch/sparc/include/asm/spinlock_64.h @@ -96,7 +96,7 @@ static inline void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long fla /* Multi-reader locks, these are much saner than the 32-bit Sparc ones... */ -static void inline arch_read_lock(arch_rwlock_t *lock) +static inline void arch_read_lock(arch_rwlock_t *lock) { unsigned long tmp1, tmp2; @@ -119,7 +119,7 @@ static void inline arch_read_lock(arch_rwlock_t *lock) : "memory"); } -static int inline arch_read_trylock(arch_rwlock_t *lock) +static inline int arch_read_trylock(arch_rwlock_t *lock) { int tmp1, tmp2; @@ -140,7 +140,7 @@ static int inline arch_read_trylock(arch_rwlock_t *lock) return tmp1; } -static void inline arch_read_unlock(arch_rwlock_t *lock) +static inline void arch_read_unlock(arch_rwlock_t *lock) { unsigned long tmp1, tmp2; @@ -156,7 +156,7 @@ static void inline arch_read_unlock(arch_rwlock_t *lock) : "memory"); } -static void inline arch_write_lock(arch_rwlock_t *lock) +static inline void arch_write_lock(arch_rwlock_t *lock) { unsigned long mask, tmp1, tmp2; @@ -181,7 +181,7 @@ static void inline arch_write_lock(arch_rwlock_t *lock) : "memory"); } -static void inline arch_write_unlock(arch_rwlock_t *lock) +static inline void arch_write_unlock(arch_rwlock_t *lock) { __asm__ __volatile__( " stw %%g0, [%0]" @@ -190,7 +190,7 @@ static void inline arch_write_unlock(arch_rwlock_t *lock) : "memory"); } -static int inline arch_write_trylock(arch_rwlock_t *lock) +static inline int arch_write_trylock(arch_rwlock_t *lock) { unsigned long mask, tmp1, tmp2, result; -- cgit v1.2.3 From ee9e83973d54d94ff776892219b723de54429548 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Fri, 21 Oct 2016 15:39:03 +0200 Subject: sparc32: Fix old style declaration GCC warnings Fix [-Wold-style-declaration] GCC warnings by moving the inline keyword before the return type. Signed-off-by: Tobias Klauser Signed-off-by: David S.
Miller --- arch/sparc/include/asm/spinlock_32.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h index d9c5876c6121..8011e79f59c9 100644 --- a/arch/sparc/include/asm/spinlock_32.h +++ b/arch/sparc/include/asm/spinlock_32.h @@ -134,7 +134,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw) *(volatile __u32 *)&lp->lock = ~0U; } -static void inline arch_write_unlock(arch_rwlock_t *lock) +static inline void arch_write_unlock(arch_rwlock_t *lock) { __asm__ __volatile__( " st %%g0, [%0]" -- cgit v1.2.3 From aa95ce361ed95c72ac42dcb315166bce5cf1a014 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 10 Aug 2016 14:41:33 -0700 Subject: sparc64: Delete __ret_efault. It is completely unused. Signed-off-by: David S. Miller --- arch/sparc/include/asm/uaccess_64.h | 1 - arch/sparc/kernel/head_64.S | 8 +------- 2 files changed, 1 insertion(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h index b68acc563235..f8518df34b63 100644 --- a/arch/sparc/include/asm/uaccess_64.h +++ b/arch/sparc/include/asm/uaccess_64.h @@ -82,7 +82,6 @@ static inline int access_ok(int type, const void __user * addr, unsigned long si return 1; } -void __ret_efault(void); void __retl_efault(void); /* Uh, these should become the main single-value transfer routines.. diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index beba6c11554c..603d73654295 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -926,13 +926,7 @@ tlb_type: .word 0 /* Must NOT end up in BSS */ EXPORT_SYMBOL(tlb_type) .section ".fixup",#alloc,#execinstr - .globl __ret_efault, __retl_efault, __ret_one, __retl_one -ENTRY(__ret_efault) - ret - restore %g0, -EFAULT, %o0 -ENDPROC(__ret_efault) -EXPORT_SYMBOL(__ret_efault) - + .globl __retl_efault, __ret_one, __retl_one ENTRY(__retl_efault) retl mov -EFAULT, %o0 -- cgit v1.2.3 From 83a17d2661674d8c198adc0e183418f72aabab79 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 15 Aug 2016 14:47:54 -0700 Subject: sparc64: Prepare to move to more saner user copy exception handling. The fixup helper mechanism for handling user copy faults is not 100% accurate, and can never be made so. We are going to transition the code to return the running remaining length, which is always kept track of in one or more registers by each of these routines. In order to convert them one by one, we have to allow the existing behavior to continue functioning. Therefore make all the copy code that wants the fixup helper to be used return negative one. After all of the user copy routines have been converted, this logic and the fixup helpers themselves can be removed completely. Signed-off-by: David S.
Miller --- arch/sparc/include/asm/uaccess_64.h | 21 +++++++++++++++------ arch/sparc/kernel/head_64.S | 23 +++++++++++------------ arch/sparc/lib/GENcopy_from_user.S | 2 +- arch/sparc/lib/GENcopy_to_user.S | 2 +- arch/sparc/lib/NG2copy_from_user.S | 4 ++-- arch/sparc/lib/NG2copy_to_user.S | 4 ++-- arch/sparc/lib/NG4copy_from_user.S | 4 ++-- arch/sparc/lib/NG4copy_to_user.S | 4 ++-- arch/sparc/lib/NGcopy_from_user.S | 2 +- arch/sparc/lib/NGcopy_to_user.S | 2 +- arch/sparc/lib/U1copy_from_user.S | 4 ++-- arch/sparc/lib/U1copy_to_user.S | 4 ++-- arch/sparc/lib/U3copy_from_user.S | 4 ++-- arch/sparc/lib/U3copy_to_user.S | 4 ++-- arch/sparc/lib/copy_in_user.S | 2 +- 15 files changed, 47 insertions(+), 39 deletions(-) (limited to 'arch') diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h index f8518df34b63..0244012435c8 100644 --- a/arch/sparc/include/asm/uaccess_64.h +++ b/arch/sparc/include/asm/uaccess_64.h @@ -198,8 +198,11 @@ copy_from_user(void *to, const void __user *from, unsigned long size) check_object_size(to, size, false); ret = ___copy_from_user(to, from, size); - if (unlikely(ret)) - ret = copy_from_user_fixup(to, from, size); + if (unlikely(ret)) { + if ((long)ret < 0) + ret = copy_from_user_fixup(to, from, size); + return ret; + } return ret; } @@ -218,8 +221,11 @@ copy_to_user(void __user *to, const void *from, unsigned long size) check_object_size(from, size, true); ret = ___copy_to_user(to, from, size); - if (unlikely(ret)) - ret = copy_to_user_fixup(to, from, size); + if (unlikely(ret)) { + if ((long)ret < 0) + ret = copy_to_user_fixup(to, from, size); + return ret; + } return ret; } #define __copy_to_user copy_to_user @@ -234,8 +240,11 @@ copy_in_user(void __user *to, void __user *from, unsigned long size) { unsigned long ret = ___copy_in_user(to, from, size); - if (unlikely(ret)) - ret = copy_in_user_fixup(to, from, size); + if (unlikely(ret)) { + if ((long)ret < 0) + ret = copy_in_user_fixup(to, from, size); + return ret; + } return ret; } #define __copy_in_user copy_in_user diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 603d73654295..5f17de6b5fc3 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -926,41 +926,40 @@ tlb_type: .word 0 /* Must NOT end up in BSS */ EXPORT_SYMBOL(tlb_type) .section ".fixup",#alloc,#execinstr - .globl __retl_efault, __ret_one, __retl_one ENTRY(__retl_efault) retl mov -EFAULT, %o0 ENDPROC(__retl_efault) -ENTRY(__retl_one) +ENTRY(__retl_mone) retl - mov 1, %o0 -ENDPROC(__retl_one) + mov -1, %o0 +ENDPROC(__retl_mone) -ENTRY(__retl_one_fp) +ENTRY(__retl_mone_fp) VISExitHalf retl mov 1, %o0 -ENDPROC(__retl_one_fp) +ENDPROC(__retl_mone_fp) -ENTRY(__ret_one_asi) +ENTRY(__ret_mone_asi) wr %g0, ASI_AIUS, %asi ret restore %g0, 1, %o0 -ENDPROC(__ret_one_asi) +ENDPROC(__ret_mone_asi) -ENTRY(__retl_one_asi) +ENTRY(__retl_mone_asi) wr %g0, ASI_AIUS, %asi retl mov 1, %o0 -ENDPROC(__retl_one_asi) +ENDPROC(__retl_mone_asi) -ENTRY(__retl_one_asi_fp) +ENTRY(__retl_mone_asi_fp) wr %g0, ASI_AIUS, %asi VISExitHalf retl mov 1, %o0 -ENDPROC(__retl_one_asi_fp) +ENDPROC(__retl_mone_asi_fp) ENTRY(__retl_o1) retl diff --git a/arch/sparc/lib/GENcopy_from_user.S b/arch/sparc/lib/GENcopy_from_user.S index b7d0bd6b1406..5bce68202246 100644 --- a/arch/sparc/lib/GENcopy_from_user.S +++ b/arch/sparc/lib/GENcopy_from_user.S @@ -7,7 +7,7 @@ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, __retl_mone; \ .text; \ .align 4; diff --git 
a/arch/sparc/lib/GENcopy_to_user.S b/arch/sparc/lib/GENcopy_to_user.S index 780550e1afc7..f663ce3ee8d9 100644 --- a/arch/sparc/lib/GENcopy_to_user.S +++ b/arch/sparc/lib/GENcopy_to_user.S @@ -7,7 +7,7 @@ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, __retl_mone; \ .text; \ .align 4; diff --git a/arch/sparc/lib/NG2copy_from_user.S b/arch/sparc/lib/NG2copy_from_user.S index d5242b8c4f94..4d47fa5519d4 100644 --- a/arch/sparc/lib/NG2copy_from_user.S +++ b/arch/sparc/lib/NG2copy_from_user.S @@ -7,7 +7,7 @@ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi;\ + .word 98b, __retl_mone_asi;\ .text; \ .align 4; @@ -15,7 +15,7 @@ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi_fp;\ + .word 98b, __retl_mone_asi_fp;\ .text; \ .align 4; diff --git a/arch/sparc/lib/NG2copy_to_user.S b/arch/sparc/lib/NG2copy_to_user.S index 4e962d993b10..2078d752709a 100644 --- a/arch/sparc/lib/NG2copy_to_user.S +++ b/arch/sparc/lib/NG2copy_to_user.S @@ -7,7 +7,7 @@ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi;\ + .word 98b, __retl_mone_asi;\ .text; \ .align 4; @@ -15,7 +15,7 @@ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi_fp;\ + .word 98b, __retl_mone_asi_fp;\ .text; \ .align 4; diff --git a/arch/sparc/lib/NG4copy_from_user.S b/arch/sparc/lib/NG4copy_from_user.S index 2e8ee7ad07a9..f9746e7cf25e 100644 --- a/arch/sparc/lib/NG4copy_from_user.S +++ b/arch/sparc/lib/NG4copy_from_user.S @@ -7,7 +7,7 @@ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi;\ + .word 98b, __retl_mone_asi;\ .text; \ .align 4; @@ -15,7 +15,7 @@ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi_fp;\ + .word 98b, __retl_mone_asi_fp;\ .text; \ .align 4; diff --git a/arch/sparc/lib/NG4copy_to_user.S b/arch/sparc/lib/NG4copy_to_user.S index be0bf4590df8..5fa44349adde 100644 --- a/arch/sparc/lib/NG4copy_to_user.S +++ b/arch/sparc/lib/NG4copy_to_user.S @@ -7,7 +7,7 @@ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi;\ + .word 98b, __retl_mone_asi;\ .text; \ .align 4; @@ -15,7 +15,7 @@ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi_fp;\ + .word 98b, __retl_mone_asi_fp;\ .text; \ .align 4; diff --git a/arch/sparc/lib/NGcopy_from_user.S b/arch/sparc/lib/NGcopy_from_user.S index 5d1e4d1ac21e..e61694c444af 100644 --- a/arch/sparc/lib/NGcopy_from_user.S +++ b/arch/sparc/lib/NGcopy_from_user.S @@ -7,7 +7,7 @@ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __ret_one_asi;\ + .word 98b, __ret_mone_asi;\ .text; \ .align 4; diff --git a/arch/sparc/lib/NGcopy_to_user.S b/arch/sparc/lib/NGcopy_to_user.S index ff630dcb273c..2d6b33d3998f 100644 --- a/arch/sparc/lib/NGcopy_to_user.S +++ b/arch/sparc/lib/NGcopy_to_user.S @@ -7,7 +7,7 @@ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __ret_one_asi;\ + .word 98b, __ret_mone_asi;\ .text; \ .align 4; diff --git a/arch/sparc/lib/U1copy_from_user.S b/arch/sparc/lib/U1copy_from_user.S index ecc5692fa2b4..1ad59fbac7a7 100644 --- a/arch/sparc/lib/U1copy_from_user.S +++ b/arch/sparc/lib/U1copy_from_user.S @@ -7,7 +7,7 @@ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, __retl_mone; \ .text; \ .align 4; @@ -15,7 +15,7 @@ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_fp;\ + .word 98b, __retl_mone_fp;\ .text; \ .align 4; diff --git a/arch/sparc/lib/U1copy_to_user.S b/arch/sparc/lib/U1copy_to_user.S 
index 9eea392e44d4..adcc3a510185 100644 --- a/arch/sparc/lib/U1copy_to_user.S +++ b/arch/sparc/lib/U1copy_to_user.S @@ -7,7 +7,7 @@ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, __retl_mone; \ .text; \ .align 4; @@ -15,7 +15,7 @@ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_fp;\ + .word 98b, __retl_mone_fp;\ .text; \ .align 4; diff --git a/arch/sparc/lib/U3copy_from_user.S b/arch/sparc/lib/U3copy_from_user.S index 88ad73d86fe4..1046e2b083fe 100644 --- a/arch/sparc/lib/U3copy_from_user.S +++ b/arch/sparc/lib/U3copy_from_user.S @@ -7,7 +7,7 @@ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, __retl_mone; \ .text; \ .align 4; @@ -15,7 +15,7 @@ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_fp;\ + .word 98b, __retl_mone_fp;\ .text; \ .align 4; diff --git a/arch/sparc/lib/U3copy_to_user.S b/arch/sparc/lib/U3copy_to_user.S index 845139d75537..032b0c5419b0 100644 --- a/arch/sparc/lib/U3copy_to_user.S +++ b/arch/sparc/lib/U3copy_to_user.S @@ -7,7 +7,7 @@ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, __retl_mone; \ .text; \ .align 4; @@ -15,7 +15,7 @@ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_fp;\ + .word 98b, __retl_mone_fp;\ .text; \ .align 4; diff --git a/arch/sparc/lib/copy_in_user.S b/arch/sparc/lib/copy_in_user.S index 482de093bdae..86e6663dd4e0 100644 --- a/arch/sparc/lib/copy_in_user.S +++ b/arch/sparc/lib/copy_in_user.S @@ -13,7 +13,7 @@ 98: x,y; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, __retl_mone; \ .text; \ .align 4; -- cgit v1.2.3 From 0096ac9f47b1a2e851b3165d44065d18e5f13d58 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 15 Aug 2016 15:08:18 -0700 Subject: sparc64: Convert copy_in_user to accurate exception reporting. Report the exact number of bytes which have not been successfully copied when an exception occurs, using the running remaining length. Signed-off-by: David S. 
Miller --- arch/sparc/lib/copy_in_user.S | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/sparc/lib/copy_in_user.S b/arch/sparc/lib/copy_in_user.S index 86e6663dd4e0..0252b218de45 100644 --- a/arch/sparc/lib/copy_in_user.S +++ b/arch/sparc/lib/copy_in_user.S @@ -9,18 +9,33 @@ #define XCC xcc -#define EX(x,y) \ +#define EX(x,y,z) \ 98: x,y; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_mone; \ + .word 98b, z; \ .text; \ .align 4; +#define EX_O4(x,y) EX(x,y,__retl_o4_plus_8) +#define EX_O2_4(x,y) EX(x,y,__retl_o2_plus_4) +#define EX_O2_1(x,y) EX(x,y,__retl_o2_plus_1) + .register %g2,#scratch .register %g3,#scratch .text +__retl_o4_plus_8: + add %o4, %o2, %o4 + retl + add %o4, 8, %o0 +__retl_o2_plus_4: + retl + add %o2, 4, %o0 +__retl_o2_plus_1: + retl + add %o2, 1, %o0 + .align 32 /* Don't try to get too fancy here, just nice and @@ -45,8 +60,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */ andn %o2, 0x7, %o4 and %o2, 0x7, %o2 1: subcc %o4, 0x8, %o4 - EX(ldxa [%o1] %asi, %o5) - EX(stxa %o5, [%o0] %asi) + EX_O4(ldxa [%o1] %asi, %o5) + EX_O4(stxa %o5, [%o0] %asi) add %o1, 0x8, %o1 bgu,pt %XCC, 1b add %o0, 0x8, %o0 @@ -54,8 +69,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */ be,pt %XCC, 1f nop sub %o2, 0x4, %o2 - EX(lduwa [%o1] %asi, %o5) - EX(stwa %o5, [%o0] %asi) + EX_O2_4(lduwa [%o1] %asi, %o5) + EX_O2_4(stwa %o5, [%o0] %asi) add %o1, 0x4, %o1 add %o0, 0x4, %o0 1: cmp %o2, 0 @@ -71,8 +86,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */ 82: subcc %o2, 4, %o2 - EX(lduwa [%o1] %asi, %g1) - EX(stwa %g1, [%o0] %asi) + EX_O2_4(lduwa [%o1] %asi, %g1) + EX_O2_4(stwa %g1, [%o0] %asi) add %o1, 4, %o1 bgu,pt %XCC, 82b add %o0, 4, %o0 @@ -83,8 +98,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */ .align 32 90: subcc %o2, 1, %o2 - EX(lduba [%o1] %asi, %g1) - EX(stba %g1, [%o0] %asi) + EX_O2_1(lduba [%o1] %asi, %g1) + EX_O2_1(stba %g1, [%o0] %asi) add %o1, 1, %o1 bgu,pt %XCC, 90b add %o0, 1, %o0 -- cgit v1.2.3 From d0796b555ba60c22eb41ae39a8362156cb08eee9 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 15 Aug 2016 15:26:38 -0700 Subject: sparc64: Convert GENcopy_{from,to}_user to accurate exception reporting. Report the exact number of bytes which have not been successfully copied when an exception occurs, using the running remaining length. Signed-off-by: David S. Miller --- arch/sparc/lib/GENcopy_from_user.S | 4 ++-- arch/sparc/lib/GENcopy_to_user.S | 4 ++-- arch/sparc/lib/GENmemcpy.S | 48 +++++++++++++++++++++++++++----------- 3 files changed, 38 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/sparc/lib/GENcopy_from_user.S b/arch/sparc/lib/GENcopy_from_user.S index 5bce68202246..69a439fa2fc1 100644 --- a/arch/sparc/lib/GENcopy_from_user.S +++ b/arch/sparc/lib/GENcopy_from_user.S @@ -3,11 +3,11 @@ * Copyright (C) 2007 David S. Miller (davem@davemloft.net) */ -#define EX_LD(x) \ +#define EX_LD(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_mone; \ + .word 98b, y; \ .text; \ .align 4; diff --git a/arch/sparc/lib/GENcopy_to_user.S b/arch/sparc/lib/GENcopy_to_user.S index f663ce3ee8d9..9947427ce354 100644 --- a/arch/sparc/lib/GENcopy_to_user.S +++ b/arch/sparc/lib/GENcopy_to_user.S @@ -3,11 +3,11 @@ * Copyright (C) 2007 David S. 
Miller (davem@davemloft.net) */ -#define EX_ST(x) \ +#define EX_ST(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_mone; \ + .word 98b, y; \ .text; \ .align 4; diff --git a/arch/sparc/lib/GENmemcpy.S b/arch/sparc/lib/GENmemcpy.S index 89358ee94851..059ea24ad73d 100644 --- a/arch/sparc/lib/GENmemcpy.S +++ b/arch/sparc/lib/GENmemcpy.S @@ -4,21 +4,18 @@ */ #ifdef __KERNEL__ +#include #define GLOBAL_SPARE %g7 #else #define GLOBAL_SPARE %g5 #endif #ifndef EX_LD -#define EX_LD(x) x +#define EX_LD(x,y) x #endif #ifndef EX_ST -#define EX_ST(x) x -#endif - -#ifndef EX_RETVAL -#define EX_RETVAL(x) x +#define EX_ST(x,y) x #endif #ifndef LOAD @@ -45,6 +42,29 @@ .register %g3,#scratch .text + +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +ENTRY(GEN_retl_o4_1) + add %o4, %o2, %o4 + retl + add %o4, 1, %o0 +ENDPROC(GEN_retl_o4_1) +ENTRY(GEN_retl_g1_8) + add %g1, %o2, %g1 + retl + add %g1, 8, %o0 +ENDPROC(GEN_retl_g1_8) +ENTRY(GEN_retl_o2_4) + retl + add %o2, 4, %o0 +ENDPROC(GEN_retl_o2_4) +ENTRY(GEN_retl_o2_1) + retl + add %o2, 1, %o0 +ENDPROC(GEN_retl_o2_1) +#endif + .align 64 .globl FUNC_NAME @@ -73,8 +93,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %g0, %o4, %o4 sub %o2, %o4, %o2 1: subcc %o4, 1, %o4 - EX_LD(LOAD(ldub, %o1, %g1)) - EX_ST(STORE(stb, %g1, %o0)) + EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o4_1) + EX_ST(STORE(stb, %g1, %o0),GEN_retl_o4_1) add %o1, 1, %o1 bne,pt %XCC, 1b add %o0, 1, %o0 @@ -82,8 +102,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ andn %o2, 0x7, %g1 sub %o2, %g1, %o2 1: subcc %g1, 0x8, %g1 - EX_LD(LOAD(ldx, %o1, %g2)) - EX_ST(STORE(stx, %g2, %o0)) + EX_LD(LOAD(ldx, %o1, %g2),GEN_retl_g1_8) + EX_ST(STORE(stx, %g2, %o0),GEN_retl_g1_8) add %o1, 0x8, %o1 bne,pt %XCC, 1b add %o0, 0x8, %o0 @@ -100,8 +120,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 1: subcc %o2, 4, %o2 - EX_LD(LOAD(lduw, %o1, %g1)) - EX_ST(STORE(stw, %g1, %o1 + %o3)) + EX_LD(LOAD(lduw, %o1, %g1),GEN_retl_o2_4) + EX_ST(STORE(stw, %g1, %o1 + %o3),GEN_retl_o2_4) bgu,pt %XCC, 1b add %o1, 4, %o1 @@ -111,8 +131,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ .align 32 90: subcc %o2, 1, %o2 - EX_LD(LOAD(ldub, %o1, %g1)) - EX_ST(STORE(stb, %g1, %o1 + %o3)) + EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o2_1) + EX_ST(STORE(stb, %g1, %o1 + %o3),GEN_retl_o2_1) bgu,pt %XCC, 90b add %o1, 1, %o1 retl -- cgit v1.2.3 From cb736fdbb208eb3420f1a2eb2bfc024a6e9dcada Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 15 Aug 2016 16:07:50 -0700 Subject: sparc64: Convert U1copy_{from,to}_user to accurate exception reporting. Report the exact number of bytes which have not been successfully copied when an exception occurs, using the running remaining length. Signed-off-by: David S. Miller --- arch/sparc/lib/U1copy_from_user.S | 8 +- arch/sparc/lib/U1copy_to_user.S | 8 +- arch/sparc/lib/U1memcpy.S | 345 +++++++++++++++++++++++++------------- 3 files changed, 237 insertions(+), 124 deletions(-) (limited to 'arch') diff --git a/arch/sparc/lib/U1copy_from_user.S b/arch/sparc/lib/U1copy_from_user.S index 1ad59fbac7a7..bb6ff73229e3 100644 --- a/arch/sparc/lib/U1copy_from_user.S +++ b/arch/sparc/lib/U1copy_from_user.S @@ -3,19 +3,19 @@ * Copyright (C) 1999, 2000, 2004 David S. 
Miller (davem@redhat.com) */ -#define EX_LD(x) \ +#define EX_LD(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_mone; \ + .word 98b, y; \ .text; \ .align 4; -#define EX_LD_FP(x) \ +#define EX_LD_FP(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_mone_fp;\ + .word 98b, y; \ .text; \ .align 4; diff --git a/arch/sparc/lib/U1copy_to_user.S b/arch/sparc/lib/U1copy_to_user.S index adcc3a510185..ed92ce739558 100644 --- a/arch/sparc/lib/U1copy_to_user.S +++ b/arch/sparc/lib/U1copy_to_user.S @@ -3,19 +3,19 @@ * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) */ -#define EX_ST(x) \ +#define EX_ST(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_mone; \ + .word 98b, y; \ .text; \ .align 4; -#define EX_ST_FP(x) \ +#define EX_ST_FP(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_mone_fp;\ + .word 98b, y; \ .text; \ .align 4; diff --git a/arch/sparc/lib/U1memcpy.S b/arch/sparc/lib/U1memcpy.S index 97e1b211090c..4f0d50b33a72 100644 --- a/arch/sparc/lib/U1memcpy.S +++ b/arch/sparc/lib/U1memcpy.S @@ -5,6 +5,7 @@ */ #ifdef __KERNEL__ +#include #include #include #include @@ -24,21 +25,17 @@ #endif #ifndef EX_LD -#define EX_LD(x) x +#define EX_LD(x,y) x #endif #ifndef EX_LD_FP -#define EX_LD_FP(x) x +#define EX_LD_FP(x,y) x #endif #ifndef EX_ST -#define EX_ST(x) x +#define EX_ST(x,y) x #endif #ifndef EX_ST_FP -#define EX_ST_FP(x) x -#endif - -#ifndef EX_RETVAL -#define EX_RETVAL(x) x +#define EX_ST_FP(x,y) x #endif #ifndef LOAD @@ -79,53 +76,169 @@ faligndata %f7, %f8, %f60; \ faligndata %f8, %f9, %f62; -#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt) \ - EX_LD_FP(LOAD_BLK(%src, %fdest)); \ - EX_ST_FP(STORE_BLK(%fsrc, %dest)); \ - add %src, 0x40, %src; \ - subcc %len, 0x40, %len; \ - be,pn %xcc, jmptgt; \ - add %dest, 0x40, %dest; \ - -#define LOOP_CHUNK1(src, dest, len, branch_dest) \ - MAIN_LOOP_CHUNK(src, dest, f0, f48, len, branch_dest) -#define LOOP_CHUNK2(src, dest, len, branch_dest) \ - MAIN_LOOP_CHUNK(src, dest, f16, f48, len, branch_dest) -#define LOOP_CHUNK3(src, dest, len, branch_dest) \ - MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest) +#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, jmptgt) \ + EX_LD_FP(LOAD_BLK(%src, %fdest), U1_gs_80_fp); \ + EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \ + add %src, 0x40, %src; \ + subcc %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE; \ + be,pn %xcc, jmptgt; \ + add %dest, 0x40, %dest; \ + +#define LOOP_CHUNK1(src, dest, branch_dest) \ + MAIN_LOOP_CHUNK(src, dest, f0, f48, branch_dest) +#define LOOP_CHUNK2(src, dest, branch_dest) \ + MAIN_LOOP_CHUNK(src, dest, f16, f48, branch_dest) +#define LOOP_CHUNK3(src, dest, branch_dest) \ + MAIN_LOOP_CHUNK(src, dest, f32, f48, branch_dest) #define DO_SYNC membar #Sync; #define STORE_SYNC(dest, fsrc) \ - EX_ST_FP(STORE_BLK(%fsrc, %dest)); \ + EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \ add %dest, 0x40, %dest; \ DO_SYNC #define STORE_JUMP(dest, fsrc, target) \ - EX_ST_FP(STORE_BLK(%fsrc, %dest)); \ + EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_40_fp); \ add %dest, 0x40, %dest; \ ba,pt %xcc, target; \ nop; -#define FINISH_VISCHUNK(dest, f0, f1, left) \ - subcc %left, 8, %left;\ - bl,pn %xcc, 95f; \ - faligndata %f0, %f1, %f48; \ - EX_ST_FP(STORE(std, %f48, %dest)); \ +#define FINISH_VISCHUNK(dest, f0, f1) \ + subcc %g3, 8, %g3; \ + bl,pn %xcc, 95f; \ + faligndata %f0, %f1, %f48; \ + EX_ST_FP(STORE(std, %f48, %dest), U1_g3_8_fp); \ add %dest, 8, %dest; -#define UNEVEN_VISCHUNK_LAST(dest, f0, 
f1, left) \ - subcc %left, 8, %left; \ - bl,pn %xcc, 95f; \ +#define UNEVEN_VISCHUNK_LAST(dest, f0, f1) \ + subcc %g3, 8, %g3; \ + bl,pn %xcc, 95f; \ fsrc2 %f0, %f1; -#define UNEVEN_VISCHUNK(dest, f0, f1, left) \ - UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \ +#define UNEVEN_VISCHUNK(dest, f0, f1) \ + UNEVEN_VISCHUNK_LAST(dest, f0, f1) \ ba,a,pt %xcc, 93f; .register %g2,#scratch .register %g3,#scratch .text +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +ENTRY(U1_g1_1_fp) + VISExitHalf + add %g1, 1, %g1 + add %g1, %g2, %g1 + retl + add %g1, %o2, %o0 +ENDPROC(U1_g1_1_fp) +ENTRY(U1_g2_0_fp) + VISExitHalf + retl + add %g2, %o2, %o0 +ENDPROC(U1_g2_0_fp) +ENTRY(U1_g2_8_fp) + VISExitHalf + add %g2, 8, %g2 + retl + add %g2, %o2, %o0 +ENDPROC(U1_g2_8_fp) +ENTRY(U1_gs_0_fp) + VISExitHalf + add %GLOBAL_SPARE, %g3, %o0 + retl + add %o0, %o2, %o0 +ENDPROC(U1_gs_0_fp) +ENTRY(U1_gs_80_fp) + VISExitHalf + add %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE + add %GLOBAL_SPARE, %g3, %o0 + retl + add %o0, %o2, %o0 +ENDPROC(U1_gs_80_fp) +ENTRY(U1_gs_40_fp) + VISExitHalf + add %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE + add %GLOBAL_SPARE, %g3, %o0 + retl + add %o0, %o2, %o0 +ENDPROC(U1_gs_40_fp) +ENTRY(U1_g3_0_fp) + VISExitHalf + retl + add %g3, %o2, %o0 +ENDPROC(U1_g3_0_fp) +ENTRY(U1_g3_8_fp) + VISExitHalf + add %g3, 8, %g3 + retl + add %g3, %o2, %o0 +ENDPROC(U1_g3_8_fp) +ENTRY(U1_o2_0_fp) + VISExitHalf + retl + mov %o2, %o0 +ENDPROC(U1_o2_0_fp) +ENTRY(U1_o2_1_fp) + VISExitHalf + retl + add %o2, 1, %o0 +ENDPROC(U1_o2_1_fp) +ENTRY(U1_gs_0) + VISExitHalf + retl + add %GLOBAL_SPARE, %o2, %o0 +ENDPROC(U1_gs_0) +ENTRY(U1_gs_8) + VISExitHalf + add %GLOBAL_SPARE, %o2, %GLOBAL_SPARE + retl + add %GLOBAL_SPARE, 0x8, %o0 +ENDPROC(U1_gs_8) +ENTRY(U1_gs_10) + VISExitHalf + add %GLOBAL_SPARE, %o2, %GLOBAL_SPARE + retl + add %GLOBAL_SPARE, 0x10, %o0 +ENDPROC(U1_gs_10) +ENTRY(U1_o2_0) + retl + mov %o2, %o0 +ENDPROC(U1_o2_0) +ENTRY(U1_o2_8) + retl + add %o2, 8, %o0 +ENDPROC(U1_o2_8) +ENTRY(U1_o2_4) + retl + add %o2, 4, %o0 +ENDPROC(U1_o2_4) +ENTRY(U1_o2_1) + retl + add %o2, 1, %o0 +ENDPROC(U1_o2_1) +ENTRY(U1_g1_0) + retl + add %g1, %o2, %o0 +ENDPROC(U1_g1_0) +ENTRY(U1_g1_1) + add %g1, 1, %g1 + retl + add %g1, %o2, %o0 +ENDPROC(U1_g1_1) +ENTRY(U1_gs_0_o2_adj) + and %o2, 7, %o2 + retl + add %GLOBAL_SPARE, %o2, %o0 +ENDPROC(U1_gs_0_o2_adj) +ENTRY(U1_gs_8_o2_adj) + and %o2, 7, %o2 + add %GLOBAL_SPARE, 8, %GLOBAL_SPARE + retl + add %GLOBAL_SPARE, %o2, %o0 +ENDPROC(U1_gs_8_o2_adj) +#endif + .align 64 .globl FUNC_NAME @@ -167,8 +280,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ and %g2, 0x38, %g2 1: subcc %g1, 0x1, %g1 - EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3)) - EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE)) + EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U1_g1_1_fp) + EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE), U1_g1_1_fp) bgu,pt %XCC, 1b add %o1, 0x1, %o1 @@ -179,20 +292,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ be,pt %icc, 3f alignaddr %o1, %g0, %o1 - EX_LD_FP(LOAD(ldd, %o1, %f4)) -1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6)) + EX_LD_FP(LOAD(ldd, %o1, %f4), U1_g2_0_fp) +1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U1_g2_0_fp) add %o1, 0x8, %o1 subcc %g2, 0x8, %g2 faligndata %f4, %f6, %f0 - EX_ST_FP(STORE(std, %f0, %o0)) + EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp) be,pn %icc, 3f add %o0, 0x8, %o0 - EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4)) + EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U1_g2_0_fp) add %o1, 0x8, %o1 subcc %g2, 0x8, %g2 faligndata %f6, %f4, %f0 - EX_ST_FP(STORE(std, %f0, %o0)) + EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp) bne,pt %icc, 1b add %o0, 0x8, %o0 @@ 
-215,13 +328,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ add %g1, %GLOBAL_SPARE, %g1 subcc %o2, %g3, %o2 - EX_LD_FP(LOAD_BLK(%o1, %f0)) + EX_LD_FP(LOAD_BLK(%o1, %f0), U1_gs_0_fp) add %o1, 0x40, %o1 add %g1, %g3, %g1 - EX_LD_FP(LOAD_BLK(%o1, %f16)) + EX_LD_FP(LOAD_BLK(%o1, %f16), U1_gs_0_fp) add %o1, 0x40, %o1 sub %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE - EX_LD_FP(LOAD_BLK(%o1, %f32)) + EX_LD_FP(LOAD_BLK(%o1, %f32), U1_gs_80_fp) add %o1, 0x40, %o1 /* There are 8 instances of the unrolled loop, @@ -241,11 +354,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ .align 64 1: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f0, %f2, %f48 1: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) @@ -262,11 +375,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 56f) 1: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f2, %f4, %f48 1: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) @@ -283,11 +396,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 57f) 1: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f4, %f6, %f48 1: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) @@ -304,11 +417,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 58f) 1: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f6, %f8, %f48 1: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) @@ -325,11 +438,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 59f) 1: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f8, %f10, %f48 1: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) @@ -346,11 +459,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 60f) 1: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 
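/* A note on the "1b+4" target used throughout these unrolled loops: the
 * faligndata in the branch's delay slot already performs the first
 * instruction of the loop body at label 1, so control re-enters the
 * loop one instruction (4 bytes) past the label rather than redoing it.
 */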
faligndata %f10, %f12, %f48 1: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) @@ -367,11 +480,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 61f) 1: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f12, %f14, %f48 1: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) @@ -388,11 +501,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 62f) 1: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f14, %f16, %f48 1: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) @@ -408,53 +521,53 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) STORE_JUMP(o0, f48, 63f) -40: FINISH_VISCHUNK(o0, f0, f2, g3) -41: FINISH_VISCHUNK(o0, f2, f4, g3) -42: FINISH_VISCHUNK(o0, f4, f6, g3) -43: FINISH_VISCHUNK(o0, f6, f8, g3) -44: FINISH_VISCHUNK(o0, f8, f10, g3) -45: FINISH_VISCHUNK(o0, f10, f12, g3) -46: FINISH_VISCHUNK(o0, f12, f14, g3) -47: UNEVEN_VISCHUNK(o0, f14, f0, g3) -48: FINISH_VISCHUNK(o0, f16, f18, g3) -49: FINISH_VISCHUNK(o0, f18, f20, g3) -50: FINISH_VISCHUNK(o0, f20, f22, g3) -51: FINISH_VISCHUNK(o0, f22, f24, g3) -52: FINISH_VISCHUNK(o0, f24, f26, g3) -53: FINISH_VISCHUNK(o0, f26, f28, g3) -54: FINISH_VISCHUNK(o0, f28, f30, g3) -55: UNEVEN_VISCHUNK(o0, f30, f0, g3) -56: FINISH_VISCHUNK(o0, f32, f34, g3) -57: FINISH_VISCHUNK(o0, f34, f36, g3) -58: FINISH_VISCHUNK(o0, f36, f38, g3) -59: FINISH_VISCHUNK(o0, f38, f40, g3) -60: FINISH_VISCHUNK(o0, f40, f42, g3) -61: FINISH_VISCHUNK(o0, f42, f44, g3) -62: FINISH_VISCHUNK(o0, f44, f46, g3) -63: UNEVEN_VISCHUNK_LAST(o0, f46, f0, g3) - -93: EX_LD_FP(LOAD(ldd, %o1, %f2)) +40: FINISH_VISCHUNK(o0, f0, f2) +41: FINISH_VISCHUNK(o0, f2, f4) +42: FINISH_VISCHUNK(o0, f4, f6) +43: FINISH_VISCHUNK(o0, f6, f8) +44: FINISH_VISCHUNK(o0, f8, f10) +45: FINISH_VISCHUNK(o0, f10, f12) +46: FINISH_VISCHUNK(o0, f12, f14) +47: UNEVEN_VISCHUNK(o0, f14, f0) +48: FINISH_VISCHUNK(o0, f16, f18) +49: FINISH_VISCHUNK(o0, f18, f20) +50: FINISH_VISCHUNK(o0, f20, f22) +51: FINISH_VISCHUNK(o0, f22, f24) +52: FINISH_VISCHUNK(o0, f24, f26) +53: FINISH_VISCHUNK(o0, f26, f28) +54: FINISH_VISCHUNK(o0, f28, f30) +55: UNEVEN_VISCHUNK(o0, f30, f0) +56: FINISH_VISCHUNK(o0, f32, f34) +57: FINISH_VISCHUNK(o0, f34, f36) +58: FINISH_VISCHUNK(o0, f36, f38) +59: FINISH_VISCHUNK(o0, f38, f40) +60: FINISH_VISCHUNK(o0, f40, f42) +61: FINISH_VISCHUNK(o0, f42, f44) +62: FINISH_VISCHUNK(o0, f44, f46) +63: UNEVEN_VISCHUNK_LAST(o0, f46, f0) + +93: EX_LD_FP(LOAD(ldd, %o1, %f2), U1_g3_0_fp) add %o1, 8, %o1 subcc %g3, 8, %g3 faligndata %f0, %f2, %f8 - EX_ST_FP(STORE(std, %f8, %o0)) + EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp) bl,pn %xcc, 95f add %o0, 8, %o0 - EX_LD_FP(LOAD(ldd, %o1, %f0)) + EX_LD_FP(LOAD(ldd, %o1, %f0), U1_g3_0_fp) add %o1, 8, %o1 subcc %g3, 8, %g3 faligndata %f2, %f0, %f8 - EX_ST_FP(STORE(std, %f8, %o0)) + EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp) bge,pt %xcc, 93b add %o0, 8, %o0 95: brz,pt %o2, 2f mov 
%g1, %o1 -1: EX_LD_FP(LOAD(ldub, %o1, %o3)) +1: EX_LD_FP(LOAD(ldub, %o1, %o3), U1_o2_0_fp) add %o1, 1, %o1 subcc %o2, 1, %o2 - EX_ST_FP(STORE(stb, %o3, %o0)) + EX_ST_FP(STORE(stb, %o3, %o0), U1_o2_1_fp) bne,pt %xcc, 1b add %o0, 1, %o0 @@ -470,27 +583,27 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 72: andn %o2, 0xf, %GLOBAL_SPARE and %o2, 0xf, %o2 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %o5)) - EX_LD(LOAD(ldx, %o1 + 0x08, %g1)) +1: EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U1_gs_0) + EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U1_gs_0) subcc %GLOBAL_SPARE, 0x10, %GLOBAL_SPARE - EX_ST(STORE(stx, %o5, %o1 + %o3)) + EX_ST(STORE(stx, %o5, %o1 + %o3), U1_gs_10) add %o1, 0x8, %o1 - EX_ST(STORE(stx, %g1, %o1 + %o3)) + EX_ST(STORE(stx, %g1, %o1 + %o3), U1_gs_8) bgu,pt %XCC, 1b add %o1, 0x8, %o1 73: andcc %o2, 0x8, %g0 be,pt %XCC, 1f nop - EX_LD(LOAD(ldx, %o1, %o5)) + EX_LD(LOAD(ldx, %o1, %o5), U1_o2_0) sub %o2, 0x8, %o2 - EX_ST(STORE(stx, %o5, %o1 + %o3)) + EX_ST(STORE(stx, %o5, %o1 + %o3), U1_o2_8) add %o1, 0x8, %o1 1: andcc %o2, 0x4, %g0 be,pt %XCC, 1f nop - EX_LD(LOAD(lduw, %o1, %o5)) + EX_LD(LOAD(lduw, %o1, %o5), U1_o2_0) sub %o2, 0x4, %o2 - EX_ST(STORE(stw, %o5, %o1 + %o3)) + EX_ST(STORE(stw, %o5, %o1 + %o3), U1_o2_4) add %o1, 0x4, %o1 1: cmp %o2, 0 be,pt %XCC, 85f @@ -504,9 +617,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %g0, %g1, %g1 sub %o2, %g1, %o2 -1: EX_LD(LOAD(ldub, %o1, %o5)) +1: EX_LD(LOAD(ldub, %o1, %o5), U1_g1_0) subcc %g1, 1, %g1 - EX_ST(STORE(stb, %o5, %o1 + %o3)) + EX_ST(STORE(stb, %o5, %o1 + %o3), U1_g1_1) bgu,pt %icc, 1b add %o1, 1, %o1 @@ -522,16 +635,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 8: mov 64, %o3 andn %o1, 0x7, %o1 - EX_LD(LOAD(ldx, %o1, %g2)) + EX_LD(LOAD(ldx, %o1, %g2), U1_o2_0) sub %o3, %g1, %o3 andn %o2, 0x7, %GLOBAL_SPARE sllx %g2, %g1, %g2 -1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3)) +1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U1_gs_0_o2_adj) subcc %GLOBAL_SPARE, 0x8, %GLOBAL_SPARE add %o1, 0x8, %o1 srlx %g3, %o3, %o5 or %o5, %g2, %o5 - EX_ST(STORE(stx, %o5, %o0)) + EX_ST(STORE(stx, %o5, %o0), U1_gs_8_o2_adj) add %o0, 0x8, %o0 bgu,pt %icc, 1b sllx %g3, %g1, %g2 @@ -549,9 +662,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ bne,pn %XCC, 90f sub %o0, %o1, %o3 -1: EX_LD(LOAD(lduw, %o1, %g1)) +1: EX_LD(LOAD(lduw, %o1, %g1), U1_o2_0) subcc %o2, 4, %o2 - EX_ST(STORE(stw, %g1, %o1 + %o3)) + EX_ST(STORE(stw, %g1, %o1 + %o3), U1_o2_4) bgu,pt %XCC, 1b add %o1, 4, %o1 @@ -559,9 +672,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ mov EX_RETVAL(%o4), %o0 .align 32 -90: EX_LD(LOAD(ldub, %o1, %g1)) +90: EX_LD(LOAD(ldub, %o1, %g1), U1_o2_0) subcc %o2, 1, %o2 - EX_ST(STORE(stb, %g1, %o1 + %o3)) + EX_ST(STORE(stb, %g1, %o1 + %o3), U1_o2_1) bgu,pt %XCC, 90b add %o1, 1, %o1 retl -- cgit v1.2.3 From 95707704800988093a9b9a27e0f2f67f5b4bf2fa Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 24 Oct 2016 18:58:05 -0700 Subject: sparc64: Convert NG4copy_{from,to}_user to accurate exception reporting. Report the exact number of bytes which have not been successfully copied when an exception occurs, using the running remaining length. Signed-off-by: David S. 
Miller --- arch/sparc/lib/NG4copy_from_user.S | 8 +- arch/sparc/lib/NG4copy_to_user.S | 8 +- arch/sparc/lib/NG4memcpy.S | 294 ++++++++++++++++++++++++++++--------- 3 files changed, 231 insertions(+), 79 deletions(-) (limited to 'arch') diff --git a/arch/sparc/lib/NG4copy_from_user.S b/arch/sparc/lib/NG4copy_from_user.S index f9746e7cf25e..16a286c1a528 100644 --- a/arch/sparc/lib/NG4copy_from_user.S +++ b/arch/sparc/lib/NG4copy_from_user.S @@ -3,19 +3,19 @@ * Copyright (C) 2012 David S. Miller (davem@davemloft.net) */ -#define EX_LD(x) \ +#define EX_LD(x, y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_mone_asi;\ + .word 98b, y; \ .text; \ .align 4; -#define EX_LD_FP(x) \ +#define EX_LD_FP(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_mone_asi_fp;\ + .word 98b, y##_fp; \ .text; \ .align 4; diff --git a/arch/sparc/lib/NG4copy_to_user.S b/arch/sparc/lib/NG4copy_to_user.S index 5fa44349adde..6b0276ffc858 100644 --- a/arch/sparc/lib/NG4copy_to_user.S +++ b/arch/sparc/lib/NG4copy_to_user.S @@ -3,19 +3,19 @@ * Copyright (C) 2012 David S. Miller (davem@davemloft.net) */ -#define EX_ST(x) \ +#define EX_ST(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_mone_asi;\ + .word 98b, y; \ .text; \ .align 4; -#define EX_ST_FP(x) \ +#define EX_ST_FP(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_mone_asi_fp;\ + .word 98b, y##_fp; \ .text; \ .align 4; diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S index 8e13ee1f4454..75bb93b1437f 100644 --- a/arch/sparc/lib/NG4memcpy.S +++ b/arch/sparc/lib/NG4memcpy.S @@ -4,6 +4,7 @@ */ #ifdef __KERNEL__ +#include #include #include #define GLOBAL_SPARE %g7 @@ -46,22 +47,19 @@ #endif #ifndef EX_LD -#define EX_LD(x) x +#define EX_LD(x,y) x #endif #ifndef EX_LD_FP -#define EX_LD_FP(x) x +#define EX_LD_FP(x,y) x #endif #ifndef EX_ST -#define EX_ST(x) x +#define EX_ST(x,y) x #endif #ifndef EX_ST_FP -#define EX_ST_FP(x) x +#define EX_ST_FP(x,y) x #endif -#ifndef EX_RETVAL -#define EX_RETVAL(x) x -#endif #ifndef LOAD #define LOAD(type,addr,dest) type [addr], dest @@ -94,6 +92,158 @@ .register %g3,#scratch .text +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +__restore_asi_fp: + VISExitHalf +__restore_asi: + retl + wr %g0, ASI_AIUS, %asi + +ENTRY(NG4_retl_o2) + ba,pt %xcc, __restore_asi + mov %o2, %o0 +ENDPROC(NG4_retl_o2) +ENTRY(NG4_retl_o2_plus_1) + ba,pt %xcc, __restore_asi + add %o2, 1, %o0 +ENDPROC(NG4_retl_o2_plus_1) +ENTRY(NG4_retl_o2_plus_4) + ba,pt %xcc, __restore_asi + add %o2, 4, %o0 +ENDPROC(NG4_retl_o2_plus_4) +ENTRY(NG4_retl_o2_plus_o5) + ba,pt %xcc, __restore_asi + add %o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5) +ENTRY(NG4_retl_o2_plus_o5_plus_4) + add %o5, 4, %o5 + ba,pt %xcc, __restore_asi + add %o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_4) +ENTRY(NG4_retl_o2_plus_o5_plus_8) + add %o5, 8, %o5 + ba,pt %xcc, __restore_asi + add %o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_8) +ENTRY(NG4_retl_o2_plus_o5_plus_16) + add %o5, 16, %o5 + ba,pt %xcc, __restore_asi + add %o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_16) +ENTRY(NG4_retl_o2_plus_o5_plus_24) + add %o5, 24, %o5 + ba,pt %xcc, __restore_asi + add %o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_24) +ENTRY(NG4_retl_o2_plus_o5_plus_32) + add %o5, 32, %o5 + ba,pt %xcc, __restore_asi + add %o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_32) +ENTRY(NG4_retl_o2_plus_g1) + ba,pt %xcc, __restore_asi + add %o2, %g1, %o0 +ENDPROC(NG4_retl_o2_plus_g1) +ENTRY(NG4_retl_o2_plus_g1_plus_1) + add %g1, 
1, %g1 + ba,pt %xcc, __restore_asi + add %o2, %g1, %o0 +ENDPROC(NG4_retl_o2_plus_g1_plus_1) +ENTRY(NG4_retl_o2_plus_g1_plus_8) + add %g1, 8, %g1 + ba,pt %xcc, __restore_asi + add %o2, %g1, %o0 +ENDPROC(NG4_retl_o2_plus_g1_plus_8) +ENTRY(NG4_retl_o2_plus_o4) + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4) +ENTRY(NG4_retl_o2_plus_o4_plus_8) + add %o4, 8, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_8) +ENTRY(NG4_retl_o2_plus_o4_plus_16) + add %o4, 16, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_16) +ENTRY(NG4_retl_o2_plus_o4_plus_24) + add %o4, 24, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_24) +ENTRY(NG4_retl_o2_plus_o4_plus_32) + add %o4, 32, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_32) +ENTRY(NG4_retl_o2_plus_o4_plus_40) + add %o4, 40, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_40) +ENTRY(NG4_retl_o2_plus_o4_plus_48) + add %o4, 48, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_48) +ENTRY(NG4_retl_o2_plus_o4_plus_56) + add %o4, 56, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_56) +ENTRY(NG4_retl_o2_plus_o4_plus_64) + add %o4, 64, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_64) +ENTRY(NG4_retl_o2_plus_o4_fp) + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_8_fp) + add %o4, 8, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_8_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_16_fp) + add %o4, 16, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_16_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_24_fp) + add %o4, 24, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_24_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_32_fp) + add %o4, 32, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_32_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_40_fp) + add %o4, 40, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_40_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_48_fp) + add %o4, 48, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_48_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_56_fp) + add %o4, 56, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_56_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_64_fp) + add %o4, 64, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_64_fp) +#endif .align 64 .globl FUNC_NAME @@ -124,12 +274,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ brz,pt %g1, 51f sub %o2, %g1, %o2 -1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2)) + +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1) add %o1, 1, %o1 subcc %g1, 1, %g1 add %o0, 1, %o0 bne,pt %icc, 1b - EX_ST(STORE(stb, %g2, %o0 - 0x01)) + EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1) 51: LOAD(prefetch, %o1 + 0x040, #n_reads_strong) LOAD(prefetch, %o1 + 0x080, #n_reads_strong) @@ -154,43 +305,43 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ brz,pt %g1, .Llarge_aligned sub %o2, %g1, %o2 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2)) +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1) add %o1, 8, %o1 subcc %g1, 8, %g1 add %o0, 8, %o0 bne,pt %icc, 1b - 
EX_ST(STORE(stx, %g2, %o0 - 0x08)) + EX_ST(STORE(stx, %g2, %o0 - 0x08), NG4_retl_o2_plus_g1_plus_8) .Llarge_aligned: /* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */ andn %o2, 0x3f, %o4 sub %o2, %o4, %o2 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o4) add %o1, 0x40, %o1 - EX_LD(LOAD(ldx, %o1 - 0x38, %g2)) + EX_LD(LOAD(ldx, %o1 - 0x38, %g2), NG4_retl_o2_plus_o4) subcc %o4, 0x40, %o4 - EX_LD(LOAD(ldx, %o1 - 0x30, %g3)) - EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE)) - EX_LD(LOAD(ldx, %o1 - 0x20, %o5)) - EX_ST(STORE_INIT(%g1, %o0)) + EX_LD(LOAD(ldx, %o1 - 0x30, %g3), NG4_retl_o2_plus_o4_plus_64) + EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_64) + EX_LD(LOAD(ldx, %o1 - 0x20, %o5), NG4_retl_o2_plus_o4_plus_64) + EX_ST(STORE_INIT(%g1, %o0), NG4_retl_o2_plus_o4_plus_64) add %o0, 0x08, %o0 - EX_ST(STORE_INIT(%g2, %o0)) + EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_56) add %o0, 0x08, %o0 - EX_LD(LOAD(ldx, %o1 - 0x18, %g2)) - EX_ST(STORE_INIT(%g3, %o0)) + EX_LD(LOAD(ldx, %o1 - 0x18, %g2), NG4_retl_o2_plus_o4_plus_48) + EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_48) add %o0, 0x08, %o0 - EX_LD(LOAD(ldx, %o1 - 0x10, %g3)) - EX_ST(STORE_INIT(GLOBAL_SPARE, %o0)) + EX_LD(LOAD(ldx, %o1 - 0x10, %g3), NG4_retl_o2_plus_o4_plus_40) + EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_40) add %o0, 0x08, %o0 - EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE)) - EX_ST(STORE_INIT(%o5, %o0)) + EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_32) + EX_ST(STORE_INIT(%o5, %o0), NG4_retl_o2_plus_o4_plus_32) add %o0, 0x08, %o0 - EX_ST(STORE_INIT(%g2, %o0)) + EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_24) add %o0, 0x08, %o0 - EX_ST(STORE_INIT(%g3, %o0)) + EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_16) add %o0, 0x08, %o0 - EX_ST(STORE_INIT(GLOBAL_SPARE, %o0)) + EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_8) add %o0, 0x08, %o0 bne,pt %icc, 1b LOAD(prefetch, %o1 + 0x200, #n_reads_strong) @@ -216,17 +367,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %o2, %o4, %o2 alignaddr %o1, %g0, %g1 add %o1, %o4, %o1 - EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0)) -1: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2)) + EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0), NG4_retl_o2_plus_o4) +1: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2), NG4_retl_o2_plus_o4) subcc %o4, 0x40, %o4 - EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4)) - EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6)) - EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8)) - EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10)) - EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12)) - EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14)) + EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4), NG4_retl_o2_plus_o4_plus_64) + EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6), NG4_retl_o2_plus_o4_plus_64) + EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8), NG4_retl_o2_plus_o4_plus_64) + EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10), NG4_retl_o2_plus_o4_plus_64) + EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12), NG4_retl_o2_plus_o4_plus_64) + EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14), NG4_retl_o2_plus_o4_plus_64) faligndata %f0, %f2, %f16 - EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0)) + EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0), NG4_retl_o2_plus_o4_plus_64) faligndata %f2, %f4, %f18 add %g1, 0x40, %g1 faligndata %f4, %f6, %f20 @@ -235,14 +386,14 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ faligndata %f10, %f12, %f26 faligndata %f12, %f14, %f28 faligndata %f14, %f0, %f30 - EX_ST_FP(STORE(std, %f16, %o0 + 0x00)) - EX_ST_FP(STORE(std, %f18, %o0 + 0x08)) - EX_ST_FP(STORE(std, %f20, %o0 + 0x10)) - EX_ST_FP(STORE(std, %f22, %o0 + 
0x18)) - EX_ST_FP(STORE(std, %f24, %o0 + 0x20)) - EX_ST_FP(STORE(std, %f26, %o0 + 0x28)) - EX_ST_FP(STORE(std, %f28, %o0 + 0x30)) - EX_ST_FP(STORE(std, %f30, %o0 + 0x38)) + EX_ST_FP(STORE(std, %f16, %o0 + 0x00), NG4_retl_o2_plus_o4_plus_64) + EX_ST_FP(STORE(std, %f18, %o0 + 0x08), NG4_retl_o2_plus_o4_plus_56) + EX_ST_FP(STORE(std, %f20, %o0 + 0x10), NG4_retl_o2_plus_o4_plus_48) + EX_ST_FP(STORE(std, %f22, %o0 + 0x18), NG4_retl_o2_plus_o4_plus_40) + EX_ST_FP(STORE(std, %f24, %o0 + 0x20), NG4_retl_o2_plus_o4_plus_32) + EX_ST_FP(STORE(std, %f26, %o0 + 0x28), NG4_retl_o2_plus_o4_plus_24) + EX_ST_FP(STORE(std, %f28, %o0 + 0x30), NG4_retl_o2_plus_o4_plus_16) + EX_ST_FP(STORE(std, %f30, %o0 + 0x38), NG4_retl_o2_plus_o4_plus_8) add %o0, 0x40, %o0 bne,pt %icc, 1b LOAD(prefetch, %g1 + 0x200, #n_reads_strong) @@ -270,37 +421,38 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ andncc %o2, 0x20 - 1, %o5 be,pn %icc, 2f sub %o2, %o5, %o2 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) - EX_LD(LOAD(ldx, %o1 + 0x08, %g2)) - EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE)) - EX_LD(LOAD(ldx, %o1 + 0x18, %o4)) +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5) + EX_LD(LOAD(ldx, %o1 + 0x08, %g2), NG4_retl_o2_plus_o5) + EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE), NG4_retl_o2_plus_o5) + EX_LD(LOAD(ldx, %o1 + 0x18, %o4), NG4_retl_o2_plus_o5) add %o1, 0x20, %o1 subcc %o5, 0x20, %o5 - EX_ST(STORE(stx, %g1, %o0 + 0x00)) - EX_ST(STORE(stx, %g2, %o0 + 0x08)) - EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10)) - EX_ST(STORE(stx, %o4, %o0 + 0x18)) + EX_ST(STORE(stx, %g1, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_32) + EX_ST(STORE(stx, %g2, %o0 + 0x08), NG4_retl_o2_plus_o5_plus_24) + EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10), NG4_retl_o2_plus_o5_plus_24) + EX_ST(STORE(stx, %o4, %o0 + 0x18), NG4_retl_o2_plus_o5_plus_8) bne,pt %icc, 1b add %o0, 0x20, %o0 2: andcc %o2, 0x18, %o5 be,pt %icc, 3f sub %o2, %o5, %o2 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) + +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5) add %o1, 0x08, %o1 add %o0, 0x08, %o0 subcc %o5, 0x08, %o5 bne,pt %icc, 1b - EX_ST(STORE(stx, %g1, %o0 - 0x08)) + EX_ST(STORE(stx, %g1, %o0 - 0x08), NG4_retl_o2_plus_o5_plus_8) 3: brz,pt %o2, .Lexit cmp %o2, 0x04 bl,pn %icc, .Ltiny nop - EX_LD(LOAD(lduw, %o1 + 0x00, %g1)) + EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2) add %o1, 0x04, %o1 add %o0, 0x04, %o0 subcc %o2, 0x04, %o2 bne,pn %icc, .Ltiny - EX_ST(STORE(stw, %g1, %o0 - 0x04)) + EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_4) ba,a,pt %icc, .Lexit .Lmedium_unaligned: /* First get dest 8 byte aligned. 
*/ @@ -309,12 +461,12 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ brz,pt %g1, 2f sub %o2, %g1, %o2 -1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2)) +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1) add %o1, 1, %o1 subcc %g1, 1, %g1 add %o0, 1, %o0 bne,pt %icc, 1b - EX_ST(STORE(stb, %g2, %o0 - 0x01)) + EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1) 2: and %o1, 0x7, %g1 brz,pn %g1, .Lmedium_noprefetch @@ -322,16 +474,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ mov 64, %g2 sub %g2, %g1, %g2 andn %o1, 0x7, %o1 - EX_LD(LOAD(ldx, %o1 + 0x00, %o4)) + EX_LD(LOAD(ldx, %o1 + 0x00, %o4), NG4_retl_o2) sllx %o4, %g1, %o4 andn %o2, 0x08 - 1, %o5 sub %o2, %o5, %o2 -1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3)) +1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3), NG4_retl_o2_plus_o5) add %o1, 0x08, %o1 subcc %o5, 0x08, %o5 srlx %g3, %g2, GLOBAL_SPARE or GLOBAL_SPARE, %o4, GLOBAL_SPARE - EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00)) + EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_8) add %o0, 0x08, %o0 bne,pt %icc, 1b sllx %g3, %g1, %o4 @@ -342,17 +494,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ba,pt %icc, .Lsmall_unaligned .Ltiny: - EX_LD(LOAD(ldub, %o1 + 0x00, %g1)) + EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2) subcc %o2, 1, %o2 be,pn %icc, .Lexit - EX_ST(STORE(stb, %g1, %o0 + 0x00)) - EX_LD(LOAD(ldub, %o1 + 0x01, %g1)) + EX_ST(STORE(stb, %g1, %o0 + 0x00), NG4_retl_o2_plus_1) + EX_LD(LOAD(ldub, %o1 + 0x01, %g1), NG4_retl_o2) subcc %o2, 1, %o2 be,pn %icc, .Lexit - EX_ST(STORE(stb, %g1, %o0 + 0x01)) - EX_LD(LOAD(ldub, %o1 + 0x02, %g1)) + EX_ST(STORE(stb, %g1, %o0 + 0x01), NG4_retl_o2_plus_1) + EX_LD(LOAD(ldub, %o1 + 0x02, %g1), NG4_retl_o2) ba,pt %icc, .Lexit - EX_ST(STORE(stb, %g1, %o0 + 0x02)) + EX_ST(STORE(stb, %g1, %o0 + 0x02), NG4_retl_o2) .Lsmall: andcc %g2, 0x3, %g0 @@ -360,22 +512,22 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ andn %o2, 0x4 - 1, %o5 sub %o2, %o5, %o2 1: - EX_LD(LOAD(lduw, %o1 + 0x00, %g1)) + EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5) add %o1, 0x04, %o1 subcc %o5, 0x04, %o5 add %o0, 0x04, %o0 bne,pt %icc, 1b - EX_ST(STORE(stw, %g1, %o0 - 0x04)) + EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_o5_plus_4) brz,pt %o2, .Lexit nop ba,a,pt %icc, .Ltiny .Lsmall_unaligned: -1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1)) +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2) add %o1, 1, %o1 add %o0, 1, %o0 subcc %o2, 1, %o2 bne,pt %icc, 1b - EX_ST(STORE(stb, %g1, %o0 - 0x01)) + EX_ST(STORE(stb, %g1, %o0 - 0x01), NG4_retl_o2_plus_1) ba,a,pt %icc, .Lexit .size FUNC_NAME, .-FUNC_NAME -- cgit v1.2.3 From 7ae3aaf53f1695877ccd5ebbc49ea65991e41f1e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 24 Oct 2016 19:32:12 -0700 Subject: sparc64: Convert NGcopy_{from,to}_user to accurate exception reporting. Report the exact number of bytes which have not been successfully copied when an exception occurs, using the running remaining length. Signed-off-by: David S. Miller --- arch/sparc/lib/NGcopy_from_user.S | 4 +- arch/sparc/lib/NGcopy_to_user.S | 4 +- arch/sparc/lib/NGmemcpy.S | 233 ++++++++++++++++++++++++++------------ 3 files changed, 162 insertions(+), 79 deletions(-) (limited to 'arch') diff --git a/arch/sparc/lib/NGcopy_from_user.S b/arch/sparc/lib/NGcopy_from_user.S index e61694c444af..9cd42fcbc781 100644 --- a/arch/sparc/lib/NGcopy_from_user.S +++ b/arch/sparc/lib/NGcopy_from_user.S @@ -3,11 +3,11 @@ * Copyright (C) 2006, 2007 David S. 
Miller (davem@davemloft.net) */ -#define EX_LD(x) \ +#define EX_LD(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __ret_mone_asi;\ + .word 98b, y; \ .text; \ .align 4; diff --git a/arch/sparc/lib/NGcopy_to_user.S b/arch/sparc/lib/NGcopy_to_user.S index 2d6b33d3998f..5c358afd464e 100644 --- a/arch/sparc/lib/NGcopy_to_user.S +++ b/arch/sparc/lib/NGcopy_to_user.S @@ -3,11 +3,11 @@ * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net) */ -#define EX_ST(x) \ +#define EX_ST(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __ret_mone_asi;\ + .word 98b, y; \ .text; \ .align 4; diff --git a/arch/sparc/lib/NGmemcpy.S b/arch/sparc/lib/NGmemcpy.S index 96a14caf6966..d88c4ed50a00 100644 --- a/arch/sparc/lib/NGmemcpy.S +++ b/arch/sparc/lib/NGmemcpy.S @@ -4,6 +4,7 @@ */ #ifdef __KERNEL__ +#include #include #include #define GLOBAL_SPARE %g7 @@ -27,15 +28,11 @@ #endif #ifndef EX_LD -#define EX_LD(x) x +#define EX_LD(x,y) x #endif #ifndef EX_ST -#define EX_ST(x) x -#endif - -#ifndef EX_RETVAL -#define EX_RETVAL(x) x +#define EX_ST(x,y) x #endif #ifndef LOAD @@ -79,6 +76,92 @@ .register %g3,#scratch .text +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +__restore_asi: + ret + wr %g0, ASI_AIUS, %asi + restore +ENTRY(NG_ret_i2_plus_i4_plus_1) + ba,pt %xcc, __restore_asi + add %i2, %i5, %i0 +ENDPROC(NG_ret_i2_plus_i4_plus_1) +ENTRY(NG_ret_i2_plus_g1) + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1) +ENTRY(NG_ret_i2_plus_g1_minus_8) + sub %g1, 8, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_8) +ENTRY(NG_ret_i2_plus_g1_minus_16) + sub %g1, 16, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_16) +ENTRY(NG_ret_i2_plus_g1_minus_24) + sub %g1, 24, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_24) +ENTRY(NG_ret_i2_plus_g1_minus_32) + sub %g1, 32, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_32) +ENTRY(NG_ret_i2_plus_g1_minus_40) + sub %g1, 40, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_40) +ENTRY(NG_ret_i2_plus_g1_minus_48) + sub %g1, 48, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_48) +ENTRY(NG_ret_i2_plus_g1_minus_56) + sub %g1, 56, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_56) +ENTRY(NG_ret_i2_plus_i4) + ba,pt %xcc, __restore_asi + add %i2, %i4, %i0 +ENDPROC(NG_ret_i2_plus_i4) +ENTRY(NG_ret_i2_plus_i4_minus_8) + sub %i4, 8, %i4 + ba,pt %xcc, __restore_asi + add %i2, %i4, %i0 +ENDPROC(NG_ret_i2_plus_i4_minus_8) +ENTRY(NG_ret_i2_plus_8) + ba,pt %xcc, __restore_asi + add %i2, 8, %i0 +ENDPROC(NG_ret_i2_plus_8) +ENTRY(NG_ret_i2_plus_4) + ba,pt %xcc, __restore_asi + add %i2, 4, %i0 +ENDPROC(NG_ret_i2_plus_4) +ENTRY(NG_ret_i2_plus_1) + ba,pt %xcc, __restore_asi + add %i2, 1, %i0 +ENDPROC(NG_ret_i2_plus_1) +ENTRY(NG_ret_i2_plus_g1_plus_1) + add %g1, 1, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_plus_1) +ENTRY(NG_ret_i2) + ba,pt %xcc, __restore_asi + mov %i2, %i0 +ENDPROC(NG_ret_i2) +ENTRY(NG_ret_i2_and_7_plus_i4) + and %i2, 7, %i2 + ba,pt %xcc, __restore_asi + add %i2, %i4, %i0 +ENDPROC(NG_ret_i2_and_7_plus_i4) +#endif + .align 64 .globl FUNC_NAME @@ -126,8 +209,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ sub %g0, %i4, %i4 ! 
bytes to align dst sub %i2, %i4, %i2 1: subcc %i4, 1, %i4 - EX_LD(LOAD(ldub, %i1, %g1)) - EX_ST(STORE(stb, %g1, %o0)) + EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_i4_plus_1) + EX_ST(STORE(stb, %g1, %o0), NG_ret_i2_plus_i4_plus_1) add %i1, 1, %i1 bne,pt %XCC, 1b add %o0, 1, %o0 @@ -160,7 +243,7 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ and %i4, 0x7, GLOBAL_SPARE sll GLOBAL_SPARE, 3, GLOBAL_SPARE mov 64, %i5 - EX_LD(LOAD_TWIN(%i1, %g2, %g3)) + EX_LD(LOAD_TWIN(%i1, %g2, %g3), NG_ret_i2_plus_g1) sub %i5, GLOBAL_SPARE, %i5 mov 16, %o4 mov 32, %o5 @@ -178,31 +261,31 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ srlx WORD3, PRE_SHIFT, TMP; \ or WORD2, TMP, WORD2; -8: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3)) +8: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1) MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1) LOAD(prefetch, %i1 + %i3, #one_read) - EX_ST(STORE_INIT(%g2, %o0 + 0x00)) - EX_ST(STORE_INIT(%g3, %o0 + 0x08)) + EX_ST(STORE_INIT(%g2, %o0 + 0x00), NG_ret_i2_plus_g1) + EX_ST(STORE_INIT(%g3, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8) - EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3)) + EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16) MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1) - EX_ST(STORE_INIT(%o2, %o0 + 0x10)) - EX_ST(STORE_INIT(%o3, %o0 + 0x18)) + EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16) + EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24) - EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) + EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32) MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1) - EX_ST(STORE_INIT(%g2, %o0 + 0x20)) - EX_ST(STORE_INIT(%g3, %o0 + 0x28)) + EX_ST(STORE_INIT(%g2, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32) + EX_ST(STORE_INIT(%g3, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40) - EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3)) + EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48) add %i1, 64, %i1 MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1) - EX_ST(STORE_INIT(%o2, %o0 + 0x30)) - EX_ST(STORE_INIT(%o3, %o0 + 0x38)) + EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48) + EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56) subcc %g1, 64, %g1 bne,pt %XCC, 8b @@ -211,31 +294,31 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ ba,pt %XCC, 60f add %i1, %i4, %i1 -9: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3)) +9: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1) MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1) LOAD(prefetch, %i1 + %i3, #one_read) - EX_ST(STORE_INIT(%g3, %o0 + 0x00)) - EX_ST(STORE_INIT(%o2, %o0 + 0x08)) + EX_ST(STORE_INIT(%g3, %o0 + 0x00), NG_ret_i2_plus_g1) + EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8) - EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3)) + EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16) MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1) - EX_ST(STORE_INIT(%o3, %o0 + 0x10)) - EX_ST(STORE_INIT(%g2, %o0 + 0x18)) + EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16) + EX_ST(STORE_INIT(%g2, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24) - EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) + EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32) MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1) - EX_ST(STORE_INIT(%g3, %o0 + 0x20)) - EX_ST(STORE_INIT(%o2, %o0 + 0x28)) + EX_ST(STORE_INIT(%g3, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32) + EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40) - EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3)) + EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48) add %i1, 64, 
%i1 MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1) - EX_ST(STORE_INIT(%o3, %o0 + 0x30)) - EX_ST(STORE_INIT(%g2, %o0 + 0x38)) + EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48) + EX_ST(STORE_INIT(%g2, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56) subcc %g1, 64, %g1 bne,pt %XCC, 9b @@ -249,25 +332,25 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ * one twin load ahead, then add 8 back into source when * we finish the loop. */ - EX_LD(LOAD_TWIN(%i1, %o4, %o5)) + EX_LD(LOAD_TWIN(%i1, %o4, %o5), NG_ret_i2_plus_g1) mov 16, %o7 mov 32, %g2 mov 48, %g3 mov 64, %o1 -1: EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) +1: EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1) LOAD(prefetch, %i1 + %o1, #one_read) - EX_ST(STORE_INIT(%o5, %o0 + 0x00)) ! initializes cache line - EX_ST(STORE_INIT(%o2, %o0 + 0x08)) - EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5)) - EX_ST(STORE_INIT(%o3, %o0 + 0x10)) - EX_ST(STORE_INIT(%o4, %o0 + 0x18)) - EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3)) - EX_ST(STORE_INIT(%o5, %o0 + 0x20)) - EX_ST(STORE_INIT(%o2, %o0 + 0x28)) - EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5)) + EX_ST(STORE_INIT(%o5, %o0 + 0x00), NG_ret_i2_plus_g1) ! initializes cache line + EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8) + EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16) + EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16) + EX_ST(STORE_INIT(%o4, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24) + EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32) + EX_ST(STORE_INIT(%o5, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32) + EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40) + EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5), NG_ret_i2_plus_g1_minus_48) add %i1, 64, %i1 - EX_ST(STORE_INIT(%o3, %o0 + 0x30)) - EX_ST(STORE_INIT(%o4, %o0 + 0x38)) + EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48) + EX_ST(STORE_INIT(%o4, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56) subcc %g1, 64, %g1 bne,pt %XCC, 1b add %o0, 64, %o0 @@ -282,20 +365,20 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ mov 32, %g2 mov 48, %g3 mov 64, %o1 -1: EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5)) - EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) +1: EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5), NG_ret_i2_plus_g1) + EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1) LOAD(prefetch, %i1 + %o1, #one_read) - EX_ST(STORE_INIT(%o4, %o0 + 0x00)) ! initializes cache line - EX_ST(STORE_INIT(%o5, %o0 + 0x08)) - EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5)) - EX_ST(STORE_INIT(%o2, %o0 + 0x10)) - EX_ST(STORE_INIT(%o3, %o0 + 0x18)) - EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3)) + EX_ST(STORE_INIT(%o4, %o0 + 0x00), NG_ret_i2_plus_g1) ! 
initializes cache line + EX_ST(STORE_INIT(%o5, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8) + EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16) + EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16) + EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24) + EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32) add %i1, 64, %i1 - EX_ST(STORE_INIT(%o4, %o0 + 0x20)) - EX_ST(STORE_INIT(%o5, %o0 + 0x28)) - EX_ST(STORE_INIT(%o2, %o0 + 0x30)) - EX_ST(STORE_INIT(%o3, %o0 + 0x38)) + EX_ST(STORE_INIT(%o4, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32) + EX_ST(STORE_INIT(%o5, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40) + EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48) + EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56) subcc %g1, 64, %g1 bne,pt %XCC, 1b add %o0, 64, %o0 @@ -321,28 +404,28 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ andn %i2, 0xf, %i4 and %i2, 0xf, %i2 1: subcc %i4, 0x10, %i4 - EX_LD(LOAD(ldx, %i1, %o4)) + EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_i4) add %i1, 0x08, %i1 - EX_LD(LOAD(ldx, %i1, %g1)) + EX_LD(LOAD(ldx, %i1, %g1), NG_ret_i2_plus_i4) sub %i1, 0x08, %i1 - EX_ST(STORE(stx, %o4, %i1 + %i3)) + EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_i4) add %i1, 0x8, %i1 - EX_ST(STORE(stx, %g1, %i1 + %i3)) + EX_ST(STORE(stx, %g1, %i1 + %i3), NG_ret_i2_plus_i4_minus_8) bgu,pt %XCC, 1b add %i1, 0x8, %i1 73: andcc %i2, 0x8, %g0 be,pt %XCC, 1f nop sub %i2, 0x8, %i2 - EX_LD(LOAD(ldx, %i1, %o4)) - EX_ST(STORE(stx, %o4, %i1 + %i3)) + EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_8) + EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_8) add %i1, 0x8, %i1 1: andcc %i2, 0x4, %g0 be,pt %XCC, 1f nop sub %i2, 0x4, %i2 - EX_LD(LOAD(lduw, %i1, %i5)) - EX_ST(STORE(stw, %i5, %i1 + %i3)) + EX_LD(LOAD(lduw, %i1, %i5), NG_ret_i2_plus_4) + EX_ST(STORE(stw, %i5, %i1 + %i3), NG_ret_i2_plus_4) add %i1, 0x4, %i1 1: cmp %i2, 0 be,pt %XCC, 85f @@ -358,8 +441,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ sub %i2, %g1, %i2 1: subcc %g1, 1, %g1 - EX_LD(LOAD(ldub, %i1, %i5)) - EX_ST(STORE(stb, %i5, %i1 + %i3)) + EX_LD(LOAD(ldub, %i1, %i5), NG_ret_i2_plus_g1_plus_1) + EX_ST(STORE(stb, %i5, %i1 + %i3), NG_ret_i2_plus_g1_plus_1) bgu,pt %icc, 1b add %i1, 1, %i1 @@ -375,16 +458,16 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ 8: mov 64, %i3 andn %i1, 0x7, %i1 - EX_LD(LOAD(ldx, %i1, %g2)) + EX_LD(LOAD(ldx, %i1, %g2), NG_ret_i2) sub %i3, %g1, %i3 andn %i2, 0x7, %i4 sllx %g2, %g1, %g2 1: add %i1, 0x8, %i1 - EX_LD(LOAD(ldx, %i1, %g3)) + EX_LD(LOAD(ldx, %i1, %g3), NG_ret_i2_and_7_plus_i4) subcc %i4, 0x8, %i4 srlx %g3, %i3, %i5 or %i5, %g2, %i5 - EX_ST(STORE(stx, %i5, %o0)) + EX_ST(STORE(stx, %i5, %o0), NG_ret_i2_and_7_plus_i4) add %o0, 0x8, %o0 bgu,pt %icc, 1b sllx %g3, %g1, %g2 @@ -404,8 +487,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ 1: subcc %i2, 4, %i2 - EX_LD(LOAD(lduw, %i1, %g1)) - EX_ST(STORE(stw, %g1, %i1 + %i3)) + EX_LD(LOAD(lduw, %i1, %g1), NG_ret_i2_plus_4) + EX_ST(STORE(stw, %g1, %i1 + %i3), NG_ret_i2_plus_4) bgu,pt %XCC, 1b add %i1, 4, %i1 @@ -415,8 +498,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ .align 32 90: subcc %i2, 1, %i2 - EX_LD(LOAD(ldub, %i1, %g1)) - EX_ST(STORE(stb, %g1, %i1 + %i3)) + EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_1) + EX_ST(STORE(stb, %g1, %i1 + %i3), NG_ret_i2_plus_1) bgu,pt %XCC, 90b add %i1, 1, %i1 ret -- cgit v1.2.3 From e93704e4464fdc191f73fce35129c18de2ebf95d Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Mon, 24 Oct 2016 20:46:44 -0700 Subject: sparc64: Convert NG2copy_{from,to}_user to accurate exception reporting. Report the exact number of bytes which have not been successfully copied when an exception occurs, using the running remaining length. Signed-off-by: David S. Miller --- arch/sparc/lib/NG2copy_from_user.S | 8 +- arch/sparc/lib/NG2copy_to_user.S | 8 +- arch/sparc/lib/NG2memcpy.S | 228 +++++++++++++++++++++++-------------- 3 files changed, 153 insertions(+), 91 deletions(-) (limited to 'arch') diff --git a/arch/sparc/lib/NG2copy_from_user.S b/arch/sparc/lib/NG2copy_from_user.S index 4d47fa5519d4..b79a6998d87c 100644 --- a/arch/sparc/lib/NG2copy_from_user.S +++ b/arch/sparc/lib/NG2copy_from_user.S @@ -3,19 +3,19 @@ * Copyright (C) 2007 David S. Miller (davem@davemloft.net) */ -#define EX_LD(x) \ +#define EX_LD(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_mone_asi;\ + .word 98b, y; \ .text; \ .align 4; -#define EX_LD_FP(x) \ +#define EX_LD_FP(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_mone_asi_fp;\ + .word 98b, y##_fp; \ .text; \ .align 4; diff --git a/arch/sparc/lib/NG2copy_to_user.S b/arch/sparc/lib/NG2copy_to_user.S index 2078d752709a..dcec55f254ab 100644 --- a/arch/sparc/lib/NG2copy_to_user.S +++ b/arch/sparc/lib/NG2copy_to_user.S @@ -3,19 +3,19 @@ * Copyright (C) 2007 David S. Miller (davem@davemloft.net) */ -#define EX_ST(x) \ +#define EX_ST(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_mone_asi;\ + .word 98b, y; \ .text; \ .align 4; -#define EX_ST_FP(x) \ +#define EX_ST_FP(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_mone_asi_fp;\ + .word 98b, y##_fp; \ .text; \ .align 4; diff --git a/arch/sparc/lib/NG2memcpy.S b/arch/sparc/lib/NG2memcpy.S index d5f585df2f3f..c629dbd121b6 100644 --- a/arch/sparc/lib/NG2memcpy.S +++ b/arch/sparc/lib/NG2memcpy.S @@ -4,6 +4,7 @@ */ #ifdef __KERNEL__ +#include #include #include #define GLOBAL_SPARE %g7 @@ -32,21 +33,17 @@ #endif #ifndef EX_LD -#define EX_LD(x) x +#define EX_LD(x,y) x #endif #ifndef EX_LD_FP -#define EX_LD_FP(x) x +#define EX_LD_FP(x,y) x #endif #ifndef EX_ST -#define EX_ST(x) x +#define EX_ST(x,y) x #endif #ifndef EX_ST_FP -#define EX_ST_FP(x) x -#endif - -#ifndef EX_RETVAL -#define EX_RETVAL(x) x +#define EX_ST_FP(x,y) x #endif #ifndef LOAD @@ -140,45 +137,110 @@ fsrc2 %x6, %f12; \ fsrc2 %x7, %f14; #define FREG_LOAD_1(base, x0) \ - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)) + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1) #define FREG_LOAD_2(base, x0, x1) \ - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); #define FREG_LOAD_3(base, x0, x1, x2) \ - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); #define FREG_LOAD_4(base, x0, x1, x2, x3) \ - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ - EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), 
NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); #define FREG_LOAD_5(base, x0, x1, x2, x3, x4) \ - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ - EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \ - EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); #define FREG_LOAD_6(base, x0, x1, x2, x3, x4, x5) \ - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ - EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \ - EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \ - EX_LD_FP(LOAD(ldd, base + 0x28, %x5)); + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1); #define FREG_LOAD_7(base, x0, x1, x2, x3, x4, x5, x6) \ - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ - EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \ - EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \ - EX_LD_FP(LOAD(ldd, base + 0x28, %x5)); \ - EX_LD_FP(LOAD(ldd, base + 0x30, %x6)); + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x30, %x6), NG2_retl_o2_plus_g1); .register %g2,#scratch .register %g3,#scratch .text +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +__restore_fp: + VISExitHalf +__restore_asi: + retl + wr %g0, ASI_AIUS, %asi +ENTRY(NG2_retl_o2) + ba,pt %xcc, __restore_asi + mov %o2, %o0 +ENDPROC(NG2_retl_o2) +ENTRY(NG2_retl_o2_plus_1) + ba,pt %xcc, __restore_asi + add %o2, 1, %o0 +ENDPROC(NG2_retl_o2_plus_1) +ENTRY(NG2_retl_o2_plus_4) + ba,pt %xcc, __restore_asi + add %o2, 4, %o0 +ENDPROC(NG2_retl_o2_plus_4) +ENTRY(NG2_retl_o2_plus_8) + ba,pt %xcc, __restore_asi + add %o2, 8, %o0 +ENDPROC(NG2_retl_o2_plus_8) +ENTRY(NG2_retl_o2_plus_o4_plus_1) + add %o4, 1, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG2_retl_o2_plus_o4_plus_1) +ENTRY(NG2_retl_o2_plus_o4_plus_8) + add %o4, 8, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG2_retl_o2_plus_o4_plus_8) +ENTRY(NG2_retl_o2_plus_o4_plus_16) + add %o4, 16, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG2_retl_o2_plus_o4_plus_16) +ENTRY(NG2_retl_o2_plus_g1_fp) + ba,pt %xcc, __restore_fp + add %o2, %g1, %o0 +ENDPROC(NG2_retl_o2_plus_g1_fp) +ENTRY(NG2_retl_o2_plus_g1_plus_64_fp) + add %g1, 64, %g1 + ba,pt %xcc, __restore_fp + add %o2, %g1, %o0 +ENDPROC(NG2_retl_o2_plus_g1_plus_64_fp) +ENTRY(NG2_retl_o2_plus_g1_plus_1) + add %g1, 1, %g1 + ba,pt %xcc, __restore_asi + add %o2, %g1, %o0 +ENDPROC(NG2_retl_o2_plus_g1_plus_1) 
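+/* The NG2_retl_* stubs above and below are the fault-fixup targets
+ * named by the second argument of the EX_*() macros.  Each stub
+ * rebuilds the count of bytes that had not yet been copied when the
+ * access trapped (the running remainder in %o2 plus whatever the
+ * faulting chunk still had in flight), then branches to __restore_asi
+ * (the _fp variants go via __restore_fp, which drops the VIS state
+ * with VISExitHalf first); that code restores %asi and returns the
+ * count in %o0.
+ */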
+ENTRY(NG2_retl_o2_and_7_plus_o4) + and %o2, 7, %o2 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG2_retl_o2_and_7_plus_o4) +ENTRY(NG2_retl_o2_and_7_plus_o4_plus_8) + and %o2, 7, %o2 + add %o4, 8, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG2_retl_o2_and_7_plus_o4_plus_8) +#endif + .align 64 .globl FUNC_NAME @@ -230,8 +292,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %g0, %o4, %o4 ! bytes to align dst sub %o2, %o4, %o2 1: subcc %o4, 1, %o4 - EX_LD(LOAD(ldub, %o1, %g1)) - EX_ST(STORE(stb, %g1, %o0)) + EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_o4_plus_1) + EX_ST(STORE(stb, %g1, %o0), NG2_retl_o2_plus_o4_plus_1) add %o1, 1, %o1 bne,pt %XCC, 1b add %o0, 1, %o0 @@ -281,11 +343,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ nop /* fall through for 0 < low bits < 8 */ 110: sub %o4, 64, %g2 - EX_LD_FP(LOAD_BLK(%g2, %f0)) -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) - EX_LD_FP(LOAD_BLK(%o4, %f16)) + EX_LD_FP(LOAD_BLK(%g2, %f0), NG2_retl_o2_plus_g1) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f14, f16) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_8(f16, f18, f20, f22, f24, f26, f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -296,10 +358,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 120: sub %o4, 56, %g2 FREG_LOAD_7(%g2, f0, f2, f4, f6, f8, f10, f12) -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) - EX_LD_FP(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f16, f18) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_7(f18, f20, f22, f24, f26, f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -310,10 +372,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 130: sub %o4, 48, %g2 FREG_LOAD_6(%g2, f0, f2, f4, f6, f8, f10) -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) - EX_LD_FP(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f4, f6, f8, f10, f16, f18, f20) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_6(f20, f22, f24, f26, f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -324,10 +386,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 140: sub %o4, 40, %g2 FREG_LOAD_5(%g2, f0, f2, f4, f6, f8) -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) - EX_LD_FP(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f4, f6, f8, f16, f18, f20, f22) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_5(f22, f24, f26, f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -338,10 +400,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 150: sub %o4, 32, %g2 FREG_LOAD_4(%g2, f0, f2, f4, f6) -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) - EX_LD_FP(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f4, f6, f16, f18, f20, f22, f24) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_4(f24, f26, f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -352,10 +414,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len 
*/ 160: sub %o4, 24, %g2 FREG_LOAD_3(%g2, f0, f2, f4) -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) - EX_LD_FP(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f4, f16, f18, f20, f22, f24, f26) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_3(f26, f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -366,10 +428,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 170: sub %o4, 16, %g2 FREG_LOAD_2(%g2, f0, f2) -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) - EX_LD_FP(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f16, f18, f20, f22, f24, f26, f28) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_2(f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -380,10 +442,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 180: sub %o4, 8, %g2 FREG_LOAD_1(%g2, f0) -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) - EX_LD_FP(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f16, f18, f20, f22, f24, f26, f28, f30) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_1(f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -393,10 +455,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ nop 190: -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) subcc %g1, 64, %g1 - EX_LD_FP(LOAD_BLK(%o4, %f0)) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_LD_FP(LOAD_BLK(%o4, %f0), NG2_retl_o2_plus_g1_plus_64) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1_plus_64) add %o4, 64, %o4 bne,pt %xcc, 1b LOAD(prefetch, %o4 + 64, #one_read) @@ -423,28 +485,28 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ andn %o2, 0xf, %o4 and %o2, 0xf, %o2 1: subcc %o4, 0x10, %o4 - EX_LD(LOAD(ldx, %o1, %o5)) + EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_o4_plus_16) add %o1, 0x08, %o1 - EX_LD(LOAD(ldx, %o1, %g1)) + EX_LD(LOAD(ldx, %o1, %g1), NG2_retl_o2_plus_o4_plus_16) sub %o1, 0x08, %o1 - EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE)) + EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_16) add %o1, 0x8, %o1 - EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE)) + EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_8) bgu,pt %XCC, 1b add %o1, 0x8, %o1 73: andcc %o2, 0x8, %g0 be,pt %XCC, 1f nop sub %o2, 0x8, %o2 - EX_LD(LOAD(ldx, %o1, %o5)) - EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE)) + EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_8) + EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_8) add %o1, 0x8, %o1 1: andcc %o2, 0x4, %g0 be,pt %XCC, 1f nop sub %o2, 0x4, %o2 - EX_LD(LOAD(lduw, %o1, %o5)) - EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE)) + EX_LD(LOAD(lduw, %o1, %o5), NG2_retl_o2_plus_4) + EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4) add %o1, 0x4, %o1 1: cmp %o2, 0 be,pt %XCC, 85f @@ -460,8 +522,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %o2, %g1, %o2 1: subcc %g1, 1, %g1 - EX_LD(LOAD(ldub, %o1, %o5)) - EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE)) + EX_LD(LOAD(ldub, %o1, %o5), NG2_retl_o2_plus_g1_plus_1) + EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_g1_plus_1) bgu,pt %icc, 1b add %o1, 1, %o1 @@ -477,16 +539,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 8: mov 64, GLOBAL_SPARE andn %o1, 
0x7, %o1 - EX_LD(LOAD(ldx, %o1, %g2)) + EX_LD(LOAD(ldx, %o1, %g2), NG2_retl_o2) sub GLOBAL_SPARE, %g1, GLOBAL_SPARE andn %o2, 0x7, %o4 sllx %g2, %g1, %g2 1: add %o1, 0x8, %o1 - EX_LD(LOAD(ldx, %o1, %g3)) + EX_LD(LOAD(ldx, %o1, %g3), NG2_retl_o2_and_7_plus_o4) subcc %o4, 0x8, %o4 srlx %g3, GLOBAL_SPARE, %o5 or %o5, %g2, %o5 - EX_ST(STORE(stx, %o5, %o0)) + EX_ST(STORE(stx, %o5, %o0), NG2_retl_o2_and_7_plus_o4_plus_8) add %o0, 0x8, %o0 bgu,pt %icc, 1b sllx %g3, %g1, %g2 @@ -506,8 +568,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 1: subcc %o2, 4, %o2 - EX_LD(LOAD(lduw, %o1, %g1)) - EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE)) + EX_LD(LOAD(lduw, %o1, %g1), NG2_retl_o2_plus_4) + EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4) bgu,pt %XCC, 1b add %o1, 4, %o1 @@ -517,8 +579,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ .align 32 90: subcc %o2, 1, %o2 - EX_LD(LOAD(ldub, %o1, %g1)) - EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE)) + EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_1) + EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_1) bgu,pt %XCC, 90b add %o1, 1, %o1 retl -- cgit v1.2.3 From ee841d0aff649164080e445e84885015958d8ff4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 24 Oct 2016 21:20:35 -0700 Subject: sparc64: Convert U3copy_{from,to}_user to accurate exception reporting. Report the exact number of bytes which have not been successfully copied when an exception occurs, using the running remaining length. Signed-off-by: David S. Miller --- arch/sparc/lib/U3copy_from_user.S | 8 +- arch/sparc/lib/U3copy_to_user.S | 8 +- arch/sparc/lib/U3memcpy.S | 227 ++++++++++++++++++++++++++------------ 3 files changed, 162 insertions(+), 81 deletions(-) (limited to 'arch') diff --git a/arch/sparc/lib/U3copy_from_user.S b/arch/sparc/lib/U3copy_from_user.S index 1046e2b083fe..db73010a1af8 100644 --- a/arch/sparc/lib/U3copy_from_user.S +++ b/arch/sparc/lib/U3copy_from_user.S @@ -3,19 +3,19 @@ * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) */ -#define EX_LD(x) \ +#define EX_LD(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_mone; \ + .word 98b, y; \ .text; \ .align 4; -#define EX_LD_FP(x) \ +#define EX_LD_FP(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_mone_fp;\ + .word 98b, y##_fp; \ .text; \ .align 4; diff --git a/arch/sparc/lib/U3copy_to_user.S b/arch/sparc/lib/U3copy_to_user.S index 032b0c5419b0..c4ee858e352a 100644 --- a/arch/sparc/lib/U3copy_to_user.S +++ b/arch/sparc/lib/U3copy_to_user.S @@ -3,19 +3,19 @@ * Copyright (C) 1999, 2000, 2004 David S. 
Miller (davem@redhat.com) */ -#define EX_ST(x) \ +#define EX_ST(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_mone; \ + .word 98b, y; \ .text; \ .align 4; -#define EX_ST_FP(x) \ +#define EX_ST_FP(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_mone_fp;\ + .word 98b, y##_fp; \ .text; \ .align 4; diff --git a/arch/sparc/lib/U3memcpy.S b/arch/sparc/lib/U3memcpy.S index 491ee69e4995..54f98706b03b 100644 --- a/arch/sparc/lib/U3memcpy.S +++ b/arch/sparc/lib/U3memcpy.S @@ -4,6 +4,7 @@ */ #ifdef __KERNEL__ +#include #include #include #define GLOBAL_SPARE %g7 @@ -22,21 +23,17 @@ #endif #ifndef EX_LD -#define EX_LD(x) x +#define EX_LD(x,y) x #endif #ifndef EX_LD_FP -#define EX_LD_FP(x) x +#define EX_LD_FP(x,y) x #endif #ifndef EX_ST -#define EX_ST(x) x +#define EX_ST(x,y) x #endif #ifndef EX_ST_FP -#define EX_ST_FP(x) x -#endif - -#ifndef EX_RETVAL -#define EX_RETVAL(x) x +#define EX_ST_FP(x,y) x #endif #ifndef LOAD @@ -77,6 +74,87 @@ */ .text +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +__restore_fp: + VISExitHalf + retl + nop +ENTRY(U3_retl_o2_plus_g2_plus_g1_plus_1_fp) + add %g1, 1, %g1 + add %g2, %g1, %g2 + ba,pt %xcc, __restore_fp + add %o2, %g2, %o0 +ENDPROC(U3_retl_o2_plus_g2_plus_g1_plus_1_fp) +ENTRY(U3_retl_o2_plus_g2_fp) + ba,pt %xcc, __restore_fp + add %o2, %g2, %o0 +ENDPROC(U3_retl_o2_plus_g2_fp) +ENTRY(U3_retl_o2_plus_g2_plus_8_fp) + add %g2, 8, %g2 + ba,pt %xcc, __restore_fp + add %o2, %g2, %o0 +ENDPROC(U3_retl_o2_plus_g2_plus_8_fp) +ENTRY(U3_retl_o2) + retl + mov %o2, %o0 +ENDPROC(U3_retl_o2) +ENTRY(U3_retl_o2_plus_1) + retl + add %o2, 1, %o0 +ENDPROC(U3_retl_o2_plus_1) +ENTRY(U3_retl_o2_plus_4) + retl + add %o2, 4, %o0 +ENDPROC(U3_retl_o2_plus_4) +ENTRY(U3_retl_o2_plus_8) + retl + add %o2, 8, %o0 +ENDPROC(U3_retl_o2_plus_8) +ENTRY(U3_retl_o2_plus_g1_plus_1) + add %g1, 1, %g1 + retl + add %o2, %g1, %o0 +ENDPROC(U3_retl_o2_plus_g1_plus_1) +ENTRY(U3_retl_o2_fp) + ba,pt %xcc, __restore_fp + mov %o2, %o0 +ENDPROC(U3_retl_o2_fp) +ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp) + sll %o3, 6, %o3 + add %o3, 0x80, %o3 + ba,pt %xcc, __restore_fp + add %o2, %o3, %o0 +ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp) +ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp) + sll %o3, 6, %o3 + add %o3, 0x40, %o3 + ba,pt %xcc, __restore_fp + add %o2, %o3, %o0 +ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp) +ENTRY(U3_retl_o2_plus_GS_plus_0x10) + add GLOBAL_SPARE, 0x10, GLOBAL_SPARE + retl + add %o2, GLOBAL_SPARE, %o0 +ENDPROC(U3_retl_o2_plus_GS_plus_0x10) +ENTRY(U3_retl_o2_plus_GS_plus_0x08) + add GLOBAL_SPARE, 0x08, GLOBAL_SPARE + retl + add %o2, GLOBAL_SPARE, %o0 +ENDPROC(U3_retl_o2_plus_GS_plus_0x08) +ENTRY(U3_retl_o2_and_7_plus_GS) + and %o2, 7, %o2 + retl + add %o2, GLOBAL_SPARE, %o2 +ENDPROC(U3_retl_o2_and_7_plus_GS) +ENTRY(U3_retl_o2_and_7_plus_GS_plus_8) + add GLOBAL_SPARE, 8, GLOBAL_SPARE + and %o2, 7, %o2 + retl + add %o2, GLOBAL_SPARE, %o2 +ENDPROC(U3_retl_o2_and_7_plus_GS_plus_8) +#endif + .align 64 /* The cheetah's flexible spine, oversized liver, enlarged heart, @@ -126,8 +204,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ and %g2, 0x38, %g2 1: subcc %g1, 0x1, %g1 - EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3)) - EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE)) + EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U3_retl_o2_plus_g2_plus_g1_plus_1) + EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE), U3_retl_o2_plus_g2_plus_g1_plus_1) bgu,pt %XCC, 1b add %o1, 0x1, %o1 @@ -138,20 +216,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ be,pt %icc, 3f alignaddr %o1, %g0, %o1 - 
EX_LD_FP(LOAD(ldd, %o1, %f4)) -1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6)) + EX_LD_FP(LOAD(ldd, %o1, %f4), U3_retl_o2_plus_g2) +1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U3_retl_o2_plus_g2) add %o1, 0x8, %o1 subcc %g2, 0x8, %g2 faligndata %f4, %f6, %f0 - EX_ST_FP(STORE(std, %f0, %o0)) + EX_ST_FP(STORE(std, %f0, %o0), U3_retl_o2_plus_g2_plus_8) be,pn %icc, 3f add %o0, 0x8, %o0 - EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4)) + EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U3_retl_o2_plus_g2) add %o1, 0x8, %o1 subcc %g2, 0x8, %g2 faligndata %f6, %f4, %f2 - EX_ST_FP(STORE(std, %f2, %o0)) + EX_ST_FP(STORE(std, %f2, %o0), U3_retl_o2_plus_g2_plus_8) bne,pt %icc, 1b add %o0, 0x8, %o0 @@ -161,25 +239,25 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ LOAD(prefetch, %o1 + 0x080, #one_read) LOAD(prefetch, %o1 + 0x0c0, #one_read) LOAD(prefetch, %o1 + 0x100, #one_read) - EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0)) + EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0), U3_retl_o2) LOAD(prefetch, %o1 + 0x140, #one_read) - EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2)) + EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2) LOAD(prefetch, %o1 + 0x180, #one_read) - EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4)) + EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2) LOAD(prefetch, %o1 + 0x1c0, #one_read) faligndata %f0, %f2, %f16 - EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6)) + EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2) faligndata %f2, %f4, %f18 - EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8)) + EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2) faligndata %f4, %f6, %f20 - EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10)) + EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2) faligndata %f6, %f8, %f22 - EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12)) + EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2) faligndata %f8, %f10, %f24 - EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14)) + EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2) faligndata %f10, %f12, %f26 - EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0)) + EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2) subcc GLOBAL_SPARE, 0x80, GLOBAL_SPARE add %o1, 0x40, %o1 @@ -190,26 +268,26 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ .align 64 1: - EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2)) + EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80) faligndata %f12, %f14, %f28 - EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4)) + EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80) faligndata %f14, %f0, %f30 - EX_ST_FP(STORE_BLK(%f16, %o0)) - EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6)) + EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80) + EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f0, %f2, %f16 add %o0, 0x40, %o0 - EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8)) + EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f2, %f4, %f18 - EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10)) + EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f4, %f6, %f20 - EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12)) + EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40) subcc %o3, 0x01, %o3 faligndata %f6, %f8, %f22 - EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14)) + EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x80) faligndata %f8, %f10, %f24 - EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0)) + EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x80) LOAD(prefetch, %o1 + 0x1c0, #one_read) faligndata %f10, %f12, %f26 bg,pt %XCC, 1b @@ -217,29 +295,29 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ /* Finally we copy the last full 64-byte block. 
*/ 2: - EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2)) + EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80) faligndata %f12, %f14, %f28 - EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4)) + EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80) faligndata %f14, %f0, %f30 - EX_ST_FP(STORE_BLK(%f16, %o0)) - EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6)) + EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80) + EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f0, %f2, %f16 - EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8)) + EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f2, %f4, %f18 - EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10)) + EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f4, %f6, %f20 - EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12)) + EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f6, %f8, %f22 - EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14)) + EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f8, %f10, %f24 cmp %g1, 0 be,pt %XCC, 1f add %o0, 0x40, %o0 - EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0)) + EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x40) 1: faligndata %f10, %f12, %f26 faligndata %f12, %f14, %f28 faligndata %f14, %f0, %f30 - EX_ST_FP(STORE_BLK(%f16, %o0)) + EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x40) add %o0, 0x40, %o0 add %o1, 0x40, %o1 membar #Sync @@ -259,20 +337,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %o2, %g2, %o2 be,a,pt %XCC, 1f - EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0)) + EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0), U3_retl_o2_plus_g2) -1: EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2)) +1: EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2), U3_retl_o2_plus_g2) add %o1, 0x8, %o1 subcc %g2, 0x8, %g2 faligndata %f0, %f2, %f8 - EX_ST_FP(STORE(std, %f8, %o0)) + EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8) be,pn %XCC, 2f add %o0, 0x8, %o0 - EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0)) + EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0), U3_retl_o2_plus_g2) add %o1, 0x8, %o1 subcc %g2, 0x8, %g2 faligndata %f2, %f0, %f8 - EX_ST_FP(STORE(std, %f8, %o0)) + EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8) bne,pn %XCC, 1b add %o0, 0x8, %o0 @@ -292,30 +370,33 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ andcc %o2, 0x8, %g0 be,pt %icc, 1f nop - EX_LD(LOAD(ldx, %o1, %o5)) - EX_ST(STORE(stx, %o5, %o1 + %o3)) + EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2) + EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2) add %o1, 0x8, %o1 + sub %o2, 8, %o2 1: andcc %o2, 0x4, %g0 be,pt %icc, 1f nop - EX_LD(LOAD(lduw, %o1, %o5)) - EX_ST(STORE(stw, %o5, %o1 + %o3)) + EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2) + EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2) add %o1, 0x4, %o1 + sub %o2, 4, %o2 1: andcc %o2, 0x2, %g0 be,pt %icc, 1f nop - EX_LD(LOAD(lduh, %o1, %o5)) - EX_ST(STORE(sth, %o5, %o1 + %o3)) + EX_LD(LOAD(lduh, %o1, %o5), U3_retl_o2) + EX_ST(STORE(sth, %o5, %o1 + %o3), U3_retl_o2) add %o1, 0x2, %o1 + sub %o2, 2, %o2 1: andcc %o2, 0x1, %g0 be,pt %icc, 85f nop - EX_LD(LOAD(ldub, %o1, %o5)) + EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2) ba,pt %xcc, 85f - EX_ST(STORE(stb, %o5, %o1 + %o3)) + EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2) .align 64 70: /* 16 < len <= 64 */ @@ -326,26 +407,26 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ andn %o2, 0xf, GLOBAL_SPARE and %o2, 0xf, %o2 1: subcc GLOBAL_SPARE, 0x10, GLOBAL_SPARE - EX_LD(LOAD(ldx, %o1 + 0x00, %o5)) - EX_LD(LOAD(ldx, %o1 + 0x08, %g1)) - EX_ST(STORE(stx, %o5, %o1 
+ %o3)) + EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U3_retl_o2_plus_GS_plus_0x10) + EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U3_retl_o2_plus_GS_plus_0x10) + EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x10) add %o1, 0x8, %o1 - EX_ST(STORE(stx, %g1, %o1 + %o3)) + EX_ST(STORE(stx, %g1, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x08) bgu,pt %XCC, 1b add %o1, 0x8, %o1 73: andcc %o2, 0x8, %g0 be,pt %XCC, 1f nop sub %o2, 0x8, %o2 - EX_LD(LOAD(ldx, %o1, %o5)) - EX_ST(STORE(stx, %o5, %o1 + %o3)) + EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2_plus_8) + EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_8) add %o1, 0x8, %o1 1: andcc %o2, 0x4, %g0 be,pt %XCC, 1f nop sub %o2, 0x4, %o2 - EX_LD(LOAD(lduw, %o1, %o5)) - EX_ST(STORE(stw, %o5, %o1 + %o3)) + EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2_plus_4) + EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2_plus_4) add %o1, 0x4, %o1 1: cmp %o2, 0 be,pt %XCC, 85f @@ -361,8 +442,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %o2, %g1, %o2 1: subcc %g1, 1, %g1 - EX_LD(LOAD(ldub, %o1, %o5)) - EX_ST(STORE(stb, %o5, %o1 + %o3)) + EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2_plus_g1_plus_1) + EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2_plus_g1_plus_1) bgu,pt %icc, 1b add %o1, 1, %o1 @@ -378,16 +459,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 8: mov 64, %o3 andn %o1, 0x7, %o1 - EX_LD(LOAD(ldx, %o1, %g2)) + EX_LD(LOAD(ldx, %o1, %g2), U3_retl_o2) sub %o3, %g1, %o3 andn %o2, 0x7, GLOBAL_SPARE sllx %g2, %g1, %g2 -1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3)) +1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U3_retl_o2_and_7_plus_GS) subcc GLOBAL_SPARE, 0x8, GLOBAL_SPARE add %o1, 0x8, %o1 srlx %g3, %o3, %o5 or %o5, %g2, %o5 - EX_ST(STORE(stx, %o5, %o0)) + EX_ST(STORE(stx, %o5, %o0), U3_retl_o2_and_7_plus_GS_plus_8) add %o0, 0x8, %o0 bgu,pt %icc, 1b sllx %g3, %g1, %g2 @@ -407,8 +488,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 1: subcc %o2, 4, %o2 - EX_LD(LOAD(lduw, %o1, %g1)) - EX_ST(STORE(stw, %g1, %o1 + %o3)) + EX_LD(LOAD(lduw, %o1, %g1), U3_retl_o2_plus_4) + EX_ST(STORE(stw, %g1, %o1 + %o3), U3_retl_o2_plus_4) bgu,pt %XCC, 1b add %o1, 4, %o1 @@ -418,8 +499,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ .align 32 90: subcc %o2, 1, %o2 - EX_LD(LOAD(ldub, %o1, %g1)) - EX_ST(STORE(stb, %g1, %o1 + %o3)) + EX_LD(LOAD(ldub, %o1, %g1), U3_retl_o2_plus_1) + EX_ST(STORE(stb, %g1, %o1 + %o3), U3_retl_o2_plus_1) bgu,pt %XCC, 90b add %o1, 1, %o1 retl -- cgit v1.2.3 From 614da3d9685b67917cab48c8452fd8bf93de0867 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 24 Oct 2016 21:22:27 -0700 Subject: sparc64: Delete now unused user copy assembler helpers. All of __ret{,l}_mone{_asi,_fp,_asi_fpu} are now unused. Signed-off-by: David S. 
Miller --- arch/sparc/kernel/head_64.S | 30 ------------------------------ 1 file changed, 30 deletions(-) (limited to 'arch') diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index 5f17de6b5fc3..6aa3da152c20 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -931,36 +931,6 @@ ENTRY(__retl_efault) mov -EFAULT, %o0 ENDPROC(__retl_efault) -ENTRY(__retl_mone) - retl - mov -1, %o0 -ENDPROC(__retl_mone) - -ENTRY(__retl_mone_fp) - VISExitHalf - retl - mov 1, %o0 -ENDPROC(__retl_mone_fp) - -ENTRY(__ret_mone_asi) - wr %g0, ASI_AIUS, %asi - ret - restore %g0, 1, %o0 -ENDPROC(__ret_mone_asi) - -ENTRY(__retl_mone_asi) - wr %g0, ASI_AIUS, %asi - retl - mov 1, %o0 -ENDPROC(__retl_mone_asi) - -ENTRY(__retl_mone_asi_fp) - wr %g0, ASI_AIUS, %asi - VISExitHalf - retl - mov 1, %o0 -ENDPROC(__retl_mone_asi_fp) - ENTRY(__retl_o1) retl mov %o1, %o0 -- cgit v1.2.3 From 0fd0ff01d4c3c01e7fe69b762ee1a13236639acc Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 24 Oct 2016 21:25:31 -0700 Subject: sparc64: Delete now unused user copy fixup functions. Now that all of the user copy routines are converted to return accurate residual lengths when an exception occurs, we no longer need the broken fixup routines. Signed-off-by: David S. Miller --- arch/sparc/include/asm/uaccess_64.h | 36 ++----------------- arch/sparc/lib/Makefile | 2 +- arch/sparc/lib/user_fixup.c | 71 ------------------------------------- 3 files changed, 4 insertions(+), 105 deletions(-) delete mode 100644 arch/sparc/lib/user_fixup.c (limited to 'arch') diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h index 0244012435c8..5373136c412b 100644 --- a/arch/sparc/include/asm/uaccess_64.h +++ b/arch/sparc/include/asm/uaccess_64.h @@ -188,64 +188,34 @@ int __get_user_bad(void); unsigned long __must_check ___copy_from_user(void *to, const void __user *from, unsigned long size); -unsigned long copy_from_user_fixup(void *to, const void __user *from, - unsigned long size); static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long size) { - unsigned long ret; - check_object_size(to, size, false); - ret = ___copy_from_user(to, from, size); - if (unlikely(ret)) { - if ((long)ret < 0) - ret = copy_from_user_fixup(to, from, size); - return ret; - } - - return ret; + return ___copy_from_user(to, from, size); } #define __copy_from_user copy_from_user unsigned long __must_check ___copy_to_user(void __user *to, const void *from, unsigned long size); -unsigned long copy_to_user_fixup(void __user *to, const void *from, - unsigned long size); static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long size) { - unsigned long ret; - check_object_size(from, size, true); - ret = ___copy_to_user(to, from, size); - if (unlikely(ret)) { - if ((long)ret < 0) - ret = copy_to_user_fixup(to, from, size); - return ret; - } - return ret; + return ___copy_to_user(to, from, size); } #define __copy_to_user copy_to_user unsigned long __must_check ___copy_in_user(void __user *to, const void __user *from, unsigned long size); -unsigned long copy_in_user_fixup(void __user *to, void __user *from, - unsigned long size); static inline unsigned long __must_check copy_in_user(void __user *to, void __user *from, unsigned long size) { - unsigned long ret = ___copy_in_user(to, from, size); - - if (unlikely(ret)) { - if ((long)ret < 0) - ret = copy_in_user_fixup(to, from, size); - return ret; - } - return ret; + return 
___copy_in_user(to, from, size); } #define __copy_in_user copy_in_user diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile index 885f00e81d1a..69912d2f8b54 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -38,7 +38,7 @@ lib-$(CONFIG_SPARC64) += NG4patch.o NG4copy_page.o NG4clear_page.o NG4memset.o lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o -lib-$(CONFIG_SPARC64) += copy_in_user.o user_fixup.o memmove.o +lib-$(CONFIG_SPARC64) += copy_in_user.o memmove.o lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o obj-$(CONFIG_SPARC64) += iomap.o diff --git a/arch/sparc/lib/user_fixup.c b/arch/sparc/lib/user_fixup.c deleted file mode 100644 index ac96ae236709..000000000000 --- a/arch/sparc/lib/user_fixup.c +++ /dev/null @@ -1,71 +0,0 @@ -/* user_fixup.c: Fix up user copy faults. - * - * Copyright (C) 2004 David S. Miller - */ - -#include -#include -#include -#include -#include - -#include - -/* Calculating the exact fault address when using - * block loads and stores can be very complicated. - * - * Instead of trying to be clever and handling all - * of the cases, just fix things up simply here. - */ - -static unsigned long compute_size(unsigned long start, unsigned long size, unsigned long *offset) -{ - unsigned long fault_addr = current_thread_info()->fault_address; - unsigned long end = start + size; - - if (fault_addr < start || fault_addr >= end) { - *offset = 0; - } else { - *offset = fault_addr - start; - size = end - fault_addr; - } - return size; -} - -unsigned long copy_from_user_fixup(void *to, const void __user *from, unsigned long size) -{ - unsigned long offset; - - size = compute_size((unsigned long) from, size, &offset); - if (likely(size)) - memset(to + offset, 0, size); - - return size; -} -EXPORT_SYMBOL(copy_from_user_fixup); - -unsigned long copy_to_user_fixup(void __user *to, const void *from, unsigned long size) -{ - unsigned long offset; - - return compute_size((unsigned long) to, size, &offset); -} -EXPORT_SYMBOL(copy_to_user_fixup); - -unsigned long copy_in_user_fixup(void __user *to, void __user *from, unsigned long size) -{ - unsigned long fault_addr = current_thread_info()->fault_address; - unsigned long start = (unsigned long) to; - unsigned long end = start + size; - - if (fault_addr >= start && fault_addr < end) - return end - fault_addr; - - start = (unsigned long) from; - end = start + size; - if (fault_addr >= start && fault_addr < end) - return end - fault_addr; - - return size; -} -EXPORT_SYMBOL(copy_in_user_fixup); -- cgit v1.2.3 From 7fbe6ac02485504b964b283aca62b36b4313ca79 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 24 Oct 2016 08:31:27 -0500 Subject: x86/unwind: Fix empty stack dereference in guess unwinder Vince Waver reported the following bug: WARNING: CPU: 0 PID: 21338 at arch/x86/mm/fault.c:435 vmalloc_fault+0x58/0x1f0 CPU: 0 PID: 21338 Comm: perf_fuzzer Not tainted 4.8.0+ #37 Hardware name: Hewlett-Packard HP Compaq Pro 6305 SFF/1850, BIOS K06 v02.57 08/16/2013 Call Trace: ? dump_stack+0x46/0x59 ? __warn+0xd5/0xee ? vmalloc_fault+0x58/0x1f0 ? __do_page_fault+0x6d/0x48e ? perf_log_throttle+0xa4/0xf4 ? trace_page_fault+0x22/0x30 ? __unwind_start+0x28/0x42 ? perf_callchain_kernel+0x75/0xac ? get_perf_callchain+0x13a/0x1f0 ? perf_callchain+0x6a/0x6c ? perf_prepare_sample+0x71/0x2eb ? perf_event_output_forward+0x1a/0x54 ? __default_send_IPI_shortcut+0x10/0x2d ? __perf_event_overflow+0xfb/0x167 ? 
x86_pmu_handle_irq+0x113/0x150 ? native_read_msr+0x6/0x34 ? perf_event_nmi_handler+0x22/0x39 ? perf_ibs_nmi_handler+0x4a/0x51 ? perf_event_nmi_handler+0x22/0x39 ? nmi_handle+0x4d/0xf0 ? perf_ibs_handle_irq+0x3d1/0x3d1 ? default_do_nmi+0x3c/0xd5 ? do_nmi+0x92/0x102 ? end_repeat_nmi+0x1a/0x1e ? entry_SYSCALL_64_after_swapgs+0x12/0x4a ? entry_SYSCALL_64_after_swapgs+0x12/0x4a ? entry_SYSCALL_64_after_swapgs+0x12/0x4a ^A4---[ end trace 632723104d47d31a ]--- BUG: stack guard page was hit at ffffc90008500000 (stack is ffffc900084fc000..ffffc900084fffff) kernel stack overflow (page fault): 0000 [#1] SMP ... The NMI hit in the entry code right after setting up the stack pointer from 'cpu_current_top_of_stack', so the kernel stack was empty. The 'guess' version of __unwind_start() attempted to dereference the "top of stack" pointer, which is not actually *on* the stack. Add a check in the guess unwinder to deal with an empty stack. (The frame pointer unwinder already has such a check.) Reported-by: Vince Weaver Signed-off-by: Josh Poimboeuf Cc: Andy Lutomirski Cc: Arnaldo Carvalho de Melo Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 7c7900f89770 ("x86/unwind: Add new unwind interface and implementations") Link: http://lkml.kernel.org/r/20161024133127.e5evgeebdbohnmpb@treble Signed-off-by: Ingo Molnar --- arch/x86/kernel/unwind_guess.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/unwind_guess.c b/arch/x86/kernel/unwind_guess.c index 9298993dc8b7..2d721e533cf4 100644 --- a/arch/x86/kernel/unwind_guess.c +++ b/arch/x86/kernel/unwind_guess.c @@ -47,7 +47,14 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task, get_stack_info(first_frame, state->task, &state->stack_info, &state->stack_mask); - if (!__kernel_text_address(*first_frame)) + /* + * The caller can provide the address of the first frame directly + * (first_frame) or indirectly (regs->sp) to indicate which stack frame + * to start unwinding at. Skip ahead until we reach it. + */ + if (!unwind_done(state) && + (!on_stack(&state->stack_info, first_frame, sizeof(long)) || + !__kernel_text_address(*first_frame))) unwind_next_frame(state); } EXPORT_SYMBOL_GPL(__unwind_start); -- cgit v1.2.3 From a2209b742e6cf978b85d4f31a25a269c3d3b062b Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 24 Oct 2016 09:00:12 -0600 Subject: x86/build: Fix build with older GCC versions Older GCC (observed with 4.1.x) doesn't support -Wno-override-init and also doesn't ignore unknown -Wno-* options. 
Signed-off-by: Jan Beulich Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Valdis Kletnieks Cc: Valdis.Kletnieks@vt.edu Fixes: 5e44258d16 "x86/build: Reduce the W=1 warnings noise when compiling x86 syscall tables" Link: http://lkml.kernel.org/r/580E3E1C02000078001191C4@prv-mh.provo.novell.com Signed-off-by: Ingo Molnar --- arch/x86/entry/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile index 77f28ce9c646..9976fcecd17e 100644 --- a/arch/x86/entry/Makefile +++ b/arch/x86/entry/Makefile @@ -5,8 +5,8 @@ OBJECT_FILES_NON_STANDARD_entry_$(BITS).o := y OBJECT_FILES_NON_STANDARD_entry_64_compat.o := y -CFLAGS_syscall_64.o += -Wno-override-init -CFLAGS_syscall_32.o += -Wno-override-init +CFLAGS_syscall_64.o += $(call cc-option,-Wno-override-init,) +CFLAGS_syscall_32.o += $(call cc-option,-Wno-override-init,) obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o obj-y += common.o -- cgit v1.2.3 From d320b9a5bd85f6178cc3ed8b0a1a9960f2b5bc7b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 24 Oct 2016 17:33:18 +0200 Subject: x86/quirks: Hide maybe-uninitialized warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gcc -Wmaybe-uninitialized detects that quirk_intel_brickland_xeon_ras_cap uses uninitialized data when CONFIG_PCI is not set: arch/x86/kernel/quirks.c: In function ‘quirk_intel_brickland_xeon_ras_cap’: arch/x86/kernel/quirks.c:641:13: error: ‘capid0’ is used uninitialized in this function [-Werror=uninitialized] However, the function is also not called in this configuration, so we can avoid the warning by moving the existing #ifdef to cover it as well. Signed-off-by: Arnd Bergmann Cc: Bjorn Helgaas Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-pci@vger.kernel.org Link: http://lkml.kernel.org/r/20161024153325.2752428-1-arnd@arndb.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/quirks.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 51402a7e4ca6..0bee04d41bed 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -625,8 +625,6 @@ static void amd_disable_seq_and_redirect_scrub(struct pci_dev *dev) DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3, amd_disable_seq_and_redirect_scrub); -#endif - #if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE) #include #include @@ -657,3 +655,4 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, quirk_intel_brickland_xeon_ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, quirk_intel_brickland_xeon_ras_cap); DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2083, quirk_intel_purley_xeon_ras_cap); #endif +#endif -- cgit v1.2.3 From b429ae4d5b565a71dfffd759dfcd4f6c093ced94 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 25 Oct 2016 16:23:26 -0700 Subject: sparc64: Fix illegal relative branches in hypervisor patched TLB code. When we copy code over to patch another piece of code, we can only use PC-relative branches that target code within that piece of code. Such PC-relative branches cannot be made to external symbols because the patch moves the location of the code and thus modifies the relative address of external symbols. Use an absolute jmpl to fix this problem. Signed-off-by: David S. 
Miller --- arch/sparc/mm/ultra.S | 65 ++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 51 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S index b4f4733abc6e..85de139bfad6 100644 --- a/arch/sparc/mm/ultra.S +++ b/arch/sparc/mm/ultra.S @@ -30,7 +30,7 @@ .text .align 32 .globl __flush_tlb_mm -__flush_tlb_mm: /* 18 insns */ +__flush_tlb_mm: /* 19 insns */ /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */ ldxa [%o1] ASI_DMMU, %g2 cmp %g2, %o0 @@ -81,7 +81,7 @@ __flush_tlb_page: /* 22 insns */ .align 32 .globl __flush_tlb_pending -__flush_tlb_pending: /* 26 insns */ +__flush_tlb_pending: /* 27 insns */ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ rdpr %pstate, %g7 sllx %o1, 3, %o1 @@ -113,7 +113,7 @@ __flush_tlb_pending: /* 26 insns */ .align 32 .globl __flush_tlb_kernel_range -__flush_tlb_kernel_range: /* 16 insns */ +__flush_tlb_kernel_range: /* 19 insns */ /* %o0=start, %o1=end */ cmp %o0, %o1 be,pn %xcc, 2f @@ -131,6 +131,9 @@ __flush_tlb_kernel_range: /* 16 insns */ retl nop nop + nop + nop + nop __spitfire_flush_tlb_mm_slow: rdpr %pstate, %g1 @@ -309,19 +312,28 @@ __hypervisor_tlb_tl0_error: ret restore -__hypervisor_flush_tlb_mm: /* 10 insns */ +__hypervisor_flush_tlb_mm: /* 19 insns */ mov %o0, %o2 /* ARG2: mmu context */ mov 0, %o0 /* ARG0: CPU lists unimplemented */ mov 0, %o1 /* ARG1: CPU lists unimplemented */ mov HV_MMU_ALL, %o3 /* ARG3: flags */ mov HV_FAST_MMU_DEMAP_CTX, %o5 ta HV_FAST_TRAP - brnz,pn %o0, __hypervisor_tlb_tl0_error + brnz,pn %o0, 1f mov HV_FAST_MMU_DEMAP_CTX, %o1 retl nop +1: sethi %hi(__hypervisor_tlb_tl0_error), %o5 + jmpl %o5 + %lo(__hypervisor_tlb_tl0_error), %g0 + nop + nop + nop + nop + nop + nop + nop -__hypervisor_flush_tlb_page: /* 11 insns */ +__hypervisor_flush_tlb_page: /* 22 insns */ /* %o0 = context, %o1 = vaddr */ mov %o0, %g2 mov %o1, %o0 /* ARG0: vaddr + IMMU-bit */ @@ -330,10 +342,21 @@ __hypervisor_flush_tlb_page: /* 11 insns */ srlx %o0, PAGE_SHIFT, %o0 sllx %o0, PAGE_SHIFT, %o0 ta HV_MMU_UNMAP_ADDR_TRAP - brnz,pn %o0, __hypervisor_tlb_tl0_error + brnz,pn %o0, 1f mov HV_MMU_UNMAP_ADDR_TRAP, %o1 retl nop +1: sethi %hi(__hypervisor_tlb_tl0_error), %o2 + jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0 + nop + nop + nop + nop + nop + nop + nop + nop + nop __hypervisor_flush_tlb_pending: /* 16 insns */ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ @@ -347,14 +370,25 @@ __hypervisor_flush_tlb_pending: /* 16 insns */ srlx %o0, PAGE_SHIFT, %o0 sllx %o0, PAGE_SHIFT, %o0 ta HV_MMU_UNMAP_ADDR_TRAP - brnz,pn %o0, __hypervisor_tlb_tl0_error + brnz,pn %o0, 1f mov HV_MMU_UNMAP_ADDR_TRAP, %o1 brnz,pt %g1, 1b nop retl nop +1: sethi %hi(__hypervisor_tlb_tl0_error), %o2 + jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0 + nop + nop + nop + nop + nop + nop + nop + nop + nop -__hypervisor_flush_tlb_kernel_range: /* 16 insns */ +__hypervisor_flush_tlb_kernel_range: /* 19 insns */ /* %o0=start, %o1=end */ cmp %o0, %o1 be,pn %xcc, 2f @@ -366,12 +400,15 @@ __hypervisor_flush_tlb_kernel_range: /* 16 insns */ mov 0, %o1 /* ARG1: mmu context */ mov HV_MMU_ALL, %o2 /* ARG2: flags */ ta HV_MMU_UNMAP_ADDR_TRAP - brnz,pn %o0, __hypervisor_tlb_tl0_error + brnz,pn %o0, 3f mov HV_MMU_UNMAP_ADDR_TRAP, %o1 brnz,pt %g2, 1b sub %g2, %g3, %g2 2: retl nop +3: sethi %hi(__hypervisor_tlb_tl0_error), %o2 + jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0 + nop #ifdef DCACHE_ALIASING_POSSIBLE /* XXX Niagara and friends have an 8K cache, so no aliasing is @@ -819,28 +856,28 @@ 
hypervisor_patch_cachetlbops: sethi %hi(__hypervisor_flush_tlb_mm), %o1 or %o1, %lo(__hypervisor_flush_tlb_mm), %o1 call tlb_patch_one - mov 10, %o2 + mov 19, %o2 sethi %hi(__flush_tlb_page), %o0 or %o0, %lo(__flush_tlb_page), %o0 sethi %hi(__hypervisor_flush_tlb_page), %o1 or %o1, %lo(__hypervisor_flush_tlb_page), %o1 call tlb_patch_one - mov 11, %o2 + mov 22, %o2 sethi %hi(__flush_tlb_pending), %o0 or %o0, %lo(__flush_tlb_pending), %o0 sethi %hi(__hypervisor_flush_tlb_pending), %o1 or %o1, %lo(__hypervisor_flush_tlb_pending), %o1 call tlb_patch_one - mov 16, %o2 + mov 27, %o2 sethi %hi(__flush_tlb_kernel_range), %o0 or %o0, %lo(__flush_tlb_kernel_range), %o0 sethi %hi(__hypervisor_flush_tlb_kernel_range), %o1 or %o1, %lo(__hypervisor_flush_tlb_kernel_range), %o1 call tlb_patch_one - mov 16, %o2 + mov 19, %o2 #ifdef DCACHE_ALIASING_POSSIBLE sethi %hi(__flush_dcache_page), %o0 -- cgit v1.2.3 From 9d9fa230206a3aea6ef451646c97122f04777983 Mon Sep 17 00:00:00 2001 From: James Clarke Date: Mon, 24 Oct 2016 19:49:25 +0100 Subject: sparc: Handle negative offsets in arch_jump_label_transform Additionally, if the offset will overflow the immediate for a ba,pt instruction, fall back on a standard ba to get an extra 3 bits. Signed-off-by: James Clarke Signed-off-by: David S. Miller --- arch/sparc/kernel/jump_label.c | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/sparc/kernel/jump_label.c b/arch/sparc/kernel/jump_label.c index 59bbeff55024..07933b9e9ce0 100644 --- a/arch/sparc/kernel/jump_label.c +++ b/arch/sparc/kernel/jump_label.c @@ -13,19 +13,30 @@ void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type) { - u32 val; u32 *insn = (u32 *) (unsigned long) entry->code; + u32 val; if (type == JUMP_LABEL_JMP) { s32 off = (s32)entry->target - (s32)entry->code; + bool use_v9_branch = false; + + BUG_ON(off & 3); #ifdef CONFIG_SPARC64 - /* ba,pt %xcc, . + (off << 2) */ - val = 0x10680000 | ((u32) off >> 2); -#else - /* ba . + (off << 2) */ - val = 0x10800000 | ((u32) off >> 2); + if (off <= 0xfffff && off >= -0x100000) + use_v9_branch = true; #endif + if (use_v9_branch) { + /* WDISP19 - target is . + immed << 2 */ + /* ba,pt %xcc, . + off */ + val = 0x10680000 | (((u32) off >> 2) & 0x7ffff); + } else { + /* WDISP22 - target is . + immed << 2 */ + BUG_ON(off > 0x7fffff); + BUG_ON(off < -0x800000); + /* ba . + off */ + val = 0x10800000 | (((u32) off >> 2) & 0x3fffff); + } } else { val = 0x01000000; } -- cgit v1.2.3 From 849c498766060a16aad5b0e0d03206726e7d2fa4 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 25 Oct 2016 19:43:17 -0700 Subject: sparc64: Handle extremely large kernel TSB range flushes sanely. If the number of pages we are flushing is more than twice the number of entries in the TSB, just scan the TSB table for matches rather than probing each and every page in the range. Based upon a patch and report by James Clarke. Signed-off-by: David S. 
Miller --- arch/sparc/mm/tsb.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'arch') diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c index f2b77112e9d8..e20fbbafb0b0 100644 --- a/arch/sparc/mm/tsb.c +++ b/arch/sparc/mm/tsb.c @@ -27,6 +27,20 @@ static inline int tag_compare(unsigned long tag, unsigned long vaddr) return (tag == (vaddr >> 22)); } +static void flush_tsb_kernel_range_scan(unsigned long start, unsigned long end) +{ + unsigned long idx; + + for (idx = 0; idx < KERNEL_TSB_NENTRIES; idx++) { + struct tsb *ent = &swapper_tsb[idx]; + unsigned long match = idx << 13; + + match |= (ent->tag << 22); + if (match >= start && match < end) + ent->tag = (1UL << TSB_TAG_INVALID_BIT); + } +} + /* TSB flushes need only occur on the processor initiating the address * space modification, not on each cpu the address space has run on. * Only the TLB flush needs that treatment. @@ -36,6 +50,9 @@ void flush_tsb_kernel_range(unsigned long start, unsigned long end) { unsigned long v; + if ((end - start) >> PAGE_SHIFT >= 2 * KERNEL_TSB_NENTRIES) + return flush_tsb_kernel_range_scan(start, end); + for (v = start; v < end; v += PAGE_SIZE) { unsigned long hash = tsb_hash(v, PAGE_SHIFT, KERNEL_TSB_NENTRIES); -- cgit v1.2.3 From 8ef4227615e158faa4ee85a1d6466782f7e22f2f Mon Sep 17 00:00:00 2001 From: Dave Airlie Date: Mon, 24 Oct 2016 15:27:59 +1000 Subject: x86/io: add interface to reserve io memtype for a resource range. (v1.1) A recent change to the mm code in: 87744ab3832b mm: fix cache mode tracking in vm_insert_mixed() started enforcing a check of the memory type against the registered list for mixed pfn insertion mappings. It happens that the DRM drivers for a number of GPUs relied on this being broken. Currently the driver only inserted VRAM mappings into the tracking table when they came from the kernel, and userspace mappings never landed in the table. This led to a regression where all the mappings now end up as UC instead of WC. I've considered a number of solutions, but since this needs to be fixed in -fixes and not -next, and some of the solutions were going to introduce overhead that hadn't been there before, I didn't consider them viable at this stage. These mainly concerned hooking into the TTM io reserve APIs, but those APIs have a bunch of fast paths I didn't want to unwind to add this. The solution I've decided on is to add a new API like the arch_phys_wc APIs (these would have worked but wc_del didn't take a range), and use them from the drivers to add a WC-compatible mapping to the table for all VRAM on those GPUs. This means we can then create userspace mappings that won't get degraded to UC. v1.1: use CONFIG_X86_PAT + add some comments in io.h Cc: Toshi Kani Cc: Borislav Petkov Cc: H.
Peter Anvin Cc: Andy Lutomirski Cc: Denys Vlasenko Cc: Brian Gerst Cc: x86@kernel.org Cc: mcgrof@suse.com Cc: Dan Williams Acked-by: Ingo Molnar Reviewed-by: Thomas Gleixner Signed-off-by: Dave Airlie --- arch/x86/include/asm/io.h | 6 ++++++ arch/x86/mm/pat.c | 14 ++++++++++++++ include/linux/io.h | 22 ++++++++++++++++++++++ 3 files changed, 42 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index de25aad07853..d34bd370074b 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -351,4 +351,10 @@ extern void arch_phys_wc_del(int handle); #define arch_phys_wc_add arch_phys_wc_add #endif +#ifdef CONFIG_X86_PAT +extern int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size); +extern void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size); +#define arch_io_reserve_memtype_wc arch_io_reserve_memtype_wc +#endif + #endif /* _ASM_X86_IO_H */ diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index 170cc4ff057b..83e701f160a9 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -730,6 +730,20 @@ void io_free_memtype(resource_size_t start, resource_size_t end) free_memtype(start, end); } +int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size) +{ + enum page_cache_mode type = _PAGE_CACHE_MODE_WC; + + return io_reserve_memtype(start, start + size, &type); +} +EXPORT_SYMBOL(arch_io_reserve_memtype_wc); + +void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size) +{ + io_free_memtype(start, start + size); +} +EXPORT_SYMBOL(arch_io_free_memtype_wc); + pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, pgprot_t vma_prot) { diff --git a/include/linux/io.h b/include/linux/io.h index e2c8419278c1..82ef36eac8a1 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -141,4 +141,26 @@ enum { void *memremap(resource_size_t offset, size_t size, unsigned long flags); void memunmap(void *addr); +/* + * On x86 PAT systems we have memory tracking that keeps track of + * the allowed mappings on memory ranges. This tracking works for + * all the in-kernel mapping APIs (ioremap*), but where the user + * wishes to map a range from a physical device into user memory + * the tracking won't be updated. This API is to be used by + * drivers which remap physical device pages into userspace, + * and wants to make sure they are mapped WC and not UC. + */ +#ifndef arch_io_reserve_memtype_wc +static inline int arch_io_reserve_memtype_wc(resource_size_t base, + resource_size_t size) +{ + return 0; +} + +static inline void arch_io_free_memtype_wc(resource_size_t base, + resource_size_t size) +{ +} +#endif + #endif /* _LINUX_IO_H */ -- cgit v1.2.3 From 5de0a8c0c240338cb5b73363b0673c6aa804bb1c Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Mon, 24 Oct 2016 15:01:48 -0400 Subject: x86: Fix export for mcount and __fentry__ Commit 784d5699eddc5 ("x86: move exports to actual definitions") removed the EXPORT_SYMBOL(__fentry__) and EXPORT_SYMBOL(mcount) from x8664_ksyms_64.c, and added EXPORT_SYMBOL(function_hook) in mcount_64.S instead. The problem is that function_hook isn't a function at all, but a macro that is defined as either mcount or __fentry__ depending on the support from gcc. Originally, I thought this was a macro issue, like what __stringify() is used for. But the problem is a bit deeper. The Makefile.build has some magic that does post processing of files to create the CRC bindings. 
It does some searches for EXPORT_SYMBOL() and because it finds a macro name and not the actual functions, this causes function_hook not to be converted into mcount or __fentry__ and they are missed. Instead of adding more magic to Makefile.build, just add EXPORT_SYMBOL() for mcount and __fentry__ where the ifdef is used. Since this is assembly and not C, it doesn't require being set after the function is defined. Signed-off-by: Steven Rostedt Tested-by: Borislav Petkov Cc: Gabriel C Cc: Nicholas Piggin Cc: Al Viro Link: http://lkml.kernel.org/r/20161024150148.4f9d90e4@gandalf.local.home Signed-off-by: Thomas Gleixner --- arch/x86/kernel/mcount_64.S | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S index efe73aacf966..7b0d3da52fb4 100644 --- a/arch/x86/kernel/mcount_64.S +++ b/arch/x86/kernel/mcount_64.S @@ -18,8 +18,10 @@ #ifdef CC_USING_FENTRY # define function_hook __fentry__ +EXPORT_SYMBOL(__fentry__) #else # define function_hook mcount +EXPORT_SYMBOL(mcount) #endif /* All cases save the original rbp (8 bytes) */ @@ -295,7 +297,6 @@ trace: jmp fgraph_trace END(function_hook) #endif /* CONFIG_DYNAMIC_FTRACE */ -EXPORT_SYMBOL(function_hook) #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_FUNCTION_GRAPH_TRACER -- cgit v1.2.3 From 9078210ef4016539c909fb67164d3f1c27323d8b Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 25 Oct 2016 16:08:19 +0100 Subject: KVM: MIPS: Fix lazy user ASID regenerate for SMP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kvm_mips_check_asids() runs before entering the guest and performs lazy regeneration of host ASID for guest usermode, using last_user_gasid to track the last guest ASID in the VCPU that was used by guest usermode on any host CPU. last_user_gasid is reset after performing the lazy ASID regeneration on the current CPU, and by kvm_arch_vcpu_load() if the host ASID for guest usermode is regenerated due to staleness (to cancel outstanding lazy ASID regenerations). Unfortunately neither case handles SMP hosts correctly: - When the lazy ASID regeneration is performed it should apply to all CPUs (as last_user_gasid does), so reset the ASID on other CPUs to zero to trigger regeneration when the VCPU is next loaded on those CPUs. - When the ASID is found to be stale on the current CPU, we should not cancel lazy ASID regenerations globally, so drop the reset of last_user_gasid altogether here. Both cases would require a guest ASID change and two host CPU migrations (and in the latter case one of the CPUs to start a new ASID cycle) before guest usermode could potentially access stale user pages from a previously running ASID in the same VCPU. 
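As a minimal sketch of the SMP-safe lazy regeneration described above (names taken from the kvm_mips_check_asids() hunk in the patch below; this is an illustration, not the patch itself): after the host ASID for guest usermode is regenerated on the current CPU, every other CPU's cached value is zeroed so that it is regenerated the next time the VCPU is loaded there.

	int i, cpu = smp_processor_id();

	/* Regenerate for the local CPU first ... */
	vcpu->arch.guest_user_asid[cpu] =
		vcpu->arch.guest_user_mm.context.asid[cpu];
	/* ... then force every other CPU to regenerate on its next vcpu_load */
	for_each_possible_cpu(i)
		if (i != cpu)
			vcpu->arch.guest_user_asid[i] = 0;
	vcpu->arch.last_user_gasid = gasid;
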
Fixes: 25b08c7fb0e4 ("KVM: MIPS: Invalidate TLB by regenerating ASIDs") Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/mips/kvm/mips.c | 5 ++++- arch/mips/kvm/mmu.c | 4 ---- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 622037d851a3..06a60b19acfb 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -426,7 +426,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, static void kvm_mips_check_asids(struct kvm_vcpu *vcpu) { struct mips_coproc *cop0 = vcpu->arch.cop0; - int cpu = smp_processor_id(); + int i, cpu = smp_processor_id(); unsigned int gasid; /* @@ -442,6 +442,9 @@ static void kvm_mips_check_asids(struct kvm_vcpu *vcpu) vcpu); vcpu->arch.guest_user_asid[cpu] = vcpu->arch.guest_user_mm.context.asid[cpu]; + for_each_possible_cpu(i) + if (i != cpu) + vcpu->arch.guest_user_asid[i] = 0; vcpu->arch.last_user_gasid = gasid; } } diff --git a/arch/mips/kvm/mmu.c b/arch/mips/kvm/mmu.c index 03883ba806e2..3b677c851be0 100644 --- a/arch/mips/kvm/mmu.c +++ b/arch/mips/kvm/mmu.c @@ -260,13 +260,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) if ((vcpu->arch.guest_user_asid[cpu] ^ asid_cache(cpu)) & asid_version_mask(cpu)) { - u32 gasid = kvm_read_c0_guest_entryhi(vcpu->arch.cop0) & - KVM_ENTRYHI_ASID; - kvm_get_new_mmu_context(&vcpu->arch.guest_user_mm, cpu, vcpu); vcpu->arch.guest_user_asid[cpu] = vcpu->arch.guest_user_mm.context.asid[cpu]; - vcpu->arch.last_user_gasid = gasid; newasid++; kvm_debug("[%d]: cpu_context: %#lx\n", cpu, -- cgit v1.2.3 From ede5f3e7b54a4347be4d8525269eae50902bd7cd Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 25 Oct 2016 16:11:11 +0100 Subject: KVM: MIPS: Make ERET handle ERL before EXL MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ERET instruction to return from exception is used for returning from exception level (Status.EXL) and error level (Status.ERL). If both bits are set, however, we should return from ERL first, as ERL can interrupt EXL, for example when an NMI is taken. KVM, however, checks EXL first. Fix the order of the checks to match the pseudocode in the instruction set manual.
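A simplified sketch of the corrected priority (the variable names here are illustrative shorthand for the kvm_read_c0_guest_* accessors used in the diff below): ERET must test ERL before EXL, matching the architectural pseudocode.

	/* Sketch only: error level is checked first, since ERL (e.g. an NMI)
	 * can interrupt EXL, but not the other way around. */
	if (status & ST0_ERL) {
		pc = errorepc;		/* return from error level */
		status &= ~ST0_ERL;
	} else if (status & ST0_EXL) {
		pc = epc;		/* return from exception level */
		status &= ~ST0_EXL;
	}
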
Fixes: e685c689f3a8 ("KVM/MIPS32: Privileged instruction/target branch emulation.") Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org Cc: stable@vger.kernel.org # 3.10.x- Signed-off-by: Paolo Bonzini --- arch/mips/kvm/emulate.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index 8770f32c9e0b..c45ef0f13dfa 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -790,15 +790,15 @@ enum emulation_result kvm_mips_emul_eret(struct kvm_vcpu *vcpu) struct mips_coproc *cop0 = vcpu->arch.cop0; enum emulation_result er = EMULATE_DONE; - if (kvm_read_c0_guest_status(cop0) & ST0_EXL) { + if (kvm_read_c0_guest_status(cop0) & ST0_ERL) { + kvm_clear_c0_guest_status(cop0, ST0_ERL); + vcpu->arch.pc = kvm_read_c0_guest_errorepc(cop0); + } else if (kvm_read_c0_guest_status(cop0) & ST0_EXL) { kvm_debug("[%#lx] ERET to %#lx\n", vcpu->arch.pc, kvm_read_c0_guest_epc(cop0)); kvm_clear_c0_guest_status(cop0, ST0_EXL); vcpu->arch.pc = kvm_read_c0_guest_epc(cop0); - } else if (kvm_read_c0_guest_status(cop0) & ST0_ERL) { - kvm_clear_c0_guest_status(cop0, ST0_ERL); - vcpu->arch.pc = kvm_read_c0_guest_errorepc(cop0); } else { kvm_err("[%#lx] ERET when MIPS_SR_EXL|MIPS_SR_ERL == 0\n", vcpu->arch.pc); -- cgit v1.2.3 From e1e575f6b026734be3b1f075e780e91ab08ca541 Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 25 Oct 2016 16:11:12 +0100 Subject: KVM: MIPS: Precalculate MMIO load resume PC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The advancing of the PC when completing an MMIO load is done before re-entering the guest, i.e. before restoring the guest ASID. However, if the load is in a branch delay slot it may need to access guest code to read the prior branch instruction. This isn't safe in TLB-mapped code at the moment, nor in the future when we'll access unmapped guest segments using direct user accessors too, as it could read the branch from host user memory instead. Therefore calculate the resume PC in advance while we're still in the right context and save it in the new vcpu->arch.io_pc (replacing the no longer needed vcpu->arch.pending_load_cause), and restore it on MMIO completion.
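The core of the change can be sketched as follows (simplified from the kvm_mips_emulate_load() hunk in the diff below): the resume PC is computed while the guest context is still valid, saved in io_pc, and the guest PC is left at the faulting load until the MMIO actually completes.

	unsigned long curr_pc = vcpu->arch.pc;

	/* update_pc() may need to read the prior branch instruction, which
	 * is only safe while we are still in the right (guest) context. */
	if (update_pc(vcpu, cause) == EMULATE_FAIL)
		return EMULATE_FAIL;
	vcpu->arch.io_pc = vcpu->arch.pc;	/* restored on MMIO completion */
	vcpu->arch.pc = curr_pc;		/* stay at the load for now */
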
Fixes: e685c689f3a8 ("KVM/MIPS32: Privileged instruction/target branch emulation.") Signed-off-by: James Hogan Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: kvm@vger.kernel.org Cc: stable@vger.kernel.org # 3.10.x- Signed-off-by: Paolo Bonzini --- arch/mips/include/asm/kvm_host.h | 7 ++++--- arch/mips/kvm/emulate.c | 24 +++++++++++++++--------- 2 files changed, 19 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 07f58cfc1ab9..bebec370324f 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -293,7 +293,10 @@ struct kvm_vcpu_arch { /* Host KSEG0 address of the EI/DI offset */ void *kseg0_commpage; - u32 io_gpr; /* GPR used as IO source/target */ + /* Resume PC after MMIO completion */ + unsigned long io_pc; + /* GPR used as IO source/target */ + u32 io_gpr; struct hrtimer comparecount_timer; /* Count timer control KVM register */ @@ -315,8 +318,6 @@ struct kvm_vcpu_arch { /* Bitmask of pending exceptions to be cleared */ unsigned long pending_exceptions_clr; - u32 pending_load_cause; - /* Save/Restore the entryhi register when are are preempted/scheduled back in */ unsigned long preempt_entryhi; diff --git a/arch/mips/kvm/emulate.c b/arch/mips/kvm/emulate.c index c45ef0f13dfa..aa0937423e28 100644 --- a/arch/mips/kvm/emulate.c +++ b/arch/mips/kvm/emulate.c @@ -1528,13 +1528,25 @@ enum emulation_result kvm_mips_emulate_load(union mips_instruction inst, struct kvm_vcpu *vcpu) { enum emulation_result er = EMULATE_DO_MMIO; + unsigned long curr_pc; u32 op, rt; u32 bytes; rt = inst.i_format.rt; op = inst.i_format.opcode; - vcpu->arch.pending_load_cause = cause; + /* + * Find the resume PC now while we have safe and easy access to the + * prior branch instruction, and save it for + * kvm_mips_complete_mmio_load() to restore later. + */ + curr_pc = vcpu->arch.pc; + er = update_pc(vcpu, cause); + if (er == EMULATE_FAIL) + return er; + vcpu->arch.io_pc = vcpu->arch.pc; + vcpu->arch.pc = curr_pc; + vcpu->arch.io_gpr = rt; switch (op) { @@ -2494,9 +2506,8 @@ enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu, goto done; } - er = update_pc(vcpu, vcpu->arch.pending_load_cause); - if (er == EMULATE_FAIL) - return er; + /* Restore saved resume PC */ + vcpu->arch.pc = vcpu->arch.io_pc; switch (run->mmio.len) { case 4: @@ -2518,11 +2529,6 @@ enum emulation_result kvm_mips_complete_mmio_load(struct kvm_vcpu *vcpu, break; } - if (vcpu->arch.pending_load_cause & CAUSEF_BD) - kvm_debug("[%#lx] Completing %d byte BD Load to gpr %d (0x%08lx) type %d\n", - vcpu->arch.pc, run->mmio.len, vcpu->arch.io_gpr, *gpr, - vcpu->mmio_needed); - done: return er; } -- cgit v1.2.3 From 45c7ee43a5184ddbff652ee0d2e826f86f1b616b Mon Sep 17 00:00:00 2001 From: Janosch Frank Date: Wed, 26 Oct 2016 08:48:11 +0200 Subject: KVM: s390: Fix STHYI buffer alignment for diag224 Diag224 requires a page-aligned 4k buffer to store the name table into. kmalloc does not guarantee page alignment, hence we replace it with __get_free_page for the buffer allocation.
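A short sketch of the allocation pattern being adopted (matching the sthyi.c diff below): kmalloc() makes no page-alignment promise for its returned pointer, while __get_free_page() hands out a whole page and is therefore page aligned by construction.

	/* Page-aligned by construction, unlike kmalloc(PAGE_SIZE, ...). */
	void *diag224_buf = (void *)__get_free_page(GFP_KERNEL | GFP_DMA);

	if (!diag224_buf || diag224(diag224_buf))
		goto out;
	/* ... */
out:
	free_page((unsigned long)diag224_buf);	/* matching free; safe on 0 */
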
Cc: stable@vger.kernel.org # v4.8+ Reported-by: Michael Holzheu Signed-off-by: Janosch Frank Reviewed-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- arch/s390/kvm/sthyi.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/s390/kvm/sthyi.c b/arch/s390/kvm/sthyi.c index bd98b7d25200..05c98bb853cf 100644 --- a/arch/s390/kvm/sthyi.c +++ b/arch/s390/kvm/sthyi.c @@ -315,7 +315,7 @@ static void fill_diag(struct sthyi_sctns *sctns) if (r < 0) goto out; - diag224_buf = kmalloc(PAGE_SIZE, GFP_KERNEL | GFP_DMA); + diag224_buf = (void *)__get_free_page(GFP_KERNEL | GFP_DMA); if (!diag224_buf || diag224(diag224_buf)) goto out; @@ -378,7 +378,7 @@ static void fill_diag(struct sthyi_sctns *sctns) sctns->par.infpval1 |= PAR_WGHT_VLD; out: - kfree(diag224_buf); + free_page((unsigned long)diag224_buf); vfree(diag204_buf); } -- cgit v1.2.3 From 830cda3f9855ff092b0e9610346d110846fc497c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 26 Oct 2016 10:08:22 -0700 Subject: sparc64: Fix instruction count in comment for __hypervisor_flush_tlb_pending. Noticed by James Clarke. Signed-off-by: David S. Miller --- arch/sparc/mm/ultra.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S index 85de139bfad6..5128d38b1d1a 100644 --- a/arch/sparc/mm/ultra.S +++ b/arch/sparc/mm/ultra.S @@ -358,7 +358,7 @@ __hypervisor_flush_tlb_page: /* 22 insns */ nop nop -__hypervisor_flush_tlb_pending: /* 16 insns */ +__hypervisor_flush_tlb_pending: /* 27 insns */ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ sllx %o1, 3, %g1 mov %o2, %g2 -- cgit v1.2.3 From a236441bb69723032db94128761a469030c3fe6d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 26 Oct 2016 10:20:14 -0700 Subject: sparc64: Fix illegal relative branches in hypervisor patched TLB cross-call code. Just like the non-cross-call TLB flush handlers, the cross-call ones need to avoid doing PC-relative branches outside of their code blocks. Signed-off-by: David S. Miller --- arch/sparc/mm/ultra.S | 42 ++++++++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S index 5128d38b1d1a..0fa2e6202c1f 100644 --- a/arch/sparc/mm/ultra.S +++ b/arch/sparc/mm/ultra.S @@ -484,7 +484,7 @@ cheetah_patch_cachetlbops: */ .align 32 .globl xcall_flush_tlb_mm -xcall_flush_tlb_mm: /* 21 insns */ +xcall_flush_tlb_mm: /* 24 insns */ mov PRIMARY_CONTEXT, %g2 ldxa [%g2] ASI_DMMU, %g3 srlx %g3, CTX_PGSZ1_NUC_SHIFT, %g4 @@ -506,9 +506,12 @@ xcall_flush_tlb_mm: /* 21 insns */ nop nop nop + nop + nop + nop .globl xcall_flush_tlb_page -xcall_flush_tlb_page: /* 17 insns */ +xcall_flush_tlb_page: /* 20 insns */ /* %g5=context, %g1=vaddr */ mov PRIMARY_CONTEXT, %g4 ldxa [%g4] ASI_DMMU, %g2 @@ -527,9 +530,12 @@ xcall_flush_tlb_page: /* 17 insns */ retry nop nop + nop + nop + nop .globl xcall_flush_tlb_kernel_range -xcall_flush_tlb_kernel_range: /* 25 insns */ +xcall_flush_tlb_kernel_range: /* 28 insns */ sethi %hi(PAGE_SIZE - 1), %g2 or %g2, %lo(PAGE_SIZE - 1), %g2 andn %g1, %g2, %g1 @@ -555,6 +561,9 @@ xcall_flush_tlb_kernel_range: /* 25 insns */ nop nop nop + nop + nop + nop /* This runs in a very controlled environment, so we do * not need to worry about BH races etc. 
@@ -737,7 +746,7 @@ __hypervisor_tlb_xcall_error: ba,a,pt %xcc, rtrap .globl __hypervisor_xcall_flush_tlb_mm -__hypervisor_xcall_flush_tlb_mm: /* 21 insns */ +__hypervisor_xcall_flush_tlb_mm: /* 24 insns */ /* %g5=ctx, g1,g2,g3,g4,g7=scratch, %g6=unusable */ mov %o0, %g2 mov %o1, %g3 @@ -751,7 +760,7 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */ mov HV_FAST_MMU_DEMAP_CTX, %o5 ta HV_FAST_TRAP mov HV_FAST_MMU_DEMAP_CTX, %g6 - brnz,pn %o0, __hypervisor_tlb_xcall_error + brnz,pn %o0, 1f mov %o0, %g5 mov %g2, %o0 mov %g3, %o1 @@ -760,9 +769,12 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */ mov %g7, %o5 membar #Sync retry +1: sethi %hi(__hypervisor_tlb_xcall_error), %g4 + jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0 + nop .globl __hypervisor_xcall_flush_tlb_page -__hypervisor_xcall_flush_tlb_page: /* 17 insns */ +__hypervisor_xcall_flush_tlb_page: /* 20 insns */ /* %g5=ctx, %g1=vaddr */ mov %o0, %g2 mov %o1, %g3 @@ -774,16 +786,19 @@ __hypervisor_xcall_flush_tlb_page: /* 17 insns */ sllx %o0, PAGE_SHIFT, %o0 ta HV_MMU_UNMAP_ADDR_TRAP mov HV_MMU_UNMAP_ADDR_TRAP, %g6 - brnz,a,pn %o0, __hypervisor_tlb_xcall_error + brnz,a,pn %o0, 1f mov %o0, %g5 mov %g2, %o0 mov %g3, %o1 mov %g4, %o2 membar #Sync retry +1: sethi %hi(__hypervisor_tlb_xcall_error), %g4 + jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0 + nop .globl __hypervisor_xcall_flush_tlb_kernel_range -__hypervisor_xcall_flush_tlb_kernel_range: /* 25 insns */ +__hypervisor_xcall_flush_tlb_kernel_range: /* 28 insns */ /* %g1=start, %g7=end, g2,g3,g4,g5,g6=scratch */ sethi %hi(PAGE_SIZE - 1), %g2 or %g2, %lo(PAGE_SIZE - 1), %g2 @@ -800,7 +815,7 @@ __hypervisor_xcall_flush_tlb_kernel_range: /* 25 insns */ mov HV_MMU_ALL, %o2 /* ARG2: flags */ ta HV_MMU_UNMAP_ADDR_TRAP mov HV_MMU_UNMAP_ADDR_TRAP, %g6 - brnz,pn %o0, __hypervisor_tlb_xcall_error + brnz,pn %o0, 1f mov %o0, %g5 sethi %hi(PAGE_SIZE), %o2 brnz,pt %g3, 1b @@ -810,6 +825,9 @@ __hypervisor_xcall_flush_tlb_kernel_range: /* 25 insns */ mov %g7, %o2 membar #Sync retry +1: sethi %hi(__hypervisor_tlb_xcall_error), %g4 + jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0 + nop /* These just get rescheduled to PIL vectors. */ .globl xcall_call_function @@ -894,21 +912,21 @@ hypervisor_patch_cachetlbops: sethi %hi(__hypervisor_xcall_flush_tlb_mm), %o1 or %o1, %lo(__hypervisor_xcall_flush_tlb_mm), %o1 call tlb_patch_one - mov 21, %o2 + mov 24, %o2 sethi %hi(xcall_flush_tlb_page), %o0 or %o0, %lo(xcall_flush_tlb_page), %o0 sethi %hi(__hypervisor_xcall_flush_tlb_page), %o1 or %o1, %lo(__hypervisor_xcall_flush_tlb_page), %o1 call tlb_patch_one - mov 17, %o2 + mov 20, %o2 sethi %hi(xcall_flush_tlb_kernel_range), %o0 or %o0, %lo(xcall_flush_tlb_kernel_range), %o0 sethi %hi(__hypervisor_xcall_flush_tlb_kernel_range), %o1 or %o1, %lo(__hypervisor_xcall_flush_tlb_kernel_range), %o1 call tlb_patch_one - mov 25, %o2 + mov 28, %o2 #endif /* CONFIG_SMP */ ret -- cgit v1.2.3 From 26984c3bc29aa15b705475177842feddcd3d9df0 Mon Sep 17 00:00:00 2001 From: Yisheng Xie Date: Fri, 21 Oct 2016 16:13:55 +0800 Subject: arm64/numa: fix pcpu_cpu_distance() to get correct CPU proximity The pcpu_build_alloc_info() function group CPUs according to their proximity, by call callback function @cpu_distance_fn from different ARCHs. For arm64 the callback of @cpu_distance_fn is pcpu_cpu_distance(from, to) -> node_distance(from, to) The @from and @to for function node_distance() should be nid. However, pcpu_cpu_distance() in arch/arm64/mm/numa.c just past the cpu id for @from and @to, and didn't convert to numa node id. 
Because of this incorrect cpu proximity obtained from the ARCH code, each CPU may end up in a group of its own, making group_cnt go out of bounds: setup_per_cpu_areas() pcpu_embed_first_chunk() pcpu_build_alloc_info() In pcpu_build_alloc_info(), since cpu_distance_fn will return REMOTE_DISTANCE if we pass cpu ids (0,1,2...), cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE will wrongly be true. This may result in triggering the BUG_ON(unit != nr_units) later: [ 0.000000] kernel BUG at mm/percpu.c:1916! [ 0.000000] Internal error: Oops - BUG: 0 [#1] PREEMPT SMP [ 0.000000] Modules linked in: [ 0.000000] CPU: 0 PID: 0 Comm: swapper Not tainted 4.9.0-rc1-00003-g14155ca-dirty #26 [ 0.000000] Hardware name: Hisilicon Hi1616 Evaluation Board (DT) [ 0.000000] task: ffff000008d6e900 task.stack: ffff000008d60000 [ 0.000000] PC is at pcpu_embed_first_chunk+0x420/0x704 [ 0.000000] LR is at pcpu_embed_first_chunk+0x3bc/0x704 [ 0.000000] pc : [] lr : [] pstate: 800000c5 [ 0.000000] sp : ffff000008d63eb0 [ 0.000000] x29: ffff000008d63eb0 [ 0.000000] x28: 0000000000000000 [ 0.000000] x27: 0000000000000040 [ 0.000000] x26: ffff8413fbfcef00 [ 0.000000] x25: 0000000000000042 [ 0.000000] x24: 0000000000000042 [ 0.000000] x23: 0000000000001000 [ 0.000000] x22: 0000000000000046 [ 0.000000] x21: 0000000000000001 [ 0.000000] x20: ffff000008cb3bc8 [ 0.000000] x19: ffff8413fbfcf570 [ 0.000000] x18: 0000000000000000 [ 0.000000] x17: ffff000008e49ae0 [ 0.000000] x16: 0000000000000003 [ 0.000000] x15: 000000000000001e [ 0.000000] x14: 0000000000000004 [ 0.000000] x13: 0000000000000000 [ 0.000000] x12: 000000000000006f [ 0.000000] x11: 00000413fbffff00 [ 0.000000] x10: 0000000000000004 [ 0.000000] x9 : 0000000000000000 [ 0.000000] x8 : 0000000000000001 [ 0.000000] x7 : ffff8413fbfcf63c [ 0.000000] x6 : ffff000008d65d28 [ 0.000000] x5 : ffff000008d65e50 [ 0.000000] x4 : 0000000000000000 [ 0.000000] x3 : ffff000008cb3cc8 [ 0.000000] x2 : 0000000000000040 [ 0.000000] x1 : 0000000000000040 [ 0.000000] x0 : 0000000000000000 [...] [ 0.000000] Call trace: [ 0.000000] Exception stack(0xffff000008d63ce0 to 0xffff000008d63e10) [ 0.000000] 3ce0: ffff8413fbfcf570 0001000000000000 ffff000008d63eb0 ffff000008c754f4 [ 0.000000] 3d00: ffff000008d63d50 ffff0000081af210 00000413fbfff010 0000000000001000 [ 0.000000] 3d20: ffff000008d63d50 ffff0000081af220 00000413fbfff010 0000000000001000 [ 0.000000] 3d40: 00000413fbfcef00 0000000000000004 ffff000008d63db0 ffff0000081af390 [ 0.000000] 3d60: 00000413fbfcef00 0000000000001000 0000000000000000 0000000000001000 [ 0.000000] 3d80: 0000000000000000 0000000000000040 0000000000000040 ffff000008cb3cc8 [ 0.000000] 3da0: 0000000000000000 ffff000008d65e50 ffff000008d65d28 ffff8413fbfcf63c [ 0.000000] 3dc0: 0000000000000001 0000000000000000 0000000000000004 00000413fbffff00 [ 0.000000] 3de0: 000000000000006f 0000000000000000 0000000000000004 000000000000001e [ 0.000000] 3e00: 0000000000000003 ffff000008e49ae0 [ 0.000000] [] pcpu_embed_first_chunk+0x420/0x704 [ 0.000000] [] setup_per_cpu_areas+0x38/0xc8 [ 0.000000] [] start_kernel+0x10c/0x390 [ 0.000000] [] __primary_switched+0x5c/0x64 [ 0.000000] Code: b8018660 17ffffd7 6b16037f 54000080 (d4210000) [ 0.000000] ---[ end trace 0000000000000000 ]--- [ 0.000000] Kernel panic - not syncing: Attempted to kill the idle task! Fix this by getting the cpu's node id with early_cpu_to_node() and then passing it to node_distance(), as originally intended.
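
To make the failure mode concrete, here is a stand-alone user-space sketch; the 4-CPUs-per-node topology and the toy node_distance()/early_cpu_to_node() are invented stand-ins for the kernel's, with LOCAL_DISTANCE/REMOTE_DISTANCE following the kernel's 10/20 convention:

    #include <stdio.h>

    #define LOCAL_DISTANCE  10
    #define REMOTE_DISTANCE 20

    /* Toy node_distance(): expects node ids, not cpu ids. */
    static int node_distance(int from_nid, int to_nid)
    {
            return from_nid == to_nid ? LOCAL_DISTANCE : REMOTE_DISTANCE;
    }

    /* Stand-in for arm64's early_cpu_to_node(): 4 CPUs per node. */
    static int early_cpu_to_node(int cpu)
    {
            return cpu / 4;
    }

    int main(void)
    {
            int cpu = 1, tcpu = 2;  /* both CPUs live on node 0 */

            /* Buggy callback shape: cpu ids passed straight through.
             * Prints 20 (REMOTE_DISTANCE): the CPUs look remote, so
             * pcpu_build_alloc_info() would split them into groups. */
            printf("buggy: %d\n", node_distance(cpu, tcpu));

            /* Fixed callback shape: convert to node ids first.
             * Prints 10 (LOCAL_DISTANCE), grouping works as intended. */
            printf("fixed: %d\n", node_distance(early_cpu_to_node(cpu),
                                                early_cpu_to_node(tcpu)));
            return 0;
    }
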
Fixes: 7af3a0a99252 ("arm64/numa: support HAVE_SETUP_PER_CPU_AREA") Signed-off-by: Yisheng Xie Signed-off-by: Hanjun Guo Cc: Catalin Marinas Cc: Lorenzo Pieralisi Cc: Will Deacon Cc: Zhen Lei Signed-off-by: Will Deacon --- arch/arm64/mm/numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 778a985c8a70..9a71d0678a08 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -147,7 +147,7 @@ static int __init early_cpu_to_node(int cpu) static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) { - return node_distance(from, to); + return node_distance(early_cpu_to_node(from), early_cpu_to_node(to)); } static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, -- cgit v1.2.3 From 3f7a09f44e5ef8a2629842f2d22892114e603fc1 Mon Sep 17 00:00:00 2001 From: Hanjun Guo Date: Fri, 21 Oct 2016 16:13:56 +0800 Subject: arm64/numa: fix incorrect log for memory-less node When booting on a NUMA system with a memory-less node (no memory DIMM on that memory controller), the print from setup_node_data() is incorrect: NUMA: Initmem setup node 2 [mem 0x00000000-0xffffffffffffffff] It could be fixed by printing [mem 0x00000000-0x00000000] when end_pfn is 0, but printing an explicitly empty range ([]) is more useful. Fixes: 1a2db300348b ("arm64, numa: Add NUMA support for arm64 platforms.") Signed-off-by: Hanjun Guo Cc: Catalin Marinas Cc: Ganapatrao Kulkarni Cc: Lorenzo Pieralisi Cc: Mark Rutland Cc: Will Deacon Cc: Yisheng Xie Signed-off-by: Will Deacon --- arch/arm64/mm/numa.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 9a71d0678a08..4b32168cf91a 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -223,8 +223,11 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) void *nd; int tnid; - pr_info("Initmem setup node %d [mem %#010Lx-%#010Lx]\n", - nid, start_pfn << PAGE_SHIFT, (end_pfn << PAGE_SHIFT) - 1); + if (start_pfn < end_pfn) + pr_info("Initmem setup node %d [mem %#010Lx-%#010Lx]\n", nid, + start_pfn << PAGE_SHIFT, (end_pfn << PAGE_SHIFT) - 1); + else + pr_info("Initmem setup node %d []\n", nid); nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid); nd = __va(nd_pa); -- cgit v1.2.3 From 3fa72fe9c614717d22ae75b84d45f41da65c10fe Mon Sep 17 00:00:00 2001 From: Neeraj Upadhyay Date: Fri, 21 Oct 2016 14:28:46 +0530 Subject: arm64: mm: fix __page_to_voff definition Fix the parameter name of __page_to_voff to match its definition. At present, we don't see any issue, as page_to_virt's caller declares 'page'.
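
The latent nature of this bug is easy to reproduce in miniature: a macro whose body names an identifier other than its parameter still compiles as long as every caller happens to have that identifier in scope. A stand-alone sketch (not kernel code; the arithmetic is arbitrary):

    #include <stdio.h>

    /* Buggy shape: parameter is 'kaddr' but the body uses 'page'. */
    #define to_voff_buggy(kaddr) ((unsigned long)(page) * 2)
    /* Fixed shape: parameter name matches what the body uses. */
    #define to_voff_fixed(page)  ((unsigned long)(page) * 2)

    int main(void)
    {
            unsigned long page = 21;  /* caller happens to declare 'page' */

            /* Both expand to the same text here, hiding the bug; rename
             * the local variable and to_voff_buggy() stops compiling (or
             * silently picks up some other 'page' in scope). */
            printf("%lu %lu\n", to_voff_buggy(page), to_voff_fixed(page));
            return 0;
    }
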
Fixes: 9f2875912dac ("arm64: mm: restrict virt_to_page() to the linear mapping") Acked-by: Mark Rutland Acked-by: Ard Biesheuvel Signed-off-by: Neeraj Upadhyay Signed-off-by: Will Deacon --- arch/arm64/include/asm/memory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index ba62df8c6e35..b71086d25195 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -217,7 +217,7 @@ static inline void *phys_to_virt(phys_addr_t x) #define _virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) #else #define __virt_to_pgoff(kaddr) (((u64)(kaddr) & ~PAGE_OFFSET) / PAGE_SIZE * sizeof(struct page)) -#define __page_to_voff(kaddr) (((u64)(page) & ~VMEMMAP_START) * PAGE_SIZE / sizeof(struct page)) +#define __page_to_voff(page) (((u64)(page) & ~VMEMMAP_START) * PAGE_SIZE / sizeof(struct page)) #define page_to_virt(page) ((void *)((__page_to_voff(page)) | PAGE_OFFSET)) #define virt_to_page(vaddr) ((struct page *)((__virt_to_pgoff(vaddr)) | VMEMMAP_START)) -- cgit v1.2.3 From 85c856b39b479dde410ddd09df1da745343010c9 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Wed, 26 Oct 2016 08:38:38 -0700 Subject: kvm: nVMX: Fix kernel panics induced by illegal INVEPT/INVVPID types Bitwise shifts by amounts greater than or equal to the width of the left operand are undefined. A malicious guest can exploit this to crash a 32-bit host, due to the BUG_ON(1)'s in handle_{invept,invvpid}. Signed-off-by: Jim Mattson Message-Id: <1477496318-17681-1-git-send-email-jmattson@google.com> [Change 1UL to 1, to match the range check on the shift count. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index cf1b16dbc98a..74a4df993a51 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7659,7 +7659,7 @@ static int handle_invept(struct kvm_vcpu *vcpu) types = (vmx->nested.nested_vmx_ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6; - if (!(types & (1UL << type))) { + if (type >= 32 || !(types & (1 << type))) { nested_vmx_failValid(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); skip_emulated_instruction(vcpu); @@ -7722,7 +7722,7 @@ static int handle_invvpid(struct kvm_vcpu *vcpu) types = (vmx->nested.nested_vmx_vpid_caps >> 8) & 0x7; - if (!(types & (1UL << type))) { + if (type >= 32 || !(types & (1 << type))) { nested_vmx_failValid(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID); skip_emulated_instruction(vcpu); -- cgit v1.2.3 From 39715bf972ed4fee18fe5409609a971fb16b1771 Mon Sep 17 00:00:00 2001 From: Valentin Rothberg Date: Wed, 5 Oct 2016 07:57:26 +0200 Subject: powerpc/process: Fix CONFIG_ALIVEC typo in restore_tm_state() It should be ALTIVEC, not ALIVEC. Cyril explains: If a thread performs a transaction with altivec and then gets preempted for whatever reason, this bug may cause the kernel to not re-enable altivec when that thread runs again. This will result in an altivec unavailable fault, when that fault happens inside a user transaction the kernel has no choice but to enable altivec and doom the transaction. The result is that transactions using altivec may get aborted more often than they should. The difficulty in catching this with a selftest is my deliberate use of the word may above. Optimisations to avoid FPU/altivec/VSX faults mean that the kernel will always leave them on for 255 switches. 
This code prevents the kernel turning it off if it got to the 256th switch (and userspace was transactional). Fixes: dc16b553c949 ("powerpc: Always restore FPU/VEC/VSX if hardware transactional memory in use") Reviewed-by: Cyril Bur Signed-off-by: Valentin Rothberg Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/process.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 9e7c10fe205f..ce6dc61b15b2 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1012,7 +1012,7 @@ void restore_tm_state(struct pt_regs *regs) /* Ensure that restore_math() will restore */ if (msr_diff & MSR_FP) current->thread.load_fp = 1; -#ifdef CONFIG_ALIVEC +#ifdef CONFIG_ALTIVEC if (cpu_has_feature(CPU_FTR_ALTIVEC) && msr_diff & MSR_VEC) current->thread.load_vec = 1; #endif -- cgit v1.2.3 From bd77c4498616e27d5725b5959d880ce2272fefa9 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 24 Oct 2016 08:50:43 +0530 Subject: powerpc/mm/radix: Use tlbiel only if we ever ran on the current cpu Before this patch, we used tlbiel if we had only ever run on this core. That was mostly derived from the nohash usage of the same. But that is incorrect; ISA 3.0 clarifies tlbiel such that: "All TLB entries that have all of the following properties are made invalid on the thread executing the tlbiel instruction" ie. tlbiel only invalidates TLB entries on the current thread. So if the mm has been used on any other thread (aka. cpu) then we must broadcast the invalidate. This bug could lead to stale TLB entries if a program runs on multiple threads of a core. Hence use tlbiel only if we have only ever run on the current cpu. Fixes: 1a472c9dba6b ("powerpc/mm/radix: Add tlbflush routines") Cc: stable@vger.kernel.org # v4.7+ Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/tlb.h | 12 ++++++++++++ arch/powerpc/mm/tlb-radix.c | 8 ++++---- 2 files changed, 16 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h index f6f68f73e858..99e1397b71da 100644 --- a/arch/powerpc/include/asm/tlb.h +++ b/arch/powerpc/include/asm/tlb.h @@ -52,11 +52,23 @@ static inline int mm_is_core_local(struct mm_struct *mm) return cpumask_subset(mm_cpumask(mm), topology_sibling_cpumask(smp_processor_id())); } + +static inline int mm_is_thread_local(struct mm_struct *mm) +{ + return cpumask_equal(mm_cpumask(mm), + cpumask_of(smp_processor_id())); +} + #else static inline int mm_is_core_local(struct mm_struct *mm) { return 1; } + +static inline int mm_is_thread_local(struct mm_struct *mm) +{ + return 1; +} #endif #endif /* __KERNEL__ */ diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 0e49ec541ab5..bda8c43be78a 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -175,7 +175,7 @@ void radix__flush_tlb_mm(struct mm_struct *mm) if (unlikely(pid == MMU_NO_CONTEXT)) goto no_context; - if (!mm_is_core_local(mm)) { + if (!mm_is_thread_local(mm)) { int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); if (lock_tlbie) @@ -201,7 +201,7 @@ void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr) if (unlikely(pid == MMU_NO_CONTEXT)) goto no_context; - if (!mm_is_core_local(mm)) { + if (!mm_is_thread_local(mm)) { int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); if (lock_tlbie) @@ -226,7 +226,7 @@ void radix__flush_tlb_page_psize(struct mm_struct
*mm, unsigned long vmaddr, pid = mm ? mm->context.id : 0; if (unlikely(pid == MMU_NO_CONTEXT)) goto bail; - if (!mm_is_core_local(mm)) { + if (!mm_is_thread_local(mm)) { int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); if (lock_tlbie) @@ -321,7 +321,7 @@ void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start, { unsigned long pid; unsigned long addr; - int local = mm_is_core_local(mm); + int local = mm_is_thread_local(mm); unsigned long ap = mmu_get_ap(psize); int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); unsigned long page_size = 1UL << mmu_psize_defs[psize].shift; -- cgit v1.2.3 From fb479e44a9e240a23c2d208c2ace23542a47f41c Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 13 Oct 2016 13:17:14 +1100 Subject: powerpc/64s: relocation, register save fixes for system reset interrupt This patch does a couple of things. First of all, powernv immediately explodes when running a relocated kernel, because the system reset exception for handling sleeps does not do correct relocated branches. Secondly, the sleep handling code trashes the condition and cfar registers, which we would like to preserve for debugging purposes (for non-sleep case exception). This patch changes the exception to use the standard format that saves registers before any tests or branches are made. It adds the test for idle-wakeup as an "extra" to break out of the normal exception path. Then it branches to a relocated idle handler that calls the various idle handling functions. After this patch, POWER8 CPU simulator now boots powernv kernel that is running at non-zero. Fixes: 948cf67c4726 ("powerpc: Add NAP mode support on Power7 in HV mode") Cc: stable@vger.kernel.org # v3.0+ Signed-off-by: Nicholas Piggin Acked-by: Gautham R. Shenoy Acked-by: Balbir Singh Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/exception-64s.h | 16 ++++++++++ arch/powerpc/kernel/exceptions-64s.S | 50 ++++++++++++++++++-------------- 2 files changed, 45 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 2e4e7d878c8e..84d49b197c32 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -93,6 +93,10 @@ ld reg,PACAKBASE(r13); /* get high part of &label */ \ ori reg,reg,(FIXED_SYMBOL_ABS_ADDR(label))@l; +#define __LOAD_HANDLER(reg, label) \ + ld reg,PACAKBASE(r13); \ + ori reg,reg,(ABS_ADDR(label))@l; + /* Exception register prefixes */ #define EXC_HV H #define EXC_STD @@ -208,6 +212,18 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define kvmppc_interrupt kvmppc_interrupt_pr #endif +#ifdef CONFIG_RELOCATABLE +#define BRANCH_TO_COMMON(reg, label) \ + __LOAD_HANDLER(reg, label); \ + mtctr reg; \ + bctr + +#else +#define BRANCH_TO_COMMON(reg, label) \ + b label + +#endif + #define __KVM_HANDLER_PROLOG(area, n) \ BEGIN_FTR_SECTION_NESTED(947) \ ld r10,area+EX_CFAR(r13); \ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index f129408c6022..08ba447a4b3d 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -95,19 +95,35 @@ __start_interrupts: /* No virt vectors corresponding with 0x0..0x100 */ EXC_VIRT_NONE(0x4000, 0x4100) -EXC_REAL_BEGIN(system_reset, 0x100, 0x200) - SET_SCRATCH0(r13) + #ifdef CONFIG_PPC_P7_NAP -BEGIN_FTR_SECTION - /* Running native on arch 2.06 or later, check if we are - * waking up from nap/sleep/winkle. 
+ /* + * If running native on arch 2.06 or later, check if we are waking up + * from nap/sleep/winkle, and branch to idle handler. */ - mfspr r13,SPRN_SRR1 - rlwinm. r13,r13,47-31,30,31 - beq 9f +#define IDLETEST(n) \ + BEGIN_FTR_SECTION ; \ + mfspr r10,SPRN_SRR1 ; \ + rlwinm. r10,r10,47-31,30,31 ; \ + beq- 1f ; \ + cmpwi cr3,r10,2 ; \ + BRANCH_TO_COMMON(r10, system_reset_idle_common) ; \ +1: \ + END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) +#else +#define IDLETEST NOTEST +#endif - cmpwi cr3,r13,2 - GET_PACA(r13) +EXC_REAL_BEGIN(system_reset, 0x100, 0x200) + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD, + IDLETEST, 0x100) + +EXC_REAL_END(system_reset, 0x100, 0x200) +EXC_VIRT_NONE(0x4100, 0x4200) + +#ifdef CONFIG_PPC_P7_NAP +EXC_COMMON_BEGIN(system_reset_idle_common) bl pnv_restore_hyp_resource li r0,PNV_THREAD_RUNNING @@ -130,14 +146,8 @@ BEGIN_FTR_SECTION blt cr3,2f b pnv_wakeup_loss 2: b pnv_wakeup_noloss +#endif -9: -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) -#endif /* CONFIG_PPC_P7_NAP */ - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD, - NOTEST, 0x100) -EXC_REAL_END(system_reset, 0x100, 0x200) -EXC_VIRT_NONE(0x4100, 0x4200) EXC_COMMON(system_reset_common, 0x100, system_reset_exception) #ifdef CONFIG_PPC_PSERIES @@ -817,10 +827,8 @@ EXC_VIRT(trap_0b, 0x4b00, 0x4c00, 0xb00) TRAMP_KVM(PACA_EXGEN, 0xb00) EXC_COMMON(trap_0b_common, 0xb00, unknown_exception) - -#define LOAD_SYSCALL_HANDLER(reg) \ - ld reg,PACAKBASE(r13); \ - ori reg,reg,(ABS_ADDR(system_call_common))@l; +#define LOAD_SYSCALL_HANDLER(reg) \ + __LOAD_HANDLER(reg, system_call_common) /* Syscall routine is used twice, in reloc-off and reloc-on paths */ #define SYSCALL_PSERIES_1 \ -- cgit v1.2.3 From a74ad5e660a9ee1d071665e7e8ad822784a2dc7f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 27 Oct 2016 09:04:54 -0700 Subject: sparc64: Handle extremely large kernel TLB range flushes more gracefully. When the vmalloc area gets fragmented, and because the firmware mapping area sits between where modules live and the vmalloc area, we can sometimes receive requests for enormous kernel TLB range flushes. When this happens, the cpu just spins flushing billions of pages, and this triggers the NMI watchdog and other problems. We took care of this on the TSB side by doing a linear scan of the table once we pass a certain threshold. Do something similar for the TLB flush; however, we are limited by the TLB flush facilities provided by the different chip variants. First of all we use a (mostly arbitrary) cut-off of 256K which is about 32 pages. This can be tuned in the future. The huge range code path for each chip works as follows: 1) On spitfire we flush all non-locked TLB entries using diagnostic accesses. 2) On cheetah we use the "flush all" TLB flush. 3) On sun4v/hypervisor we do a TLB context flush on context 0, which unlike previous chips does not remove "permanent" or locked entries. We could probably do something better on spitfire, such as limiting the flush to kernel TLB entries or even doing range comparisons. However that probably isn't worth it since those chips are old and the TLB only has 64 entries. Reported-by: James Clarke Tested-by: James Clarke Signed-off-by: David S.
Miller --- arch/sparc/mm/ultra.S | 283 ++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 228 insertions(+), 55 deletions(-) (limited to 'arch') diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S index 0fa2e6202c1f..5d2fd6cd3189 100644 --- a/arch/sparc/mm/ultra.S +++ b/arch/sparc/mm/ultra.S @@ -113,12 +113,14 @@ __flush_tlb_pending: /* 27 insns */ .align 32 .globl __flush_tlb_kernel_range -__flush_tlb_kernel_range: /* 19 insns */ +__flush_tlb_kernel_range: /* 31 insns */ /* %o0=start, %o1=end */ cmp %o0, %o1 be,pn %xcc, 2f + sub %o1, %o0, %o3 + srlx %o3, 18, %o4 + brnz,pn %o4, __spitfire_flush_tlb_kernel_range_slow sethi %hi(PAGE_SIZE), %o4 - sub %o1, %o0, %o3 sub %o3, %o4, %o3 or %o0, 0x20, %o0 ! Nucleus 1: stxa %g0, [%o0 + %o3] ASI_DMMU_DEMAP @@ -134,6 +136,38 @@ __flush_tlb_kernel_range: /* 19 insns */ nop nop nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + +__spitfire_flush_tlb_kernel_range_slow: + mov 63 * 8, %o4 +1: ldxa [%o4] ASI_ITLB_DATA_ACCESS, %o3 + andcc %o3, 0x40, %g0 /* _PAGE_L_4U */ + bne,pn %xcc, 2f + mov TLB_TAG_ACCESS, %o3 + stxa %g0, [%o3] ASI_IMMU + stxa %g0, [%o4] ASI_ITLB_DATA_ACCESS + membar #Sync +2: ldxa [%o4] ASI_DTLB_DATA_ACCESS, %o3 + andcc %o3, 0x40, %g0 + bne,pn %xcc, 2f + mov TLB_TAG_ACCESS, %o3 + stxa %g0, [%o3] ASI_DMMU + stxa %g0, [%o4] ASI_DTLB_DATA_ACCESS + membar #Sync +2: sub %o4, 8, %o4 + brgez,pt %o4, 1b + nop + retl + nop __spitfire_flush_tlb_mm_slow: rdpr %pstate, %g1 @@ -288,6 +322,40 @@ __cheetah_flush_tlb_pending: /* 27 insns */ retl wrpr %g7, 0x0, %pstate +__cheetah_flush_tlb_kernel_range: /* 31 insns */ + /* %o0=start, %o1=end */ + cmp %o0, %o1 + be,pn %xcc, 2f + sub %o1, %o0, %o3 + srlx %o3, 18, %o4 + brnz,pn %o4, 3f + sethi %hi(PAGE_SIZE), %o4 + sub %o3, %o4, %o3 + or %o0, 0x20, %o0 ! 
Nucleus +1: stxa %g0, [%o0 + %o3] ASI_DMMU_DEMAP + stxa %g0, [%o0 + %o3] ASI_IMMU_DEMAP + membar #Sync + brnz,pt %o3, 1b + sub %o3, %o4, %o3 +2: sethi %hi(KERNBASE), %o3 + flush %o3 + retl + nop +3: mov 0x80, %o4 + stxa %g0, [%o4] ASI_DMMU_DEMAP + membar #Sync + stxa %g0, [%o4] ASI_IMMU_DEMAP + membar #Sync + retl + nop + nop + nop + nop + nop + nop + nop + nop + #ifdef DCACHE_ALIASING_POSSIBLE __cheetah_flush_dcache_page: /* 11 insns */ sethi %hi(PAGE_OFFSET), %g1 @@ -388,13 +456,15 @@ __hypervisor_flush_tlb_pending: /* 27 insns */ nop nop -__hypervisor_flush_tlb_kernel_range: /* 19 insns */ +__hypervisor_flush_tlb_kernel_range: /* 31 insns */ /* %o0=start, %o1=end */ cmp %o0, %o1 be,pn %xcc, 2f - sethi %hi(PAGE_SIZE), %g3 - mov %o0, %g1 - sub %o1, %g1, %g2 + sub %o1, %o0, %g2 + srlx %g2, 18, %g3 + brnz,pn %g3, 4f + mov %o0, %g1 + sethi %hi(PAGE_SIZE), %g3 sub %g2, %g3, %g2 1: add %g1, %g2, %o0 /* ARG0: virtual address */ mov 0, %o1 /* ARG1: mmu context */ @@ -409,6 +479,16 @@ __hypervisor_flush_tlb_kernel_range: /* 19 insns */ 3: sethi %hi(__hypervisor_tlb_tl0_error), %o2 jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0 nop +4: mov 0, %o0 /* ARG0: CPU lists unimplemented */ + mov 0, %o1 /* ARG1: CPU lists unimplemented */ + mov 0, %o2 /* ARG2: mmu context == nucleus */ + mov HV_MMU_ALL, %o3 /* ARG3: flags */ + mov HV_FAST_MMU_DEMAP_CTX, %o5 + ta HV_FAST_TRAP + brnz,pn %o0, 3b + mov HV_FAST_MMU_DEMAP_CTX, %o1 + retl + nop #ifdef DCACHE_ALIASING_POSSIBLE /* XXX Niagara and friends have an 8K cache, so no aliasing is @@ -431,43 +511,6 @@ tlb_patch_one: retl nop - .globl cheetah_patch_cachetlbops -cheetah_patch_cachetlbops: - save %sp, -128, %sp - - sethi %hi(__flush_tlb_mm), %o0 - or %o0, %lo(__flush_tlb_mm), %o0 - sethi %hi(__cheetah_flush_tlb_mm), %o1 - or %o1, %lo(__cheetah_flush_tlb_mm), %o1 - call tlb_patch_one - mov 19, %o2 - - sethi %hi(__flush_tlb_page), %o0 - or %o0, %lo(__flush_tlb_page), %o0 - sethi %hi(__cheetah_flush_tlb_page), %o1 - or %o1, %lo(__cheetah_flush_tlb_page), %o1 - call tlb_patch_one - mov 22, %o2 - - sethi %hi(__flush_tlb_pending), %o0 - or %o0, %lo(__flush_tlb_pending), %o0 - sethi %hi(__cheetah_flush_tlb_pending), %o1 - or %o1, %lo(__cheetah_flush_tlb_pending), %o1 - call tlb_patch_one - mov 27, %o2 - -#ifdef DCACHE_ALIASING_POSSIBLE - sethi %hi(__flush_dcache_page), %o0 - or %o0, %lo(__flush_dcache_page), %o0 - sethi %hi(__cheetah_flush_dcache_page), %o1 - or %o1, %lo(__cheetah_flush_dcache_page), %o1 - call tlb_patch_one - mov 11, %o2 -#endif /* DCACHE_ALIASING_POSSIBLE */ - - ret - restore - #ifdef CONFIG_SMP /* These are all called by the slaves of a cross call, at * trap level 1, with interrupts fully disabled. @@ -535,13 +578,15 @@ xcall_flush_tlb_page: /* 20 insns */ nop .globl xcall_flush_tlb_kernel_range -xcall_flush_tlb_kernel_range: /* 28 insns */ +xcall_flush_tlb_kernel_range: /* 44 insns */ sethi %hi(PAGE_SIZE - 1), %g2 or %g2, %lo(PAGE_SIZE - 1), %g2 andn %g1, %g2, %g1 andn %g7, %g2, %g7 sub %g7, %g1, %g3 - add %g2, 1, %g2 + srlx %g3, 18, %g2 + brnz,pn %g2, 2f + add %g2, 1, %g2 sub %g3, %g2, %g3 or %g1, 0x20, %g1 ! 
Nucleus 1: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP @@ -550,11 +595,25 @@ xcall_flush_tlb_kernel_range: /* 28 insns */ brnz,pt %g3, 1b sub %g3, %g2, %g3 retry - nop - nop - nop - nop - nop +2: mov 63 * 8, %g1 +1: ldxa [%g1] ASI_ITLB_DATA_ACCESS, %g2 + andcc %g2, 0x40, %g0 /* _PAGE_L_4U */ + bne,pn %xcc, 2f + mov TLB_TAG_ACCESS, %g2 + stxa %g0, [%g2] ASI_IMMU + stxa %g0, [%g1] ASI_ITLB_DATA_ACCESS + membar #Sync +2: ldxa [%g1] ASI_DTLB_DATA_ACCESS, %g2 + andcc %g2, 0x40, %g0 + bne,pn %xcc, 2f + mov TLB_TAG_ACCESS, %g2 + stxa %g0, [%g2] ASI_DMMU + stxa %g0, [%g1] ASI_DTLB_DATA_ACCESS + membar #Sync +2: sub %g1, 8, %g1 + brgez,pt %g1, 1b + nop + retry nop nop nop @@ -683,6 +742,52 @@ xcall_fetch_glob_pmu_n4: retry +__cheetah_xcall_flush_tlb_kernel_range: /* 44 insns */ + sethi %hi(PAGE_SIZE - 1), %g2 + or %g2, %lo(PAGE_SIZE - 1), %g2 + andn %g1, %g2, %g1 + andn %g7, %g2, %g7 + sub %g7, %g1, %g3 + srlx %g3, 18, %g2 + brnz,pn %g2, 2f + add %g2, 1, %g2 + sub %g3, %g2, %g3 + or %g1, 0x20, %g1 ! Nucleus +1: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP + stxa %g0, [%g1 + %g3] ASI_IMMU_DEMAP + membar #Sync + brnz,pt %g3, 1b + sub %g3, %g2, %g3 + retry +2: mov 0x80, %g2 + stxa %g0, [%g2] ASI_DMMU_DEMAP + membar #Sync + stxa %g0, [%g2] ASI_IMMU_DEMAP + membar #Sync + retry + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + #ifdef DCACHE_ALIASING_POSSIBLE .align 32 .globl xcall_flush_dcache_page_cheetah @@ -798,18 +903,20 @@ __hypervisor_xcall_flush_tlb_page: /* 20 insns */ nop .globl __hypervisor_xcall_flush_tlb_kernel_range -__hypervisor_xcall_flush_tlb_kernel_range: /* 28 insns */ +__hypervisor_xcall_flush_tlb_kernel_range: /* 44 insns */ /* %g1=start, %g7=end, g2,g3,g4,g5,g6=scratch */ sethi %hi(PAGE_SIZE - 1), %g2 or %g2, %lo(PAGE_SIZE - 1), %g2 andn %g1, %g2, %g1 andn %g7, %g2, %g7 sub %g7, %g1, %g3 + srlx %g3, 18, %g7 add %g2, 1, %g2 sub %g3, %g2, %g3 mov %o0, %g2 mov %o1, %g4 - mov %o2, %g7 + brnz,pn %g7, 2f + mov %o2, %g7 1: add %g1, %g3, %o0 /* ARG0: virtual address */ mov 0, %o1 /* ARG1: mmu context */ mov HV_MMU_ALL, %o2 /* ARG2: flags */ @@ -820,7 +927,7 @@ __hypervisor_xcall_flush_tlb_kernel_range: /* 28 insns */ sethi %hi(PAGE_SIZE), %o2 brnz,pt %g3, 1b sub %g3, %o2, %g3 - mov %g2, %o0 +5: mov %g2, %o0 mov %g4, %o1 mov %g7, %o2 membar #Sync @@ -828,6 +935,20 @@ __hypervisor_xcall_flush_tlb_kernel_range: /* 28 insns */ 1: sethi %hi(__hypervisor_tlb_xcall_error), %g4 jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0 nop +2: mov %o3, %g1 + mov %o5, %g3 + mov 0, %o0 /* ARG0: CPU lists unimplemented */ + mov 0, %o1 /* ARG1: CPU lists unimplemented */ + mov 0, %o2 /* ARG2: mmu context == nucleus */ + mov HV_MMU_ALL, %o3 /* ARG3: flags */ + mov HV_FAST_MMU_DEMAP_CTX, %o5 + ta HV_FAST_TRAP + mov %g1, %o3 + brz,pt %o0, 5b + mov %g3, %o5 + mov HV_FAST_MMU_DEMAP_CTX, %g6 + ba,pt %xcc, 1b + clr %g5 /* These just get rescheduled to PIL vectors. 
*/ .globl xcall_call_function @@ -864,6 +985,58 @@ xcall_kgdb_capture: #endif /* CONFIG_SMP */ + .globl cheetah_patch_cachetlbops +cheetah_patch_cachetlbops: + save %sp, -128, %sp + + sethi %hi(__flush_tlb_mm), %o0 + or %o0, %lo(__flush_tlb_mm), %o0 + sethi %hi(__cheetah_flush_tlb_mm), %o1 + or %o1, %lo(__cheetah_flush_tlb_mm), %o1 + call tlb_patch_one + mov 19, %o2 + + sethi %hi(__flush_tlb_page), %o0 + or %o0, %lo(__flush_tlb_page), %o0 + sethi %hi(__cheetah_flush_tlb_page), %o1 + or %o1, %lo(__cheetah_flush_tlb_page), %o1 + call tlb_patch_one + mov 22, %o2 + + sethi %hi(__flush_tlb_pending), %o0 + or %o0, %lo(__flush_tlb_pending), %o0 + sethi %hi(__cheetah_flush_tlb_pending), %o1 + or %o1, %lo(__cheetah_flush_tlb_pending), %o1 + call tlb_patch_one + mov 27, %o2 + + sethi %hi(__flush_tlb_kernel_range), %o0 + or %o0, %lo(__flush_tlb_kernel_range), %o0 + sethi %hi(__cheetah_flush_tlb_kernel_range), %o1 + or %o1, %lo(__cheetah_flush_tlb_kernel_range), %o1 + call tlb_patch_one + mov 31, %o2 + +#ifdef DCACHE_ALIASING_POSSIBLE + sethi %hi(__flush_dcache_page), %o0 + or %o0, %lo(__flush_dcache_page), %o0 + sethi %hi(__cheetah_flush_dcache_page), %o1 + or %o1, %lo(__cheetah_flush_dcache_page), %o1 + call tlb_patch_one + mov 11, %o2 +#endif /* DCACHE_ALIASING_POSSIBLE */ + +#ifdef CONFIG_SMP + sethi %hi(xcall_flush_tlb_kernel_range), %o0 + or %o0, %lo(xcall_flush_tlb_kernel_range), %o0 + sethi %hi(__cheetah_xcall_flush_tlb_kernel_range), %o1 + or %o1, %lo(__cheetah_xcall_flush_tlb_kernel_range), %o1 + call tlb_patch_one + mov 44, %o2 +#endif /* CONFIG_SMP */ + + ret + restore .globl hypervisor_patch_cachetlbops hypervisor_patch_cachetlbops: @@ -895,7 +1068,7 @@ hypervisor_patch_cachetlbops: sethi %hi(__hypervisor_flush_tlb_kernel_range), %o1 or %o1, %lo(__hypervisor_flush_tlb_kernel_range), %o1 call tlb_patch_one - mov 19, %o2 + mov 31, %o2 #ifdef DCACHE_ALIASING_POSSIBLE sethi %hi(__flush_dcache_page), %o0 @@ -926,7 +1099,7 @@ hypervisor_patch_cachetlbops: sethi %hi(__hypervisor_xcall_flush_tlb_kernel_range), %o1 or %o1, %lo(__hypervisor_xcall_flush_tlb_kernel_range), %o1 call tlb_patch_one - mov 28, %o2 + mov 44, %o2 #endif /* CONFIG_SMP */ ret -- cgit v1.2.3 From 796f4687bd622b0f8076b6695001ab5cdc2b722a Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 27 Oct 2016 20:14:45 +0200 Subject: kvm/x86: Show WRMSR data is in hex MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the "0x" prefix to the error messages format to make it unambiguous about what kind of value we're talking about. Signed-off-by: Borislav Petkov Cc: Paolo Bonzini Cc: "Radim Krčmář" Message-Id: <20161027181445.25319-1-bp@alien8.de> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e375235d81c9..0b2087981c06 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2262,7 +2262,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) /* Drop writes to this legacy MSR -- see rdmsr * counterpart for further detail. 
*/ - vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", msr, data); + vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", msr, data); break; case MSR_AMD64_OSVW_ID_LENGTH: if (!guest_cpuid_has_osvw(vcpu)) @@ -2280,11 +2280,11 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (kvm_pmu_is_valid_msr(vcpu, msr)) return kvm_pmu_set_msr(vcpu, msr_info); if (!ignore_msrs) { - vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data %llx\n", + vcpu_unimpl(vcpu, "unhandled wrmsr: 0x%x data 0x%llx\n", msr, data); return 1; } else { - vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data %llx\n", + vcpu_unimpl(vcpu, "ignored wrmsr: 0x%x data 0x%llx\n", msr, data); break; } -- cgit v1.2.3 From 21753583056d48a5fad964d6f272e28168426845 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 27 Oct 2016 17:46:24 -0700 Subject: h8300: fix syscall restarting Back in commit f56141e3e2d9 ("all arches, signal: move restart_block to struct task_struct"), all architectures and core code were changed to use task_struct::restart_block. However, when h8300 support was subsequently restored in v4.2, it was not updated to account for this, and maintains thread_info::restart_block, which is not kept in sync. This patch drops the redundant restart_block from thread_info, and moves h8300 to the common one in task_struct, ensuring that syscall restarting always works as expected. Fixes: f56141e3e2d9 ("all arches, signal: move restart_block to struct task_struct") Link: http://lkml.kernel.org/r/1476714934-11635-1-git-send-email-mark.rutland@arm.com Signed-off-by: Mark Rutland Cc: Andy Lutomirski Cc: Yoshinori Sato Cc: uclinux-h8-devel@lists.sourceforge.jp Cc: [4.2+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/h8300/include/asm/thread_info.h | 4 ---- arch/h8300/kernel/signal.c | 2 +- 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/h8300/include/asm/thread_info.h b/arch/h8300/include/asm/thread_info.h index b408fe660cf8..3cef06875f5c 100644 --- a/arch/h8300/include/asm/thread_info.h +++ b/arch/h8300/include/asm/thread_info.h @@ -31,7 +31,6 @@ struct thread_info { int cpu; /* cpu we're on */ int preempt_count; /* 0 => preemptable, <0 => BUG */ mm_segment_t addr_limit; - struct restart_block restart_block; }; /* @@ -44,9 +43,6 @@ struct thread_info { .cpu = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ } #define init_thread_info (init_thread_union.thread_info) diff --git a/arch/h8300/kernel/signal.c b/arch/h8300/kernel/signal.c index ad1f81f574e5..7138303cbbf2 100644 --- a/arch/h8300/kernel/signal.c +++ b/arch/h8300/kernel/signal.c @@ -79,7 +79,7 @@ restore_sigcontext(struct sigcontext *usc, int *pd0) unsigned int er0; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; /* restore passed registers */ #define COPY(r) do { err |= get_user(regs->r, &usc->sc_##r); } while (0) -- cgit v1.2.3 From c0a0aba8e478229b2f0956918542152fbad3f794 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 27 Oct 2016 17:46:38 -0700 Subject: kconfig.h: remove config_enabled() macro The use of config_enabled() is ambiguous. For config options, IS_ENABLED(), IS_REACHABLE(), etc. will make intention clearer. Sometimes config_enabled() has been used for non-config options because it is useful to check whether the given symbol is defined or not. 
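
For reference, the defined-ness test being kept here is a small argument-shifting preprocessor trick. Below is a stand-alone sketch built from the same macros include/linux/kconfig.h uses; only the CONFIG_FOO/CONFIG_BAR names are made up for the demo:

    #include <stdio.h>

    /* When the tested token is exactly 1, the placeholder macro expands
     * to "0," and smuggles an extra argument into the list, so the
     * second-argument selector picks up the 1; any other (undefined)
     * token leaves the fallback 0 in the second position. */
    #define __ARG_PLACEHOLDER_1 0,
    #define __take_second_arg(__ignored, val, ...) val
    #define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0)
    #define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val)
    #define __is_defined(x) ___is_defined(x)

    #define CONFIG_FOO 1
    /* CONFIG_BAR deliberately left undefined */

    int main(void)
    {
            printf("FOO=%d BAR=%d\n",
                   __is_defined(CONFIG_FOO), __is_defined(CONFIG_BAR));
            /* prints: FOO=1 BAR=0 */
            return 0;
    }
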
I have been tackling the deprecation of config_enabled(), and now is the time to finish this work. Some new users have appeared for v4.9-rc1, but it is trivial to replace them: - arch/x86/mm/kaslr.c: replace config_enabled() with IS_ENABLED(), because CONFIG_X86_ESPFIX64 and CONFIG_EFI are boolean. - include/asm-generic/export.h: replace config_enabled() with __is_defined(). With those replaced, config_enabled() can now be removed. Going forward, please use IS_ENABLED(), IS_REACHABLE(), etc. for config options, and __is_defined() for non-config symbols. Link: http://lkml.kernel.org/r/1476616078-32252-1-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Acked-by: Ingo Molnar Acked-by: Nicolas Pitre Cc: Peter Oberparleiter Cc: Arnd Bergmann Cc: Kees Cook Cc: Michal Marek Cc: "H. Peter Anvin" Cc: Thomas Gleixner Cc: Thomas Garnier Cc: Paul Bolle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/kaslr.c | 6 +++--- include/asm-generic/export.h | 2 +- include/linux/kconfig.h | 5 ++--- 3 files changed, 6 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index ddd2661c4502..887e57182716 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -104,10 +104,10 @@ void __init kernel_randomize_memory(void) * consistent with the vaddr_start/vaddr_end variables. */ BUILD_BUG_ON(vaddr_start >= vaddr_end); - BUILD_BUG_ON(config_enabled(CONFIG_X86_ESPFIX64) && + BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_ESPFIX64) && vaddr_end >= EFI_VA_START); - BUILD_BUG_ON((config_enabled(CONFIG_X86_ESPFIX64) || - config_enabled(CONFIG_EFI)) && + BUILD_BUG_ON((IS_ENABLED(CONFIG_X86_ESPFIX64) || + IS_ENABLED(CONFIG_EFI)) && vaddr_end >= __START_KERNEL_map); BUILD_BUG_ON(vaddr_end > __START_KERNEL_map); diff --git a/include/asm-generic/export.h b/include/asm-generic/export.h index 43199a049da5..63554e9f6e0c 100644 --- a/include/asm-generic/export.h +++ b/include/asm-generic/export.h @@ -70,7 +70,7 @@ KSYM(__kcrctab_\name): #include #define __EXPORT_SYMBOL(sym, val, sec) \ - __cond_export_sym(sym, val, sec, config_enabled(__KSYM_##sym)) + __cond_export_sym(sym, val, sec, __is_defined(__KSYM_##sym)) #define __cond_export_sym(sym, val, sec, conf) \ ___cond_export_sym(sym, val, sec, conf) #define ___cond_export_sym(sym, val, sec, enabled) \ diff --git a/include/linux/kconfig.h b/include/linux/kconfig.h index 15ec117ec537..8f2e059e4d45 100644 --- a/include/linux/kconfig.h +++ b/include/linux/kconfig.h @@ -31,7 +31,6 @@ * When CONFIG_BOOGER is not defined, we generate a (... 1, 0) pair, and when * the last step cherry picks the 2nd arg, we get a zero. */ -#define config_enabled(cfg) ___is_defined(cfg) #define __is_defined(x) ___is_defined(x) #define ___is_defined(val) ____is_defined(__ARG_PLACEHOLDER_##val) #define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0) @@ -41,13 +40,13 @@ * otherwise. For boolean options, this is equivalent to * IS_ENABLED(CONFIG_FOO). */ -#define IS_BUILTIN(option) config_enabled(option) +#define IS_BUILTIN(option) __is_defined(option) /* * IS_MODULE(CONFIG_FOO) evaluates to 1 if CONFIG_FOO is set to 'm', 0 * otherwise.
*/ -#define IS_MODULE(option) config_enabled(option##_MODULE) +#define IS_MODULE(option) __is_defined(option##_MODULE) /* * IS_REACHABLE(CONFIG_FOO) evaluates to 1 if the currently compiled -- cgit v1.2.3 From 17a88939568be28a8ea5195b55ef3a84a469777e Mon Sep 17 00:00:00 2001 From: Uwe Kleine-König Date: Thu, 27 Oct 2016 17:47:10 -0700 Subject: cris/arch-v32: cryptocop: print a hex number after a 0x prefix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It makes the result hard to interpret correctly if a base 10 number is prefixed by 0x. So change to a hex number. Link: http://lkml.kernel.org/r/20161026125658.25728-6-u.kleine-koenig@pengutronix.de Signed-off-by: Uwe Kleine-König Cc: Mikael Starvik Cc: Jesper Nilsson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/cris/arch-v32/drivers/cryptocop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/cris/arch-v32/drivers/cryptocop.c b/arch/cris/arch-v32/drivers/cryptocop.c index 099e170a93ee..0068fd411a84 100644 --- a/arch/cris/arch-v32/drivers/cryptocop.c +++ b/arch/cris/arch-v32/drivers/cryptocop.c @@ -3149,7 +3149,7 @@ static void print_dma_descriptors(struct cryptocop_int_operation *iop) printk("print_dma_descriptors start\n"); printk("iop:\n"); - printk("\tsid: 0x%lld\n", iop->sid); + printk("\tsid: 0x%llx\n", iop->sid); printk("\tcdesc_out: 0x%p\n", iop->cdesc_out); printk("\tcdesc_in: 0x%p\n", iop->cdesc_in); -- cgit v1.2.3 From 237d6e6884136923b6bd26d5141ebe1d065960c9 Mon Sep 17 00:00:00 2001 From: Michael Holzheu Date: Tue, 25 Oct 2016 16:24:28 +0200 Subject: s390/hypfs: Use get_free_page() instead of kmalloc to ensure page alignment Since commit d86bd1bece6f ("mm/slub: support left redzone") it is no longer guaranteed that kmalloc(PAGE_SIZE) returns page aligned memory. After the above commit we get an error for diag224 because aligned memory is required. This leads to the following user visible error: # mount none -t s390_hypfs /sys/hypervisor/ mount: unknown filesystem type 's390_hypfs' # dmesg | grep hypfs hypfs.cccfb8: The hardware system does not provide all functions required by hypfs hypfs.7a79f0: Initialization of hypfs failed with rc=-61 Fix this problem and use get_free_page() instead of kmalloc() to get correctly aligned memory. 
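
The guarantee at stake is easy to see from user space with an analogous pair of allocators. This is an analogy only (the kernel fix itself uses __get_free_page()/free_page(), as the diff below shows): malloc(PAGE_SIZE), like kmalloc(PAGE_SIZE) after the SLUB left-redzone change, promises only the size, while aligned_alloc() makes the alignment part of the contract:

    #include <stdio.h>
    #include <stdlib.h>
    #include <stdint.h>

    #define PAGE_SIZE 4096UL

    int main(void)
    {
            /* Size guaranteed, alignment implementation-defined: on most
             * libcs this is only 8- or 16-byte aligned, not page aligned. */
            void *buf = malloc(PAGE_SIZE);
            printf("malloc:        %s aligned\n",
                   ((uintptr_t)buf & (PAGE_SIZE - 1)) ? "NOT page" : "page");

            /* Alignment is part of the contract, playing the role that
             * __get_free_page() plays in the kernel fix. */
            void *page = aligned_alloc(PAGE_SIZE, PAGE_SIZE);
            printf("aligned_alloc: %s aligned\n",
                   ((uintptr_t)page & (PAGE_SIZE - 1)) ? "NOT page" : "page");

            free(buf);
            free(page);
            return 0;
    }
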
Cc: stable@vger.kernel.org # v3.6+ Signed-off-by: Michael Holzheu Signed-off-by: Martin Schwidefsky --- arch/s390/hypfs/hypfs_diag.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/s390/hypfs/hypfs_diag.c b/arch/s390/hypfs/hypfs_diag.c index 28f03ca60100..794bebb43d23 100644 --- a/arch/s390/hypfs/hypfs_diag.c +++ b/arch/s390/hypfs/hypfs_diag.c @@ -363,11 +363,11 @@ out: static int diag224_get_name_table(void) { /* memory must be below 2GB */ - diag224_cpu_names = kmalloc(PAGE_SIZE, GFP_KERNEL | GFP_DMA); + diag224_cpu_names = (char *) __get_free_page(GFP_KERNEL | GFP_DMA); if (!diag224_cpu_names) return -ENOMEM; if (diag224(diag224_cpu_names)) { - kfree(diag224_cpu_names); + free_page((unsigned long) diag224_cpu_names); return -EOPNOTSUPP; } EBCASC(diag224_cpu_names + 16, (*diag224_cpu_names + 1) * 16); @@ -376,7 +376,7 @@ static int diag224_get_name_table(void) static void diag224_delete_name_table(void) { - kfree(diag224_cpu_names); + free_page((unsigned long) diag224_cpu_names); } static int diag224_idx2name(int index, char *name) -- cgit v1.2.3 From 1c27f646b18fb56308dff82784ca61951bad0b48 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 27 Oct 2016 14:36:23 +0200 Subject: x86/microcode/AMD: Fix more fallout from CONFIG_RANDOMIZE_MEMORY=y We needed the physical address of the container in order to compute the offset within the relocated ramdisk. And we did this by doing __pa() on the virtual address. However, __pa() does checks whether the physical address is within PAGE_OFFSET and __START_KERNEL_map - see __phys_addr() - which fail if we have CONFIG_RANDOMIZE_MEMORY enabled: we feed a virtual address which *doesn't* have the randomization offset into a function which uses PAGE_OFFSET which *does* have that offset. This makes this check fire: VIRTUAL_BUG_ON((x > y) || !phys_addr_valid(x)); ^^^^^^ due to the randomization offset. The fix is as simple as using __pa_nodebug() because we do that randomization offset accounting later in that function ourselves. Reported-by: Bob Peterson Tested-by: Bob Peterson Signed-off-by: Borislav Petkov Cc: Andreas Gruenbacher Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Mel Gorman Cc: Peter Zijlstra Cc: Steven Whitehouse Cc: Thomas Gleixner Cc: linux-mm Cc: stable@vger.kernel.org # 4.9 Link: http://lkml.kernel.org/r/20161027123623.j2jri5bandimboff@pd.tnic Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/microcode/amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 620ab06bcf45..017bda12caae 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -429,7 +429,7 @@ int __init save_microcode_in_initrd_amd(void) * We need the physical address of the container for both bitness since * boot_params.hdr.ramdisk_image is a physical address. */ - cont = __pa(container); + cont = __pa_nodebug(container); cont_va = container; #endif -- cgit v1.2.3 From 5aab90ce1ec449912a2ebc4d45e0c85dac29e9dd Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 26 Oct 2016 11:48:24 +0200 Subject: perf/powerpc: Don't call perf_event_disable() from atomic context The trinity syscall fuzzer triggered following WARN() on powerpc: WARNING: CPU: 9 PID: 2998 at arch/powerpc/kernel/hw_breakpoint.c:278 ... 
NIP [c00000000093aedc] .hw_breakpoint_handler+0x28c/0x2b0 LR [c00000000093aed8] .hw_breakpoint_handler+0x288/0x2b0 Call Trace: [c0000002f7933580] [c00000000093aed8] .hw_breakpoint_handler+0x288/0x2b0 (unreliable) [c0000002f7933630] [c0000000000f671c] .notifier_call_chain+0x7c/0xf0 [c0000002f79336d0] [c0000000000f6abc] .__atomic_notifier_call_chain+0xbc/0x1c0 [c0000002f7933780] [c0000000000f6c40] .notify_die+0x70/0xd0 [c0000002f7933820] [c00000000001a74c] .do_break+0x4c/0x100 [c0000002f7933920] [c0000000000089fc] handle_dabr_fault+0x14/0x48 Followed by a lockdep warning: =============================== [ INFO: suspicious RCU usage. ] 4.8.0-rc5+ #7 Tainted: G W ------------------------------- ./include/linux/rcupdate.h:556 Illegal context switch in RCU read-side critical section! other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 0 2 locks held by ls/2998: #0: (rcu_read_lock){......}, at: [] .__atomic_notifier_call_chain+0x0/0x1c0 #1: (rcu_read_lock){......}, at: [] .hw_breakpoint_handler+0x0/0x2b0 stack backtrace: CPU: 9 PID: 2998 Comm: ls Tainted: G W 4.8.0-rc5+ #7 Call Trace: [c0000002f7933150] [c00000000094b1f8] .dump_stack+0xe0/0x14c (unreliable) [c0000002f79331e0] [c00000000013c468] .lockdep_rcu_suspicious+0x138/0x180 [c0000002f7933270] [c0000000001005d8] .___might_sleep+0x278/0x2e0 [c0000002f7933300] [c000000000935584] .mutex_lock_nested+0x64/0x5a0 [c0000002f7933410] [c00000000023084c] .perf_event_ctx_lock_nested+0x16c/0x380 [c0000002f7933500] [c000000000230a80] .perf_event_disable+0x20/0x60 [c0000002f7933580] [c00000000093aeec] .hw_breakpoint_handler+0x29c/0x2b0 [c0000002f7933630] [c0000000000f671c] .notifier_call_chain+0x7c/0xf0 [c0000002f79336d0] [c0000000000f6abc] .__atomic_notifier_call_chain+0xbc/0x1c0 [c0000002f7933780] [c0000000000f6c40] .notify_die+0x70/0xd0 [c0000002f7933820] [c00000000001a74c] .do_break+0x4c/0x100 [c0000002f7933920] [c0000000000089fc] handle_dabr_fault+0x14/0x48 While it looks like the first WARN() is probably valid, the other one is triggered by disabling event via perf_event_disable() from atomic context. The event is disabled here in case we were not able to emulate the instruction that hit the breakpoint. By disabling the event we unschedule the event and make sure it's not scheduled back. But we can't call perf_event_disable() from atomic context, instead we need to use the event's pending_disable irq_work method to disable it. Reported-by: Jan Stancek Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Huang Ying Cc: Jiri Olsa Cc: Linus Torvalds Cc: Michael Neuling Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20161026094824.GA21397@krava Signed-off-by: Ingo Molnar --- arch/powerpc/kernel/hw_breakpoint.c | 2 +- include/linux/perf_event.h | 1 + kernel/events/core.c | 10 ++++++++-- 3 files changed, 10 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 9781c69eae57..03d089b3ed72 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -275,7 +275,7 @@ int hw_breakpoint_handler(struct die_args *args) if (!stepped) { WARN(1, "Unable to handle hardware breakpoint. 
Breakpoint at " "0x%lx will be disabled.", info->address); - perf_event_disable(bp); + perf_event_disable_inatomic(bp); goto out; } /* diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 060d0ede88df..4741ecdb9817 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1257,6 +1257,7 @@ extern u64 perf_swevent_set_period(struct perf_event *event); extern void perf_event_enable(struct perf_event *event); extern void perf_event_disable(struct perf_event *event); extern void perf_event_disable_local(struct perf_event *event); +extern void perf_event_disable_inatomic(struct perf_event *event); extern void perf_event_task_tick(void); #else /* !CONFIG_PERF_EVENTS: */ static inline void * diff --git a/kernel/events/core.c b/kernel/events/core.c index a5d2e62faf7e..0e292132efac 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1960,6 +1960,12 @@ void perf_event_disable(struct perf_event *event) } EXPORT_SYMBOL_GPL(perf_event_disable); +void perf_event_disable_inatomic(struct perf_event *event) +{ + event->pending_disable = 1; + irq_work_queue(&event->pending); +} + static void perf_set_shadow_time(struct perf_event *event, struct perf_event_context *ctx, u64 tstamp) @@ -7075,8 +7081,8 @@ static int __perf_event_overflow(struct perf_event *event, if (events && atomic_dec_and_test(&event->event_limit)) { ret = 1; event->pending_kill = POLL_HUP; - event->pending_disable = 1; - irq_work_queue(&event->pending); + + perf_event_disable_inatomic(event); } READ_ONCE(event->overflow_handler)(event, data, regs); -- cgit v1.2.3 From f92b7604149a55cb601fc0b52911b1e11f0f2514 Mon Sep 17 00:00:00 2001 From: Imre Palik Date: Fri, 21 Oct 2016 01:18:59 -0700 Subject: perf/x86/intel: Honour the CPUID for number of fixed counters in hypervisors perf doesn't seem to honour the number of fixed counters specified by CPUID leaf 0xa. It always assumes that Intel CPUs have at least 3 fixed counters. So if some of the fixed counters are masked out by the hypervisor, it still tries to check/set them. This patch makes perf behave nicer when the kernel is running under a hypervisor that doesn't expose all the counters. This patch contains some ideas from Matt Wilson. Signed-off-by: Imre Palik Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Andi Kleen Cc: Alexander Kozyrev Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Artyom Kuanbekov Cc: David Carrillo-Cisneros Cc: David Woodhouse Cc: H. 
Peter Anvin Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Matt Wilson Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1477037939-15605-1-git-send-email-imrep.amz@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index eab0915f5995..a74a2dbc0180 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3607,10 +3607,14 @@ __init int intel_pmu_init(void) /* * Quirk: v2 perfmon does not report fixed-purpose events, so - * assume at least 3 events: + * assume at least 3 events, when not running in a hypervisor: */ - if (version > 1) - x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); + if (version > 1) { + int assume = 3 * !boot_cpu_has(X86_FEATURE_HYPERVISOR); + + x86_pmu.num_counters_fixed = + max((int)edx.split.num_counters_fixed, assume); + } if (boot_cpu_has(X86_FEATURE_PDCM)) { u64 capabilities; -- cgit v1.2.3 From bd768e146624cbec7122ed15dead8daa137d909d Mon Sep 17 00:00:00 2001 From: Ido Yariv Date: Fri, 21 Oct 2016 12:39:57 -0400 Subject: KVM: x86: fix wbinvd_dirty_mask use-after-free MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vcpu->arch.wbinvd_dirty_mask may still be used after freeing it, corrupting memory. For example, the following call trace may set a bit in an already freed cpu mask: kvm_arch_vcpu_load vcpu_load vmx_free_vcpu_nested vmx_free_vcpu kvm_arch_vcpu_free Fix this by deferring freeing of wbinvd_dirty_mask. Cc: stable@vger.kernel.org Signed-off-by: Ido Yariv Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0b2087981c06..e954be8a3185 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7410,10 +7410,12 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu) void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) { + void *wbinvd_dirty_mask = vcpu->arch.wbinvd_dirty_mask; + kvmclock_reset(vcpu); - free_cpumask_var(vcpu->arch.wbinvd_dirty_mask); kvm_x86_ops->vcpu_free(vcpu); + free_cpumask_var(wbinvd_dirty_mask); } struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, -- cgit v1.2.3 From 711c1f2671174c918045e2cb20aece976ac516cd Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 13 Oct 2016 15:53:02 -0700 Subject: ARCv2: boot log: print IOC exists as well as enabled status Previously we would print nothing in the case when IOC existed but was not enabled. And while at it, trim one line off the boot printing by consolidating the peripheral address space and IO-Coherency entries, since the latter in a way applies to the former. Signed-off-by: Vineet Gupta --- arch/arc/include/asm/setup.h | 1 + arch/arc/kernel/setup.c | 4 +--- arch/arc/mm/cache.c | 9 +++------ 3 files changed, 5 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/setup.h b/arch/arc/include/asm/setup.h index 48b37c693db3..bdc43df922c9 100644 --- a/arch/arc/include/asm/setup.h +++ b/arch/arc/include/asm/setup.h @@ -43,5 +43,6 @@ void __init setup_arch_memory(void); #define IS_USED_RUN(v) ((v) ?
"" : "(not used) ") #define IS_USED_CFG(cfg) IS_USED_RUN(IS_ENABLED(cfg)) #define IS_AVAIL2(v, s, cfg) IS_AVAIL1(v, s), IS_AVAIL1(v, IS_USED_CFG(cfg)) +#define IS_AVAIL3(v, v2, s) IS_AVAIL1(v, s), IS_AVAIL1(v, IS_DISABLED_RUN(v2)) #endif /* __ASMARC_SETUP_H */ diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 75e540972135..a77efa173df0 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -272,9 +272,7 @@ static char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len) FIX_PTR(cpu); - n += scnprintf(buf + n, len - n, - "Vector Table\t: %#x\nPeripherals\t: %#lx:%#lx\n", - cpu->vec_base, perip_base, perip_end); + n += scnprintf(buf + n, len - n, "Vector Table\t: %#x\n", cpu->vec_base); if (cpu->extn.fpu_sp || cpu->extn.fpu_dp) n += scnprintf(buf + n, len - n, "FPU\t\t: %s%s\n", diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 518ff76771f3..2b96cfc3be75 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -53,18 +53,15 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len) PR_CACHE(&cpuinfo_arc700[c].icache, CONFIG_ARC_HAS_ICACHE, "I-Cache"); PR_CACHE(&cpuinfo_arc700[c].dcache, CONFIG_ARC_HAS_DCACHE, "D-Cache"); - if (!is_isa_arcv2()) - return buf; - p = &cpuinfo_arc700[c].slc; if (p->ver) n += scnprintf(buf + n, len - n, "SLC\t\t: %uK, %uB Line%s\n", p->sz_k, p->line_len, IS_USED_RUN(slc_enable)); - if (ioc_exists) - n += scnprintf(buf + n, len - n, "IOC\t\t:%s\n", - IS_DISABLED_RUN(ioc_enable)); + n += scnprintf(buf + n, len - n, "Peripherals\t: %#lx%s%s\n", + perip_base, + IS_AVAIL3(ioc_exists, ioc_enable, ", IO-Coherency ")); return buf; } -- cgit v1.2.3 From 73e284d2572581d848267c74552215f95f0f0996 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 20 Oct 2016 17:49:15 -0700 Subject: ARC: boot log: refactor printing abt features not captured in BCRs On older arc700 cores, some of the configured features were not reflected in the build config registers. To print them at boot, we just use the corresponding Kconfig option, i.e. whether Linux is built to use them or not. So yes, this seems bogus, but there is not much else that can be done. Moreover, if Linux is booting with these enabled, then the Kconfig info is a good indicator anyway. Over time these "hacks" accumulated in read_arc_build_cfg_regs() as well as arc_cpu_mumbojumbo(), so refactor and move all of them into a single place: read_arc_build_cfg_regs().
This causes some code reduction too: | bloat-o-meter2 arch/arc/kernel/setup.o.0 arch/arc/kernel/setup.o.1 | add/remove: 0/0 grow/shrink: 2/1 up/down: 64/-132 (-68) | function old new delta | setup_processor 610 670 +60 | cpuinfo_arc700 76 80 +4 | arc_cpu_mumbojumbo 752 620 -132 Signed-off-by: Vineet Gupta --- arch/arc/include/asm/arcregs.h | 1 + arch/arc/kernel/setup.c | 87 ++++++++++++++++++++---------------------- 2 files changed, 43 insertions(+), 45 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index db25c65155cb..819b44c1a719 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -349,6 +349,7 @@ struct cpuinfo_arc { struct cpuinfo_arc_bpu bpu; struct bcr_identity core; struct bcr_isa isa; + const char *details; unsigned int vec_base; struct cpuinfo_arc_ccm iccm, dccm; struct { diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index a77efa173df0..0170d94f3860 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -40,6 +40,20 @@ struct task_struct *_current_task[NR_CPUS]; /* For stack switching */ struct cpuinfo_arc cpuinfo_arc700[NR_CPUS]; +static const struct cpuinfo_data arc_cpu_tbl[] = { +#ifdef CONFIG_ISA_ARCOMPACT + { {0x20, "ARC 600" }, 0x2F}, + { {0x30, "ARC 700" }, 0x33}, + { {0x34, "ARC 700 R4.10"}, 0x34}, + { {0x35, "ARC 700 R4.11"}, 0x35}, +#else + { {0x50, "ARC HS38 R2.0"}, 0x51}, + { {0x52, "ARC HS38 R2.1"}, 0x52}, + { {0x53, "ARC HS38 R3.0"}, 0x53}, +#endif + { {0x00, NULL } } +}; + static void read_decode_ccm_bcr(struct cpuinfo_arc *cpu) { if (is_isa_arcompact()) { @@ -92,11 +106,24 @@ static void read_arc_build_cfg_regs(void) struct bcr_timer timer; struct bcr_generic bcr; struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()]; + const struct cpuinfo_data *tbl; + FIX_PTR(cpu); READ_BCR(AUX_IDENTITY, cpu->core); READ_BCR(ARC_REG_ISA_CFG_BCR, cpu->isa); + for (tbl = &arc_cpu_tbl[0]; tbl->info.id != 0; tbl++) { + if ((cpu->core.family >= tbl->info.id) && + (cpu->core.family <= tbl->up_range)) { + cpu->details = tbl->info.str; + break; + } + } + + if (tbl->info.id == 0) + cpu->details = "UNKNOWN"; + READ_BCR(ARC_REG_TIMERS_BCR, timer); cpu->extn.timer0 = timer.t0; cpu->extn.timer1 = timer.t1; @@ -160,64 +187,34 @@ static void read_arc_build_cfg_regs(void) cpu->extn.rtt = bcr.ver ?
1 : 0; cpu->extn.debug = cpu->extn.ap | cpu->extn.smart | cpu->extn.rtt; -} -static const struct cpuinfo_data arc_cpu_tbl[] = { -#ifdef CONFIG_ISA_ARCOMPACT - { {0x20, "ARC 600" }, 0x2F}, - { {0x30, "ARC 700" }, 0x33}, - { {0x34, "ARC 700 R4.10"}, 0x34}, - { {0x35, "ARC 700 R4.11"}, 0x35}, -#else - { {0x50, "ARC HS38 R2.0"}, 0x51}, - { {0x52, "ARC HS38 R2.1"}, 0x52}, - { {0x53, "ARC HS38 R3.0"}, 0x53}, -#endif - { {0x00, NULL } } -}; + /* some hacks for lack of feature BCR info in old ARC700 cores */ + if (is_isa_arcompact()) { + if (!cpu->isa.ver) /* ISA BCR absent, use Kconfig info */ + cpu->isa.atomic = IS_ENABLED(CONFIG_ARC_HAS_LLSC); + else + cpu->isa.atomic = cpu->isa.atomic1; + cpu->isa.be = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN); + } +} static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) { struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id]; struct bcr_identity *core = &cpu->core; - const struct cpuinfo_data *tbl; - char *isa_nm; - int i, be, atomic; - int n = 0; + int i, n = 0; FIX_PTR(cpu); - if (is_isa_arcompact()) { - isa_nm = "ARCompact"; - be = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN); - - atomic = cpu->isa.atomic1; - if (!cpu->isa.ver) /* ISA BCR absent, use Kconfig info */ - atomic = IS_ENABLED(CONFIG_ARC_HAS_LLSC); - } else { - isa_nm = "ARCv2"; - be = cpu->isa.be; - atomic = cpu->isa.atomic; - } - n += scnprintf(buf + n, len - n, "\nIDENTITY\t: ARCVER [%#02x] ARCNUM [%#02x] CHIPID [%#4x]\n", core->family, core->cpu_id, core->chip_id); - for (tbl = &arc_cpu_tbl[0]; tbl->info.id != 0; tbl++) { - if ((core->family >= tbl->info.id) && - (core->family <= tbl->up_range)) { - n += scnprintf(buf + n, len - n, - "processor [%d]\t: %s (%s ISA) %s\n", - cpu_id, tbl->info.str, isa_nm, - IS_AVAIL1(be, "[Big-Endian]")); - break; - } - } - - if (tbl->info.id == 0) - n += scnprintf(buf + n, len - n, "UNKNOWN ARC Processor\n"); + n += scnprintf(buf + n, len - n, "processor [%d]\t: %s (%s ISA) %s\n", + cpu_id, cpu->details, + is_isa_arcompact() ? 
"ARCompact" : "ARCv2", + IS_AVAIL1(cpu->isa.be, "[Big-Endian]")); n += scnprintf(buf + n, len - n, "Timers\t\t: %s%s%s%s\nISA Extn\t: ", IS_AVAIL1(cpu->extn.timer0, "Timer0 "), @@ -226,7 +223,7 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) CONFIG_ARC_HAS_RTC)); n += i = scnprintf(buf + n, len - n, "%s%s%s%s%s", - IS_AVAIL2(atomic, "atomic ", CONFIG_ARC_HAS_LLSC), + IS_AVAIL2(cpu->isa.atomic, "atomic ", CONFIG_ARC_HAS_LLSC), IS_AVAIL2(cpu->isa.ldd, "ll64 ", CONFIG_ARC_HAS_LL64), IS_AVAIL1(cpu->isa.unalign, "unalign (not used)")); -- cgit v1.2.3 From a024fd9bc4d0b102b8aa66b8ecba678d2d32fdcf Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 20 Oct 2016 18:08:10 -0700 Subject: ARC: boot log: don't assume SWAPE instruction support This came to light when helping a customer with oldish ARC750 core who were getting instruction errors because of lack of SWAPE but boot log was incorrectly printing it as being present Signed-off-by: Vineet Gupta --- arch/arc/include/asm/arcregs.h | 2 +- arch/arc/kernel/setup.c | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index 819b44c1a719..b8d29b136b96 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -353,7 +353,7 @@ struct cpuinfo_arc { unsigned int vec_base; struct cpuinfo_arc_ccm iccm, dccm; struct { - unsigned int swap:1, norm:1, minmax:1, barrel:1, crc:1, pad1:3, + unsigned int swap:1, norm:1, minmax:1, barrel:1, crc:1, swape:1, pad1:2, fpu_sp:1, fpu_dp:1, pad2:6, debug:1, ap:1, smart:1, rtt:1, pad3:4, timer0:1, timer1:1, rtc:1, gfrc:1, pad4:4; diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 0170d94f3860..156981aecd74 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -138,6 +138,9 @@ static void read_arc_build_cfg_regs(void) cpu->extn.swap = read_aux_reg(ARC_REG_SWAP_BCR) ? 1 : 0; /* 1,3 */ cpu->extn.crc = read_aux_reg(ARC_REG_CRC_BCR) ? 1 : 0; cpu->extn.minmax = read_aux_reg(ARC_REG_MIXMAX_BCR) > 1 ? 1 : 0; /* 2 */ + cpu->extn.swape = (cpu->core.family >= 0x34) ? 
1 : + IS_ENABLED(CONFIG_ARC_HAS_SWAPE); + READ_BCR(ARC_REG_XY_MEM_BCR, cpu->extn_xymem); /* Read CCM BCRs for boot reporting even if not enabled in Kconfig */ @@ -250,7 +253,7 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) IS_AVAIL1(cpu->extn.swap, "swap "), IS_AVAIL1(cpu->extn.minmax, "minmax "), IS_AVAIL1(cpu->extn.crc, "crc "), - IS_AVAIL2(1, "swape", CONFIG_ARC_HAS_SWAPE)); + IS_AVAIL2(cpu->extn.swape, "swape", CONFIG_ARC_HAS_SWAPE)); if (cpu->bpu.ver) n += scnprintf(buf + n, len - n, -- cgit v1.2.3 From d7c46114e356fe41b7291ebff70d7ca09c0f0ac9 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 25 Oct 2016 13:45:11 -0700 Subject: ARC: boot log: remove awkward space comma from MMU line Signed-off-by: Vineet Gupta --- arch/arc/mm/tlb.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index ec868a9081a1..bdb295e09160 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -793,16 +793,16 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len) char super_pg[64] = ""; if (p_mmu->s_pg_sz_m) - scnprintf(super_pg, 64, "%dM Super Page%s, ", + scnprintf(super_pg, 64, "%dM Super Page %s", p_mmu->s_pg_sz_m, IS_USED_CFG(CONFIG_TRANSPARENT_HUGEPAGE)); n += scnprintf(buf + n, len - n, - "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d %s%s\n", + "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d%s%s\n", p_mmu->ver, p_mmu->pg_sz_k, super_pg, p_mmu->sets * p_mmu->ways, p_mmu->sets, p_mmu->ways, p_mmu->u_dtlb, p_mmu->u_itlb, - IS_AVAIL2(p_mmu->pae, "PAE40 ", CONFIG_ARC_HAS_PAE40)); + IS_AVAIL2(p_mmu->pae, ", PAE40 ", CONFIG_ARC_HAS_PAE40)); return buf; } -- cgit v1.2.3 From d975cbc8acb6f4a52ac46a57b13bd6a7f871b5e9 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 27 Oct 2016 14:33:19 -0700 Subject: ARC: boot log: refactor cpu name/release printing The motivation is to identify ARC750 vs. ARC770 (we currently print generic "ARC700"). A given ARC700 release could be 750 or 770, with the same ARCNUM (i.e. family identifier), which is unfortunate. The existing arc_cpu_tbl[] kept a single concatenated string for core name and release, which thus doesn't work for 750 vs. 770 identification. So split this into two tables, one with core names and the other with releases. And while we are at it, get rid of the range checking for family numbers. We just list the cores known to run Linux and ditch the others.
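The resulting lookup is then a plain sentinel-terminated, exact-match table walk, roughly like this standalone sketch (illustrative IDs and strings only, not the actual tables):

#include <stdio.h>

struct id_to_str { int id; const char *str; };

/* Illustrative table: an entry with id == 0 acts as the sentinel */
static const struct id_to_str releases[] = {
	{ 0x51, "R2.0" },
	{ 0x52, "R2.1" },
	{ 0x53, "R3.0" },
	{ 0x00, NULL }
};

static const char *rel_lookup(int family)
{
	const struct id_to_str *tbl;

	for (tbl = &releases[0]; tbl->id != 0; tbl++)
		if (tbl->id == family)	/* exact match, no ranges */
			return tbl->str;

	return "unknown";
}

int main(void)
{
	printf("family 0x52 -> %s\n", rel_lookup(0x52));
	return 0;
}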
With this in place, we add detection of ARC750, which is: cores 0x33 and before, or cores 0x34 and later with MMUv2. Signed-off-by: Vineet Gupta --- arch/arc/include/asm/arcregs.h | 2 +- arch/arc/include/asm/setup.h | 5 ----- arch/arc/kernel/setup.c | 51 +++++++++++++++++++++++++++--------------- 3 files changed, 34 insertions(+), 24 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index b8d29b136b96..7f3f9f63708c 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -349,7 +349,7 @@ struct cpuinfo_arc { struct cpuinfo_arc_bpu bpu; struct bcr_identity core; struct bcr_isa isa; - const char *details; + const char *details, *name; unsigned int vec_base; struct cpuinfo_arc_ccm iccm, dccm; struct { diff --git a/arch/arc/include/asm/setup.h b/arch/arc/include/asm/setup.h index bdc43df922c9..cb954cdab070 100644 --- a/arch/arc/include/asm/setup.h +++ b/arch/arc/include/asm/setup.h @@ -27,11 +27,6 @@ struct id_to_str { const char *str; }; -struct cpuinfo_data { - struct id_to_str info; - int up_range; -}; - extern int root_mountflags, end_mem; void setup_processor(void); diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 156981aecd74..0385df77a697 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -40,18 +40,27 @@ struct task_struct *_current_task[NR_CPUS]; /* For stack switching */ struct cpuinfo_arc cpuinfo_arc700[NR_CPUS]; -static const struct cpuinfo_data arc_cpu_tbl[] = { +static const struct id_to_str arc_cpu_rel[] = { #ifdef CONFIG_ISA_ARCOMPACT - { {0x20, "ARC 600" }, 0x2F}, - { {0x30, "ARC 700" }, 0x33}, - { {0x34, "ARC 700 R4.10"}, 0x34}, - { {0x35, "ARC 700 R4.11"}, 0x35}, + { 0x34, "R4.10"}, + { 0x35, "R4.11"}, #else - { {0x50, "ARC HS38 R2.0"}, 0x51}, - { {0x52, "ARC HS38 R2.1"}, 0x52}, - { {0x53, "ARC HS38 R3.0"}, 0x53}, + { 0x51, "R2.0" }, + { 0x52, "R2.1" }, + { 0x53, "R3.0" }, #endif - { {0x00, NULL } } + { 0x00, NULL } +}; + +static const struct id_to_str arc_cpu_nm[] = { +#ifdef CONFIG_ISA_ARCOMPACT + { 0x20, "ARC 600" }, + { 0x30, "ARC 770" }, /* 750 identified seperately */ +#else + { 0x40, "ARC EM" }, + { 0x50, "ARC HS38" }, +#endif + { 0x00, "Unknown" } }; static void read_decode_ccm_bcr(struct cpuinfo_arc *cpu) @@ -106,23 +115,25 @@ static void read_arc_build_cfg_regs(void) struct bcr_timer timer; struct bcr_generic bcr; struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()]; - const struct cpuinfo_data *tbl; + const struct id_to_str *tbl; FIX_PTR(cpu); READ_BCR(AUX_IDENTITY, cpu->core); READ_BCR(ARC_REG_ISA_CFG_BCR, cpu->isa); - for (tbl = &arc_cpu_tbl[0]; tbl->info.id != 0; tbl++) { - if ((cpu->core.family >= tbl->info.id) && - (cpu->core.family <= tbl->up_range)) { - cpu->details = tbl->info.str; + for (tbl = &arc_cpu_rel[0]; tbl->id != 0; tbl++) { + if (cpu->core.family == tbl->id) { + cpu->details = tbl->str; break; } } - if (tbl->info.id == 0) - cpu->details = "UNKNOWN"; + for (tbl = &arc_cpu_nm[0]; tbl->id != 0; tbl++) { + if ((cpu->core.family & 0xF0) == tbl->id) + break; + } + cpu->name = tbl->str; READ_BCR(ARC_REG_TIMERS_BCR, timer); cpu->extn.timer0 = timer.t0; @@ -199,6 +210,10 @@ static void read_arc_build_cfg_regs(void) cpu->isa.atomic = cpu->isa.atomic1; cpu->isa.be = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN); + + /* there's no direct way to distinguish 750 vs.
770 */ + if (unlikely(cpu->core.family < 0x34 || cpu->mmu.ver < 3)) + cpu->name = "ARC750"; } } @@ -214,8 +229,8 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) "\nIDENTITY\t: ARCVER [%#02x] ARCNUM [%#02x] CHIPID [%#4x]\n", core->family, core->cpu_id, core->chip_id); - n += scnprintf(buf + n, len - n, "processor [%d]\t: %s (%s ISA) %s\n", - cpu_id, cpu->details, + n += scnprintf(buf + n, len - n, "processor [%d]\t: %s %s (%s ISA) %s\n", + cpu_id, cpu->name, cpu->details, is_isa_arcompact() ? "ARCompact" : "ARCv2", IS_AVAIL1(cpu->isa.be, "[Big-Endian]")); -- cgit v1.2.3 From c3005475889c7c730638f95d13be3360f0b33e98 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Fri, 21 Oct 2016 16:04:37 -0700 Subject: ARC: build: retire old toggles These are really ancient toggles, and tools no longer require them to be passed. This paves the way for deprecating them in the long run. Signed-off-by: Vineet Gupta --- arch/arc/Makefile | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch') diff --git a/arch/arc/Makefile b/arch/arc/Makefile index aa82d13d4213..864adad52280 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -50,9 +50,6 @@ atleast_gcc44 := $(call cc-ifversion, -ge, 0404, y) cflags-$(atleast_gcc44) += -fsection-anchors -cflags-$(CONFIG_ARC_HAS_LLSC) += -mlock -cflags-$(CONFIG_ARC_HAS_SWAPE) += -mswape - ifdef CONFIG_ISA_ARCV2 ifndef CONFIG_ARC_HAS_LL64 -- cgit v1.2.3 From f644e3688855902ad11549029098a62cbbc8f558 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 25 Oct 2016 08:58:17 -0700 Subject: ARC: mm: retire ARC_DBG_TLB_MISS_COUNT... ... given that we have perf counters able to do the same thing non-intrusively. Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 8 --- arch/arc/kernel/troubleshoot.c | 110 ----------------------------------------- arch/arc/mm/tlbex.S | 21 -------- 3 files changed, 139 deletions(-) (limited to 'arch') diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index ac0b309aced5..bd204bfa29ed 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -540,14 +540,6 @@ config ARC_DBG_TLB_PARANOIA bool "Paranoia Checks in Low Level TLB Handlers" default n -config ARC_DBG_TLB_MISS_COUNT - bool "Profile TLB Misses" - default n - select DEBUG_FS - help - Counts number of I and D TLB Misses and exports them via Debugfs - The counters can be cleared via Debugfs as well - endif config ARC_UBOOT_SUPPORT diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c index 934150e7ac48..82f9bc819f4a 100644 --- a/arch/arc/kernel/troubleshoot.c +++ b/arch/arc/kernel/troubleshoot.c @@ -237,113 +237,3 @@ void show_kernel_fault_diag(const char *str, struct pt_regs *regs, if (!user_mode(regs)) show_stacktrace(current, regs); } - -#ifdef CONFIG_DEBUG_FS - -#include -#include -#include -#include -#include -#include -#include - -static struct dentry *test_dentry; -static struct dentry *test_dir; -static struct dentry *test_u32_dentry; - -static u32 clr_on_read = 1; - -#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT -u32 numitlb, numdtlb, num_pte_not_present; - -static int fill_display_data(char *kbuf) -{ - size_t num = 0; - num += sprintf(kbuf + num, "I-TLB Miss %x\n", numitlb); - num += sprintf(kbuf + num, "D-TLB Miss %x\n", numdtlb); - num += sprintf(kbuf + num, "PTE not present %x\n", num_pte_not_present); - - if (clr_on_read) - numitlb = numdtlb = num_pte_not_present = 0; - - return num; -} - -static int tlb_stats_open(struct inode *inode, struct file *file) -{ - file->private_data = (void *)__get_free_page(GFP_KERNEL); - return 0; -} - -/* called on user
read(): display the counters */ -static ssize_t tlb_stats_output(struct file *file, /* file descriptor */ - char __user *user_buf, /* user buffer */ - size_t len, /* length of buffer */ - loff_t *offset) /* offset in the file */ -{ - size_t num; - char *kbuf = (char *)file->private_data; - - /* All of the data can he shoved in one iteration */ - if (*offset != 0) - return 0; - - num = fill_display_data(kbuf); - - /* simple_read_from_buffer() is helper for copy to user space - It copies up to @2 (num) bytes from kernel buffer @4 (kbuf) at offset - @3 (offset) into the user space address starting at @1 (user_buf). - @5 (len) is max size of user buffer - */ - return simple_read_from_buffer(user_buf, num, offset, kbuf, len); -} - -/* called on user write : clears the counters */ -static ssize_t tlb_stats_clear(struct file *file, const char __user *user_buf, - size_t length, loff_t *offset) -{ - numitlb = numdtlb = num_pte_not_present = 0; - return length; -} - -static int tlb_stats_close(struct inode *inode, struct file *file) -{ - free_page((unsigned long)(file->private_data)); - return 0; -} - -static const struct file_operations tlb_stats_file_ops = { - .read = tlb_stats_output, - .write = tlb_stats_clear, - .open = tlb_stats_open, - .release = tlb_stats_close -}; -#endif - -static int __init arc_debugfs_init(void) -{ - test_dir = debugfs_create_dir("arc", NULL); - -#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT - test_dentry = debugfs_create_file("tlb_stats", 0444, test_dir, NULL, - &tlb_stats_file_ops); -#endif - - test_u32_dentry = - debugfs_create_u32("clr_on_read", 0444, test_dir, &clr_on_read); - - return 0; -} - -module_init(arc_debugfs_init); - -static void __exit arc_debugfs_exit(void) -{ - debugfs_remove(test_u32_dentry); - debugfs_remove(test_dentry); - debugfs_remove(test_dir); -} -module_exit(arc_debugfs_exit); - -#endif diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S index f1967eeb32e7..b30e4e36bb00 100644 --- a/arch/arc/mm/tlbex.S +++ b/arch/arc/mm/tlbex.S @@ -237,15 +237,6 @@ ex_saved_reg1: 2: -#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT - and.f 0, r0, _PAGE_PRESENT - bz 1f - ld r3, [num_pte_not_present] - add r3, r3, 1 - st r3, [num_pte_not_present] -1: -#endif - .endm ;----------------------------------------------------------------- @@ -309,12 +300,6 @@ ENTRY(EV_TLBMissI) TLBMISS_FREEUP_REGS -#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT - ld r0, [@numitlb] - add r0, r0, 1 - st r0, [@numitlb] -#endif - ;---------------------------------------------------------------- ; Get the PTE corresponding to V-addr accessed, r2 is setup with EFA LOAD_FAULT_PTE @@ -349,12 +334,6 @@ ENTRY(EV_TLBMissD) TLBMISS_FREEUP_REGS -#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT - ld r0, [@numdtlb] - add r0, r0, 1 - st r0, [@numdtlb] -#endif - ;---------------------------------------------------------------- ; Get the PTE corresponding to V-addr accessed ; If PTE exists, it will setup, r0 = PTE, r1 = Ptr to PTE, r2 = EFA -- cgit v1.2.3 From d65283f7b695b5d04ca1ab58b6bb41f443b96286 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 25 Oct 2016 10:43:20 -0700 Subject: ARC: module: elide loop to save reference to .eh_frame The loop was really needed in the .debug_frame regime, where we wanted to mark it SH_ALLOC so that apply_relocate_add() would process it. That's not needed for .eh_frame, so we check this in apply_relocate_add() which gets called for each section.
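Conceptually, the per-section check is just a string compare against the section-name string table; a standalone sketch of the idea, with heavily simplified ELF types and a made-up string table:

#include <stdio.h>
#include <string.h>

/* Simplified stand-ins for Elf32_Shdr and the module state;
 * not the real kernel structures. */
struct shdr { unsigned sh_name; };	/* offset into secstr */
static const char secstr[] = "\0.text\0.eh_frame\0.rela.text";

static int unw_sec_idx;

/* Called once per relocated section, like apply_relocate_add() */
static void note_if_eh_frame(const struct shdr *sechdrs, unsigned tgtsec)
{
	if (!strcmp(secstr + sechdrs[tgtsec].sh_name, ".eh_frame"))
		unw_sec_idx = tgtsec;	/* remember it for the unwinder */
}

int main(void)
{
	struct shdr sechdrs[] = { { 0 }, { 1 }, { 7 } }; /* "", ".text", ".eh_frame" */

	note_if_eh_frame(sechdrs, 2);
	printf("unwind section index: %d\n", unw_sec_idx);
	return 0;
}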
Note that we need to save a reference to the "section name strings" section in module_frob_arch_sections(), since apply_relocate_add() doesn't get passed that. Signed-off-by: Vineet Gupta --- arch/arc/include/asm/module.h | 1 + arch/arc/kernel/module.c | 18 ++++++++---------- 2 files changed, 9 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arc/include/asm/module.h b/arch/arc/include/asm/module.h index 518222bb3f8e..6e91d8b339c3 100644 --- a/arch/arc/include/asm/module.h +++ b/arch/arc/include/asm/module.h @@ -18,6 +18,7 @@ struct mod_arch_specific { void *unw_info; int unw_sec_idx; + const char *secstr; }; #endif diff --git a/arch/arc/kernel/module.c b/arch/arc/kernel/module.c index 9a2849756022..24bd2ffb90b7 100644 --- a/arch/arc/kernel/module.c +++ b/arch/arc/kernel/module.c @@ -30,17 +30,9 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, char *secstr, struct module *mod) { #ifdef CONFIG_ARC_DW2_UNWIND - int i; - mod->arch.unw_sec_idx = 0; mod->arch.unw_info = NULL; - - for (i = 1; i < hdr->e_shnum; i++) { - if (strcmp(secstr+sechdrs[i].sh_name, ".eh_frame") == 0) { - mod->arch.unw_sec_idx = i; - break; - } - } + mod->arch.secstr = secstr; #endif return 0; } @@ -66,8 +58,10 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, Elf32_Addr location; Elf32_Addr sec_to_patch; int relo_type; + unsigned int tgtsec; - sec_to_patch = sechdrs[sechdrs[relsec].sh_info].sh_addr; + tgtsec = sechdrs[relsec].sh_info; + sec_to_patch = sechdrs[tgtsec].sh_addr; sym_sec = (Elf32_Sym *) sechdrs[symindex].sh_addr; n = sechdrs[relsec].sh_size / sizeof(*rel_entry); @@ -111,6 +105,10 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, goto relo_err; } + + if (strcmp(module->arch.secstr+sechdrs[tgtsec].sh_name, ".eh_frame") == 0) + module->arch.unw_sec_idx = tgtsec; + return 0; relo_err: -- cgit v1.2.3 From b75dcd9c7d352c7d9ea9010e95c708595094896a Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 25 Oct 2016 11:23:19 -0700 Subject: ARC: module: print pretty section names Now that we have a reference to the section name string table in apply_relocate_add(), use it to: print the name of the section being relocated, and print the section name for symbols with a NULL name (since such a symbol refers to a section). Before: | Section to fixup 7000a060 | ========================================================= | rela->r_off | rela->addend | sym->st_value | ADDR | VALUE | ========================================================= | 1c 0 7000e000 7000a07c 7000e000 [] | 40 0 7000a000 7000a0a0 7000a000 [] After: | Section to fixup .eh_frame @7000a060 | ========================================================= | r_off r_add st_value ADDRESS VALUE | ========================================================= | 1c 0 7000e000 7000a07c 7000e000 [.init.text] | 40 0 7000a000 7000a0a0 7000a000 [.exit.text] Signed-off-by: Vineet Gupta --- arch/arc/kernel/module.c | 35 +++++++++++++++++++++-------------- 1 file changed, 21 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/arc/kernel/module.c b/arch/arc/kernel/module.c index 24bd2ffb90b7..42e964db2967 100644 --- a/arch/arc/kernel/module.c +++ b/arch/arc/kernel/module.c @@ -51,31 +51,33 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, unsigned int relsec, /* sec index for relo sec */ struct module *module) { - int i, n; + int i, n, relo_type; Elf32_Rela *rel_entry = (void *)sechdrs[relsec].sh_addr; Elf32_Sym *sym_entry, *sym_sec; - Elf32_Addr relocation; - Elf32_Addr location; - Elf32_Addr sec_to_patch; - int relo_type; + Elf32_Addr relocation, location, tgt_addr; unsigned int tgtsec; + /* + * @relsec
has relocations e.g. .rela.init.text + * @tgtsec is section to patch e.g. .init.text + */ tgtsec = sechdrs[relsec].sh_info; - sec_to_patch = sechdrs[tgtsec].sh_addr; + tgt_addr = sechdrs[tgtsec].sh_addr; sym_sec = (Elf32_Sym *) sechdrs[symindex].sh_addr; n = sechdrs[relsec].sh_size / sizeof(*rel_entry); - pr_debug("\n========== Module Sym reloc ===========================\n"); - pr_debug("Section to fixup %x\n", sec_to_patch); + pr_debug("\nSection to fixup %s @%x\n", + module->arch.secstr + sechdrs[tgtsec].sh_name, tgt_addr); pr_debug("=========================================================\n"); - pr_debug("rela->r_off | rela->addend | sym->st_value | ADDR | VALUE\n"); + pr_debug("r_off\tr_add\tst_value ADDRESS VALUE\n"); pr_debug("=========================================================\n"); /* Loop thru entries in relocation section */ for (i = 0; i < n; i++) { + const char *s; /* This is where to make the change */ - location = sec_to_patch + rel_entry[i].r_offset; + location = tgt_addr + rel_entry[i].r_offset; /* This is the symbol it is referring to. Note that all undefined symbols have been resolved. */ @@ -83,10 +85,15 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, relocation = sym_entry->st_value + rel_entry[i].r_addend; - pr_debug("\t%x\t\t%x\t\t%x %x %x [%s]\n", - rel_entry[i].r_offset, rel_entry[i].r_addend, - sym_entry->st_value, location, relocation, - strtab + sym_entry->st_name); + if (sym_entry->st_name == 0 && ELF_ST_TYPE (sym_entry->st_info) == STT_SECTION) { + s = module->arch.secstr + sechdrs[sym_entry->st_shndx].sh_name; + } else { + s = strtab + sym_entry->st_name; + } + + pr_debug(" %x\t%x\t%x %x %x [%s]\n", - rel_entry[i].r_offset, rel_entry[i].r_addend, + rel_entry[i].r_offset, rel_entry[i].r_addend, + sym_entry->st_value, location, relocation, s); /* This assumes modules are built with -mlong-calls * so any branches/jumps are absolute 32 bit jmps -- cgit v1.2.3 From 1e90a13d0c3dc94512af1ccb2b6563e8297838fa Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sat, 29 Oct 2016 13:42:42 +0200 Subject: x86/smpboot: Init apic mapping before usage The recent changes which forced the registration of the boot CPU on UP systems without ACPI tables have been fixed for systems w/o local APIC, but left a wreckage for systems which have neither ACPI nor mptables but whose CPU does have an APIC, e.g. VirtualBox. The boot process crashes in prefill_possible_map() as it wants to register the boot CPU, which needs to access the local APIC, but the local APIC is not yet mapped. There is no reason why init_apic_mappings() can't be invoked before prefill_possible_map(). So instead of playing another silly early mapping game, as the ACPI/mptables code does, we just move init_apic_mappings() before the call to prefill_possible_map(). In hindsight, I should have noticed that combination earlier. Sorry for the churn (also in stable)!
Fixes: ff8560512b8d ("x86/boot/smp: Don't try to poke disabled/non-existent APIC") Reported-and-debugged-by: Michal Necasek Reported-and-tested-by: Wolfgang Bauer Cc: prarit@redhat.com Cc: ville.syrjala@linux.intel.com Cc: michael.thayer@oracle.com Cc: knut.osmundsen@oracle.com Cc: frank.mehnert@oracle.com Cc: Borislav Petkov Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1610282114380.5053@nanos Signed-off-by: Thomas Gleixner --- arch/x86/kernel/setup.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index bbfbca5fea0c..9c337b0e8ba7 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1221,11 +1221,16 @@ void __init setup_arch(char **cmdline_p) */ get_smp_config(); + /* + * Systems w/o ACPI and mptables might not have it mapped the local + * APIC yet, but prefill_possible_map() might need to access it. + */ + init_apic_mappings(); + prefill_possible_map(); init_cpu_to_node(); - init_apic_mappings(); io_apic_init_mappings(); kvm_guest_init(); -- cgit v1.2.3 From f9d4286b9516b02e795214412d36885f572b57ad Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Thu, 27 Oct 2016 16:30:06 +0200 Subject: arch/powerpc: Update parameters for csum_tcpudp_magic & csum_tcpudp_nofold Commit 01cfbad "ipv4: Update parameters for csum_tcpudp_magic to their original types" changed parameters for csum_tcpudp_magic and csum_tcpudp_nofold for many platforms but not for PowerPC. Fixes: 01cfbad "ipv4: Update parameters for csum_tcpudp_magic to their original types" Cc: Alexander Duyck Signed-off-by: Ivan Vecera Acked-by: Alexander Duyck Signed-off-by: David S. Miller --- arch/powerpc/include/asm/checksum.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index ee655ed1ff1b..1e8fceb308a5 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -53,10 +53,8 @@ static inline __sum16 csum_fold(__wsum sum) return (__force __sum16)(~((__force u32)sum + tmp) >> 16); } -static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, - unsigned short len, - unsigned short proto, - __wsum sum) +static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, + __u8 proto, __wsum sum) { #ifdef __powerpc64__ unsigned long s = (__force u32)sum; @@ -83,10 +81,8 @@ static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, * computes the checksum of the TCP/UDP pseudo-header * returns a 16-bit checksum, already complemented */ -static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, - unsigned short len, - unsigned short proto, - __wsum sum) +static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, + __u8 proto, __wsum sum) { return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); } -- cgit v1.2.3 From ea26e4ec08d4727e3a9e48a6b74695861effcbd9 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 1 Nov 2016 00:39:48 +0100 Subject: KVM: x86: drop TSC offsetting kvm_x86_ops to fix KVM_GET/SET_CLOCK MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since commit a545ab6a0085 ("kvm: x86: add tsc_offset field to struct kvm_vcpu_arch", 2016-09-07) the offset between host and L1 TSC is cached and need not be fished out of the VMCS or VMCB. 
This means that we can implement adjust_tsc_offset_guest and read_l1_tsc entirely in generic code. The simplification is particularly significant for VMX code, where vmx->nested.vmcs01_tsc_offset was duplicating what is now in vcpu->arch.tsc_offset. Therefore the vmcs01_tsc_offset can be dropped completely. More importantly, this fixes KVM_GET_CLOCK/KVM_SET_CLOCK which, after commit 108b249c453d ("KVM: x86: introduce get_kvmclock_ns", 2016-09-01) called read_l1_tsc while the VMCS was not loaded. It thus returned bogus values on Intel CPUs. Fixes: 108b249c453dd7132599ab6dc7e435a7036c193f Reported-by: Roman Kagan Reviewed-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 --- arch/x86/kvm/svm.c | 23 ----------------------- arch/x86/kvm/vmx.c | 39 +++------------------------------------ arch/x86/kvm/x86.c | 6 +++--- 4 files changed, 6 insertions(+), 65 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4b20f7304b9c..bdde80731f49 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -948,7 +948,6 @@ struct kvm_x86_ops { int (*get_lpage_level)(void); bool (*rdtscp_supported)(void); bool (*invpcid_supported)(void); - void (*adjust_tsc_offset_guest)(struct kvm_vcpu *vcpu, s64 adjustment); void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); @@ -958,8 +957,6 @@ struct kvm_x86_ops { void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); - u64 (*read_l1_tsc)(struct kvm_vcpu *vcpu, u64 host_tsc); - void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); int (*check_intercept)(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index f8157a36ab09..8ca1eca5038d 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1138,21 +1138,6 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) mark_dirty(svm->vmcb, VMCB_INTERCEPTS); } -static void svm_adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment) -{ - struct vcpu_svm *svm = to_svm(vcpu); - - svm->vmcb->control.tsc_offset += adjustment; - if (is_guest_mode(vcpu)) - svm->nested.hsave->control.tsc_offset += adjustment; - else - trace_kvm_write_tsc_offset(vcpu->vcpu_id, - svm->vmcb->control.tsc_offset - adjustment, - svm->vmcb->control.tsc_offset); - - mark_dirty(svm->vmcb, VMCB_INTERCEPTS); -} - static void avic_init_vmcb(struct vcpu_svm *svm) { struct vmcb *vmcb = svm->vmcb; @@ -3449,12 +3434,6 @@ static int cr8_write_interception(struct vcpu_svm *svm) return 0; } -static u64 svm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) -{ - struct vmcb *vmcb = get_host_vmcb(to_svm(vcpu)); - return vmcb->control.tsc_offset + host_tsc; -} - static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) { struct vcpu_svm *svm = to_svm(vcpu); @@ -5422,8 +5401,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .has_wbinvd_exit = svm_has_wbinvd_exit, .write_tsc_offset = svm_write_tsc_offset, - .adjust_tsc_offset_guest = svm_adjust_tsc_offset_guest, - .read_l1_tsc = svm_read_l1_tsc, .set_tdp_cr3 = set_tdp_cr3, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 74a4df993a51..754c3a7f444a 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -421,7 +421,6 @@ struct nested_vmx { /* vmcs02_list cache of VMCSs recently used to run L2 guests */ struct list_head vmcs02_pool; int vmcs02_num; - u64 vmcs01_tsc_offset; bool change_vmcs01_virtual_x2apic_mode; /* L2 must run next, and mustn't decide to exit to L1. 
*/ bool nested_run_pending; @@ -2604,20 +2603,6 @@ static u64 guest_read_tsc(struct kvm_vcpu *vcpu) return kvm_scale_tsc(vcpu, host_tsc) + tsc_offset; } -/* - * Like guest_read_tsc, but always returns L1's notion of the timestamp - * counter, even if a nested guest (L2) is currently running. - */ -static u64 vmx_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) -{ - u64 tsc_offset; - - tsc_offset = is_guest_mode(vcpu) ? - to_vmx(vcpu)->nested.vmcs01_tsc_offset : - vmcs_read64(TSC_OFFSET); - return host_tsc + tsc_offset; -} - /* * writes 'offset' into guest's timestamp counter offset register */ @@ -2631,7 +2616,6 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) * to the newly set TSC to get L2's TSC. */ struct vmcs12 *vmcs12; - to_vmx(vcpu)->nested.vmcs01_tsc_offset = offset; /* recalculate vmcs02.TSC_OFFSET: */ vmcs12 = get_vmcs12(vcpu); vmcs_write64(TSC_OFFSET, offset + @@ -2644,19 +2628,6 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) } } -static void vmx_adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment) -{ - u64 offset = vmcs_read64(TSC_OFFSET); - - vmcs_write64(TSC_OFFSET, offset + adjustment); - if (is_guest_mode(vcpu)) { - /* Even when running L2, the adjustment needs to apply to L1 */ - to_vmx(vcpu)->nested.vmcs01_tsc_offset += adjustment; - } else - trace_kvm_write_tsc_offset(vcpu->vcpu_id, offset, - offset + adjustment); -} - static bool guest_cpuid_has_vmx(struct kvm_vcpu *vcpu) { struct kvm_cpuid_entry2 *best = kvm_find_cpuid_entry(vcpu, 1, 0); @@ -10061,9 +10032,9 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) vmcs_write64(TSC_OFFSET, - vmx->nested.vmcs01_tsc_offset + vmcs12->tsc_offset); + vcpu->arch.tsc_offset + vmcs12->tsc_offset); else - vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset); + vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset); if (kvm_has_tsc_control) decache_tsc_multiplier(vmx); @@ -10293,8 +10264,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) enter_guest_mode(vcpu); - vmx->nested.vmcs01_tsc_offset = vmcs_read64(TSC_OFFSET); - if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); @@ -10818,7 +10787,7 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, load_vmcs12_host_state(vcpu, vmcs12); /* Update any VMCS fields that might have changed while L2 ran */ - vmcs_write64(TSC_OFFSET, vmx->nested.vmcs01_tsc_offset); + vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset); if (vmx->hv_deadline_tsc == -1) vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL, PIN_BASED_VMX_PREEMPTION_TIMER); @@ -11339,8 +11308,6 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, .write_tsc_offset = vmx_write_tsc_offset, - .adjust_tsc_offset_guest = vmx_adjust_tsc_offset_guest, - .read_l1_tsc = vmx_read_l1_tsc, .set_tdp_cr3 = vmx_set_cr3, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e954be8a3185..3017de0431bd 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1409,7 +1409,7 @@ static u64 kvm_compute_tsc_offset(struct kvm_vcpu *vcpu, u64 target_tsc) u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc) { - return kvm_x86_ops->read_l1_tsc(vcpu, kvm_scale_tsc(vcpu, host_tsc)); + return vcpu->arch.tsc_offset + kvm_scale_tsc(vcpu, host_tsc); } EXPORT_SYMBOL_GPL(kvm_read_l1_tsc); @@ -1547,7 +1547,7 @@ EXPORT_SYMBOL_GPL(kvm_write_tsc); static inline void 
adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment) { - kvm_x86_ops->adjust_tsc_offset_guest(vcpu, adjustment); + kvm_vcpu_write_tsc_offset(vcpu, vcpu->arch.tsc_offset + adjustment); } static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment) @@ -1555,7 +1555,7 @@ static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment) if (vcpu->arch.tsc_scaling_ratio != kvm_default_tsc_scaling_ratio) WARN_ON(adjustment < 0); adjustment = kvm_scale_tsc(vcpu, (u64) adjustment); - kvm_x86_ops->adjust_tsc_offset_guest(vcpu, adjustment); + adjust_tsc_offset_guest(vcpu, adjustment); } #ifdef CONFIG_X86_64 -- cgit v1.2.3 From 355f4fb1405ec29d0fac49b4d41fcd78cbd455d5 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Fri, 28 Oct 2016 08:29:39 -0700 Subject: kvm: nVMX: VMCLEAR an active shadow VMCS after last use After a successful VM-entry with the "VMCS shadowing" VM-execution control set, the shadow VMCS referenced by the VMCS link pointer field in the current VMCS becomes active on the logical processor. A VMCS that is made active on more than one logical processor may become corrupted. Therefore, before an active VMCS can be migrated to another logical processor, the first logical processor must execute a VMCLEAR for the active VMCS. VMCLEAR both ensures that all VMCS data are written to memory and makes the VMCS inactive. Signed-off-by: Jim Mattson Reviewed-By: David Matlack Message-Id: <1477668579-22555-1-git-send-email-jmattson@google.com> Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 754c3a7f444a..5382b82462fc 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -187,6 +187,7 @@ struct vmcs { */ struct loaded_vmcs { struct vmcs *vmcs; + struct vmcs *shadow_vmcs; int cpu; int launched; struct list_head loaded_vmcss_on_cpu_link; @@ -411,7 +412,6 @@ struct nested_vmx { * memory during VMXOFF, VMCLEAR, VMPTRLD. 
*/ struct vmcs12 *cached_vmcs12; - struct vmcs *current_shadow_vmcs; /* * Indicates if the shadow vmcs must be updated with the * data hold by vmcs12 @@ -1418,6 +1418,8 @@ static void vmcs_clear(struct vmcs *vmcs) static inline void loaded_vmcs_init(struct loaded_vmcs *loaded_vmcs) { vmcs_clear(loaded_vmcs->vmcs); + if (loaded_vmcs->shadow_vmcs && loaded_vmcs->launched) + vmcs_clear(loaded_vmcs->shadow_vmcs); loaded_vmcs->cpu = -1; loaded_vmcs->launched = 0; } @@ -3533,6 +3535,7 @@ static void free_loaded_vmcs(struct loaded_vmcs *loaded_vmcs) loaded_vmcs_clear(loaded_vmcs); free_vmcs(loaded_vmcs->vmcs); loaded_vmcs->vmcs = NULL; + WARN_ON(loaded_vmcs->shadow_vmcs != NULL); } static void free_kvm_area(void) @@ -6667,6 +6670,7 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx) if (!item) return NULL; item->vmcs02.vmcs = alloc_vmcs(); + item->vmcs02.shadow_vmcs = NULL; if (!item->vmcs02.vmcs) { kfree(item); return NULL; } @@ -7043,7 +7047,7 @@ static int handle_vmon(struct kvm_vcpu *vcpu) shadow_vmcs->revision_id |= (1u << 31); /* init shadow vmcs */ vmcs_clear(shadow_vmcs); - vmx->nested.current_shadow_vmcs = shadow_vmcs; + vmx->vmcs01.shadow_vmcs = shadow_vmcs; } INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); @@ -7145,8 +7149,11 @@ static void free_nested(struct vcpu_vmx *vmx) free_page((unsigned long)vmx->nested.msr_bitmap); vmx->nested.msr_bitmap = NULL; } - if (enable_shadow_vmcs) - free_vmcs(vmx->nested.current_shadow_vmcs); + if (enable_shadow_vmcs) { + vmcs_clear(vmx->vmcs01.shadow_vmcs); + free_vmcs(vmx->vmcs01.shadow_vmcs); + vmx->vmcs01.shadow_vmcs = NULL; + } kfree(vmx->nested.cached_vmcs12); /* Unpin physical memory we referred to in current vmcs02 */ if (vmx->nested.apic_access_page) { @@ -7323,7 +7330,7 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx) int i; unsigned long field; u64 field_value; - struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs; + struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs; const unsigned long *fields = shadow_read_write_fields; const int num_fields = max_shadow_read_write_fields; @@ -7372,7 +7379,7 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx) int i, q; unsigned long field; u64 field_value = 0; - struct vmcs *shadow_vmcs = vmx->nested.current_shadow_vmcs; + struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs; vmcs_load(shadow_vmcs); @@ -7562,7 +7569,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS); vmcs_write64(VMCS_LINK_POINTER, - __pa(vmx->nested.current_shadow_vmcs)); + __pa(vmx->vmcs01.shadow_vmcs)); vmx->nested.sync_shadow_vmcs = true; } } @@ -9127,6 +9134,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id) vmx->loaded_vmcs = &vmx->vmcs01; vmx->loaded_vmcs->vmcs = alloc_vmcs(); + vmx->loaded_vmcs->shadow_vmcs = NULL; if (!vmx->loaded_vmcs->vmcs) goto free_msrs; if (!vmm_exclusive) -- cgit v1.2.3 From d9092f52d7e61dd1557f2db2400ddb430e85937e Mon Sep 17 00:00:00 2001 From: Owen Hofmann Date: Thu, 27 Oct 2016 11:25:52 -0700 Subject: kvm: x86: Check memopp before dereference (CVE-2016-8630) Commit 41061cdb98 ("KVM: emulate: do not initialize memopp") removes a check for non-NULL under incorrect assumptions. An undefined instruction with a ModR/M byte with Mod=0 and R/M=5 (e.g. 0xc7 0x15) will attempt to dereference a null pointer here.
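In isolation, the guard being added amounts to the following standalone sketch (with simplified stand-ins for the emulator's context and operand types, not the real KVM structures):

#include <stdio.h>
#include <stddef.h>

/* Simplified stand-ins for the emulator's operand/context types */
struct operand { unsigned long ea; };
struct emu_ctxt {
	int rip_relative;
	struct operand *memopp;	/* may stay NULL for undecodable insns */
	unsigned long eip;
};

static void fixup_rip_relative(struct emu_ctxt *ctxt)
{
	/* dereference memopp only after checking it is non-NULL */
	if (ctxt->rip_relative && ctxt->memopp)
		ctxt->memopp->ea += ctxt->eip;
}

int main(void)
{
	struct emu_ctxt ctxt = { .rip_relative = 1, .memopp = NULL, .eip = 0x100 };

	fixup_rip_relative(&ctxt);	/* safe even though memopp == NULL */
	puts("no crash");
	return 0;
}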
Fixes: 41061cdb98a0bec464278b4db8e894a3121671f5 Message-Id: <1477592752-126650-2-git-send-email-osh@google.com> Signed-off-by: Owen Hofmann Signed-off-by: Paolo Bonzini --- arch/x86/kvm/emulate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 4e95d3eb2955..cbd7b92585bb 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -5045,7 +5045,7 @@ done_prefixes: /* Decode and fetch the destination operand: register or memory. */ rc = decode_operand(ctxt, &ctxt->dst, (ctxt->d >> DstShift) & OpMask); - if (ctxt->rip_relative) + if (ctxt->rip_relative && likely(ctxt->memopp)) ctxt->memopp->addr.mem.ea = address_mask(ctxt, ctxt->memopp->addr.mem.ea + ctxt->_eip); -- cgit v1.2.3 From 6f63d0f6be4e48b3042df9541694f6f2c405017e Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 28 Oct 2016 20:56:07 +0200 Subject: parisc: use KERN_CONT when printing device inventory Recent changes to printk require the use of KERN_CONT to continue logging messages. So add KERN_CONT to the output of the device inventory. Signed-off-by: Helge Deller --- arch/parisc/kernel/drivers.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index f8150669b8c6..700e2d2da096 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -873,11 +873,11 @@ static void print_parisc_device(struct parisc_device *dev) if (dev->num_addrs) { int k; - printk(", additional addresses: "); + pr_cont(", additional addresses: "); for (k = 0; k < dev->num_addrs; k++) - printk("0x%lx ", dev->addr[k]); + pr_cont("0x%lx ", dev->addr[k]); } - printk("\n"); + pr_cont("\n"); } /** -- cgit v1.2.3 From f4125cfdb3008363137f744c101e5d76ead760ba Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Fri, 28 Oct 2016 22:13:42 +0200 Subject: parisc: Avoid trashing sr2 and sr3 in LWS code There is no need to trash sr2 and sr3 in the Light-weight syscall (LWS). sr2 already points to kernel space (it's zero in userspace, otherwise syscalls wouldn't work), and since the LWS code is executed in userspace, we can simply skip preloading sr3.
Signed-off-by: John David Anglin Signed-off-by: Helge Deller --- arch/parisc/kernel/syscall.S | 53 ++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 29 deletions(-) (limited to 'arch') diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index d03422e5f188..40190dbecdc0 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -474,11 +474,6 @@ lws_start: comiclr,>> __NR_lws_entries, %r20, %r0 b,n lws_exit_nosys - /* WARNING: Trashing sr2 and sr3 */ - mfsp %sr7,%r1 /* get userspace into sr3 */ - mtsp %r1,%sr3 - mtsp %r0,%sr2 /* get kernel space into sr2 */ - /* Load table start */ ldil L%lws_table, %r1 ldo R%lws_table(%r1), %r28 /* Scratch use of r28 */ @@ -627,9 +622,9 @@ cas_action: stw %r1, 4(%sr2,%r20) #endif /* The load and store could fail */ -1: ldw,ma 0(%sr3,%r26), %r28 +1: ldw,ma 0(%r26), %r28 sub,<> %r28, %r25, %r0 -2: stw,ma %r24, 0(%sr3,%r26) +2: stw,ma %r24, 0(%r26) /* Free lock */ stw,ma %r20, 0(%sr2,%r20) #if ENABLE_LWS_DEBUG @@ -706,9 +701,9 @@ lws_compare_and_swap_2: nop /* 8bit load */ -4: ldb 0(%sr3,%r25), %r25 +4: ldb 0(%r25), %r25 b cas2_lock_start -5: ldb 0(%sr3,%r24), %r24 +5: ldb 0(%r24), %r24 nop nop nop @@ -716,9 +711,9 @@ lws_compare_and_swap_2: nop /* 16bit load */ -6: ldh 0(%sr3,%r25), %r25 +6: ldh 0(%r25), %r25 b cas2_lock_start -7: ldh 0(%sr3,%r24), %r24 +7: ldh 0(%r24), %r24 nop nop nop @@ -726,9 +721,9 @@ lws_compare_and_swap_2: nop /* 32bit load */ -8: ldw 0(%sr3,%r25), %r25 +8: ldw 0(%r25), %r25 b cas2_lock_start -9: ldw 0(%sr3,%r24), %r24 +9: ldw 0(%r24), %r24 nop nop nop @@ -737,14 +732,14 @@ lws_compare_and_swap_2: /* 64bit load */ #ifdef CONFIG_64BIT -10: ldd 0(%sr3,%r25), %r25 -11: ldd 0(%sr3,%r24), %r24 +10: ldd 0(%r25), %r25 +11: ldd 0(%r24), %r24 #else /* Load new value into r22/r23 - high/low */ -10: ldw 0(%sr3,%r25), %r22 -11: ldw 4(%sr3,%r25), %r23 +10: ldw 0(%r25), %r22 +11: ldw 4(%r25), %r23 /* Load new value into fr4 for atomic store later */ -12: flddx 0(%sr3,%r24), %fr4 +12: flddx 0(%r24), %fr4 #endif cas2_lock_start: @@ -794,30 +789,30 @@ cas2_action: ldo 1(%r0),%r28 /* 8bit CAS */ -13: ldb,ma 0(%sr3,%r26), %r29 +13: ldb,ma 0(%r26), %r29 sub,= %r29, %r25, %r0 b,n cas2_end -14: stb,ma %r24, 0(%sr3,%r26) +14: stb,ma %r24, 0(%r26) b cas2_end copy %r0, %r28 nop nop /* 16bit CAS */ -15: ldh,ma 0(%sr3,%r26), %r29 +15: ldh,ma 0(%r26), %r29 sub,= %r29, %r25, %r0 b,n cas2_end -16: sth,ma %r24, 0(%sr3,%r26) +16: sth,ma %r24, 0(%r26) b cas2_end copy %r0, %r28 nop nop /* 32bit CAS */ -17: ldw,ma 0(%sr3,%r26), %r29 +17: ldw,ma 0(%r26), %r29 sub,= %r29, %r25, %r0 b,n cas2_end -18: stw,ma %r24, 0(%sr3,%r26) +18: stw,ma %r24, 0(%r26) b cas2_end copy %r0, %r28 nop @@ -825,22 +820,22 @@ cas2_action: /* 64bit CAS */ #ifdef CONFIG_64BIT -19: ldd,ma 0(%sr3,%r26), %r29 +19: ldd,ma 0(%r26), %r29 sub,*= %r29, %r25, %r0 b,n cas2_end -20: std,ma %r24, 0(%sr3,%r26) +20: std,ma %r24, 0(%r26) copy %r0, %r28 #else /* Compare first word */ -19: ldw,ma 0(%sr3,%r26), %r29 +19: ldw,ma 0(%r26), %r29 sub,= %r29, %r22, %r0 b,n cas2_end /* Compare second word */ -20: ldw,ma 4(%sr3,%r26), %r29 +20: ldw,ma 4(%r26), %r29 sub,= %r29, %r23, %r0 b,n cas2_end /* Perform the store */ -21: fstdx %fr4, 0(%sr3,%r26) +21: fstdx %fr4, 0(%r26) copy %r0, %r28 #endif -- cgit v1.2.3 From 6ed518328d0189e0fdf1bb7c73290d546143ea66 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Fri, 28 Oct 2016 23:00:34 -0400 Subject: parisc: Ensure consistent state when switching to kernel stack at syscall entry We have one critical 
section in the syscall entry path in which we switch from the userspace stack to the kernel stack. In the event of an external interrupt, the interrupt code distinguishes between those two states by analyzing the value of sr7. If sr7 is zero, it uses the kernel stack. Therefore it's important that the value of sr7 is in sync with the currently enabled stack. This patch now disables interrupts while executing the critical section. This prevents the interrupt handler from seeing an inconsistent state, which in the worst case can lead to crashes. Interestingly, in the syscall exit path interrupts were already disabled in the critical section which switches back to the userspace stack. Cc: Signed-off-by: John David Anglin Signed-off-by: Helge Deller --- arch/parisc/kernel/syscall.S | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index 40190dbecdc0..deec1f8465e5 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -106,8 +106,6 @@ linux_gateway_entry: mtsp %r0,%sr4 /* get kernel space into sr4 */ mtsp %r0,%sr5 /* get kernel space into sr5 */ mtsp %r0,%sr6 /* get kernel space into sr6 */ - mfsp %sr7,%r1 /* save user sr7 */ - mtsp %r1,%sr3 /* and store it in sr3 */ #ifdef CONFIG_64BIT /* for now we can *always* set the W bit on entry to the syscall @@ -133,6 +131,14 @@ linux_gateway_entry: depdi 0, 31, 32, %r21 1: #endif + + /* We use a rsm/ssm pair to prevent sr3 from being clobbered + * by external interrupts. + */ + mfsp %sr7,%r1 /* save user sr7 */ + rsm PSW_SM_I, %r0 /* disable interrupts */ + mtsp %r1,%sr3 /* and store it in sr3 */ + mfctl %cr30,%r1 xor %r1,%r30,%r30 /* ye olde xor trick */ xor %r1,%r30,%r1 @@ -147,6 +153,7 @@ linux_gateway_entry: */ mtsp %r0,%sr7 /* get kernel space into sr7 */ + ssm PSW_SM_I, %r0 /* enable interrupts */ STREGM %r1,FRAME_SIZE(%r30) /* save r1 (usp) here for now */ mfctl %cr30,%r1 /* get task ptr in %r1 */ LDREG TI_TASK(%r1),%r1 -- cgit v1.2.3 From 6a6e2a14bbc06a627acc71108808fdc866078db4 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 29 Oct 2016 23:52:43 +0200 Subject: parisc: Use LINUX_GATEWAY_ADDR define instead of hardcoded value LINUX_GATEWAY_ADDR is defined in unistd.h. Let's use it.
Signed-off-by: Helge Deller --- arch/parisc/kernel/syscall.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index deec1f8465e5..23de307c3052 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -100,7 +100,7 @@ set_thread_pointer: .endr /* This address must remain fixed at 0x100 for glibc's syscalls to work */ - .align 256 + .align LINUX_GATEWAY_ADDR linux_gateway_entry: gate .+8, %r0 /* become privileged */ mtsp %r0,%sr4 /* get kernel space into sr4 */ -- cgit v1.2.3 From 18088db042dd9ae25e8a1e069eb3a16db601ef8a Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 29 Oct 2016 23:47:57 +0200 Subject: parisc: Ignore the pkey system calls for now Signed-off-by: Helge Deller --- arch/parisc/include/uapi/asm/unistd.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/parisc/include/uapi/asm/unistd.h b/arch/parisc/include/uapi/asm/unistd.h index a9b9407f38f7..6b0741e7a7ed 100644 --- a/arch/parisc/include/uapi/asm/unistd.h +++ b/arch/parisc/include/uapi/asm/unistd.h @@ -368,7 +368,9 @@ #define __IGNORE_select /* newselect */ #define __IGNORE_fadvise64 /* fadvise64_64 */ - +#define __IGNORE_pkey_mprotect +#define __IGNORE_pkey_alloc +#define __IGNORE_pkey_free #define LINUX_GATEWAY_ADDR 0x100 -- cgit v1.2.3 From 682c1e52215da4a3e89c14aad60bfc0d400b025f Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Sat, 15 Oct 2016 23:03:43 +0100 Subject: MIPS: CPC: Provide default mips_cpc_default_phys_base to ignore CPC Provide a default implementation of mips_cpc_default_phys_base() which simply returns 0, and adjust mips_cpc_phys_base() to allow for mips_cpc_default_phys_base() returning 0. This allows kernels which include CPC support to be built without platform code & simply ignore the CPC if it wasn't already enabled by the bootloader. This fixes link failures such as the following from generic defconfigs: arch/mips/built-in.o: In function `mips_cpc_phys_base': arch/mips/kernel/mips-cpc.c:47: undefined reference to `mips_cpc_default_phys_base' [ralf@linux-mips.org: changed prototype for coding style compliance.] 
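The default implementation in the diff below relies on the usual GCC weak-symbol mechanism: a weak definition is used only until some other object file (here, platform code) provides a strong definition of the same name. Roughly, as a standalone sketch with made-up names:

#include <stdio.h>

/* Weak default: overridden if any other translation unit defines a
 * strong platform_phys_base(); names here are illustrative only. */
__attribute__((weak)) unsigned long platform_phys_base(void)
{
	return 0;	/* "no platform override" sentinel */
}

int main(void)
{
	unsigned long base = platform_phys_base();

	if (!base)
		printf("no platform override, feature stays disabled\n");
	else
		printf("platform base: %#lx\n", base);
	return 0;
}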
Signed-off-by: Paul Burton Reported-by: kbuild test robot Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14401/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/mips-cpc.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/kernel/mips-cpc.c b/arch/mips/kernel/mips-cpc.c index 2a45867d3b4f..a4964c334cab 100644 --- a/arch/mips/kernel/mips-cpc.c +++ b/arch/mips/kernel/mips-cpc.c @@ -21,6 +21,11 @@ static DEFINE_PER_CPU_ALIGNED(spinlock_t, cpc_core_lock); static DEFINE_PER_CPU_ALIGNED(unsigned long, cpc_core_lock_flags); +phys_addr_t __weak mips_cpc_default_phys_base(void) +{ + return 0; +} + /** * mips_cpc_phys_base - retrieve the physical base address of the CPC * @@ -43,8 +48,12 @@ static phys_addr_t mips_cpc_phys_base(void) if (cpc_base & CM_GCR_CPC_BASE_CPCEN_MSK) return cpc_base & CM_GCR_CPC_BASE_CPCBASE_MSK; - /* Otherwise, give it the default address & enable it */ + /* Otherwise, use the default address */ cpc_base = mips_cpc_default_phys_base(); + if (!cpc_base) + return cpc_base; + + /* Enable the CPC, mapped at the default address */ write_gcr_cpc_base(cpc_base | CM_GCR_CPC_BASE_CPCEN_MSK); return cpc_base; } -- cgit v1.2.3 From 93032e31a5f12df847639db3ad0b7f300bf44b7b Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Fri, 14 Oct 2016 10:17:32 +0100 Subject: MIPS: Malta: Fixup reboot Commit 10b6ea0959de ("MIPS: Malta: Use syscon-reboot driver to reboot") converted the Malta board to use the generic syscon-reboot driver to handle reboots, but incorrectly used the value 0x4d rather than 0x42 as the magic to write to the reboot register. I also incorrectly believed that syscon/regmap would default to native endianness, but this isn't the case. Force this by specifying a native-endian property in the devicetree. Signed-off-by: Paul Burton Fixes: 10b6ea0959de ("MIPS: Malta: Use syscon-reboot driver to reboot") Reported-by: Guenter Roeck Cc: linux-mips@linux-mips.org Tested-by: Guenter Roeck Tested-by: Maciej W. Rozycki Patchwork: https://patchwork.linux-mips.org/patch/14396/ Signed-off-by: Ralf Baechle --- arch/mips/boot/dts/mti/malta.dts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/boot/dts/mti/malta.dts b/arch/mips/boot/dts/mti/malta.dts index f604a272d91d..ffe3a1508e72 100644 --- a/arch/mips/boot/dts/mti/malta.dts +++ b/arch/mips/boot/dts/mti/malta.dts @@ -84,12 +84,13 @@ fpga_regs: system-controller@1f000000 { compatible = "mti,malta-fpga", "syscon", "simple-mfd"; reg = <0x1f000000 0x1000>; + native-endian; reboot { compatible = "syscon-reboot"; regmap = <&fpga_regs>; offset = <0x500>; - mask = <0x4d>; + mask = <0x42>; }; }; -- cgit v1.2.3 From 4736697963385e6257ee8e260e97347e858cd962 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Mon, 17 Oct 2016 17:21:46 +0100 Subject: MIPS: KASLR: Fix handling of NULL FDT If platform code returns a NULL pointer to the FDT, initial_boot_params will not get set to a valid pointer and attempting to find the /chosen node in it will cause a NULL pointer dereference and the kernel to crash immediately on startup - with no output to the console. Fix this by checking that initial_boot_params is valid before using it.
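The guarded FDT access pattern looks roughly like the sketch below, written against the userspace libfdt API for illustration; the "kaslr-seed" property name is an assumption here, and error handling is trimmed:

#include <libfdt.h>
#include <stdio.h>

/* initial_boot_params may legitimately be NULL if the platform
 * supplied no device tree, so every early consumer must check it. */
static const void *initial_boot_params;

static void read_chosen_entropy(void)
{
	int node, len;
	const void *prop;

	if (!initial_boot_params)	/* no FDT: nothing to read */
		return;

	node = fdt_path_offset(initial_boot_params, "/chosen");
	if (node < 0)
		return;

	prop = fdt_getprop(initial_boot_params, node, "kaslr-seed", &len);
	if (prop && len == sizeof(unsigned long long))
		printf("got %d bytes of seed\n", len);
}

int main(void)
{
	read_chosen_entropy();
	return 0;
}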
Fixes: 405bc8fd12f5 ("MIPS: Kernel: Implement KASLR using CONFIG_RELOCATABLE") Cc: stable@vger.kernel.org # 4.7+ Signed-off-by: Matt Redfearn Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14414/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/relocate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/kernel/relocate.c b/arch/mips/kernel/relocate.c index ca1cc30c0891..1958910b75c0 100644 --- a/arch/mips/kernel/relocate.c +++ b/arch/mips/kernel/relocate.c @@ -200,7 +200,7 @@ static inline __init unsigned long get_random_boot(void) #if defined(CONFIG_USE_OF) /* Get any additional entropy passed in device tree */ - { + if (initial_boot_params) { int node, len; u64 *prop; -- cgit v1.2.3 From 9a59061cfd7ac00f21111d2e8aa7f4ab11d27f6c Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Mon, 17 Oct 2016 17:25:24 +0100 Subject: MIPS: generic: Fix KASLR for generic kernel. The KASLR code requires that the plat_get_fdt() function return the address of the device tree, and it must be available early in the boot, before prom_init() is called. Move the code determining the address of the device tree into plat_get_fdt, and call that from prom_init(). The fdt pointer will initially be set up by plat_get_fdt() called from relocate_kernel; once the relocated kernel has started, prom_init() will use it again to determine the address in the relocated image. Fixes: eed0eabd12ef ("MIPS: generic: Introduce generic DT-based board support") Signed-off-by: Matt Redfearn Reviewed-by: James Hogan Reviewed-by: Paul Burton Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14415/ Signed-off-by: Ralf Baechle --- arch/mips/generic/init.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/mips/generic/init.c b/arch/mips/generic/init.c index 0ea73e845440..d493ccbf274a 100644 --- a/arch/mips/generic/init.c +++ b/arch/mips/generic/init.c @@ -29,10 +29,20 @@ static __initdata const struct mips_machine *mach; static __initdata const void *mach_match_data; void __init prom_init(void) +{ + plat_get_fdt(); + BUG_ON(!fdt); +} + +void __init *plat_get_fdt(void) { const struct mips_machine *check_mach; const struct of_device_id *match; + if (fdt) + /* Already set up */ + return (void *)fdt; + if ((fw_arg0 == -2) && !fdt_check_header((void *)fw_arg1)) { /* * We booted using the UHI boot protocol, so we have been @@ -75,12 +85,6 @@ void __init prom_init(void) /* Retrieve the machine's FDT */ fdt = mach->fdt; } - - BUG_ON(!fdt); -} - -void __init *plat_get_fdt(void) -{ return (void *)fdt; } -- cgit v1.2.3 From 818f38c5b7c4482abd71c64ac4d49911fbefaf9e Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Mon, 17 Oct 2016 10:09:39 +0100 Subject: MIPS: Fix build of compressed image Changes introduced to arch/mips/Makefile for the generic kernel resulted in build errors when making a compressed image if platform-y has multiple values, like this: make[2]: *** No rule to make target `alchemy/'. make[1]: *** [vmlinuz] Error 2 make[1]: Target `_all' not remade because of errors. make: *** [sub-make] Error 2 make: Target `_all' not remade because of errors.
Fix this by quoting $(platform-y) as it is passed to the Makefile in arch/mips/boot/compressed/Makefile Reported-by: kernelci.org bot Link: https://storage.kernelci.org/next/next-20161017/mips-gpr_defconfig/build.log Signed-off-by: Matt Redfearn Reviewed-by: Paul Burton Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14405/ Signed-off-by: Ralf Baechle --- arch/mips/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/Makefile b/arch/mips/Makefile index fbf40d3c8123..1a6bac7b076f 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -263,7 +263,7 @@ KBUILD_CPPFLAGS += -DDATAOFFSET=$(if $(dataoffset-y),$(dataoffset-y),0) bootvars-y = VMLINUX_LOAD_ADDRESS=$(load-y) \ VMLINUX_ENTRY_ADDRESS=$(entry-y) \ - PLATFORM=$(platform-y) + PLATFORM="$(platform-y)" ifdef CONFIG_32BIT bootvars-y += ADDR_BITS=32 endif -- cgit v1.2.3 From bcf084de5d429c0a321de6e2c508440148da1884 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Wed, 19 Oct 2016 14:33:20 +0100 Subject: MIPS: traps: Fix output of show_backtrace Since commit 4bcc595ccd80 ("printk: reinstate KERN_CONT for printing continuation lines") the output from show_backtrace on MIPS has been pretty unreadable due to the lack of KERN_CONT markers. Use pr_cont to provide the appropriate markers & restore the expected output. Signed-off-by: Matt Redfearn Cc: Maciej W. Rozycki Cc: James Hogan Cc: Paul Burton Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14429/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/traps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 1f5fdee1dfc3..6ae5ea752d4c 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -156,7 +156,7 @@ static void show_backtrace(struct task_struct *task, const struct pt_regs *regs) print_ip_sym(pc); pc = unwind_stack(task, &sp, pc, &ra); } while (pc); - printk("\n"); + pr_cont("\n"); } /* -- cgit v1.2.3 From fe4e09e701213213cf2024e2979ea227c2e36c60 Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Wed, 19 Oct 2016 14:33:21 +0100 Subject: MIPS: traps: Fix output of show_stacktrace Since commit 4bcc595ccd80 ("printk: reinstate KERN_CONT for printing continuation lines") the output from show_stacktrace on MIPS has been pretty unreadable due to the lack of KERN_CONT markers. Use pr_cont to provide the appropriate markers & restore the expected output. Also start a new line with printk such that the presence of timing information does not interfere with output. Signed-off-by: Matt Redfearn Cc: Maciej W. 
Rozycki Cc: James Hogan Cc: Paul Burton Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14430/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/traps.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 6ae5ea752d4c..25ce866b2895 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -174,22 +174,24 @@ static void show_stacktrace(struct task_struct *task, printk("Stack :"); i = 0; while ((unsigned long) sp & (PAGE_SIZE - 1)) { - if (i && ((i % (64 / field)) == 0)) - printk("\n "); + if (i && ((i % (64 / field)) == 0)) { + pr_cont("\n"); + printk(" "); + } if (i > 39) { - printk(" ..."); + pr_cont(" ..."); break; } if (__get_user(stackdata, sp++)) { - printk(" (Bad stack address)"); + pr_cont(" (Bad stack address)"); break; } - printk(" %0*lx", field, stackdata); + pr_cont(" %0*lx", field, stackdata); i++; } - printk("\n"); + pr_cont("\n"); show_backtrace(task, regs); } -- cgit v1.2.3 From 41000c5819ee5aea8c5c3b388e4e21e679c1b95c Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Wed, 19 Oct 2016 14:33:22 +0100 Subject: MIPS: traps: Fix output of show_code Since commit 4bcc595ccd80 ("printk: reinstate KERN_CONT for printing continuation lines") the output from show_code on MIPS has been pretty unreadable due to the lack of KERN_CONT markers. Use pr_cont to provide the appropriate markers & restore the expected output. Signed-off-by: Matt Redfearn Cc: Maciej W. Rozycki Cc: James Hogan Cc: Paul Burton Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14431/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/traps.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 25ce866b2895..273b4a419f76 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -231,18 +231,19 @@ static void show_code(unsigned int __user *pc) long i; unsigned short __user *pc16 = NULL; - printk("\nCode:"); + printk("Code:"); if ((unsigned long)pc & 1) pc16 = (unsigned short __user *)((unsigned long)pc & ~1); for(i = -3 ; i < 6 ; i++) { unsigned int insn; if (pc16 ? __get_user(insn, pc16 + i) : __get_user(insn, pc + i)) { - printk(" (Bad address in epc)\n"); + pr_cont(" (Bad address in epc)\n"); break; } - printk("%c%0*x%c", (i?' ':'<'), pc16 ? 4 : 8, insn, (i?' ':'>')); + pr_cont("%c%0*x%c", (i?' ':'<'), pc16 ? 4 : 8, insn, (i?' ':'>')); } + pr_cont("\n"); } static void __show_regs(const struct pt_regs *regs) -- cgit v1.2.3 From 752f5499823edb0c13c594f739363527178f714d Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Wed, 19 Oct 2016 14:33:23 +0100 Subject: MIPS: Fix __show_regs() output Since commit 4bcc595ccd80 ("printk: reinstate KERN_CONT for printing continuation lines") the output from __show_regs() on MIPS has been pretty unreadable due to the lack of KERN_CONT markers. Use pr_cont to provide the appropriate markers & restore the expected register output. Signed-off-by: Paul Burton Signed-off-by: Matt Redfearn Cc: Maciej W. 
Rozycki Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14432/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/traps.c | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 273b4a419f76..b9a910b208f9 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -262,15 +262,15 @@ static void __show_regs(const struct pt_regs *regs) if ((i % 4) == 0) printk("$%2d :", i); if (i == 0) - printk(" %0*lx", field, 0UL); + pr_cont(" %0*lx", field, 0UL); else if (i == 26 || i == 27) - printk(" %*s", field, ""); + pr_cont(" %*s", field, ""); else - printk(" %0*lx", field, regs->regs[i]); + pr_cont(" %0*lx", field, regs->regs[i]); i++; if ((i % 4) == 0) - printk("\n"); + pr_cont("\n"); } #ifdef CONFIG_CPU_HAS_SMARTMIPS @@ -291,46 +291,46 @@ static void __show_regs(const struct pt_regs *regs) if (cpu_has_3kex) { if (regs->cp0_status & ST0_KUO) - printk("KUo "); + pr_cont("KUo "); if (regs->cp0_status & ST0_IEO) - printk("IEo "); + pr_cont("IEo "); if (regs->cp0_status & ST0_KUP) - printk("KUp "); + pr_cont("KUp "); if (regs->cp0_status & ST0_IEP) - printk("IEp "); + pr_cont("IEp "); if (regs->cp0_status & ST0_KUC) - printk("KUc "); + pr_cont("KUc "); if (regs->cp0_status & ST0_IEC) - printk("IEc "); + pr_cont("IEc "); } else if (cpu_has_4kex) { if (regs->cp0_status & ST0_KX) - printk("KX "); + pr_cont("KX "); if (regs->cp0_status & ST0_SX) - printk("SX "); + pr_cont("SX "); if (regs->cp0_status & ST0_UX) - printk("UX "); + pr_cont("UX "); switch (regs->cp0_status & ST0_KSU) { case KSU_USER: - printk("USER "); + pr_cont("USER "); break; case KSU_SUPERVISOR: - printk("SUPERVISOR "); + pr_cont("SUPERVISOR "); break; case KSU_KERNEL: - printk("KERNEL "); + pr_cont("KERNEL "); break; default: - printk("BAD_MODE "); + pr_cont("BAD_MODE "); break; } if (regs->cp0_status & ST0_ERL) - printk("ERL "); + pr_cont("ERL "); if (regs->cp0_status & ST0_EXL) - printk("EXL "); + pr_cont("EXL "); if (regs->cp0_status & ST0_IE) - printk("IE "); + pr_cont("IE "); } - printk("\n"); + pr_cont("\n"); exccode = (cause & CAUSEF_EXCCODE) >> CAUSEB_EXCCODE; printk("Cause : %08x (ExcCode %02x)\n", cause, exccode); -- cgit v1.2.3 From 8a98495c7008d5bdede3d53e05e69ecdc0bc614b Mon Sep 17 00:00:00 2001 From: James Hogan Date: Fri, 21 Oct 2016 20:06:40 +0100 Subject: MIPS: dump_tlb: Fix printk continuations Since commit 4bcc595ccd80 ("printk: reinstate KERN_CONT for printing continuation lines") the output from TLB dumps on MIPS has been pretty unreadable due to the lack of KERN_CONT markers. Use pr_cont to provide the appropriate markers & restore the expected output. Continuation is also used for the second line of each TLB entry printed in dump_tlb.c even though it has a newline, since it is a continuation of the interpretation of the same TLB entry. For example: [ 46.371884] Index: 0 pgmask=16kb va=77654000 asid=73 gid=00 [ri=0 xi=0 pa=ffc18000 c=5 d=0 v=1 g=0] [ri=0 xi=0 pa=ffc1c000 c=5 d=0 v=1 g=0] [ 46.385380] Index: 12 pgmask=16kb va=004b4000 asid=73 gid=00 [ri=0 xi=0 pa=00000000 c=0 d=0 v=0 g=0] [ri=0 xi=0 pa=ffb00000 c=5 d=1 v=1 g=0] Signed-off-by: James Hogan Cc: Maciej W. 
Rozycki Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14444/ Signed-off-by: Ralf Baechle --- arch/mips/lib/dump_tlb.c | 44 ++++++++++++++++++++++---------------------- arch/mips/lib/r3k_dump_tlb.c | 18 +++++++++--------- 2 files changed, 31 insertions(+), 31 deletions(-) (limited to 'arch') diff --git a/arch/mips/lib/dump_tlb.c b/arch/mips/lib/dump_tlb.c index 0f80b936e75e..6eb50a7137db 100644 --- a/arch/mips/lib/dump_tlb.c +++ b/arch/mips/lib/dump_tlb.c @@ -135,42 +135,42 @@ static void dump_tlb(int first, int last) c0 = (entrylo0 & ENTRYLO_C) >> ENTRYLO_C_SHIFT; c1 = (entrylo1 & ENTRYLO_C) >> ENTRYLO_C_SHIFT; - printk("va=%0*lx asid=%0*lx", - vwidth, (entryhi & ~0x1fffUL), - asidwidth, entryhi & asidmask); + pr_cont("va=%0*lx asid=%0*lx", + vwidth, (entryhi & ~0x1fffUL), + asidwidth, entryhi & asidmask); if (cpu_has_guestid) - printk(" gid=%02lx", - (guestctl1 & MIPS_GCTL1_RID) + pr_cont(" gid=%02lx", + (guestctl1 & MIPS_GCTL1_RID) >> MIPS_GCTL1_RID_SHIFT); /* RI/XI are in awkward places, so mask them off separately */ pa = entrylo0 & ~(MIPS_ENTRYLO_RI | MIPS_ENTRYLO_XI); if (xpa) pa |= (unsigned long long)readx_c0_entrylo0() << 30; pa = (pa << 6) & PAGE_MASK; - printk("\n\t["); + pr_cont("\n\t["); if (cpu_has_rixi) - printk("ri=%d xi=%d ", - (entrylo0 & MIPS_ENTRYLO_RI) ? 1 : 0, - (entrylo0 & MIPS_ENTRYLO_XI) ? 1 : 0); - printk("pa=%0*llx c=%d d=%d v=%d g=%d] [", - pwidth, pa, c0, - (entrylo0 & ENTRYLO_D) ? 1 : 0, - (entrylo0 & ENTRYLO_V) ? 1 : 0, - (entrylo0 & ENTRYLO_G) ? 1 : 0); + pr_cont("ri=%d xi=%d ", + (entrylo0 & MIPS_ENTRYLO_RI) ? 1 : 0, + (entrylo0 & MIPS_ENTRYLO_XI) ? 1 : 0); + pr_cont("pa=%0*llx c=%d d=%d v=%d g=%d] [", + pwidth, pa, c0, + (entrylo0 & ENTRYLO_D) ? 1 : 0, + (entrylo0 & ENTRYLO_V) ? 1 : 0, + (entrylo0 & ENTRYLO_G) ? 1 : 0); /* RI/XI are in awkward places, so mask them off separately */ pa = entrylo1 & ~(MIPS_ENTRYLO_RI | MIPS_ENTRYLO_XI); if (xpa) pa |= (unsigned long long)readx_c0_entrylo1() << 30; pa = (pa << 6) & PAGE_MASK; if (cpu_has_rixi) - printk("ri=%d xi=%d ", - (entrylo1 & MIPS_ENTRYLO_RI) ? 1 : 0, - (entrylo1 & MIPS_ENTRYLO_XI) ? 1 : 0); - printk("pa=%0*llx c=%d d=%d v=%d g=%d]\n", - pwidth, pa, c1, - (entrylo1 & ENTRYLO_D) ? 1 : 0, - (entrylo1 & ENTRYLO_V) ? 1 : 0, - (entrylo1 & ENTRYLO_G) ? 1 : 0); + pr_cont("ri=%d xi=%d ", + (entrylo1 & MIPS_ENTRYLO_RI) ? 1 : 0, + (entrylo1 & MIPS_ENTRYLO_XI) ? 1 : 0); + pr_cont("pa=%0*llx c=%d d=%d v=%d g=%d]\n", + pwidth, pa, c1, + (entrylo1 & ENTRYLO_D) ? 1 : 0, + (entrylo1 & ENTRYLO_V) ? 1 : 0, + (entrylo1 & ENTRYLO_G) ? 1 : 0); } printk("\n"); diff --git a/arch/mips/lib/r3k_dump_tlb.c b/arch/mips/lib/r3k_dump_tlb.c index 744f4a7bc49d..85b4086e553e 100644 --- a/arch/mips/lib/r3k_dump_tlb.c +++ b/arch/mips/lib/r3k_dump_tlb.c @@ -53,15 +53,15 @@ static void dump_tlb(int first, int last) */ printk("Index: %2d ", i); - printk("va=%08lx asid=%08lx" - " [pa=%06lx n=%d d=%d v=%d g=%d]", - entryhi & PAGE_MASK, - entryhi & asid_mask, - entrylo0 & PAGE_MASK, - (entrylo0 & R3K_ENTRYLO_N) ? 1 : 0, - (entrylo0 & R3K_ENTRYLO_D) ? 1 : 0, - (entrylo0 & R3K_ENTRYLO_V) ? 1 : 0, - (entrylo0 & R3K_ENTRYLO_G) ? 1 : 0); + pr_cont("va=%08lx asid=%08lx" + " [pa=%06lx n=%d d=%d v=%d g=%d]", + entryhi & PAGE_MASK, + entryhi & asid_mask, + entrylo0 & PAGE_MASK, + (entrylo0 & R3K_ENTRYLO_N) ? 1 : 0, + (entrylo0 & R3K_ENTRYLO_D) ? 1 : 0, + (entrylo0 & R3K_ENTRYLO_V) ? 1 : 0, + (entrylo0 & R3K_ENTRYLO_G) ? 
1 : 0); } } printk("\n"); -- cgit v1.2.3 From c9e5603974573367c4d80964a845237a2297228c Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Fri, 28 Oct 2016 08:20:09 +0100 Subject: MIPS: ptrace: Also initialize the FP context on individual FCSR writes Complement commit ac9ad83bc318 ("MIPS: prevent FP context set via ptrace being discarded") and also initialize the FP context whenever FCSR alone is written with a PTRACE_POKEUSR request addressing FPC_CSR, rather than along with the full FPU register set in the case of the PTRACE_SETFPREGS request. Signed-off-by: Maciej W. Rozycki Cc: Paul Burton Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14459/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/ptrace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 6103b24d1bfc..fd3a9580661a 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -817,6 +817,7 @@ long arch_ptrace(struct task_struct *child, long request, break; #endif case FPC_CSR: + init_fp_ctx(child); ptrace_setfcr31(child, data); break; case DSP_BASE ... DSP_BASE + 5: { -- cgit v1.2.3 From 5a1aca4469fdccd5b74ba0b4e490173b2b447895 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Fri, 28 Oct 2016 08:21:03 +0100 Subject: MIPS: Fix FCSR Cause bit handling for correct SIGFPE issue Sanitize FCSR Cause bit handling, following a trail of past attempts: * commit 4249548454f7 ("MIPS: ptrace: Fix FP context restoration FCSR regression"), * commit 443c44032a54 ("MIPS: Always clear FCSR cause bits after emulation"), * commit 64bedffe4968 ("MIPS: Clear [MSA]FPE CSR.Cause after notify_die()"), * commit b1442d39fac2 ("MIPS: Prevent user from setting FCSR cause bits"), * commit b54d2901517d ("Properly handle branch delay slots in connection with signals."). Specifically do not mask these bits out in ptrace(2) processing and send a SIGFPE signal instead whenever a matching pair of an FCSR Cause and Enable bit is seen as execution of an affected context is about to resume. Only then clear Cause bits, and even then do not clear any bits that are set but masked with the respective Enable bits. Adjust Cause bit clearing throughout code likewise, except within the FPU emulator proper where they are set according to IEEE 754 exceptions raised as the operation emulated executed. Do so so that any IEEE 754 exceptions subject to their default handling are recorded like with operations executed by FPU hardware. Signed-off-by: Maciej W. 
Rozycki Cc: Paul Burton Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14460/ Signed-off-by: Ralf Baechle --- arch/mips/include/asm/fpu_emulator.h | 13 +++++++ arch/mips/include/asm/switch_to.h | 18 +++++++++ arch/mips/kernel/mips-r2-to-r6-emul.c | 10 ++--- arch/mips/kernel/ptrace.c | 7 ++-- arch/mips/kernel/traps.c | 72 +++++++++++++++++++---------------- 5 files changed, 78 insertions(+), 42 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/fpu_emulator.h b/arch/mips/include/asm/fpu_emulator.h index 355dc25172e7..c05369e0b8d6 100644 --- a/arch/mips/include/asm/fpu_emulator.h +++ b/arch/mips/include/asm/fpu_emulator.h @@ -63,6 +63,8 @@ do { \ extern int fpu_emulator_cop1Handler(struct pt_regs *xcp, struct mips_fpu_struct *ctx, int has_fpu, void *__user *fault_addr); +void force_fcr31_sig(unsigned long fcr31, void __user *fault_addr, + struct task_struct *tsk); int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31); int isBranchInstr(struct pt_regs *regs, struct mm_decoded_insn dec_insn, @@ -81,4 +83,15 @@ static inline void fpu_emulator_init_fpu(void) set_fpr64(&t->thread.fpu.fpr[i], 0, SIGNALLING_NAN); } +/* + * Mask the FCSR Cause bits according to the Enable bits, observing + * that Unimplemented is always enabled. + */ +static inline unsigned long mask_fcr31_x(unsigned long fcr31) +{ + return fcr31 & (FPU_CSR_UNI_X | + ((fcr31 & FPU_CSR_ALL_E) << + (ffs(FPU_CSR_ALL_X) - ffs(FPU_CSR_ALL_E)))); +} + #endif /* _ASM_FPU_EMULATOR_H */ diff --git a/arch/mips/include/asm/switch_to.h b/arch/mips/include/asm/switch_to.h index ebb5c0f2f90d..c0ae27971e31 100644 --- a/arch/mips/include/asm/switch_to.h +++ b/arch/mips/include/asm/switch_to.h @@ -75,6 +75,22 @@ do { if (cpu_has_rw_llb) { \ } \ } while (0) +/* + * Check FCSR for any unmasked exceptions pending set with `ptrace', + * clear them and send a signal. + */ +#define __sanitize_fcr31(next) \ +do { \ + unsigned long fcr31 = mask_fcr31_x(next->thread.fpu.fcr31); \ + void __user *pc; \ + \ + if (unlikely(fcr31)) { \ + pc = (void __user *)task_pt_regs(next)->cp0_epc; \ + next->thread.fpu.fcr31 &= ~fcr31; \ + force_fcr31_sig(fcr31, pc, next); \ + } \ +} while (0) + /* * For newly created kernel threads switch_to() will return to * ret_from_kernel_thread, newly created user threads to ret_from_fork. @@ -85,6 +101,8 @@ do { if (cpu_has_rw_llb) { \ do { \ __mips_mt_fpaff_switch_to(prev); \ lose_fpu_inatomic(1, prev); \ + if (tsk_used_math(next)) \ + __sanitize_fcr31(next); \ if (cpu_has_dsp) { \ __save_dsp(prev); \ __restore_dsp(next); \ diff --git a/arch/mips/kernel/mips-r2-to-r6-emul.c b/arch/mips/kernel/mips-r2-to-r6-emul.c index 22dedd62818a..bd09853aecdf 100644 --- a/arch/mips/kernel/mips-r2-to-r6-emul.c +++ b/arch/mips/kernel/mips-r2-to-r6-emul.c @@ -899,7 +899,7 @@ static inline int mipsr2_find_op_func(struct pt_regs *regs, u32 inst, * mipsr2_decoder: Decode and emulate a MIPS R2 instruction * @regs: Process register set * @inst: Instruction to decode and emulate - * @fcr31: Floating Point Control and Status Register returned + * @fcr31: Floating Point Control and Status Register Cause bits returned */ int mipsr2_decoder(struct pt_regs *regs, u32 inst, unsigned long *fcr31) { @@ -1172,13 +1172,13 @@ fpu_emul: err = fpu_emulator_cop1Handler(regs, ¤t->thread.fpu, 0, &fault_addr); - *fcr31 = current->thread.fpu.fcr31; /* - * We can't allow the emulated instruction to leave any of - * the cause bits set in $fcr31. 
+ * We can't allow the emulated instruction to leave any + * enabled Cause bits set in $fcr31. */ - current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X; + *fcr31 = res = mask_fcr31_x(current->thread.fpu.fcr31); + current->thread.fpu.fcr31 &= ~res; /* * this is a tricky issue - lose_fpu() uses LL/SC atomics diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index fd3a9580661a..a92994d60e91 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -79,16 +79,15 @@ void ptrace_disable(struct task_struct *child) } /* - * Poke at FCSR according to its mask. Don't set the cause bits as - * this is currently not handled correctly in FP context restoration - * and will cause an oops if a corresponding enable bit is set. + * Poke at FCSR according to its mask. Set the Cause bits even + * if a corresponding Enable bit is set. This will be noticed at + * the time the thread is switched to and SIGFPE thrown accordingly. */ static void ptrace_setfcr31(struct task_struct *child, u32 value) { u32 fcr31; u32 mask; - value &= ~FPU_CSR_ALL_X; fcr31 = child->thread.fpu.fcr31; mask = boot_cpu_data.fpu_msk31; child->thread.fpu.fcr31 = (value & ~mask) | (fcr31 & mask); diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index b9a910b208f9..3905003dfe2b 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -708,6 +708,32 @@ asmlinkage void do_ov(struct pt_regs *regs) exception_exit(prev_state); } +/* + * Send SIGFPE according to FCSR Cause bits, which must have already + * been masked against Enable bits. This is impotant as Inexact can + * happen together with Overflow or Underflow, and `ptrace' can set + * any bits. + */ +void force_fcr31_sig(unsigned long fcr31, void __user *fault_addr, + struct task_struct *tsk) +{ + struct siginfo si = { .si_addr = fault_addr, .si_signo = SIGFPE }; + + if (fcr31 & FPU_CSR_INV_X) + si.si_code = FPE_FLTINV; + else if (fcr31 & FPU_CSR_DIV_X) + si.si_code = FPE_FLTDIV; + else if (fcr31 & FPU_CSR_OVF_X) + si.si_code = FPE_FLTOVF; + else if (fcr31 & FPU_CSR_UDF_X) + si.si_code = FPE_FLTUND; + else if (fcr31 & FPU_CSR_INE_X) + si.si_code = FPE_FLTRES; + else + si.si_code = __SI_FAULT; + force_sig_info(SIGFPE, &si, tsk); +} + int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31) { struct siginfo si = { 0 }; @@ -718,27 +744,7 @@ int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31) return 0; case SIGFPE: - si.si_addr = fault_addr; - si.si_signo = sig; - /* - * Inexact can happen together with Overflow or Underflow. - * Respect the mask to deliver the correct exception. - */ - fcr31 &= (fcr31 & FPU_CSR_ALL_E) << - (ffs(FPU_CSR_ALL_X) - ffs(FPU_CSR_ALL_E)); - if (fcr31 & FPU_CSR_INV_X) - si.si_code = FPE_FLTINV; - else if (fcr31 & FPU_CSR_DIV_X) - si.si_code = FPE_FLTDIV; - else if (fcr31 & FPU_CSR_OVF_X) - si.si_code = FPE_FLTOVF; - else if (fcr31 & FPU_CSR_UDF_X) - si.si_code = FPE_FLTUND; - else if (fcr31 & FPU_CSR_INE_X) - si.si_code = FPE_FLTRES; - else - si.si_code = __SI_FAULT; - force_sig_info(sig, &si, current); + force_fcr31_sig(fcr31, fault_addr, current); return 1; case SIGBUS: @@ -802,13 +808,13 @@ static int simulate_fp(struct pt_regs *regs, unsigned int opcode, /* Run the emulator */ sig = fpu_emulator_cop1Handler(regs, ¤t->thread.fpu, 1, &fault_addr); - fcr31 = current->thread.fpu.fcr31; /* - * We can't allow the emulated instruction to leave any of - * the cause bits set in $fcr31. 
+ * We can't allow the emulated instruction to leave any + * enabled Cause bits set in $fcr31. */ - current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X; + fcr31 = mask_fcr31_x(current->thread.fpu.fcr31); + current->thread.fpu.fcr31 &= ~fcr31; /* Restore the hardware register state */ own_fpu(1); @@ -834,7 +840,7 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31) goto out; /* Clear FCSR.Cause before enabling interrupts */ - write_32bit_cp1_register(CP1_STATUS, fcr31 & ~FPU_CSR_ALL_X); + write_32bit_cp1_register(CP1_STATUS, fcr31 & ~mask_fcr31_x(fcr31)); local_irq_enable(); die_if_kernel("FP exception in kernel code", regs); @@ -856,13 +862,13 @@ asmlinkage void do_fpe(struct pt_regs *regs, unsigned long fcr31) /* Run the emulator */ sig = fpu_emulator_cop1Handler(regs, ¤t->thread.fpu, 1, &fault_addr); - fcr31 = current->thread.fpu.fcr31; /* - * We can't allow the emulated instruction to leave any of - * the cause bits set in $fcr31. + * We can't allow the emulated instruction to leave any + * enabled Cause bits set in $fcr31. */ - current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X; + fcr31 = mask_fcr31_x(current->thread.fpu.fcr31); + current->thread.fpu.fcr31 &= ~fcr31; /* Restore the hardware register state */ own_fpu(1); /* Using the FPU again. */ @@ -1427,13 +1433,13 @@ asmlinkage void do_cpu(struct pt_regs *regs) sig = fpu_emulator_cop1Handler(regs, ¤t->thread.fpu, 0, &fault_addr); - fcr31 = current->thread.fpu.fcr31; /* * We can't allow the emulated instruction to leave - * any of the cause bits set in $fcr31. + * any enabled Cause bits set in $fcr31. */ - current->thread.fpu.fcr31 &= ~FPU_CSR_ALL_X; + fcr31 = mask_fcr31_x(current->thread.fpu.fcr31); + current->thread.fpu.fcr31 &= ~fcr31; /* Send a signal if required. */ if (!process_fpemu_return(sig, fault_addr, fcr31) && !err) -- cgit v1.2.3 From 35938a00ba86ae7a7404b66b526968ca2b8d3127 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 31 Oct 2016 16:25:44 +0000 Subject: MIPS: Fix ISA I FP sigcontext access violation handling Complement commit 0ae8dceaebe3 ("Merge with 2.3.10.") and use the local `fault' handler to recover from FP sigcontext access violation faults, like corresponding code does in r4k_fpu.S. The `bad_stack' handler is in syscall.c and is not suitable here as we want to propagate the error condition up through the caller rather than killing the thread outright. Signed-off-by: Maciej W. Rozycki Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14474/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/r2300_fpu.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/kernel/r2300_fpu.S b/arch/mips/kernel/r2300_fpu.S index b4ac6374a38f..c4c8c1b65be9 100644 --- a/arch/mips/kernel/r2300_fpu.S +++ b/arch/mips/kernel/r2300_fpu.S @@ -21,7 +21,7 @@ #define EX(a,b) \ 9: a,##b; \ .section __ex_table,"a"; \ - PTR 9b,bad_stack; \ + PTR 9b,fault; \ .previous .set noreorder -- cgit v1.2.3 From 6daaa3266db9cc488612690e42c23b0763e2b49a Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 31 Oct 2016 16:26:24 +0000 Subject: MIPS: Remove FIR from ISA I FP signal context Complement commit e50c0a8fa60d ("Support the MIPS32 / MIPS64 DSP ASE.") and remove the Floating Point Implementation Register (FIR) from the FP register set recorded in a signal context with MIPS I processors too, in line with the change applied to r4k_fpu.S. 
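As an aside, FIR needs no kernel mediation at all; a hypothetical user-space sketch (not part of the patch) reads it directly:

    /* Hypothetical user-space sketch: FIR is CP1 control register $0
     * and is readable with a single CFC1 whenever the FPU is usable. */
    static inline unsigned int read_fir(void)
    {
            unsigned int fir;

            __asm__("cfc1 %0, $0" : "=r" (fir));
            return fir;
    }
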
The `sc_fpc_eir' slot is unused according to our current ABI and the FIR register is read-only and always directly accessible from user software. [ralf@linux-mips.org: This is also required because the next commit depends on it.] Signed-off-by: Maciej W. Rozycki Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14475/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/r2300_fpu.S | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/mips/kernel/r2300_fpu.S b/arch/mips/kernel/r2300_fpu.S index c4c8c1b65be9..ce249eae91ce 100644 --- a/arch/mips/kernel/r2300_fpu.S +++ b/arch/mips/kernel/r2300_fpu.S @@ -64,13 +64,9 @@ LEAF(_save_fp_context) EX(swc1 $f29,(SC_FPREGS+232)(a0)) EX(swc1 $f30,(SC_FPREGS+240)(a0)) EX(swc1 $f31,(SC_FPREGS+248)(a0)) - EX(sw t1,(SC_FPC_CSR)(a0)) - cfc1 t0,$0 # implementation/version jr ra + EX(sw t1,(SC_FPC_CSR)(a0)) .set pop - .set nomacro - EX(sw t0,(SC_FPC_EIR)(a0)) - .set macro END(_save_fp_context) /* -- cgit v1.2.3 From 758ef0a939d4c003381d2a97d9fb51b2d6d7e162 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 31 Oct 2016 16:27:01 +0000 Subject: MIPS: Fix ISA I/II FP signal context offsets Fix a regression introduced with commit 2db9ca0a3551 ("MIPS: Use struct mips_abi offsets to save FP context") for MIPS I/II FP signal contexts, by converting save/restore code to the updated internal API. Start FGR offsets from 0 rather than SC_FPREGS from $a0 and use $a1 rather than the offset of SC_FPC_CSR from $a0 for the Floating Point Control/Status Register (FCSR). Document the new internal API and adjust assembly code formatting for consistency. Signed-off-by: Maciej W. Rozycki Cc: Paul Burton Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14476/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/r2300_fpu.S | 159 ++++++++++++++++++++++--------------------- arch/mips/kernel/r6000_fpu.S | 89 +++++++++++++----------- 2 files changed, 131 insertions(+), 117 deletions(-) (limited to 'arch') diff --git a/arch/mips/kernel/r2300_fpu.S b/arch/mips/kernel/r2300_fpu.S index ce249eae91ce..70ca63752cfa 100644 --- a/arch/mips/kernel/r2300_fpu.S +++ b/arch/mips/kernel/r2300_fpu.S @@ -26,97 +26,104 @@ .set noreorder .set mips1 - /* Save floating point context */ + +/** + * _save_fp_context() - save FP context from the FPU + * @a0 - pointer to fpregs field of sigcontext + * @a1 - pointer to fpc_csr field of sigcontext + * + * Save FP context, including the 32 FP data registers and the FP + * control & status register, from the FPU to signal context.
+ */ LEAF(_save_fp_context) .set push SET_HARDFLOAT li v0, 0 # assume success - cfc1 t1,fcr31 - EX(swc1 $f0,(SC_FPREGS+0)(a0)) - EX(swc1 $f1,(SC_FPREGS+8)(a0)) - EX(swc1 $f2,(SC_FPREGS+16)(a0)) - EX(swc1 $f3,(SC_FPREGS+24)(a0)) - EX(swc1 $f4,(SC_FPREGS+32)(a0)) - EX(swc1 $f5,(SC_FPREGS+40)(a0)) - EX(swc1 $f6,(SC_FPREGS+48)(a0)) - EX(swc1 $f7,(SC_FPREGS+56)(a0)) - EX(swc1 $f8,(SC_FPREGS+64)(a0)) - EX(swc1 $f9,(SC_FPREGS+72)(a0)) - EX(swc1 $f10,(SC_FPREGS+80)(a0)) - EX(swc1 $f11,(SC_FPREGS+88)(a0)) - EX(swc1 $f12,(SC_FPREGS+96)(a0)) - EX(swc1 $f13,(SC_FPREGS+104)(a0)) - EX(swc1 $f14,(SC_FPREGS+112)(a0)) - EX(swc1 $f15,(SC_FPREGS+120)(a0)) - EX(swc1 $f16,(SC_FPREGS+128)(a0)) - EX(swc1 $f17,(SC_FPREGS+136)(a0)) - EX(swc1 $f18,(SC_FPREGS+144)(a0)) - EX(swc1 $f19,(SC_FPREGS+152)(a0)) - EX(swc1 $f20,(SC_FPREGS+160)(a0)) - EX(swc1 $f21,(SC_FPREGS+168)(a0)) - EX(swc1 $f22,(SC_FPREGS+176)(a0)) - EX(swc1 $f23,(SC_FPREGS+184)(a0)) - EX(swc1 $f24,(SC_FPREGS+192)(a0)) - EX(swc1 $f25,(SC_FPREGS+200)(a0)) - EX(swc1 $f26,(SC_FPREGS+208)(a0)) - EX(swc1 $f27,(SC_FPREGS+216)(a0)) - EX(swc1 $f28,(SC_FPREGS+224)(a0)) - EX(swc1 $f29,(SC_FPREGS+232)(a0)) - EX(swc1 $f30,(SC_FPREGS+240)(a0)) - EX(swc1 $f31,(SC_FPREGS+248)(a0)) + cfc1 t1, fcr31 + EX(swc1 $f0, 0(a0)) + EX(swc1 $f1, 8(a0)) + EX(swc1 $f2, 16(a0)) + EX(swc1 $f3, 24(a0)) + EX(swc1 $f4, 32(a0)) + EX(swc1 $f5, 40(a0)) + EX(swc1 $f6, 48(a0)) + EX(swc1 $f7, 56(a0)) + EX(swc1 $f8, 64(a0)) + EX(swc1 $f9, 72(a0)) + EX(swc1 $f10, 80(a0)) + EX(swc1 $f11, 88(a0)) + EX(swc1 $f12, 96(a0)) + EX(swc1 $f13, 104(a0)) + EX(swc1 $f14, 112(a0)) + EX(swc1 $f15, 120(a0)) + EX(swc1 $f16, 128(a0)) + EX(swc1 $f17, 136(a0)) + EX(swc1 $f18, 144(a0)) + EX(swc1 $f19, 152(a0)) + EX(swc1 $f20, 160(a0)) + EX(swc1 $f21, 168(a0)) + EX(swc1 $f22, 176(a0)) + EX(swc1 $f23, 184(a0)) + EX(swc1 $f24, 192(a0)) + EX(swc1 $f25, 200(a0)) + EX(swc1 $f26, 208(a0)) + EX(swc1 $f27, 216(a0)) + EX(swc1 $f28, 224(a0)) + EX(swc1 $f29, 232(a0)) + EX(swc1 $f30, 240(a0)) + EX(swc1 $f31, 248(a0)) jr ra - EX(sw t1,(SC_FPC_CSR)(a0)) + EX(sw t1, (a1)) .set pop END(_save_fp_context) -/* - * Restore FPU state: - * - fp gp registers - * - cp1 status/control register +/** + * _restore_fp_context() - restore FP context to the FPU + * @a0 - pointer to fpregs field of sigcontext + * @a1 - pointer to fpc_csr field of sigcontext * - * We base the decision which registers to restore from the signal stack - * frame on the current content of c0_status, not on the content of the - * stack frame which might have been changed by the user. + * Restore FP context, including the 32 FP data registers and the FP + * control & status register, from signal context to the FPU. 
*/ LEAF(_restore_fp_context) .set push SET_HARDFLOAT li v0, 0 # assume success - EX(lw t0,(SC_FPC_CSR)(a0)) - EX(lwc1 $f0,(SC_FPREGS+0)(a0)) - EX(lwc1 $f1,(SC_FPREGS+8)(a0)) - EX(lwc1 $f2,(SC_FPREGS+16)(a0)) - EX(lwc1 $f3,(SC_FPREGS+24)(a0)) - EX(lwc1 $f4,(SC_FPREGS+32)(a0)) - EX(lwc1 $f5,(SC_FPREGS+40)(a0)) - EX(lwc1 $f6,(SC_FPREGS+48)(a0)) - EX(lwc1 $f7,(SC_FPREGS+56)(a0)) - EX(lwc1 $f8,(SC_FPREGS+64)(a0)) - EX(lwc1 $f9,(SC_FPREGS+72)(a0)) - EX(lwc1 $f10,(SC_FPREGS+80)(a0)) - EX(lwc1 $f11,(SC_FPREGS+88)(a0)) - EX(lwc1 $f12,(SC_FPREGS+96)(a0)) - EX(lwc1 $f13,(SC_FPREGS+104)(a0)) - EX(lwc1 $f14,(SC_FPREGS+112)(a0)) - EX(lwc1 $f15,(SC_FPREGS+120)(a0)) - EX(lwc1 $f16,(SC_FPREGS+128)(a0)) - EX(lwc1 $f17,(SC_FPREGS+136)(a0)) - EX(lwc1 $f18,(SC_FPREGS+144)(a0)) - EX(lwc1 $f19,(SC_FPREGS+152)(a0)) - EX(lwc1 $f20,(SC_FPREGS+160)(a0)) - EX(lwc1 $f21,(SC_FPREGS+168)(a0)) - EX(lwc1 $f22,(SC_FPREGS+176)(a0)) - EX(lwc1 $f23,(SC_FPREGS+184)(a0)) - EX(lwc1 $f24,(SC_FPREGS+192)(a0)) - EX(lwc1 $f25,(SC_FPREGS+200)(a0)) - EX(lwc1 $f26,(SC_FPREGS+208)(a0)) - EX(lwc1 $f27,(SC_FPREGS+216)(a0)) - EX(lwc1 $f28,(SC_FPREGS+224)(a0)) - EX(lwc1 $f29,(SC_FPREGS+232)(a0)) - EX(lwc1 $f30,(SC_FPREGS+240)(a0)) - EX(lwc1 $f31,(SC_FPREGS+248)(a0)) + EX(lw t0, (a1)) + EX(lwc1 $f0, 0(a0)) + EX(lwc1 $f1, 8(a0)) + EX(lwc1 $f2, 16(a0)) + EX(lwc1 $f3, 24(a0)) + EX(lwc1 $f4, 32(a0)) + EX(lwc1 $f5, 40(a0)) + EX(lwc1 $f6, 48(a0)) + EX(lwc1 $f7, 56(a0)) + EX(lwc1 $f8, 64(a0)) + EX(lwc1 $f9, 72(a0)) + EX(lwc1 $f10, 80(a0)) + EX(lwc1 $f11, 88(a0)) + EX(lwc1 $f12, 96(a0)) + EX(lwc1 $f13, 104(a0)) + EX(lwc1 $f14, 112(a0)) + EX(lwc1 $f15, 120(a0)) + EX(lwc1 $f16, 128(a0)) + EX(lwc1 $f17, 136(a0)) + EX(lwc1 $f18, 144(a0)) + EX(lwc1 $f19, 152(a0)) + EX(lwc1 $f20, 160(a0)) + EX(lwc1 $f21, 168(a0)) + EX(lwc1 $f22, 176(a0)) + EX(lwc1 $f23, 184(a0)) + EX(lwc1 $f24, 192(a0)) + EX(lwc1 $f25, 200(a0)) + EX(lwc1 $f26, 208(a0)) + EX(lwc1 $f27, 216(a0)) + EX(lwc1 $f28, 224(a0)) + EX(lwc1 $f29, 232(a0)) + EX(lwc1 $f30, 240(a0)) + EX(lwc1 $f31, 248(a0)) jr ra - ctc1 t0,fcr31 + ctc1 t0, fcr31 .set pop END(_restore_fp_context) .set reorder diff --git a/arch/mips/kernel/r6000_fpu.S b/arch/mips/kernel/r6000_fpu.S index 47077380c15c..9cc7bfab3419 100644 --- a/arch/mips/kernel/r6000_fpu.S +++ b/arch/mips/kernel/r6000_fpu.S @@ -21,7 +21,14 @@ .set push SET_HARDFLOAT - /* Save floating point context */ +/** + * _save_fp_context() - save FP context from the FPU + * @a0 - pointer to fpregs field of sigcontext + * @a1 - pointer to fpc_csr field of sigcontext + * + * Save FP context, including the 32 FP data registers and the FP + * control & status register, from the FPU to signal context. 
+ */ LEAF(_save_fp_context) mfc0 t0,CP0_STATUS sll t0,t0,2 @@ -30,59 +37,59 @@ cfc1 t1,fcr31 /* Store the 16 double precision registers */ - sdc1 $f0,(SC_FPREGS+0)(a0) - sdc1 $f2,(SC_FPREGS+16)(a0) - sdc1 $f4,(SC_FPREGS+32)(a0) - sdc1 $f6,(SC_FPREGS+48)(a0) - sdc1 $f8,(SC_FPREGS+64)(a0) - sdc1 $f10,(SC_FPREGS+80)(a0) - sdc1 $f12,(SC_FPREGS+96)(a0) - sdc1 $f14,(SC_FPREGS+112)(a0) - sdc1 $f16,(SC_FPREGS+128)(a0) - sdc1 $f18,(SC_FPREGS+144)(a0) - sdc1 $f20,(SC_FPREGS+160)(a0) - sdc1 $f22,(SC_FPREGS+176)(a0) - sdc1 $f24,(SC_FPREGS+192)(a0) - sdc1 $f26,(SC_FPREGS+208)(a0) - sdc1 $f28,(SC_FPREGS+224)(a0) - sdc1 $f30,(SC_FPREGS+240)(a0) + sdc1 $f0,0(a0) + sdc1 $f2,16(a0) + sdc1 $f4,32(a0) + sdc1 $f6,48(a0) + sdc1 $f8,64(a0) + sdc1 $f10,80(a0) + sdc1 $f12,96(a0) + sdc1 $f14,112(a0) + sdc1 $f16,128(a0) + sdc1 $f18,144(a0) + sdc1 $f20,160(a0) + sdc1 $f22,176(a0) + sdc1 $f24,192(a0) + sdc1 $f26,208(a0) + sdc1 $f28,224(a0) + sdc1 $f30,240(a0) jr ra - sw t0,SC_FPC_CSR(a0) + sw t0,(a1) 1: jr ra nop END(_save_fp_context) -/* Restore FPU state: - * - fp gp registers - * - cp1 status/control register +/** + * _restore_fp_context() - restore FP context to the FPU + * @a0 - pointer to fpregs field of sigcontext + * @a1 - pointer to fpc_csr field of sigcontext * - * We base the decision which registers to restore from the signal stack - * frame on the current content of c0_status, not on the content of the - * stack frame which might have been changed by the user. + * Restore FP context, including the 32 FP data registers and the FP + * control & status register, from signal context to the FPU. */ LEAF(_restore_fp_context) mfc0 t0,CP0_STATUS sll t0,t0,2 bgez t0,1f - lw t0,SC_FPC_CSR(a0) + lw t0,(a1) /* Restore the 16 double precision registers */ - ldc1 $f0,(SC_FPREGS+0)(a0) - ldc1 $f2,(SC_FPREGS+16)(a0) - ldc1 $f4,(SC_FPREGS+32)(a0) - ldc1 $f6,(SC_FPREGS+48)(a0) - ldc1 $f8,(SC_FPREGS+64)(a0) - ldc1 $f10,(SC_FPREGS+80)(a0) - ldc1 $f12,(SC_FPREGS+96)(a0) - ldc1 $f14,(SC_FPREGS+112)(a0) - ldc1 $f16,(SC_FPREGS+128)(a0) - ldc1 $f18,(SC_FPREGS+144)(a0) - ldc1 $f20,(SC_FPREGS+160)(a0) - ldc1 $f22,(SC_FPREGS+176)(a0) - ldc1 $f24,(SC_FPREGS+192)(a0) - ldc1 $f26,(SC_FPREGS+208)(a0) - ldc1 $f28,(SC_FPREGS+224)(a0) - ldc1 $f30,(SC_FPREGS+240)(a0) + ldc1 $f0,0(a0) + ldc1 $f2,16(a0) + ldc1 $f4,32(a0) + ldc1 $f6,48(a0) + ldc1 $f8,64(a0) + ldc1 $f10,80(a0) + ldc1 $f12,96(a0) + ldc1 $f14,112(a0) + ldc1 $f16,128(a0) + ldc1 $f18,144(a0) + ldc1 $f20,160(a0) + ldc1 $f22,176(a0) + ldc1 $f24,192(a0) + ldc1 $f26,208(a0) + ldc1 $f28,224(a0) + ldc1 $f30,240(a0) jr ra ctc1 t0,fcr31 1: jr ra -- cgit v1.2.3 From f92722dc4545ebfa0f99a2f986fd88c112a22a42 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" Date: Mon, 31 Oct 2016 16:27:40 +0000 Subject: MIPS: Correct MIPS I FP sigcontext layout Complement commit 80cbfad79096 ("MIPS: Correct MIPS I FP context layout") and correct the way Floating Point General registers are stored in a signal context with MIPS I hardware. Use the S.D and L.D assembly macros to have pairs of SWC1 instructions and pairs of LWC1 instructions produced, respectively, in an arrangement which makes the memory representation of floating-point data passed compatible with that used by hardware SDC1 and LDC1 instructions, where available, regardless of the hardware endianness used. This matches the layout used by r4k_fpu.S, ensuring run-time compatibility for MIPS I software across all o32 hardware platforms. 
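For reference, a sketch of the expansion this relies on (hypothetical listing, not from the patch): on a MIPS I target the assembler turns each S.D macro into a pair of SWC1 instructions whose offsets depend on endianness, so the in-memory image matches what SDC1 would write:

    # Hypothetical expansion of "s.d $f0, 0(a0)" on MIPS I:
    #
    #   little endian:          big endian:
    #     swc1 $f0, 0(a0)         swc1 $f0, 4(a0)
    #     swc1 $f1, 4(a0)         swc1 $f1, 0(a0)
    #
    # Either way the resulting 64-bit memory pattern is the one
    # "sdc1 $f0, 0(a0)" produces on MIPS II and later.
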
Define an EX2 macro to handle exceptions from both hardware instructions implicitly produced from S.D and L.D assembly macros. Signed-off-by: Maciej W. Rozycki Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Patchwork: https://patchwork.linux-mips.org/patch/14477/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/r2300_fpu.S | 103 ++++++++++++++++--------------------------- 1 file changed, 39 insertions(+), 64 deletions(-) (limited to 'arch') diff --git a/arch/mips/kernel/r2300_fpu.S b/arch/mips/kernel/r2300_fpu.S index 70ca63752cfa..918f2f6d3861 100644 --- a/arch/mips/kernel/r2300_fpu.S +++ b/arch/mips/kernel/r2300_fpu.S @@ -24,6 +24,13 @@ PTR 9b,fault; \ .previous +#define EX2(a,b) \ +9: a,##b; \ + .section __ex_table,"a"; \ + PTR 9b,bad_stack; \ + PTR 9b+4,bad_stack; \ + .previous + .set noreorder .set mips1 @@ -40,38 +47,22 @@ LEAF(_save_fp_context) SET_HARDFLOAT li v0, 0 # assume success cfc1 t1, fcr31 - EX(swc1 $f0, 0(a0)) - EX(swc1 $f1, 8(a0)) - EX(swc1 $f2, 16(a0)) - EX(swc1 $f3, 24(a0)) - EX(swc1 $f4, 32(a0)) - EX(swc1 $f5, 40(a0)) - EX(swc1 $f6, 48(a0)) - EX(swc1 $f7, 56(a0)) - EX(swc1 $f8, 64(a0)) - EX(swc1 $f9, 72(a0)) - EX(swc1 $f10, 80(a0)) - EX(swc1 $f11, 88(a0)) - EX(swc1 $f12, 96(a0)) - EX(swc1 $f13, 104(a0)) - EX(swc1 $f14, 112(a0)) - EX(swc1 $f15, 120(a0)) - EX(swc1 $f16, 128(a0)) - EX(swc1 $f17, 136(a0)) - EX(swc1 $f18, 144(a0)) - EX(swc1 $f19, 152(a0)) - EX(swc1 $f20, 160(a0)) - EX(swc1 $f21, 168(a0)) - EX(swc1 $f22, 176(a0)) - EX(swc1 $f23, 184(a0)) - EX(swc1 $f24, 192(a0)) - EX(swc1 $f25, 200(a0)) - EX(swc1 $f26, 208(a0)) - EX(swc1 $f27, 216(a0)) - EX(swc1 $f28, 224(a0)) - EX(swc1 $f29, 232(a0)) - EX(swc1 $f30, 240(a0)) - EX(swc1 $f31, 248(a0)) + EX2(s.d $f0, 0(a0)) + EX2(s.d $f2, 16(a0)) + EX2(s.d $f4, 32(a0)) + EX2(s.d $f6, 48(a0)) + EX2(s.d $f8, 64(a0)) + EX2(s.d $f10, 80(a0)) + EX2(s.d $f12, 96(a0)) + EX2(s.d $f14, 112(a0)) + EX2(s.d $f16, 128(a0)) + EX2(s.d $f18, 144(a0)) + EX2(s.d $f20, 160(a0)) + EX2(s.d $f22, 176(a0)) + EX2(s.d $f24, 192(a0)) + EX2(s.d $f26, 208(a0)) + EX2(s.d $f28, 224(a0)) + EX2(s.d $f30, 240(a0)) jr ra EX(sw t1, (a1)) .set pop @@ -90,38 +81,22 @@ LEAF(_restore_fp_context) SET_HARDFLOAT li v0, 0 # assume success EX(lw t0, (a1)) - EX(lwc1 $f0, 0(a0)) - EX(lwc1 $f1, 8(a0)) - EX(lwc1 $f2, 16(a0)) - EX(lwc1 $f3, 24(a0)) - EX(lwc1 $f4, 32(a0)) - EX(lwc1 $f5, 40(a0)) - EX(lwc1 $f6, 48(a0)) - EX(lwc1 $f7, 56(a0)) - EX(lwc1 $f8, 64(a0)) - EX(lwc1 $f9, 72(a0)) - EX(lwc1 $f10, 80(a0)) - EX(lwc1 $f11, 88(a0)) - EX(lwc1 $f12, 96(a0)) - EX(lwc1 $f13, 104(a0)) - EX(lwc1 $f14, 112(a0)) - EX(lwc1 $f15, 120(a0)) - EX(lwc1 $f16, 128(a0)) - EX(lwc1 $f17, 136(a0)) - EX(lwc1 $f18, 144(a0)) - EX(lwc1 $f19, 152(a0)) - EX(lwc1 $f20, 160(a0)) - EX(lwc1 $f21, 168(a0)) - EX(lwc1 $f22, 176(a0)) - EX(lwc1 $f23, 184(a0)) - EX(lwc1 $f24, 192(a0)) - EX(lwc1 $f25, 200(a0)) - EX(lwc1 $f26, 208(a0)) - EX(lwc1 $f27, 216(a0)) - EX(lwc1 $f28, 224(a0)) - EX(lwc1 $f29, 232(a0)) - EX(lwc1 $f30, 240(a0)) - EX(lwc1 $f31, 248(a0)) + EX2(l.d $f0, 0(a0)) + EX2(l.d $f2, 16(a0)) + EX2(l.d $f4, 32(a0)) + EX2(l.d $f6, 48(a0)) + EX2(l.d $f8, 64(a0)) + EX2(l.d $f10, 80(a0)) + EX2(l.d $f12, 96(a0)) + EX2(l.d $f14, 112(a0)) + EX2(l.d $f16, 128(a0)) + EX2(l.d $f18, 144(a0)) + EX2(l.d $f20, 160(a0)) + EX2(l.d $f22, 176(a0)) + EX2(l.d $f24, 192(a0)) + EX2(l.d $f26, 208(a0)) + EX2(l.d $f28, 224(a0)) + EX2(l.d $f30, 240(a0)) jr ra ctc1 t0, fcr31 .set pop -- cgit v1.2.3 From 16a767ec63167ef70c056795782d6c9c76ba5a5c Mon Sep 17 00:00:00 2001 From: James Hogan Date: Tue, 1 Nov 2016 
13:59:09 +0000 Subject: MIPS: Fix max_low_pfn with disabled highmem When low memory doesn't reach HIGHMEM_START (e.g. up to 256MB at PA=0 is common) and highmem is present above HIGHMEM_START (e.g. on Malta the RAM overlayed by the IO region is aliased at PA=0x90000000), max_low_pfn will be initially calculated very large and then clipped down to HIGHMEM_START. This causes crashes when reading /sys/kernel/mm/page_idle/bitmap (i.e. CONFIG_IDLE_PAGE_TRACKING=y) when highmem is disabled. pfn_valid() will compare against max_mapnr which is derived from max_low_pfn when there is no highend_pfn set up, and will return true for PFNs right up to HIGHMEM_START, even though they are beyond the end of low memory and no page structs will actually exist for these PFNs. This is fixed by skipping high memory regions when initially calculating max_low_pfn if highmem is disabled, so it doesn't get clipped too high. We also clip regions which overlap the highmem boundary when highmem is disabled, so that max_pfn doesn't extend into highmem either. Signed-off-by: James Hogan Cc: Paul Burton Cc: linux-mips@linux-mips.org Patchwork: https://patchwork.linux-mips.org/patch/14490/ Signed-off-by: Ralf Baechle --- arch/mips/kernel/setup.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch') diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index 0d57909d9026..f66e5ce505b2 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -368,6 +368,19 @@ static void __init bootmem_init(void) end = PFN_DOWN(boot_mem_map.map[i].addr + boot_mem_map.map[i].size); +#ifndef CONFIG_HIGHMEM + /* + * Skip highmem here so we get an accurate max_low_pfn if low + * memory stops short of high memory. + * If the region overlaps HIGHMEM_START, end is clipped so + * max_pfn excludes the highmem portion. + */ + if (start >= PFN_DOWN(HIGHMEM_START)) + continue; + if (end > PFN_DOWN(HIGHMEM_START)) + end = PFN_DOWN(HIGHMEM_START); +#endif + if (end > max_low_pfn) max_low_pfn = end; if (start < min_low_pfn) -- cgit v1.2.3 From 272d01bd790fdf3f1b16372fe28136e27756756f Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 3 Nov 2016 18:34:34 +0000 Subject: arm64: Fix circular include of asm/lse.h through linux/jump_label.h Commit efd9e03facd0 ("arm64: Use static keys for CPU features") introduced support for static keys in asm/cpufeature.h, including linux/jump_label.h. When CC_HAVE_ASM_GOTO is not defined, this causes a circular dependency via linux/atomic.h, asm/lse.h and asm/cpufeature.h. This patch moves the capability macros out of asm/cpufeature.h into a separate asm/cpucaps.h and modifies some of the #includes accordingly.
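The shape of the fix is the standard cure for include cycles: hoist the bare constants into a leaf header with no #include lines of its own, so any low-level header can consume them without dragging cpufeature.h back in. A minimal sketch of the pattern (hypothetical file and macro names, not the arm64 headers):

    /* caps.h - hypothetical leaf header: preprocessor constants only,
     * no #include lines, so it can never close an include cycle. */
    #ifndef CAPS_H
    #define CAPS_H

    #define HAS_FEATURE_A   0
    #define HAS_FEATURE_B   1
    #define NCAPS           2

    #endif /* CAPS_H */
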
Fixes: efd9e03facd0 ("arm64: Use static keys for CPU features") Reported-by: Artem Savkov Tested-by: Artem Savkov Signed-off-by: Catalin Marinas Signed-off-by: Will Deacon --- arch/arm64/include/asm/alternative.h | 2 +- arch/arm64/include/asm/cpucaps.h | 40 ++++++++++++++++++++++++++++++++++++ arch/arm64/include/asm/cpufeature.h | 20 +----------------- arch/arm64/include/asm/lse.h | 1 - 4 files changed, 42 insertions(+), 21 deletions(-) create mode 100644 arch/arm64/include/asm/cpucaps.h (limited to 'arch') diff --git a/arch/arm64/include/asm/alternative.h b/arch/arm64/include/asm/alternative.h index 39feb85a6931..6e1cb8c5af4d 100644 --- a/arch/arm64/include/asm/alternative.h +++ b/arch/arm64/include/asm/alternative.h @@ -1,7 +1,7 @@ #ifndef __ASM_ALTERNATIVE_H #define __ASM_ALTERNATIVE_H -#include +#include #include #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h new file mode 100644 index 000000000000..87b446535185 --- /dev/null +++ b/arch/arm64/include/asm/cpucaps.h @@ -0,0 +1,40 @@ +/* + * arch/arm64/include/asm/cpucaps.h + * + * Copyright (C) 2016 ARM Ltd. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef __ASM_CPUCAPS_H +#define __ASM_CPUCAPS_H + +#define ARM64_WORKAROUND_CLEAN_CACHE 0 +#define ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE 1 +#define ARM64_WORKAROUND_845719 2 +#define ARM64_HAS_SYSREG_GIC_CPUIF 3 +#define ARM64_HAS_PAN 4 +#define ARM64_HAS_LSE_ATOMICS 5 +#define ARM64_WORKAROUND_CAVIUM_23154 6 +#define ARM64_WORKAROUND_834220 7 +#define ARM64_HAS_NO_HW_PREFETCH 8 +#define ARM64_HAS_UAO 9 +#define ARM64_ALT_PAN_NOT_UAO 10 +#define ARM64_HAS_VIRT_HOST_EXTN 11 +#define ARM64_WORKAROUND_CAVIUM_27456 12 +#define ARM64_HAS_32BIT_EL0 13 +#define ARM64_HYP_OFFSET_LOW 14 +#define ARM64_MISMATCHED_CACHE_LINE_SIZE 15 + +#define ARM64_NCAPS 16 + +#endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index a27c3245ba21..0bc0b1de90c4 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -11,6 +11,7 @@ #include +#include #include #include @@ -24,25 +25,6 @@ #define MAX_CPU_FEATURES (8 * sizeof(elf_hwcap)) #define cpu_feature(x) ilog2(HWCAP_ ## x) -#define ARM64_WORKAROUND_CLEAN_CACHE 0 -#define ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE 1 -#define ARM64_WORKAROUND_845719 2 -#define ARM64_HAS_SYSREG_GIC_CPUIF 3 -#define ARM64_HAS_PAN 4 -#define ARM64_HAS_LSE_ATOMICS 5 -#define ARM64_WORKAROUND_CAVIUM_23154 6 -#define ARM64_WORKAROUND_834220 7 -#define ARM64_HAS_NO_HW_PREFETCH 8 -#define ARM64_HAS_UAO 9 -#define ARM64_ALT_PAN_NOT_UAO 10 -#define ARM64_HAS_VIRT_HOST_EXTN 11 -#define ARM64_WORKAROUND_CAVIUM_27456 12 -#define ARM64_HAS_32BIT_EL0 13 -#define ARM64_HYP_OFFSET_LOW 14 -#define ARM64_MISMATCHED_CACHE_LINE_SIZE 15 - -#define ARM64_NCAPS 16 - #ifndef __ASSEMBLY__ #include diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h index 23acc00be32d..fc756e22c84c 100644 --- a/arch/arm64/include/asm/lse.h +++ 
b/arch/arm64/include/asm/lse.h @@ -5,7 +5,6 @@ #include #include -#include #ifdef __ASSEMBLER__ -- cgit v1.2.3 From 2c7a5c5c48d97ce3105f3258a259f67b7b9d7eb1 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Sat, 24 Sep 2016 07:15:02 -0700 Subject: openrisc: Define __ro_after_init to avoid crash openrisc qemu tests fail with the following crash. Unable to handle kernel access at virtual address 0xc0300c34 Oops#: 0001 CPU #: 0 PC: c016c710 SR: 0000ae67 SP: c1017e04 GPR00: 00000000 GPR01: c1017e04 GPR02: c0300c34 GPR03: c0300c34 GPR04: 00000000 GPR05: c0300cb0 GPR06: c0300c34 GPR07: 000000ff GPR08: c107f074 GPR09: c0199ef4 GPR10: c1016000 GPR11: 00000000 GPR12: 00000000 GPR13: c107f044 GPR14: c0473774 GPR15: 07ce0000 GPR16: 00000000 GPR17: c107ed8a GPR18: 00009600 GPR19: c107f044 GPR20: c107ee74 GPR21: 00000003 GPR22: c0473770 GPR23: 00000033 GPR24: 000000bf GPR25: 00000019 GPR26: c046400c GPR27: 00000001 GPR28: c0464028 GPR29: c1018000 GPR30: 00000006 GPR31: ccf37483 RES: 00000000 oGPR11: ffffffff Process swapper (pid: 1, stackpage=c1001960) Stack: Stack dump [0xc1017cf8]: sp + 00: 0xc1017e04 sp + 04: 0xc0300c34 sp + 08: 0xc0300c34 sp + 12: 0x00000000 ... Bisect points to commit d2ec3f77de8e ("pty: make ptmx file ops read-only after init"). Fix by defining __ro_after_init for the openrisc architecture, similar to parisc. Fixes: d2ec3f77de8e ("pty: make ptmx file ops read-only after init") Cc: Kees Cook Signed-off-by: Guenter Roeck Acked-by: Stafford Horne --- arch/openrisc/include/asm/cache.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/openrisc/include/asm/cache.h b/arch/openrisc/include/asm/cache.h index 4ce7a01a252d..5f55da9cbfd5 100644 --- a/arch/openrisc/include/asm/cache.h +++ b/arch/openrisc/include/asm/cache.h @@ -23,6 +23,8 @@ * they shouldn't be hard-coded! */ +#define __ro_after_init __read_mostly + #define L1_CACHE_BYTES 16 #define L1_CACHE_SHIFT 4 -- cgit v1.2.3
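A closing note on what the annotation means for its users: __ro_after_init data is written during boot and treated as read-only afterwards, and an architecture that cannot enforce the write protection may simply alias it, as openrisc does here with __read_mostly. A hedged usage sketch (hypothetical kernel-style code, not from this patch):

    #include <linux/cache.h>        /* __ro_after_init */
    #include <linux/init.h>

    /* Hypothetical example: assigned once in an initcall, read-only
     * (or merely read-mostly, on openrisc) for the rest of the run. */
    static unsigned long boot_features __ro_after_init;

    static int __init boot_features_setup(void)
    {
            boot_features = 0x3;    /* still in init: write is legal */
            return 0;
    }
    early_initcall(boot_features_setup);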