From 31255e072b2e91f97645d792d25b2db744186dd1 Mon Sep 17 00:00:00 2001 From: Rick Edgecombe Date: Tue, 7 Nov 2023 10:22:51 -0800 Subject: x86/shstk: Delay signal entry SSP write until after user accesses MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a signal is being delivered, the kernel needs to make accesses to userspace. These accesses could encounter an access error, in which case the signal delivery itself will trigger a segfault. Usually this would result in the kernel killing the process. But in the case of a SEGV signal handler being configured, the failure of the first signal delivery will result in *another* signal getting delivered. The second signal may succeed if another thread has resolved the issue that triggered the segfault (i.e. a well timed mprotect()/mmap()), or the second signal is being delivered to another stack (i.e. an alt stack). On x86, in the non-shadow stack case, all the accesses to userspace are done before changes to the registers (in pt_regs). The operation is aborted when an access error occurs, so although there may be writes done for the first signal, control flow changes for the signal (regs->ip, regs->sp, etc) are not committed until all the accesses have already completed successfully. This means that the second signal will be delivered as if it happened at the time of the first signal. It will effectively replace the first aborted signal, overwriting the half-written frame of the aborted signal. So on sigreturn from the second signal, control flow will resume happily from the point of control flow where the original signal was delivered. The problem is, when shadow stack is active, the shadow stack SSP register/MSR is updated *before* some of the userspace accesses. This means if the earlier accesses succeed and the later ones fail, the second signal will not be delivered at the same spot on the shadow stack as the first one. So on sigreturn from the second signal, the SSP will be pointing to the wrong location on the shadow stack (off by a frame). Pengfei privately reported that while using a shadow stack enabled glibc, the “signal06” test in the LTP test-suite hung. It turns out it is testing the above described double signal scenario. When this test was compiled with shadow stack, the first signal pushed a shadow stack sigframe, then the second pushed another. When the second signal was handled, the SSP was at the first shadow stack signal frame instead of the original location. The test then got stuck as the #CP from the twice incremented SSP was incorrect and generated segfaults in a loop. Fix this by adjusting the SSP register only after any userspace accesses, such that there can be no failures after the SSP is adjusted. Do this by moving the shadow stack sigframe push logic to happen after all other userspace accesses. Note, sigreturn (as opposed to the signal delivery dealt with in this patch) has ordering behavior that could lead to similar failures. The ordering issues there extend beyond shadow stack to include the alt stack restoration. Fixing that would require cross-arch changes, and the ordering today does not cause any known test or apps breakages. So leave it as is, for now. [ dhansen: minor changelog/subject tweak ] Fixes: 05e36022c054 ("x86/shstk: Handle signals for shadow stack") Reported-by: Pengfei Xu Signed-off-by: Rick Edgecombe Signed-off-by: Dave Hansen Tested-by: Pengfei Xu Cc:stable@vger.kernel.org Link: https://lore.kernel.org/all/20231107182251.91276-1-rick.p.edgecombe%40intel.com Link: https://github.com/linux-test-project/ltp/blob/master/testcases/kernel/syscalls/signal/signal06.c --- arch/x86/kernel/signal_64.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c index cacf2ede6217..23d8aaf8d9fd 100644 --- a/arch/x86/kernel/signal_64.c +++ b/arch/x86/kernel/signal_64.c @@ -175,9 +175,6 @@ int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) frame = get_sigframe(ksig, regs, sizeof(struct rt_sigframe), &fp); uc_flags = frame_uc_flags(regs); - if (setup_signal_shadow_stack(ksig)) - return -EFAULT; - if (!user_access_begin(frame, sizeof(*frame))) return -EFAULT; @@ -198,6 +195,9 @@ int x64_setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs) return -EFAULT; } + if (setup_signal_shadow_stack(ksig)) + return -EFAULT; + /* Set up registers for signal handler */ regs->di = ksig->sig; /* In case the signal handler was declared without prototypes */ -- cgit v1.2.3 From ec9aedb2aa1ab7ac420c00b31f5edc5be15ec167 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Mon, 3 Jul 2023 00:28:02 +0800 Subject: x86/acpi: Ignore invalid x2APIC entries Currently, the kernel enumerates the possible CPUs by parsing both ACPI MADT Local APIC entries and x2APIC entries. So CPUs with "valid" APIC IDs, even if they have duplicated APIC IDs in Local APIC and x2APIC, are always enumerated. Below is what ACPI MADT Local APIC and x2APIC describes on an Ivebridge-EP system, [02Ch 0044 1] Subtable Type : 00 [Processor Local APIC] [02Fh 0047 1] Local Apic ID : 00 ... [164h 0356 1] Subtable Type : 00 [Processor Local APIC] [167h 0359 1] Local Apic ID : 39 [16Ch 0364 1] Subtable Type : 00 [Processor Local APIC] [16Fh 0367 1] Local Apic ID : FF ... [3ECh 1004 1] Subtable Type : 09 [Processor Local x2APIC] [3F0h 1008 4] Processor x2Apic ID : 00000000 ... [B5Ch 2908 1] Subtable Type : 09 [Processor Local x2APIC] [B60h 2912 4] Processor x2Apic ID : 00000077 As a result, kernel shows "smpboot: Allowing 168 CPUs, 120 hotplug CPUs". And this wastes significant amount of memory for the per-cpu data. Plus this also breaks https://lore.kernel.org/all/87edm36qqb.ffs@tglx/, because __max_logical_packages is over-estimated by the APIC IDs in the x2APIC entries. According to https://uefi.org/specs/ACPI/6.5/05_ACPI_Software_Programming_Model.html#processor-local-x2apic-structure: "[Compatibility note] On some legacy OSes, Logical processors with APIC ID values less than 255 (whether in XAPIC or X2APIC mode) must use the Processor Local APIC structure to convey their APIC information to OSPM, and those processors must be declared in the DSDT using the Processor() keyword. Logical processors with APIC ID values 255 and greater must use the Processor Local x2APIC structure and be declared using the Device() keyword." Therefore prevent the registration of x2APIC entries with an APIC ID less than 255 if the local APIC table enumerates valid APIC IDs. [ tglx: Simplify the logic ] Signed-off-by: Zhang Rui Signed-off-by: Thomas Gleixner Tested-by: Peter Zijlstra Link: https://lore.kernel.org/r/20230702162802.344176-1-rui.zhang@intel.com --- arch/x86/kernel/acpi/boot.c | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index c55c0ef47a18..fc5bce1b5047 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -63,6 +63,7 @@ int acpi_fix_pin2_polarity __initdata; #ifdef CONFIG_X86_LOCAL_APIC static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; +static bool has_lapic_cpus __initdata; static bool acpi_support_online_capable; #endif @@ -232,6 +233,14 @@ acpi_parse_x2apic(union acpi_subtable_headers *header, const unsigned long end) if (!acpi_is_processor_usable(processor->lapic_flags)) return 0; + /* + * According to https://uefi.org/specs/ACPI/6.5/05_ACPI_Software_Programming_Model.html#processor-local-x2apic-structure + * when MADT provides both valid LAPIC and x2APIC entries, the APIC ID + * in x2APIC must be equal or greater than 0xff. + */ + if (has_lapic_cpus && apic_id < 0xff) + return 0; + /* * We need to register disabled CPU as well to permit * counting disabled CPUs. This allows us to size @@ -1114,10 +1123,7 @@ static int __init early_acpi_parse_madt_lapic_addr_ovr(void) static int __init acpi_parse_madt_lapic_entries(void) { - int count; - int x2count = 0; - int ret; - struct acpi_subtable_proc madt_proc[2]; + int count, x2count = 0; if (!boot_cpu_has(X86_FEATURE_APIC)) return -ENODEV; @@ -1126,21 +1132,11 @@ static int __init acpi_parse_madt_lapic_entries(void) acpi_parse_sapic, MAX_LOCAL_APIC); if (!count) { - memset(madt_proc, 0, sizeof(madt_proc)); - madt_proc[0].id = ACPI_MADT_TYPE_LOCAL_APIC; - madt_proc[0].handler = acpi_parse_lapic; - madt_proc[1].id = ACPI_MADT_TYPE_LOCAL_X2APIC; - madt_proc[1].handler = acpi_parse_x2apic; - ret = acpi_table_parse_entries_array(ACPI_SIG_MADT, - sizeof(struct acpi_table_madt), - madt_proc, ARRAY_SIZE(madt_proc), MAX_LOCAL_APIC); - if (ret < 0) { - pr_err("Error parsing LAPIC/X2APIC entries\n"); - return ret; - } - - count = madt_proc[0].count; - x2count = madt_proc[1].count; + count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC, + acpi_parse_lapic, MAX_LOCAL_APIC); + has_lapic_cpus = count > 0; + x2count = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_X2APIC, + acpi_parse_x2apic, MAX_LOCAL_APIC); } if (!count && !x2count) { pr_err("No LAPIC entries present\n"); -- cgit v1.2.3 From 7e8037b099c0bbe8f2109dc452dbcab8d400fc53 Mon Sep 17 00:00:00 2001 From: Saurabh Sengar Date: Sat, 11 Nov 2023 00:37:47 -0800 Subject: x86/hyperv: Fix the detection of E820_TYPE_PRAM in a Gen2 VM A Gen2 VM doesn't support legacy PCI/PCIe, so both raw_pci_ops and raw_pci_ext_ops are NULL, and pci_subsys_init() -> pcibios_init() doesn't call pcibios_resource_survey() -> e820__reserve_resources_late(); as a result, any emulated persistent memory of E820_TYPE_PRAM (12) via the kernel parameter memmap=nn[KMG]!ss is not added into iomem_resource and hence can't be detected by register_e820_pmem(). Fix this by directly calling e820__reserve_resources_late() in hv_pci_init(), which is called from arch_initcall(pci_arch_init). It's ok to move a Gen2 VM's e820__reserve_resources_late() from subsys_initcall(pci_subsys_init) to arch_initcall(pci_arch_init) because the code in-between doesn't depend on the E820 resources. e820__reserve_resources_late() depends on e820__reserve_resources(), which has been called earlier from setup_arch(). For a Gen-2 VM, the new hv_pci_init() also adds any memory of E820_TYPE_PMEM (7) into iomem_resource, and acpi_nfit_register_region() -> acpi_nfit_insert_resource() -> region_intersects() returns REGION_INTERSECTS, so the memory of E820_TYPE_PMEM won't get added twice. Changed the local variable "int gen2vm" to "bool gen2vm". Signed-off-by: Saurabh Sengar Signed-off-by: Dexuan Cui Signed-off-by: Wei Liu Message-ID: <1699691867-9827-1-git-send-email-ssengar@linux.microsoft.com> --- arch/x86/hyperv/hv_init.c | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 21556ad87f4b..8f3a4d16bb79 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -286,15 +287,31 @@ static int hv_cpu_die(unsigned int cpu) static int __init hv_pci_init(void) { - int gen2vm = efi_enabled(EFI_BOOT); + bool gen2vm = efi_enabled(EFI_BOOT); /* - * For Generation-2 VM, we exit from pci_arch_init() by returning 0. - * The purpose is to suppress the harmless warning: + * A Generation-2 VM doesn't support legacy PCI/PCIe, so both + * raw_pci_ops and raw_pci_ext_ops are NULL, and pci_subsys_init() -> + * pcibios_init() doesn't call pcibios_resource_survey() -> + * e820__reserve_resources_late(); as a result, any emulated persistent + * memory of E820_TYPE_PRAM (12) via the kernel parameter + * memmap=nn[KMG]!ss is not added into iomem_resource and hence can't be + * detected by register_e820_pmem(). Fix this by directly calling + * e820__reserve_resources_late() here: e820__reserve_resources_late() + * depends on e820__reserve_resources(), which has been called earlier + * from setup_arch(). Note: e820__reserve_resources_late() also adds + * any memory of E820_TYPE_PMEM (7) into iomem_resource, and + * acpi_nfit_register_region() -> acpi_nfit_insert_resource() -> + * region_intersects() returns REGION_INTERSECTS, so the memory of + * E820_TYPE_PMEM won't get added twice. + * + * We return 0 here so that pci_arch_init() won't print the warning: * "PCI: Fatal: No config space access function found" */ - if (gen2vm) + if (gen2vm) { + e820__reserve_resources_late(); return 0; + } /* For Generation-1 VM, we'll proceed in pci_arch_init(). */ return 1; -- cgit v1.2.3 From bfa993b355d33a438a746523e7129391c8664e8a Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Wed, 8 Nov 2023 16:25:15 -0500 Subject: acpi/processor: sanitize _OSC/_PDC capabilities for Xen dom0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Processor capability bits notify ACPI of the OS capabilities, and so ACPI can adjust the return of other Processor methods taking the OS capabilities into account. When Linux is running as a Xen dom0, the hypervisor is the entity in charge of processor power management, and hence Xen needs to make sure the capabilities reported by _OSC/_PDC match the capabilities of the driver in Xen. Introduce a small helper to sanitize the buffer when running as Xen dom0. When Xen supports HWP, this serves as the equivalent of commit a21211672c9a ("ACPI / processor: Request native thermal interrupt handling via _OSC") to avoid SMM crashes. Xen will set bit ACPI_PROC_CAP_COLLAB_PROC_PERF (bit 12) in the capability bits and the _OSC/_PDC call will apply it. [ jandryuk: Mention Xen HWP's need. Support _OSC & _PDC ] Signed-off-by: Roger Pau Monné Cc: stable@vger.kernel.org Signed-off-by: Jason Andryuk Reviewed-by: Michal Wilczynski Reviewed-by: Juergen Gross Link: https://lore.kernel.org/r/20231108212517.72279-1-jandryuk@gmail.com Signed-off-by: Juergen Gross --- arch/x86/include/asm/acpi.h | 14 ++++++++++++++ arch/x86/include/asm/xen/hypervisor.h | 9 +++++++++ drivers/xen/pcpu.c | 22 ++++++++++++++++++++++ 3 files changed, 45 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index c8a7fc23f63c..f896eed4516c 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -16,6 +16,9 @@ #include #include #include +#include + +#include #ifdef CONFIG_ACPI_APEI # include @@ -127,6 +130,17 @@ static inline void arch_acpi_set_proc_cap_bits(u32 *cap) if (!cpu_has(c, X86_FEATURE_MWAIT) || boot_option_idle_override == IDLE_NOMWAIT) *cap &= ~(ACPI_PROC_CAP_C_C1_FFH | ACPI_PROC_CAP_C_C2C3_FFH); + + if (xen_initial_domain()) { + /* + * When Linux is running as Xen dom0, the hypervisor is the + * entity in charge of the processor power management, and so + * Xen needs to check the OS capabilities reported in the + * processor capabilities buffer matches what the hypervisor + * driver supports. + */ + xen_sanitize_proc_cap_bits(cap); + } } static inline bool acpi_has_cpu_in_madt(void) diff --git a/arch/x86/include/asm/xen/hypervisor.h b/arch/x86/include/asm/xen/hypervisor.h index 7048dfacc04b..a9088250770f 100644 --- a/arch/x86/include/asm/xen/hypervisor.h +++ b/arch/x86/include/asm/xen/hypervisor.h @@ -100,4 +100,13 @@ static inline void leave_lazy(enum xen_lazy_mode mode) enum xen_lazy_mode xen_get_lazy_mode(void); +#if defined(CONFIG_XEN_DOM0) && defined(CONFIG_ACPI) +void xen_sanitize_proc_cap_bits(uint32_t *buf); +#else +static inline void xen_sanitize_proc_cap_bits(uint32_t *buf) +{ + BUG(); +} +#endif + #endif /* _ASM_X86_XEN_HYPERVISOR_H */ diff --git a/drivers/xen/pcpu.c b/drivers/xen/pcpu.c index b3e3d1bb37f3..508655273145 100644 --- a/drivers/xen/pcpu.c +++ b/drivers/xen/pcpu.c @@ -47,6 +47,9 @@ #include #include +#ifdef CONFIG_ACPI +#include +#endif /* * @cpu_id: Xen physical cpu logic number @@ -400,4 +403,23 @@ bool __init xen_processor_present(uint32_t acpi_id) return online; } + +void xen_sanitize_proc_cap_bits(uint32_t *cap) +{ + struct xen_platform_op op = { + .cmd = XENPF_set_processor_pminfo, + .u.set_pminfo.id = -1, + .u.set_pminfo.type = XEN_PM_PDC, + }; + u32 buf[3] = { ACPI_PDC_REVISION_ID, 1, *cap }; + int ret; + + set_xen_guest_handle(op.u.set_pminfo.pdc, buf); + ret = HYPERVISOR_platform_op(&op); + if (ret) + pr_err("sanitize of _PDC buffer bits from Xen failed: %d\n", + ret); + else + *cap = buf[2]; +} #endif -- cgit v1.2.3 From 80aea01c48971a1fffc0252d036995572d84950d Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Thu, 2 Nov 2023 16:35:49 +0100 Subject: KVM: s390: vsie: fix wrong VIR 37 when MSO is used When the host invalidates a guest page, it will also check if the page was used to map the prefix of any guest CPUs, in which case they are stopped and marked as needing a prefix refresh. Upon starting the affected CPUs again, their prefix pages are explicitly faulted in and revalidated if they had been invalidated. A bit in the PGSTEs indicates whether or not a page might contain a prefix. The bit is allowed to overindicate. Pages above 2G are skipped, because they cannot be prefixes, since KVM runs all guests with MSO = 0. The same applies for nested guests (VSIE). When the host invalidates a guest page that maps the prefix of the nested guest, it has to stop the affected nested guest CPUs and mark them as needing a prefix refresh. The same PGSTE bit used for the guest prefix is also used for the nested guest. Pages above 2G are skipped like for normal guests, which is the source of the bug. The nested guest runs is the guest primary address space. The guest could be running the nested guest using MSO != 0. If the MSO + prefix for the nested guest is above 2G, the check for nested prefix will skip it. This will cause the invalidation notifier to not stop the CPUs of the nested guest and not mark them as needing refresh. When the nested guest is run again, its prefix will not be refreshed, since it has not been marked for refresh. This will cause a fatal validity intercept with VIR code 37. Fix this by removing the check for 2G for nested guests. Now all invalidations of pages with the notify bit set will always scan the existing VSIE shadow state descriptors. This allows to catch invalidations of nested guest prefix mappings even when the prefix is above 2G in the guest virtual address space. Fixes: a3508fbe9dc6 ("KVM: s390: vsie: initial support for nested virtualization") Tested-by: Nico Boehr Reviewed-by: Nico Boehr Reviewed-by: David Hildenbrand Message-ID: <20231102153549.53984-1-imbrenda@linux.ibm.com> Signed-off-by: Claudio Imbrenda --- arch/s390/kvm/vsie.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 02dcbe82a8e5..8207a892bbe2 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -587,10 +587,6 @@ void kvm_s390_vsie_gmap_notifier(struct gmap *gmap, unsigned long start, if (!gmap_is_shadow(gmap)) return; - if (start >= 1UL << 31) - /* We are only interested in prefix pages */ - return; - /* * Only new shadow blocks are added to the list during runtime, * therefore we can safely reference them all the time. -- cgit v1.2.3 From 27072b8e18a73ffeffb1c140939023915a35134b Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Thu, 9 Nov 2023 13:36:24 +0100 Subject: KVM: s390/mm: Properly reset no-dat When the CMMA state needs to be reset, the no-dat bit also needs to be reset. Failure to do so could cause issues in the guest, since the guest expects the bit to be cleared after a reset. Cc: Reviewed-by: Nico Boehr Message-ID: <20231109123624.37314-1-imbrenda@linux.ibm.com> Signed-off-by: Claudio Imbrenda --- arch/s390/mm/pgtable.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 3bd2ab2a9a34..5cb92941540b 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -756,7 +756,7 @@ void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, pte_clear(mm, addr, ptep); } if (reset) - pgste_val(pgste) &= ~_PGSTE_GPS_USAGE_MASK; + pgste_val(pgste) &= ~(_PGSTE_GPS_USAGE_MASK | _PGSTE_GPS_NODAT); pgste_set_unlock(ptep, pgste); preempt_enable(); } -- cgit v1.2.3 From 5f74f820f6fc844b95f9e5e406e0a07d97510420 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 13 Nov 2023 11:12:57 +0100 Subject: parisc: fix mmap_base calculation when stack grows upwards Matoro reported various userspace crashes on the parisc platform with kernel 6.6 and bisected it to commit 3033cd430768 ("parisc: Use generic mmap top-down layout and brk randomization"). That commit switched parisc to use the common infrastructure to calculate mmap_base, but missed that the mmap_base() function takes care for architectures where the stack grows downwards only. Fix the mmap_base() calculation to include the stack-grows-upwards case and thus fix the userspace crashes on parisc. Link: https://lkml.kernel.org/r/ZVH2qeS1bG7/1J/l@p100 Fixes: 3033cd430768 ("parisc: Use generic mmap top-down layout and brk randomization") Signed-off-by: Helge Deller Reported-by: matoro Tested-by: matoro Cc: [6.6+] Signed-off-by: Andrew Morton --- arch/parisc/Kconfig | 6 +++--- arch/parisc/include/asm/elf.h | 10 +--------- arch/parisc/include/asm/processor.h | 2 ++ arch/parisc/kernel/sys_parisc.c | 2 +- mm/util.c | 10 ++++++++++ 5 files changed, 17 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index fd69dfa0cdab..a7c9c0e69e5a 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -140,11 +140,11 @@ config ARCH_MMAP_RND_COMPAT_BITS_MIN default 8 config ARCH_MMAP_RND_BITS_MAX - default 24 if 64BIT - default 17 + default 18 if 64BIT + default 13 config ARCH_MMAP_RND_COMPAT_BITS_MAX - default 17 + default 13 # unless you want to implement ACPI on PA-RISC ... ;-) config PM diff --git a/arch/parisc/include/asm/elf.h b/arch/parisc/include/asm/elf.h index 140eaa97bf21..2d73d3c3cd37 100644 --- a/arch/parisc/include/asm/elf.h +++ b/arch/parisc/include/asm/elf.h @@ -349,15 +349,7 @@ struct pt_regs; /* forward declaration... */ #define ELF_HWCAP 0 -/* Masks for stack and mmap randomization */ -#define BRK_RND_MASK (is_32bit_task() ? 0x07ffUL : 0x3ffffUL) -#define MMAP_RND_MASK (is_32bit_task() ? 0x1fffUL : 0x3ffffUL) -#define STACK_RND_MASK MMAP_RND_MASK - -struct mm_struct; -extern unsigned long arch_randomize_brk(struct mm_struct *); -#define arch_randomize_brk arch_randomize_brk - +#define STACK_RND_MASK 0x7ff /* 8MB of VA */ #define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 struct linux_binprm; diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h index c05d121cf5d0..982aca20f56f 100644 --- a/arch/parisc/include/asm/processor.h +++ b/arch/parisc/include/asm/processor.h @@ -47,6 +47,8 @@ #ifndef __ASSEMBLY__ +struct rlimit; +unsigned long mmap_upper_limit(struct rlimit *rlim_stack); unsigned long calc_max_stack_size(unsigned long stack_max); /* diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index ab896eff7a1d..98af719d5f85 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -77,7 +77,7 @@ unsigned long calc_max_stack_size(unsigned long stack_max) * indicating that "current" should be used instead of a passed-in * value from the exec bprm as done with arch_pick_mmap_layout(). */ -static unsigned long mmap_upper_limit(struct rlimit *rlim_stack) +unsigned long mmap_upper_limit(struct rlimit *rlim_stack) { unsigned long stack_base; diff --git a/mm/util.c b/mm/util.c index aa01f6ea5a75..744b4d7e3fae 100644 --- a/mm/util.c +++ b/mm/util.c @@ -414,6 +414,15 @@ static int mmap_is_legacy(struct rlimit *rlim_stack) static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack) { +#ifdef CONFIG_STACK_GROWSUP + /* + * For an upwards growing stack the calculation is much simpler. + * Memory for the maximum stack size is reserved at the top of the + * task. mmap_base starts directly below the stack and grows + * downwards. + */ + return PAGE_ALIGN_DOWN(mmap_upper_limit(rlim_stack) - rnd); +#else unsigned long gap = rlim_stack->rlim_cur; unsigned long pad = stack_guard_gap; @@ -431,6 +440,7 @@ static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack) gap = MAX_GAP; return PAGE_ALIGN(STACK_TOP - gap - rnd); +#endif } void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack) -- cgit v1.2.3 From 0b6240d697a96eaa45a2a5503a274ebb4f162fa3 Mon Sep 17 00:00:00 2001 From: Jonas Karlman Date: Sun, 5 Nov 2023 23:36:15 +0000 Subject: arm64: dts: rockchip: Expand reg size of vdec node for RK3328 Expand the reg size for the vdec node to include cache/performance registers the rkvdec driver writes to. Fixes: 17408c9b119d ("arm64: dts: rockchip: Add vdec support for RK3328") Signed-off-by: Jonas Karlman Link: https://lore.kernel.org/r/20231105233630.3927502-9-jonas@kwiboo.se Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3328.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index e729e7a22b23..cc8209795c3e 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -668,7 +668,7 @@ vdec: video-codec@ff360000 { compatible = "rockchip,rk3328-vdec", "rockchip,rk3399-vdec"; - reg = <0x0 0xff360000 0x0 0x400>; + reg = <0x0 0xff360000 0x0 0x480>; interrupts = ; clocks = <&cru ACLK_RKVDEC>, <&cru HCLK_RKVDEC>, <&cru SCLK_VDEC_CABAC>, <&cru SCLK_VDEC_CORE>; -- cgit v1.2.3 From 35938c18291b5da7422b2fac6dac0af11aa8d0d7 Mon Sep 17 00:00:00 2001 From: Alex Bee Date: Sun, 5 Nov 2023 23:36:16 +0000 Subject: arm64: dts: rockchip: Expand reg size of vdec node for RK3399 Expand the reg size for the vdec node to include cache/performance registers the rkvdec driver writes to. Also add missing clocks to the related power-domain. Fixes: cbd7214402ec ("arm64: dts: rockchip: Define the rockchip Video Decoder node on rk3399") Signed-off-by: Alex Bee Signed-off-by: Jonas Karlman Link: https://lore.kernel.org/r/20231105233630.3927502-10-jonas@kwiboo.se Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399.dtsi | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/rockchip/rk3399.dtsi b/arch/arm64/boot/dts/rockchip/rk3399.dtsi index faf02e59d6c7..da0dfb237f85 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399.dtsi @@ -1109,7 +1109,9 @@ power-domain@RK3399_PD_VDU { reg = ; clocks = <&cru ACLK_VDU>, - <&cru HCLK_VDU>; + <&cru HCLK_VDU>, + <&cru SCLK_VDU_CA>, + <&cru SCLK_VDU_CORE>; pm_qos = <&qos_video_m1_r>, <&qos_video_m1_w>; #power-domain-cells = <0>; @@ -1384,7 +1386,7 @@ vdec: video-codec@ff660000 { compatible = "rockchip,rk3399-vdec"; - reg = <0x0 0xff660000 0x0 0x400>; + reg = <0x0 0xff660000 0x0 0x480>; interrupts = ; clocks = <&cru ACLK_VDU>, <&cru HCLK_VDU>, <&cru SCLK_VDU_CA>, <&cru SCLK_VDU_CORE>; -- cgit v1.2.3 From 3cee9c635f27d1003d46f624d816f3455698b625 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Tue, 14 Nov 2023 16:38:34 +0100 Subject: arm64: dts: rockchip: fix rk356x pcie msg interrupt name The expected name by the binding at this position is "msg" and the SoC's manual also calls the interrupt in question "msg", so fix the rk356x dtsi to use the correct name. Reviewed-by: Sebastian Reichel Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20231114153834.934978-1-heiko@sntech.de --- arch/arm64/boot/dts/rockchip/rk356x.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/rockchip/rk356x.dtsi b/arch/arm64/boot/dts/rockchip/rk356x.dtsi index 0964761e3ce9..c19c0f1b3778 100644 --- a/arch/arm64/boot/dts/rockchip/rk356x.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk356x.dtsi @@ -977,7 +977,7 @@ , , ; - interrupt-names = "sys", "pmc", "msi", "legacy", "err"; + interrupt-names = "sys", "pmc", "msg", "legacy", "err"; bus-range = <0x0 0xf>; clocks = <&cru ACLK_PCIE20_MST>, <&cru ACLK_PCIE20_SLV>, <&cru ACLK_PCIE20_DBI>, <&cru PCLK_PCIE20>, -- cgit v1.2.3 From e80ed63affc9a9b4aacb44180ecd7ed601839599 Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Tue, 24 Oct 2023 09:20:35 +0100 Subject: riscv: dts: sophgo: remove address-cells from intc node A recent submission [1] from Rob has added additionalProperties: false to the interrupt-controller child node of RISC-V cpus, highlighting that the new cv1800b DT has been incorrectly using #address-cells. It has no child nodes, so #address-cells is not needed. Remove it. Link: https://patchwork.kernel.org/project/linux-riscv/patch/20230915201946.4184468-1-robh@kernel.org/ [1] Fixes: c3dffa879cca ("riscv: dts: sophgo: add initial CV1800B SoC device tree") Reviewed-by: Jisheng Zhang Acked-by: Chen Wang Signed-off-by: Conor Dooley --- arch/riscv/boot/dts/sophgo/cv1800b.dtsi | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/boot/dts/sophgo/cv1800b.dtsi b/arch/riscv/boot/dts/sophgo/cv1800b.dtsi index df40e87ee063..aec6401a467b 100644 --- a/arch/riscv/boot/dts/sophgo/cv1800b.dtsi +++ b/arch/riscv/boot/dts/sophgo/cv1800b.dtsi @@ -34,7 +34,6 @@ cpu0_intc: interrupt-controller { compatible = "riscv,cpu-intc"; interrupt-controller; - #address-cells = <0>; #interrupt-cells = <1>; }; }; -- cgit v1.2.3 From 721d28f3dfb3e40c45ce45fbeeff47b72c230bc9 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 16 Nov 2023 11:13:40 -0800 Subject: parisc: Replace strlcpy() with strscpy() strlcpy() reads the entire source buffer first. This read may exceed the destination size limit. This is both inefficient and can lead to linear read overflows if a source string is not NUL-terminated[1]. Additionally, it returns the size of the source string, not the resulting size of the destination string. In an effort to remove strlcpy() completely[2], replace strlcpy() here with strscpy(). Link: https://www.kernel.org/doc/html/latest/process/deprecated.html#strlcpy [1] Link: https://github.com/KSPP/linux/issues/89 [2] Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: Azeem Shaikh Cc: linux-parisc@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: Helge Deller --- arch/parisc/kernel/processor.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/parisc/kernel/processor.c b/arch/parisc/kernel/processor.c index 29e2750f86a4..e95a977ba5f3 100644 --- a/arch/parisc/kernel/processor.c +++ b/arch/parisc/kernel/processor.c @@ -383,7 +383,7 @@ show_cpuinfo (struct seq_file *m, void *v) char cpu_name[60], *p; /* strip PA path from CPU name to not confuse lscpu */ - strlcpy(cpu_name, per_cpu(cpu_data, 0).dev->name, sizeof(cpu_name)); + strscpy(cpu_name, per_cpu(cpu_data, 0).dev->name, sizeof(cpu_name)); p = strrchr(cpu_name, '['); if (p) *(--p) = 0; -- cgit v1.2.3 From 8e4ece6889a5b1836b6a135827ac831a5350602a Mon Sep 17 00:00:00 2001 From: Kunkun Jiang Date: Mon, 20 Nov 2023 21:12:10 +0800 Subject: KVM: arm64: GICv4: Do not perform a map to a mapped vLPI Before performing a map, let's check whether the vLPI has been mapped. Fixes: 196b136498b3 ("KVM: arm/arm64: GICv4: Wire mapping/unmapping of VLPIs in VFIO irq bypass") Signed-off-by: Kunkun Jiang Acked-by: Marc Zyngier Reviewed-by: Zenghui Yu Link: https://lore.kernel.org/r/20231120131210.2039-1-jiangkunkun@huawei.com Signed-off-by: Oliver Upton --- arch/arm64/kvm/vgic/vgic-v4.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kvm/vgic/vgic-v4.c b/arch/arm64/kvm/vgic/vgic-v4.c index 339a55194b2c..74a67ad87f29 100644 --- a/arch/arm64/kvm/vgic/vgic-v4.c +++ b/arch/arm64/kvm/vgic/vgic-v4.c @@ -436,6 +436,10 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq, if (ret) goto out; + /* Silently exit if the vLPI is already mapped */ + if (irq->hw) + goto out; + /* * Emit the mapping request. If it fails, the ITS probably * isn't v4 compatible, so let's silently bail out. Holding -- cgit v1.2.3 From d3ec75bc635cb0cb8185b63293d33a3d1b942d22 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Tue, 21 Nov 2023 15:03:25 +0800 Subject: LoongArch: Add dependency between vmlinuz.efi and vmlinux.efi MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A common issue in Makefile is a race in parallel building. You need to be careful to prevent multiple threads from writing to the same file simultaneously. Commit 3939f3345050 ("ARM: 8418/1: add boot image dependencies to not generate invalid images") addressed such a bad scenario. A similar symptom occurs with the following command: $ make -j$(nproc) ARCH=loongarch vmlinux.efi vmlinuz.efi [ snip ] SORTTAB vmlinux OBJCOPY arch/loongarch/boot/vmlinux.efi OBJCOPY arch/loongarch/boot/vmlinux.efi PAD arch/loongarch/boot/vmlinux.bin GZIP arch/loongarch/boot/vmlinuz OBJCOPY arch/loongarch/boot/vmlinuz.o LD arch/loongarch/boot/vmlinuz.efi.elf OBJCOPY arch/loongarch/boot/vmlinuz.efi The log "OBJCOPY arch/loongarch/boot/vmlinux.efi" is displayed twice. It indicates that two threads simultaneously enter arch/loongarch/boot/ and write to arch/loongarch/boot/vmlinux.efi. It occasionally leads to a build failure: $ make -j$(nproc) ARCH=loongarch vmlinux.efi vmlinuz.efi [ snip ] SORTTAB vmlinux OBJCOPY arch/loongarch/boot/vmlinux.efi PAD arch/loongarch/boot/vmlinux.bin truncate: Invalid number: ‘arch/loongarch/boot/vmlinux.bin’ make[2]: *** [drivers/firmware/efi/libstub/Makefile.zboot:13: arch/loongarch/boot/vmlinux.bin] Error 1 make[2]: *** Deleting file 'arch/loongarch/boot/vmlinux.bin' make[1]: *** [arch/loongarch/Makefile:146: vmlinuz.efi] Error 2 make[1]: *** Waiting for unfinished jobs.... make: *** [Makefile:234: __sub-make] Error 2 vmlinuz.efi depends on vmlinux.efi, but such a dependency is not specified in arch/loongarch/Makefile. Signed-off-by: Masahiro Yamada Signed-off-by: Huacai Chen --- arch/loongarch/Makefile | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index 9eeb0c05f3f4..a8d65eaf1211 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -142,6 +142,8 @@ vdso-install-y += arch/loongarch/vdso/vdso.so.dbg all: $(notdir $(KBUILD_IMAGE)) +vmlinuz.efi: vmlinux.efi + vmlinux.elf vmlinux.efi vmlinuz.efi: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(bootvars-y) $(boot)/$@ -- cgit v1.2.3 From cbfd44bd5c6eec0aada0c39130f0b8d7ecba0529 Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Tue, 21 Nov 2023 15:03:25 +0800 Subject: LoongArch: Explicitly set -fdirect-access-external-data for vmlinux After this llvm commit [1], The -fno-pic does not imply direct access external data. Explicitly set -fdirect-access-external-data for vmlinux that can avoids GOT entries. Link: https://github.com/llvm/llvm-project/commit/47eeee297775347cbdb7624d6a766c2a3eec4a59 Suggested-by: Xi Ruoyao Signed-off-by: WANG Rui Signed-off-by: Huacai Chen --- arch/loongarch/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index a8d65eaf1211..204b94b2e6aa 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -68,6 +68,7 @@ LDFLAGS_vmlinux += -static -n -nostdlib ifdef CONFIG_AS_HAS_EXPLICIT_RELOCS cflags-y += $(call cc-option,-mexplicit-relocs) KBUILD_CFLAGS_KERNEL += $(call cc-option,-mdirect-extern-access) +KBUILD_CFLAGS_KERNEL += $(call cc-option,-fdirect-access-external-data) KBUILD_AFLAGS_MODULE += $(call cc-option,-fno-direct-access-external-data) KBUILD_CFLAGS_MODULE += $(call cc-option,-fno-direct-access-external-data) KBUILD_AFLAGS_MODULE += $(call cc-option,-mno-relax) $(call cc-option,-Wa$(comma)-mno-relax) -- cgit v1.2.3 From aa0cbc1b506b090c3a775b547c693ada108cc0d7 Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Tue, 21 Nov 2023 15:03:25 +0800 Subject: LoongArch: Record pc instead of offset in la_abs relocation To clarify, the previous version functioned flawlessly. However, it's worth noting that the LLVM's LoongArch backend currently lacks support for cross-section label calculations. With this patch, we enable the use of clang to compile relocatable kernels. Tested-by: Nathan Chancellor Signed-off-by: WANG Rui Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/asmmacro.h | 3 +-- arch/loongarch/include/asm/setup.h | 2 +- arch/loongarch/kernel/relocate.c | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/loongarch/include/asm/asmmacro.h b/arch/loongarch/include/asm/asmmacro.h index c9544f358c33..655db7d7a427 100644 --- a/arch/loongarch/include/asm/asmmacro.h +++ b/arch/loongarch/include/asm/asmmacro.h @@ -609,8 +609,7 @@ lu32i.d \reg, 0 lu52i.d \reg, \reg, 0 .pushsection ".la_abs", "aw", %progbits - 768: - .dword 768b-766b + .dword 766b .dword \sym .popsection #endif diff --git a/arch/loongarch/include/asm/setup.h b/arch/loongarch/include/asm/setup.h index a0bc159ce8bd..ee52fb1e9963 100644 --- a/arch/loongarch/include/asm/setup.h +++ b/arch/loongarch/include/asm/setup.h @@ -25,7 +25,7 @@ extern void set_merr_handler(unsigned long offset, void *addr, unsigned long len #ifdef CONFIG_RELOCATABLE struct rela_la_abs { - long offset; + long pc; long symvalue; }; diff --git a/arch/loongarch/kernel/relocate.c b/arch/loongarch/kernel/relocate.c index 6c3eff9af9fb..288b739ca88d 100644 --- a/arch/loongarch/kernel/relocate.c +++ b/arch/loongarch/kernel/relocate.c @@ -52,7 +52,7 @@ static inline void __init relocate_absolute(long random_offset) for (p = begin; (void *)p < end; p++) { long v = p->symvalue; uint32_t lu12iw, ori, lu32id, lu52id; - union loongarch_instruction *insn = (void *)p - p->offset; + union loongarch_instruction *insn = (void *)p->pc; lu12iw = (v >> 12) & 0xfffff; ori = v & 0xfff; -- cgit v1.2.3 From ee2daf7102f42007b7bf5dbd1ae76478ee62c512 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 21 Nov 2023 15:03:25 +0800 Subject: LoongArch: Add __percpu annotation for __percpu_read()/__percpu_write() When build kernel with C=1, we get: arch/loongarch/kernel/process.c:234:46: warning: incorrect type in argument 1 (different address spaces) arch/loongarch/kernel/process.c:234:46: expected void *ptr arch/loongarch/kernel/process.c:234:46: got unsigned long [noderef] __percpu * arch/loongarch/kernel/process.c:234:46: warning: incorrect type in argument 1 (different address spaces) arch/loongarch/kernel/process.c:234:46: expected void *ptr arch/loongarch/kernel/process.c:234:46: got unsigned long [noderef] __percpu * arch/loongarch/kernel/process.c:234:46: warning: incorrect type in argument 1 (different address spaces) arch/loongarch/kernel/process.c:234:46: expected void *ptr arch/loongarch/kernel/process.c:234:46: got unsigned long [noderef] __percpu * arch/loongarch/kernel/process.c:234:46: warning: incorrect type in argument 1 (different address spaces) arch/loongarch/kernel/process.c:234:46: expected void *ptr arch/loongarch/kernel/process.c:234:46: got unsigned long [noderef] __percpu * Add __percpu annotation for __percpu_read()/__percpu_write() can avoid such warnings. __percpu_xchg() and other functions don't need annotation because their wrapper, i.e. _pcp_protect(), already suppresses warnings. Also adjust the indentations in this file. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202311080409.LlOfTR3m-lkp@intel.com/ Closes: https://lore.kernel.org/oe-kbuild-all/202311080840.Vc2kXhfp-lkp@intel.com/ Closes: https://lore.kernel.org/oe-kbuild-all/202311081340.3k72KKdg-lkp@intel.com/ Closes: https://lore.kernel.org/oe-kbuild-all/202311120926.cjYHyoYw-lkp@intel.com/ Closes: https://lore.kernel.org/oe-kbuild-all/202311152142.g6UyNx1R-lkp@intel.com/ Closes: https://lore.kernel.org/oe-kbuild-all/202311160339.DbhaH8LX-lkp@intel.com/ Closes: https://lore.kernel.org/oe-kbuild-all/202311181454.CTPrSYmQ-lkp@intel.com/ Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/percpu.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/loongarch/include/asm/percpu.h b/arch/loongarch/include/asm/percpu.h index ed5da02b1cf6..9b36ac003f89 100644 --- a/arch/loongarch/include/asm/percpu.h +++ b/arch/loongarch/include/asm/percpu.h @@ -40,13 +40,13 @@ static __always_inline unsigned long __percpu_##op(void *ptr, \ switch (size) { \ case 4: \ __asm__ __volatile__( \ - "am"#asm_op".w" " %[ret], %[val], %[ptr] \n" \ + "am"#asm_op".w" " %[ret], %[val], %[ptr] \n" \ : [ret] "=&r" (ret), [ptr] "+ZB"(*(u32 *)ptr) \ : [val] "r" (val)); \ break; \ case 8: \ __asm__ __volatile__( \ - "am"#asm_op".d" " %[ret], %[val], %[ptr] \n" \ + "am"#asm_op".d" " %[ret], %[val], %[ptr] \n" \ : [ret] "=&r" (ret), [ptr] "+ZB"(*(u64 *)ptr) \ : [val] "r" (val)); \ break; \ @@ -63,7 +63,7 @@ PERCPU_OP(and, and, &) PERCPU_OP(or, or, |) #undef PERCPU_OP -static __always_inline unsigned long __percpu_read(void *ptr, int size) +static __always_inline unsigned long __percpu_read(void __percpu *ptr, int size) { unsigned long ret; @@ -100,7 +100,7 @@ static __always_inline unsigned long __percpu_read(void *ptr, int size) return ret; } -static __always_inline void __percpu_write(void *ptr, unsigned long val, int size) +static __always_inline void __percpu_write(void __percpu *ptr, unsigned long val, int size) { switch (size) { case 1: @@ -132,8 +132,7 @@ static __always_inline void __percpu_write(void *ptr, unsigned long val, int siz } } -static __always_inline unsigned long __percpu_xchg(void *ptr, unsigned long val, - int size) +static __always_inline unsigned long __percpu_xchg(void *ptr, unsigned long val, int size) { switch (size) { case 1: -- cgit v1.2.3 From 902d75cdf0cf0a3fb58550089ee519abf12566f5 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 21 Nov 2023 15:03:25 +0800 Subject: LoongArch: Silence the boot warning about 'nokaslr' The kernel parameter 'nokaslr' is handled before start_kernel(), so we don't need early_param() to mark it technically. But it can cause a boot warning as follows: Unknown kernel command line parameters "nokaslr", will be passed to user space. When we use 'init=/bin/bash', 'nokaslr' which passed to user space will even cause a kernel panic. So we use early_param() to mark 'nokaslr', simply print a notice and silence the boot warning (also fix a potential panic). This logic is similar to RISC-V. Signed-off-by: Huacai Chen --- arch/loongarch/kernel/relocate.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/loongarch/kernel/relocate.c b/arch/loongarch/kernel/relocate.c index 288b739ca88d..1acfa704c8d0 100644 --- a/arch/loongarch/kernel/relocate.c +++ b/arch/loongarch/kernel/relocate.c @@ -102,6 +102,14 @@ static inline __init unsigned long get_random_boot(void) return hash; } +static int __init nokaslr(char *p) +{ + pr_info("KASLR is disabled.\n"); + + return 0; /* Print a notice and silence the boot warning */ +} +early_param("nokaslr", nokaslr); + static inline __init bool kaslr_disabled(void) { char *str; -- cgit v1.2.3 From 19d86a496233731882aea7ec24505ce6641b1c0c Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 21 Nov 2023 15:03:25 +0800 Subject: LoongArch: Mark {dmw,tlb}_virt_to_page() exports as non-GPL Mark {dmw,tlb}_virt_to_page() exports as non-GPL, in order to let out-of-tree modules (e.g. OpenZFS) be built without errors. Otherwise we get: ERROR: modpost: GPL-incompatible module zfs.ko uses GPL-only symbol 'dmw_virt_to_page' ERROR: modpost: GPL-incompatible module zfs.ko uses GPL-only symbol 'tlb_virt_to_page' Reported-by: Haowu Ge Signed-off-by: Huacai Chen --- arch/loongarch/mm/pgtable.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/loongarch/mm/pgtable.c b/arch/loongarch/mm/pgtable.c index 71d0539e2d0b..2aae72e63871 100644 --- a/arch/loongarch/mm/pgtable.c +++ b/arch/loongarch/mm/pgtable.c @@ -13,13 +13,13 @@ struct page *dmw_virt_to_page(unsigned long kaddr) { return pfn_to_page(virt_to_pfn(kaddr)); } -EXPORT_SYMBOL_GPL(dmw_virt_to_page); +EXPORT_SYMBOL(dmw_virt_to_page); struct page *tlb_virt_to_page(unsigned long kaddr) { return pfn_to_page(pte_pfn(*virt_to_kpte(kaddr))); } -EXPORT_SYMBOL_GPL(tlb_virt_to_page); +EXPORT_SYMBOL(tlb_virt_to_page); pgd_t *pgd_alloc(struct mm_struct *mm) { -- cgit v1.2.3 From d43f37b73468c172bc89ac4824a1511b411f0778 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Tue, 21 Nov 2023 15:03:25 +0800 Subject: LoongArch: Implement constant timer shutdown interface When a cpu is hot-unplugged, it is put in idle state and the function arch_cpu_idle_dead() is called. The timer interrupt for this processor should be disabled, otherwise there will be pending timer interrupt for the unplugged cpu, so that vcpu is prevented from giving up scheduling when system is running in vm mode. This patch implements the timer shutdown interface so that the constant timer will be properly disabled when a CPU is hot-unplugged. Reviewed-by: WANG Xuerui Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kernel/time.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/loongarch/kernel/time.c b/arch/loongarch/kernel/time.c index 3064af94db9c..e7015f7b70e3 100644 --- a/arch/loongarch/kernel/time.c +++ b/arch/loongarch/kernel/time.c @@ -58,14 +58,16 @@ static int constant_set_state_oneshot(struct clock_event_device *evt) return 0; } -static int constant_set_state_oneshot_stopped(struct clock_event_device *evt) +static int constant_set_state_periodic(struct clock_event_device *evt) { + unsigned long period; unsigned long timer_config; raw_spin_lock(&state_lock); - timer_config = csr_read64(LOONGARCH_CSR_TCFG); - timer_config &= ~CSR_TCFG_EN; + period = const_clock_freq / HZ; + timer_config = period & CSR_TCFG_VAL; + timer_config |= (CSR_TCFG_PERIOD | CSR_TCFG_EN); csr_write64(timer_config, LOONGARCH_CSR_TCFG); raw_spin_unlock(&state_lock); @@ -73,16 +75,14 @@ static int constant_set_state_oneshot_stopped(struct clock_event_device *evt) return 0; } -static int constant_set_state_periodic(struct clock_event_device *evt) +static int constant_set_state_shutdown(struct clock_event_device *evt) { - unsigned long period; unsigned long timer_config; raw_spin_lock(&state_lock); - period = const_clock_freq / HZ; - timer_config = period & CSR_TCFG_VAL; - timer_config |= (CSR_TCFG_PERIOD | CSR_TCFG_EN); + timer_config = csr_read64(LOONGARCH_CSR_TCFG); + timer_config &= ~CSR_TCFG_EN; csr_write64(timer_config, LOONGARCH_CSR_TCFG); raw_spin_unlock(&state_lock); @@ -90,11 +90,6 @@ static int constant_set_state_periodic(struct clock_event_device *evt) return 0; } -static int constant_set_state_shutdown(struct clock_event_device *evt) -{ - return 0; -} - static int constant_timer_next_event(unsigned long delta, struct clock_event_device *evt) { unsigned long timer_config; @@ -161,7 +156,7 @@ int constant_clockevent_init(void) cd->rating = 320; cd->cpumask = cpumask_of(cpu); cd->set_state_oneshot = constant_set_state_oneshot; - cd->set_state_oneshot_stopped = constant_set_state_oneshot_stopped; + cd->set_state_oneshot_stopped = constant_set_state_shutdown; cd->set_state_periodic = constant_set_state_periodic; cd->set_state_shutdown = constant_set_state_shutdown; cd->set_next_event = constant_timer_next_event; -- cgit v1.2.3 From e8df9d9f4209c04161321d8c12640ae560f65939 Mon Sep 17 00:00:00 2001 From: Dapeng Mi Date: Tue, 21 Nov 2023 09:46:28 +0800 Subject: perf/x86/intel: Correct incorrect 'or' operation for PMU capabilities When running perf-stat command on Intel hybrid platform, perf-stat reports the following errors: sudo taskset -c 7 ./perf stat -vvvv -e cpu_atom/instructions/ sleep 1 Opening: cpu/cycles/:HG ------------------------------------------------------------ perf_event_attr: type 0 (PERF_TYPE_HARDWARE) config 0xa00000000 disabled 1 ------------------------------------------------------------ sys_perf_event_open: pid 0 cpu -1 group_fd -1 flags 0x8 sys_perf_event_open failed, error -16 Performance counter stats for 'sleep 1': cpu_atom/instructions/ It looks the cpu_atom/instructions/ event can't be enabled on atom PMU even when the process is pinned on atom core. Investigation shows that exclusive_event_init() helper always returns -EBUSY error in the perf event creation. That's strange since the atom PMU should not be an exclusive PMU. Further investigation shows the issue was introduced by commit: 97588df87b56 ("perf/x86/intel: Add common intel_pmu_init_hybrid()") The commit originally intents to clear the bit PERF_PMU_CAP_AUX_OUTPUT from PMU capabilities if intel_cap.pebs_output_pt_available is not set, but it incorrectly uses 'or' operation and leads to all PMU capabilities bits are set to 1 except bit PERF_PMU_CAP_AUX_OUTPUT. Testing this fix on Intel hybrid platforms, the observed issues disappear. Fixes: 97588df87b56 ("perf/x86/intel: Add common intel_pmu_init_hybrid()") Signed-off-by: Dapeng Mi Signed-off-by: Ingo Molnar Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20231121014628.729989-1-dapeng1.mi@linux.intel.com --- arch/x86/events/intel/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index a08f794a0e79..ce1c777227b4 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -4660,7 +4660,7 @@ static void intel_pmu_check_hybrid_pmus(struct x86_hybrid_pmu *pmu) if (pmu->intel_cap.pebs_output_pt_available) pmu->pmu.capabilities |= PERF_PMU_CAP_AUX_OUTPUT; else - pmu->pmu.capabilities |= ~PERF_PMU_CAP_AUX_OUTPUT; + pmu->pmu.capabilities &= ~PERF_PMU_CAP_AUX_OUTPUT; intel_pmu_check_event_constraints(pmu->event_constraints, pmu->num_counters, -- cgit v1.2.3 From 2e569ada424c40ce27c99bfab4f9780619061c83 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Wed, 15 Nov 2023 22:02:11 +0100 Subject: x86/microcode: Remove the driver announcement and version First of all, the print is useless. The driver will either load and say which microcode revision the machine has or issue an error. Then, the version number is meaningless and actively confusing, as Yazen mentioned recently: when a subset of patches are backported to a distro kernel, one can't assume the driver version is the same as the upstream one. And besides, the version number of the loader hasn't been used and incremented for a long time. So drop it. Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/20231115210212.9981-2-bp@alien8.de --- arch/x86/kernel/cpu/microcode/core.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 666d25bbc5ad..b4be3a2c79df 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -41,8 +41,6 @@ #include "internal.h" -#define DRIVER_VERSION "2.2" - static struct microcode_ops *microcode_ops; bool dis_ucode_ldr = true; @@ -846,8 +844,6 @@ static int __init microcode_init(void) cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/microcode:online", mc_cpu_online, mc_cpu_down_prep); - pr_info("Microcode Update Driver: v%s.", DRIVER_VERSION); - return 0; out_pdev: -- cgit v1.2.3 From 080990aa3344123673f686cda2df0d1b0deee046 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Wed, 15 Nov 2023 22:02:12 +0100 Subject: x86/microcode: Rework early revisions reporting The AMD side of the loader issues the microcode revision for each logical thread on the system, which can become really noisy on huge machines. And doing that doesn't make a whole lot of sense - the microcode revision is already in /proc/cpuinfo. So in case one is interested in the theoretical support of mixed silicon steppings on AMD, one can check there. What is also missing on the AMD side - something which people have requested before - is showing the microcode revision the CPU had *before* the early update. So abstract that up in the main code and have the BSP on each vendor provide those revision numbers. Then, dump them only once on driver init. On Intel, do not dump the patch date - it is not needed. Reported-by: Linus Torvalds Signed-off-by: Borislav Petkov (AMD) Reviewed-by: Thomas Gleixner Link: https://lore.kernel.org/r/CAHk-=wg=%2B8rceshMkB4VnKxmRccVLtBLPBawnewZuuqyx5U=3A@mail.gmail.com --- arch/x86/kernel/cpu/microcode/amd.c | 39 +++++++++----------------------- arch/x86/kernel/cpu/microcode/core.c | 11 +++++++-- arch/x86/kernel/cpu/microcode/intel.c | 17 ++++++-------- arch/x86/kernel/cpu/microcode/internal.h | 14 ++++++++---- 4 files changed, 37 insertions(+), 44 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 9373ec01c5ae..13b45b9c806d 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -104,8 +104,6 @@ struct cont_desc { size_t size; }; -static u32 ucode_new_rev; - /* * Microcode patch container file is prepended to the initrd in cpio * format. See Documentation/arch/x86/microcode.rst @@ -442,12 +440,11 @@ static int __apply_microcode_amd(struct microcode_amd *mc) * * Returns true if container found (sets @desc), false otherwise. */ -static bool early_apply_microcode(u32 cpuid_1_eax, void *ucode, size_t size) +static bool early_apply_microcode(u32 cpuid_1_eax, u32 old_rev, void *ucode, size_t size) { struct cont_desc desc = { 0 }; struct microcode_amd *mc; bool ret = false; - u32 rev, dummy; desc.cpuid_1_eax = cpuid_1_eax; @@ -457,22 +454,15 @@ static bool early_apply_microcode(u32 cpuid_1_eax, void *ucode, size_t size) if (!mc) return ret; - native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); - /* * Allow application of the same revision to pick up SMT-specific * changes even if the revision of the other SMT thread is already * up-to-date. */ - if (rev > mc->hdr.patch_id) + if (old_rev > mc->hdr.patch_id) return ret; - if (!__apply_microcode_amd(mc)) { - ucode_new_rev = mc->hdr.patch_id; - ret = true; - } - - return ret; + return !__apply_microcode_amd(mc); } static bool get_builtin_microcode(struct cpio_data *cp, unsigned int family) @@ -506,9 +496,12 @@ static void __init find_blobs_in_containers(unsigned int cpuid_1_eax, struct cpi *ret = cp; } -void __init load_ucode_amd_bsp(unsigned int cpuid_1_eax) +void __init load_ucode_amd_bsp(struct early_load_data *ed, unsigned int cpuid_1_eax) { struct cpio_data cp = { }; + u32 dummy; + + native_rdmsr(MSR_AMD64_PATCH_LEVEL, ed->old_rev, dummy); /* Needed in load_microcode_amd() */ ucode_cpu_info[0].cpu_sig.sig = cpuid_1_eax; @@ -517,7 +510,8 @@ void __init load_ucode_amd_bsp(unsigned int cpuid_1_eax) if (!(cp.data && cp.size)) return; - early_apply_microcode(cpuid_1_eax, cp.data, cp.size); + if (early_apply_microcode(cpuid_1_eax, ed->old_rev, cp.data, cp.size)) + native_rdmsr(MSR_AMD64_PATCH_LEVEL, ed->new_rev, dummy); } static enum ucode_state load_microcode_amd(u8 family, const u8 *data, size_t size); @@ -625,10 +619,8 @@ void reload_ucode_amd(unsigned int cpu) rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); if (rev < mc->hdr.patch_id) { - if (!__apply_microcode_amd(mc)) { - ucode_new_rev = mc->hdr.patch_id; - pr_info("reload patch_level=0x%08x\n", ucode_new_rev); - } + if (!__apply_microcode_amd(mc)) + pr_info_once("reload revision: 0x%08x\n", mc->hdr.patch_id); } } @@ -649,8 +641,6 @@ static int collect_cpu_info_amd(int cpu, struct cpu_signature *csig) if (p && (p->patch_id == csig->rev)) uci->mc = p->data; - pr_info("CPU%d: patch_level=0x%08x\n", cpu, csig->rev); - return 0; } @@ -691,8 +681,6 @@ static enum ucode_state apply_microcode_amd(int cpu) rev = mc_amd->hdr.patch_id; ret = UCODE_UPDATED; - pr_info("CPU%d: new patch_level=0x%08x\n", cpu, rev); - out: uci->cpu_sig.rev = rev; c->microcode = rev; @@ -935,11 +923,6 @@ struct microcode_ops * __init init_amd_microcode(void) pr_warn("AMD CPU family 0x%x not supported\n", c->x86); return NULL; } - - if (ucode_new_rev) - pr_info_once("microcode updated early to new patch_level=0x%08x\n", - ucode_new_rev); - return µcode_amd_ops; } diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index b4be3a2c79df..232026a239a6 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -75,6 +75,8 @@ static u32 final_levels[] = { 0, /* T-101 terminator */ }; +struct early_load_data early_data; + /* * Check the current patch level on this CPU. * @@ -153,9 +155,9 @@ void __init load_ucode_bsp(void) return; if (intel) - load_ucode_intel_bsp(); + load_ucode_intel_bsp(&early_data); else - load_ucode_amd_bsp(cpuid_1_eax); + load_ucode_amd_bsp(&early_data, cpuid_1_eax); } void load_ucode_ap(void) @@ -826,6 +828,11 @@ static int __init microcode_init(void) if (!microcode_ops) return -ENODEV; + pr_info_once("Current revision: 0x%08x\n", (early_data.new_rev ?: early_data.old_rev)); + + if (early_data.new_rev) + pr_info_once("Updated early from: 0x%08x\n", early_data.old_rev); + microcode_pdev = platform_device_register_simple("microcode", -1, NULL, 0); if (IS_ERR(microcode_pdev)) return PTR_ERR(microcode_pdev); diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 6024feb98d29..070426b9895f 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -339,16 +339,9 @@ static enum ucode_state __apply_microcode(struct ucode_cpu_info *uci, static enum ucode_state apply_microcode_early(struct ucode_cpu_info *uci) { struct microcode_intel *mc = uci->mc; - enum ucode_state ret; - u32 cur_rev, date; + u32 cur_rev; - ret = __apply_microcode(uci, mc, &cur_rev); - if (ret == UCODE_UPDATED) { - date = mc->hdr.date; - pr_info_once("updated early: 0x%x -> 0x%x, date = %04x-%02x-%02x\n", - cur_rev, mc->hdr.rev, date & 0xffff, date >> 24, (date >> 16) & 0xff); - } - return ret; + return __apply_microcode(uci, mc, &cur_rev); } static __init bool load_builtin_intel_microcode(struct cpio_data *cp) @@ -413,13 +406,17 @@ static int __init save_builtin_microcode(void) early_initcall(save_builtin_microcode); /* Load microcode on BSP from initrd or builtin blobs */ -void __init load_ucode_intel_bsp(void) +void __init load_ucode_intel_bsp(struct early_load_data *ed) { struct ucode_cpu_info uci; + ed->old_rev = intel_get_microcode_revision(); + uci.mc = get_microcode_blob(&uci, false); if (uci.mc && apply_microcode_early(&uci) == UCODE_UPDATED) ucode_patch_va = UCODE_BSP_LOADED; + + ed->new_rev = uci.cpu_sig.rev; } void load_ucode_intel_ap(void) diff --git a/arch/x86/kernel/cpu/microcode/internal.h b/arch/x86/kernel/cpu/microcode/internal.h index f8047b12329a..21776c529fa9 100644 --- a/arch/x86/kernel/cpu/microcode/internal.h +++ b/arch/x86/kernel/cpu/microcode/internal.h @@ -37,6 +37,12 @@ struct microcode_ops { use_nmi : 1; }; +struct early_load_data { + u32 old_rev; + u32 new_rev; +}; + +extern struct early_load_data early_data; extern struct ucode_cpu_info ucode_cpu_info[]; struct cpio_data find_microcode_in_initrd(const char *path); @@ -92,14 +98,14 @@ extern bool dis_ucode_ldr; extern bool force_minrev; #ifdef CONFIG_CPU_SUP_AMD -void load_ucode_amd_bsp(unsigned int family); +void load_ucode_amd_bsp(struct early_load_data *ed, unsigned int family); void load_ucode_amd_ap(unsigned int family); int save_microcode_in_initrd_amd(unsigned int family); void reload_ucode_amd(unsigned int cpu); struct microcode_ops *init_amd_microcode(void); void exit_amd_microcode(void); #else /* CONFIG_CPU_SUP_AMD */ -static inline void load_ucode_amd_bsp(unsigned int family) { } +static inline void load_ucode_amd_bsp(struct early_load_data *ed, unsigned int family) { } static inline void load_ucode_amd_ap(unsigned int family) { } static inline int save_microcode_in_initrd_amd(unsigned int family) { return -EINVAL; } static inline void reload_ucode_amd(unsigned int cpu) { } @@ -108,12 +114,12 @@ static inline void exit_amd_microcode(void) { } #endif /* !CONFIG_CPU_SUP_AMD */ #ifdef CONFIG_CPU_SUP_INTEL -void load_ucode_intel_bsp(void); +void load_ucode_intel_bsp(struct early_load_data *ed); void load_ucode_intel_ap(void); void reload_ucode_intel(void); struct microcode_ops *init_intel_microcode(void); #else /* CONFIG_CPU_SUP_INTEL */ -static inline void load_ucode_intel_bsp(void) { } +static inline void load_ucode_intel_bsp(struct early_load_data *ed) { } static inline void load_ucode_intel_ap(void) { } static inline void reload_ucode_intel(void) { } static inline struct microcode_ops *init_intel_microcode(void) { return NULL; } -- cgit v1.2.3 From 18286883e779fb79b413a7462968ee3f6768f19c Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Tue, 14 Nov 2023 17:59:28 +0100 Subject: x86/hyperv: Use atomic_try_cmpxchg() to micro-optimize hv_nmi_unknown() Use atomic_try_cmpxchg() instead of atomic_cmpxchg(*ptr, old, new) == old in hv_nmi_unknown(). On x86 the CMPXCHG instruction returns success in the ZF flag, so this change saves a compare after CMPXCHG. The generated asm code improves from: 3e: 65 8b 15 00 00 00 00 mov %gs:0x0(%rip),%edx 45: b8 ff ff ff ff mov $0xffffffff,%eax 4a: f0 0f b1 15 00 00 00 lock cmpxchg %edx,0x0(%rip) 51: 00 52: 83 f8 ff cmp $0xffffffff,%eax 55: 0f 95 c0 setne %al to: 3e: 65 8b 15 00 00 00 00 mov %gs:0x0(%rip),%edx 45: b8 ff ff ff ff mov $0xffffffff,%eax 4a: f0 0f b1 15 00 00 00 lock cmpxchg %edx,0x0(%rip) 51: 00 52: 0f 95 c0 setne %al No functional change intended. Cc: "K. Y. Srinivasan" Cc: Haiyang Zhang Cc: Wei Liu Cc: Dexuan Cui Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: Dave Hansen Cc: "H. Peter Anvin" Signed-off-by: Uros Bizjak Reviewed-by: Michael Kelley Link: https://lore.kernel.org/r/20231114170038.381634-1-ubizjak@gmail.com Signed-off-by: Wei Liu Message-ID: <20231114170038.381634-1-ubizjak@gmail.com> --- arch/x86/kernel/cpu/mshyperv.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index e6bba12c759c..01fa06dd06b6 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -262,11 +262,14 @@ static uint32_t __init ms_hyperv_platform(void) static int hv_nmi_unknown(unsigned int val, struct pt_regs *regs) { static atomic_t nmi_cpu = ATOMIC_INIT(-1); + unsigned int old_cpu, this_cpu; if (!unknown_nmi_panic) return NMI_DONE; - if (atomic_cmpxchg(&nmi_cpu, -1, raw_smp_processor_id()) != -1) + old_cpu = -1; + this_cpu = raw_smp_processor_id(); + if (!atomic_try_cmpxchg(&nmi_cpu, &old_cpu, this_cpu)) return NMI_HANDLED; return NMI_DONE; -- cgit v1.2.3 From 4711b7b8f99583f6105a33e91f106125134beacb Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Mon, 30 Oct 2023 11:41:33 +0100 Subject: s390/pai: cleanup event initialization Setting event::hw.last_tag to zero is not necessary. The memory for each event is dynamically allocated by the kernel common code and initialized to zero already. Remove this unnecessary assignment. Move the comment to function paicrypt_start() for clarification. Suggested-by: Sumanth Korikkar Acked-by: Sumanth Korikkar Signed-off-by: Thomas Richter Signed-off-by: Alexander Gordeev --- arch/s390/kernel/perf_pai_crypto.c | 11 +++++------ arch/s390/kernel/perf_pai_ext.c | 1 - 2 files changed, 5 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/perf_pai_crypto.c b/arch/s390/kernel/perf_pai_crypto.c index 77fd24e6cbb6..39a91b00438a 100644 --- a/arch/s390/kernel/perf_pai_crypto.c +++ b/arch/s390/kernel/perf_pai_crypto.c @@ -279,12 +279,6 @@ static int paicrypt_event_init(struct perf_event *event) if (IS_ERR(cpump)) return PTR_ERR(cpump); - /* Event initialization sets last_tag to 0. When later on the events - * are deleted and re-added, do not reset the event count value to zero. - * Events are added, deleted and re-added when 2 or more events - * are active at the same time. - */ - event->hw.last_tag = 0; event->destroy = paicrypt_event_destroy; if (a->sample_period) { @@ -318,6 +312,11 @@ static void paicrypt_start(struct perf_event *event, int flags) { u64 sum; + /* Event initialization sets last_tag to 0. When later on the events + * are deleted and re-added, do not reset the event count value to zero. + * Events are added, deleted and re-added when 2 or more events + * are active at the same time. + */ if (!event->hw.last_tag) { event->hw.last_tag = 1; sum = paicrypt_getall(event); /* Get current value */ diff --git a/arch/s390/kernel/perf_pai_ext.c b/arch/s390/kernel/perf_pai_ext.c index 8ba0f1a3a39d..e7013a2e8960 100644 --- a/arch/s390/kernel/perf_pai_ext.c +++ b/arch/s390/kernel/perf_pai_ext.c @@ -260,7 +260,6 @@ static int paiext_event_init(struct perf_event *event) rc = paiext_alloc(a, event); if (rc) return rc; - event->hw.last_tag = 0; event->destroy = paiext_event_destroy; if (a->sample_period) { -- cgit v1.2.3 From 673752a839694133a328610fcbc54f3d59ae87f3 Mon Sep 17 00:00:00 2001 From: Mikhail Zaslonko Date: Wed, 8 Nov 2023 18:18:52 +0100 Subject: s390/ipl: add missing IPL_TYPE_ECKD_DUMP case to ipl_init() Add missing IPL_TYPE_ECKD_DUMP case to ipl_init() creating ECKD ipl device attribute group similar to IPL_TYPE_ECKD case. Commit e2d2a2968f2a ("s390/ipl: add eckd dump support") should have had it from the beginning. Fixes: e2d2a2968f2a ("s390/ipl: add eckd dump support") Signed-off-by: Mikhail Zaslonko Reviewed-by: Sven Schnelle Signed-off-by: Alexander Gordeev --- arch/s390/kernel/ipl.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c index cc364fce6aa9..ba75f6bee774 100644 --- a/arch/s390/kernel/ipl.c +++ b/arch/s390/kernel/ipl.c @@ -666,6 +666,7 @@ static int __init ipl_init(void) &ipl_ccw_attr_group_lpar); break; case IPL_TYPE_ECKD: + case IPL_TYPE_ECKD_DUMP: rc = sysfs_create_group(&ipl_kset->kobj, &ipl_eckd_attr_group); break; case IPL_TYPE_FCP: -- cgit v1.2.3 From 0a9ace1117bbaa25687468af703b472235f5c210 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 15 Nov 2023 11:39:02 +0100 Subject: s390: remove odd comment In the meantime hopefully most people got used to forward declarations, therefore remove the explanation. Signed-off-by: Heiko Carstens Signed-off-by: Alexander Gordeev --- arch/s390/include/asm/processor.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index dc17896a001a..c15eadbb9983 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -228,7 +228,6 @@ typedef struct thread_struct thread_struct; execve_tail(); \ } while (0) -/* Forward declaration, a strange C thing */ struct task_struct; struct mm_struct; struct seq_file; -- cgit v1.2.3 From acfa60dbe03802d6afd28401aa47801270e82021 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 17 Nov 2023 13:14:22 +0000 Subject: arm64: mm: Fix "rodata=on" when CONFIG_RODATA_FULL_DEFAULT_ENABLED=y When CONFIG_RODATA_FULL_DEFAULT_ENABLED=y, passing "rodata=on" on the kernel command-line (rather than "rodata=full") should turn off the "full" behaviour, leaving writable linear aliases of read-only kernel memory. Unfortunately, the option has no effect in this situation and the only way to disable the "rodata=full" behaviour is to disable rodata protection entirely by passing "rodata=off". Fix this by parsing the "on" and "off" options in the arch code, additionally enforcing that 'rodata_full' cannot be set without also setting 'rodata_enabled', allowing us to simplify a couple of checks in the process. Fixes: 2e8cff0a0eee ("arm64: fix rodata=full") Cc: Ard Biesheuvel Cc: Mark Rutland Signed-off-by: Will Deacon Reviewed-by: "Russell King (Oracle)" Reviewed-by: Ard Biesheuvel Link: https://lore.kernel.org/r/20231117131422.29663-1-will@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/setup.h | 17 +++++++++++++++-- arch/arm64/mm/pageattr.c | 7 +++---- 2 files changed, 18 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/setup.h b/arch/arm64/include/asm/setup.h index f4af547ef54c..2e4d7da74fb8 100644 --- a/arch/arm64/include/asm/setup.h +++ b/arch/arm64/include/asm/setup.h @@ -21,9 +21,22 @@ static inline bool arch_parse_debug_rodata(char *arg) extern bool rodata_enabled; extern bool rodata_full; - if (arg && !strcmp(arg, "full")) { + if (!arg) + return false; + + if (!strcmp(arg, "full")) { + rodata_enabled = rodata_full = true; + return true; + } + + if (!strcmp(arg, "off")) { + rodata_enabled = rodata_full = false; + return true; + } + + if (!strcmp(arg, "on")) { rodata_enabled = true; - rodata_full = true; + rodata_full = false; return true; } diff --git a/arch/arm64/mm/pageattr.c b/arch/arm64/mm/pageattr.c index 8e2017ba5f1b..924843f1f661 100644 --- a/arch/arm64/mm/pageattr.c +++ b/arch/arm64/mm/pageattr.c @@ -29,8 +29,8 @@ bool can_set_direct_map(void) * * KFENCE pool requires page-granular mapping if initialized late. */ - return (rodata_enabled && rodata_full) || debug_pagealloc_enabled() || - arm64_kfence_can_set_direct_map(); + return rodata_full || debug_pagealloc_enabled() || + arm64_kfence_can_set_direct_map(); } static int change_page_range(pte_t *ptep, unsigned long addr, void *data) @@ -105,8 +105,7 @@ static int change_memory_common(unsigned long addr, int numpages, * If we are manipulating read-only permissions, apply the same * change to the linear mapping of the pages that back this VM area. */ - if (rodata_enabled && - rodata_full && (pgprot_val(set_mask) == PTE_RDONLY || + if (rodata_full && (pgprot_val(set_mask) == PTE_RDONLY || pgprot_val(clear_mask) == PTE_RDONLY)) { for (i = 0; i < area->nr_pages; i++) { __change_memory_common((u64)page_address(area->pages[i]), -- cgit v1.2.3 From 7bf9a6b46549852a37e6d07e52c601c3c706b562 Mon Sep 17 00:00:00 2001 From: Stefano Stabellini Date: Wed, 22 Nov 2023 15:07:41 -0800 Subject: arm/xen: fix xen_vcpu_info allocation alignment xen_vcpu_info is a percpu area than needs to be mapped by Xen. Currently, it could cross a page boundary resulting in Xen being unable to map it: [ 0.567318] kernel BUG at arch/arm64/xen/../../arm/xen/enlighten.c:164! [ 0.574002] Internal error: Oops - BUG: 00000000f2000800 [#1] PREEMPT SMP Fix the issue by using __alloc_percpu and requesting alignment for the memory allocation. Signed-off-by: Stefano Stabellini Link: https://lore.kernel.org/r/alpine.DEB.2.22.394.2311221501340.2053963@ubuntu-linux-20-04-desktop Fixes: 24d5373dda7c ("arm/xen: Use alloc_percpu rather than __alloc_percpu") Reviewed-by: Juergen Gross Signed-off-by: Juergen Gross --- arch/arm/xen/enlighten.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 9afdc4c4a5dc..a395b6c0aae2 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -484,7 +484,8 @@ static int __init xen_guest_init(void) * for secondary CPUs as they are brought up. * For uniformity we use VCPUOP_register_vcpu_info even on cpu0. */ - xen_vcpu_info = alloc_percpu(struct vcpu_info); + xen_vcpu_info = __alloc_percpu(sizeof(struct vcpu_info), + 1 << fls(sizeof(struct vcpu_info) - 1)); if (xen_vcpu_info == NULL) return -ENOMEM; -- cgit v1.2.3 From c0a8574204054effad6ac83cc75c02576e2985fe Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 19 Nov 2023 14:32:34 +0900 Subject: arm64: add dependency between vmlinuz.efi and Image A common issue in Makefile is a race in parallel building. You need to be careful to prevent multiple threads from writing to the same file simultaneously. Commit 3939f3345050 ("ARM: 8418/1: add boot image dependencies to not generate invalid images") addressed such a bad scenario. A similar symptom occurs with the following command: $ make -j$(nproc) ARCH=arm64 Image vmlinuz.efi [ snip ] SORTTAB vmlinux OBJCOPY arch/arm64/boot/Image OBJCOPY arch/arm64/boot/Image AS arch/arm64/boot/zboot-header.o PAD arch/arm64/boot/vmlinux.bin GZIP arch/arm64/boot/vmlinuz OBJCOPY arch/arm64/boot/vmlinuz.o LD arch/arm64/boot/vmlinuz.efi.elf OBJCOPY arch/arm64/boot/vmlinuz.efi The log "OBJCOPY arch/arm64/boot/Image" is displayed twice. It indicates that two threads simultaneously enter arch/arm64/boot/ and write to arch/arm64/boot/Image. It occasionally leads to a build failure: $ make -j$(nproc) ARCH=arm64 Image vmlinuz.efi [ snip ] SORTTAB vmlinux OBJCOPY arch/arm64/boot/Image PAD arch/arm64/boot/vmlinux.bin truncate: Invalid number: 'arch/arm64/boot/vmlinux.bin' make[2]: *** [drivers/firmware/efi/libstub/Makefile.zboot:13: arch/arm64/boot/vmlinux.bin] Error 1 make[2]: *** Deleting file 'arch/arm64/boot/vmlinux.bin' make[1]: *** [arch/arm64/Makefile:163: vmlinuz.efi] Error 2 make[1]: *** Waiting for unfinished jobs.... make: *** [Makefile:234: __sub-make] Error 2 vmlinuz.efi depends on Image, but such a dependency is not specified in arch/arm64/Makefile. Signed-off-by: Masahiro Yamada Acked-by: Ard Biesheuvel Reviewed-by: SImon Glass Link: https://lore.kernel.org/r/20231119053234.2367621-1-masahiroy@kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 4bd85cc0d32b..9a2d3723cd0f 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -158,7 +158,7 @@ endif all: $(notdir $(KBUILD_IMAGE)) - +vmlinuz.efi: Image Image vmlinuz.efi: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ -- cgit v1.2.3 From e11d4cccd094a7cd4696c8c42e672c76c092dad5 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 20 Nov 2023 15:37:50 +0100 Subject: parisc: Mark ex_table entries 32-bit aligned in assembly.h Add an align statement to tell the linker that all ex_table entries and as such the whole ex_table section should be 32-bit aligned in vmlinux and modules. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v6.0+ --- arch/parisc/include/asm/assembly.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h index 75677b526b2b..74d17d7e759d 100644 --- a/arch/parisc/include/asm/assembly.h +++ b/arch/parisc/include/asm/assembly.h @@ -574,6 +574,7 @@ */ #define ASM_EXCEPTIONTABLE_ENTRY(fault_addr, except_addr) \ .section __ex_table,"aw" ! \ + .align 4 ! \ .word (fault_addr - .), (except_addr - .) ! \ .previous -- cgit v1.2.3 From a80aeb86542a50aa8521729ea4cc731ee7174f03 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 20 Nov 2023 15:39:03 +0100 Subject: parisc: Mark ex_table entries 32-bit aligned in uaccess.h Add an align statement to tell the linker that all ex_table entries and as such the whole ex_table section should be 32-bit aligned in vmlinux and modules. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v6.0+ --- arch/parisc/include/asm/uaccess.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index 2bf660eabe42..4165079898d9 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -41,6 +41,7 @@ struct exception_table_entry { #define ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr )\ ".section __ex_table,\"aw\"\n" \ + ".align 4\n" \ ".word (" #fault_addr " - .), (" #except_addr " - .)\n\t" \ ".previous\n" -- cgit v1.2.3 From 33f806da2df68606f77d7b892cd1298ba3d463e8 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 20 Nov 2023 23:10:20 +0100 Subject: parisc: Mark altinstructions read-only and 32-bit aligned Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v6.0+ --- arch/parisc/include/asm/alternative.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/parisc/include/asm/alternative.h b/arch/parisc/include/asm/alternative.h index 1ed45fd085d3..1eb488f25b83 100644 --- a/arch/parisc/include/asm/alternative.h +++ b/arch/parisc/include/asm/alternative.h @@ -34,7 +34,8 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end, /* Alternative SMP implementation. */ #define ALTERNATIVE(cond, replacement) "!0:" \ - ".section .altinstructions, \"aw\" !" \ + ".section .altinstructions, \"a\" !" \ + ".align 4 !" \ ".word (0b-4-.) !" \ ".hword 1, " __stringify(cond) " !" \ ".word " __stringify(replacement) " !" \ @@ -44,7 +45,8 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end, /* to replace one single instructions by a new instruction */ #define ALTERNATIVE(from, to, cond, replacement)\ - .section .altinstructions, "aw" ! \ + .section .altinstructions, "a" ! \ + .align 4 ! \ .word (from - .) ! \ .hword (to - from)/4, cond ! \ .word replacement ! \ @@ -52,7 +54,8 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end, /* to replace multiple instructions by new code */ #define ALTERNATIVE_CODE(from, num_instructions, cond, new_instr_ptr)\ - .section .altinstructions, "aw" ! \ + .section .altinstructions, "a" ! \ + .align 4 ! \ .word (from - .) ! \ .hword -num_instructions, cond ! \ .word (new_instr_ptr - .) ! \ -- cgit v1.2.3 From 07eecff8ae78df7f28800484d31337e1f9bfca3a Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 20 Nov 2023 23:14:39 +0100 Subject: parisc: Mark jump_table naturally aligned The jump_table stores two 32-bit words and one 32- (on 32-bit kernel) or one 64-bit word (on 64-bit kernel). Ensure that the last word is always 64-bit aligned on a 64-bit kernel by aligning the whole structure on sizeof(long). Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v6.0+ --- arch/parisc/include/asm/jump_label.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/parisc/include/asm/jump_label.h b/arch/parisc/include/asm/jump_label.h index af2a598bc0f8..94428798b6aa 100644 --- a/arch/parisc/include/asm/jump_label.h +++ b/arch/parisc/include/asm/jump_label.h @@ -15,10 +15,12 @@ static __always_inline bool arch_static_branch(struct static_key *key, bool bran asm_volatile_goto("1:\n\t" "nop\n\t" ".pushsection __jump_table, \"aw\"\n\t" + ".align %1\n\t" ".word 1b - ., %l[l_yes] - .\n\t" __stringify(ASM_ULONG_INSN) " %c0 - .\n\t" ".popsection\n\t" - : : "i" (&((char *)key)[branch]) : : l_yes); + : : "i" (&((char *)key)[branch]), "i" (sizeof(long)) + : : l_yes); return false; l_yes: @@ -30,10 +32,12 @@ static __always_inline bool arch_static_branch_jump(struct static_key *key, bool asm_volatile_goto("1:\n\t" "b,n %l[l_yes]\n\t" ".pushsection __jump_table, \"aw\"\n\t" + ".align %1\n\t" ".word 1b - ., %l[l_yes] - .\n\t" __stringify(ASM_ULONG_INSN) " %c0 - .\n\t" ".popsection\n\t" - : : "i" (&((char *)key)[branch]) : : l_yes); + : : "i" (&((char *)key)[branch]), "i" (sizeof(long)) + : : l_yes); return false; l_yes: -- cgit v1.2.3 From b28fc0d8739c03e7b6c44914a9d00d4c6dddc0ea Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 25 Nov 2023 09:11:56 +0100 Subject: parisc: Mark lock_aligned variables 16-byte aligned on SMP On parisc we need 16-byte alignment for variables which are used for locking. Mark the __lock_aligned attribute acordingly so that the .data..lock_aligned section will get that alignment in the generated object files. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v6.0+ --- arch/parisc/include/asm/ldcw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/parisc/include/asm/ldcw.h b/arch/parisc/include/asm/ldcw.h index ee9e071859b2..47ebc4c91eaf 100644 --- a/arch/parisc/include/asm/ldcw.h +++ b/arch/parisc/include/asm/ldcw.h @@ -55,7 +55,7 @@ }) #ifdef CONFIG_SMP -# define __lock_aligned __section(".data..lock_aligned") +# define __lock_aligned __section(".data..lock_aligned") __aligned(16) #endif #endif /* __PARISC_LDCW_H */ -- cgit v1.2.3 From c9fcb2b65c2849e8ff3be23fd8828312fb68dc19 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sat, 25 Nov 2023 09:16:02 +0100 Subject: parisc: Ensure 32-bit alignment on parisc unwind section Make sure the .PARISC.unwind section will be 32-bit aligned. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v6.0+ --- arch/parisc/kernel/vmlinux.lds.S | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index 58694d1989c2..548051b0b4af 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -130,6 +130,7 @@ SECTIONS RO_DATA(8) /* unwind info */ + . = ALIGN(4); .PARISC.unwind : { __start___unwind = .; *(.PARISC.unwind) -- cgit v1.2.3 From fe76a1349f235969381832c83d703bc911021eb6 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 20 Nov 2023 23:30:49 +0100 Subject: parisc: Use natural CPU alignment for bug_table Make sure that the __bug_table section gets 32- or 64-bit aligned, depending if a 32- or 64-bit kernel is being built. Mark it non-writeable and use .blockz instead of the .org assembler directive to pad the struct. Signed-off-by: Helge Deller Cc: stable@vger.kernel.org # v6.0+ --- arch/parisc/include/asm/bug.h | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/parisc/include/asm/bug.h b/arch/parisc/include/asm/bug.h index 4b6d60b94124..b9cad0bb4461 100644 --- a/arch/parisc/include/asm/bug.h +++ b/arch/parisc/include/asm/bug.h @@ -28,13 +28,15 @@ do { \ asm volatile("\n" \ "1:\t" PARISC_BUG_BREAK_ASM "\n" \ - "\t.pushsection __bug_table,\"aw\"\n" \ + "\t.pushsection __bug_table,\"a\"\n" \ + "\t.align %4\n" \ "2:\t" ASM_WORD_INSN "1b, %c0\n" \ - "\t.short %c1, %c2\n" \ - "\t.org 2b+%c3\n" \ + "\t.short %1, %2\n" \ + "\t.blockz %3-2*%4-2*2\n" \ "\t.popsection" \ : : "i" (__FILE__), "i" (__LINE__), \ - "i" (0), "i" (sizeof(struct bug_entry)) ); \ + "i" (0), "i" (sizeof(struct bug_entry)), \ + "i" (sizeof(long)) ); \ unreachable(); \ } while(0) @@ -51,27 +53,31 @@ do { \ asm volatile("\n" \ "1:\t" PARISC_BUG_BREAK_ASM "\n" \ - "\t.pushsection __bug_table,\"aw\"\n" \ + "\t.pushsection __bug_table,\"a\"\n" \ + "\t.align %4\n" \ "2:\t" ASM_WORD_INSN "1b, %c0\n" \ - "\t.short %c1, %c2\n" \ - "\t.org 2b+%c3\n" \ + "\t.short %1, %2\n" \ + "\t.blockz %3-2*%4-2*2\n" \ "\t.popsection" \ : : "i" (__FILE__), "i" (__LINE__), \ "i" (BUGFLAG_WARNING|(flags)), \ - "i" (sizeof(struct bug_entry)) ); \ + "i" (sizeof(struct bug_entry)), \ + "i" (sizeof(long)) ); \ } while(0) #else #define __WARN_FLAGS(flags) \ do { \ asm volatile("\n" \ "1:\t" PARISC_BUG_BREAK_ASM "\n" \ - "\t.pushsection __bug_table,\"aw\"\n" \ + "\t.pushsection __bug_table,\"a\"\n" \ + "\t.align %2\n" \ "2:\t" ASM_WORD_INSN "1b\n" \ - "\t.short %c0\n" \ - "\t.org 2b+%c1\n" \ + "\t.short %0\n" \ + "\t.blockz %1-%2-2\n" \ "\t.popsection" \ : : "i" (BUGFLAG_WARNING|(flags)), \ - "i" (sizeof(struct bug_entry)) ); \ + "i" (sizeof(struct bug_entry)), \ + "i" (sizeof(long)) ); \ } while(0) #endif -- cgit v1.2.3 From e5f3e299a2b1e9c3ece24a38adfc089aef307e8a Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 23 Nov 2023 20:28:27 +0100 Subject: parisc: Drop the HP-UX ENOSYM and EREMOTERELEASE error codes Those return codes are only defined for the parisc architecture and are leftovers from when we wanted to be HP-UX compatible. They are not returned by any Linux kernel syscall but do trigger problems with the glibc strerrorname_np() and strerror() functions as reported in glibc issue #31080. There is no need to keep them, so simply remove them. Signed-off-by: Helge Deller Reported-by: Bruno Haible Closes: https://sourceware.org/bugzilla/show_bug.cgi?id=31080 Cc: stable@vger.kernel.org --- arch/parisc/include/uapi/asm/errno.h | 2 -- lib/errname.c | 6 ------ tools/arch/parisc/include/uapi/asm/errno.h | 2 -- 3 files changed, 10 deletions(-) (limited to 'arch') diff --git a/arch/parisc/include/uapi/asm/errno.h b/arch/parisc/include/uapi/asm/errno.h index 87245c584784..8d94739d75c6 100644 --- a/arch/parisc/include/uapi/asm/errno.h +++ b/arch/parisc/include/uapi/asm/errno.h @@ -75,7 +75,6 @@ /* We now return you to your regularly scheduled HPUX. */ -#define ENOSYM 215 /* symbol does not exist in executable */ #define ENOTSOCK 216 /* Socket operation on non-socket */ #define EDESTADDRREQ 217 /* Destination address required */ #define EMSGSIZE 218 /* Message too long */ @@ -101,7 +100,6 @@ #define ETIMEDOUT 238 /* Connection timed out */ #define ECONNREFUSED 239 /* Connection refused */ #define EREFUSED ECONNREFUSED /* for HP's NFS apparently */ -#define EREMOTERELEASE 240 /* Remote peer released connection */ #define EHOSTDOWN 241 /* Host is down */ #define EHOSTUNREACH 242 /* No route to host */ diff --git a/lib/errname.c b/lib/errname.c index dd1b998552cd..4f9112b38f3a 100644 --- a/lib/errname.c +++ b/lib/errname.c @@ -111,9 +111,6 @@ static const char *names_0[] = { E(ENOSPC), E(ENOSR), E(ENOSTR), -#ifdef ENOSYM - E(ENOSYM), -#endif E(ENOSYS), E(ENOTBLK), E(ENOTCONN), @@ -144,9 +141,6 @@ static const char *names_0[] = { #endif E(EREMOTE), E(EREMOTEIO), -#ifdef EREMOTERELEASE - E(EREMOTERELEASE), -#endif E(ERESTART), E(ERFKILL), E(EROFS), diff --git a/tools/arch/parisc/include/uapi/asm/errno.h b/tools/arch/parisc/include/uapi/asm/errno.h index 87245c584784..8d94739d75c6 100644 --- a/tools/arch/parisc/include/uapi/asm/errno.h +++ b/tools/arch/parisc/include/uapi/asm/errno.h @@ -75,7 +75,6 @@ /* We now return you to your regularly scheduled HPUX. */ -#define ENOSYM 215 /* symbol does not exist in executable */ #define ENOTSOCK 216 /* Socket operation on non-socket */ #define EDESTADDRREQ 217 /* Destination address required */ #define EMSGSIZE 218 /* Message too long */ @@ -101,7 +100,6 @@ #define ETIMEDOUT 238 /* Connection timed out */ #define ECONNREFUSED 239 /* Connection refused */ #define EREFUSED ECONNREFUSED /* for HP's NFS apparently */ -#define EREMOTERELEASE 240 /* Remote peer released connection */ #define EHOSTDOWN 241 /* Host is down */ #define EHOSTUNREACH 242 /* No route to host */ -- cgit v1.2.3 From 43266838515d30dc0c45d5c7e6e7edacee6cce92 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Thu, 23 Nov 2023 21:57:19 +0100 Subject: parisc: Reduce size of the bug_table on 64-bit kernel by half Enable GENERIC_BUG_RELATIVE_POINTERS which will store 32-bit relative offsets to the bug address and the source file name instead of 64-bit absolute addresses. This effectively reduces the size of the bug_table[] array by half on 64-bit kernels. Signed-off-by: Helge Deller --- arch/parisc/Kconfig | 7 +++++-- arch/parisc/include/asm/bug.h | 34 +++++++++++++++++----------------- 2 files changed, 22 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index a7c9c0e69e5a..d14ccc948a29 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -115,9 +115,12 @@ config ARCH_HAS_ILOG2_U64 default n config GENERIC_BUG - bool - default y + def_bool y depends on BUG + select GENERIC_BUG_RELATIVE_POINTERS if 64BIT + +config GENERIC_BUG_RELATIVE_POINTERS + bool config GENERIC_HWEIGHT bool diff --git a/arch/parisc/include/asm/bug.h b/arch/parisc/include/asm/bug.h index b9cad0bb4461..1641ff9a8b83 100644 --- a/arch/parisc/include/asm/bug.h +++ b/arch/parisc/include/asm/bug.h @@ -17,26 +17,27 @@ #define PARISC_BUG_BREAK_ASM "break 0x1f, 0x1fff" #define PARISC_BUG_BREAK_INSN 0x03ffe01f /* PARISC_BUG_BREAK_ASM */ -#if defined(CONFIG_64BIT) -#define ASM_WORD_INSN ".dword\t" +#ifdef CONFIG_GENERIC_BUG_RELATIVE_POINTERS +# define __BUG_REL(val) ".word " __stringify(val) " - ." #else -#define ASM_WORD_INSN ".word\t" +# define __BUG_REL(val) ".word " __stringify(val) #endif + #ifdef CONFIG_DEBUG_BUGVERBOSE #define BUG() \ do { \ asm volatile("\n" \ "1:\t" PARISC_BUG_BREAK_ASM "\n" \ "\t.pushsection __bug_table,\"a\"\n" \ - "\t.align %4\n" \ - "2:\t" ASM_WORD_INSN "1b, %c0\n" \ + "\t.align 4\n" \ + "2:\t" __BUG_REL(1b) "\n" \ + "\t" __BUG_REL(%c0) "\n" \ "\t.short %1, %2\n" \ - "\t.blockz %3-2*%4-2*2\n" \ + "\t.blockz %3-2*4-2*2\n" \ "\t.popsection" \ : : "i" (__FILE__), "i" (__LINE__), \ - "i" (0), "i" (sizeof(struct bug_entry)), \ - "i" (sizeof(long)) ); \ + "i" (0), "i" (sizeof(struct bug_entry)) ); \ unreachable(); \ } while(0) @@ -54,15 +55,15 @@ asm volatile("\n" \ "1:\t" PARISC_BUG_BREAK_ASM "\n" \ "\t.pushsection __bug_table,\"a\"\n" \ - "\t.align %4\n" \ - "2:\t" ASM_WORD_INSN "1b, %c0\n" \ + "\t.align 4\n" \ + "2:\t" __BUG_REL(1b) "\n" \ + "\t" __BUG_REL(%c0) "\n" \ "\t.short %1, %2\n" \ - "\t.blockz %3-2*%4-2*2\n" \ + "\t.blockz %3-2*4-2*2\n" \ "\t.popsection" \ : : "i" (__FILE__), "i" (__LINE__), \ "i" (BUGFLAG_WARNING|(flags)), \ - "i" (sizeof(struct bug_entry)), \ - "i" (sizeof(long)) ); \ + "i" (sizeof(struct bug_entry)) ); \ } while(0) #else #define __WARN_FLAGS(flags) \ @@ -71,13 +72,12 @@ "1:\t" PARISC_BUG_BREAK_ASM "\n" \ "\t.pushsection __bug_table,\"a\"\n" \ "\t.align %2\n" \ - "2:\t" ASM_WORD_INSN "1b\n" \ + "2:\t" __BUG_REL(1b) "\n" \ "\t.short %0\n" \ - "\t.blockz %1-%2-2\n" \ + "\t.blockz %1-4-2\n" \ "\t.popsection" \ : : "i" (BUGFLAG_WARNING|(flags)), \ - "i" (sizeof(struct bug_entry)), \ - "i" (sizeof(long)) ); \ + "i" (sizeof(struct bug_entry)) ); \ } while(0) #endif -- cgit v1.2.3 From 79997eda0d31bc68203c95ecb978773ee6ce7a1f Mon Sep 17 00:00:00 2001 From: Conor Dooley Date: Sun, 26 Nov 2023 11:40:54 +0000 Subject: riscv: dts: microchip: move timebase-frequency to mpfs.dtsi The timebase-frequency on PolarFire SoC is not set by an oscillator on the board, but rather by an internal divider, so move the property to mpfs.dtsi. This looks to be copy-pasta from the SiFive Unleashed as the comments in both places were almost identical. In the Unleashed's case this looks to actually be valid, as the clock is provided by a crystal on the PCB. Signed-off-by: Conor Dooley --- CC: Conor Dooley CC: Daire McNamara CC: Rob Herring CC: Krzysztof Kozlowski CC: Paul Walmsley CC: Palmer Dabbelt CC: linux-riscv@lists.infradead.org CC: devicetree@vger.kernel.org --- arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts | 7 ------- arch/riscv/boot/dts/microchip/mpfs-m100pfsevp.dts | 7 ------- arch/riscv/boot/dts/microchip/mpfs-polarberry.dts | 7 ------- arch/riscv/boot/dts/microchip/mpfs-sev-kit.dts | 7 ------- arch/riscv/boot/dts/microchip/mpfs-tysom-m.dts | 7 ------- arch/riscv/boot/dts/microchip/mpfs.dtsi | 1 + 6 files changed, 1 insertion(+), 35 deletions(-) (limited to 'arch') diff --git a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts index 90b261114763..dce96f27cc89 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts +++ b/arch/riscv/boot/dts/microchip/mpfs-icicle-kit.dts @@ -8,9 +8,6 @@ #include #include -/* Clock frequency (in Hz) of the rtcclk */ -#define RTCCLK_FREQ 1000000 - / { model = "Microchip PolarFire-SoC Icicle Kit"; compatible = "microchip,mpfs-icicle-reference-rtlv2210", "microchip,mpfs-icicle-kit", @@ -29,10 +26,6 @@ stdout-path = "serial1:115200n8"; }; - cpus { - timebase-frequency = ; - }; - leds { compatible = "gpio-leds"; diff --git a/arch/riscv/boot/dts/microchip/mpfs-m100pfsevp.dts b/arch/riscv/boot/dts/microchip/mpfs-m100pfsevp.dts index 184cb36a175e..a8d623ee9fa4 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-m100pfsevp.dts +++ b/arch/riscv/boot/dts/microchip/mpfs-m100pfsevp.dts @@ -10,9 +10,6 @@ #include "mpfs.dtsi" #include "mpfs-m100pfs-fabric.dtsi" -/* Clock frequency (in Hz) of the rtcclk */ -#define MTIMER_FREQ 1000000 - / { model = "Aries Embedded M100PFEVPS"; compatible = "aries,m100pfsevp", "microchip,mpfs"; @@ -33,10 +30,6 @@ stdout-path = "serial1:115200n8"; }; - cpus { - timebase-frequency = ; - }; - ddrc_cache_lo: memory@80000000 { device_type = "memory"; reg = <0x0 0x80000000 0x0 0x40000000>; diff --git a/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts b/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts index c87cc2d8fe29..ea0808ab1042 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts +++ b/arch/riscv/boot/dts/microchip/mpfs-polarberry.dts @@ -6,9 +6,6 @@ #include "mpfs.dtsi" #include "mpfs-polarberry-fabric.dtsi" -/* Clock frequency (in Hz) of the rtcclk */ -#define MTIMER_FREQ 1000000 - / { model = "Sundance PolarBerry"; compatible = "sundance,polarberry", "microchip,mpfs"; @@ -22,10 +19,6 @@ stdout-path = "serial0:115200n8"; }; - cpus { - timebase-frequency = ; - }; - ddrc_cache_lo: memory@80000000 { device_type = "memory"; reg = <0x0 0x80000000 0x0 0x2e000000>; diff --git a/arch/riscv/boot/dts/microchip/mpfs-sev-kit.dts b/arch/riscv/boot/dts/microchip/mpfs-sev-kit.dts index 013cb666c72d..f9a890579438 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-sev-kit.dts +++ b/arch/riscv/boot/dts/microchip/mpfs-sev-kit.dts @@ -6,9 +6,6 @@ #include "mpfs.dtsi" #include "mpfs-sev-kit-fabric.dtsi" -/* Clock frequency (in Hz) of the rtcclk */ -#define MTIMER_FREQ 1000000 - / { #address-cells = <2>; #size-cells = <2>; @@ -28,10 +25,6 @@ stdout-path = "serial1:115200n8"; }; - cpus { - timebase-frequency = ; - }; - reserved-memory { #address-cells = <2>; #size-cells = <2>; diff --git a/arch/riscv/boot/dts/microchip/mpfs-tysom-m.dts b/arch/riscv/boot/dts/microchip/mpfs-tysom-m.dts index e0797c7e1b35..d1120f5f2c01 100644 --- a/arch/riscv/boot/dts/microchip/mpfs-tysom-m.dts +++ b/arch/riscv/boot/dts/microchip/mpfs-tysom-m.dts @@ -11,9 +11,6 @@ #include "mpfs.dtsi" #include "mpfs-tysom-m-fabric.dtsi" -/* Clock frequency (in Hz) of the rtcclk */ -#define MTIMER_FREQ 1000000 - / { model = "Aldec TySOM-M-MPFS250T-REV2"; compatible = "aldec,tysom-m-mpfs250t-rev2", "microchip,mpfs"; @@ -34,10 +31,6 @@ stdout-path = "serial1:115200n8"; }; - cpus { - timebase-frequency = ; - }; - ddrc_cache_lo: memory@80000000 { device_type = "memory"; reg = <0x0 0x80000000 0x0 0x30000000>; diff --git a/arch/riscv/boot/dts/microchip/mpfs.dtsi b/arch/riscv/boot/dts/microchip/mpfs.dtsi index a6faf24f1dba..266489d43912 100644 --- a/arch/riscv/boot/dts/microchip/mpfs.dtsi +++ b/arch/riscv/boot/dts/microchip/mpfs.dtsi @@ -13,6 +13,7 @@ cpus { #address-cells = <1>; #size-cells = <0>; + timebase-frequency = <1000000>; cpu0: cpu@0 { compatible = "sifive,e51", "sifive,rocket0", "riscv"; -- cgit v1.2.3 From 2bfba37b3d90d6d2d499d5b0dfe99c05c38b1b54 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Thu, 19 Oct 2023 08:32:17 +0200 Subject: arm64: dt: imx93: tqma9352-mba93xxla: Fix LPUART2 pad config LPUART2_RTS# has an external pull-down, so do not enable the internal pull-up at the same time, use a pull-down instead. Fixes: c982ecfa7992a ("arm64: dts: freescale: add initial device tree for MBa93xxLA SBC board") Signed-off-by: Alexander Stein Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts b/arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts index f06139bdff97..3c5c67ebee5d 100644 --- a/arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts +++ b/arch/arm64/boot/dts/freescale/imx93-tqma9352-mba93xxla.dts @@ -577,7 +577,7 @@ fsl,pins = < MX93_PAD_UART2_TXD__LPUART2_TX 0x31e MX93_PAD_UART2_RXD__LPUART2_RX 0x31e - MX93_PAD_SAI1_TXD0__LPUART2_RTS_B 0x31e + MX93_PAD_SAI1_TXD0__LPUART2_RTS_B 0x51e >; }; -- cgit v1.2.3 From 6552218f4dc47ba3c6c5b58cc1e9eb208a2b438b Mon Sep 17 00:00:00 2001 From: Stefan Kerkmann Date: Wed, 1 Nov 2023 12:03:37 +0100 Subject: ARM: dts: imx6q: skov: fix ethernet clock regression A regression was introduced in the Skov specific i.MX6 flavor reve-mi1010ait-1cp1 device tree causing the external ethernet controller to not being selected as the clock source for the i.MX6 ethernet MAC, resulting in a none functional ethernet interface. The root cause is that the ethernet clock selection is now part of the clocks node, which is overwritten in the specific device tree and wasn't updated to contain these ethernet clocks. Fixes: c89614079e44 ("ARM: dts: imx6qdl-skov-cpu: configure ethernet reference clock parent") Signed-off-by: Stefan Kerkmann Signed-off-by: Shawn Guo --- arch/arm/boot/dts/nxp/imx/imx6q-skov-reve-mi1010ait-1cp1.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/nxp/imx/imx6q-skov-reve-mi1010ait-1cp1.dts b/arch/arm/boot/dts/nxp/imx/imx6q-skov-reve-mi1010ait-1cp1.dts index a3f247c722b4..0342a79ccd5d 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6q-skov-reve-mi1010ait-1cp1.dts +++ b/arch/arm/boot/dts/nxp/imx/imx6q-skov-reve-mi1010ait-1cp1.dts @@ -37,9 +37,9 @@ &clks { assigned-clocks = <&clks IMX6QDL_CLK_LDB_DI0_SEL>, - <&clks IMX6QDL_CLK_LDB_DI1_SEL>; + <&clks IMX6QDL_CLK_LDB_DI1_SEL>, <&clks IMX6QDL_CLK_ENET_REF_SEL>; assigned-clock-parents = <&clks IMX6QDL_CLK_PLL5_VIDEO_DIV>, - <&clks IMX6QDL_CLK_PLL5_VIDEO_DIV>; + <&clks IMX6QDL_CLK_PLL5_VIDEO_DIV>, <&clk50m_phy>; }; &hdmi { -- cgit v1.2.3 From 487635756198cad563feb47539c6a37ea57f1dae Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Mon, 27 Nov 2023 10:39:26 +0100 Subject: parisc: Fix asm operand number out of range build error in bug table Build is broken if CONFIG_DEBUG_BUGVERBOSE=n. Fix it be using the correct asm operand number. Signed-off-by: Helge Deller Reported-by: Linux Kernel Functional Testing Fixes: fe76a1349f23 ("parisc: Use natural CPU alignment for bug_table") Cc: stable@vger.kernel.org # v6.0+ --- arch/parisc/include/asm/bug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/parisc/include/asm/bug.h b/arch/parisc/include/asm/bug.h index 1641ff9a8b83..833555f74ffa 100644 --- a/arch/parisc/include/asm/bug.h +++ b/arch/parisc/include/asm/bug.h @@ -71,7 +71,7 @@ asm volatile("\n" \ "1:\t" PARISC_BUG_BREAK_ASM "\n" \ "\t.pushsection __bug_table,\"a\"\n" \ - "\t.align %2\n" \ + "\t.align 4\n" \ "2:\t" __BUG_REL(1b) "\n" \ "\t.short %0\n" \ "\t.blockz %1-4-2\n" \ -- cgit v1.2.3 From 076a948f5ad0da8a4438fdb3ec1b4a473084b40a Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 24 Nov 2023 10:50:30 +0100 Subject: ARM: dts: rockchip: minor whitespace cleanup around '=' The DTS code coding style expects exactly one space before and after '=' sign. Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20231124095031.58555-1-krzysztof.kozlowski@linaro.org Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rockchip/rk322x.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/rockchip/rk322x.dtsi b/arch/arm/boot/dts/rockchip/rk322x.dtsi index ffc16d6b97e1..a721744cbfd1 100644 --- a/arch/arm/boot/dts/rockchip/rk322x.dtsi +++ b/arch/arm/boot/dts/rockchip/rk322x.dtsi @@ -215,9 +215,9 @@ power-domain@RK3228_PD_VOP { reg = ; - clocks =<&cru ACLK_VOP>, - <&cru DCLK_VOP>, - <&cru HCLK_VOP>; + clocks = <&cru ACLK_VOP>, + <&cru DCLK_VOP>, + <&cru HCLK_VOP>; pm_qos = <&qos_vop>; #power-domain-cells = <0>; }; -- cgit v1.2.3 From 93dc6cd15f207be502739072ad122fa5ac812908 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 24 Nov 2023 10:50:31 +0100 Subject: arm64: dts: rockchip: minor whitespace cleanup around '=' The DTS code coding style expects exactly one space before and after '=' sign. Signed-off-by: Krzysztof Kozlowski Link: https://lore.kernel.org/r/20231124095031.58555-2-krzysztof.kozlowski@linaro.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts | 2 +- arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts b/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts index de0a1f2af983..7d4c5324c61b 100644 --- a/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts +++ b/arch/arm64/boot/dts/rockchip/px30-ringneck-haikou.dts @@ -86,7 +86,7 @@ sgtl5000_clk: sgtl5000-oscillator { compatible = "fixed-clock"; #clock-cells = <0>; - clock-frequency = <24576000>; + clock-frequency = <24576000>; }; dc_12v: dc-12v-regulator { diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts b/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts index 8f399c4317bd..e3a839a12dc6 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts +++ b/arch/arm64/boot/dts/rockchip/rk3588s-orangepi-5.dts @@ -38,7 +38,7 @@ leds { compatible = "gpio-leds"; pinctrl-names = "default"; - pinctrl-0 =<&leds_gpio>; + pinctrl-0 = <&leds_gpio>; led-1 { gpios = <&gpio1 RK_PA2 GPIO_ACTIVE_HIGH>; -- cgit v1.2.3 From 0c349b5001f8bdcead844484c15a0c4dfb341157 Mon Sep 17 00:00:00 2001 From: Alex Bee Date: Mon, 27 Nov 2023 19:46:44 +0100 Subject: ARM: dts: rockchip: Fix sdmmc_pwren's pinmux setting for RK3128 RK3128's reference design uses sdmmc_pwren pincontrol as GPIO - see [0]. Let's change it in the SoC DT as well. [0] https://github.com/rockchip-linux/kernel/commit/8c62deaf6025 Fixes: a0201bff6259 ("ARM: dts: rockchip: add rk3128 soc dtsi") Signed-off-by: Alex Bee Link: https://lore.kernel.org/r/20231127184643.13314-2-knaerzche@gmail.com Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rockchip/rk3128.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/rockchip/rk3128.dtsi b/arch/arm/boot/dts/rockchip/rk3128.dtsi index 7bf557c99561..01edf244ddee 100644 --- a/arch/arm/boot/dts/rockchip/rk3128.dtsi +++ b/arch/arm/boot/dts/rockchip/rk3128.dtsi @@ -848,7 +848,7 @@ }; sdmmc_pwren: sdmmc-pwren { - rockchip,pins = <1 RK_PB6 1 &pcfg_pull_default>; + rockchip,pins = <1 RK_PB6 RK_FUNC_GPIO &pcfg_pull_default>; }; sdmmc_bus4: sdmmc-bus4 { -- cgit v1.2.3 From db2832309a82b9acc4b8cc33a1831d36507ec13e Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 24 Nov 2023 08:48:52 +0100 Subject: x86/xen: fix percpu vcpu_info allocation Today the percpu struct vcpu_info is allocated via DEFINE_PER_CPU(), meaning that it could cross a page boundary. In this case registering it with the hypervisor will fail, resulting in a panic(). This can easily be fixed by using DEFINE_PER_CPU_ALIGNED() instead, as struct vcpu_info is guaranteed to have a size of 64 bytes, matching the cache line size of x86 64-bit processors (Xen doesn't support 32-bit processors). Fixes: 5ead97c84fa7 ("xen: Core Xen implementation") Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Link: https://lore.kernel.org/r/20231124074852.25161-1-jgross@suse.com Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten.c | 6 +++++- arch/x86/xen/xen-ops.h | 2 +- 2 files changed, 6 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 0337392a3121..3c61bb98c10e 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -33,9 +33,12 @@ EXPORT_SYMBOL_GPL(hypercall_page); * and xen_vcpu_setup for details. By default it points to share_info->vcpu_info * but during boot it is switched to point to xen_vcpu_info. * The pointer is used in xen_evtchn_do_upcall to acknowledge pending events. + * Make sure that xen_vcpu_info doesn't cross a page boundary by making it + * cache-line aligned (the struct is guaranteed to have a size of 64 bytes, + * which matches the cache line size of 64-bit x86 processors). */ DEFINE_PER_CPU(struct vcpu_info *, xen_vcpu); -DEFINE_PER_CPU(struct vcpu_info, xen_vcpu_info); +DEFINE_PER_CPU_ALIGNED(struct vcpu_info, xen_vcpu_info); /* Linux <-> Xen vCPU id mapping */ DEFINE_PER_CPU(uint32_t, xen_vcpu_id); @@ -160,6 +163,7 @@ void xen_vcpu_setup(int cpu) int err; struct vcpu_info *vcpup; + BUILD_BUG_ON(sizeof(*vcpup) > SMP_CACHE_BYTES); BUG_ON(HYPERVISOR_shared_info == &xen_dummy_shared_info); /* diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index 408a2aa66c69..a87ab36889e7 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -21,7 +21,7 @@ extern void *xen_initial_gdt; struct trap_info; void xen_copy_trap_info(struct trap_info *traps); -DECLARE_PER_CPU(struct vcpu_info, xen_vcpu_info); +DECLARE_PER_CPU_ALIGNED(struct vcpu_info, xen_vcpu_info); DECLARE_PER_CPU(unsigned long, xen_cr3); DECLARE_PER_CPU(unsigned long, xen_current_cr3); -- cgit v1.2.3 From 5e1d824f9a283cbf90f25241b66d1f69adb3835b Mon Sep 17 00:00:00 2001 From: Timothy Pearson Date: Sun, 19 Nov 2023 09:18:02 -0600 Subject: powerpc: Don't clobber f0/vs0 during fp|altivec register save During floating point and vector save to thread data f0/vs0 are clobbered by the FPSCR/VSCR store routine. This has been obvserved to lead to userspace register corruption and application data corruption with io-uring. Fix it by restoring f0/vs0 after FPSCR/VSCR store has completed for all the FP, altivec, VMX register save paths. Tested under QEMU in kvm mode, running on a Talos II workstation with dual POWER9 DD2.2 CPUs. Additional detail (mpe): Typically save_fpu() is called from __giveup_fpu() which saves the FP regs and also *turns off FP* in the tasks MSR, meaning the kernel will reload the FP regs from the thread struct before letting the task use FP again. So in that case save_fpu() is free to clobber f0 because the FP regs no longer hold live values for the task. There is another case though, which is the path via: sys_clone() ... copy_process() dup_task_struct() arch_dup_task_struct() flush_all_to_thread() save_all() That path saves the FP regs but leaves them live. That's meant as an optimisation for a process that's using FP/VSX and then calls fork(), leaving the regs live means the parent process doesn't have to take a fault after the fork to get its FP regs back. The optimisation was added in commit 8792468da5e1 ("powerpc: Add the ability to save FPU without giving it up"). That path does clobber f0, but f0 is volatile across function calls, and typically programs reach copy_process() from userspace via a syscall wrapper function. So in normal usage f0 being clobbered across a syscall doesn't cause visible data corruption. But there is now a new path, because io-uring can call copy_process() via create_io_thread() from the signal handling path. That's OK if the signal is handled as part of syscall return, but it's not OK if the signal is handled due to some other interrupt. That path is: interrupt_return_srr_user() interrupt_exit_user_prepare() interrupt_exit_user_prepare_main() do_notify_resume() get_signal() task_work_run() create_worker_cb() create_io_worker() copy_process() dup_task_struct() arch_dup_task_struct() flush_all_to_thread() save_all() if (tsk->thread.regs->msr & MSR_FP) save_fpu() # f0 is clobbered and potentially live in userspace Note the above discussion applies equally to save_altivec(). Fixes: 8792468da5e1 ("powerpc: Add the ability to save FPU without giving it up") Cc: stable@vger.kernel.org # v4.6+ Closes: https://lore.kernel.org/all/480932026.45576726.1699374859845.JavaMail.zimbra@raptorengineeringinc.com/ Closes: https://lore.kernel.org/linuxppc-dev/480221078.47953493.1700206777956.JavaMail.zimbra@raptorengineeringinc.com/ Tested-by: Timothy Pearson Tested-by: Jens Axboe Signed-off-by: Timothy Pearson [mpe: Reword change log to describe exact path of corruption & other minor tweaks] Signed-off-by: Michael Ellerman Link: https://msgid.link/1921539696.48534988.1700407082933.JavaMail.zimbra@raptorengineeringinc.com --- arch/powerpc/kernel/fpu.S | 13 +++++++++++++ arch/powerpc/kernel/vector.S | 2 ++ 2 files changed, 15 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S index 6a9acfb690c9..2f8f3f93cbb6 100644 --- a/arch/powerpc/kernel/fpu.S +++ b/arch/powerpc/kernel/fpu.S @@ -23,6 +23,15 @@ #include #ifdef CONFIG_VSX +#define __REST_1FPVSR(n,c,base) \ +BEGIN_FTR_SECTION \ + b 2f; \ +END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ + REST_FPR(n,base); \ + b 3f; \ +2: REST_VSR(n,c,base); \ +3: + #define __REST_32FPVSRS(n,c,base) \ BEGIN_FTR_SECTION \ b 2f; \ @@ -41,9 +50,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX); \ 2: SAVE_32VSRS(n,c,base); \ 3: #else +#define __REST_1FPVSR(n,b,base) REST_FPR(n, base) #define __REST_32FPVSRS(n,b,base) REST_32FPRS(n, base) #define __SAVE_32FPVSRS(n,b,base) SAVE_32FPRS(n, base) #endif +#define REST_1FPVSR(n,c,base) __REST_1FPVSR(n,__REG_##c,__REG_##base) #define REST_32FPVSRS(n,c,base) __REST_32FPVSRS(n,__REG_##c,__REG_##base) #define SAVE_32FPVSRS(n,c,base) __SAVE_32FPVSRS(n,__REG_##c,__REG_##base) @@ -67,6 +78,7 @@ _GLOBAL(store_fp_state) SAVE_32FPVSRS(0, R4, R3) mffs fr0 stfd fr0,FPSTATE_FPSCR(r3) + REST_1FPVSR(0, R4, R3) blr EXPORT_SYMBOL(store_fp_state) @@ -138,4 +150,5 @@ _GLOBAL(save_fpu) 2: SAVE_32FPVSRS(0, R4, R6) mffs fr0 stfd fr0,FPSTATE_FPSCR(r6) + REST_1FPVSR(0, R4, R6) blr diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S index 4094e4c4c77a..80b3f6e476b6 100644 --- a/arch/powerpc/kernel/vector.S +++ b/arch/powerpc/kernel/vector.S @@ -33,6 +33,7 @@ _GLOBAL(store_vr_state) mfvscr v0 li r4, VRSTATE_VSCR stvx v0, r4, r3 + lvx v0, 0, r3 blr EXPORT_SYMBOL(store_vr_state) @@ -109,6 +110,7 @@ _GLOBAL(save_altivec) mfvscr v0 li r4,VRSTATE_VSCR stvx v0,r4,r7 + lvx v0,0,r7 blr #ifdef CONFIG_VSX -- cgit v1.2.3 From dc761f11af2e39119d3a7942e3d10615f3d900e7 Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sat, 18 Nov 2023 13:42:52 +0100 Subject: ARM: dts: bcm2711-rpi-400: Fix delete-node of led_act The LED ACT which is included from bcm2711-rpi-4-b doesn't exists on the Raspberry Pi 400. So the bcm2711-rpi-400.dts tries to use the delete-node directive in order to remove the complete node. Unfortunately the usage get broken in commit 1156e3a78bcc ("ARM: dts: bcm283x: Move ACT LED into separate dtsi") and now ACT and PWR LED using the same GPIO and this prevent probing of led-gpios on Raspberry Pi 400: leds-gpio: probe of leds failed with error -16 So fix the delete-node directive. Fixes: 1156e3a78bcc ("ARM: dts: bcm283x: Move ACT LED into separate dtsi") Signed-off-by: Stefan Wahren Link: https://lore.kernel.org/r/20231118124252.14838-3-wahrenst@gmx.net Signed-off-by: Florian Fainelli --- arch/arm/boot/dts/broadcom/bcm2711-rpi-400.dts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/broadcom/bcm2711-rpi-400.dts b/arch/arm/boot/dts/broadcom/bcm2711-rpi-400.dts index 1ab8184302db..5a2869a18bd5 100644 --- a/arch/arm/boot/dts/broadcom/bcm2711-rpi-400.dts +++ b/arch/arm/boot/dts/broadcom/bcm2711-rpi-400.dts @@ -36,9 +36,7 @@ gpios = <&gpio 42 GPIO_ACTIVE_HIGH>; }; -&leds { - /delete-node/ led_act; -}; +/delete-node/ &led_act; &pm { /delete-property/ system-power-controller; -- cgit v1.2.3 From 61b94d54421a1f3670ddd5396ec70afe833e9405 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Thu, 6 Jul 2023 11:58:41 +0200 Subject: arm64: dts: mediatek: mt8195: Fix PM suspend/resume with venc clocks Before suspending the LARBs we're making sure that any operation is done: this never happens because we are unexpectedly unclocking the LARB20 before executing the suspend handler for the MediaTek Smart Multimedia Interface (SMI) and the cause of this is incorrect clocks on this LARB. Fix this issue by changing the Local Arbiter 20 (used by the video encoder secondary core) apb clock to CLK_VENC_CORE1_VENC; furthermore, in order to make sure that both the PM resume and video encoder operation is stable, add the CLK_VENC(_CORE1)_LARB clock to the VENC (main core) and VENC_CORE1 power domains, as this IP cannot communicate with the rest of the system (the AP) without local arbiter clocks being operational. Cc: stable@vger.kernel.org Fixes: 3b5838d1d82e ("arm64: dts: mt8195: Add iommu and smi nodes") Fixes: 2b515194bf0c ("arm64: dts: mt8195: Add power domains controller") Reviewed-by: Alexandre Mergnat Link: https://lore.kernel.org/r/20230706095841.109315-1-angelogioacchino.delregno@collabora.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt8195.dtsi | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/mediatek/mt8195.dtsi b/arch/arm64/boot/dts/mediatek/mt8195.dtsi index 54c674c45b49..e0ac2e9f5b72 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195.dtsi @@ -627,6 +627,8 @@ power-domain@MT8195_POWER_DOMAIN_VENC_CORE1 { reg = ; + clocks = <&vencsys_core1 CLK_VENC_CORE1_LARB>; + clock-names = "venc1-larb"; mediatek,infracfg = <&infracfg_ao>; #power-domain-cells = <0>; }; @@ -689,6 +691,8 @@ power-domain@MT8195_POWER_DOMAIN_VENC { reg = ; + clocks = <&vencsys CLK_VENC_LARB>; + clock-names = "venc0-larb"; mediatek,infracfg = <&infracfg_ao>; #power-domain-cells = <0>; }; @@ -2665,7 +2669,7 @@ reg = <0 0x1b010000 0 0x1000>; mediatek,larb-id = <20>; mediatek,smi = <&smi_common_vpp>; - clocks = <&vencsys_core1 CLK_VENC_CORE1_LARB>, + clocks = <&vencsys_core1 CLK_VENC_CORE1_VENC>, <&vencsys_core1 CLK_VENC_CORE1_GALS>, <&vppsys0 CLK_VPP0_GALS_VDO0_VDO1_VENCSYS_CORE1>; clock-names = "apb", "smi", "gals"; -- cgit v1.2.3 From 19cba9a6c071db57888dc6b2ec1d9bf8996ea681 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Wed, 25 Oct 2023 11:38:13 +0200 Subject: arm64: dts: mediatek: mt8183: Fix unit address for scp reserved memory The reserved memory for scp had node name "scp_mem_region" and also without unit-address: change the name to "memory@(address)". This fixes a unit_address_vs_reg warning. Cc: stable@vger.kernel.org Fixes: 1652dbf7363a ("arm64: dts: mt8183: add scp node") Link: https://lore.kernel.org/r/20231025093816.44327-6-angelogioacchino.delregno@collabora.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt8183-evb.dts | 2 +- arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/mediatek/mt8183-evb.dts b/arch/arm64/boot/dts/mediatek/mt8183-evb.dts index ce336a48c897..7adf473f15dc 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-evb.dts +++ b/arch/arm64/boot/dts/mediatek/mt8183-evb.dts @@ -31,7 +31,7 @@ #address-cells = <2>; #size-cells = <2>; ranges; - scp_mem_reserved: scp_mem_region { + scp_mem_reserved: memory@50000000 { compatible = "shared-dma-pool"; reg = <0 0x50000000 0 0x2900000>; no-map; diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi index bf7de35ffcbc..6e421e941e3a 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi @@ -116,7 +116,7 @@ #size-cells = <2>; ranges; - scp_mem_reserved: scp_mem_region { + scp_mem_reserved: memory@50000000 { compatible = "shared-dma-pool"; reg = <0 0x50000000 0 0x2900000>; no-map; -- cgit v1.2.3 From 9dea1c724fc36643e83216c1f5a26613412150db Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Wed, 25 Oct 2023 11:38:14 +0200 Subject: arm64: dts: mediatek: mt8183-evb: Fix unit_address_vs_reg warning on ntc The NTC is defined as ntc@0 but it doesn't need any address at all. Fix the unit_address_vs_reg warning by dropping the unit address: since the node name has to be generic also fully rename it from ntc@0 to thermal-sensor. Cc: stable@vger.kernel.org Fixes: ff9ea5c62279 ("arm64: dts: mediatek: mt8183-evb: Add node for thermistor") Link: https://lore.kernel.org/r/20231025093816.44327-7-angelogioacchino.delregno@collabora.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt8183-evb.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/mediatek/mt8183-evb.dts b/arch/arm64/boot/dts/mediatek/mt8183-evb.dts index 7adf473f15dc..77f9ab94c00b 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-evb.dts +++ b/arch/arm64/boot/dts/mediatek/mt8183-evb.dts @@ -38,7 +38,7 @@ }; }; - ntc@0 { + thermal-sensor { compatible = "murata,ncp03wf104"; pullup-uv = <1800000>; pullup-ohm = <390000>; -- cgit v1.2.3 From 24165c5dad7ba7c7624d05575a5e0cc851396c71 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Wed, 25 Oct 2023 11:38:15 +0200 Subject: arm64: dts: mediatek: mt8173-evb: Fix regulator-fixed node names Fix a unit_address_vs_reg warning for the USB VBUS fixed regulators by renaming the regulator nodes from regulator@{0,1} to regulator-usb-p0 and regulator-usb-p1. Cc: stable@vger.kernel.org Fixes: c0891284a74a ("arm64: dts: mediatek: add USB3 DRD driver") Link: https://lore.kernel.org/r/20231025093816.44327-8-angelogioacchino.delregno@collabora.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt8173-evb.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts index 5122963d8743..d258c80213b2 100644 --- a/arch/arm64/boot/dts/mediatek/mt8173-evb.dts +++ b/arch/arm64/boot/dts/mediatek/mt8173-evb.dts @@ -44,7 +44,7 @@ id-gpio = <&pio 16 GPIO_ACTIVE_HIGH>; }; - usb_p1_vbus: regulator@0 { + usb_p1_vbus: regulator-usb-p1 { compatible = "regulator-fixed"; regulator-name = "usb_vbus"; regulator-min-microvolt = <5000000>; @@ -53,7 +53,7 @@ enable-active-high; }; - usb_p0_vbus: regulator@1 { + usb_p0_vbus: regulator-usb-p0 { compatible = "regulator-fixed"; regulator-name = "vbus"; regulator-min-microvolt = <5000000>; -- cgit v1.2.3 From 5a60d63439694590cd5ab1f998fc917ff7ba1c1d Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Wed, 25 Oct 2023 11:38:16 +0200 Subject: arm64: dts: mediatek: mt8183: Move thermal-zones to the root node The thermal zones are not a soc bus device: move it to the root node to solve simple_bus_reg warnings. Cc: stable@vger.kernel.org Fixes: b325ce39785b ("arm64: dts: mt8183: add thermal zone node") Link: https://lore.kernel.org/r/20231025093816.44327-9-angelogioacchino.delregno@collabora.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt8183.dtsi | 242 +++++++++++++++---------------- 1 file changed, 121 insertions(+), 121 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/mediatek/mt8183.dtsi b/arch/arm64/boot/dts/mediatek/mt8183.dtsi index 5169779d01df..976dc968b3ca 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183.dtsi @@ -1210,127 +1210,6 @@ nvmem-cell-names = "calibration-data"; }; - thermal_zones: thermal-zones { - cpu_thermal: cpu-thermal { - polling-delay-passive = <100>; - polling-delay = <500>; - thermal-sensors = <&thermal 0>; - sustainable-power = <5000>; - - trips { - threshold: trip-point0 { - temperature = <68000>; - hysteresis = <2000>; - type = "passive"; - }; - - target: trip-point1 { - temperature = <80000>; - hysteresis = <2000>; - type = "passive"; - }; - - cpu_crit: cpu-crit { - temperature = <115000>; - hysteresis = <2000>; - type = "critical"; - }; - }; - - cooling-maps { - map0 { - trip = <&target>; - cooling-device = <&cpu0 - THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>, - <&cpu1 - THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>, - <&cpu2 - THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>, - <&cpu3 - THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>; - contribution = <3072>; - }; - map1 { - trip = <&target>; - cooling-device = <&cpu4 - THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>, - <&cpu5 - THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>, - <&cpu6 - THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>, - <&cpu7 - THERMAL_NO_LIMIT - THERMAL_NO_LIMIT>; - contribution = <1024>; - }; - }; - }; - - /* The tzts1 ~ tzts6 don't need to polling */ - /* The tzts1 ~ tzts6 don't need to thermal throttle */ - - tzts1: tzts1 { - polling-delay-passive = <0>; - polling-delay = <0>; - thermal-sensors = <&thermal 1>; - sustainable-power = <5000>; - trips {}; - cooling-maps {}; - }; - - tzts2: tzts2 { - polling-delay-passive = <0>; - polling-delay = <0>; - thermal-sensors = <&thermal 2>; - sustainable-power = <5000>; - trips {}; - cooling-maps {}; - }; - - tzts3: tzts3 { - polling-delay-passive = <0>; - polling-delay = <0>; - thermal-sensors = <&thermal 3>; - sustainable-power = <5000>; - trips {}; - cooling-maps {}; - }; - - tzts4: tzts4 { - polling-delay-passive = <0>; - polling-delay = <0>; - thermal-sensors = <&thermal 4>; - sustainable-power = <5000>; - trips {}; - cooling-maps {}; - }; - - tzts5: tzts5 { - polling-delay-passive = <0>; - polling-delay = <0>; - thermal-sensors = <&thermal 5>; - sustainable-power = <5000>; - trips {}; - cooling-maps {}; - }; - - tztsABB: tztsABB { - polling-delay-passive = <0>; - polling-delay = <0>; - thermal-sensors = <&thermal 6>; - sustainable-power = <5000>; - trips {}; - cooling-maps {}; - }; - }; - pwm0: pwm@1100e000 { compatible = "mediatek,mt8183-disp-pwm"; reg = <0 0x1100e000 0 0x1000>; @@ -2105,4 +1984,125 @@ power-domains = <&spm MT8183_POWER_DOMAIN_CAM>; }; }; + + thermal_zones: thermal-zones { + cpu_thermal: cpu-thermal { + polling-delay-passive = <100>; + polling-delay = <500>; + thermal-sensors = <&thermal 0>; + sustainable-power = <5000>; + + trips { + threshold: trip-point0 { + temperature = <68000>; + hysteresis = <2000>; + type = "passive"; + }; + + target: trip-point1 { + temperature = <80000>; + hysteresis = <2000>; + type = "passive"; + }; + + cpu_crit: cpu-crit { + temperature = <115000>; + hysteresis = <2000>; + type = "critical"; + }; + }; + + cooling-maps { + map0 { + trip = <&target>; + cooling-device = <&cpu0 + THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>, + <&cpu1 + THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>, + <&cpu2 + THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>, + <&cpu3 + THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>; + contribution = <3072>; + }; + map1 { + trip = <&target>; + cooling-device = <&cpu4 + THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>, + <&cpu5 + THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>, + <&cpu6 + THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>, + <&cpu7 + THERMAL_NO_LIMIT + THERMAL_NO_LIMIT>; + contribution = <1024>; + }; + }; + }; + + /* The tzts1 ~ tzts6 don't need to polling */ + /* The tzts1 ~ tzts6 don't need to thermal throttle */ + + tzts1: tzts1 { + polling-delay-passive = <0>; + polling-delay = <0>; + thermal-sensors = <&thermal 1>; + sustainable-power = <5000>; + trips {}; + cooling-maps {}; + }; + + tzts2: tzts2 { + polling-delay-passive = <0>; + polling-delay = <0>; + thermal-sensors = <&thermal 2>; + sustainable-power = <5000>; + trips {}; + cooling-maps {}; + }; + + tzts3: tzts3 { + polling-delay-passive = <0>; + polling-delay = <0>; + thermal-sensors = <&thermal 3>; + sustainable-power = <5000>; + trips {}; + cooling-maps {}; + }; + + tzts4: tzts4 { + polling-delay-passive = <0>; + polling-delay = <0>; + thermal-sensors = <&thermal 4>; + sustainable-power = <5000>; + trips {}; + cooling-maps {}; + }; + + tzts5: tzts5 { + polling-delay-passive = <0>; + polling-delay = <0>; + thermal-sensors = <&thermal 5>; + sustainable-power = <5000>; + trips {}; + cooling-maps {}; + }; + + tztsABB: tztsABB { + polling-delay-passive = <0>; + polling-delay = <0>; + thermal-sensors = <&thermal 6>; + sustainable-power = <5000>; + trips {}; + cooling-maps {}; + }; + }; }; -- cgit v1.2.3 From 8980c30141d3986beab815d85762b9c67196ed72 Mon Sep 17 00:00:00 2001 From: Hsin-Yi Wang Date: Thu, 26 Oct 2023 12:09:10 -0700 Subject: arm64: dts: mt8183: kukui: Fix underscores in node names Replace underscores with hyphens in pinctrl node names both for consistency and to adhere to the bindings. Cc: stable@vger.kernel.org Fixes: cd894e274b74 ("arm64: dts: mt8183: Add krane-sku176 board") Fixes: 1652dbf7363a ("arm64: dts: mt8183: add scp node") Fixes: 27eaf34df364 ("arm64: dts: mt8183: config dsi node") Signed-off-by: Hsin-Yi Wang Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20231026191343.3345279-2-hsinyi@chromium.org Signed-off-by: AngeloGioacchino Del Regno --- .../boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi | 6 +- arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi | 94 +++++++++++----------- 2 files changed, 50 insertions(+), 50 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi index bf97b60ae4d1..06fde1a9aab7 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi @@ -441,20 +441,20 @@ }; touchscreen_pins: touchscreen-pins { - touch_int_odl { + touch-int-odl { pinmux = ; input-enable; bias-pull-up; }; - touch_rst_l { + touch-rst-l { pinmux = ; output-high; }; }; trackpad_pins: trackpad-pins { - trackpad_int { + trackpad-int { pinmux = ; input-enable; bias-disable; /* pulled externally */ diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi index 6e421e941e3a..7881a27be029 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui.dtsi @@ -460,7 +460,7 @@ &pio { aud_pins_default: audiopins { - pins_bus { + pins-bus { pinmux = , , , @@ -482,7 +482,7 @@ }; aud_pins_tdm_out_on: audiotdmouton { - pins_bus { + pins-bus { pinmux = , , , @@ -494,7 +494,7 @@ }; aud_pins_tdm_out_off: audiotdmoutoff { - pins_bus { + pins-bus { pinmux = , , , @@ -508,13 +508,13 @@ }; bt_pins: bt-pins { - pins_bt_en { + pins-bt-en { pinmux = ; output-low; }; }; - ec_ap_int_odl: ec_ap_int_odl { + ec_ap_int_odl: ec-ap-int-odl { pins1 { pinmux = ; input-enable; @@ -522,7 +522,7 @@ }; }; - h1_int_od_l: h1_int_od_l { + h1_int_od_l: h1-int-od-l { pins1 { pinmux = ; input-enable; @@ -530,7 +530,7 @@ }; i2c0_pins: i2c0 { - pins_bus { + pins-bus { pinmux = , ; mediatek,pull-up-adv = <3>; @@ -539,7 +539,7 @@ }; i2c1_pins: i2c1 { - pins_bus { + pins-bus { pinmux = , ; mediatek,pull-up-adv = <3>; @@ -548,7 +548,7 @@ }; i2c2_pins: i2c2 { - pins_bus { + pins-bus { pinmux = , ; bias-disable; @@ -557,7 +557,7 @@ }; i2c3_pins: i2c3 { - pins_bus { + pins-bus { pinmux = , ; mediatek,pull-up-adv = <3>; @@ -566,7 +566,7 @@ }; i2c4_pins: i2c4 { - pins_bus { + pins-bus { pinmux = , ; bias-disable; @@ -575,7 +575,7 @@ }; i2c5_pins: i2c5 { - pins_bus { + pins-bus { pinmux = , ; mediatek,pull-up-adv = <3>; @@ -584,7 +584,7 @@ }; i2c6_pins: i2c6 { - pins_bus { + pins-bus { pinmux = , ; bias-disable; @@ -592,7 +592,7 @@ }; mmc0_pins_default: mmc0-pins-default { - pins_cmd_dat { + pins-cmd-dat { pinmux = , , , @@ -607,13 +607,13 @@ mediatek,pull-up-adv = <01>; }; - pins_clk { + pins-clk { pinmux = ; drive-strength = ; mediatek,pull-down-adv = <10>; }; - pins_rst { + pins-rst { pinmux = ; drive-strength = ; mediatek,pull-down-adv = <01>; @@ -621,7 +621,7 @@ }; mmc0_pins_uhs: mmc0-pins-uhs { - pins_cmd_dat { + pins-cmd-dat { pinmux = , , , @@ -636,19 +636,19 @@ mediatek,pull-up-adv = <01>; }; - pins_clk { + pins-clk { pinmux = ; drive-strength = ; mediatek,pull-down-adv = <10>; }; - pins_ds { + pins-ds { pinmux = ; drive-strength = ; mediatek,pull-down-adv = <10>; }; - pins_rst { + pins-rst { pinmux = ; drive-strength = ; mediatek,pull-up-adv = <01>; @@ -656,7 +656,7 @@ }; mmc1_pins_default: mmc1-pins-default { - pins_cmd_dat { + pins-cmd-dat { pinmux = , , , @@ -666,7 +666,7 @@ mediatek,pull-up-adv = <10>; }; - pins_clk { + pins-clk { pinmux = ; input-enable; mediatek,pull-down-adv = <10>; @@ -674,7 +674,7 @@ }; mmc1_pins_uhs: mmc1-pins-uhs { - pins_cmd_dat { + pins-cmd-dat { pinmux = , , , @@ -685,7 +685,7 @@ mediatek,pull-up-adv = <10>; }; - pins_clk { + pins-clk { pinmux = ; drive-strength = ; mediatek,pull-down-adv = <10>; @@ -693,15 +693,15 @@ }; }; - panel_pins_default: panel_pins_default { - panel_reset { + panel_pins_default: panel-pins-default { + panel-reset { pinmux = ; output-low; bias-pull-up; }; }; - pwm0_pin_default: pwm0_pin_default { + pwm0_pin_default: pwm0-pin-default { pins1 { pinmux = ; output-high; @@ -713,14 +713,14 @@ }; scp_pins: scp { - pins_scp_uart { + pins-scp-uart { pinmux = , ; }; }; spi0_pins: spi0 { - pins_spi { + pins-spi { pinmux = , , , @@ -730,7 +730,7 @@ }; spi1_pins: spi1 { - pins_spi { + pins-spi { pinmux = , , , @@ -740,20 +740,20 @@ }; spi2_pins: spi2 { - pins_spi { + pins-spi { pinmux = , , ; bias-disable; }; - pins_spi_mi { + pins-spi-mi { pinmux = ; mediatek,pull-down-adv = <00>; }; }; spi3_pins: spi3 { - pins_spi { + pins-spi { pinmux = , , , @@ -763,7 +763,7 @@ }; spi4_pins: spi4 { - pins_spi { + pins-spi { pinmux = , , , @@ -773,7 +773,7 @@ }; spi5_pins: spi5 { - pins_spi { + pins-spi { pinmux = , , , @@ -783,63 +783,63 @@ }; uart0_pins_default: uart0-pins-default { - pins_rx { + pins-rx { pinmux = ; input-enable; bias-pull-up; }; - pins_tx { + pins-tx { pinmux = ; }; }; uart1_pins_default: uart1-pins-default { - pins_rx { + pins-rx { pinmux = ; input-enable; bias-pull-up; }; - pins_tx { + pins-tx { pinmux = ; }; - pins_rts { + pins-rts { pinmux = ; output-enable; }; - pins_cts { + pins-cts { pinmux = ; input-enable; }; }; uart1_pins_sleep: uart1-pins-sleep { - pins_rx { + pins-rx { pinmux = ; input-enable; bias-pull-up; }; - pins_tx { + pins-tx { pinmux = ; }; - pins_rts { + pins-rts { pinmux = ; output-enable; }; - pins_cts { + pins-cts { pinmux = ; input-enable; }; }; wifi_pins_pwrseq: wifi-pins-pwrseq { - pins_wifi_enable { + pins-wifi-enable { pinmux = ; output-low; }; }; wifi_pins_wakeup: wifi-pins-wakeup { - pins_wifi_wakeup { + pins-wifi-wakeup { pinmux = ; input-enable; }; -- cgit v1.2.3 From 8dfe51c3f6ef31502fca3e2da8cd250ebbe4b8f2 Mon Sep 17 00:00:00 2001 From: Eric Woudstra Date: Wed, 25 Oct 2023 19:08:28 +0200 Subject: arm64: dts: mt7986: fix emmc hs400 mode without uboot initialization Eric reports errors on emmc with hs400 mode when booting linux on bpi-r3 without uboot [1]. Booting with uboot does not show this because clocks seem to be initialized by uboot. Fix this by adding assigned-clocks and assigned-clock-parents like it's done in uboot [2]. [1] https://forum.banana-pi.org/t/bpi-r3-kernel-fails-setting-emmc-clock-to-416m-depends-on-u-boot/15170 [2] https://github.com/u-boot/u-boot/blob/master/arch/arm/dts/mt7986.dtsi#L287 Cc: stable@vger.kernel.org Fixes: 513b49d19b34 ("arm64: dts: mt7986: add mmc related device nodes") Signed-off-by: Eric Woudstra Signed-off-by: Frank Wunderlich Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20231025170832.78727-2-linux@fw-web.de Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt7986a.dtsi | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi index 24eda00e320d..77ddd9e44ed2 100644 --- a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi @@ -374,6 +374,10 @@ reg = <0 0x11230000 0 0x1000>, <0 0x11c20000 0 0x1000>; interrupts = ; + assigned-clocks = <&topckgen CLK_TOP_EMMC_416M_SEL>, + <&topckgen CLK_TOP_EMMC_250M_SEL>; + assigned-clock-parents = <&apmixedsys CLK_APMIXED_MPLL>, + <&topckgen CLK_TOP_NET1PLL_D5_D2>; clocks = <&topckgen CLK_TOP_EMMC_416M_SEL>, <&infracfg CLK_INFRA_MSDC_HCK_CK>, <&infracfg CLK_INFRA_MSDC_CK>, -- cgit v1.2.3 From 6413cbc17f89b3a160f3a6f3fad1232b1678fe40 Mon Sep 17 00:00:00 2001 From: Frank Wunderlich Date: Wed, 25 Oct 2023 19:08:29 +0200 Subject: arm64: dts: mt7986: define 3W max power to both SFP on BPI-R3 All SFP power supplies are connected to the system VDD33 which is 3v3/8A. Set 3A per SFP slot to allow SFPs work which need more power than the default 1W. Cc: stable@vger.kernel.org Fixes: 8e01fb15b815 ("arm64: dts: mt7986: add Bananapi R3") Signed-off-by: Frank Wunderlich Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20231025170832.78727-3-linux@fw-web.de Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts b/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts index af4a4309bda4..f9702284607a 100644 --- a/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts +++ b/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts @@ -126,6 +126,7 @@ compatible = "sff,sfp"; i2c-bus = <&i2c_sfp1>; los-gpios = <&pio 46 GPIO_ACTIVE_HIGH>; + maximum-power-milliwatt = <3000>; mod-def0-gpios = <&pio 49 GPIO_ACTIVE_LOW>; tx-disable-gpios = <&pio 20 GPIO_ACTIVE_HIGH>; tx-fault-gpios = <&pio 7 GPIO_ACTIVE_HIGH>; @@ -137,6 +138,7 @@ i2c-bus = <&i2c_sfp2>; los-gpios = <&pio 31 GPIO_ACTIVE_HIGH>; mod-def0-gpios = <&pio 47 GPIO_ACTIVE_LOW>; + maximum-power-milliwatt = <3000>; tx-disable-gpios = <&pio 15 GPIO_ACTIVE_HIGH>; tx-fault-gpios = <&pio 48 GPIO_ACTIVE_HIGH>; }; -- cgit v1.2.3 From 1fcda8ceb014aafd56f10b33e0077c93b5dd45d1 Mon Sep 17 00:00:00 2001 From: Frank Wunderlich Date: Wed, 25 Oct 2023 19:08:30 +0200 Subject: arm64: dts: mt7986: change cooling trips Add Critical and hot trips for emergency system shutdown and limiting system load. Change passive trip to active to make sure fan is activated on the lowest trip. Cc: stable@vger.kernel.org Fixes: 1f5be05132f3 ("arm64: dts: mt7986: add thermal-zones") Fixes: c26f779a2295 ("arm64: dts: mt7986: add pwm-fan and cooling-maps to BPI-R3 dts") Suggested-by: Daniel Golle Signed-off-by: Frank Wunderlich Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20231025170832.78727-4-linux@fw-web.de Signed-off-by: AngeloGioacchino Del Regno --- .../boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts | 10 +++++----- arch/arm64/boot/dts/mediatek/mt7986a.dtsi | 20 ++++++++++++++++---- 2 files changed, 21 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts b/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts index f9702284607a..b876e501216b 100644 --- a/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts +++ b/arch/arm64/boot/dts/mediatek/mt7986a-bananapi-bpi-r3.dts @@ -152,16 +152,16 @@ trip = <&cpu_trip_active_high>; }; - cpu-active-low { + cpu-active-med { /* active: set fan to cooling level 1 */ cooling-device = <&fan 1 1>; - trip = <&cpu_trip_active_low>; + trip = <&cpu_trip_active_med>; }; - cpu-passive { - /* passive: set fan to cooling level 0 */ + cpu-active-low { + /* active: set fan to cooling level 0 */ cooling-device = <&fan 0 0>; - trip = <&cpu_trip_passive>; + trip = <&cpu_trip_active_low>; }; }; }; diff --git a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi index 77ddd9e44ed2..fc751e049953 100644 --- a/arch/arm64/boot/dts/mediatek/mt7986a.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7986a.dtsi @@ -614,22 +614,34 @@ thermal-sensors = <&thermal 0>; trips { + cpu_trip_crit: crit { + temperature = <125000>; + hysteresis = <2000>; + type = "critical"; + }; + + cpu_trip_hot: hot { + temperature = <120000>; + hysteresis = <2000>; + type = "hot"; + }; + cpu_trip_active_high: active-high { temperature = <115000>; hysteresis = <2000>; type = "active"; }; - cpu_trip_active_low: active-low { + cpu_trip_active_med: active-med { temperature = <85000>; hysteresis = <2000>; type = "active"; }; - cpu_trip_passive: passive { - temperature = <40000>; + cpu_trip_active_low: active-low { + temperature = <60000>; hysteresis = <2000>; - type = "passive"; + type = "active"; }; }; }; -- cgit v1.2.3 From 59fa1e51ba54e1f513985a8177969b62973f7fd5 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Thu, 5 Oct 2023 17:11:50 +0200 Subject: arm64: dts: mediatek: mt8186: Change gpu speedbin nvmem cell name MT8186's GPU speedbin value must be interpreted, or the value will not be meaningful. Use the correct "gpu-speedbin" nvmem cell name for the GPU speedbin to allow triggering the cell info fixup handler, hence feeding the right speedbin number to the users. Cc: stable@vger.kernel.org Fixes: 263d2fd02afc ("arm64: dts: mediatek: mt8186: Add GPU speed bin NVMEM cells") Reviewed-by: Chen-Yu Tsai Link: https://lore.kernel.org/r/20231005151150.355536-1-angelogioacchino.delregno@collabora.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt8186.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/mediatek/mt8186.dtsi b/arch/arm64/boot/dts/mediatek/mt8186.dtsi index f04ae70c470a..f4c4f61c779d 100644 --- a/arch/arm64/boot/dts/mediatek/mt8186.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8186.dtsi @@ -1656,7 +1656,7 @@ #address-cells = <1>; #size-cells = <1>; - gpu_speedbin: gpu-speed-bin@59c { + gpu_speedbin: gpu-speedbin@59c { reg = <0x59c 0x4>; bits = <0 3>; }; -- cgit v1.2.3 From 9adf7580f6d498a5839e02fa1d1535e934364602 Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Thu, 5 Oct 2023 13:30:41 +0300 Subject: arm64: dts: mediatek: mt8186: fix clock names for power domains Clocks for each power domain are split into big categories: pd clocks and subsys clocks. According to the binding, all clocks which have a dash '-' in their name are treated as subsys clocks, and must be placed at the end of the list. The other clocks which are pd clocks must come first. Fixed the naming and the placing of all clocks in the power domains. For the avoidance of doubt, prefixed all subsys clocks with the 'subsys' prefix. The binding does not enforce strict clock names, the driver uses them in bulk, only making a difference for pd clocks vs subsys clocks. The above problem appears to be trivial, however, it leads to incorrect power up and power down sequence of the power domains, because some clocks will be mistakenly taken for subsys clocks and viceversa. One consequence is the fact that if the DIS power domain goes power down and power back up during the boot process, when it comes back up, there are still transactions left on the bus which makes the display inoperable. Some of the clocks for the DIS power domain were wrongly using '_' instead of '-', which again made these clocks being treated as pd clocks instead of subsys clocks. Cc: stable@vger.kernel.org Fixes: d9e43c1e7a38 ("arm64: dts: mt8186: Add power domains controller") Signed-off-by: Eugen Hristev Tested-by: Chen-Yu Tsai Reviewed-by: AngeloGioacchino Del Regno Tested-by: AngeloGioacchino Del Regno Reviewed-by: Alexandre Mergnat Link: https://lore.kernel.org/r/20231005103041.352478-1-eugen.hristev@collabora.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt8186.dtsi | 42 ++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/mediatek/mt8186.dtsi b/arch/arm64/boot/dts/mediatek/mt8186.dtsi index f4c4f61c779d..df0c04f2ba1d 100644 --- a/arch/arm64/boot/dts/mediatek/mt8186.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8186.dtsi @@ -924,7 +924,8 @@ reg = ; clocks = <&topckgen CLK_TOP_SENINF>, <&topckgen CLK_TOP_SENINF1>; - clock-names = "csirx_top0", "csirx_top1"; + clock-names = "subsys-csirx-top0", + "subsys-csirx-top1"; #power-domain-cells = <0>; }; @@ -942,7 +943,8 @@ reg = ; clocks = <&topckgen CLK_TOP_AUDIODSP>, <&topckgen CLK_TOP_ADSP_BUS>; - clock-names = "audioadsp", "adsp_bus"; + clock-names = "audioadsp", + "subsys-adsp-bus"; #address-cells = <1>; #size-cells = <0>; #power-domain-cells = <1>; @@ -975,8 +977,11 @@ <&mmsys CLK_MM_SMI_COMMON>, <&mmsys CLK_MM_SMI_GALS>, <&mmsys CLK_MM_SMI_IOMMU>; - clock-names = "disp", "mdp", "smi_infra", "smi_common", - "smi_gals", "smi_iommu"; + clock-names = "disp", "mdp", + "subsys-smi-infra", + "subsys-smi-common", + "subsys-smi-gals", + "subsys-smi-iommu"; mediatek,infracfg = <&infracfg_ao>; #address-cells = <1>; #size-cells = <0>; @@ -993,15 +998,17 @@ power-domain@MT8186_POWER_DOMAIN_CAM { reg = ; - clocks = <&topckgen CLK_TOP_CAM>, - <&topckgen CLK_TOP_SENINF>, + clocks = <&topckgen CLK_TOP_SENINF>, <&topckgen CLK_TOP_SENINF1>, <&topckgen CLK_TOP_SENINF2>, <&topckgen CLK_TOP_SENINF3>, + <&camsys CLK_CAM2MM_GALS>, <&topckgen CLK_TOP_CAMTM>, - <&camsys CLK_CAM2MM_GALS>; - clock-names = "cam-top", "cam0", "cam1", "cam2", - "cam3", "cam-tm", "gals"; + <&topckgen CLK_TOP_CAM>; + clock-names = "cam0", "cam1", "cam2", + "cam3", "gals", + "subsys-cam-tm", + "subsys-cam-top"; mediatek,infracfg = <&infracfg_ao>; #address-cells = <1>; #size-cells = <0>; @@ -1020,9 +1027,9 @@ power-domain@MT8186_POWER_DOMAIN_IMG { reg = ; - clocks = <&topckgen CLK_TOP_IMG1>, - <&imgsys1 CLK_IMG1_GALS_IMG1>; - clock-names = "img-top", "gals"; + clocks = <&imgsys1 CLK_IMG1_GALS_IMG1>, + <&topckgen CLK_TOP_IMG1>; + clock-names = "gals", "subsys-img-top"; mediatek,infracfg = <&infracfg_ao>; #address-cells = <1>; #size-cells = <0>; @@ -1041,8 +1048,11 @@ <&ipesys CLK_IPE_LARB20>, <&ipesys CLK_IPE_SMI_SUBCOM>, <&ipesys CLK_IPE_GALS_IPE>; - clock-names = "ipe-top", "ipe-larb0", "ipe-larb1", - "ipe-smi", "ipe-gals"; + clock-names = "subsys-ipe-top", + "subsys-ipe-larb0", + "subsys-ipe-larb1", + "subsys-ipe-smi", + "subsys-ipe-gals"; mediatek,infracfg = <&infracfg_ao>; #power-domain-cells = <0>; }; @@ -1061,7 +1071,9 @@ clocks = <&topckgen CLK_TOP_WPE>, <&wpesys CLK_WPE_SMI_LARB8_CK_EN>, <&wpesys CLK_WPE_SMI_LARB8_PCLK_EN>; - clock-names = "wpe0", "larb-ck", "larb-pclk"; + clock-names = "wpe0", + "subsys-larb-ck", + "subsys-larb-pclk"; mediatek,infracfg = <&infracfg_ao>; #power-domain-cells = <0>; }; -- cgit v1.2.3 From 8e6ecbfd44b5542a7598c1c5fc9c6dcb5d367f2a Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Mon, 14 Aug 2023 09:50:42 +0300 Subject: arm64: dts: mediatek: mt7622: fix memory node warning check dtbs_check throws a warning at the memory node: Warning (unit_address_vs_reg): /memory: node has a reg or ranges property, but no unit name fix by adding the address into the node name. Cc: stable@vger.kernel.org Fixes: 0b6286dd96c0 ("arm64: dts: mt7622: add bananapi BPI-R64 board") Signed-off-by: Eugen Hristev Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20230814065042.4973-1-eugen.hristev@collabora.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts | 2 +- arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts index 3b7a176b7904..c46682150e50 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts +++ b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts @@ -73,7 +73,7 @@ }; }; - memory { + memory@40000000 { reg = <0 0x40000000 0 0x40000000>; }; diff --git a/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts b/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts index a885a3fbe456..2dc1bdc74e21 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts +++ b/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts @@ -55,7 +55,7 @@ }; }; - memory { + memory@40000000 { reg = <0 0x40000000 0 0x20000000>; }; -- cgit v1.2.3 From 74543b303a9abfe4fa253d1fa215281baa05ff3a Mon Sep 17 00:00:00 2001 From: Eugen Hristev Date: Mon, 14 Aug 2023 10:10:53 +0300 Subject: arm64: dts: mediatek: mt8183-kukui-jacuzzi: fix dsi unnecessary cells properties dtbs_check throws a warning at the dsi node: Warning (avoid_unnecessary_addr_size): /soc/dsi@14014000: unnecessary #address-cells/#size-cells without "ranges" or child "reg" property Other DTS have a panel child node with a reg, so the parent dtsi must have the address-cells and size-cells, however this specific DT has the panel removed, but not the cells, hence the warning above. If panel is deleted then the cells must also be deleted since they are tied together, as the child node in this DT does not have a reg. Cc: stable@vger.kernel.org Fixes: cabc71b08eb5 ("arm64: dts: mt8183: Add kukui-jacuzzi-damu board") Signed-off-by: Eugen Hristev Reviewed-by: AngeloGioacchino Del Regno Link: https://lore.kernel.org/r/20230814071053.5459-1-eugen.hristev@collabora.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi index 06fde1a9aab7..820260348de9 100644 --- a/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8183-kukui-jacuzzi.dtsi @@ -91,6 +91,8 @@ &dsi0 { status = "okay"; + /delete-property/#size-cells; + /delete-property/#address-cells; /delete-node/panel@0; ports { port { -- cgit v1.2.3 From 5943b8f7449df9881b273db07bdde1e7120dccf0 Mon Sep 17 00:00:00 2001 From: AngeloGioacchino Del Regno Date: Mon, 27 Nov 2023 14:20:26 +0100 Subject: arm64: dts: mediatek: cherry: Fix interrupt cells for MT6360 on I2C7 Change interrupt cells to 2 to suppress interrupts_property warning. Cc: stable@vger.kernel.org Fixes: 0de0fe950f1b ("arm64: dts: mediatek: cherry: Enable MT6360 sub-pmic on I2C7") Link: https://lore.kernel.org/r/20231127132026.165027-1-angelogioacchino.delregno@collabora.com Signed-off-by: AngeloGioacchino Del Regno --- arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi b/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi index dd5b89b73190..5a7cab489ff3 100644 --- a/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt8195-cherry.dtsi @@ -389,7 +389,7 @@ pinctrl-0 = <&i2c7_pins>; pmic@34 { - #interrupt-cells = <1>; + #interrupt-cells = <2>; compatible = "mediatek,mt6360"; reg = <0x34>; interrupt-controller; -- cgit v1.2.3 From dc158d23b33df9033bcc8e7117e8591dd2f9d125 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 22 Nov 2023 12:58:11 +1000 Subject: KVM: PPC: Book3S HV: Fix KVM_RUN clobbering FP/VEC user registers Before running a guest, the host process (e.g., QEMU) FP/VEC registers are saved if they were being used, similarly to when the kernel uses FP registers. The guest values are then loaded into regs, and the host process registers will be restored lazily when it uses FP/VEC. KVM HV has a bug here: the host process registers do get saved, but the user MSR bits remain enabled, which indicates the registers are valid for the process. After they are clobbered by running the guest, this valid indication causes the host process to take on the FP/VEC register values of the guest. Fixes: 34e119c96b2b ("KVM: PPC: Book3S HV P9: Reduce mtmsrd instructions required to save host SPRs") Cc: stable@vger.kernel.org # v5.17+ Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman Link: https://msgid.link/20231122025811.2973-1-npiggin@gmail.com --- arch/powerpc/kernel/process.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 392404688cec..9452a54d356c 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1198,11 +1198,11 @@ void kvmppc_save_user_regs(void) usermsr = current->thread.regs->msr; + /* Caller has enabled FP/VEC/VSX/TM in MSR */ if (usermsr & MSR_FP) - save_fpu(current); - + __giveup_fpu(current); if (usermsr & MSR_VEC) - save_altivec(current); + __giveup_altivec(current); #ifdef CONFIG_PPC_TRANSACTIONAL_MEM if (usermsr & MSR_TM) { -- cgit v1.2.3 From 547c91929f437b42e6a9c90fec1fb5aec3e64aac Mon Sep 17 00:00:00 2001 From: Like Xu Date: Thu, 23 Nov 2023 15:58:18 +0800 Subject: KVM: x86: Get CPL directly when checking if loaded vCPU is in kernel mode When querying whether or not a vCPU "is" running in kernel mode, directly get the CPL if the vCPU is the currently loaded vCPU. In scenarios where a guest is profiled via perf-kvm, querying vcpu->arch.preempted_in_kernel from kvm_guest_state() is wrong if vCPU is actively running, i.e. isn't scheduled out due to being preempted and so preempted_in_kernel is stale. This affects perf/core's ability to accurately tag guest RIP with PERF_RECORD_MISC_GUEST_{KERNEL|USER} and record it in the sample. This causes perf/tool to fail to connect the vCPU RIPs to the guest kernel space symbols when parsing these samples due to incorrect PERF_RECORD_MISC flags: Before (perf-report of a cpu-cycles sample): 1.23% :58945 [unknown] [u] 0xffffffff818012e0 After: 1.35% :60703 [kernel.vmlinux] [g] asm_exc_page_fault Note, checking preempted_in_kernel in kvm_arch_vcpu_in_kernel() is awful as nothing in the API's suggests that it's safe to use if and only if the vCPU was preempted. That can be cleaned up in the future, for now just fix the glaring correctness bug. Note #2, checking vcpu->preempted is NOT safe, as getting the CPL on VMX requires VMREAD, i.e. is correct if and only if the vCPU is loaded. If the target vCPU *was* preempted, then it can be scheduled back in after the check on vcpu->preempted in kvm_vcpu_on_spin(), i.e. KVM could end up trying to do VMREAD on a VMCS that isn't loaded on the current pCPU. Signed-off-by: Like Xu Fixes: e1bfc24577cc ("KVM: Move x86's perf guest info callbacks to generic KVM") Link: https://lore.kernel.org/r/20231123075818.12521-1-likexu@tencent.com [sean: massage changelong, add Fixes] Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 41cce5031126..3b6a0109191b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -12910,7 +12910,10 @@ bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) if (vcpu->arch.guest_state_protected) return true; - return vcpu->arch.preempted_in_kernel; + if (vcpu != kvm_get_running_vcpu()) + return vcpu->arch.preempted_in_kernel; + + return static_call(kvm_x86_get_cpl)(vcpu) == 0; } unsigned long kvm_arch_vcpu_get_ip(struct kvm_vcpu *vcpu) -- cgit v1.2.3 From 27d25348d42161837be08fc63b04a2559d2e781c Mon Sep 17 00:00:00 2001 From: Ashwin Dayanand Kamat Date: Wed, 29 Nov 2023 16:10:29 +0530 Subject: x86/sev: Fix kernel crash due to late update to read-only ghcb_version A write-access violation page fault kernel crash was observed while running cpuhotplug LTP testcases on SEV-ES enabled systems. The crash was observed during hotplug, after the CPU was offlined and the process was migrated to different CPU. setup_ghcb() is called again which tries to update ghcb_version in sev_es_negotiate_protocol(). Ideally this is a read_only variable which is initialised during booting. Trying to write it results in a pagefault: BUG: unable to handle page fault for address: ffffffffba556e70 #PF: supervisor write access in kernel mode #PF: error_code(0x0003) - permissions violation [ ...] Call Trace: ? __die_body.cold+0x1a/0x1f ? __die+0x2a/0x35 ? page_fault_oops+0x10c/0x270 ? setup_ghcb+0x71/0x100 ? __x86_return_thunk+0x5/0x6 ? search_exception_tables+0x60/0x70 ? __x86_return_thunk+0x5/0x6 ? fixup_exception+0x27/0x320 ? kernelmode_fixup_or_oops+0xa2/0x120 ? __bad_area_nosemaphore+0x16a/0x1b0 ? kernel_exc_vmm_communication+0x60/0xb0 ? bad_area_nosemaphore+0x16/0x20 ? do_kern_addr_fault+0x7a/0x90 ? exc_page_fault+0xbd/0x160 ? asm_exc_page_fault+0x27/0x30 ? setup_ghcb+0x71/0x100 ? setup_ghcb+0xe/0x100 cpu_init_exception_handling+0x1b9/0x1f0 The fix is to call sev_es_negotiate_protocol() only in the BSP boot phase, and it only needs to be done once in any case. [ mingo: Refined the changelog. ] Fixes: 95d33bfaa3e1 ("x86/sev: Register GHCB memory when SEV-SNP is active") Suggested-by: Tom Lendacky Co-developed-by: Bo Gan Signed-off-by: Bo Gan Signed-off-by: Ashwin Dayanand Kamat Signed-off-by: Ingo Molnar Acked-by: Tom Lendacky Link: https://lore.kernel.org/r/1701254429-18250-1-git-send-email-kashwindayan@vmware.com --- arch/x86/kernel/sev.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c index 70472eebe719..c67285824e82 100644 --- a/arch/x86/kernel/sev.c +++ b/arch/x86/kernel/sev.c @@ -1234,10 +1234,6 @@ void setup_ghcb(void) if (!cc_platform_has(CC_ATTR_GUEST_STATE_ENCRYPT)) return; - /* First make sure the hypervisor talks a supported protocol. */ - if (!sev_es_negotiate_protocol()) - sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); - /* * Check whether the runtime #VC exception handler is active. It uses * the per-CPU GHCB page which is set up by sev_es_init_vc_handling(). @@ -1254,6 +1250,13 @@ void setup_ghcb(void) return; } + /* + * Make sure the hypervisor talks a supported protocol. + * This gets called only in the BSP boot phase. + */ + if (!sev_es_negotiate_protocol()) + sev_es_terminate(SEV_TERM_SET_GEN, GHCB_SEV_ES_GEN_REQ); + /* * Clear the boot_ghcb. The first exception comes in before the bss * section is cleared. -- cgit v1.2.3 From f5259997f3e8d6edfcc2daf5b2c0b34f074d7bc0 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Mon, 27 Nov 2023 13:00:51 +0100 Subject: arm64: Avoid enabling KPTI unnecessarily Commit 42c5a3b04bf6 refactored the KPTI init code in a way that results in the use of non-global kernel mappings even on systems that have no need for it, and even when KPTI has been disabled explicitly via the command line. Ensure that this only happens when we have decided (based on the detected system-wide CPU features) that KPTI should be enabled. Fixes: 42c5a3b04bf6 ("arm64: Split kpti_install_ng_mappings()") Signed-off-by: Ard Biesheuvel Acked-by: Will Deacon Acked-by: Mark Rutland Link: https://lore.kernel.org/r/20231127120049.2258650-6-ardb@google.com Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpufeature.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 646591c67e7a..91d2d6714969 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1839,6 +1839,10 @@ static int __init __kpti_install_ng_mappings(void *__unused) static void __init kpti_install_ng_mappings(void) { + /* Check whether KPTI is going to be used */ + if (!cpus_have_cap(ARM64_UNMAP_KERNEL_AT_EL0)) + return; + /* * We don't need to rewrite the page-tables if either we've done * it already or we have KASLR enabled and therefore have not -- cgit v1.2.3 From 087e15206d6ac0d46734e2b0ab34370c0fdca481 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 18 Oct 2023 13:46:22 -0700 Subject: KVM: Set file_operations.owner appropriately for all such structures Set .owner for all KVM-owned filed types so that the KVM module is pinned until any files with callbacks back into KVM are completely freed. Using "struct kvm" as a proxy for the module, i.e. keeping KVM-the-module alive while there are active VMs, doesn't provide full protection. Userspace can invoke delete_module() the instant the last reference to KVM is put. If KVM itself puts the last reference, e.g. via kvm_destroy_vm(), then it's possible for KVM to be preempted and deleted/unloaded before KVM fully exits, e.g. when the task running kvm_destroy_vm() is scheduled back in, it will jump to a code page that is no longer mapped. Note, file types that can call into sub-module code, e.g. kvm-intel.ko or kvm-amd.ko on x86, must use the module pointer passed to kvm_init(), not THIS_MODULE (which points at kvm.ko). KVM assumes that if /dev/kvm is reachable, e.g. VMs are active, then the vendor module is loaded. To reduce the probability of forgetting to set .owner entirely, use THIS_MODULE for stats files where KVM does not call back into vendor code. This reverts commit 70375c2d8fa3fb9b0b59207a9c5df1e2e1205c10, and fixes several other file types that have been buggy since their introduction. Fixes: 70375c2d8fa3 ("Revert "KVM: set owner of cpu and vm file operations"") Fixes: 3bcd0662d66f ("KVM: X86: Introduce mmu_rmaps_stat per-vm debugfs file") Reported-by: Al Viro Link: https://lore.kernel.org/all/20231010003746.GN800259@ZenIV Link: https://lore.kernel.org/r/20231018204624.1905300-2-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/debugfs.c | 1 + virt/kvm/kvm_main.c | 11 ++++++++--- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/debugfs.c b/arch/x86/kvm/debugfs.c index ee8c4c3496ed..eea6ea7f14af 100644 --- a/arch/x86/kvm/debugfs.c +++ b/arch/x86/kvm/debugfs.c @@ -182,6 +182,7 @@ static int kvm_mmu_rmaps_stat_release(struct inode *inode, struct file *file) } static const struct file_operations mmu_rmaps_stat_fops = { + .owner = THIS_MODULE, .open = kvm_mmu_rmaps_stat_open, .read = seq_read, .llseek = seq_lseek, diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 486800a7024b..1e65a506985f 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3887,7 +3887,7 @@ static int kvm_vcpu_release(struct inode *inode, struct file *filp) return 0; } -static const struct file_operations kvm_vcpu_fops = { +static struct file_operations kvm_vcpu_fops = { .release = kvm_vcpu_release, .unlocked_ioctl = kvm_vcpu_ioctl, .mmap = kvm_vcpu_mmap, @@ -4081,6 +4081,7 @@ static int kvm_vcpu_stats_release(struct inode *inode, struct file *file) } static const struct file_operations kvm_vcpu_stats_fops = { + .owner = THIS_MODULE, .read = kvm_vcpu_stats_read, .release = kvm_vcpu_stats_release, .llseek = noop_llseek, @@ -4431,7 +4432,7 @@ static int kvm_device_release(struct inode *inode, struct file *filp) return 0; } -static const struct file_operations kvm_device_fops = { +static struct file_operations kvm_device_fops = { .unlocked_ioctl = kvm_device_ioctl, .release = kvm_device_release, KVM_COMPAT(kvm_device_ioctl), @@ -4759,6 +4760,7 @@ static int kvm_vm_stats_release(struct inode *inode, struct file *file) } static const struct file_operations kvm_vm_stats_fops = { + .owner = THIS_MODULE, .read = kvm_vm_stats_read, .release = kvm_vm_stats_release, .llseek = noop_llseek, @@ -5060,7 +5062,7 @@ static long kvm_vm_compat_ioctl(struct file *filp, } #endif -static const struct file_operations kvm_vm_fops = { +static struct file_operations kvm_vm_fops = { .release = kvm_vm_release, .unlocked_ioctl = kvm_vm_ioctl, .llseek = noop_llseek, @@ -6095,6 +6097,9 @@ int kvm_init(unsigned vcpu_size, unsigned vcpu_align, struct module *module) goto err_async_pf; kvm_chardev_ops.owner = module; + kvm_vm_fops.owner = module; + kvm_vcpu_fops.owner = module; + kvm_device_fops.owner = module; kvm_preempt_ops.sched_in = kvm_sched_in; kvm_preempt_ops.sched_out = kvm_sched_out; -- cgit v1.2.3 From ef8d89033c3f1f6a64757f066b2c17e76d1189f8 Mon Sep 17 00:00:00 2001 From: Like Xu Date: Sat, 7 Oct 2023 14:40:19 +0800 Subject: KVM: x86: Remove 'return void' expression for 'void function' The requested info will be stored in 'guest_xsave->region' referenced by the incoming pointer "struct kvm_xsave *guest_xsave", thus there is no need to explicitly use return void expression for a void function "static void kvm_vcpu_ioctl_x86_get_xsave(...)". The issue is caught with [-Wpedantic]. Fixes: 2d287ec65e79 ("x86/fpu: Allow caller to constrain xfeatures when copying to uabi buffer") Signed-off-by: Like Xu Reviewed-by: Maxim Levitsky Link: https://lore.kernel.org/r/20231007064019.17472-1-likexu@tencent.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/x86.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3b6a0109191b..a8bebf5c0a05 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5411,8 +5411,8 @@ static void kvm_vcpu_ioctl_x86_get_xsave2(struct kvm_vcpu *vcpu, static void kvm_vcpu_ioctl_x86_get_xsave(struct kvm_vcpu *vcpu, struct kvm_xsave *guest_xsave) { - return kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region, - sizeof(guest_xsave->region)); + kvm_vcpu_ioctl_x86_get_xsave2(vcpu, (void *)guest_xsave->region, + sizeof(guest_xsave->region)); } static int kvm_vcpu_ioctl_x86_set_xsave(struct kvm_vcpu *vcpu, -- cgit v1.2.3 From 9b8493dc43044376716d789d07699f17d538a7c4 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Fri, 1 Dec 2023 19:37:27 +0100 Subject: x86/CPU/AMD: Check vendor in the AMD microcode callback Commit in Fixes added an AMD-specific microcode callback. However, it didn't check the CPU vendor the kernel runs on explicitly. The only reason the Zenbleed check in it didn't run on other x86 vendors hardware was pure coincidental luck: if (!cpu_has_amd_erratum(c, amd_zenbleed)) return; gives true on other vendors because they don't have those families and models. However, with the removal of the cpu_has_amd_erratum() in 05f5f73936fa ("x86/CPU/AMD: Drop now unused CPU erratum checking function") that coincidental condition is gone, leading to the zenbleed check getting executed on other vendors too. Add the explicit vendor check for the whole callback as it should've been done in the first place. Fixes: 522b1d69219d ("x86/cpu/amd: Add a Zenbleed fix") Cc: Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/r/20231201184226.16749-1-bp@alien8.de --- arch/x86/kernel/cpu/amd.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index a7eab05e5f29..f322ebd053a9 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -1320,6 +1320,9 @@ static void zenbleed_check_cpu(void *unused) void amd_check_microcode(void) { + if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) + return; + on_each_cpu(zenbleed_check_cpu, NULL, 1); } -- cgit v1.2.3 From 9f6acd2b4dfef81dcc45486ac704cf602c88db02 Mon Sep 17 00:00:00 2001 From: Sam Edwards Date: Fri, 1 Dec 2023 23:12:12 -0800 Subject: arm64: dts: rockchip: Fix Turing RK1 interrupt pinctrls The pinctrls for the hym8563 interrupt line and fan-tach input were both mistakenly defined as `pcfg_pull_none`. As these are active-low signals (level-triggered, in the hym8563 case) which may not be driven at times, these should really be pull-up. The lack of any bias results in spurious interrupts. Fix this by modifying the `rockchip,pins` properties as necessary to enable the pull-up resistors. Fixes: 2806a69f3fef6 ("arm64: dts: rockchip: Add Turing RK1 SoM support") Signed-off-by: Sam Edwards Link: https://lore.kernel.org/r/20231202071212.1606800-1-CFSworks@gmail.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi b/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi index 9570b34aca2e..d88c0e852356 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588-turing-rk1.dtsi @@ -235,13 +235,13 @@ &pinctrl { fan { fan_int: fan-int { - rockchip,pins = <0 RK_PA4 RK_FUNC_GPIO &pcfg_pull_none>; + rockchip,pins = <0 RK_PA4 RK_FUNC_GPIO &pcfg_pull_up>; }; }; hym8563 { hym8563_int: hym8563-int { - rockchip,pins = <0 RK_PB0 RK_FUNC_GPIO &pcfg_pull_none>; + rockchip,pins = <0 RK_PB0 RK_FUNC_GPIO &pcfg_pull_up>; }; }; -- cgit v1.2.3 From d8792a5734b0f3e58b898c2e2f910bfac48e9ee3 Mon Sep 17 00:00:00 2001 From: Charlie Jenkins Date: Mon, 27 Nov 2023 14:04:59 -0800 Subject: riscv: Safely remove entries from relocation list MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the safe versions of list and hlist iteration to safely remove entries from the module relocation lists. To allow mutliple threads to load modules concurrently, move relocation list pointers onto the stack rather than using global variables. Fixes: 8fd6c5142395 ("riscv: Add remaining module relocations") Reported-by: Ron Economos Closes: https://lore.kernel.org/linux-riscv/444de86a-7e7c-4de7-5d1d-c1c40eefa4ba@w6rz.net Signed-off-by: Charlie Jenkins Tested-by: Björn Töpel Link: https://lore.kernel.org/r/20231127-module_linking_freeing-v4-1-a2ca1d7027d0@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/module.c | 110 +++++++++++++++++++++++++++++++++------------ 1 file changed, 82 insertions(+), 28 deletions(-) (limited to 'arch') diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 56a8c78e9e21..53593fe58cd8 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -40,15 +40,6 @@ struct relocation_handlers { long buffer); }; -unsigned int initialize_relocation_hashtable(unsigned int num_relocations); -void process_accumulated_relocations(struct module *me); -int add_relocation_to_accumulate(struct module *me, int type, void *location, - unsigned int hashtable_bits, Elf_Addr v); - -struct hlist_head *relocation_hashtable; - -struct list_head used_buckets_list; - /* * The auipc+jalr instruction pair can reach any PC-relative offset * in the range [-2^31 - 2^11, 2^31 - 2^11) @@ -604,7 +595,10 @@ static const struct relocation_handlers reloc_handlers[] = { /* 192-255 nonstandard ABI extensions */ }; -void process_accumulated_relocations(struct module *me) +static void +process_accumulated_relocations(struct module *me, + struct hlist_head **relocation_hashtable, + struct list_head *used_buckets_list) { /* * Only ADD/SUB/SET/ULEB128 should end up here. @@ -624,18 +618,25 @@ void process_accumulated_relocations(struct module *me) * - Each relocation entry for a location address */ struct used_bucket *bucket_iter; + struct used_bucket *bucket_iter_tmp; struct relocation_head *rel_head_iter; + struct hlist_node *rel_head_iter_tmp; struct relocation_entry *rel_entry_iter; + struct relocation_entry *rel_entry_iter_tmp; int curr_type; void *location; long buffer; - list_for_each_entry(bucket_iter, &used_buckets_list, head) { - hlist_for_each_entry(rel_head_iter, bucket_iter->bucket, node) { + list_for_each_entry_safe(bucket_iter, bucket_iter_tmp, + used_buckets_list, head) { + hlist_for_each_entry_safe(rel_head_iter, rel_head_iter_tmp, + bucket_iter->bucket, node) { buffer = 0; location = rel_head_iter->location; - list_for_each_entry(rel_entry_iter, - rel_head_iter->rel_entry, head) { + list_for_each_entry_safe(rel_entry_iter, + rel_entry_iter_tmp, + rel_head_iter->rel_entry, + head) { curr_type = rel_entry_iter->type; reloc_handlers[curr_type].reloc_handler( me, &buffer, rel_entry_iter->value); @@ -648,11 +649,14 @@ void process_accumulated_relocations(struct module *me) kfree(bucket_iter); } - kfree(relocation_hashtable); + kfree(*relocation_hashtable); } -int add_relocation_to_accumulate(struct module *me, int type, void *location, - unsigned int hashtable_bits, Elf_Addr v) +static int add_relocation_to_accumulate(struct module *me, int type, + void *location, + unsigned int hashtable_bits, Elf_Addr v, + struct hlist_head *relocation_hashtable, + struct list_head *used_buckets_list) { struct relocation_entry *entry; struct relocation_head *rel_head; @@ -661,6 +665,10 @@ int add_relocation_to_accumulate(struct module *me, int type, void *location, unsigned long hash; entry = kmalloc(sizeof(*entry), GFP_KERNEL); + + if (!entry) + return -ENOMEM; + INIT_LIST_HEAD(&entry->head); entry->type = type; entry->value = v; @@ -669,7 +677,10 @@ int add_relocation_to_accumulate(struct module *me, int type, void *location, current_head = &relocation_hashtable[hash]; - /* Find matching location (if any) */ + /* + * Search for the relocation_head for the relocations that happen at the + * provided location + */ bool found = false; struct relocation_head *rel_head_iter; @@ -681,19 +692,45 @@ int add_relocation_to_accumulate(struct module *me, int type, void *location, } } + /* + * If there has not yet been any relocations at the provided location, + * create a relocation_head for that location and populate it with this + * relocation_entry. + */ if (!found) { rel_head = kmalloc(sizeof(*rel_head), GFP_KERNEL); + + if (!rel_head) { + kfree(entry); + return -ENOMEM; + } + rel_head->rel_entry = kmalloc(sizeof(struct list_head), GFP_KERNEL); + + if (!rel_head->rel_entry) { + kfree(entry); + kfree(rel_head); + return -ENOMEM; + } + INIT_LIST_HEAD(rel_head->rel_entry); rel_head->location = location; INIT_HLIST_NODE(&rel_head->node); if (!current_head->first) { bucket = kmalloc(sizeof(struct used_bucket), GFP_KERNEL); + + if (!bucket) { + kfree(entry); + kfree(rel_head); + kfree(rel_head->rel_entry); + return -ENOMEM; + } + INIT_LIST_HEAD(&bucket->head); bucket->bucket = current_head; - list_add(&bucket->head, &used_buckets_list); + list_add(&bucket->head, used_buckets_list); } hlist_add_head(&rel_head->node, current_head); } @@ -704,7 +741,9 @@ int add_relocation_to_accumulate(struct module *me, int type, void *location, return 0; } -unsigned int initialize_relocation_hashtable(unsigned int num_relocations) +static unsigned int +initialize_relocation_hashtable(unsigned int num_relocations, + struct hlist_head **relocation_hashtable) { /* Can safely assume that bits is not greater than sizeof(long) */ unsigned long hashtable_size = roundup_pow_of_two(num_relocations); @@ -720,12 +759,13 @@ unsigned int initialize_relocation_hashtable(unsigned int num_relocations) hashtable_size <<= should_double_size; - relocation_hashtable = kmalloc_array(hashtable_size, - sizeof(*relocation_hashtable), - GFP_KERNEL); - __hash_init(relocation_hashtable, hashtable_size); + *relocation_hashtable = kmalloc_array(hashtable_size, + sizeof(*relocation_hashtable), + GFP_KERNEL); + if (!*relocation_hashtable) + return -ENOMEM; - INIT_LIST_HEAD(&used_buckets_list); + __hash_init(*relocation_hashtable, hashtable_size); return hashtable_bits; } @@ -742,7 +782,17 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, Elf_Addr v; int res; unsigned int num_relocations = sechdrs[relsec].sh_size / sizeof(*rel); - unsigned int hashtable_bits = initialize_relocation_hashtable(num_relocations); + struct hlist_head *relocation_hashtable; + struct list_head used_buckets_list; + unsigned int hashtable_bits; + + hashtable_bits = initialize_relocation_hashtable(num_relocations, + &relocation_hashtable); + + if (hashtable_bits < 0) + return hashtable_bits; + + INIT_LIST_HEAD(&used_buckets_list); pr_debug("Applying relocate section %u to %u\n", relsec, sechdrs[relsec].sh_info); @@ -823,14 +873,18 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, } if (reloc_handlers[type].accumulate_handler) - res = add_relocation_to_accumulate(me, type, location, hashtable_bits, v); + res = add_relocation_to_accumulate(me, type, location, + hashtable_bits, v, + relocation_hashtable, + &used_buckets_list); else res = handler(me, location, v); if (res) return res; } - process_accumulated_relocations(me); + process_accumulated_relocations(me, &relocation_hashtable, + &used_buckets_list); return 0; } -- cgit v1.2.3 From 4a92a87950c4f86bc372ee3b1da4ba9d092252a9 Mon Sep 17 00:00:00 2001 From: Charlie Jenkins Date: Mon, 27 Nov 2023 14:05:00 -0800 Subject: riscv: Correct type casting in module loading MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use __le16 with le16_to_cpu. Fixes: 8fd6c5142395 ("riscv: Add remaining module relocations") Signed-off-by: Charlie Jenkins Reviewed-by: Samuel Holland Tested-by: Samuel Holland Tested-by: Björn Töpel Link: https://lore.kernel.org/r/20231127-module_linking_freeing-v4-2-a2ca1d7027d0@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/module.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 53593fe58cd8..aac019ed63b1 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -55,7 +55,7 @@ static bool riscv_insn_valid_32bit_offset(ptrdiff_t val) static int riscv_insn_rmw(void *location, u32 keep, u32 set) { - u16 *parcel = location; + __le16 *parcel = location; u32 insn = (u32)le16_to_cpu(parcel[0]) | (u32)le16_to_cpu(parcel[1]) << 16; insn &= keep; @@ -68,7 +68,7 @@ static int riscv_insn_rmw(void *location, u32 keep, u32 set) static int riscv_insn_rvc_rmw(void *location, u16 keep, u16 set) { - u16 *parcel = location; + __le16 *parcel = location; u16 insn = le16_to_cpu(*parcel); insn &= keep; -- cgit v1.2.3 From 815f986f33eeb06652d59d8a4d405d4fdb4e59a8 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Fri, 1 Dec 2023 14:48:59 +0100 Subject: arm64: dts: rockchip: drop interrupt-names property from rk3588s dfi The dfi binding does not specify interrupt names, with the interrupts just specifying channels 0-x. So drop the unspecified property. Fixes: 5a6976b1040a ("arm64: dts: rockchip: Add DFI to rk3588s") Reported-by: Jagan Teki Signed-off-by: Heiko Stuebner Link: https://lore.kernel.org/r/20231201134859.322491-1-heiko@sntech.de --- arch/arm64/boot/dts/rockchip/rk3588s.dtsi | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi index 7064c0e9179f..8aa0499f9b03 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588s.dtsi @@ -1362,7 +1362,6 @@ , , ; - interrupt-names = "ch0", "ch1", "ch2", "ch3"; rockchip,pmu = <&pmu1grf>; }; -- cgit v1.2.3 From c13c823a78b77ea0e5f1f73112d910e259911101 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 30 Nov 2023 13:18:29 -0600 Subject: arm64: dts: rockchip: Fix PCI node addresses on rk3399-gru The rk3399-gru PCI node addresses are wrong. In rk3399-gru-scarlet, the bus number in the address should be 0. This is because bus number assignment is dynamic and not known up front. For FDT, the bus number is simply ignored. In rk3399-gru-chromebook, the addresses are simply invalid. The first "reg" entry must be the configuration space for the device. The entry should be all 0s except for device/slot and function numbers. The existing 64-bit memory space (0x83000000) entries are not valid because they must have the BAR address in the lower byte of the first cell. Warnings for these are enabled by adding the missing 'device_type = "pci"' for the root port node. Signed-off-by: Rob Herring Link: https://lore.kernel.org/r/20231130191830.2424361-1-robh@kernel.org Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi | 3 +-- arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet-dumo.dts | 4 ++-- arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi | 1 + 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi index 5c1929d41cc0..cacbad35cfc8 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-chromebook.dtsi @@ -509,8 +509,7 @@ ap_i2c_tp: &i2c5 { &pci_rootport { mvl_wifi: wifi@0,0 { compatible = "pci1b4b,2b42"; - reg = <0x83010000 0x0 0x00000000 0x0 0x00100000 - 0x83010000 0x0 0x00100000 0x0 0x00100000>; + reg = <0x0000 0x0 0x0 0x0 0x0>; interrupt-parent = <&gpio0>; interrupts = <8 IRQ_TYPE_LEVEL_LOW>; pinctrl-names = "default"; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet-dumo.dts b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet-dumo.dts index 853e88455e75..9e4b12ed62cb 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet-dumo.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru-scarlet-dumo.dts @@ -34,8 +34,8 @@ &pci_rootport { wifi@0,0 { compatible = "qcom,ath10k"; - reg = <0x00010000 0x0 0x00000000 0x0 0x00000000>, - <0x03010010 0x0 0x00000000 0x0 0x00200000>; + reg = <0x00000000 0x0 0x00000000 0x0 0x00000000>, + <0x03000010 0x0 0x00000000 0x0 0x00200000>; qcom,ath10k-calibration-variant = "GO_DUMO"; }; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi index c9bf1d5c3a42..789fd0dcc88b 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi @@ -489,6 +489,7 @@ ap_i2c_audio: &i2c8 { #address-cells = <3>; #size-cells = <2>; ranges; + device_type = "pci"; }; }; -- cgit v1.2.3 From 4b3338aaa74d7d4ec5b6734dc298f0db94ec83d2 Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Thu, 30 Nov 2023 12:29:47 +0530 Subject: powerpc/ftrace: Fix stack teardown in ftrace_no_trace Commit 41a506ef71eb ("powerpc/ftrace: Create a dummy stackframe to fix stack unwind") added use of a new stack frame on ftrace entry to fix stack unwind. However, the commit missed updating the offset used while tearing down the ftrace stack when ftrace is disabled. Fix the same. In addition, the commit missed saving the correct stack pointer in pt_regs. Update the same. Fixes: 41a506ef71eb ("powerpc/ftrace: Create a dummy stackframe to fix stack unwind") Cc: stable@vger.kernel.org # v6.5+ Signed-off-by: Naveen N Rao Signed-off-by: Michael Ellerman Link: https://msgid.link/20231130065947.2188860-1-naveen@kernel.org --- arch/powerpc/kernel/trace/ftrace_entry.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/trace/ftrace_entry.S b/arch/powerpc/kernel/trace/ftrace_entry.S index 90701885762c..40677416d7b2 100644 --- a/arch/powerpc/kernel/trace/ftrace_entry.S +++ b/arch/powerpc/kernel/trace/ftrace_entry.S @@ -62,7 +62,7 @@ .endif /* Save previous stack pointer (r1) */ - addi r8, r1, SWITCH_FRAME_SIZE + addi r8, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE PPC_STL r8, GPR1(r1) .if \allregs == 1 @@ -182,7 +182,7 @@ ftrace_no_trace: mflr r3 mtctr r3 REST_GPR(3, r1) - addi r1, r1, SWITCH_FRAME_SIZE + addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE mtlr r0 bctr #endif -- cgit v1.2.3 From 209043cf092d7b0d4739921b3f11d6d0b451eabf Mon Sep 17 00:00:00 2001 From: Nathan Rossi Date: Mon, 6 Nov 2023 02:14:36 +0000 Subject: arm64: dts: imx8mp: imx8mq: Add parkmode-disable-ss-quirk on DWC3 The i.MX8MP and i.MX8MQ devices both use the same DWC3 controller and are both affected by a known issue with the controller due to specific behaviour when park mode is enabled in SuperSpeed host mode operation. Under heavy USB traffic from multiple endpoints the controller will sometimes incorrectly process transactions such that some transactions are lost, or the controller may hang when processing transactions. When the controller hangs it does not recover. This issue is documented partially within the linux-imx vendor kernel which references a Synopsys STAR number 9001415732 in commits [1] and additional details in [2]. Those commits provide some additional controller internal implementation specifics around the incorrect behaviour of the SuperSpeed host controller operation when park mode is enabled. The summary of this issue is that the host controller can incorrectly enter/exit park mode such that part of the controller is in a state which behaves as if in park mode even though it is not. In this state the controller incorrectly calculates the number of TRBs available which results in incorrect access of the internal caches causing the overwrite of pending requests in the cache which should have been processed but are ignored. This can cause the controller to drop the requests or hang waiting for the pending state of the dropped requests. The workaround for this issue is to disable park mode for SuperSpeed operation of the controller through the GUCTL1[17] bit. This is already available as a quirk for the DWC3 controller and can be enabled via the 'snps,parkmode-disable-ss-quirk' device tree property. It is possible to replicate this failure on an i.MX8MP EVK with a USB Hub connecting 4 SuperSpeed USB flash drives. Performing continuous small read operations (dd if=/dev/sd... of=/dev/null bs=16) on the block devices will result in device errors initially and will eventually result in the controller hanging. [13240.896936] xhci-hcd xhci-hcd.0.auto: WARN Event TRB for slot 4 ep 2 with no TDs queued? [13240.990708] usb 2-1.3: reset SuperSpeed USB device number 5 using xhci-hcd [13241.015582] sd 2:0:0:0: [sdc] tag#0 UNKNOWN(0x2003) Result: hostbyte=0x07 driverbyte=DRIVER_OK cmd_age=0s [13241.025198] sd 2:0:0:0: [sdc] tag#0 CDB: opcode=0x28 28 00 00 00 03 e0 00 01 00 00 [13241.032949] I/O error, dev sdc, sector 992 op 0x0:(READ) flags 0x80700 phys_seg 25 prio class 2 [13272.150710] usb 2-1.2: reset SuperSpeed USB device number 4 using xhci-hcd [13272.175469] sd 1:0:0:0: [sdb] tag#0 UNKNOWN(0x2003) Result: hostbyte=0x03 driverbyte=DRIVER_OK cmd_age=31s [13272.185365] sd 1:0:0:0: [sdb] tag#0 CDB: opcode=0x28 28 00 00 00 03 e0 00 01 00 00 [13272.193385] I/O error, dev sdb, sector 992 op 0x0:(READ) flags 0x80700 phys_seg 18 prio class 2 [13434.846556] xhci-hcd xhci-hcd.0.auto: xHCI host not responding to stop endpoint command [13434.854592] xhci-hcd xhci-hcd.0.auto: xHCI host controller not responding, assume dead [13434.862553] xhci-hcd xhci-hcd.0.auto: HC died; cleaning up [1] https://github.com/nxp-imx/linux-imx/commit/97a5349d936b08cf301730b59e4e8855283f815c [2] https://github.com/nxp-imx/linux-imx/commit/b4b5cbc5a12d7c3b920d1d7cba0ada3379e4e42b Fixes: fb8587a2c165 ("arm64: dtsi: imx8mp: add usb nodes") Fixes: ad37549cb5dc ("arm64: dts: imx8mq: add USB nodes") Signed-off-by: Nathan Rossi Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8mp.dtsi | 2 ++ arch/arm64/boot/dts/freescale/imx8mq.dtsi | 2 ++ 2 files changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi index c9a610ba4836..1264da6012f9 100644 --- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi @@ -2072,6 +2072,7 @@ phys = <&usb3_phy0>, <&usb3_phy0>; phy-names = "usb2-phy", "usb3-phy"; snps,gfladj-refclk-lpm-sel-quirk; + snps,parkmode-disable-ss-quirk; }; }; @@ -2114,6 +2115,7 @@ phys = <&usb3_phy1>, <&usb3_phy1>; phy-names = "usb2-phy", "usb3-phy"; snps,gfladj-refclk-lpm-sel-quirk; + snps,parkmode-disable-ss-quirk; }; }; diff --git a/arch/arm64/boot/dts/freescale/imx8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq.dtsi index 4b1ce9fc1758..c6dc3ba0d43b 100644 --- a/arch/arm64/boot/dts/freescale/imx8mq.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8mq.dtsi @@ -1649,6 +1649,7 @@ phys = <&usb3_phy0>, <&usb3_phy0>; phy-names = "usb2-phy", "usb3-phy"; power-domains = <&pgc_otg1>; + snps,parkmode-disable-ss-quirk; status = "disabled"; }; @@ -1680,6 +1681,7 @@ phys = <&usb3_phy1>, <&usb3_phy1>; phy-names = "usb2-phy", "usb3-phy"; power-domains = <&pgc_otg2>; + snps,parkmode-disable-ss-quirk; status = "disabled"; }; -- cgit v1.2.3 From d951f8f5f23a9417b7952f22b33784c73caa1ebb Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sun, 5 Nov 2023 10:32:19 -0300 Subject: ARM: dts: imx6ul-pico: Describe the Ethernet PHY clock Since commit c7e73b5051d6 ("ARM: imx: mach-imx6ul: remove 14x14 EVK specific PHY fixup")thet Ethernet PHY is no longer configured via code in board file. This caused Ethernet to stop working. Fix this problem by describing the clocks and clock-names to the Ethernet PHY node so that the KSZ8081 chip can be clocked correctly. Fixes: c7e73b5051d6 ("ARM: imx: mach-imx6ul: remove 14x14 EVK specific PHY fixup") Signed-off-by: Fabio Estevam Reviewed-by: Andrew Lunn Signed-off-by: Shawn Guo --- arch/arm/boot/dts/nxp/imx/imx6ul-pico.dtsi | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/nxp/imx/imx6ul-pico.dtsi b/arch/arm/boot/dts/nxp/imx/imx6ul-pico.dtsi index 4ffe99ed55ca..07dcecbe485d 100644 --- a/arch/arm/boot/dts/nxp/imx/imx6ul-pico.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx6ul-pico.dtsi @@ -121,6 +121,8 @@ max-speed = <100>; interrupt-parent = <&gpio5>; interrupts = <6 IRQ_TYPE_LEVEL_LOW>; + clocks = <&clks IMX6UL_CLK_ENET_REF>; + clock-names = "rmii-ref"; }; }; }; -- cgit v1.2.3 From 55702ec9603ebeffb15e6f7b113623fe1d8872f4 Mon Sep 17 00:00:00 2001 From: Stefan Wiehler Date: Mon, 6 Nov 2023 13:12:07 +0100 Subject: mips/smp: Call rcutree_report_cpu_starting() earlier rcutree_report_cpu_starting() must be called before clockevents_register_device() to avoid the following lockdep splat triggered by calling list_add() when CONFIG_PROVE_RCU_LIST=y: WARNING: suspicious RCU usage ... ----------------------------- kernel/locking/lockdep.c:3680 RCU-list traversed in non-reader section!! other info that might help us debug this: RCU used illegally from offline CPU! rcu_scheduler_active = 1, debug_locks = 1 no locks held by swapper/1/0. ... Call Trace: [] show_stack+0x64/0x158 [] dump_stack_lvl+0x90/0xc4 [] __lock_acquire+0x1404/0x2940 [] lock_acquire+0x14c/0x448 [] _raw_spin_lock_irqsave+0x50/0x88 [] clockevents_register_device+0x60/0x1e8 [] r4k_clockevent_init+0x220/0x3a0 [] start_secondary+0x50/0x3b8 raw_smp_processor_id() is required in order to avoid calling into lockdep before RCU has declared the CPU to be watched for readers. See also commit 29368e093921 ("x86/smpboot: Move rcu_cpu_starting() earlier"), commit de5d9dae150c ("s390/smp: move rcu_cpu_starting() earlier") and commit 99f070b62322 ("powerpc/smp: Call rcu_cpu_starting() earlier"). Signed-off-by: Stefan Wiehler Reviewed-by: Huacai Chen Signed-off-by: Thomas Bogendoerfer --- arch/mips/kernel/smp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 8fbef537fb88..82e2e051b416 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -351,10 +351,11 @@ early_initcall(mips_smp_ipi_init); */ asmlinkage void start_secondary(void) { - unsigned int cpu; + unsigned int cpu = raw_smp_processor_id(); cpu_probe(); per_cpu_trap_init(false); + rcutree_report_cpu_starting(cpu); mips_clockevent_init(); mp_ops->init_secondary(); cpu_report(); @@ -366,7 +367,6 @@ asmlinkage void start_secondary(void) */ calibrate_delay(); - cpu = smp_processor_id(); cpu_data[cpu].udelay_val = loops_per_jiffy; set_cpu_sibling_map(cpu); -- cgit v1.2.3 From 8f7aa77a463f47c9e00592d02747a9fcf2271543 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Tue, 7 Nov 2023 11:15:18 +0000 Subject: MIPS: Loongson64: Reserve vgabios memory on boot vgabios is passed from firmware to kernel on Loongson64 systems. Sane firmware will keep this pointer in reserved memory space passed from the firmware but insane firmware keeps it in low memory before kernel entry that is not reserved. Previously kernel won't try to allocate memory from low memory before kernel entry on boot, but after converting to memblock it will do that. Fix by resversing those memory on early boot. Cc: stable@vger.kernel.org Fixes: a94e4f24ec83 ("MIPS: init: Drop boot_mem_map") Signed-off-by: Jiaxun Yang Signed-off-by: Thomas Bogendoerfer --- arch/mips/loongson64/init.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/mips/loongson64/init.c b/arch/mips/loongson64/init.c index ee8de1735b7c..d62262f93069 100644 --- a/arch/mips/loongson64/init.c +++ b/arch/mips/loongson64/init.c @@ -88,6 +88,11 @@ void __init szmem(unsigned int node) break; } } + + /* Reserve vgabios if it comes from firmware */ + if (loongson_sysconf.vgabios_addr) + memblock_reserve(virt_to_phys((void *)loongson_sysconf.vgabios_addr), + SZ_256K); } #ifndef CONFIG_NUMA -- cgit v1.2.3 From edc0378eee00200a5bedf1bb9f00ad390e0d1bd4 Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Tue, 7 Nov 2023 11:15:19 +0000 Subject: MIPS: Loongson64: Enable DMA noncoherent support There are some Loongson64 systems come with broken coherent DMA support, firmware will set a bit in boot_param and pass nocoherentio in cmdline. However nonconherent support was missed out when spin off Loongson-2EF form Loongson64, and that boot_param change never made itself into upstream. Support DMA noncoherent properly to get those systems working. Cc: stable@vger.kernel.org Fixes: 71e2f4dd5a65 ("MIPS: Fork loongson2ef from loongson64") Signed-off-by: Jiaxun Yang Signed-off-by: Thomas Bogendoerfer --- arch/mips/Kconfig | 2 ++ arch/mips/include/asm/mach-loongson64/boot_param.h | 3 ++- arch/mips/loongson64/env.c | 10 +++++++++- 3 files changed, 13 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 76db82542519..797ae590ebdb 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -460,6 +460,7 @@ config MACH_LOONGSON2EF config MACH_LOONGSON64 bool "Loongson 64-bit family of machines" + select ARCH_DMA_DEFAULT_COHERENT select ARCH_SPARSEMEM_ENABLE select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO @@ -1251,6 +1252,7 @@ config CPU_LOONGSON64 select CPU_SUPPORTS_MSA select CPU_DIEI_BROKEN if !LOONGSON3_ENHANCEMENT select CPU_MIPSR2_IRQ_VI + select DMA_NONCOHERENT select WEAK_ORDERING select WEAK_REORDERING_BEYOND_LLSC select MIPS_ASID_BITS_VARIABLE diff --git a/arch/mips/include/asm/mach-loongson64/boot_param.h b/arch/mips/include/asm/mach-loongson64/boot_param.h index 035b1a69e2d0..c454ef734c45 100644 --- a/arch/mips/include/asm/mach-loongson64/boot_param.h +++ b/arch/mips/include/asm/mach-loongson64/boot_param.h @@ -117,7 +117,8 @@ struct irq_source_routing_table { u64 pci_io_start_addr; u64 pci_io_end_addr; u64 pci_config_addr; - u32 dma_mask_bits; + u16 dma_mask_bits; + u16 dma_noncoherent; } __packed; struct interface_info { diff --git a/arch/mips/loongson64/env.c b/arch/mips/loongson64/env.c index c961e2999f15..ef3750a6ffac 100644 --- a/arch/mips/loongson64/env.c +++ b/arch/mips/loongson64/env.c @@ -13,6 +13,8 @@ * Copyright (C) 2009 Lemote Inc. * Author: Wu Zhangjin, wuzhangjin@gmail.com */ + +#include #include #include #include @@ -147,8 +149,14 @@ void __init prom_lefi_init_env(void) loongson_sysconf.dma_mask_bits = eirq_source->dma_mask_bits; if (loongson_sysconf.dma_mask_bits < 32 || - loongson_sysconf.dma_mask_bits > 64) + loongson_sysconf.dma_mask_bits > 64) { loongson_sysconf.dma_mask_bits = 32; + dma_default_coherent = true; + } else { + dma_default_coherent = !eirq_source->dma_noncoherent; + } + + pr_info("Firmware: Coherent DMA: %s\n", dma_default_coherent ? "on" : "off"); loongson_sysconf.restart_addr = boot_p->reset_system.ResetWarm; loongson_sysconf.poweroff_addr = boot_p->reset_system.Shutdown; -- cgit v1.2.3 From c7206e7bd214ebb3ca6fa474a4423662327d9beb Mon Sep 17 00:00:00 2001 From: Jiaxun Yang Date: Tue, 7 Nov 2023 11:15:20 +0000 Subject: MIPS: Loongson64: Handle more memory types passed from firmware There are many types of revsered memory passed from firmware that should be reserved in memblock, and UMA memory passed from firmware that should be added to system memory for system to use. Also for memblock there is no need to align those space into page, which actually cause problems. Handle them properly to prevent memory corruption on some systems. Cc: stable@vger.kernel.org Signed-off-by: Jiaxun Yang Signed-off-by: Thomas Bogendoerfer --- arch/mips/include/asm/mach-loongson64/boot_param.h | 6 +++- arch/mips/loongson64/init.c | 42 +++++++++++++--------- 2 files changed, 31 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/mips/include/asm/mach-loongson64/boot_param.h b/arch/mips/include/asm/mach-loongson64/boot_param.h index c454ef734c45..e007edd6b60a 100644 --- a/arch/mips/include/asm/mach-loongson64/boot_param.h +++ b/arch/mips/include/asm/mach-loongson64/boot_param.h @@ -14,7 +14,11 @@ #define ADAPTER_ROM 8 #define ACPI_TABLE 9 #define SMBIOS_TABLE 10 -#define MAX_MEMORY_TYPE 11 +#define UMA_VIDEO_RAM 11 +#define VUMA_VIDEO_RAM 12 +#define MAX_MEMORY_TYPE 13 + +#define MEM_SIZE_IS_IN_BYTES (1 << 31) #define LOONGSON3_BOOT_MEM_MAP_MAX 128 struct efi_memory_map_loongson { diff --git a/arch/mips/loongson64/init.c b/arch/mips/loongson64/init.c index d62262f93069..f25caa6aa9d3 100644 --- a/arch/mips/loongson64/init.c +++ b/arch/mips/loongson64/init.c @@ -49,8 +49,7 @@ void virtual_early_config(void) void __init szmem(unsigned int node) { u32 i, mem_type; - static unsigned long num_physpages; - u64 node_id, node_psize, start_pfn, end_pfn, mem_start, mem_size; + phys_addr_t node_id, mem_start, mem_size; /* Otherwise come from DTB */ if (loongson_sysconf.fw_interface != LOONGSON_LEFI) @@ -64,27 +63,38 @@ void __init szmem(unsigned int node) mem_type = loongson_memmap->map[i].mem_type; mem_size = loongson_memmap->map[i].mem_size; - mem_start = loongson_memmap->map[i].mem_start; + + /* Memory size comes in MB if MEM_SIZE_IS_IN_BYTES not set */ + if (mem_size & MEM_SIZE_IS_IN_BYTES) + mem_size &= ~MEM_SIZE_IS_IN_BYTES; + else + mem_size = mem_size << 20; + + mem_start = (node_id << 44) | loongson_memmap->map[i].mem_start; switch (mem_type) { case SYSTEM_RAM_LOW: case SYSTEM_RAM_HIGH: - start_pfn = ((node_id << 44) + mem_start) >> PAGE_SHIFT; - node_psize = (mem_size << 20) >> PAGE_SHIFT; - end_pfn = start_pfn + node_psize; - num_physpages += node_psize; - pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx MB\n", - (u32)node_id, mem_type, mem_start, mem_size); - pr_info(" start_pfn:0x%llx, end_pfn:0x%llx, num_physpages:0x%lx\n", - start_pfn, end_pfn, num_physpages); - memblock_add_node(PFN_PHYS(start_pfn), - PFN_PHYS(node_psize), node, + case UMA_VIDEO_RAM: + pr_info("Node %d, mem_type:%d\t[%pa], %pa bytes usable\n", + (u32)node_id, mem_type, &mem_start, &mem_size); + memblock_add_node(mem_start, mem_size, node, MEMBLOCK_NONE); break; case SYSTEM_RAM_RESERVED: - pr_info("Node%d: mem_type:%d, mem_start:0x%llx, mem_size:0x%llx MB\n", - (u32)node_id, mem_type, mem_start, mem_size); - memblock_reserve(((node_id << 44) + mem_start), mem_size << 20); + case VIDEO_ROM: + case ADAPTER_ROM: + case ACPI_TABLE: + case SMBIOS_TABLE: + pr_info("Node %d, mem_type:%d\t[%pa], %pa bytes reserved\n", + (u32)node_id, mem_type, &mem_start, &mem_size); + memblock_reserve(mem_start, mem_size); + break; + /* We should not reserve VUMA_VIDEO_RAM as it overlaps with MMIO */ + case VUMA_VIDEO_RAM: + default: + pr_info("Node %d, mem_type:%d\t[%pa], %pa bytes unhandled\n", + (u32)node_id, mem_type, &mem_start, &mem_size); break; } } -- cgit v1.2.3 From a58a173444a68412bb08849bd81c679395f20ca0 Mon Sep 17 00:00:00 2001 From: Thomas Bogendoerfer Date: Thu, 30 Nov 2023 17:36:01 +0100 Subject: MIPS: kernel: Clear FPU states when setting up kernel threads io_uring sets up the io worker kernel thread via a syscall out of an user space prrocess. This process might have used FPU and since copy_thread() didn't clear FPU states for kernel threads a BUG() is triggered for using FPU inside kernel. Move code around to always clear FPU state for user and kernel threads. Cc: stable@vger.kernel.org Reported-by: Aurelien Jarno Closes: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=1055021 Suggested-by: Jiaxun Yang Reviewed-by: Jiaxun Yang Signed-off-by: Thomas Bogendoerfer --- arch/mips/kernel/process.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 5387ed0a5186..b630604c577f 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -121,6 +121,19 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) /* Put the stack after the struct pt_regs. */ childksp = (unsigned long) childregs; p->thread.cp0_status = (read_c0_status() & ~(ST0_CU2|ST0_CU1)) | ST0_KERNEL_CUMASK; + + /* + * New tasks lose permission to use the fpu. This accelerates context + * switching for most programs since they don't use the fpu. + */ + clear_tsk_thread_flag(p, TIF_USEDFPU); + clear_tsk_thread_flag(p, TIF_USEDMSA); + clear_tsk_thread_flag(p, TIF_MSA_CTX_LIVE); + +#ifdef CONFIG_MIPS_MT_FPAFF + clear_tsk_thread_flag(p, TIF_FPUBOUND); +#endif /* CONFIG_MIPS_MT_FPAFF */ + if (unlikely(args->fn)) { /* kernel thread */ unsigned long status = p->thread.cp0_status; @@ -149,20 +162,8 @@ int copy_thread(struct task_struct *p, const struct kernel_clone_args *args) p->thread.reg29 = (unsigned long) childregs; p->thread.reg31 = (unsigned long) ret_from_fork; - /* - * New tasks lose permission to use the fpu. This accelerates context - * switching for most programs since they don't use the fpu. - */ childregs->cp0_status &= ~(ST0_CU2|ST0_CU1); - clear_tsk_thread_flag(p, TIF_USEDFPU); - clear_tsk_thread_flag(p, TIF_USEDMSA); - clear_tsk_thread_flag(p, TIF_MSA_CTX_LIVE); - -#ifdef CONFIG_MIPS_MT_FPAFF - clear_tsk_thread_flag(p, TIF_FPUBOUND); -#endif /* CONFIG_MIPS_MT_FPAFF */ - #ifdef CONFIG_MIPS_FP_SUPPORT atomic_set(&p->thread.bd_emu_frame, BD_EMUFRAME_NONE); #endif -- cgit v1.2.3 From d863a2f4f47560d71447650822857fc3d2aea715 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Mon, 6 Nov 2023 16:13:25 +0100 Subject: arm64: dts: freescale: imx8-ss-lsio: Fix #pwm-cells MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit i.MX8QM/QXP supports inverted PWM output, thus #pwm-cells needs to be set to 3. Fixes: 23fa99b205ea ("arm64: dts: freescale: imx8-ss-lsio: add support for lsio_pwm0-3") Signed-off-by: Alexander Stein Reviewed-by: Uwe Kleine-König Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi index 49ad3413db94..7e510b21bbac 100644 --- a/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-ss-lsio.dtsi @@ -29,7 +29,7 @@ lsio_subsys: bus@5d000000 { <&pwm0_lpcg 1>; assigned-clocks = <&clk IMX_SC_R_PWM_0 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; - #pwm-cells = <2>; + #pwm-cells = <3>; interrupts = ; status = "disabled"; }; @@ -42,7 +42,7 @@ lsio_subsys: bus@5d000000 { <&pwm1_lpcg 1>; assigned-clocks = <&clk IMX_SC_R_PWM_1 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; - #pwm-cells = <2>; + #pwm-cells = <3>; interrupts = ; status = "disabled"; }; @@ -55,7 +55,7 @@ lsio_subsys: bus@5d000000 { <&pwm2_lpcg 1>; assigned-clocks = <&clk IMX_SC_R_PWM_2 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; - #pwm-cells = <2>; + #pwm-cells = <3>; interrupts = ; status = "disabled"; }; @@ -68,7 +68,7 @@ lsio_subsys: bus@5d000000 { <&pwm3_lpcg 1>; assigned-clocks = <&clk IMX_SC_R_PWM_3 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; - #pwm-cells = <2>; + #pwm-cells = <3>; interrupts = ; status = "disabled"; }; -- cgit v1.2.3 From 7cef7c0b1dea1e17c7913826b74403f4ab7edeb9 Mon Sep 17 00:00:00 2001 From: Alexander Stein Date: Mon, 6 Nov 2023 16:13:26 +0100 Subject: arm64: dts: freescale: imx8-ss-dma: Fix #pwm-cells MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit i.MX8QXP supports inverted PWM output, thus #pwm-cells needs to be set to 3. Fixes: f1d6a6b991ef9 ("arm64: dts: imx8qxp: add adma_pwm in adma") Signed-off-by: Alexander Stein Reviewed-by: Uwe Kleine-König Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi b/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi index ce66d30a4839..b0bb77150adc 100644 --- a/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-ss-dma.dtsi @@ -149,7 +149,7 @@ dma_subsys: bus@5a000000 { clock-names = "ipg", "per"; assigned-clocks = <&clk IMX_SC_R_LCD_0_PWM_0 IMX_SC_PM_CLK_PER>; assigned-clock-rates = <24000000>; - #pwm-cells = <2>; + #pwm-cells = <3>; power-domains = <&pd IMX_SC_R_LCD_0_PWM_0>; }; -- cgit v1.2.3 From b37e75bddc35d4a40a4caeb9921cb95c33c3eba9 Mon Sep 17 00:00:00 2001 From: Xiaolei Wang Date: Fri, 10 Nov 2023 15:25:31 +0800 Subject: arm64: dts: imx8qm: Add imx8qm's own pm to avoid panic during startup Add imx8qm's own pm, otherwise the following panic will occur during the startup process: Kernel panic - not syncing: Asynchronous SError Interrupt Hardware name: Freescale i.MX8QM MEK (DT) Workqueue: events_unbound deferred_probe_work_func Call trace: dump_backtrace+0x98/0xf0 show_stack+0x18/0x24 dump_stack_lvl+0x60/0xac dump_stack+0x18/0x24 panic+0x340/0x3a0 nmi_panic+0x8c/0x90 arm64_serror_panic+0x6c/0x78 do_serror+0x3c/0x78 el1h_64_error_handler+0x38/0x50 el1h_64_error+0x64/0x68 fsl_edma_chan_mux+0x98/0xdc fsl_edma_probe+0x278/0x898 platform_probe+0x68/0xd8 really_probe+0x110/0x27c __driver_probe_device+0x78/0x12c driver_probe_device+0x3c/0x118 __device_attach_driver+0xb8/0xf8 bus_for_each_drv+0x84/0xe4 __device_attach+0xfc/0x18c device_initial_probe+0x14/0x20 Fixes: e4d7a330fb7a ("arm64: dts: imx8: add edma[0..3]") Signed-off-by: Xiaolei Wang Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'arch') diff --git a/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi b/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi index 01539df335f8..8439dd6b3935 100644 --- a/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8qm-ss-dma.dtsi @@ -96,6 +96,17 @@ status = "okay"; }; +&edma3 { + power-domains = <&pd IMX_SC_R_DMA_1_CH0>, + <&pd IMX_SC_R_DMA_1_CH1>, + <&pd IMX_SC_R_DMA_1_CH2>, + <&pd IMX_SC_R_DMA_1_CH3>, + <&pd IMX_SC_R_DMA_1_CH4>, + <&pd IMX_SC_R_DMA_1_CH5>, + <&pd IMX_SC_R_DMA_1_CH6>, + <&pd IMX_SC_R_DMA_1_CH7>; +}; + &flexcan1 { fsl,clk-source = /bits/ 8 <1>; }; -- cgit v1.2.3 From d4cb68a5d3a1ed30ecaf1591eb901523faa13496 Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Mon, 13 Nov 2023 18:02:29 +0800 Subject: arm64: dts: imx93: correct mediamix power "nic_media" clock should be enabled when power on/off mediamix, otherwise power on/off will fail. Because "media_axi_root" clock is the parent of "nic_media" clock, so replace "media_axi_clock" clock with "nic_media" clock in mediamix node. Link: https://github.com/nxp-imx/linux-imx/commit/ce18e6d0071ae9df5486af8613708ebe920484be Fixes: f2d03ba997cb ("arm64: dts: imx93: reorder device nodes") Fixes: e85d3458a804 ("arm64: dts: imx93: add src node") Reviewed-by: Jacky Bai Signed-off-by: Peng Fan Signed-off-by: Liu Ying Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx93.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/freescale/imx93.dtsi b/arch/arm64/boot/dts/freescale/imx93.dtsi index ceccf4766440..0053f3c89f5f 100644 --- a/arch/arm64/boot/dts/freescale/imx93.dtsi +++ b/arch/arm64/boot/dts/freescale/imx93.dtsi @@ -417,7 +417,7 @@ compatible = "fsl,imx93-src-slice"; reg = <0x44462400 0x400>, <0x44465800 0x400>; #power-domain-cells = <0>; - clocks = <&clk IMX93_CLK_MEDIA_AXI>, + clocks = <&clk IMX93_CLK_NIC_MEDIA_GATE>, <&clk IMX93_CLK_MEDIA_APB>; }; }; -- cgit v1.2.3 From 4af1b258b68c5c948601082d5ce858ba31eb64f4 Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Wed, 15 Nov 2023 11:56:20 +0800 Subject: arm64: dts: imx93: update gpio node name to align with register address Change the gpio node name to align with register address Fixes: c1d0782b5fc3 ("arm64: dts: imx93: update gpio node") Signed-off-by: Haibo Chen Reviewed-by: Marco Felsch Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx93.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/freescale/imx93.dtsi b/arch/arm64/boot/dts/freescale/imx93.dtsi index 0053f3c89f5f..34c0540276d1 100644 --- a/arch/arm64/boot/dts/freescale/imx93.dtsi +++ b/arch/arm64/boot/dts/freescale/imx93.dtsi @@ -957,7 +957,7 @@ }; }; - gpio2: gpio@43810080 { + gpio2: gpio@43810000 { compatible = "fsl,imx93-gpio", "fsl,imx8ulp-gpio"; reg = <0x43810000 0x1000>; gpio-controller; @@ -972,7 +972,7 @@ gpio-ranges = <&iomuxc 0 4 30>; }; - gpio3: gpio@43820080 { + gpio3: gpio@43820000 { compatible = "fsl,imx93-gpio", "fsl,imx8ulp-gpio"; reg = <0x43820000 0x1000>; gpio-controller; @@ -988,7 +988,7 @@ <&iomuxc 26 34 2>, <&iomuxc 28 0 4>; }; - gpio4: gpio@43830080 { + gpio4: gpio@43830000 { compatible = "fsl,imx93-gpio", "fsl,imx8ulp-gpio"; reg = <0x43830000 0x1000>; gpio-controller; @@ -1003,7 +1003,7 @@ gpio-ranges = <&iomuxc 0 38 28>, <&iomuxc 28 36 2>; }; - gpio1: gpio@47400080 { + gpio1: gpio@47400000 { compatible = "fsl,imx93-gpio", "fsl,imx8ulp-gpio"; reg = <0x47400000 0x1000>; gpio-controller; -- cgit v1.2.3 From 8ae06f1366390972fdcce0f0cee3cac0f63b3209 Mon Sep 17 00:00:00 2001 From: Haibo Chen Date: Wed, 15 Nov 2023 11:56:21 +0800 Subject: arm64: dts: imx8ulp: update gpio node name to align with register address Change the gpio node name to align with register address. Fixes: ac7bcf48ddba ("arm64: dts: imx8ulp: update gpio node") Signed-off-by: Haibo Chen Reviewed-by: Marco Felsch Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8ulp.dtsi | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/freescale/imx8ulp.dtsi b/arch/arm64/boot/dts/freescale/imx8ulp.dtsi index f22c1ac391c9..c4a0082f30d3 100644 --- a/arch/arm64/boot/dts/freescale/imx8ulp.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8ulp.dtsi @@ -483,7 +483,7 @@ }; }; - gpioe: gpio@2d000080 { + gpioe: gpio@2d000000 { compatible = "fsl,imx8ulp-gpio"; reg = <0x2d000000 0x1000>; gpio-controller; @@ -498,7 +498,7 @@ gpio-ranges = <&iomuxc1 0 32 24>; }; - gpiof: gpio@2d010080 { + gpiof: gpio@2d010000 { compatible = "fsl,imx8ulp-gpio"; reg = <0x2d010000 0x1000>; gpio-controller; @@ -534,7 +534,7 @@ }; }; - gpiod: gpio@2e200080 { + gpiod: gpio@2e200000 { compatible = "fsl,imx8ulp-gpio"; reg = <0x2e200000 0x1000>; gpio-controller; -- cgit v1.2.3 From 1c2b1049af3f86545fcc5fae0fc725fb64b3a09e Mon Sep 17 00:00:00 2001 From: Kunwu Chan Date: Wed, 22 Nov 2023 14:46:36 +0800 Subject: ARM: imx: Check return value of devm_kasprintf in imx_mmdc_perf_init devm_kasprintf() returns a pointer to dynamically allocated memory which can be NULL upon failure. Ensure the allocation was successful by checking the pointer validity. Release the id allocated in 'mmdc_pmu_init' when 'devm_kasprintf' return NULL Suggested-by: Ahmad Fatoum Fixes: e76bdfd7403a ("ARM: imx: Added perf functionality to mmdc driver") Signed-off-by: Kunwu Chan Signed-off-by: Shawn Guo --- arch/arm/mach-imx/mmdc.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-imx/mmdc.c b/arch/arm/mach-imx/mmdc.c index 2157493b78a9..df69af932375 100644 --- a/arch/arm/mach-imx/mmdc.c +++ b/arch/arm/mach-imx/mmdc.c @@ -501,6 +501,10 @@ static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_b name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "mmdc%d", ret); + if (!name) { + ret = -ENOMEM; + goto pmu_release_id; + } pmu_mmdc->mmdc_ipg_clk = mmdc_ipg_clk; pmu_mmdc->devtype_data = (struct fsl_mmdc_devtype_data *)of_id->data; @@ -523,9 +527,10 @@ static int imx_mmdc_perf_init(struct platform_device *pdev, void __iomem *mmdc_b pmu_register_err: pr_warn("MMDC Perf PMU failed (%d), disabled\n", ret); - ida_simple_remove(&mmdc_ida, pmu_mmdc->id); cpuhp_state_remove_instance_nocalls(cpuhp_mmdc_state, &pmu_mmdc->node); hrtimer_cancel(&pmu_mmdc->hrtimer); +pmu_release_id: + ida_simple_remove(&mmdc_ida, pmu_mmdc->id); pmu_free: kfree(pmu_mmdc); return ret; -- cgit v1.2.3 From 04179605ab604dba32571a05cd06423afc9eca19 Mon Sep 17 00:00:00 2001 From: Stefan Eichenberger Date: Thu, 23 Nov 2023 11:48:12 +0100 Subject: arm64: dts: imx8-apalis: set wifi regulator to always-on Make sure that the wifi regulator is always on. The wifi driver itself puts the wifi module into suspend mode. If we cut the power the driver will crash when resuming from suspend. Signed-off-by: Stefan Eichenberger Signed-off-by: Francesco Dolcini Fixes: ad0de4ceb706 ("arm64: dts: freescale: add initial apalis imx8 aka quadmax module support") Signed-off-by: Shawn Guo --- arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi b/arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi index 5ce5fbf2b38e..f69b0c17560a 100644 --- a/arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi +++ b/arch/arm64/boot/dts/freescale/imx8-apalis-v1.1.dtsi @@ -82,12 +82,9 @@ pinctrl-0 = <&pinctrl_wifi_pdn>; gpio = <&lsio_gpio1 28 GPIO_ACTIVE_HIGH>; enable-active-high; + regulator-always-on; regulator-name = "wifi_pwrdn_fake_regulator"; regulator-settling-time-us = <100>; - - regulator-state-mem { - regulator-off-in-suspend; - }; }; reg_pcie_switch: regulator-pcie-switch { -- cgit v1.2.3 From 397caf68e2d36532054cb14ae8995537f27f8b61 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Mon, 27 Nov 2023 17:05:01 +0100 Subject: ARM: dts: imx7: Declare timers compatible with fsl,imx6dl-gpt The timer nodes declare compatibility with "fsl,imx6sx-gpt", which itself is compatible with "fsl,imx6dl-gpt". Switch the fallback compatible from "fsl,imx6sx-gpt" to "fsl,imx6dl-gpt". Fixes: 949673450291 ("ARM: dts: add imx7d soc dtsi file") Signed-off-by: Philipp Zabel Signed-off-by: Roland Hieber Signed-off-by: Shawn Guo --- arch/arm/boot/dts/nxp/imx/imx7s.dtsi | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/nxp/imx/imx7s.dtsi b/arch/arm/boot/dts/nxp/imx/imx7s.dtsi index 29b8fd03567a..5387da8a2a0a 100644 --- a/arch/arm/boot/dts/nxp/imx/imx7s.dtsi +++ b/arch/arm/boot/dts/nxp/imx/imx7s.dtsi @@ -454,7 +454,7 @@ }; gpt1: timer@302d0000 { - compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; + compatible = "fsl,imx7d-gpt", "fsl,imx6dl-gpt"; reg = <0x302d0000 0x10000>; interrupts = ; clocks = <&clks IMX7D_GPT1_ROOT_CLK>, @@ -463,7 +463,7 @@ }; gpt2: timer@302e0000 { - compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; + compatible = "fsl,imx7d-gpt", "fsl,imx6dl-gpt"; reg = <0x302e0000 0x10000>; interrupts = ; clocks = <&clks IMX7D_GPT2_ROOT_CLK>, @@ -473,7 +473,7 @@ }; gpt3: timer@302f0000 { - compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; + compatible = "fsl,imx7d-gpt", "fsl,imx6dl-gpt"; reg = <0x302f0000 0x10000>; interrupts = ; clocks = <&clks IMX7D_GPT3_ROOT_CLK>, @@ -483,7 +483,7 @@ }; gpt4: timer@30300000 { - compatible = "fsl,imx7d-gpt", "fsl,imx6sx-gpt"; + compatible = "fsl,imx7d-gpt", "fsl,imx6dl-gpt"; reg = <0x30300000 0x10000>; interrupts = ; clocks = <&clks IMX7D_GPT4_ROOT_CLK>, -- cgit v1.2.3 From 63ef8fc9bcee6b73ca445a19a7ac6bd544723c9f Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 5 Dec 2023 10:27:35 -0300 Subject: ARM: dts: imx28-xea: Pass the 'model' property Per root-node.yaml, 'model' is a required property. Pass it to fix the following dt-schema warning: imx28-xea.dtb: /: 'model' is a required property from schema $id: http://devicetree.org/schemas/root-node.yaml# Signed-off-by: Fabio Estevam Fixes: 445ae16ac1c5 ("ARM: dts: imx28: Add DTS description of imx28 based XEA board") Signed-off-by: Shawn Guo --- arch/arm/boot/dts/nxp/mxs/imx28-xea.dts | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/nxp/mxs/imx28-xea.dts b/arch/arm/boot/dts/nxp/mxs/imx28-xea.dts index a400c108f66a..6c5e6856648a 100644 --- a/arch/arm/boot/dts/nxp/mxs/imx28-xea.dts +++ b/arch/arm/boot/dts/nxp/mxs/imx28-xea.dts @@ -8,6 +8,7 @@ #include "imx28-lwe.dtsi" / { + model = "Liebherr XEA board"; compatible = "lwn,imx28-xea", "fsl,imx28"; }; -- cgit v1.2.3 From 37f3d6108730713c411827ab4af764909f4dfc78 Mon Sep 17 00:00:00 2001 From: Sam Edwards Date: Tue, 5 Dec 2023 12:29:00 -0800 Subject: arm64: dts: rockchip: Fix eMMC Data Strobe PD on rk3588 JEDEC standard JESD84-B51 defines the eMMC Data Strobe line, which is currently used only in HS400 mode, as a device->host clock signal that "is used only in read operation. The Data Strobe is always High-Z (not driven by the device and pulled down by RDS) or Driven Low in write operation, except during CRC status response." RDS is a pull-down resistor specified in the 10K-100K ohm range. Thus per the standard, the Data Strobe is always pulled to ground (by the eMMC and/or RDS) during write operations. Evidently, the eMMC host controller in the RK3588 considers an active voltage on the eMMC-DS line during a write to be an error. The default (i.e. hardware reset, and Rockchip BSP) behavior for the RK3588 is to activate the eMMC-DS pin's builtin pull-down. As a result, many RK3588 board designers do not bother adding a dedicated RDS resistor, instead relying on the RK3588's internal bias. The current devicetree, however, disables this bias (`pcfg_pull_none`), breaking HS400-mode writes for boards without a dedicated RDS, but with an eMMC chip that chooses to High-Z (instead of drive-low) the eMMC-DS line. (The Turing RK1 is one such board.) Fix this by changing the bias in the (common) emmc_data_strobe case to reflect the expected hardware/BSP behavior. This is unlikely to cause regressions elsewhere: the pull-down is only relevant for High-Z eMMCs, and if this is redundant with a (dedicated) RDS resistor, the effective result is only a lower resistance to ground -- where the range of tolerance is quite high. If it does, it's better fixed in the specific devicetrees. Fixes: d85f8a5c798d5 ("arm64: dts: rockchip: Add rk3588 pinctrl data") Signed-off-by: Sam Edwards Link: https://lore.kernel.org/r/20231205202900.4617-2-CFSworks@gmail.com Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3588s-pinctrl.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/rockchip/rk3588s-pinctrl.dtsi b/arch/arm64/boot/dts/rockchip/rk3588s-pinctrl.dtsi index 63151d9d2377..30db12c4fc82 100644 --- a/arch/arm64/boot/dts/rockchip/rk3588s-pinctrl.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3588s-pinctrl.dtsi @@ -369,7 +369,7 @@ emmc_data_strobe: emmc-data-strobe { rockchip,pins = /* emmc_data_strobe */ - <2 RK_PA2 1 &pcfg_pull_none>; + <2 RK_PA2 1 &pcfg_pull_down>; }; }; -- cgit v1.2.3 From 82180b1fae2432ee88b4a54cc6c376ba01e57b22 Mon Sep 17 00:00:00 2001 From: Charlie Jenkins Date: Wed, 22 Nov 2023 15:35:54 -0800 Subject: Support rv32 ULEB128 test Use opcodes available to both rv32 and rv64 in uleb128 module linking test. Fixes: af71bc194916 ("riscv: Add tests for riscv module loading") Signed-off-by: Charlie Jenkins Reported-by: Randy Dunlap Closes: https://lore.kernel.org/lkml/1d7c71ee-5742-4df4-b8ef-a2aea0a624eb@infradead.org/ Tested-by: Randy Dunlap # build-tested Link: https://lore.kernel.org/r/20231122-module_fixup-v2-1-dfb9565e9ea5@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/tests/module_test/test_uleb128.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/riscv/kernel/tests/module_test/test_uleb128.S b/arch/riscv/kernel/tests/module_test/test_uleb128.S index 90f22049d553..8515ed7cd8c1 100644 --- a/arch/riscv/kernel/tests/module_test/test_uleb128.S +++ b/arch/riscv/kernel/tests/module_test/test_uleb128.S @@ -6,13 +6,13 @@ .text .global test_uleb_basic test_uleb_basic: - ld a0, second + lw a0, second addi a0, a0, -127 ret .global test_uleb_large test_uleb_large: - ld a0, fourth + lw a0, fourth addi a0, a0, -0x07e8 ret @@ -22,10 +22,10 @@ first: second: .reloc second, R_RISCV_SET_ULEB128, second .reloc second, R_RISCV_SUB_ULEB128, first - .dword 0 + .word 0 third: .space 1000 fourth: .reloc fourth, R_RISCV_SET_ULEB128, fourth .reloc fourth, R_RISCV_SUB_ULEB128, third - .dword 0 + .word 0 -- cgit v1.2.3 From 777c0d761be7d981a2ae5494dfbc636311908dfb Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Wed, 1 Nov 2023 15:19:09 +0100 Subject: RISC-V: hwprobe: Always use u64 for extension bits Extensions are getting added quickly and their hwprobe bits will soon exceed 31 (which pair values accommodate, since they're of type u64). However, in one tree, where a bunch of extensions got merged prior to zicboz, zicboz already got pushed to bit 32. Pushing it exposed a 32-bit compilation bug, since unsigned long was used instead of u64. Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202310311801.hxduISrr-lkp@intel.com/ Fixes: 9c7646d5ffd2 ("RISC-V: hwprobe: Expose Zicboz extension and its block size") Signed-off-by: Andrew Jones Link: https://lore.kernel.org/r/20231101141908.192198-2-ajones@ventanamicro.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/sys_riscv.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/kernel/sys_riscv.c b/arch/riscv/kernel/sys_riscv.c index c712037dbe10..a2ca5b7756a5 100644 --- a/arch/riscv/kernel/sys_riscv.c +++ b/arch/riscv/kernel/sys_riscv.c @@ -169,7 +169,7 @@ static void hwprobe_isa_ext0(struct riscv_hwprobe *pair, pair->value &= ~missing; } -static bool hwprobe_ext0_has(const struct cpumask *cpus, unsigned long ext) +static bool hwprobe_ext0_has(const struct cpumask *cpus, u64 ext) { struct riscv_hwprobe pair; -- cgit v1.2.3 From 22e0eb04837a63af111fae35a92f7577676b9bc8 Mon Sep 17 00:00:00 2001 From: Clément Léger Date: Fri, 3 Nov 2023 10:02:23 +0100 Subject: riscv: fix misaligned access handling of C.SWSP and C.SDSP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a backport of a fix that was done in OpenSBI: ec0559eb315b ("lib: sbi_misaligned_ldst: Fix handling of C.SWSP and C.SDSP"). Unlike C.LWSP/C.LDSP, these encodings can be used with the zero register, so checking that the rs2 field is non-zero is unnecessary. Additionally, the previous check was incorrect since it was checking the immediate field of the instruction instead of the rs2 field. Fixes: 956d705dd279 ("riscv: Unaligned load/store handling for M_MODE") Signed-off-by: Clément Léger Link: https://lore.kernel.org/r/20231103090223.702340-1-cleger@rivosinc.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/traps_misaligned.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c index 5eba37147caa..5255f8134aef 100644 --- a/arch/riscv/kernel/traps_misaligned.c +++ b/arch/riscv/kernel/traps_misaligned.c @@ -550,16 +550,14 @@ int handle_misaligned_store(struct pt_regs *regs) } else if ((insn & INSN_MASK_C_SD) == INSN_MATCH_C_SD) { len = 8; val.data_ulong = GET_RS2S(insn, regs); - } else if ((insn & INSN_MASK_C_SDSP) == INSN_MATCH_C_SDSP && - ((insn >> SH_RD) & 0x1f)) { + } else if ((insn & INSN_MASK_C_SDSP) == INSN_MATCH_C_SDSP) { len = 8; val.data_ulong = GET_RS2C(insn, regs); #endif } else if ((insn & INSN_MASK_C_SW) == INSN_MATCH_C_SW) { len = 4; val.data_ulong = GET_RS2S(insn, regs); - } else if ((insn & INSN_MASK_C_SWSP) == INSN_MATCH_C_SWSP && - ((insn >> SH_RD) & 0x1f)) { + } else if ((insn & INSN_MASK_C_SWSP) == INSN_MATCH_C_SWSP) { len = 4; val.data_ulong = GET_RS2C(insn, regs); } else if ((insn & INSN_MASK_C_FSD) == INSN_MATCH_C_FSD) { -- cgit v1.2.3 From f40cab8e18ed57d2c7b5213437d83d955f78097f Mon Sep 17 00:00:00 2001 From: Samuel Holland Date: Tue, 21 Nov 2023 13:19:29 -0800 Subject: riscv: Fix SMP when shadow call stacks are enabled This fixes two bugs in SCS initialization for secondary CPUs. First, the SCS was not initialized at all in the spinwait boot path. Second, the code for the SBI HSM path attempted to initialize the SCS before enabling the MMU. However, that involves dereferencing the thread pointer, which requires the MMU to be enabled. Fix both issues by setting up the SCS in the common secondary entry path, after enabling the MMU. Fixes: d1584d791a29 ("riscv: Implement Shadow Call Stack") Signed-off-by: Samuel Holland Reviewed-by: Sami Tolvanen Link: https://lore.kernel.org/r/20231121211958.3158576-1-samuel.holland@sifive.com Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/head.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index b77397432403..76ace1e0b46f 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -154,7 +154,6 @@ secondary_start_sbi: XIP_FIXUP_OFFSET a3 add a3, a3, a1 REG_L sp, (a3) - scs_load_current .Lsecondary_start_common: @@ -165,6 +164,7 @@ secondary_start_sbi: call relocate_enable_mmu #endif call .Lsetup_trap_vector + scs_load_current tail smp_callin #endif /* CONFIG_SMP */ -- cgit v1.2.3 From ed5b7cfd7839f9280a63365c1133482b42d0981f Mon Sep 17 00:00:00 2001 From: Lad Prabhakar Date: Thu, 30 Nov 2023 21:26:47 +0000 Subject: riscv: errata: andes: Probe for IOCP only once in boot stage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to probe for IOCP only once during boot stage, as we were probing for IOCP for all the stages this caused the below issue during module-init stage, [9.019104] Unable to handle kernel paging request at virtual address ffffffff8100d3a0 [9.027153] Oops [#1] [9.029421] Modules linked in: rcar_canfd renesas_usbhs i2c_riic can_dev spi_rspi i2c_core [9.037686] CPU: 0 PID: 90 Comm: udevd Not tainted 6.7.0-rc1+ #57 [9.043756] Hardware name: Renesas SMARC EVK based on r9a07g043f01 (DT) [9.050339] epc : riscv_noncoherent_supported+0x10/0x3e [9.055558]  ra : andes_errata_patch_func+0x4a/0x52 [9.060418] epc : ffffffff8000d8c2 ra : ffffffff8000d95c sp : ffffffc8003abb00 [9.067607]  gp : ffffffff814e25a0 tp : ffffffd80361e540 t0 : 0000000000000000 [9.074795]  t1 : 000000000900031e t2 : 0000000000000001 s0 : ffffffc8003abb20 [9.081984]  s1 : ffffffff015b57c7 a0 : 0000000000000000 a1 : 0000000000000001 [9.089172]  a2 : 0000000000000000 a3 : 0000000000000000 a4 : ffffffff8100d8be [9.096360]  a5 : 0000000000000001 a6 : 0000000000000001 a7 : 000000000900031e [9.103548]  s2 : ffffffff015b57d7 s3 : 0000000000000001 s4 : 000000000000031e [9.110736]  s5 : 8000000000008a45 s6 : 0000000000000500 s7 : 000000000000003f [9.117924]  s8 : ffffffc8003abd48 s9 : ffffffff015b1140 s10: ffffffff8151a1b0 [9.125113]  s11: ffffffff015b1000 t3 : 0000000000000001 t4 : fefefefefefefeff [9.132301]  t5 : ffffffff015b57c7 t6 : ffffffd8b63a6000 [9.137587] status: 0000000200000120 badaddr: ffffffff8100d3a0 cause: 000000000000000f [9.145468] [] riscv_noncoherent_supported+0x10/0x3e [9.151972] [] _apply_alternatives+0x84/0x86 [9.157784] [] apply_module_alternatives+0x10/0x1a [9.164113] [] module_finalize+0x5e/0x7a [9.169583] [] load_module+0xfd8/0x179c [9.174965] [] init_module_from_file+0x76/0xaa [9.180948] [] __riscv_sys_finit_module+0x176/0x2a8 [9.187365] [] do_trap_ecall_u+0xbe/0x130 [9.192922] [] ret_from_exception+0x0/0x64 [9.198573] Code: 0009 b7e9 6797 014d a783 85a7 c799 4785 0717 0100 (0123) aef7 [9.205994] ---[ end trace 0000000000000000 ]--- This is because we called riscv_noncoherent_supported() for all the stages during IOCP probe. riscv_noncoherent_supported() function sets noncoherent_supported variable to true which has an annotation set to "__ro_after_init" due to which we were seeing the above splat. Fix this by probing for IOCP only once in boot stage by having a boolean variable "done" which will be set to true upon IOCP probe in errata_probe_iocp() and we bail out early if "done" is set to true. While at it make return type of errata_probe_iocp() to void as we were not checking the return value in andes_errata_patch_func(). Fixes: e021ae7f5145 ("riscv: errata: Add Andes alternative ports") Signed-off-by: Lad Prabhakar Reviewed-by: Geert Uytterhoeven Reviewed-by: Yu Chien Peter Lin Link: https://lore.kernel.org/r/20231130212647.108746-1-prabhakar.mahadev-lad.rj@bp.renesas.com Signed-off-by: Palmer Dabbelt --- arch/riscv/errata/andes/errata.c | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/riscv/errata/andes/errata.c b/arch/riscv/errata/andes/errata.c index 197db68cc8da..17a904869724 100644 --- a/arch/riscv/errata/andes/errata.c +++ b/arch/riscv/errata/andes/errata.c @@ -38,29 +38,35 @@ static long ax45mp_iocp_sw_workaround(void) return ret.error ? 0 : ret.value; } -static bool errata_probe_iocp(unsigned int stage, unsigned long arch_id, unsigned long impid) +static void errata_probe_iocp(unsigned int stage, unsigned long arch_id, unsigned long impid) { + static bool done; + if (!IS_ENABLED(CONFIG_ERRATA_ANDES_CMO)) - return false; + return; + + if (done) + return; + + done = true; if (arch_id != ANDESTECH_AX45MP_MARCHID || impid != ANDESTECH_AX45MP_MIMPID) - return false; + return; if (!ax45mp_iocp_sw_workaround()) - return false; + return; /* Set this just to make core cbo code happy */ riscv_cbom_block_size = 1; riscv_noncoherent_supported(); - - return true; } void __init_or_module andes_errata_patch_func(struct alt_entry *begin, struct alt_entry *end, unsigned long archid, unsigned long impid, unsigned int stage) { - errata_probe_iocp(stage, archid, impid); + if (stage == RISCV_ALTERNATIVES_BOOT) + errata_probe_iocp(stage, archid, impid); /* we have nothing to patch here ATM so just return back */ } -- cgit v1.2.3 From 4b7de801606e504e69689df71475d27e35336fb3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 6 Dec 2023 09:30:40 +0100 Subject: bpf: Fix prog_array_map_poke_run map poke update Lee pointed out issue found by syscaller [0] hitting BUG in prog array map poke update in prog_array_map_poke_run function due to error value returned from bpf_arch_text_poke function. There's race window where bpf_arch_text_poke can fail due to missing bpf program kallsym symbols, which is accounted for with check for -EINVAL in that BUG_ON call. The problem is that in such case we won't update the tail call jump and cause imbalance for the next tail call update check which will fail with -EBUSY in bpf_arch_text_poke. I'm hitting following race during the program load: CPU 0 CPU 1 bpf_prog_load bpf_check do_misc_fixups prog_array_map_poke_track map_update_elem bpf_fd_array_map_update_elem prog_array_map_poke_run bpf_arch_text_poke returns -EINVAL bpf_prog_kallsyms_add After bpf_arch_text_poke (CPU 1) fails to update the tail call jump, the next poke update fails on expected jump instruction check in bpf_arch_text_poke with -EBUSY and triggers the BUG_ON in prog_array_map_poke_run. Similar race exists on the program unload. Fixing this by moving the update to bpf_arch_poke_desc_update function which makes sure we call __bpf_arch_text_poke that skips the bpf address check. Each architecture has slightly different approach wrt looking up bpf address in bpf_arch_text_poke, so instead of splitting the function or adding new 'checkip' argument in previous version, it seems best to move the whole map_poke_run update as arch specific code. [0] https://syzkaller.appspot.com/bug?extid=97a4fe20470e9bc30810 Fixes: ebf7d1f508a7 ("bpf, x64: rework pro/epilogue and tailcall handling in JIT") Reported-by: syzbot+97a4fe20470e9bc30810@syzkaller.appspotmail.com Signed-off-by: Jiri Olsa Signed-off-by: Daniel Borkmann Acked-by: Yonghong Song Cc: Lee Jones Cc: Maciej Fijalkowski Link: https://lore.kernel.org/bpf/20231206083041.1306660-2-jolsa@kernel.org --- arch/x86/net/bpf_jit_comp.c | 46 +++++++++++++++++++++++++++++++++++ include/linux/bpf.h | 3 +++ kernel/bpf/arraymap.c | 58 ++++++++------------------------------------- 3 files changed, 59 insertions(+), 48 deletions(-) (limited to 'arch') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 8c10d9abc239..e89e415aa743 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -3025,3 +3025,49 @@ void arch_bpf_stack_walk(bool (*consume_fn)(void *cookie, u64 ip, u64 sp, u64 bp #endif WARN(1, "verification of programs using bpf_throw should have failed\n"); } + +void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke, + struct bpf_prog *new, struct bpf_prog *old) +{ + u8 *old_addr, *new_addr, *old_bypass_addr; + int ret; + + old_bypass_addr = old ? NULL : poke->bypass_addr; + old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL; + new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL; + + /* + * On program loading or teardown, the program's kallsym entry + * might not be in place, so we use __bpf_arch_text_poke to skip + * the kallsyms check. + */ + if (new) { + ret = __bpf_arch_text_poke(poke->tailcall_target, + BPF_MOD_JUMP, + old_addr, new_addr); + BUG_ON(ret < 0); + if (!old) { + ret = __bpf_arch_text_poke(poke->tailcall_bypass, + BPF_MOD_JUMP, + poke->bypass_addr, + NULL); + BUG_ON(ret < 0); + } + } else { + ret = __bpf_arch_text_poke(poke->tailcall_bypass, + BPF_MOD_JUMP, + old_bypass_addr, + poke->bypass_addr); + BUG_ON(ret < 0); + /* let other CPUs finish the execution of program + * so that it will not possible to expose them + * to invalid nop, stack unwind, nop state + */ + if (!ret) + synchronize_rcu(); + ret = __bpf_arch_text_poke(poke->tailcall_target, + BPF_MOD_JUMP, + old_addr, NULL); + BUG_ON(ret < 0); + } +} diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6762dac3ef76..cff5bb08820e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -3175,6 +3175,9 @@ enum bpf_text_poke_type { int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, void *addr1, void *addr2); +void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke, + struct bpf_prog *new, struct bpf_prog *old); + void *bpf_arch_text_copy(void *dst, void *src, size_t len); int bpf_arch_text_invalidate(void *dst, size_t len); diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index 2058e89b5ddd..c85ff9162a5c 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -1012,11 +1012,16 @@ static void prog_array_map_poke_untrack(struct bpf_map *map, mutex_unlock(&aux->poke_mutex); } +void __weak bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke, + struct bpf_prog *new, struct bpf_prog *old) +{ + WARN_ON_ONCE(1); +} + static void prog_array_map_poke_run(struct bpf_map *map, u32 key, struct bpf_prog *old, struct bpf_prog *new) { - u8 *old_addr, *new_addr, *old_bypass_addr; struct prog_poke_elem *elem; struct bpf_array_aux *aux; @@ -1025,7 +1030,7 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key, list_for_each_entry(elem, &aux->poke_progs, list) { struct bpf_jit_poke_descriptor *poke; - int i, ret; + int i; for (i = 0; i < elem->aux->size_poke_tab; i++) { poke = &elem->aux->poke_tab[i]; @@ -1044,21 +1049,10 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key, * activated, so tail call updates can arrive from here * while JIT is still finishing its final fixup for * non-activated poke entries. - * 3) On program teardown, the program's kallsym entry gets - * removed out of RCU callback, but we can only untrack - * from sleepable context, therefore bpf_arch_text_poke() - * might not see that this is in BPF text section and - * bails out with -EINVAL. As these are unreachable since - * RCU grace period already passed, we simply skip them. - * 4) Also programs reaching refcount of zero while patching + * 3) Also programs reaching refcount of zero while patching * is in progress is okay since we're protected under * poke_mutex and untrack the programs before the JIT - * buffer is freed. When we're still in the middle of - * patching and suddenly kallsyms entry of the program - * gets evicted, we just skip the rest which is fine due - * to point 3). - * 5) Any other error happening below from bpf_arch_text_poke() - * is a unexpected bug. + * buffer is freed. */ if (!READ_ONCE(poke->tailcall_target_stable)) continue; @@ -1068,39 +1062,7 @@ static void prog_array_map_poke_run(struct bpf_map *map, u32 key, poke->tail_call.key != key) continue; - old_bypass_addr = old ? NULL : poke->bypass_addr; - old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL; - new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL; - - if (new) { - ret = bpf_arch_text_poke(poke->tailcall_target, - BPF_MOD_JUMP, - old_addr, new_addr); - BUG_ON(ret < 0 && ret != -EINVAL); - if (!old) { - ret = bpf_arch_text_poke(poke->tailcall_bypass, - BPF_MOD_JUMP, - poke->bypass_addr, - NULL); - BUG_ON(ret < 0 && ret != -EINVAL); - } - } else { - ret = bpf_arch_text_poke(poke->tailcall_bypass, - BPF_MOD_JUMP, - old_bypass_addr, - poke->bypass_addr); - BUG_ON(ret < 0 && ret != -EINVAL); - /* let other CPUs finish the execution of program - * so that it will not possible to expose them - * to invalid nop, stack unwind, nop state - */ - if (!ret) - synchronize_rcu(); - ret = bpf_arch_text_poke(poke->tailcall_target, - BPF_MOD_JUMP, - old_addr, NULL); - BUG_ON(ret < 0 && ret != -EINVAL); - } + bpf_arch_poke_desc_update(poke, new, old); } } } -- cgit v1.2.3 From dccf78d39f1069a5ddf4328bf0c97aa5f2f4296e Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Tue, 28 Nov 2023 13:44:57 +0800 Subject: kernel/Kconfig.kexec: drop select of KEXEC for CRASH_DUMP Ignat Korchagin complained that a potential config regression was introduced by commit 89cde455915f ("kexec: consolidate kexec and crash options into kernel/Kconfig.kexec"). Before the commit, CONFIG_CRASH_DUMP has no dependency on CONFIG_KEXEC. After the commit, CRASH_DUMP selects KEXEC. That enforces system to have CONFIG_KEXEC=y as long as CONFIG_CRASH_DUMP=Y which people may not want. In Ignat's case, he sets CONFIG_CRASH_DUMP=y, CONFIG_KEXEC_FILE=y and CONFIG_KEXEC=n because kexec_load interface could have security issue if kernel/initrd has no chance to be signed and verified. CRASH_DUMP has select of KEXEC because Eric, author of above commit, met a LKP report of build failure when posting patch of earlier version. Please see below link to get detail of the LKP report: https://lore.kernel.org/all/3e8eecd1-a277-2cfb-690e-5de2eb7b988e@oracle.com/T/#u In fact, that LKP report is triggered because arm's is wrapped in CONFIG_KEXEC ifdeffery scope. That is wrong. CONFIG_KEXEC controls the enabling/disabling of kexec_load interface, but not kexec feature. Removing the wrongly added CONFIG_KEXEC ifdeffery scope in of arm allows us to drop the select KEXEC for CRASH_DUMP. Meanwhile, change arch/arm/kernel/Makefile to let machine_kexec.o relocate_kernel.o depend on KEXEC_CORE. Link: https://lkml.kernel.org/r/20231128054457.659452-1-bhe@redhat.com Fixes: 89cde455915f ("kexec: consolidate kexec and crash options into kernel/Kconfig.kexec") Signed-off-by: Baoquan He Reported-by: Ignat Korchagin Tested-by: Ignat Korchagin [compile-time only] Tested-by: Alexander Gordeev Reviewed-by: Eric DeVolder Tested-by: Eric DeVolder Signed-off-by: Andrew Morton --- arch/arm/include/asm/kexec.h | 4 ---- arch/arm/kernel/Makefile | 2 +- kernel/Kconfig.kexec | 1 - 3 files changed, 1 insertion(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/kexec.h b/arch/arm/include/asm/kexec.h index e62832dcba76..a8287e7ab9d4 100644 --- a/arch/arm/include/asm/kexec.h +++ b/arch/arm/include/asm/kexec.h @@ -2,8 +2,6 @@ #ifndef _ARM_KEXEC_H #define _ARM_KEXEC_H -#ifdef CONFIG_KEXEC - /* Maximum physical address we can use pages from */ #define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) /* Maximum address we can reach in physical address mode */ @@ -82,6 +80,4 @@ static inline struct page *boot_pfn_to_page(unsigned long boot_pfn) #endif /* __ASSEMBLY__ */ -#endif /* CONFIG_KEXEC */ - #endif /* _ARM_KEXEC_H */ diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index d53f56d6f840..771264d4726a 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -59,7 +59,7 @@ obj-$(CONFIG_FUNCTION_TRACER) += entry-ftrace.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o insn.o patch.o obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o insn.o patch.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o insn.o patch.o -obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o +obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o # Main staffs in KPROBES are in arch/arm/probes/ . obj-$(CONFIG_KPROBES) += patch.o insn.o obj-$(CONFIG_OABI_COMPAT) += sys_oabi-compat.o diff --git a/kernel/Kconfig.kexec b/kernel/Kconfig.kexec index 7aff28ded2f4..1cc3b1c595d7 100644 --- a/kernel/Kconfig.kexec +++ b/kernel/Kconfig.kexec @@ -97,7 +97,6 @@ config CRASH_DUMP depends on ARCH_SUPPORTS_KEXEC select CRASH_CORE select KEXEC_CORE - select KEXEC help Generate crash dump after being started by kexec. This should be normally only set in special crash dump kernels -- cgit v1.2.3 From b82a8dbd3d2f4563156f7150c6f2ecab6e960b30 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 4 Dec 2023 11:31:38 +0300 Subject: x86/coco: Disable 32-bit emulation by default on TDX and SEV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The INT 0x80 instruction is used for 32-bit x86 Linux syscalls. The kernel expects to receive a software interrupt as a result of the INT 0x80 instruction. However, an external interrupt on the same vector triggers the same handler. The kernel interprets an external interrupt on vector 0x80 as a 32-bit system call that came from userspace. A VMM can inject external interrupts on any arbitrary vector at any time. This remains true even for TDX and SEV guests where the VMM is untrusted. Put together, this allows an untrusted VMM to trigger int80 syscall handling at any given point. The content of the guest register file at that moment defines what syscall is triggered and its arguments. It opens the guest OS to manipulation from the VMM side. Disable 32-bit emulation by default for TDX and SEV. User can override it with the ia32_emulation=y command line option. [ dhansen: reword the changelog ] Reported-by: Supraja Sridhara Reported-by: Benedict Schlüter Reported-by: Mark Kuhne Reported-by: Andrin Bertschi Reported-by: Shweta Shinde Signed-off-by: Kirill A. Shutemov Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Reviewed-by: Borislav Petkov (AMD) Cc: # v6.0+: 1da5c9b x86: Introduce ia32_enabled() Cc: # v6.0+ --- arch/x86/coco/tdx/tdx.c | 10 ++++++++++ arch/x86/include/asm/ia32.h | 7 +++++++ arch/x86/mm/mem_encrypt_amd.c | 11 +++++++++++ 3 files changed, 28 insertions(+) (limited to 'arch') diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 1b5d17a9f70d..545e54b829b2 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -891,5 +892,14 @@ void __init tdx_early_init(void) */ x86_cpuinit.parallel_bringup = false; + /* + * The VMM is capable of injecting interrupt 0x80 and triggering the + * compatibility syscall path. + * + * By default, the 32-bit emulation is disabled in order to ensure + * the safety of the VM. + */ + ia32_disable(); + pr_info("Guest detected\n"); } diff --git a/arch/x86/include/asm/ia32.h b/arch/x86/include/asm/ia32.h index 5a2ae24b1204..9805629479d9 100644 --- a/arch/x86/include/asm/ia32.h +++ b/arch/x86/include/asm/ia32.h @@ -75,6 +75,11 @@ static inline bool ia32_enabled(void) return __ia32_enabled; } +static inline void ia32_disable(void) +{ + __ia32_enabled = false; +} + #else /* !CONFIG_IA32_EMULATION */ static inline bool ia32_enabled(void) @@ -82,6 +87,8 @@ static inline bool ia32_enabled(void) return IS_ENABLED(CONFIG_X86_32); } +static inline void ia32_disable(void) {} + #endif #endif /* _ASM_X86_IA32_H */ diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index a68f2dda0948..70b91de2e053 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "mm_internal.h" @@ -481,6 +482,16 @@ void __init sme_early_init(void) */ if (sev_status & MSR_AMD64_SEV_ES_ENABLED) x86_cpuinit.parallel_bringup = false; + + /* + * The VMM is capable of injecting interrupt 0x80 and triggering the + * compatibility syscall path. + * + * By default, the 32-bit emulation is disabled in order to ensure + * the safety of the VM. + */ + if (sev_status & MSR_AMD64_SEV_ENABLED) + ia32_disable(); } void __init mem_encrypt_free_decrypted_mem(void) -- cgit v1.2.3 From be5341eb0d43b1e754799498bd2e8756cc167a41 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Dec 2023 11:31:39 +0300 Subject: x86/entry: Convert INT 0x80 emulation to IDTENTRY There is no real reason to have a separate ASM entry point implementation for the legacy INT 0x80 syscall emulation on 64-bit. IDTENTRY provides all the functionality needed with the only difference that it does not: - save the syscall number (AX) into pt_regs::orig_ax - set pt_regs::ax to -ENOSYS Both can be done safely in the C code of an IDTENTRY before invoking any of the syscall related functions which depend on this convention. Aside of ASM code reduction this prepares for detecting and handling a local APIC injected vector 0x80. [ kirill.shutemov: More verbose comments ] Suggested-by: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Kirill A. Shutemov Signed-off-by: Dave Hansen Reviewed-by: Borislav Petkov (AMD) Cc: # v6.0+ --- arch/x86/entry/common.c | 58 +++++++++++++++++++++++++++++- arch/x86/entry/entry_64_compat.S | 77 ---------------------------------------- arch/x86/include/asm/idtentry.h | 4 +++ arch/x86/include/asm/proto.h | 4 --- arch/x86/kernel/idt.c | 2 +- arch/x86/xen/enlighten_pv.c | 2 +- arch/x86/xen/xen-asm.S | 2 +- 7 files changed, 64 insertions(+), 85 deletions(-) (limited to 'arch') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index d813160b14d8..8ca4d57d68eb 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -167,7 +167,62 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr) } } -/* Handles int $0x80 */ +#ifdef CONFIG_IA32_EMULATION +/** + * int80_emulation - 32-bit legacy syscall entry + * + * This entry point can be used by 32-bit and 64-bit programs to perform + * 32-bit system calls. Instances of INT $0x80 can be found inline in + * various programs and libraries. It is also used by the vDSO's + * __kernel_vsyscall fallback for hardware that doesn't support a faster + * entry method. Restarted 32-bit system calls also fall back to INT + * $0x80 regardless of what instruction was originally used to do the + * system call. + * + * This is considered a slow path. It is not used by most libc + * implementations on modern hardware except during process startup. + * + * The arguments for the INT $0x80 based syscall are on stack in the + * pt_regs structure: + * eax: system call number + * ebx, ecx, edx, esi, edi, ebp: arg1 - arg 6 + */ +DEFINE_IDTENTRY_RAW(int80_emulation) +{ + int nr; + + /* Establish kernel context. */ + enter_from_user_mode(regs); + + instrumentation_begin(); + add_random_kstack_offset(); + + /* + * The low level idtentry code pushed -1 into regs::orig_ax + * and regs::ax contains the syscall number. + * + * User tracing code (ptrace or signal handlers) might assume + * that the regs::orig_ax contains a 32-bit number on invoking + * a 32-bit syscall. + * + * Establish the syscall convention by saving the 32bit truncated + * syscall number in regs::orig_ax and by invalidating regs::ax. + */ + regs->orig_ax = regs->ax & GENMASK(31, 0); + regs->ax = -ENOSYS; + + nr = syscall_32_enter(regs); + + local_irq_enable(); + nr = syscall_enter_from_user_mode_work(regs, nr); + do_syscall_32_irqs_on(regs, nr); + + instrumentation_end(); + syscall_exit_to_user_mode(regs); +} +#else /* CONFIG_IA32_EMULATION */ + +/* Handles int $0x80 on a 32bit kernel */ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs) { int nr = syscall_32_enter(regs); @@ -186,6 +241,7 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs) instrumentation_end(); syscall_exit_to_user_mode(regs); } +#endif /* !CONFIG_IA32_EMULATION */ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs) { diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 27c05d08558a..de94e2e84ecc 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -275,80 +275,3 @@ SYM_INNER_LABEL(entry_SYSRETL_compat_end, SYM_L_GLOBAL) ANNOTATE_NOENDBR int3 SYM_CODE_END(entry_SYSCALL_compat) - -/* - * 32-bit legacy system call entry. - * - * 32-bit x86 Linux system calls traditionally used the INT $0x80 - * instruction. INT $0x80 lands here. - * - * This entry point can be used by 32-bit and 64-bit programs to perform - * 32-bit system calls. Instances of INT $0x80 can be found inline in - * various programs and libraries. It is also used by the vDSO's - * __kernel_vsyscall fallback for hardware that doesn't support a faster - * entry method. Restarted 32-bit system calls also fall back to INT - * $0x80 regardless of what instruction was originally used to do the - * system call. - * - * This is considered a slow path. It is not used by most libc - * implementations on modern hardware except during process startup. - * - * Arguments: - * eax system call number - * ebx arg1 - * ecx arg2 - * edx arg3 - * esi arg4 - * edi arg5 - * ebp arg6 - */ -SYM_CODE_START(entry_INT80_compat) - UNWIND_HINT_ENTRY - ENDBR - /* - * Interrupts are off on entry. - */ - ASM_CLAC /* Do this early to minimize exposure */ - ALTERNATIVE "swapgs", "", X86_FEATURE_XENPV - - /* - * User tracing code (ptrace or signal handlers) might assume that - * the saved RAX contains a 32-bit number when we're invoking a 32-bit - * syscall. Just in case the high bits are nonzero, zero-extend - * the syscall number. (This could almost certainly be deleted - * with no ill effects.) - */ - movl %eax, %eax - - /* switch to thread stack expects orig_ax and rdi to be pushed */ - pushq %rax /* pt_regs->orig_ax */ - - /* Need to switch before accessing the thread stack. */ - SWITCH_TO_KERNEL_CR3 scratch_reg=%rax - - /* In the Xen PV case we already run on the thread stack. */ - ALTERNATIVE "", "jmp .Lint80_keep_stack", X86_FEATURE_XENPV - - movq %rsp, %rax - movq PER_CPU_VAR(pcpu_hot + X86_top_of_stack), %rsp - - pushq 5*8(%rax) /* regs->ss */ - pushq 4*8(%rax) /* regs->rsp */ - pushq 3*8(%rax) /* regs->eflags */ - pushq 2*8(%rax) /* regs->cs */ - pushq 1*8(%rax) /* regs->ip */ - pushq 0*8(%rax) /* regs->orig_ax */ -.Lint80_keep_stack: - - PUSH_AND_CLEAR_REGS rax=$-ENOSYS - UNWIND_HINT_REGS - - cld - - IBRS_ENTER - UNTRAIN_RET - - movq %rsp, %rdi - call do_int80_syscall_32 - jmp swapgs_restore_regs_and_return_to_usermode -SYM_CODE_END(entry_INT80_compat) diff --git a/arch/x86/include/asm/idtentry.h b/arch/x86/include/asm/idtentry.h index 05fd175cec7d..13639e57e1f8 100644 --- a/arch/x86/include/asm/idtentry.h +++ b/arch/x86/include/asm/idtentry.h @@ -569,6 +569,10 @@ DECLARE_IDTENTRY_RAW(X86_TRAP_UD, exc_invalid_op); DECLARE_IDTENTRY_RAW(X86_TRAP_BP, exc_int3); DECLARE_IDTENTRY_RAW_ERRORCODE(X86_TRAP_PF, exc_page_fault); +#if defined(CONFIG_IA32_EMULATION) +DECLARE_IDTENTRY_RAW(IA32_SYSCALL_VECTOR, int80_emulation); +#endif + #ifdef CONFIG_X86_MCE #ifdef CONFIG_X86_64 DECLARE_IDTENTRY_MCE(X86_TRAP_MC, exc_machine_check); diff --git a/arch/x86/include/asm/proto.h b/arch/x86/include/asm/proto.h index 4d84122bd643..484f4f0131a5 100644 --- a/arch/x86/include/asm/proto.h +++ b/arch/x86/include/asm/proto.h @@ -32,10 +32,6 @@ void entry_SYSCALL_compat(void); void entry_SYSCALL_compat_safe_stack(void); void entry_SYSRETL_compat_unsafe_stack(void); void entry_SYSRETL_compat_end(void); -void entry_INT80_compat(void); -#ifdef CONFIG_XEN_PV -void xen_entry_INT80_compat(void); -#endif #else /* !CONFIG_IA32_EMULATION */ #define entry_SYSCALL_compat NULL #define entry_SYSENTER_compat NULL diff --git a/arch/x86/kernel/idt.c b/arch/x86/kernel/idt.c index 8857abc706e4..660b601f1d6c 100644 --- a/arch/x86/kernel/idt.c +++ b/arch/x86/kernel/idt.c @@ -121,7 +121,7 @@ static const __initconst struct idt_data def_idts[] = { static const struct idt_data ia32_idt[] __initconst = { #if defined(CONFIG_IA32_EMULATION) - SYSG(IA32_SYSCALL_VECTOR, entry_INT80_compat), + SYSG(IA32_SYSCALL_VECTOR, asm_int80_emulation), #elif defined(CONFIG_X86_32) SYSG(IA32_SYSCALL_VECTOR, entry_INT80_32), #endif diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index bbbfdd495ebd..aeb33e0a3f76 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -704,7 +704,7 @@ static struct trap_array_entry trap_array[] = { TRAP_ENTRY(exc_int3, false ), TRAP_ENTRY(exc_overflow, false ), #ifdef CONFIG_IA32_EMULATION - { entry_INT80_compat, xen_entry_INT80_compat, false }, + TRAP_ENTRY(int80_emulation, false ), #endif TRAP_ENTRY(exc_page_fault, false ), TRAP_ENTRY(exc_divide_error, false ), diff --git a/arch/x86/xen/xen-asm.S b/arch/x86/xen/xen-asm.S index 9e5e68008785..1a9cd18dfbd3 100644 --- a/arch/x86/xen/xen-asm.S +++ b/arch/x86/xen/xen-asm.S @@ -156,7 +156,7 @@ xen_pv_trap asm_xenpv_exc_machine_check #endif /* CONFIG_X86_MCE */ xen_pv_trap asm_exc_simd_coprocessor_error #ifdef CONFIG_IA32_EMULATION -xen_pv_trap entry_INT80_compat +xen_pv_trap asm_int80_emulation #endif xen_pv_trap asm_exc_xen_unknown_trap xen_pv_trap asm_exc_xen_hypervisor_callback -- cgit v1.2.3 From 55617fb991df535f953589586468612351575704 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 4 Dec 2023 11:31:40 +0300 Subject: x86/entry: Do not allow external 0x80 interrupts The INT 0x80 instruction is used for 32-bit x86 Linux syscalls. The kernel expects to receive a software interrupt as a result of the INT 0x80 instruction. However, an external interrupt on the same vector also triggers the same codepath. An external interrupt on vector 0x80 will currently be interpreted as a 32-bit system call, and assuming that it was a user context. Panic on external interrupts on the vector. To distinguish software interrupts from external ones, the kernel checks the APIC ISR bit relevant to the 0x80 vector. For software interrupts, this bit will be 0. Signed-off-by: Thomas Gleixner Signed-off-by: Kirill A. Shutemov Signed-off-by: Dave Hansen Reviewed-by: Borislav Petkov (AMD) Cc: # v6.0+ --- arch/x86/entry/common.c | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index 8ca4d57d68eb..6356060caaf3 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -26,6 +26,7 @@ #include #endif +#include #include #include #include @@ -168,6 +169,25 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs, int nr) } #ifdef CONFIG_IA32_EMULATION +static __always_inline bool int80_is_external(void) +{ + const unsigned int offs = (0x80 / 32) * 0x10; + const u32 bit = BIT(0x80 % 32); + + /* The local APIC on XENPV guests is fake */ + if (cpu_feature_enabled(X86_FEATURE_XENPV)) + return false; + + /* + * If vector 0x80 is set in the APIC ISR then this is an external + * interrupt. Either from broken hardware or injected by a VMM. + * + * Note: In guest mode this is only valid for secure guests where + * the secure module fully controls the vAPIC exposed to the guest. + */ + return apic_read(APIC_ISR + offs) & bit; +} + /** * int80_emulation - 32-bit legacy syscall entry * @@ -191,12 +211,27 @@ DEFINE_IDTENTRY_RAW(int80_emulation) { int nr; - /* Establish kernel context. */ + /* Kernel does not use INT $0x80! */ + if (unlikely(!user_mode(regs))) { + irqentry_enter(regs); + instrumentation_begin(); + panic("Unexpected external interrupt 0x80\n"); + } + + /* + * Establish kernel context for instrumentation, including for + * int80_is_external() below which calls into the APIC driver. + * Identical for soft and external interrupts. + */ enter_from_user_mode(regs); instrumentation_begin(); add_random_kstack_offset(); + /* Validate that this is a soft interrupt to the extent possible */ + if (unlikely(int80_is_external())) + panic("Unexpected external interrupt 0x80\n"); + /* * The low level idtentry code pushed -1 into regs::orig_ax * and regs::ax contains the syscall number. -- cgit v1.2.3 From f4116bfc44621882556bbf70f5284fbf429a5cf6 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 4 Dec 2023 11:31:41 +0300 Subject: x86/tdx: Allow 32-bit emulation by default 32-bit emulation was disabled on TDX to prevent a possible attack by a VMM injecting an interrupt on vector 0x80. Now that int80_emulation() has a check for external interrupts the limitation can be lifted. To distinguish software interrupts from external ones, int80_emulation() checks the APIC ISR bit relevant to the 0x80 vector. For software interrupts, this bit will be 0. On TDX, the VAPIC state (including ISR) is protected and cannot be manipulated by the VMM. The ISR bit is set by the microcode flow during the handling of posted interrupts. [ dhansen: more changelog tweaks ] Signed-off-by: Kirill A. Shutemov Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Reviewed-by: Borislav Petkov (AMD) Cc: # v6.0+ --- arch/x86/coco/tdx/tdx.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/coco/tdx/tdx.c b/arch/x86/coco/tdx/tdx.c index 545e54b829b2..cf1f13c82175 100644 --- a/arch/x86/coco/tdx/tdx.c +++ b/arch/x86/coco/tdx/tdx.c @@ -892,14 +892,5 @@ void __init tdx_early_init(void) */ x86_cpuinit.parallel_bringup = false; - /* - * The VMM is capable of injecting interrupt 0x80 and triggering the - * compatibility syscall path. - * - * By default, the 32-bit emulation is disabled in order to ensure - * the safety of the VM. - */ - ia32_disable(); - pr_info("Guest detected\n"); } -- cgit v1.2.3 From 4cdf351d3630a640ab6a05721ef055b9df62277f Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Fri, 7 May 2021 09:59:46 -0700 Subject: KVM: SVM: Update EFER software model on CR0 trap for SEV-ES In general, activating long mode involves setting the EFER_LME bit in the EFER register and then enabling the X86_CR0_PG bit in the CR0 register. At this point, the EFER_LMA bit will be set automatically by hardware. In the case of SVM/SEV guests where writes to CR0 are intercepted, it's necessary for the host to set EFER_LMA on behalf of the guest since hardware does not see the actual CR0 write. In the case of SEV-ES guests where writes to CR0 are trapped instead of intercepted, the hardware *does* see/record the write to CR0 before exiting and passing the value on to the host, so as part of enabling SEV-ES support commit f1c6366e3043 ("KVM: SVM: Add required changes to support intercepts under SEV-ES") dropped special handling of the EFER_LMA bit with the understanding that it would be set automatically. However, since the guest never explicitly sets the EFER_LMA bit, the host never becomes aware that it has been set. This becomes problematic when userspace tries to get/set the EFER values via KVM_GET_SREGS/KVM_SET_SREGS, since the EFER contents tracked by the host will be missing the EFER_LMA bit, and when userspace attempts to pass the EFER value back via KVM_SET_SREGS it will fail a sanity check that asserts that EFER_LMA should always be set when X86_CR0_PG and EFER_LME are set. Fix this by always inferring the value of EFER_LMA based on X86_CR0_PG and EFER_LME, regardless of whether or not SEV-ES is enabled. Fixes: f1c6366e3043 ("KVM: SVM: Add required changes to support intercepts under SEV-ES") Reported-by: Peter Gonda Cc: stable@vger.kernel.org Signed-off-by: Sean Christopherson Message-Id: <20210507165947.2502412-2-seanjc@google.com> [A two year old patch that was revived after we noticed the failure in KVM_SET_SREGS and a similar patch was posted by Michael Roth. This is Sean's patch, but with Michael's more complete commit message. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm/svm.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c index 712146312358..f3bb30b40876 100644 --- a/arch/x86/kvm/svm/svm.c +++ b/arch/x86/kvm/svm/svm.c @@ -1855,15 +1855,17 @@ void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) bool old_paging = is_paging(vcpu); #ifdef CONFIG_X86_64 - if (vcpu->arch.efer & EFER_LME && !vcpu->arch.guest_state_protected) { + if (vcpu->arch.efer & EFER_LME) { if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) { vcpu->arch.efer |= EFER_LMA; - svm->vmcb->save.efer |= EFER_LMA | EFER_LME; + if (!vcpu->arch.guest_state_protected) + svm->vmcb->save.efer |= EFER_LMA | EFER_LME; } if (is_paging(vcpu) && !(cr0 & X86_CR0_PG)) { vcpu->arch.efer &= ~EFER_LMA; - svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME); + if (!vcpu->arch.guest_state_protected) + svm->vmcb->save.efer &= ~(EFER_LMA | EFER_LME); } } #endif -- cgit v1.2.3 From eea673e9d5ea994c60b550ffb684413d3759b3f4 Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Sat, 9 Dec 2023 15:49:15 +0800 Subject: LoongArch: Apply dynamic relocations for LLD For the following assembly code: .text .global func func: nop .data var: .dword func When linked with `-pie`, GNU LD populates the `var` variable with the pre-relocated value of `func`. However, LLVM LLD does not exhibit the same behavior. This issue also arises with the `kernel_entry` in arch/ loongarch/kernel/head.S: _head: .word MZ_MAGIC /* "MZ", MS-DOS header */ .org 0x8 .dword kernel_entry /* Kernel entry point */ The correct kernel entry from the MS-DOS header is crucial for jumping to vmlinux from zboot. This necessity is why the compressed relocatable kernel compiled by Clang encounters difficulties in booting. To address this problem, it is proposed to apply dynamic relocations to place with `--apply-dynamic-relocs`. Link: https://github.com/ClangBuiltLinux/linux/issues/1962 Signed-off-by: WANG Rui Signed-off-by: Huacai Chen --- arch/loongarch/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/loongarch/Makefile b/arch/loongarch/Makefile index 204b94b2e6aa..4ba8d67ddb09 100644 --- a/arch/loongarch/Makefile +++ b/arch/loongarch/Makefile @@ -83,7 +83,7 @@ endif ifeq ($(CONFIG_RELOCATABLE),y) KBUILD_CFLAGS_KERNEL += -fPIE -LDFLAGS_vmlinux += -static -pie --no-dynamic-linker -z notext +LDFLAGS_vmlinux += -static -pie --no-dynamic-linker -z notext $(call ld-option, --apply-dynamic-relocs) endif cflags-y += $(call cc-option, -mno-check-zero-division) -- cgit v1.2.3 From 8146c5b349074da7732f1d45eb4a5f9fd192c7c1 Mon Sep 17 00:00:00 2001 From: Xi Ruoyao Date: Sat, 9 Dec 2023 15:49:15 +0800 Subject: LoongArch: Slightly clean up drdtime() As we are just discarding the stable clock ID, simply write it into $zero instead of allocating a temporary register. Signed-off-by: Xi Ruoyao Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/loongarch.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/loongarch/include/asm/loongarch.h b/arch/loongarch/include/asm/loongarch.h index 9b4957cefa8a..46366e783c84 100644 --- a/arch/loongarch/include/asm/loongarch.h +++ b/arch/loongarch/include/asm/loongarch.h @@ -1098,12 +1098,11 @@ static __always_inline u64 drdtime(void) { - int rID = 0; u64 val = 0; __asm__ __volatile__( - "rdtime.d %0, %1 \n\t" - : "=r"(val), "=r"(rID) + "rdtime.d %0, $zero\n\t" + : "=r"(val) : ); return val; -- cgit v1.2.3 From 97ceddbc9404a7d1e2c4049435bff29427d762cc Mon Sep 17 00:00:00 2001 From: Jinyang He Date: Sat, 9 Dec 2023 15:49:15 +0800 Subject: LoongArch: Set unwind stack type to unknown rather than set error flag During unwinding, unwind_done() is used as an end condition. Normally it unwind to the user stack and then set the stack type to unknown, which is a normal exit. When something unexpected happens in unwind process and we cannot unwind anymore, we should set the error flag, and also set the stack type to unknown to indicate that the unwind process can not continue. The error flag emphasizes that the unwind process produce an unexpected error. There is no unexpected things when we unwind the PT_REGS in the top of IRQ stack and find out that is an user mode PT_REGS. Thus, we should not set error flag and just set stack type to unknown. Reported-by: Hengqi Chen Acked-by: Hengqi Chen Signed-off-by: Jinyang He Signed-off-by: Huacai Chen --- arch/loongarch/kernel/stacktrace.c | 2 +- arch/loongarch/kernel/unwind.c | 1 - arch/loongarch/kernel/unwind_prologue.c | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/loongarch/kernel/stacktrace.c b/arch/loongarch/kernel/stacktrace.c index 92270f14db94..f623feb2129f 100644 --- a/arch/loongarch/kernel/stacktrace.c +++ b/arch/loongarch/kernel/stacktrace.c @@ -32,7 +32,7 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie, } for (unwind_start(&state, task, regs); - !unwind_done(&state) && !unwind_error(&state); unwind_next_frame(&state)) { + !unwind_done(&state); unwind_next_frame(&state)) { addr = unwind_get_return_address(&state); if (!addr || !consume_entry(cookie, addr)) break; diff --git a/arch/loongarch/kernel/unwind.c b/arch/loongarch/kernel/unwind.c index ba324ba76fa1..a463d6961344 100644 --- a/arch/loongarch/kernel/unwind.c +++ b/arch/loongarch/kernel/unwind.c @@ -28,6 +28,5 @@ bool default_next_frame(struct unwind_state *state) } while (!get_stack_info(state->sp, state->task, info)); - state->error = true; return false; } diff --git a/arch/loongarch/kernel/unwind_prologue.c b/arch/loongarch/kernel/unwind_prologue.c index 55afc27320e1..929ae240280a 100644 --- a/arch/loongarch/kernel/unwind_prologue.c +++ b/arch/loongarch/kernel/unwind_prologue.c @@ -227,7 +227,7 @@ static bool next_frame(struct unwind_state *state) } while (!get_stack_info(state->sp, state->task, info)); out: - state->error = true; + state->stack_info.type = STACK_TYPE_UNKNOWN; return false; } -- cgit v1.2.3 From d6c5f06e46a836e6a70c7cfd95bb38a67d9252ec Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Sat, 9 Dec 2023 15:49:15 +0800 Subject: LoongArch: Preserve syscall nr across execve() Currently, we store syscall nr in pt_regs::regs[11] and syscall execve() accidentally overrides it during its execution: sys_execve() -> do_execve() -> do_execveat_common() -> bprm_execve() -> exec_binprm() -> search_binary_handler() -> load_elf_binary() -> ELF_PLAT_INIT() ELF_PLAT_INIT() reset regs[11] to 0, so in syscall_exit_to_user_mode() we later get a wrong syscall nr. This breaks tools like execsnoop since it relies on execve() tracepoints. Skip pt_regs::regs[11] reset in ELF_PLAT_INIT() to fix the issue. Signed-off-by: Hengqi Chen Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/elf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/loongarch/include/asm/elf.h b/arch/loongarch/include/asm/elf.h index b9a4ab54285c..9b16a3b8e706 100644 --- a/arch/loongarch/include/asm/elf.h +++ b/arch/loongarch/include/asm/elf.h @@ -293,7 +293,7 @@ extern const char *__elf_platform; #define ELF_PLAT_INIT(_r, load_addr) do { \ _r->regs[1] = _r->regs[2] = _r->regs[3] = _r->regs[4] = 0; \ _r->regs[5] = _r->regs[6] = _r->regs[7] = _r->regs[8] = 0; \ - _r->regs[9] = _r->regs[10] = _r->regs[11] = _r->regs[12] = 0; \ + _r->regs[9] = _r->regs[10] /* syscall n */ = _r->regs[12] = 0; \ _r->regs[13] = _r->regs[14] = _r->regs[15] = _r->regs[16] = 0; \ _r->regs[17] = _r->regs[18] = _r->regs[19] = _r->regs[20] = 0; \ _r->regs[21] = _r->regs[22] = _r->regs[23] = _r->regs[24] = 0; \ -- cgit v1.2.3 From fe5757553bf9ebe45ae8ecab5922f6937c8d8dfc Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Sat, 9 Dec 2023 15:49:16 +0800 Subject: LoongArch: BPF: Don't sign extend memory load operand The `cgrp_local_storage` test triggers a kernel panic like: # ./test_progs -t cgrp_local_storage Can't find bpf_testmod.ko kernel module: -2 WARNING! Selftests relying on bpf_testmod.ko will be skipped. [ 550.930632] CPU 1 Unable to handle kernel paging request at virtual address 0000000000000080, era == ffff80000200be34, ra == ffff80000200be00 [ 550.931781] Oops[#1]: [ 550.931966] CPU: 1 PID: 1303 Comm: test_progs Not tainted 6.7.0-rc2-loong-devel-g2f56bb0d2327 #35 a896aca3f4164f09cc346f89f2e09832e07be5f6 [ 550.932215] Hardware name: QEMU QEMU Virtual Machine, BIOS unknown 2/2/2022 [ 550.932403] pc ffff80000200be34 ra ffff80000200be00 tp 9000000108350000 sp 9000000108353dc0 [ 550.932545] a0 0000000000000000 a1 0000000000000517 a2 0000000000000118 a3 00007ffffbb15558 [ 550.932682] a4 00007ffffbb15620 a5 90000001004e7700 a6 0000000000000021 a7 0000000000000118 [ 550.932824] t0 ffff80000200bdc0 t1 0000000000000517 t2 0000000000000517 t3 00007ffff1c06ee0 [ 550.932961] t4 0000555578ae04d0 t5 fffffffffffffff8 t6 0000000000000004 t7 0000000000000020 [ 550.933097] t8 0000000000000040 u0 00000000000007b8 s9 9000000108353e00 s0 90000001004e7700 [ 550.933241] s1 9000000004005000 s2 0000000000000001 s3 0000000000000000 s4 0000555555eb2ec8 [ 550.933379] s5 00007ffffbb15bb8 s6 00007ffff1dafd60 s7 000055555663f610 s8 00007ffff1db0050 [ 550.933520] ra: ffff80000200be00 bpf_prog_98f1b9e767be2a84_on_enter+0x40/0x200 [ 550.933911] ERA: ffff80000200be34 bpf_prog_98f1b9e767be2a84_on_enter+0x74/0x200 [ 550.934105] CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE) [ 550.934596] PRMD: 00000004 (PPLV0 +PIE -PWE) [ 550.934712] EUEN: 00000003 (+FPE +SXE -ASXE -BTE) [ 550.934836] ECFG: 00071c1c (LIE=2-4,10-12 VS=7) [ 550.934976] ESTAT: 00010000 [PIL] (IS= ECode=1 EsubCode=0) [ 550.935097] BADV: 0000000000000080 [ 550.935181] PRID: 0014c010 (Loongson-64bit, Loongson-3A5000) [ 550.935291] Modules linked in: [ 550.935391] Process test_progs (pid: 1303, threadinfo=000000006c3b1c41, task=0000000061f84a55) [ 550.935643] Stack : 00007ffffbb15bb8 0000555555eb2ec8 0000000000000000 0000000000000001 [ 550.935844] 9000000004005000 ffff80001b864000 00007ffffbb15450 90000000029aa034 [ 550.935990] 0000000000000000 9000000108353ec0 0000000000000118 d07d9dfb09721a09 [ 550.936175] 0000000000000001 0000000000000000 9000000108353ec0 0000000000000118 [ 550.936314] 9000000101d46ad0 900000000290abf0 000055555663f610 0000000000000000 [ 550.936479] 0000000000000003 9000000108353ec0 00007ffffbb15450 90000000029d7288 [ 550.936635] 00007ffff1dafd60 000055555663f610 0000000000000000 0000000000000003 [ 550.936779] 9000000108353ec0 90000000035dd1f0 00007ffff1dafd58 9000000002841c5c [ 550.936939] 0000000000000119 0000555555eea5a8 00007ffff1d78780 00007ffffbb153e0 [ 550.937083] ffffffffffffffda 00007ffffbb15518 0000000000000040 00007ffffbb15558 [ 550.937224] ... [ 550.937299] Call Trace: [ 550.937521] [] bpf_prog_98f1b9e767be2a84_on_enter+0x74/0x200 [ 550.937910] [<90000000029aa034>] bpf_trace_run2+0x90/0x154 [ 550.938105] [<900000000290abf0>] syscall_trace_enter.isra.0+0x1cc/0x200 [ 550.938224] [<90000000035dd1f0>] do_syscall+0x48/0x94 [ 550.938319] [<9000000002841c5c>] handle_syscall+0xbc/0x158 [ 550.938477] [ 550.938607] Code: 580009ae 50016000 262402e4 <28c20085> 14092084 03a00084 16000024 03240084 00150006 [ 550.938851] [ 550.939021] ---[ end trace 0000000000000000 ]--- Further investigation shows that this panic is triggered by memory load operations: ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); The expression `task->cgroups->dfl_cgrp` involves two memory load. Since the field offset fits in imm12 or imm14, we use ldd or ldptrd instructions. But both instructions have the side effect that it will signed-extended the imm operand. Finally, we got the wrong addresses and panics is inevitable. Use a generic ldxd instruction to avoid this kind of issues. With this change, we have: # ./test_progs -t cgrp_local_storage Can't find bpf_testmod.ko kernel module: -2 WARNING! Selftests relying on bpf_testmod.ko will be skipped. test_cgrp_local_storage:PASS:join_cgroup /cgrp_local_storage 0 nsec #48/1 cgrp_local_storage/tp_btf:OK test_attach_cgroup:PASS:skel_open 0 nsec test_attach_cgroup:PASS:prog_attach 0 nsec test_attach_cgroup:PASS:prog_attach 0 nsec libbpf: prog 'update_cookie_tracing': failed to attach: ERROR: strerror_r(-524)=22 test_attach_cgroup:FAIL:prog_attach unexpected error: -524 #48/2 cgrp_local_storage/attach_cgroup:FAIL test_recursion:PASS:skel_open_and_load 0 nsec libbpf: prog 'on_lookup': failed to attach: ERROR: strerror_r(-524)=22 libbpf: prog 'on_lookup': failed to auto-attach: -524 test_recursion:FAIL:skel_attach unexpected error: -524 (errno 524) #48/3 cgrp_local_storage/recursion:FAIL #48/4 cgrp_local_storage/negative:OK #48/5 cgrp_local_storage/cgroup_iter_sleepable:OK test_yes_rcu_lock:PASS:skel_open 0 nsec test_yes_rcu_lock:PASS:skel_load 0 nsec libbpf: prog 'yes_rcu_lock': failed to attach: ERROR: strerror_r(-524)=22 libbpf: prog 'yes_rcu_lock': failed to auto-attach: -524 test_yes_rcu_lock:FAIL:skel_attach unexpected error: -524 (errno 524) #48/6 cgrp_local_storage/yes_rcu_lock:FAIL #48/7 cgrp_local_storage/no_rcu_lock:OK #48 cgrp_local_storage:FAIL All error logs: test_cgrp_local_storage:PASS:join_cgroup /cgrp_local_storage 0 nsec test_attach_cgroup:PASS:skel_open 0 nsec test_attach_cgroup:PASS:prog_attach 0 nsec test_attach_cgroup:PASS:prog_attach 0 nsec libbpf: prog 'update_cookie_tracing': failed to attach: ERROR: strerror_r(-524)=22 test_attach_cgroup:FAIL:prog_attach unexpected error: -524 #48/2 cgrp_local_storage/attach_cgroup:FAIL test_recursion:PASS:skel_open_and_load 0 nsec libbpf: prog 'on_lookup': failed to attach: ERROR: strerror_r(-524)=22 libbpf: prog 'on_lookup': failed to auto-attach: -524 test_recursion:FAIL:skel_attach unexpected error: -524 (errno 524) #48/3 cgrp_local_storage/recursion:FAIL test_yes_rcu_lock:PASS:skel_open 0 nsec test_yes_rcu_lock:PASS:skel_load 0 nsec libbpf: prog 'yes_rcu_lock': failed to attach: ERROR: strerror_r(-524)=22 libbpf: prog 'yes_rcu_lock': failed to auto-attach: -524 test_yes_rcu_lock:FAIL:skel_attach unexpected error: -524 (errno 524) #48/6 cgrp_local_storage/yes_rcu_lock:FAIL #48 cgrp_local_storage:FAIL Summary: 0/4 PASSED, 0 SKIPPED, 1 FAILED No panics any more (The test still failed because lack of BPF trampoline which I am actively working on). Fixes: 5dc615520c4d ("LoongArch: Add BPF JIT support") Signed-off-by: Hengqi Chen Signed-off-by: Huacai Chen --- arch/loongarch/net/bpf_jit.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index 169ff8b3915e..d2161c95ceb4 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -988,14 +988,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext } break; case BPF_DW: - if (is_signed_imm12(off)) { - emit_insn(ctx, ldd, dst, src, off); - } else if (is_signed_imm14(off)) { - emit_insn(ctx, ldptrd, dst, src, off); - } else { - move_imm(ctx, t1, off, is32); - emit_insn(ctx, ldxd, dst, src, t1); - } + move_imm(ctx, t1, off, is32); + emit_insn(ctx, ldxd, dst, src, t1); break; } -- cgit v1.2.3 From 5d47ec2e6f4c64e30e392cfe9532df98c9beb106 Mon Sep 17 00:00:00 2001 From: Hengqi Chen Date: Sat, 9 Dec 2023 15:49:16 +0800 Subject: LoongArch: BPF: Don't sign extend function return value The `cls_redirect` test triggers a kernel panic like: # ./test_progs -t cls_redirect Can't find bpf_testmod.ko kernel module: -2 WARNING! Selftests relying on bpf_testmod.ko will be skipped. [ 30.938489] CPU 3 Unable to handle kernel paging request at virtual address fffffffffd814de0, era == ffff800002009fb8, ra == ffff800002009f9c [ 30.939331] Oops[#1]: [ 30.939513] CPU: 3 PID: 1260 Comm: test_progs Not tainted 6.7.0-rc2-loong-devel-g2f56bb0d2327 #35 a896aca3f4164f09cc346f89f2e09832e07be5f6 [ 30.939732] Hardware name: QEMU QEMU Virtual Machine, BIOS unknown 2/2/2022 [ 30.939901] pc ffff800002009fb8 ra ffff800002009f9c tp 9000000104da4000 sp 9000000104da7ab0 [ 30.940038] a0 fffffffffd814de0 a1 9000000104da7a68 a2 0000000000000000 a3 9000000104da7c10 [ 30.940183] a4 9000000104da7c14 a5 0000000000000002 a6 0000000000000021 a7 00005555904d7f90 [ 30.940321] t0 0000000000000110 t1 0000000000000000 t2 fffffffffd814de0 t3 0004c4b400000000 [ 30.940456] t4 ffffffffffffffff t5 00000000c3f63600 t6 0000000000000000 t7 0000000000000000 [ 30.940590] t8 000000000006d803 u0 0000000000000020 s9 9000000104da7b10 s0 900000010504c200 [ 30.940727] s1 fffffffffd814de0 s2 900000010504c200 s3 9000000104da7c10 s4 9000000104da7ad0 [ 30.940866] s5 0000000000000000 s6 90000000030e65bc s7 9000000104da7b44 s8 90000000044f6fc0 [ 30.941015] ra: ffff800002009f9c bpf_prog_846803e5ae81417f_cls_redirect+0xa0/0x590 [ 30.941535] ERA: ffff800002009fb8 bpf_prog_846803e5ae81417f_cls_redirect+0xbc/0x590 [ 30.941696] CRMD: 000000b0 (PLV0 -IE -DA +PG DACF=CC DACM=CC -WE) [ 30.942224] PRMD: 00000004 (PPLV0 +PIE -PWE) [ 30.942330] EUEN: 00000003 (+FPE +SXE -ASXE -BTE) [ 30.942453] ECFG: 00071c1c (LIE=2-4,10-12 VS=7) [ 30.942612] ESTAT: 00010000 [PIL] (IS= ECode=1 EsubCode=0) [ 30.942764] BADV: fffffffffd814de0 [ 30.942854] PRID: 0014c010 (Loongson-64bit, Loongson-3A5000) [ 30.942974] Modules linked in: [ 30.943078] Process test_progs (pid: 1260, threadinfo=00000000ce303226, task=000000007d10bb76) [ 30.943306] Stack : 900000010a064000 90000000044f6fc0 9000000104da7b48 0000000000000000 [ 30.943495] 0000000000000000 9000000104da7c14 9000000104da7c10 900000010504c200 [ 30.943626] 0000000000000001 ffff80001b88c000 9000000104da7b70 90000000030e6668 [ 30.943785] 0000000000000000 9000000104da7b58 ffff80001b88c048 9000000003d05000 [ 30.943936] 900000000303ac88 0000000000000000 0000000000000000 9000000104da7b70 [ 30.944091] 0000000000000000 0000000000000001 0000000731eeab00 0000000000000000 [ 30.944245] ffff80001b88c000 0000000000000000 0000000000000000 54b99959429f83b8 [ 30.944402] ffff80001b88c000 90000000044f6fc0 9000000101d70000 ffff80001b88c000 [ 30.944538] 000000000000005a 900000010504c200 900000010a064000 900000010a067000 [ 30.944697] 9000000104da7d88 0000000000000000 9000000003d05000 90000000030e794c [ 30.944852] ... [ 30.944924] Call Trace: [ 30.945120] [] bpf_prog_846803e5ae81417f_cls_redirect+0xbc/0x590 [ 30.945650] [<90000000030e6668>] bpf_test_run+0x1ec/0x2f8 [ 30.945958] [<90000000030e794c>] bpf_prog_test_run_skb+0x31c/0x684 [ 30.946065] [<90000000026d4f68>] __sys_bpf+0x678/0x2724 [ 30.946159] [<90000000026d7288>] sys_bpf+0x20/0x2c [ 30.946253] [<90000000032dd224>] do_syscall+0x7c/0x94 [ 30.946343] [<9000000002541c5c>] handle_syscall+0xbc/0x158 [ 30.946492] [ 30.946549] Code: 0015030e 5c0009c0 5001d000 <28c00304> 02c00484 29c00304 00150009 2a42d2e4 0280200d [ 30.946793] [ 30.946971] ---[ end trace 0000000000000000 ]--- [ 32.093225] Kernel panic - not syncing: Fatal exception in interrupt [ 32.093526] Kernel relocated by 0x2320000 [ 32.093630] .text @ 0x9000000002520000 [ 32.093725] .data @ 0x9000000003400000 [ 32.093792] .bss @ 0x9000000004413200 [ 34.971998] ---[ end Kernel panic - not syncing: Fatal exception in interrupt ]--- This is because we signed-extend function return values. When subprog mode is enabled, we have: cls_redirect() -> get_global_metrics() returns pcpu ptr 0xfffffefffc00b480 The pointer returned is later signed-extended to 0xfffffffffc00b480 at `BPF_JMP | BPF_EXIT`. During BPF prog run, this triggers unhandled page fault and a kernel panic. Drop the unnecessary signed-extension on return values like other architectures do. With this change, we have: # ./test_progs -t cls_redirect Can't find bpf_testmod.ko kernel module: -2 WARNING! Selftests relying on bpf_testmod.ko will be skipped. #51/1 cls_redirect/cls_redirect_inlined:OK #51/2 cls_redirect/IPv4 TCP accept unknown (no hops, flags: SYN):OK #51/3 cls_redirect/IPv6 TCP accept unknown (no hops, flags: SYN):OK #51/4 cls_redirect/IPv4 TCP accept unknown (no hops, flags: ACK):OK #51/5 cls_redirect/IPv6 TCP accept unknown (no hops, flags: ACK):OK #51/6 cls_redirect/IPv4 TCP forward unknown (one hop, flags: ACK):OK #51/7 cls_redirect/IPv6 TCP forward unknown (one hop, flags: ACK):OK #51/8 cls_redirect/IPv4 TCP accept known (one hop, flags: ACK):OK #51/9 cls_redirect/IPv6 TCP accept known (one hop, flags: ACK):OK #51/10 cls_redirect/IPv4 UDP accept unknown (no hops, flags: none):OK #51/11 cls_redirect/IPv6 UDP accept unknown (no hops, flags: none):OK #51/12 cls_redirect/IPv4 UDP forward unknown (one hop, flags: none):OK #51/13 cls_redirect/IPv6 UDP forward unknown (one hop, flags: none):OK #51/14 cls_redirect/IPv4 UDP accept known (one hop, flags: none):OK #51/15 cls_redirect/IPv6 UDP accept known (one hop, flags: none):OK #51/16 cls_redirect/cls_redirect_subprogs:OK #51/17 cls_redirect/IPv4 TCP accept unknown (no hops, flags: SYN):OK #51/18 cls_redirect/IPv6 TCP accept unknown (no hops, flags: SYN):OK #51/19 cls_redirect/IPv4 TCP accept unknown (no hops, flags: ACK):OK #51/20 cls_redirect/IPv6 TCP accept unknown (no hops, flags: ACK):OK #51/21 cls_redirect/IPv4 TCP forward unknown (one hop, flags: ACK):OK #51/22 cls_redirect/IPv6 TCP forward unknown (one hop, flags: ACK):OK #51/23 cls_redirect/IPv4 TCP accept known (one hop, flags: ACK):OK #51/24 cls_redirect/IPv6 TCP accept known (one hop, flags: ACK):OK #51/25 cls_redirect/IPv4 UDP accept unknown (no hops, flags: none):OK #51/26 cls_redirect/IPv6 UDP accept unknown (no hops, flags: none):OK #51/27 cls_redirect/IPv4 UDP forward unknown (one hop, flags: none):OK #51/28 cls_redirect/IPv6 UDP forward unknown (one hop, flags: none):OK #51/29 cls_redirect/IPv4 UDP accept known (one hop, flags: none):OK #51/30 cls_redirect/IPv6 UDP accept known (one hop, flags: none):OK #51/31 cls_redirect/cls_redirect_dynptr:OK #51/32 cls_redirect/IPv4 TCP accept unknown (no hops, flags: SYN):OK #51/33 cls_redirect/IPv6 TCP accept unknown (no hops, flags: SYN):OK #51/34 cls_redirect/IPv4 TCP accept unknown (no hops, flags: ACK):OK #51/35 cls_redirect/IPv6 TCP accept unknown (no hops, flags: ACK):OK #51/36 cls_redirect/IPv4 TCP forward unknown (one hop, flags: ACK):OK #51/37 cls_redirect/IPv6 TCP forward unknown (one hop, flags: ACK):OK #51/38 cls_redirect/IPv4 TCP accept known (one hop, flags: ACK):OK #51/39 cls_redirect/IPv6 TCP accept known (one hop, flags: ACK):OK #51/40 cls_redirect/IPv4 UDP accept unknown (no hops, flags: none):OK #51/41 cls_redirect/IPv6 UDP accept unknown (no hops, flags: none):OK #51/42 cls_redirect/IPv4 UDP forward unknown (one hop, flags: none):OK #51/43 cls_redirect/IPv6 UDP forward unknown (one hop, flags: none):OK #51/44 cls_redirect/IPv4 UDP accept known (one hop, flags: none):OK #51/45 cls_redirect/IPv6 UDP accept known (one hop, flags: none):OK #51 cls_redirect:OK Summary: 1/45 PASSED, 0 SKIPPED, 0 FAILED Fixes: 5dc615520c4d ("LoongArch: Add BPF JIT support") Signed-off-by: Hengqi Chen Signed-off-by: Huacai Chen --- arch/loongarch/net/bpf_jit.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch') diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index d2161c95ceb4..d76281efc5b8 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -911,8 +911,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext /* function return */ case BPF_JMP | BPF_EXIT: - emit_sext_32(ctx, regmap[BPF_REG_0], true); - if (i == ctx->prog->len - 1) break; -- cgit v1.2.3 From 772cbe948fb07389639d4e698a2d3299f8e538b8 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Sat, 9 Dec 2023 15:49:16 +0800 Subject: LoongArch: BPF: Fix sign-extension mov instructions We can see that "Short form of movsx, dst_reg = (s8,s16,s32)src_reg" in include/linux/filter.h, additionally, for BPF_ALU64 the value of the destination register is unchanged whereas for BPF_ALU the upper 32 bits of the destination register are zeroed, so it should clear the upper 32 bits for BPF_ALU. [root@linux fedora]# echo 1 > /proc/sys/net/core/bpf_jit_enable [root@linux fedora]# modprobe test_bpf Before: test_bpf: #81 ALU_MOVSX | BPF_B jited:1 ret 2 != 1 (0x2 != 0x1)FAIL (1 times) test_bpf: #82 ALU_MOVSX | BPF_H jited:1 ret 2 != 1 (0x2 != 0x1)FAIL (1 times) After: test_bpf: #81 ALU_MOVSX | BPF_B jited:1 6 PASS test_bpf: #82 ALU_MOVSX | BPF_H jited:1 6 PASS By the way, the bpf selftest case "./test_progs -t verifier_movsx" can also be fixed with this patch. Fixes: f48012f16150 ("LoongArch: BPF: Support sign-extension mov instructions") Acked-by: Hengqi Chen Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/net/bpf_jit.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index d76281efc5b8..cd002f183648 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -480,10 +480,12 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext case 8: move_reg(ctx, t1, src); emit_insn(ctx, extwb, dst, t1); + emit_zext_32(ctx, dst, is32); break; case 16: move_reg(ctx, t1, src); emit_insn(ctx, extwh, dst, t1); + emit_zext_32(ctx, dst, is32); break; case 32: emit_insn(ctx, addw, dst, src, LOONGARCH_GPR_ZERO); -- cgit v1.2.3 From e2f7b3d8b4b300956a77fa1ab084c931ba1c7421 Mon Sep 17 00:00:00 2001 From: Tiezhu Yang Date: Sat, 9 Dec 2023 15:49:16 +0800 Subject: LoongArch: BPF: Fix unconditional bswap instructions We can see that "bswap32: Takes an unsigned 32-bit number in either big- or little-endian format and returns the equivalent number with the same bit width but opposite endianness" in BPF Instruction Set Specification, so it should clear the upper 32 bits in "case 32:" for both BPF_ALU and BPF_ALU64. [root@linux fedora]# echo 1 > /proc/sys/net/core/bpf_jit_enable [root@linux fedora]# modprobe test_bpf Before: test_bpf: #313 BSWAP 32: 0x0123456789abcdef -> 0xefcdab89 jited:1 ret 1460850314 != -271733879 (0x5712ce8a != 0xefcdab89)FAIL (1 times) test_bpf: #317 BSWAP 32: 0xfedcba9876543210 -> 0x10325476 jited:1 ret -1460850316 != 271733878 (0xa8ed3174 != 0x10325476)FAIL (1 times) After: test_bpf: #313 BSWAP 32: 0x0123456789abcdef -> 0xefcdab89 jited:1 4 PASS test_bpf: #317 BSWAP 32: 0xfedcba9876543210 -> 0x10325476 jited:1 4 PASS Fixes: 4ebf9216e7df ("LoongArch: BPF: Support unconditional bswap instructions") Acked-by: Hengqi Chen Signed-off-by: Tiezhu Yang Signed-off-by: Huacai Chen --- arch/loongarch/net/bpf_jit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/loongarch/net/bpf_jit.c b/arch/loongarch/net/bpf_jit.c index cd002f183648..4fcd6cd6da23 100644 --- a/arch/loongarch/net/bpf_jit.c +++ b/arch/loongarch/net/bpf_jit.c @@ -774,8 +774,8 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, bool ext break; case 32: emit_insn(ctx, revb2w, dst, dst); - /* zero-extend 32 bits into 64 bits */ - emit_zext_32(ctx, dst, is32); + /* clear the upper 32 bits */ + emit_zext_32(ctx, dst, true); break; case 64: emit_insn(ctx, revbd, dst, dst); -- cgit v1.2.3 From 271f2a4a9576b87ed1f8584909d6d270039e52ea Mon Sep 17 00:00:00 2001 From: Wang Yao Date: Wed, 6 Dec 2023 08:24:27 +0800 Subject: efi/loongarch: Use load address to calculate kernel entry address The efi_relocate_kernel() may load the PIE kernel to anywhere, the loaded address may not be equal to link address or EFI_KIMG_PREFERRED_ADDRESS. Acked-by: Huacai Chen Signed-off-by: Wang Yao Signed-off-by: Ard Biesheuvel --- arch/loongarch/include/asm/efi.h | 2 +- drivers/firmware/efi/libstub/loongarch-stub.c | 4 ++-- drivers/firmware/efi/libstub/loongarch.c | 6 +++--- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/loongarch/include/asm/efi.h b/arch/loongarch/include/asm/efi.h index 091897d40b03..91d81f9730ab 100644 --- a/arch/loongarch/include/asm/efi.h +++ b/arch/loongarch/include/asm/efi.h @@ -32,6 +32,6 @@ static inline unsigned long efi_get_kimg_min_align(void) #define EFI_KIMG_PREFERRED_ADDRESS PHYSADDR(VMLINUX_LOAD_ADDRESS) -unsigned long kernel_entry_address(void); +unsigned long kernel_entry_address(unsigned long kernel_addr); #endif /* _ASM_LOONGARCH_EFI_H */ diff --git a/drivers/firmware/efi/libstub/loongarch-stub.c b/drivers/firmware/efi/libstub/loongarch-stub.c index 72c71ae201f0..d6ec5d4b8dbe 100644 --- a/drivers/firmware/efi/libstub/loongarch-stub.c +++ b/drivers/firmware/efi/libstub/loongarch-stub.c @@ -35,9 +35,9 @@ efi_status_t handle_kernel_image(unsigned long *image_addr, return status; } -unsigned long kernel_entry_address(void) +unsigned long kernel_entry_address(unsigned long kernel_addr) { unsigned long base = (unsigned long)&kernel_offset - kernel_offset; - return (unsigned long)&kernel_entry - base + VMLINUX_LOAD_ADDRESS; + return (unsigned long)&kernel_entry - base + kernel_addr; } diff --git a/drivers/firmware/efi/libstub/loongarch.c b/drivers/firmware/efi/libstub/loongarch.c index 807cba2693fc..0e0aa6cda73f 100644 --- a/drivers/firmware/efi/libstub/loongarch.c +++ b/drivers/firmware/efi/libstub/loongarch.c @@ -37,9 +37,9 @@ static efi_status_t exit_boot_func(struct efi_boot_memmap *map, void *priv) return EFI_SUCCESS; } -unsigned long __weak kernel_entry_address(void) +unsigned long __weak kernel_entry_address(unsigned long kernel_addr) { - return *(unsigned long *)(PHYSADDR(VMLINUX_LOAD_ADDRESS) + 8); + return *(unsigned long *)(kernel_addr + 8) - VMLINUX_LOAD_ADDRESS + kernel_addr; } efi_status_t efi_boot_kernel(void *handle, efi_loaded_image_t *image, @@ -73,7 +73,7 @@ efi_status_t efi_boot_kernel(void *handle, efi_loaded_image_t *image, csr_write64(CSR_DMW0_INIT, LOONGARCH_CSR_DMWIN0); csr_write64(CSR_DMW1_INIT, LOONGARCH_CSR_DMWIN1); - real_kernel_entry = (void *)kernel_entry_address(); + real_kernel_entry = (void *)kernel_entry_address(kernel_addr); real_kernel_entry(true, (unsigned long)cmdline_ptr, (unsigned long)efi_system_table); -- cgit v1.2.3 From 3c0696076aad60a2f04c019761921954579e1b0e Mon Sep 17 00:00:00 2001 From: James Houghton Date: Mon, 4 Dec 2023 17:26:46 +0000 Subject: arm64: mm: Always make sw-dirty PTEs hw-dirty in pte_modify It is currently possible for a userspace application to enter an infinite page fault loop when using HugeTLB pages implemented with contiguous PTEs when HAFDBS is not available. This happens because: 1. The kernel may sometimes write PTEs that are sw-dirty but hw-clean (PTE_DIRTY | PTE_RDONLY | PTE_WRITE). 2. If, during a write, the CPU uses a sw-dirty, hw-clean PTE in handling the memory access on a system without HAFDBS, we will get a page fault. 3. HugeTLB will check if it needs to update the dirty bits on the PTE. For contiguous PTEs, it will check to see if the pgprot bits need updating. In this case, HugeTLB wants to write a sequence of sw-dirty, hw-dirty PTEs, but it finds that all the PTEs it is about to overwrite are all pte_dirty() (pte_sw_dirty() => pte_dirty()), so it thinks no update is necessary. We can get the kernel to write a sw-dirty, hw-clean PTE with the following steps (showing the relevant VMA flags and pgprot bits): i. Create a valid, writable contiguous PTE. VMA vmflags: VM_SHARED | VM_READ | VM_WRITE VMA pgprot bits: PTE_RDONLY | PTE_WRITE PTE pgprot bits: PTE_DIRTY | PTE_WRITE ii. mprotect the VMA to PROT_NONE. VMA vmflags: VM_SHARED VMA pgprot bits: PTE_RDONLY PTE pgprot bits: PTE_DIRTY | PTE_RDONLY iii. mprotect the VMA back to PROT_READ | PROT_WRITE. VMA vmflags: VM_SHARED | VM_READ | VM_WRITE VMA pgprot bits: PTE_RDONLY | PTE_WRITE PTE pgprot bits: PTE_DIRTY | PTE_WRITE | PTE_RDONLY Make it impossible to create a writeable sw-dirty, hw-clean PTE with pte_modify(). Such a PTE should be impossible to create, and there may be places that assume that pte_dirty() implies pte_hw_dirty(). Signed-off-by: James Houghton Fixes: 031e6e6b4e12 ("arm64: hugetlb: Avoid unnecessary clearing in huge_ptep_set_access_flags") Cc: Acked-by: Will Deacon Reviewed-by: Ryan Roberts Link: https://lore.kernel.org/r/20231204172646.2541916-3-jthoughton@google.com Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index b19a8aee684c..79ce70fbb751 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -834,6 +834,12 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) pte = set_pte_bit(pte, __pgprot(PTE_DIRTY)); pte_val(pte) = (pte_val(pte) & ~mask) | (pgprot_val(newprot) & mask); + /* + * If we end up clearing hw dirtiness for a sw-dirty PTE, set hardware + * dirtiness again. + */ + if (pte_sw_dirty(pte)) + pte = pte_mkdirty(pte); return pte; } -- cgit v1.2.3 From c41bd2514184d75db087fe4c1221237fb7922875 Mon Sep 17 00:00:00 2001 From: Ignat Korchagin Date: Wed, 29 Nov 2023 22:04:09 +0000 Subject: kexec: drop dependency on ARCH_SUPPORTS_KEXEC from CRASH_DUMP In commit f8ff23429c62 ("kernel/Kconfig.kexec: drop select of KEXEC for CRASH_DUMP") we tried to fix a config regression, where CONFIG_CRASH_DUMP required CONFIG_KEXEC. However, it was not enough at least for arm64 platforms. While further testing the patch with our arm64 config I noticed that CONFIG_CRASH_DUMP is unavailable in menuconfig. This is because CONFIG_CRASH_DUMP still depends on the new CONFIG_ARCH_SUPPORTS_KEXEC introduced in commit 91506f7e5d21 ("arm64/kexec: refactor for kernel/Kconfig.kexec") and on arm64 CONFIG_ARCH_SUPPORTS_KEXEC requires CONFIG_PM_SLEEP_SMP=y, which in turn requires either CONFIG_SUSPEND=y or CONFIG_HIBERNATION=y neither of which are set in our config. Given that we already established that CONFIG_KEXEC (which is a switch for kexec system call itself) is not required for CONFIG_CRASH_DUMP drop CONFIG_ARCH_SUPPORTS_KEXEC dependency as well. The arm64 kernel builds just fine with CONFIG_CRASH_DUMP=y and with both CONFIG_KEXEC=n and CONFIG_KEXEC_FILE=n after f8ff23429c62 ("kernel/Kconfig.kexec: drop select of KEXEC for CRASH_DUMP") and this patch are applied given that the necessary shared bits are included via CONFIG_KEXEC_CORE dependency. [bhe@redhat.com: don't export some symbols when CONFIG_MMU=n] Link: https://lkml.kernel.org/r/ZW03ODUKGGhP1ZGU@MiWiFi-R3L-srv [bhe@redhat.com: riscv, kexec: fix dependency of two items] Link: https://lkml.kernel.org/r/ZW04G/SKnhbE5mnX@MiWiFi-R3L-srv Link: https://lkml.kernel.org/r/20231129220409.55006-1-ignat@cloudflare.com Fixes: 91506f7e5d21 ("arm64/kexec: refactor for kernel/Kconfig.kexec") Signed-off-by: Ignat Korchagin Signed-off-by: Baoquan He Acked-by: Baoquan He Cc: Alexander Gordeev Cc: # 6.6+: f8ff234: kernel/Kconfig.kexec: drop select of KEXEC for CRASH_DUMP Cc: # 6.6+ Signed-off-by: Andrew Morton --- arch/riscv/Kconfig | 4 ++-- arch/riscv/kernel/crash_core.c | 4 +++- kernel/Kconfig.kexec | 1 - 3 files changed, 5 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 95a2a06acc6a..24c1799e2ec4 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -685,7 +685,7 @@ config RISCV_BOOT_SPINWAIT If unsure what to do here, say N. config ARCH_SUPPORTS_KEXEC - def_bool MMU + def_bool y config ARCH_SELECTS_KEXEC def_bool y @@ -693,7 +693,7 @@ config ARCH_SELECTS_KEXEC select HOTPLUG_CPU if SMP config ARCH_SUPPORTS_KEXEC_FILE - def_bool 64BIT && MMU + def_bool 64BIT config ARCH_SELECTS_KEXEC_FILE def_bool y diff --git a/arch/riscv/kernel/crash_core.c b/arch/riscv/kernel/crash_core.c index 55f1d7856b54..8706736fd4e2 100644 --- a/arch/riscv/kernel/crash_core.c +++ b/arch/riscv/kernel/crash_core.c @@ -5,17 +5,19 @@ void arch_crash_save_vmcoreinfo(void) { - VMCOREINFO_NUMBER(VA_BITS); VMCOREINFO_NUMBER(phys_ram_base); vmcoreinfo_append_str("NUMBER(PAGE_OFFSET)=0x%lx\n", PAGE_OFFSET); vmcoreinfo_append_str("NUMBER(VMALLOC_START)=0x%lx\n", VMALLOC_START); vmcoreinfo_append_str("NUMBER(VMALLOC_END)=0x%lx\n", VMALLOC_END); +#ifdef CONFIG_MMU + VMCOREINFO_NUMBER(VA_BITS); vmcoreinfo_append_str("NUMBER(VMEMMAP_START)=0x%lx\n", VMEMMAP_START); vmcoreinfo_append_str("NUMBER(VMEMMAP_END)=0x%lx\n", VMEMMAP_END); #ifdef CONFIG_64BIT vmcoreinfo_append_str("NUMBER(MODULES_VADDR)=0x%lx\n", MODULES_VADDR); vmcoreinfo_append_str("NUMBER(MODULES_END)=0x%lx\n", MODULES_END); +#endif #endif vmcoreinfo_append_str("NUMBER(KERNEL_LINK_ADDR)=0x%lx\n", KERNEL_LINK_ADDR); vmcoreinfo_append_str("NUMBER(va_kernel_pa_offset)=0x%lx\n", diff --git a/kernel/Kconfig.kexec b/kernel/Kconfig.kexec index 1cc3b1c595d7..2fd510256604 100644 --- a/kernel/Kconfig.kexec +++ b/kernel/Kconfig.kexec @@ -94,7 +94,6 @@ config KEXEC_JUMP config CRASH_DUMP bool "kernel crash dumps" depends on ARCH_SUPPORTS_CRASH_DUMP - depends on ARCH_SUPPORTS_KEXEC select CRASH_CORE select KEXEC_CORE help -- cgit v1.2.3 From ac88ff6b9d7dea9f0907c86bdae204dde7d5c0e6 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Tue, 5 Dec 2023 11:02:55 +0800 Subject: riscv: fix VMALLOC_START definition When below config items are set, compiler complained: -------------------- CONFIG_CRASH_CORE=y CONFIG_KEXEC_CORE=y CONFIG_CRASH_DUMP=y ...... ----------------------- ------------------------------------------------------------------- arch/riscv/kernel/crash_core.c: In function 'arch_crash_save_vmcoreinfo': arch/riscv/kernel/crash_core.c:11:58: warning: format '%lx' expects argument of type 'long unsigned int', but argument 2 has type 'int' [-Wformat=] 11 | vmcoreinfo_append_str("NUMBER(VMALLOC_START)=0x%lx\n", VMALLOC_START); | ~~^ | | | long unsigned int | %x ---------------------------------------------------------------------- This is because on riscv macro VMALLOC_START has different type when CONFIG_MMU is set or unset. arch/riscv/include/asm/pgtable.h: -------------------------------------------------- Changing it to _AC(0, UL) in case CONFIG_MMU=n can fix the warning. Link: https://lkml.kernel.org/r/ZW7OsX4zQRA3mO4+@MiWiFi-R3L-srv Signed-off-by: Baoquan He Reported-by: Randy Dunlap Acked-by: Randy Dunlap Tested-by: Randy Dunlap # build-tested Cc: Eric DeVolder Cc: Ignat Korchagin Cc: Stephen Rothwell Cc: Paul Walmsley Cc: Palmer Dabbelt Cc: Albert Ou Signed-off-by: Andrew Morton --- arch/riscv/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 294044429e8e..ab00235b018f 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -899,7 +899,7 @@ static inline pte_t pte_swp_clear_exclusive(pte_t pte) #define PAGE_KERNEL __pgprot(0) #define swapper_pg_dir NULL #define TASK_SIZE 0xffffffffUL -#define VMALLOC_START 0 +#define VMALLOC_START _AC(0, UL) #define VMALLOC_END TASK_SIZE #endif /* !CONFIG_MMU */ -- cgit v1.2.3 From 655fc6cd45521aba4a21c6e607533f1a21e06c2e Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Fri, 8 Dec 2023 15:30:32 +0800 Subject: loongarch, kexec: change dependency of object files Patch series "kexec: fix the incorrect ifdeffery and dependency of CONFIG_KEXEC". The select of KEXEC for CRASH_DUMP in kernel/Kconfig.kexec will be dropped, then compiling errors will be triggered if below config items are set: === CONFIG_CRASH_CORE=y CONFIG_KEXEC_CORE=y CONFIG_CRASH_DUMP=y === E.g on mips, below link error are seen: -------------------------------------------------------------------- mipsel-linux-ld: kernel/kexec_core.o: in function `kimage_free': kernel/kexec_core.c:(.text+0x2200): undefined reference to `machine_kexec_cleanup' mipsel-linux-ld: kernel/kexec_core.o: in function `__crash_kexec': kernel/kexec_core.c:(.text+0x2480): undefined reference to `machine_crash_shutdown' mipsel-linux-ld: kernel/kexec_core.c:(.text+0x2488): undefined reference to `machine_kexec' mipsel-linux-ld: kernel/kexec_core.o: in function `kernel_kexec': kernel/kexec_core.c:(.text+0x29b8): undefined reference to `machine_shutdown' mipsel-linux-ld: kernel/kexec_core.c:(.text+0x29c0): undefined reference to `machine_kexec' -------------------------------------------------------------------- Here, change the incorrect dependency of building kexec_core related object files, and the ifdeffery on architectures from CONFIG_KEXEC to CONFIG_KEXEC_CORE. Testing: ======== Passed on mips and loognarch with the LKP reproducer. This patch (of 5): Currently, in arch/loongarch/kernel/Makefile, building machine_kexec.o relocate_kernel.o depends on CONFIG_KEXEC. Whereas, since we will drop the select of KEXEC for CRASH_DUMP in kernel/Kconfig.kexec, compiling error will be triggered if below config items are set: === CONFIG_CRASH_CORE=y CONFIG_KEXEC_CORE=y CONFIG_CRASH_DUMP=y === --------------------------------------------------------------- loongarch64-linux-ld: kernel/kexec_core.o: in function `.L209': >> kexec_core.c:(.text+0x1660): undefined reference to `machine_kexec_cleanup' loongarch64-linux-ld: kernel/kexec_core.o: in function `.L287': >> kexec_core.c:(.text+0x1c5c): undefined reference to `machine_crash_shutdown' >> loongarch64-linux-ld: kexec_core.c:(.text+0x1c64): undefined reference to `machine_kexec' loongarch64-linux-ld: kernel/kexec_core.o: in function `.L2^B5': >> kexec_core.c:(.text+0x2090): undefined reference to `machine_shutdown' loongarch64-linux-ld: kexec_core.c:(.text+0x20a0): undefined reference to `machine_kexec' --------------------------------------------------------------- Here, change the dependency of machine_kexec.o relocate_kernel.o to CONFIG_KEXEC_CORE can fix above building error. Link: https://lkml.kernel.org/r/20231208073036.7884-1-bhe@redhat.com Link: https://lkml.kernel.org/r/20231208073036.7884-2-bhe@redhat.com Signed-off-by: Baoquan He Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202311300946.kHE9Iu71-lkp@intel.com/ Cc: Eric DeVolder Cc: Ignat Korchagin Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- arch/loongarch/kernel/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/loongarch/kernel/Makefile b/arch/loongarch/kernel/Makefile index 4fcc168f0732..3c808c680370 100644 --- a/arch/loongarch/kernel/Makefile +++ b/arch/loongarch/kernel/Makefile @@ -57,7 +57,7 @@ obj-$(CONFIG_MAGIC_SYSRQ) += sysrq.o obj-$(CONFIG_RELOCATABLE) += relocate.o -obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o +obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o -- cgit v1.2.3 From 9bad6b75fca1b38b08d94e93f49a90cda44702b9 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Fri, 8 Dec 2023 15:30:33 +0800 Subject: m68k, kexec: fix the incorrect ifdeffery and build dependency of CONFIG_KEXEC The select of KEXEC for CRASH_DUMP in kernel/Kconfig.kexec will be dropped, then compiling errors will be triggered if below config items are set: === CONFIG_CRASH_CORE=y CONFIG_KEXEC_CORE=y CONFIG_CRASH_DUMP=y === Here, change the dependency of buinding machine_kexec.o relocate_kernel.o and the ifdeffery in asm/kexe.h to CONFIG_KEXEC_CORE. Link: https://lkml.kernel.org/r/20231208073036.7884-3-bhe@redhat.com Signed-off-by: Baoquan He Cc: Eric DeVolder Cc: Ignat Korchagin Cc: kernel test robot Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- arch/m68k/include/asm/kexec.h | 4 ++-- arch/m68k/kernel/Makefile | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/m68k/include/asm/kexec.h b/arch/m68k/include/asm/kexec.h index f5a8b2defa4b..3b0b64f0a353 100644 --- a/arch/m68k/include/asm/kexec.h +++ b/arch/m68k/include/asm/kexec.h @@ -2,7 +2,7 @@ #ifndef _ASM_M68K_KEXEC_H #define _ASM_M68K_KEXEC_H -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE /* Maximum physical address we can use pages from */ #define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) @@ -25,6 +25,6 @@ static inline void crash_setup_regs(struct pt_regs *newregs, #endif /* __ASSEMBLY__ */ -#endif /* CONFIG_KEXEC */ +#endif /* CONFIG_KEXEC_CORE */ #endif /* _ASM_M68K_KEXEC_H */ diff --git a/arch/m68k/kernel/Makefile b/arch/m68k/kernel/Makefile index 01fb69a5095f..f335bf3268a1 100644 --- a/arch/m68k/kernel/Makefile +++ b/arch/m68k/kernel/Makefile @@ -25,7 +25,7 @@ obj-$(CONFIG_PCI) += pcibios.o obj-$(CONFIG_M68K_NONCOHERENT_DMA) += dma.o -obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o +obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o obj-$(CONFIG_BOOTINFO_PROC) += bootinfo_proc.o obj-$(CONFIG_UBOOT) += uboot.o -- cgit v1.2.3 From 8cd2accb71f5eb8e92d775fc1978d3779875c2e5 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Fri, 8 Dec 2023 15:30:34 +0800 Subject: mips, kexec: fix the incorrect ifdeffery and dependency of CONFIG_KEXEC The select of KEXEC for CRASH_DUMP in kernel/Kconfig.kexec will be dropped, then compiling errors will be triggered if below config items are set: === CONFIG_CRASH_CORE=y CONFIG_KEXEC_CORE=y CONFIG_CRASH_DUMP=y === -------------------------------------------------------------------- mipsel-linux-ld: kernel/kexec_core.o: in function `kimage_free': kernel/kexec_core.c:(.text+0x2200): undefined reference to `machine_kexec_cleanup' mipsel-linux-ld: kernel/kexec_core.o: in function `__crash_kexec': kernel/kexec_core.c:(.text+0x2480): undefined reference to `machine_crash_shutdown' mipsel-linux-ld: kernel/kexec_core.c:(.text+0x2488): undefined reference to `machine_kexec' mipsel-linux-ld: kernel/kexec_core.o: in function `kernel_kexec': kernel/kexec_core.c:(.text+0x29b8): undefined reference to `machine_shutdown' mipsel-linux-ld: kernel/kexec_core.c:(.text+0x29c0): undefined reference to `machine_kexec' -------------------------------------------------------------------- Here, change the dependency of building kexec_core related object files, and the ifdeffery in mips from CONFIG_KEXEC to CONFIG_KEXEC_CORE. Link: https://lkml.kernel.org/r/20231208073036.7884-4-bhe@redhat.com Signed-off-by: Baoquan He Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202311302042.sn8cDPIX-lkp@intel.com/ Cc: Eric DeVolder Cc: Ignat Korchagin Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- arch/mips/cavium-octeon/smp.c | 4 ++-- arch/mips/include/asm/kexec.h | 2 +- arch/mips/include/asm/smp-ops.h | 2 +- arch/mips/include/asm/smp.h | 2 +- arch/mips/kernel/Makefile | 2 +- arch/mips/kernel/smp-bmips.c | 4 ++-- arch/mips/kernel/smp-cps.c | 10 +++++----- arch/mips/loongson64/reset.c | 4 ++-- arch/mips/loongson64/smp.c | 2 +- 9 files changed, 16 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c index 33c09688210f..08ea2cde1eb5 100644 --- a/arch/mips/cavium-octeon/smp.c +++ b/arch/mips/cavium-octeon/smp.c @@ -422,7 +422,7 @@ static const struct plat_smp_ops octeon_smp_ops = { .cpu_disable = octeon_cpu_disable, .cpu_die = octeon_cpu_die, #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .kexec_nonboot_cpu = kexec_nonboot_cpu_jump, #endif }; @@ -502,7 +502,7 @@ static const struct plat_smp_ops octeon_78xx_smp_ops = { .cpu_disable = octeon_cpu_disable, .cpu_die = octeon_cpu_die, #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .kexec_nonboot_cpu = kexec_nonboot_cpu_jump, #endif }; diff --git a/arch/mips/include/asm/kexec.h b/arch/mips/include/asm/kexec.h index d6d5fa5cc31d..69e579e41e66 100644 --- a/arch/mips/include/asm/kexec.h +++ b/arch/mips/include/asm/kexec.h @@ -31,7 +31,7 @@ static inline void crash_setup_regs(struct pt_regs *newregs, prepare_frametrace(newregs); } -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE struct kimage; extern unsigned long kexec_args[4]; extern int (*_machine_kexec_prepare)(struct kimage *); diff --git a/arch/mips/include/asm/smp-ops.h b/arch/mips/include/asm/smp-ops.h index 5719ff49eff1..0c59e168f800 100644 --- a/arch/mips/include/asm/smp-ops.h +++ b/arch/mips/include/asm/smp-ops.h @@ -35,7 +35,7 @@ struct plat_smp_ops { void (*cpu_die)(unsigned int cpu); void (*cleanup_dead_cpu)(unsigned cpu); #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE void (*kexec_nonboot_cpu)(void); #endif }; diff --git a/arch/mips/include/asm/smp.h b/arch/mips/include/asm/smp.h index a40d8c0e4b87..901bc61fa7ae 100644 --- a/arch/mips/include/asm/smp.h +++ b/arch/mips/include/asm/smp.h @@ -93,7 +93,7 @@ static inline void __cpu_die(unsigned int cpu) extern void __noreturn play_dead(void); #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE static inline void kexec_nonboot_cpu(void) { extern const struct plat_smp_ops *mp_ops; /* private */ diff --git a/arch/mips/kernel/Makefile b/arch/mips/kernel/Makefile index 853a43ee4b44..ecf3278a32f7 100644 --- a/arch/mips/kernel/Makefile +++ b/arch/mips/kernel/Makefile @@ -90,7 +90,7 @@ obj-$(CONFIG_GPIO_TXX9) += gpio_txx9.o obj-$(CONFIG_RELOCATABLE) += relocate.o -obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o +obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o crash.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o obj-$(CONFIG_EARLY_PRINTK_8250) += early_printk_8250.o diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index c074ecce3fbf..b3dbf9ecb0d6 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -434,7 +434,7 @@ const struct plat_smp_ops bmips43xx_smp_ops = { .cpu_disable = bmips_cpu_disable, .cpu_die = bmips_cpu_die, #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .kexec_nonboot_cpu = kexec_nonboot_cpu_jump, #endif }; @@ -451,7 +451,7 @@ const struct plat_smp_ops bmips5000_smp_ops = { .cpu_disable = bmips_cpu_disable, .cpu_die = bmips_cpu_die, #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .kexec_nonboot_cpu = kexec_nonboot_cpu_jump, #endif }; diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c index dd55d59b88db..f6c37d407f36 100644 --- a/arch/mips/kernel/smp-cps.c +++ b/arch/mips/kernel/smp-cps.c @@ -392,7 +392,7 @@ static void cps_smp_finish(void) local_irq_enable(); } -#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC) +#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC_CORE) enum cpu_death { CPU_DEATH_HALT, @@ -429,7 +429,7 @@ static void cps_shutdown_this_cpu(enum cpu_death death) } } -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE static void cps_kexec_nonboot_cpu(void) { @@ -439,9 +439,9 @@ static void cps_kexec_nonboot_cpu(void) cps_shutdown_this_cpu(CPU_DEATH_POWER); } -#endif /* CONFIG_KEXEC */ +#endif /* CONFIG_KEXEC_CORE */ -#endif /* CONFIG_HOTPLUG_CPU || CONFIG_KEXEC */ +#endif /* CONFIG_HOTPLUG_CPU || CONFIG_KEXEC_CORE */ #ifdef CONFIG_HOTPLUG_CPU @@ -610,7 +610,7 @@ static const struct plat_smp_ops cps_smp_ops = { .cpu_die = cps_cpu_die, .cleanup_dead_cpu = cps_cleanup_dead_cpu, #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .kexec_nonboot_cpu = cps_kexec_nonboot_cpu, #endif }; diff --git a/arch/mips/loongson64/reset.c b/arch/mips/loongson64/reset.c index e420800043b0..e01c8d4a805a 100644 --- a/arch/mips/loongson64/reset.c +++ b/arch/mips/loongson64/reset.c @@ -53,7 +53,7 @@ static void loongson_halt(void) } } -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE /* 0X80000000~0X80200000 is safe */ #define MAX_ARGS 64 @@ -158,7 +158,7 @@ static int __init mips_reboot_setup(void) _machine_halt = loongson_halt; pm_power_off = loongson_poweroff; -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE kexec_argv = kmalloc(KEXEC_ARGV_SIZE, GFP_KERNEL); if (WARN_ON(!kexec_argv)) return -ENOMEM; diff --git a/arch/mips/loongson64/smp.c b/arch/mips/loongson64/smp.c index e015a26a40f7..498bdc1bb0ed 100644 --- a/arch/mips/loongson64/smp.c +++ b/arch/mips/loongson64/smp.c @@ -864,7 +864,7 @@ const struct plat_smp_ops loongson3_smp_ops = { .cpu_disable = loongson3_cpu_disable, .cpu_die = loongson3_cpu_die, #endif -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .kexec_nonboot_cpu = kexec_nonboot_cpu_jump, #endif }; -- cgit v1.2.3 From d70c27b728b8da1ab9c3b7ca117ee1c99dc86d29 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Fri, 8 Dec 2023 15:30:35 +0800 Subject: sh, kexec: fix the incorrect ifdeffery and dependency of CONFIG_KEXEC The select of KEXEC for CRASH_DUMP in kernel/Kconfig.kexec will be dropped, then compiling errors will be triggered if below config items are set: === CONFIG_CRASH_CORE=y CONFIG_KEXEC_CORE=y CONFIG_CRASH_DUMP=y === Here, change the dependency of building kexec_core related object files, and the ifdeffery on SuperH from CONFIG_KEXEC to CONFIG_KEXEC_CORE. Link: https://lkml.kernel.org/r/20231208073036.7884-5-bhe@redhat.com Signed-off-by: Baoquan He Cc: Eric DeVolder Cc: Ignat Korchagin Cc: kernel test robot Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- arch/sh/include/asm/kexec.h | 4 ++-- arch/sh/kernel/Makefile | 2 +- arch/sh/kernel/reboot.c | 4 ++-- arch/sh/kernel/setup.c | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/sh/include/asm/kexec.h b/arch/sh/include/asm/kexec.h index 927d80ba2332..76631714673c 100644 --- a/arch/sh/include/asm/kexec.h +++ b/arch/sh/include/asm/kexec.h @@ -28,7 +28,7 @@ /* The native architecture */ #define KEXEC_ARCH KEXEC_ARCH_SH -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE /* arch/sh/kernel/machine_kexec.c */ void reserve_crashkernel(void); @@ -67,6 +67,6 @@ static inline void crash_setup_regs(struct pt_regs *newregs, } #else static inline void reserve_crashkernel(void) { } -#endif /* CONFIG_KEXEC */ +#endif /* CONFIG_KEXEC_CORE */ #endif /* __ASM_SH_KEXEC_H */ diff --git a/arch/sh/kernel/Makefile b/arch/sh/kernel/Makefile index 69cd9ac4b2ab..2d7e70537de0 100644 --- a/arch/sh/kernel/Makefile +++ b/arch/sh/kernel/Makefile @@ -33,7 +33,7 @@ obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_SH_STANDARD_BIOS) += sh_bios.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_MODULES) += sh_ksyms_32.o module.o -obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o +obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_STACKTRACE) += stacktrace.o obj-$(CONFIG_IO_TRAPPED) += io_trapped.o diff --git a/arch/sh/kernel/reboot.c b/arch/sh/kernel/reboot.c index e8eeedc9b182..1de006b1c339 100644 --- a/arch/sh/kernel/reboot.c +++ b/arch/sh/kernel/reboot.c @@ -63,7 +63,7 @@ struct machine_ops machine_ops = { .shutdown = native_machine_shutdown, .restart = native_machine_restart, .halt = native_machine_halt, -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE .crash_shutdown = native_machine_crash_shutdown, #endif }; @@ -88,7 +88,7 @@ void machine_halt(void) machine_ops.halt(); } -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE void machine_crash_shutdown(struct pt_regs *regs) { machine_ops.crash_shutdown(regs); diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c index 3d80515298d2..d3175f09b3aa 100644 --- a/arch/sh/kernel/setup.c +++ b/arch/sh/kernel/setup.c @@ -220,7 +220,7 @@ void __init __add_active_range(unsigned int nid, unsigned long start_pfn, request_resource(res, &code_resource); request_resource(res, &data_resource); request_resource(res, &bss_resource); -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE request_resource(res, &crashk_res); #endif -- cgit v1.2.3 From 69f8ca8d36b5e52360f45c3b63bcb3d075da36df Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Fri, 8 Dec 2023 15:30:36 +0800 Subject: x86, kexec: fix the wrong ifdeffery CONFIG_KEXEC With the current ifdeffery CONFIG_KEXEC, get_cmdline_acpi_rsdp() is only available when kexec_load interface is taken, while kexec_file_load interface can't make use of it. Now change it to CONFIG_KEXEC_CORE. Link: https://lkml.kernel.org/r/20231208073036.7884-6-bhe@redhat.com Signed-off-by: Baoquan He Cc: Eric DeVolder Cc: Ignat Korchagin Cc: kernel test robot Cc: Stephen Rothwell Signed-off-by: Andrew Morton --- arch/x86/boot/compressed/acpi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/boot/compressed/acpi.c b/arch/x86/boot/compressed/acpi.c index 55c98fdd67d2..18d15d1ce87d 100644 --- a/arch/x86/boot/compressed/acpi.c +++ b/arch/x86/boot/compressed/acpi.c @@ -178,7 +178,7 @@ static unsigned long get_cmdline_acpi_rsdp(void) { unsigned long addr = 0; -#ifdef CONFIG_KEXEC +#ifdef CONFIG_KEXEC_CORE char val[MAX_ADDR_LEN] = { }; int ret; -- cgit v1.2.3 From 4907a3f54b12b8209864572a312cf967befcae80 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 11 Dec 2023 18:33:54 +0800 Subject: MIPS: dts: loongson: drop incorrect dwmac fallback compatible Device binds to proper PCI ID (LOONGSON, 0x7a03), already listed in DTS, so checking for some other compatible does not make sense. It cannot be bound to unsupported platform. Drop useless, incorrect (space in between) and undocumented compatible. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Yanteng Si Reviewed-by: Conor Dooley Acked-by: Jiaxun Yang Signed-off-by: David S. Miller --- arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi | 3 +-- arch/mips/boot/dts/loongson/ls7a-pch.dtsi | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi index f878f47e4501..ee3e2153dd13 100644 --- a/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi +++ b/arch/mips/boot/dts/loongson/loongson64-2k1000.dtsi @@ -130,8 +130,7 @@ compatible = "pci0014,7a03.0", "pci0014,7a03", "pciclass0c0320", - "pciclass0c03", - "loongson, pci-gmac"; + "pciclass0c03"; reg = <0x1800 0x0 0x0 0x0 0x0>; interrupts = <12 IRQ_TYPE_LEVEL_LOW>, diff --git a/arch/mips/boot/dts/loongson/ls7a-pch.dtsi b/arch/mips/boot/dts/loongson/ls7a-pch.dtsi index 7c69e8245c2f..cce9428afc41 100644 --- a/arch/mips/boot/dts/loongson/ls7a-pch.dtsi +++ b/arch/mips/boot/dts/loongson/ls7a-pch.dtsi @@ -193,8 +193,7 @@ compatible = "pci0014,7a03.0", "pci0014,7a03", "pciclass020000", - "pciclass0200", - "loongson, pci-gmac"; + "pciclass0200"; reg = <0x1800 0x0 0x0 0x0 0x0>; interrupts = <12 IRQ_TYPE_LEVEL_HIGH>, -- cgit v1.2.3 From 0cf72f7f14d12cb065c3d01954cf42fc5638aa69 Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Sat, 25 Nov 2023 15:51:04 -0800 Subject: powerpc/pseries/vas: Migration suspend waits for no in-progress open windows The hypervisor returns migration failure if all VAS windows are not closed. During pre-migration stage, vas_migration_handler() sets migration_in_progress flag and closes all windows from the list. The allocate VAS window routine checks the migration flag, setup the window and then add it to the list. So there is possibility of the migration handler missing the window that is still in the process of setup. t1: Allocate and open VAS t2: Migration event window lock vas_pseries_mutex If migration_in_progress set unlock vas_pseries_mutex return open window HCALL unlock vas_pseries_mutex Modify window HCALL lock vas_pseries_mutex setup window migration_in_progress=true Closes all windows from the list // May miss windows that are // not in the list unlock vas_pseries_mutex lock vas_pseries_mutex return if nr_closed_windows == 0 // No DLPAR CPU or migration add window to the list // Window will be added to the // list after the setup is completed unlock vas_pseries_mutex return unlock vas_pseries_mutex Close VAS window // due to DLPAR CPU or migration return -EBUSY This patch resolves the issue with the following steps: - Set the migration_in_progress flag without holding mutex. - Introduce nr_open_wins_progress counter in VAS capabilities struct - This counter tracks the number of open windows are still in progress - The allocate setup window thread closes windows if the migration is set and decrements nr_open_window_progress counter - The migration handler waits for no in-progress open windows. The code flow with the fix is as follows: t1: Allocate and open VAS t2: Migration event window lock vas_pseries_mutex If migration_in_progress set unlock vas_pseries_mutex return open window HCALL nr_open_wins_progress++ // Window opened, but not // added to the list yet unlock vas_pseries_mutex Modify window HCALL migration_in_progress=true setup window lock vas_pseries_mutex Closes all windows from the list While nr_open_wins_progress { unlock vas_pseries_mutex lock vas_pseries_mutex sleep if nr_closed_windows == 0 // Wait if any open window in or migration is not started // progress. The open window // No DLPAR CPU or migration // thread closes the window without add window to the list // adding to the list and return if nr_open_wins_progress-- // the migration is in progress. unlock vas_pseries_mutex return Close VAS window nr_open_wins_progress-- unlock vas_pseries_mutex return -EBUSY lock vas_pseries_mutex } unlock vas_pseries_mutex return Fixes: 37e6764895ef ("powerpc/pseries/vas: Add VAS migration handler") Signed-off-by: Haren Myneni Signed-off-by: Michael Ellerman Link: https://msgid.link/20231125235104.3405008-1-haren@linux.ibm.com --- arch/powerpc/platforms/pseries/vas.c | 51 +++++++++++++++++++++++++++++++----- arch/powerpc/platforms/pseries/vas.h | 2 ++ 2 files changed, 46 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/pseries/vas.c b/arch/powerpc/platforms/pseries/vas.c index b1f25bac280b..71d52a670d95 100644 --- a/arch/powerpc/platforms/pseries/vas.c +++ b/arch/powerpc/platforms/pseries/vas.c @@ -385,11 +385,15 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags, * same fault IRQ is not freed by the OS before. */ mutex_lock(&vas_pseries_mutex); - if (migration_in_progress) + if (migration_in_progress) { rc = -EBUSY; - else + } else { rc = allocate_setup_window(txwin, (u64 *)&domain[0], cop_feat_caps->win_type); + if (!rc) + caps->nr_open_wins_progress++; + } + mutex_unlock(&vas_pseries_mutex); if (rc) goto out; @@ -404,8 +408,17 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags, goto out_free; txwin->win_type = cop_feat_caps->win_type; - mutex_lock(&vas_pseries_mutex); + /* + * The migration SUSPEND thread sets migration_in_progress and + * closes all open windows from the list. But the window is + * added to the list after open and modify HCALLs. So possible + * that migration_in_progress is set before modify HCALL which + * may cause some windows are still open when the hypervisor + * initiates the migration. + * So checks the migration_in_progress flag again and close all + * open windows. + * * Possible to lose the acquired credit with DLPAR core * removal after the window is opened. So if there are any * closed windows (means with lost credits), do not give new @@ -413,9 +426,11 @@ static struct vas_window *vas_allocate_window(int vas_id, u64 flags, * after the existing windows are reopened when credits are * available. */ - if (!caps->nr_close_wins) { + mutex_lock(&vas_pseries_mutex); + if (!caps->nr_close_wins && !migration_in_progress) { list_add(&txwin->win_list, &caps->list); caps->nr_open_windows++; + caps->nr_open_wins_progress--; mutex_unlock(&vas_pseries_mutex); vas_user_win_add_mm_context(&txwin->vas_win.task_ref); return &txwin->vas_win; @@ -433,6 +448,12 @@ out_free: */ free_irq_setup(txwin); h_deallocate_vas_window(txwin->vas_win.winid); + /* + * Hold mutex and reduce nr_open_wins_progress counter. + */ + mutex_lock(&vas_pseries_mutex); + caps->nr_open_wins_progress--; + mutex_unlock(&vas_pseries_mutex); out: atomic_dec(&cop_feat_caps->nr_used_credits); kfree(txwin); @@ -937,14 +958,14 @@ int vas_migration_handler(int action) struct vas_caps *vcaps; int i, rc = 0; + pr_info("VAS migration event %d\n", action); + /* * NX-GZIP is not enabled. Nothing to do for migration. */ if (!copypaste_feat) return rc; - mutex_lock(&vas_pseries_mutex); - if (action == VAS_SUSPEND) migration_in_progress = true; else @@ -990,12 +1011,27 @@ int vas_migration_handler(int action) switch (action) { case VAS_SUSPEND: + mutex_lock(&vas_pseries_mutex); rc = reconfig_close_windows(vcaps, vcaps->nr_open_windows, true); + /* + * Windows are included in the list after successful + * open. So wait for closing these in-progress open + * windows in vas_allocate_window() which will be + * done if the migration_in_progress is set. + */ + while (vcaps->nr_open_wins_progress) { + mutex_unlock(&vas_pseries_mutex); + msleep(10); + mutex_lock(&vas_pseries_mutex); + } + mutex_unlock(&vas_pseries_mutex); break; case VAS_RESUME: + mutex_lock(&vas_pseries_mutex); atomic_set(&caps->nr_total_credits, new_nr_creds); rc = reconfig_open_windows(vcaps, new_nr_creds, true); + mutex_unlock(&vas_pseries_mutex); break; default: /* should not happen */ @@ -1011,8 +1047,9 @@ int vas_migration_handler(int action) goto out; } + pr_info("VAS migration event (%d) successful\n", action); + out: - mutex_unlock(&vas_pseries_mutex); return rc; } diff --git a/arch/powerpc/platforms/pseries/vas.h b/arch/powerpc/platforms/pseries/vas.h index 7115043ec488..45567cd13178 100644 --- a/arch/powerpc/platforms/pseries/vas.h +++ b/arch/powerpc/platforms/pseries/vas.h @@ -91,6 +91,8 @@ struct vas_cop_feat_caps { struct vas_caps { struct vas_cop_feat_caps caps; struct list_head list; /* List of open windows */ + int nr_open_wins_progress; /* Number of open windows in */ + /* progress. Used in migration */ int nr_close_wins; /* closed windows in the hypervisor for DLPAR */ int nr_open_windows; /* Number of successful open windows */ u8 feat; /* Feature type */ -- cgit v1.2.3 From ae1914174a63a558113e80d24ccac2773f9f7b2b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 15 Dec 2023 13:40:57 -0700 Subject: cred: get rid of CONFIG_DEBUG_CREDENTIALS This code is rarely (never?) enabled by distros, and it hasn't caught anything in decades. Let's kill off this legacy debug code. Suggested-by: Linus Torvalds Signed-off-by: Jens Axboe Signed-off-by: Linus Torvalds --- arch/powerpc/configs/skiroot_defconfig | 1 - arch/s390/configs/debug_defconfig | 1 - fs/nfsd/auth.c | 4 - fs/nfsd/nfssvc.c | 1 - fs/nfsd/vfs.c | 9 +- fs/open.c | 3 - include/linux/cred.h | 50 ------- kernel/cred.c | 231 +++--------------------------- kernel/exit.c | 3 - lib/Kconfig.debug | 15 -- net/sunrpc/auth.c | 3 - security/selinux/hooks.c | 6 - tools/objtool/noreturns.h | 1 - tools/testing/selftests/bpf/config.x86_64 | 1 - tools/testing/selftests/hid/config.common | 1 - 15 files changed, 17 insertions(+), 313 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig index 8d3eacb50d56..9d44e6630908 100644 --- a/arch/powerpc/configs/skiroot_defconfig +++ b/arch/powerpc/configs/skiroot_defconfig @@ -301,7 +301,6 @@ CONFIG_WQ_WATCHDOG=y CONFIG_DEBUG_SG=y CONFIG_DEBUG_NOTIFIERS=y CONFIG_BUG_ON_DATA_CORRUPTION=y -CONFIG_DEBUG_CREDENTIALS=y # CONFIG_FTRACE is not set CONFIG_XMON=y # CONFIG_RUNTIME_TESTING_MENU is not set diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 438cd92e6080..dd0608629310 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -834,7 +834,6 @@ CONFIG_DEBUG_IRQFLAGS=y CONFIG_DEBUG_LIST=y CONFIG_DEBUG_SG=y CONFIG_DEBUG_NOTIFIERS=y -CONFIG_DEBUG_CREDENTIALS=y CONFIG_RCU_TORTURE_TEST=m CONFIG_RCU_REF_SCALE_TEST=m CONFIG_RCU_CPU_STALL_TIMEOUT=300 diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index fdf2aad73470..e6beaaf4f170 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -26,8 +26,6 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) int i; int flags = nfsexp_flags(rqstp, exp); - validate_process_creds(); - /* discard any old override before preparing the new set */ revert_creds(get_cred(current_real_cred())); new = prepare_creds(); @@ -81,10 +79,8 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) else new->cap_effective = cap_raise_nfsd_set(new->cap_effective, new->cap_permitted); - validate_process_creds(); put_cred(override_creds(new)); put_cred(new); - validate_process_creds(); return 0; oom: diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index fe61d9bbcc1f..5014ab87d313 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -955,7 +955,6 @@ nfsd(void *vrqstp) rqstp->rq_server->sv_maxconn = nn->max_connections; svc_recv(rqstp); - validate_process_creds(); } atomic_dec(&nfsdstats.th_cnt); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index fbbea7498f02..e01e4e2acbd9 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -901,7 +901,6 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int host_err; bool retried = false; - validate_process_creds(); /* * If we get here, then the client has already done an "open", * and (hopefully) checked permission - so allow OWNER_OVERRIDE @@ -926,7 +925,6 @@ retry: } err = nfserrno(host_err); } - validate_process_creds(); return err; } @@ -943,12 +941,7 @@ int nfsd_open_verified(struct svc_rqst *rqstp, struct svc_fh *fhp, int may_flags, struct file **filp) { - int err; - - validate_process_creds(); - err = __nfsd_open(rqstp, fhp, S_IFREG, may_flags, filp); - validate_process_creds(); - return err; + return __nfsd_open(rqstp, fhp, S_IFREG, may_flags, filp); } /* diff --git a/fs/open.c b/fs/open.c index 02dc608d40d8..3494a9cd8046 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1088,8 +1088,6 @@ struct file *dentry_open(const struct path *path, int flags, int error; struct file *f; - validate_creds(cred); - /* We must always pass in a valid mount pointer. */ BUG_ON(!path->mnt); @@ -1128,7 +1126,6 @@ struct file *dentry_create(const struct path *path, int flags, umode_t mode, struct file *f; int error; - validate_creds(cred); f = alloc_empty_file(flags, cred); if (IS_ERR(f)) return f; diff --git a/include/linux/cred.h b/include/linux/cred.h index a3383f8efb8f..2976f534a7a3 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -110,13 +110,6 @@ static inline int groups_search(const struct group_info *group_info, kgid_t grp) */ struct cred { atomic_long_t usage; -#ifdef CONFIG_DEBUG_CREDENTIALS - atomic_t subscribers; /* number of processes subscribed */ - void *put_addr; - unsigned magic; -#define CRED_MAGIC 0x43736564 -#define CRED_MAGIC_DEAD 0x44656144 -#endif kuid_t uid; /* real UID of the task */ kgid_t gid; /* real GID of the task */ kuid_t suid; /* saved UID of the task */ @@ -172,46 +165,6 @@ extern int cred_fscmp(const struct cred *, const struct cred *); extern void __init cred_init(void); extern int set_cred_ucounts(struct cred *); -/* - * check for validity of credentials - */ -#ifdef CONFIG_DEBUG_CREDENTIALS -extern void __noreturn __invalid_creds(const struct cred *, const char *, unsigned); -extern void __validate_process_creds(struct task_struct *, - const char *, unsigned); - -extern bool creds_are_invalid(const struct cred *cred); - -static inline void __validate_creds(const struct cred *cred, - const char *file, unsigned line) -{ - if (unlikely(creds_are_invalid(cred))) - __invalid_creds(cred, file, line); -} - -#define validate_creds(cred) \ -do { \ - __validate_creds((cred), __FILE__, __LINE__); \ -} while(0) - -#define validate_process_creds() \ -do { \ - __validate_process_creds(current, __FILE__, __LINE__); \ -} while(0) - -extern void validate_creds_for_do_exit(struct task_struct *); -#else -static inline void validate_creds(const struct cred *cred) -{ -} -static inline void validate_creds_for_do_exit(struct task_struct *tsk) -{ -} -static inline void validate_process_creds(void) -{ -} -#endif - static inline bool cap_ambient_invariant_ok(const struct cred *cred) { return cap_issubset(cred->cap_ambient, @@ -264,7 +217,6 @@ static inline const struct cred *get_cred_many(const struct cred *cred, int nr) struct cred *nonconst_cred = (struct cred *) cred; if (!cred) return cred; - validate_creds(cred); nonconst_cred->non_rcu = 0; return get_new_cred_many(nonconst_cred, nr); } @@ -290,7 +242,6 @@ static inline const struct cred *get_cred_rcu(const struct cred *cred) return NULL; if (!atomic_long_inc_not_zero(&nonconst_cred->usage)) return NULL; - validate_creds(cred); nonconst_cred->non_rcu = 0; return cred; } @@ -312,7 +263,6 @@ static inline void put_cred_many(const struct cred *_cred, int nr) struct cred *cred = (struct cred *) _cred; if (cred) { - validate_creds(cred); if (atomic_long_sub_and_test(nr, &cred->usage)) __put_cred(cred); } diff --git a/kernel/cred.c b/kernel/cred.c index 4a6cd0f0fef5..c033a201c808 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -43,10 +43,6 @@ static struct group_info init_groups = { .usage = REFCOUNT_INIT(2) }; */ struct cred init_cred = { .usage = ATOMIC_INIT(4), -#ifdef CONFIG_DEBUG_CREDENTIALS - .subscribers = ATOMIC_INIT(2), - .magic = CRED_MAGIC, -#endif .uid = GLOBAL_ROOT_UID, .gid = GLOBAL_ROOT_GID, .suid = GLOBAL_ROOT_UID, @@ -66,31 +62,6 @@ struct cred init_cred = { .ucounts = &init_ucounts, }; -static inline void set_cred_subscribers(struct cred *cred, int n) -{ -#ifdef CONFIG_DEBUG_CREDENTIALS - atomic_set(&cred->subscribers, n); -#endif -} - -static inline int read_cred_subscribers(const struct cred *cred) -{ -#ifdef CONFIG_DEBUG_CREDENTIALS - return atomic_read(&cred->subscribers); -#else - return 0; -#endif -} - -static inline void alter_cred_subscribers(const struct cred *_cred, int n) -{ -#ifdef CONFIG_DEBUG_CREDENTIALS - struct cred *cred = (struct cred *) _cred; - - atomic_add(n, &cred->subscribers); -#endif -} - /* * The RCU callback to actually dispose of a set of credentials */ @@ -100,20 +71,9 @@ static void put_cred_rcu(struct rcu_head *rcu) kdebug("put_cred_rcu(%p)", cred); -#ifdef CONFIG_DEBUG_CREDENTIALS - if (cred->magic != CRED_MAGIC_DEAD || - atomic_long_read(&cred->usage) != 0 || - read_cred_subscribers(cred) != 0) - panic("CRED: put_cred_rcu() sees %p with" - " mag %x, put %p, usage %ld, subscr %d\n", - cred, cred->magic, cred->put_addr, - atomic_long_read(&cred->usage), - read_cred_subscribers(cred)); -#else if (atomic_long_read(&cred->usage) != 0) panic("CRED: put_cred_rcu() sees %p with usage %ld\n", cred, atomic_long_read(&cred->usage)); -#endif security_cred_free(cred); key_put(cred->session_keyring); @@ -137,16 +97,10 @@ static void put_cred_rcu(struct rcu_head *rcu) */ void __put_cred(struct cred *cred) { - kdebug("__put_cred(%p{%ld,%d})", cred, - atomic_long_read(&cred->usage), - read_cred_subscribers(cred)); + kdebug("__put_cred(%p{%ld})", cred, + atomic_long_read(&cred->usage)); BUG_ON(atomic_long_read(&cred->usage) != 0); -#ifdef CONFIG_DEBUG_CREDENTIALS - BUG_ON(read_cred_subscribers(cred) != 0); - cred->magic = CRED_MAGIC_DEAD; - cred->put_addr = __builtin_return_address(0); -#endif BUG_ON(cred == current->cred); BUG_ON(cred == current->real_cred); @@ -164,9 +118,8 @@ void exit_creds(struct task_struct *tsk) { struct cred *real_cred, *cred; - kdebug("exit_creds(%u,%p,%p,{%ld,%d})", tsk->pid, tsk->real_cred, tsk->cred, - atomic_long_read(&tsk->cred->usage), - read_cred_subscribers(tsk->cred)); + kdebug("exit_creds(%u,%p,%p,{%ld})", tsk->pid, tsk->real_cred, tsk->cred, + atomic_long_read(&tsk->cred->usage)); real_cred = (struct cred *) tsk->real_cred; tsk->real_cred = NULL; @@ -174,15 +127,10 @@ void exit_creds(struct task_struct *tsk) cred = (struct cred *) tsk->cred; tsk->cred = NULL; - validate_creds(cred); if (real_cred == cred) { - alter_cred_subscribers(cred, -2); put_cred_many(cred, 2); } else { - validate_creds(real_cred); - alter_cred_subscribers(real_cred, -1); put_cred(real_cred); - alter_cred_subscribers(cred, -1); put_cred(cred); } @@ -231,9 +179,6 @@ struct cred *cred_alloc_blank(void) return NULL; atomic_long_set(&new->usage, 1); -#ifdef CONFIG_DEBUG_CREDENTIALS - new->magic = CRED_MAGIC; -#endif if (security_cred_alloc_blank(new, GFP_KERNEL_ACCOUNT) < 0) goto error; @@ -264,8 +209,6 @@ struct cred *prepare_creds(void) const struct cred *old; struct cred *new; - validate_process_creds(); - new = kmem_cache_alloc(cred_jar, GFP_KERNEL); if (!new) return NULL; @@ -277,7 +220,6 @@ struct cred *prepare_creds(void) new->non_rcu = 0; atomic_long_set(&new->usage, 1); - set_cred_subscribers(new, 0); get_group_info(new->group_info); get_uid(new->user); get_user_ns(new->user_ns); @@ -300,7 +242,6 @@ struct cred *prepare_creds(void) if (security_prepare_creds(new, old, GFP_KERNEL_ACCOUNT) < 0) goto error; - validate_creds(new); return new; error: @@ -362,10 +303,8 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) clone_flags & CLONE_THREAD ) { p->real_cred = get_cred_many(p->cred, 2); - alter_cred_subscribers(p->cred, 2); - kdebug("share_creds(%p{%ld,%d})", - p->cred, atomic_long_read(&p->cred->usage), - read_cred_subscribers(p->cred)); + kdebug("share_creds(%p{%ld})", + p->cred, atomic_long_read(&p->cred->usage)); inc_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1); return 0; } @@ -404,8 +343,6 @@ int copy_creds(struct task_struct *p, unsigned long clone_flags) p->cred = p->real_cred = get_cred(new); inc_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1); - alter_cred_subscribers(new, 2); - validate_creds(new); return 0; error_put: @@ -457,16 +394,10 @@ int commit_creds(struct cred *new) struct task_struct *task = current; const struct cred *old = task->real_cred; - kdebug("commit_creds(%p{%ld,%d})", new, - atomic_long_read(&new->usage), - read_cred_subscribers(new)); + kdebug("commit_creds(%p{%ld})", new, + atomic_long_read(&new->usage)); BUG_ON(task->cred != old); -#ifdef CONFIG_DEBUG_CREDENTIALS - BUG_ON(read_cred_subscribers(old) < 2); - validate_creds(old); - validate_creds(new); -#endif BUG_ON(atomic_long_read(&new->usage) < 1); get_cred(new); /* we will require a ref for the subj creds too */ @@ -502,14 +433,12 @@ int commit_creds(struct cred *new) * RLIMIT_NPROC limits on user->processes have already been checked * in set_user(). */ - alter_cred_subscribers(new, 2); if (new->user != old->user || new->user_ns != old->user_ns) inc_rlimit_ucounts(new->ucounts, UCOUNT_RLIMIT_NPROC, 1); rcu_assign_pointer(task->real_cred, new); rcu_assign_pointer(task->cred, new); if (new->user != old->user || new->user_ns != old->user_ns) dec_rlimit_ucounts(old->ucounts, UCOUNT_RLIMIT_NPROC, 1); - alter_cred_subscribers(old, -2); /* send notifications */ if (!uid_eq(new->uid, old->uid) || @@ -539,13 +468,9 @@ EXPORT_SYMBOL(commit_creds); */ void abort_creds(struct cred *new) { - kdebug("abort_creds(%p{%ld,%d})", new, - atomic_long_read(&new->usage), - read_cred_subscribers(new)); + kdebug("abort_creds(%p{%ld})", new, + atomic_long_read(&new->usage)); -#ifdef CONFIG_DEBUG_CREDENTIALS - BUG_ON(read_cred_subscribers(new) != 0); -#endif BUG_ON(atomic_long_read(&new->usage) < 1); put_cred(new); } @@ -562,12 +487,8 @@ const struct cred *override_creds(const struct cred *new) { const struct cred *old = current->cred; - kdebug("override_creds(%p{%ld,%d})", new, - atomic_long_read(&new->usage), - read_cred_subscribers(new)); - - validate_creds(old); - validate_creds(new); + kdebug("override_creds(%p{%ld})", new, + atomic_long_read(&new->usage)); /* * NOTE! This uses 'get_new_cred()' rather than 'get_cred()'. @@ -576,18 +497,12 @@ const struct cred *override_creds(const struct cred *new) * we are only installing the cred into the thread-synchronous * '->cred' pointer, not the '->real_cred' pointer that is * visible to other threads under RCU. - * - * Also note that we did validate_creds() manually, not depending - * on the validation in 'get_cred()'. */ get_new_cred((struct cred *)new); - alter_cred_subscribers(new, 1); rcu_assign_pointer(current->cred, new); - alter_cred_subscribers(old, -1); - kdebug("override_creds() = %p{%ld,%d}", old, - atomic_long_read(&old->usage), - read_cred_subscribers(old)); + kdebug("override_creds() = %p{%ld}", old, + atomic_long_read(&old->usage)); return old; } EXPORT_SYMBOL(override_creds); @@ -603,15 +518,10 @@ void revert_creds(const struct cred *old) { const struct cred *override = current->cred; - kdebug("revert_creds(%p{%ld,%d})", old, - atomic_long_read(&old->usage), - read_cred_subscribers(old)); + kdebug("revert_creds(%p{%ld})", old, + atomic_long_read(&old->usage)); - validate_creds(old); - validate_creds(override); - alter_cred_subscribers(old, 1); rcu_assign_pointer(current->cred, old); - alter_cred_subscribers(override, -1); put_cred(override); } EXPORT_SYMBOL(revert_creds); @@ -731,12 +641,10 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) kdebug("prepare_kernel_cred() alloc %p", new); old = get_task_cred(daemon); - validate_creds(old); *new = *old; new->non_rcu = 0; atomic_long_set(&new->usage, 1); - set_cred_subscribers(new, 0); get_uid(new->user); get_user_ns(new->user_ns); get_group_info(new->group_info); @@ -760,7 +668,6 @@ struct cred *prepare_kernel_cred(struct task_struct *daemon) goto error; put_cred(old); - validate_creds(new); return new; error: @@ -825,109 +732,3 @@ int set_create_files_as(struct cred *new, struct inode *inode) return security_kernel_create_files_as(new, inode); } EXPORT_SYMBOL(set_create_files_as); - -#ifdef CONFIG_DEBUG_CREDENTIALS - -bool creds_are_invalid(const struct cred *cred) -{ - if (cred->magic != CRED_MAGIC) - return true; - return false; -} -EXPORT_SYMBOL(creds_are_invalid); - -/* - * dump invalid credentials - */ -static void dump_invalid_creds(const struct cred *cred, const char *label, - const struct task_struct *tsk) -{ - pr_err("%s credentials: %p %s%s%s\n", - label, cred, - cred == &init_cred ? "[init]" : "", - cred == tsk->real_cred ? "[real]" : "", - cred == tsk->cred ? "[eff]" : ""); - pr_err("->magic=%x, put_addr=%p\n", - cred->magic, cred->put_addr); - pr_err("->usage=%ld, subscr=%d\n", - atomic_long_read(&cred->usage), - read_cred_subscribers(cred)); - pr_err("->*uid = { %d,%d,%d,%d }\n", - from_kuid_munged(&init_user_ns, cred->uid), - from_kuid_munged(&init_user_ns, cred->euid), - from_kuid_munged(&init_user_ns, cred->suid), - from_kuid_munged(&init_user_ns, cred->fsuid)); - pr_err("->*gid = { %d,%d,%d,%d }\n", - from_kgid_munged(&init_user_ns, cred->gid), - from_kgid_munged(&init_user_ns, cred->egid), - from_kgid_munged(&init_user_ns, cred->sgid), - from_kgid_munged(&init_user_ns, cred->fsgid)); -#ifdef CONFIG_SECURITY - pr_err("->security is %p\n", cred->security); - if ((unsigned long) cred->security >= PAGE_SIZE && - (((unsigned long) cred->security & 0xffffff00) != - (POISON_FREE << 24 | POISON_FREE << 16 | POISON_FREE << 8))) - pr_err("->security {%x, %x}\n", - ((u32*)cred->security)[0], - ((u32*)cred->security)[1]); -#endif -} - -/* - * report use of invalid credentials - */ -void __noreturn __invalid_creds(const struct cred *cred, const char *file, unsigned line) -{ - pr_err("Invalid credentials\n"); - pr_err("At %s:%u\n", file, line); - dump_invalid_creds(cred, "Specified", current); - BUG(); -} -EXPORT_SYMBOL(__invalid_creds); - -/* - * check the credentials on a process - */ -void __validate_process_creds(struct task_struct *tsk, - const char *file, unsigned line) -{ - if (tsk->cred == tsk->real_cred) { - if (unlikely(read_cred_subscribers(tsk->cred) < 2 || - creds_are_invalid(tsk->cred))) - goto invalid_creds; - } else { - if (unlikely(read_cred_subscribers(tsk->real_cred) < 1 || - read_cred_subscribers(tsk->cred) < 1 || - creds_are_invalid(tsk->real_cred) || - creds_are_invalid(tsk->cred))) - goto invalid_creds; - } - return; - -invalid_creds: - pr_err("Invalid process credentials\n"); - pr_err("At %s:%u\n", file, line); - - dump_invalid_creds(tsk->real_cred, "Real", tsk); - if (tsk->cred != tsk->real_cred) - dump_invalid_creds(tsk->cred, "Effective", tsk); - else - pr_err("Effective creds == Real creds\n"); - BUG(); -} -EXPORT_SYMBOL(__validate_process_creds); - -/* - * check creds for do_exit() - */ -void validate_creds_for_do_exit(struct task_struct *tsk) -{ - kdebug("validate_creds_for_do_exit(%p,%p{%ld,%d})", - tsk->real_cred, tsk->cred, - atomic_long_read(&tsk->cred->usage), - read_cred_subscribers(tsk->cred)); - - __validate_process_creds(tsk, __FILE__, __LINE__); -} - -#endif /* CONFIG_DEBUG_CREDENTIALS */ diff --git a/kernel/exit.c b/kernel/exit.c index ee9f43bed49a..aedc0832c9f4 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -824,8 +824,6 @@ void __noreturn do_exit(long code) ptrace_event(PTRACE_EVENT_EXIT, code); user_events_exit(tsk); - validate_creds_for_do_exit(tsk); - io_uring_files_cancel(); exit_signals(tsk); /* sets PF_EXITING */ @@ -909,7 +907,6 @@ void __noreturn do_exit(long code) if (tsk->task_frag.page) put_page(tsk->task_frag.page); - validate_creds_for_do_exit(tsk); exit_task_stack_account(tsk); check_stack_usage(); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index cc7d53d9dc01..4405f81248fb 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1739,21 +1739,6 @@ config DEBUG_MAPLE_TREE endmenu -config DEBUG_CREDENTIALS - bool "Debug credential management" - depends on DEBUG_KERNEL - help - Enable this to turn on some debug checking for credential - management. The additional code keeps track of the number of - pointers from task_structs to any given cred struct, and checks to - see that this number never exceeds the usage count of the cred - struct. - - Furthermore, if SELinux is enabled, this also checks that the - security pointer in the cred struct is never seen to be invalid. - - If unsure, say N. - source "kernel/rcu/Kconfig.debug" config DEBUG_WQ_FORCE_RR_CPU diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 7bfe7d9a32aa..04534ea537c8 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -40,9 +40,6 @@ static unsigned long number_cred_unused; static struct cred machine_cred = { .usage = ATOMIC_INIT(1), -#ifdef CONFIG_DEBUG_CREDENTIALS - .magic = CRED_MAGIC, -#endif }; /* diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index feda711c6b7b..340b2bbbb2dd 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -1660,8 +1660,6 @@ static int inode_has_perm(const struct cred *cred, struct inode_security_struct *isec; u32 sid; - validate_creds(cred); - if (unlikely(IS_PRIVATE(inode))) return 0; @@ -3056,8 +3054,6 @@ static int selinux_inode_follow_link(struct dentry *dentry, struct inode *inode, struct inode_security_struct *isec; u32 sid; - validate_creds(cred); - ad.type = LSM_AUDIT_DATA_DENTRY; ad.u.dentry = dentry; sid = cred_sid(cred); @@ -3101,8 +3097,6 @@ static int selinux_inode_permission(struct inode *inode, int mask) if (!mask) return 0; - validate_creds(cred); - if (unlikely(IS_PRIVATE(inode))) return 0; diff --git a/tools/objtool/noreturns.h b/tools/objtool/noreturns.h index 649ebdef9c3f..1685d7ea6a9f 100644 --- a/tools/objtool/noreturns.h +++ b/tools/objtool/noreturns.h @@ -6,7 +6,6 @@ * * Yes, this is unfortunate. A better solution is in the works. */ -NORETURN(__invalid_creds) NORETURN(__kunit_abort) NORETURN(__module_put_and_kthread_exit) NORETURN(__reiserfs_panic) diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64 index 2e70a6048278..49a29dbc1910 100644 --- a/tools/testing/selftests/bpf/config.x86_64 +++ b/tools/testing/selftests/bpf/config.x86_64 @@ -50,7 +50,6 @@ CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_XXHASH=y CONFIG_DCB=y CONFIG_DEBUG_ATOMIC_SLEEP=y -CONFIG_DEBUG_CREDENTIALS=y CONFIG_DEBUG_INFO_BTF=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_MEMORY_INIT=y diff --git a/tools/testing/selftests/hid/config.common b/tools/testing/selftests/hid/config.common index 0617275d93cc..0f456dbab62f 100644 --- a/tools/testing/selftests/hid/config.common +++ b/tools/testing/selftests/hid/config.common @@ -46,7 +46,6 @@ CONFIG_CRYPTO_SEQIV=y CONFIG_CRYPTO_XXHASH=y CONFIG_DCB=y CONFIG_DEBUG_ATOMIC_SLEEP=y -CONFIG_DEBUG_CREDENTIALS=y CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y CONFIG_DEBUG_MEMORY_INIT=y CONFIG_DEFAULT_FQ_CODEL=y -- cgit v1.2.3 From 7ab6fb505b2a7447c4a7237a12c59e3ad0c7298c Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Tue, 19 Dec 2023 10:48:27 +0800 Subject: LoongArch: KVM: Optimization for memslot hugepage checking During shadow mmu page fault, there is checking for huge page for specified memslot. Page fault is hot path, check logic can be done when memslot is created. Here two flags are added for huge page checking, KVM_MEM_HUGEPAGE_CAPABLE and KVM_MEM_HUGEPAGE_INCAPABLE. Indeed for an optimized qemu, memslot for DRAM is always huge page aligned. The flag is firstly checked during hot page fault path. Now only huge page flag is supported, there is a long way for super page support in LoongArch system. Since super page size is 64G for 16K pagesize and 1G for 4K pagesize, 64G physical address is rarely used and LoongArch kernel needs support super page for 4K. Also memory layout of LoongArch qemu VM should be 1G aligned. Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/kvm_host.h | 3 + arch/loongarch/kvm/mmu.c | 124 +++++++++++++++++++++++----------- 2 files changed, 86 insertions(+), 41 deletions(-) (limited to 'arch') diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index 11328700d4fa..0e89db020481 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -45,7 +45,10 @@ struct kvm_vcpu_stat { u64 signal_exits; }; +#define KVM_MEM_HUGEPAGE_CAPABLE (1UL << 0) +#define KVM_MEM_HUGEPAGE_INCAPABLE (1UL << 1) struct kvm_arch_memory_slot { + unsigned long flags; }; struct kvm_context { diff --git a/arch/loongarch/kvm/mmu.c b/arch/loongarch/kvm/mmu.c index 80480df5f550..915f17527893 100644 --- a/arch/loongarch/kvm/mmu.c +++ b/arch/loongarch/kvm/mmu.c @@ -13,6 +13,16 @@ #include #include +static inline bool kvm_hugepage_capable(struct kvm_memory_slot *slot) +{ + return slot->arch.flags & KVM_MEM_HUGEPAGE_CAPABLE; +} + +static inline bool kvm_hugepage_incapable(struct kvm_memory_slot *slot) +{ + return slot->arch.flags & KVM_MEM_HUGEPAGE_INCAPABLE; +} + static inline void kvm_ptw_prepare(struct kvm *kvm, kvm_ptw_ctx *ctx) { ctx->level = kvm->arch.root_level; @@ -365,6 +375,69 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm, kvm_ptw_top(kvm->arch.pgd, start << PAGE_SHIFT, end << PAGE_SHIFT, &ctx); } +int kvm_arch_prepare_memory_region(struct kvm *kvm, const struct kvm_memory_slot *old, + struct kvm_memory_slot *new, enum kvm_mr_change change) +{ + gpa_t gpa_start; + hva_t hva_start; + size_t size, gpa_offset, hva_offset; + + if ((change != KVM_MR_MOVE) && (change != KVM_MR_CREATE)) + return 0; + /* + * Prevent userspace from creating a memory region outside of the + * VM GPA address space + */ + if ((new->base_gfn + new->npages) > (kvm->arch.gpa_size >> PAGE_SHIFT)) + return -ENOMEM; + + new->arch.flags = 0; + size = new->npages * PAGE_SIZE; + gpa_start = new->base_gfn << PAGE_SHIFT; + hva_start = new->userspace_addr; + if (IS_ALIGNED(size, PMD_SIZE) && IS_ALIGNED(gpa_start, PMD_SIZE) + && IS_ALIGNED(hva_start, PMD_SIZE)) + new->arch.flags |= KVM_MEM_HUGEPAGE_CAPABLE; + else { + /* + * Pages belonging to memslots that don't have the same + * alignment within a PMD for userspace and GPA cannot be + * mapped with PMD entries, because we'll end up mapping + * the wrong pages. + * + * Consider a layout like the following: + * + * memslot->userspace_addr: + * +-----+--------------------+--------------------+---+ + * |abcde|fgh Stage-1 block | Stage-1 block tv|xyz| + * +-----+--------------------+--------------------+---+ + * + * memslot->base_gfn << PAGE_SIZE: + * +---+--------------------+--------------------+-----+ + * |abc|def Stage-2 block | Stage-2 block |tvxyz| + * +---+--------------------+--------------------+-----+ + * + * If we create those stage-2 blocks, we'll end up with this + * incorrect mapping: + * d -> f + * e -> g + * f -> h + */ + gpa_offset = gpa_start & (PMD_SIZE - 1); + hva_offset = hva_start & (PMD_SIZE - 1); + if (gpa_offset != hva_offset) { + new->arch.flags |= KVM_MEM_HUGEPAGE_INCAPABLE; + } else { + if (gpa_offset == 0) + gpa_offset = PMD_SIZE; + if ((size + gpa_offset) < (PMD_SIZE * 2)) + new->arch.flags |= KVM_MEM_HUGEPAGE_INCAPABLE; + } + } + + return 0; +} + void kvm_arch_commit_memory_region(struct kvm *kvm, struct kvm_memory_slot *old, const struct kvm_memory_slot *new, @@ -562,47 +635,23 @@ out: } static bool fault_supports_huge_mapping(struct kvm_memory_slot *memslot, - unsigned long hva, unsigned long map_size, bool write) + unsigned long hva, bool write) { - size_t size; - gpa_t gpa_start; - hva_t uaddr_start, uaddr_end; + hva_t start, end; /* Disable dirty logging on HugePages */ if (kvm_slot_dirty_track_enabled(memslot) && write) return false; - size = memslot->npages * PAGE_SIZE; - gpa_start = memslot->base_gfn << PAGE_SHIFT; - uaddr_start = memslot->userspace_addr; - uaddr_end = uaddr_start + size; + if (kvm_hugepage_capable(memslot)) + return true; - /* - * Pages belonging to memslots that don't have the same alignment - * within a PMD for userspace and GPA cannot be mapped with stage-2 - * PMD entries, because we'll end up mapping the wrong pages. - * - * Consider a layout like the following: - * - * memslot->userspace_addr: - * +-----+--------------------+--------------------+---+ - * |abcde|fgh Stage-1 block | Stage-1 block tv|xyz| - * +-----+--------------------+--------------------+---+ - * - * memslot->base_gfn << PAGE_SIZE: - * +---+--------------------+--------------------+-----+ - * |abc|def Stage-2 block | Stage-2 block |tvxyz| - * +---+--------------------+--------------------+-----+ - * - * If we create those stage-2 blocks, we'll end up with this incorrect - * mapping: - * d -> f - * e -> g - * f -> h - */ - if ((gpa_start & (map_size - 1)) != (uaddr_start & (map_size - 1))) + if (kvm_hugepage_incapable(memslot)) return false; + start = memslot->userspace_addr; + end = start + memslot->npages * PAGE_SIZE; + /* * Next, let's make sure we're not trying to map anything not covered * by the memslot. This means we have to prohibit block size mappings @@ -615,8 +664,7 @@ static bool fault_supports_huge_mapping(struct kvm_memory_slot *memslot, * userspace_addr or the base_gfn, as both are equally aligned (per * the check above) and equally sized. */ - return (hva & ~(map_size - 1)) >= uaddr_start && - (hva & ~(map_size - 1)) + map_size <= uaddr_end; + return (hva >= ALIGN(start, PMD_SIZE)) && (hva < ALIGN_DOWN(end, PMD_SIZE)); } /* @@ -842,7 +890,7 @@ retry: /* Disable dirty logging on HugePages */ level = 0; - if (!fault_supports_huge_mapping(memslot, hva, PMD_SIZE, write)) { + if (!fault_supports_huge_mapping(memslot, hva, write)) { level = 0; } else { level = host_pfn_mapping_level(kvm, gfn, memslot); @@ -901,12 +949,6 @@ void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot) { } -int kvm_arch_prepare_memory_region(struct kvm *kvm, const struct kvm_memory_slot *old, - struct kvm_memory_slot *new, enum kvm_mr_change change) -{ - return 0; -} - void kvm_arch_flush_remote_tlbs_memslot(struct kvm *kvm, const struct kvm_memory_slot *memslot) { -- cgit v1.2.3 From 161267320158920a601e40d83fdac60bcaa2acb5 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Tue, 19 Dec 2023 10:48:27 +0800 Subject: LoongArch: KVM: Remove SW timer switch when vcpu is halt polling With halt-polling supported, there is checking for pending events or interrupts when vcpu executes idle instruction. Pending interrupts include injected SW interrupts and passthrough HW interrupts, such as HW timer interrupts, since HW timer works still even if vcpu exists from VM mode. Since HW timer pending interrupt can be set directly with CSR status register, and pending HW timer interrupt checking is used in vcpu block checking function, it is not necessary to switch to SW timer during halt-polling. This patch adds preemption disabling in function kvm_cpu_has_pending_timer(), and removes SW timer switching in idle instruction emulation function. Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kvm/exit.c | 13 ++----------- arch/loongarch/kvm/timer.c | 12 +++++++++--- arch/loongarch/kvm/vcpu.c | 9 ++++++++- 3 files changed, 19 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c index ce8de3fa472c..e708a1786d6b 100644 --- a/arch/loongarch/kvm/exit.c +++ b/arch/loongarch/kvm/exit.c @@ -200,17 +200,8 @@ int kvm_emu_idle(struct kvm_vcpu *vcpu) ++vcpu->stat.idle_exits; trace_kvm_exit_idle(vcpu, KVM_TRACE_EXIT_IDLE); - if (!kvm_arch_vcpu_runnable(vcpu)) { - /* - * Switch to the software timer before halt-polling/blocking as - * the guest's timer may be a break event for the vCPU, and the - * hypervisor timer runs only when the CPU is in guest mode. - * Switch before halt-polling so that KVM recognizes an expired - * timer before blocking. - */ - kvm_save_timer(vcpu); - kvm_vcpu_block(vcpu); - } + if (!kvm_arch_vcpu_runnable(vcpu)) + kvm_vcpu_halt(vcpu); return EMULATE_DONE; } diff --git a/arch/loongarch/kvm/timer.c b/arch/loongarch/kvm/timer.c index 284bf553fefe..12d58040122d 100644 --- a/arch/loongarch/kvm/timer.c +++ b/arch/loongarch/kvm/timer.c @@ -155,11 +155,17 @@ static void _kvm_save_timer(struct kvm_vcpu *vcpu) */ hrtimer_cancel(&vcpu->arch.swtimer); hrtimer_start(&vcpu->arch.swtimer, expire, HRTIMER_MODE_ABS_PINNED); - } else + } else if (vcpu->stat.generic.blocking) { /* - * Inject timer interrupt so that hall polling can dectect and exit + * Inject timer interrupt so that halt polling can dectect and exit. + * VCPU is scheduled out already and sleeps in rcuwait queue and + * will not poll pending events again. kvm_queue_irq() is not enough, + * hrtimer swtimer should be used here. */ - kvm_queue_irq(vcpu, INT_TI); + expire = ktime_add_ns(ktime_get(), 10); + vcpu->arch.expire = expire; + hrtimer_start(&vcpu->arch.swtimer, expire, HRTIMER_MODE_ABS_PINNED); + } } /* diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 73d0c2b9c1a5..54f544b30f32 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -187,8 +187,15 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) { - return kvm_pending_timer(vcpu) || + int ret; + + /* Protect from TOD sync and vcpu_load/put() */ + preempt_disable(); + ret = kvm_pending_timer(vcpu) || kvm_read_hw_gcsr(LOONGARCH_CSR_ESTAT) & (1 << INT_TI); + preempt_enable(); + + return ret; } int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu) -- cgit v1.2.3 From 0d2abe67029644741bf7400b0d00c2faa3e1c455 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Tue, 19 Dec 2023 10:48:27 +0800 Subject: LoongArch: KVM: Allow to access HW timer CSR registers always Currently HW timer CSR registers are allowed to access before entering to vm and disabled if switch to SW timer in host mode, instead it is not necessary to do so. HW timer CSR registers can be accessed always, it is nothing to do with whether it is in vm mode or host mode. This patch removes the limitation. Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kvm/main.c | 1 - arch/loongarch/kvm/timer.c | 27 ++++++--------------------- 2 files changed, 6 insertions(+), 22 deletions(-) (limited to 'arch') diff --git a/arch/loongarch/kvm/main.c b/arch/loongarch/kvm/main.c index 1c1d5199500e..86a2f2d0cb27 100644 --- a/arch/loongarch/kvm/main.c +++ b/arch/loongarch/kvm/main.c @@ -287,7 +287,6 @@ int kvm_arch_hardware_enable(void) if (env & CSR_GCFG_MATC_ROOT) gcfg |= CSR_GCFG_MATC_ROOT; - gcfg |= CSR_GCFG_TIT; write_csr_gcfg(gcfg); kvm_flush_tlb_all(); diff --git a/arch/loongarch/kvm/timer.c b/arch/loongarch/kvm/timer.c index 12d58040122d..d6d5bcea349b 100644 --- a/arch/loongarch/kvm/timer.c +++ b/arch/loongarch/kvm/timer.c @@ -70,15 +70,6 @@ void kvm_init_timer(struct kvm_vcpu *vcpu, unsigned long timer_hz) */ void kvm_acquire_timer(struct kvm_vcpu *vcpu) { - unsigned long cfg; - - cfg = read_csr_gcfg(); - if (!(cfg & CSR_GCFG_TIT)) - return; - - /* Enable guest access to hard timer */ - write_csr_gcfg(cfg & ~CSR_GCFG_TIT); - /* * Freeze the soft-timer and sync the guest stable timer with it. We do * this with interrupts disabled to avoid latency. @@ -174,21 +165,15 @@ static void _kvm_save_timer(struct kvm_vcpu *vcpu) */ void kvm_save_timer(struct kvm_vcpu *vcpu) { - unsigned long cfg; struct loongarch_csrs *csr = vcpu->arch.csr; preempt_disable(); - cfg = read_csr_gcfg(); - if (!(cfg & CSR_GCFG_TIT)) { - /* Disable guest use of hard timer */ - write_csr_gcfg(cfg | CSR_GCFG_TIT); - - /* Save hard timer state */ - kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TCFG); - kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TVAL); - if (kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TCFG) & CSR_TCFG_EN) - _kvm_save_timer(vcpu); - } + + /* Save hard timer state */ + kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TCFG); + kvm_save_hw_gcsr(csr, LOONGARCH_CSR_TVAL); + if (kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TCFG) & CSR_TCFG_EN) + _kvm_save_timer(vcpu); /* Save timer-related state to vCPU context */ kvm_save_hw_gcsr(csr, LOONGARCH_CSR_ESTAT); -- cgit v1.2.3 From 1ab9c6099495f79bfbcd6058d02d7556034a89b0 Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Tue, 19 Dec 2023 10:48:28 +0800 Subject: LoongArch: KVM: Remove kvm_acquire_timer() before entering guest Timer emulation method in VM is switch to SW timer, there are two places where timer emulation is needed. One is during vcpu thread context switch, the other is halt-polling with idle instruction emulation. SW timer switching is removed during halt-polling mode, so it is not necessary to disable SW timer before entering to guest. This patch removes SW timer handling before entering guest mode, and put it in HW timer restoring flow when vcpu thread is sched-in. With this patch, vm timer emulation is simpler, there is SW/HW timer switch only in vcpu thread context switch scenario. Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/kvm_vcpu.h | 1 - arch/loongarch/kvm/timer.c | 22 ++++++---------------- arch/loongarch/kvm/vcpu.c | 29 ----------------------------- 3 files changed, 6 insertions(+), 46 deletions(-) (limited to 'arch') diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h index 553cfa2b2b1c..0e87652f780a 100644 --- a/arch/loongarch/include/asm/kvm_vcpu.h +++ b/arch/loongarch/include/asm/kvm_vcpu.h @@ -55,7 +55,6 @@ void kvm_save_fpu(struct loongarch_fpu *fpu); void kvm_restore_fpu(struct loongarch_fpu *fpu); void kvm_restore_fcsr(struct loongarch_fpu *fpu); -void kvm_acquire_timer(struct kvm_vcpu *vcpu); void kvm_init_timer(struct kvm_vcpu *vcpu, unsigned long hz); void kvm_reset_timer(struct kvm_vcpu *vcpu); void kvm_save_timer(struct kvm_vcpu *vcpu); diff --git a/arch/loongarch/kvm/timer.c b/arch/loongarch/kvm/timer.c index d6d5bcea349b..d362d87a54aa 100644 --- a/arch/loongarch/kvm/timer.c +++ b/arch/loongarch/kvm/timer.c @@ -64,19 +64,6 @@ void kvm_init_timer(struct kvm_vcpu *vcpu, unsigned long timer_hz) kvm_write_sw_gcsr(vcpu->arch.csr, LOONGARCH_CSR_TVAL, 0); } -/* - * Restore hard timer state and enable guest to access timer registers - * without trap, should be called with irq disabled - */ -void kvm_acquire_timer(struct kvm_vcpu *vcpu) -{ - /* - * Freeze the soft-timer and sync the guest stable timer with it. We do - * this with interrupts disabled to avoid latency. - */ - hrtimer_cancel(&vcpu->arch.swtimer); -} - /* * Restore soft timer state from saved context. */ @@ -98,6 +85,11 @@ void kvm_restore_timer(struct kvm_vcpu *vcpu) return; } + /* + * Freeze the soft-timer and sync the guest stable timer with it. + */ + hrtimer_cancel(&vcpu->arch.swtimer); + /* * Set remainder tick value if not expired */ @@ -115,7 +107,7 @@ void kvm_restore_timer(struct kvm_vcpu *vcpu) /* * Inject timer here though sw timer should inject timer * interrupt async already, since sw timer may be cancelled - * during injecting intr async in function kvm_acquire_timer + * during injecting intr async */ kvm_queue_irq(vcpu, INT_TI); } @@ -140,11 +132,9 @@ static void _kvm_save_timer(struct kvm_vcpu *vcpu) vcpu->arch.expire = expire; if (ticks) { /* - * Update hrtimer to use new timeout * HRTIMER_MODE_PINNED is suggested since vcpu may run in * the same physical cpu in next time */ - hrtimer_cancel(&vcpu->arch.swtimer); hrtimer_start(&vcpu->arch.swtimer, expire, HRTIMER_MODE_ABS_PINNED); } else if (vcpu->stat.generic.blocking) { /* diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 54f544b30f32..53fcef8b24a1 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -95,7 +95,6 @@ static int kvm_pre_enter_guest(struct kvm_vcpu *vcpu) * check vmid before vcpu enter guest */ local_irq_disable(); - kvm_acquire_timer(vcpu); kvm_deliver_intr(vcpu); kvm_deliver_exception(vcpu); /* Make sure the vcpu mode has been written */ @@ -251,23 +250,6 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, return -EINVAL; } -/** - * kvm_migrate_count() - Migrate timer. - * @vcpu: Virtual CPU. - * - * Migrate hrtimer to the current CPU by cancelling and restarting it - * if the hrtimer is active. - * - * Must be called when the vCPU is migrated to a different CPU, so that - * the timer can interrupt the guest at the new CPU, and the timer irq can - * be delivered to the vCPU. - */ -static void kvm_migrate_count(struct kvm_vcpu *vcpu) -{ - if (hrtimer_cancel(&vcpu->arch.swtimer)) - hrtimer_restart(&vcpu->arch.swtimer); -} - static int _kvm_getcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 *val) { unsigned long gintc; @@ -796,17 +778,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) unsigned long flags; local_irq_save(flags); - if (vcpu->arch.last_sched_cpu != cpu) { - kvm_debug("[%d->%d]KVM vCPU[%d] switch\n", - vcpu->arch.last_sched_cpu, cpu, vcpu->vcpu_id); - /* - * Migrate the timer interrupt to the current CPU so that it - * always interrupts the guest and synchronously triggers a - * guest timer interrupt. - */ - kvm_migrate_count(vcpu); - } - /* Restore guest state to registers */ _kvm_vcpu_load(vcpu, cpu); local_irq_restore(flags); -- cgit v1.2.3 From 5b3d524993ff1fb36089be850ccb121ac3296bcf Mon Sep 17 00:00:00 2001 From: Bibo Mao Date: Tue, 19 Dec 2023 10:48:28 +0800 Subject: LoongArch: KVM: Fix timer emulation with oneshot mode When timer is fired in oneshot mode, CSR TVAL will be -1 rather than 0. There needs special handing for this situation. There are two scenarios when oneshot timer is fired. One scenario is that time is fired after exiting to host, CSR TVAL is set with 0 in order to inject hw interrupt, and -1 will assigned to CSR TVAL soon. The other situation is that timer is fired in VM and guest kernel is hanlding timer IRQ, IRQ is acked and is ready to set next expired timer value, then vm exits to host. Timer interrupt should not be inject at this point, else there will be spurious timer interrupt. Here hw timer irq status in CSR ESTAT is used to judge these two scenarios. If CSR TVAL is -1, the oneshot timer is fired; and if timer hw irq is on in CSR ESTAT register, it happens after exiting to host; else if timer hw irq is off, we think that it happens in vm and timer IRQ handler has already acked IRQ. With this patch, runltp with version ltp20230516 passes to run in vm. Signed-off-by: Bibo Mao Signed-off-by: Huacai Chen --- arch/loongarch/kvm/timer.c | 68 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 55 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/loongarch/kvm/timer.c b/arch/loongarch/kvm/timer.c index d362d87a54aa..111328f60872 100644 --- a/arch/loongarch/kvm/timer.c +++ b/arch/loongarch/kvm/timer.c @@ -69,14 +69,19 @@ void kvm_init_timer(struct kvm_vcpu *vcpu, unsigned long timer_hz) */ void kvm_restore_timer(struct kvm_vcpu *vcpu) { - unsigned long cfg, delta, period; + unsigned long cfg, estat; + unsigned long ticks, delta, period; ktime_t expire, now; struct loongarch_csrs *csr = vcpu->arch.csr; /* * Set guest stable timer cfg csr + * Disable timer before restore estat CSR register, avoid to + * get invalid timer interrupt for old timer cfg */ cfg = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TCFG); + + write_gcsr_timercfg(0); kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_ESTAT); kvm_restore_hw_gcsr(csr, LOONGARCH_CSR_TCFG); if (!(cfg & CSR_TCFG_EN)) { @@ -90,20 +95,47 @@ void kvm_restore_timer(struct kvm_vcpu *vcpu) */ hrtimer_cancel(&vcpu->arch.swtimer); + /* + * From LoongArch Reference Manual Volume 1 Chapter 7.6.2 + * If oneshot timer is fired, CSR TVAL will be -1, there are two + * conditions: + * 1) timer is fired during exiting to host + * 2) timer is fired and vm is doing timer irq, and then exiting to + * host. Host should not inject timer irq to avoid spurious + * timer interrupt again + */ + ticks = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TVAL); + estat = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_ESTAT); + if (!(cfg & CSR_TCFG_PERIOD) && (ticks > cfg)) { + /* + * Writing 0 to LOONGARCH_CSR_TVAL will inject timer irq + * and set CSR TVAL with -1 + */ + write_gcsr_timertick(0); + + /* + * Writing CSR_TINTCLR_TI to LOONGARCH_CSR_TINTCLR will clear + * timer interrupt, and CSR TVAL keeps unchanged with -1, it + * avoids spurious timer interrupt + */ + if (!(estat & CPU_TIMER)) + gcsr_write(CSR_TINTCLR_TI, LOONGARCH_CSR_TINTCLR); + return; + } + /* * Set remainder tick value if not expired */ + delta = 0; now = ktime_get(); expire = vcpu->arch.expire; if (ktime_before(now, expire)) delta = ktime_to_tick(vcpu, ktime_sub(expire, now)); - else { - if (cfg & CSR_TCFG_PERIOD) { - period = cfg & CSR_TCFG_VAL; - delta = ktime_to_tick(vcpu, ktime_sub(now, expire)); - delta = period - (delta % period); - } else - delta = 0; + else if (cfg & CSR_TCFG_PERIOD) { + period = cfg & CSR_TCFG_VAL; + delta = ktime_to_tick(vcpu, ktime_sub(now, expire)); + delta = period - (delta % period); + /* * Inject timer here though sw timer should inject timer * interrupt async already, since sw timer may be cancelled @@ -122,15 +154,25 @@ void kvm_restore_timer(struct kvm_vcpu *vcpu) */ static void _kvm_save_timer(struct kvm_vcpu *vcpu) { - unsigned long ticks, delta; + unsigned long ticks, delta, cfg; ktime_t expire; struct loongarch_csrs *csr = vcpu->arch.csr; + cfg = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TCFG); ticks = kvm_read_sw_gcsr(csr, LOONGARCH_CSR_TVAL); - delta = tick_to_ns(vcpu, ticks); - expire = ktime_add_ns(ktime_get(), delta); - vcpu->arch.expire = expire; - if (ticks) { + + /* + * From LoongArch Reference Manual Volume 1 Chapter 7.6.2 + * If period timer is fired, CSR TVAL will be reloaded from CSR TCFG + * If oneshot timer is fired, CSR TVAL will be -1 + * Here judge one-shot timer fired by checking whether TVAL is larger + * than TCFG + */ + if (ticks < cfg) { + delta = tick_to_ns(vcpu, ticks); + expire = ktime_add_ns(ktime_get(), delta); + vcpu->arch.expire = expire; + /* * HRTIMER_MODE_PINNED is suggested since vcpu may run in * the same physical cpu in next time -- cgit v1.2.3 From db1ecca22edf27c5a3dd66af406c88b5b5ac7cc1 Mon Sep 17 00:00:00 2001 From: Tianrui Zhao Date: Tue, 19 Dec 2023 10:48:28 +0800 Subject: LoongArch: KVM: Add LSX (128bit SIMD) support This patch adds LSX (128bit SIMD) support for LoongArch KVM. There will be LSX exception in KVM when guest use the LSX instructions. KVM will enable LSX and restore the vector registers for guest and then return to guest to continue running. Signed-off-by: Tianrui Zhao Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/kvm_host.h | 15 ++- arch/loongarch/include/asm/kvm_vcpu.h | 10 ++ arch/loongarch/include/uapi/asm/kvm.h | 1 + arch/loongarch/kernel/fpu.S | 1 + arch/loongarch/kvm/exit.c | 21 ++++ arch/loongarch/kvm/switch.S | 16 +++ arch/loongarch/kvm/trace.h | 4 +- arch/loongarch/kvm/vcpu.c | 220 +++++++++++++++++++++++++++++++++- 8 files changed, 280 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index 0e89db020481..b0c5cdd8014c 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -95,8 +95,9 @@ enum emulation_result { }; #define KVM_LARCH_FPU (0x1 << 0) -#define KVM_LARCH_SWCSR_LATEST (0x1 << 1) -#define KVM_LARCH_HWCSR_USABLE (0x1 << 2) +#define KVM_LARCH_LSX (0x1 << 1) +#define KVM_LARCH_SWCSR_LATEST (0x1 << 2) +#define KVM_LARCH_HWCSR_USABLE (0x1 << 3) struct kvm_vcpu_arch { /* @@ -178,6 +179,16 @@ static inline void writel_sw_gcsr(struct loongarch_csrs *csr, int reg, unsigned csr->csrs[reg] = val; } +static inline bool kvm_guest_has_fpu(struct kvm_vcpu_arch *arch) +{ + return arch->cpucfg[2] & CPUCFG2_FP; +} + +static inline bool kvm_guest_has_lsx(struct kvm_vcpu_arch *arch) +{ + return arch->cpucfg[2] & CPUCFG2_LSX; +} + /* Debug: dump vcpu state */ int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu); diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h index 0e87652f780a..db08dd46b525 100644 --- a/arch/loongarch/include/asm/kvm_vcpu.h +++ b/arch/loongarch/include/asm/kvm_vcpu.h @@ -55,6 +55,16 @@ void kvm_save_fpu(struct loongarch_fpu *fpu); void kvm_restore_fpu(struct loongarch_fpu *fpu); void kvm_restore_fcsr(struct loongarch_fpu *fpu); +#ifdef CONFIG_CPU_HAS_LSX +int kvm_own_lsx(struct kvm_vcpu *vcpu); +void kvm_save_lsx(struct loongarch_fpu *fpu); +void kvm_restore_lsx(struct loongarch_fpu *fpu); +#else +static inline int kvm_own_lsx(struct kvm_vcpu *vcpu) { } +static inline void kvm_save_lsx(struct loongarch_fpu *fpu) { } +static inline void kvm_restore_lsx(struct loongarch_fpu *fpu) { } +#endif + void kvm_init_timer(struct kvm_vcpu *vcpu, unsigned long hz); void kvm_reset_timer(struct kvm_vcpu *vcpu); void kvm_save_timer(struct kvm_vcpu *vcpu); diff --git a/arch/loongarch/include/uapi/asm/kvm.h b/arch/loongarch/include/uapi/asm/kvm.h index c6ad2ee6106c..923d0bd38294 100644 --- a/arch/loongarch/include/uapi/asm/kvm.h +++ b/arch/loongarch/include/uapi/asm/kvm.h @@ -79,6 +79,7 @@ struct kvm_fpu { #define LOONGARCH_REG_64(TYPE, REG) (TYPE | KVM_REG_SIZE_U64 | (REG << LOONGARCH_REG_SHIFT)) #define KVM_IOC_CSRID(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CSR, REG) #define KVM_IOC_CPUCFG(REG) LOONGARCH_REG_64(KVM_REG_LOONGARCH_CPUCFG, REG) +#define KVM_LOONGARCH_VCPU_CPUCFG 0 struct kvm_debug_exit_arch { }; diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S index d53ab10f4644..a400924c0348 100644 --- a/arch/loongarch/kernel/fpu.S +++ b/arch/loongarch/kernel/fpu.S @@ -349,6 +349,7 @@ SYM_FUNC_START(_restore_lsx_upper) lsx_restore_all_upper a0 t0 t1 jr ra SYM_FUNC_END(_restore_lsx_upper) +EXPORT_SYMBOL(_restore_lsx_upper) SYM_FUNC_START(_init_lsx_upper) lsx_init_all_upper t1 diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c index e708a1786d6b..676f7a3a335c 100644 --- a/arch/loongarch/kvm/exit.c +++ b/arch/loongarch/kvm/exit.c @@ -634,6 +634,11 @@ static int kvm_handle_fpu_disabled(struct kvm_vcpu *vcpu) { struct kvm_run *run = vcpu->run; + if (!kvm_guest_has_fpu(&vcpu->arch)) { + kvm_queue_exception(vcpu, EXCCODE_INE, 0); + return RESUME_GUEST; + } + /* * If guest FPU not present, the FPU operation should have been * treated as a reserved instruction! @@ -650,6 +655,21 @@ static int kvm_handle_fpu_disabled(struct kvm_vcpu *vcpu) return RESUME_GUEST; } +/* + * kvm_handle_lsx_disabled() - Guest used LSX while disabled in root. + * @vcpu: Virtual CPU context. + * + * Handle when the guest attempts to use LSX when it is disabled in the root + * context. + */ +static int kvm_handle_lsx_disabled(struct kvm_vcpu *vcpu) +{ + if (kvm_own_lsx(vcpu)) + kvm_queue_exception(vcpu, EXCCODE_INE, 0); + + return RESUME_GUEST; +} + /* * LoongArch KVM callback handling for unimplemented guest exiting */ @@ -678,6 +698,7 @@ static exit_handle_fn kvm_fault_tables[EXCCODE_INT_START] = { [EXCCODE_TLBS] = kvm_handle_write_fault, [EXCCODE_TLBM] = kvm_handle_write_fault, [EXCCODE_FPDIS] = kvm_handle_fpu_disabled, + [EXCCODE_LSXDIS] = kvm_handle_lsx_disabled, [EXCCODE_GSPR] = kvm_handle_gspr, }; diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S index 0ed9040307b7..00fbf772d16f 100644 --- a/arch/loongarch/kvm/switch.S +++ b/arch/loongarch/kvm/switch.S @@ -245,6 +245,22 @@ SYM_FUNC_START(kvm_restore_fpu) jr ra SYM_FUNC_END(kvm_restore_fpu) +#ifdef CONFIG_CPU_HAS_LSX +SYM_FUNC_START(kvm_save_lsx) + fpu_save_csr a0 t1 + fpu_save_cc a0 t1 t2 + lsx_save_data a0 t1 + jr ra +SYM_FUNC_END(kvm_save_lsx) + +SYM_FUNC_START(kvm_restore_lsx) + lsx_restore_data a0 t1 + fpu_restore_cc a0 t1 t2 + fpu_restore_csr a0 t1 t2 + jr ra +SYM_FUNC_END(kvm_restore_lsx) +#endif + .section ".rodata" SYM_DATA(kvm_exception_size, .quad kvm_exc_entry_end - kvm_exc_entry) SYM_DATA(kvm_enter_guest_size, .quad kvm_enter_guest_end - kvm_enter_guest) diff --git a/arch/loongarch/kvm/trace.h b/arch/loongarch/kvm/trace.h index a1e35d655418..7da4e230e896 100644 --- a/arch/loongarch/kvm/trace.h +++ b/arch/loongarch/kvm/trace.h @@ -102,6 +102,7 @@ TRACE_EVENT(kvm_exit_gspr, #define KVM_TRACE_AUX_DISCARD 4 #define KVM_TRACE_AUX_FPU 1 +#define KVM_TRACE_AUX_LSX 2 #define kvm_trace_symbol_aux_op \ { KVM_TRACE_AUX_SAVE, "save" }, \ @@ -111,7 +112,8 @@ TRACE_EVENT(kvm_exit_gspr, { KVM_TRACE_AUX_DISCARD, "discard" } #define kvm_trace_symbol_aux_state \ - { KVM_TRACE_AUX_FPU, "FPU" } + { KVM_TRACE_AUX_FPU, "FPU" }, \ + { KVM_TRACE_AUX_LSX, "LSX" } TRACE_EVENT(kvm_aux, TP_PROTO(struct kvm_vcpu *vcpu, unsigned int op, diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 53fcef8b24a1..80487d177ca4 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -298,6 +298,69 @@ static int _kvm_setcsr(struct kvm_vcpu *vcpu, unsigned int id, u64 val) return ret; } +static int _kvm_get_cpucfg(int id, u64 *v) +{ + int ret = 0; + + if (id < 0 && id >= KVM_MAX_CPUCFG_REGS) + return -EINVAL; + + switch (id) { + case 2: + /* Return CPUCFG2 features which have been supported by KVM */ + *v = CPUCFG2_FP | CPUCFG2_FPSP | CPUCFG2_FPDP | + CPUCFG2_FPVERS | CPUCFG2_LLFTP | CPUCFG2_LLFTPREV | + CPUCFG2_LAM; + /* + * If LSX is supported by CPU, it is also supported by KVM, + * as we implement it. + */ + if (cpu_has_lsx) + *v |= CPUCFG2_LSX; + break; + default: + ret = -EINVAL; + break; + } + return ret; +} + +static int kvm_check_cpucfg(int id, u64 val) +{ + u64 mask; + int ret = 0; + + if (id < 0 && id >= KVM_MAX_CPUCFG_REGS) + return -EINVAL; + + if (_kvm_get_cpucfg(id, &mask)) + return ret; + + switch (id) { + case 2: + /* CPUCFG2 features checking */ + if (val & ~mask) + /* The unsupported features should not be set */ + ret = -EINVAL; + else if (!(val & CPUCFG2_LLFTP)) + /* The LLFTP must be set, as guest must has a constant timer */ + ret = -EINVAL; + else if ((val & CPUCFG2_FP) && (!(val & CPUCFG2_FPSP) || !(val & CPUCFG2_FPDP))) + /* Single and double float point must both be set when enable FP */ + ret = -EINVAL; + else if ((val & CPUCFG2_LSX) && !(val & CPUCFG2_FP)) + /* FP should be set when enable LSX */ + ret = -EINVAL; + else if ((val & CPUCFG2_LASX) && !(val & CPUCFG2_LSX)) + /* LSX, FP should be set when enable LASX, and FP has been checked before. */ + ret = -EINVAL; + break; + default: + break; + } + return ret; +} + static int kvm_get_one_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg, u64 *v) { @@ -367,10 +430,10 @@ static int kvm_set_one_reg(struct kvm_vcpu *vcpu, break; case KVM_REG_LOONGARCH_CPUCFG: id = KVM_GET_IOC_CPUCFG_IDX(reg->id); - if (id >= 0 && id < KVM_MAX_CPUCFG_REGS) - vcpu->arch.cpucfg[id] = (u32)v; - else - ret = -EINVAL; + ret = kvm_check_cpucfg(id, v); + if (ret) + break; + vcpu->arch.cpucfg[id] = (u32)v; break; case KVM_REG_LOONGARCH_KVM: switch (reg->id) { @@ -460,10 +523,94 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu, return -EINVAL; } +static int kvm_loongarch_cpucfg_has_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + switch (attr->attr) { + case 2: + return 0; + default: + return -ENXIO; + } + + return -ENXIO; +} + +static int kvm_loongarch_vcpu_has_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + int ret = -ENXIO; + + switch (attr->group) { + case KVM_LOONGARCH_VCPU_CPUCFG: + ret = kvm_loongarch_cpucfg_has_attr(vcpu, attr); + break; + default: + break; + } + + return ret; +} + +static int kvm_loongarch_get_cpucfg_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + int ret = 0; + uint64_t val; + uint64_t __user *uaddr = (uint64_t __user *)attr->addr; + + ret = _kvm_get_cpucfg(attr->attr, &val); + if (ret) + return ret; + + put_user(val, uaddr); + + return ret; +} + +static int kvm_loongarch_vcpu_get_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + int ret = -ENXIO; + + switch (attr->group) { + case KVM_LOONGARCH_VCPU_CPUCFG: + ret = kvm_loongarch_get_cpucfg_attr(vcpu, attr); + break; + default: + break; + } + + return ret; +} + +static int kvm_loongarch_cpucfg_set_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + return -ENXIO; +} + +static int kvm_loongarch_vcpu_set_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + int ret = -ENXIO; + + switch (attr->group) { + case KVM_LOONGARCH_VCPU_CPUCFG: + ret = kvm_loongarch_cpucfg_set_attr(vcpu, attr); + break; + default: + break; + } + + return ret; +} + long kvm_arch_vcpu_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { long r; + struct kvm_device_attr attr; void __user *argp = (void __user *)arg; struct kvm_vcpu *vcpu = filp->private_data; @@ -503,6 +650,27 @@ long kvm_arch_vcpu_ioctl(struct file *filp, r = kvm_vcpu_ioctl_enable_cap(vcpu, &cap); break; } + case KVM_HAS_DEVICE_ATTR: { + r = -EFAULT; + if (copy_from_user(&attr, argp, sizeof(attr))) + break; + r = kvm_loongarch_vcpu_has_attr(vcpu, &attr); + break; + } + case KVM_GET_DEVICE_ATTR: { + r = -EFAULT; + if (copy_from_user(&attr, argp, sizeof(attr))) + break; + r = kvm_loongarch_vcpu_get_attr(vcpu, &attr); + break; + } + case KVM_SET_DEVICE_ATTR: { + r = -EFAULT; + if (copy_from_user(&attr, argp, sizeof(attr))) + break; + r = kvm_loongarch_vcpu_set_attr(vcpu, &attr); + break; + } default: r = -ENOIOCTLCMD; break; @@ -550,12 +718,54 @@ void kvm_own_fpu(struct kvm_vcpu *vcpu) preempt_enable(); } +#ifdef CONFIG_CPU_HAS_LSX +/* Enable LSX and restore context */ +int kvm_own_lsx(struct kvm_vcpu *vcpu) +{ + if (!kvm_guest_has_fpu(&vcpu->arch) || !kvm_guest_has_lsx(&vcpu->arch)) + return -EINVAL; + + preempt_disable(); + + /* Enable LSX for guest */ + set_csr_euen(CSR_EUEN_LSXEN | CSR_EUEN_FPEN); + switch (vcpu->arch.aux_inuse & KVM_LARCH_FPU) { + case KVM_LARCH_FPU: + /* + * Guest FPU state already loaded, + * only restore upper LSX state + */ + _restore_lsx_upper(&vcpu->arch.fpu); + break; + default: + /* Neither FP or LSX already active, + * restore full LSX state + */ + kvm_restore_lsx(&vcpu->arch.fpu); + break; + } + + trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_LSX); + vcpu->arch.aux_inuse |= KVM_LARCH_LSX | KVM_LARCH_FPU; + preempt_enable(); + + return 0; +} +#endif + /* Save context and disable FPU */ void kvm_lose_fpu(struct kvm_vcpu *vcpu) { preempt_disable(); - if (vcpu->arch.aux_inuse & KVM_LARCH_FPU) { + if (vcpu->arch.aux_inuse & KVM_LARCH_LSX) { + kvm_save_lsx(&vcpu->arch.fpu); + vcpu->arch.aux_inuse &= ~(KVM_LARCH_LSX | KVM_LARCH_FPU); + trace_kvm_aux(vcpu, KVM_TRACE_AUX_SAVE, KVM_TRACE_AUX_LSX); + + /* Disable LSX & FPU */ + clear_csr_euen(CSR_EUEN_FPEN | CSR_EUEN_LSXEN); + } else if (vcpu->arch.aux_inuse & KVM_LARCH_FPU) { kvm_save_fpu(&vcpu->arch.fpu); vcpu->arch.aux_inuse &= ~KVM_LARCH_FPU; trace_kvm_aux(vcpu, KVM_TRACE_AUX_SAVE, KVM_TRACE_AUX_FPU); -- cgit v1.2.3 From 118e10cd893d57df55b3302dfd188a981b6e6d1c Mon Sep 17 00:00:00 2001 From: Tianrui Zhao Date: Tue, 19 Dec 2023 10:48:28 +0800 Subject: LoongArch: KVM: Add LASX (256bit SIMD) support This patch adds LASX (256bit SIMD) support for LoongArch KVM. There will be LASX exception in KVM when guest use the LASX instructions. KVM will enable LASX and restore the vector registers for guest and then return to guest to continue running. Reviewed-by: Bibo Mao Signed-off-by: Tianrui Zhao Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/kvm_host.h | 10 +++++-- arch/loongarch/include/asm/kvm_vcpu.h | 10 +++++++ arch/loongarch/kernel/fpu.S | 1 + arch/loongarch/kvm/exit.c | 16 +++++++++++ arch/loongarch/kvm/switch.S | 15 +++++++++++ arch/loongarch/kvm/trace.h | 4 ++- arch/loongarch/kvm/vcpu.c | 51 ++++++++++++++++++++++++++++++++++- 7 files changed, 103 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/loongarch/include/asm/kvm_host.h b/arch/loongarch/include/asm/kvm_host.h index b0c5cdd8014c..5bdb34b2c5d6 100644 --- a/arch/loongarch/include/asm/kvm_host.h +++ b/arch/loongarch/include/asm/kvm_host.h @@ -96,8 +96,9 @@ enum emulation_result { #define KVM_LARCH_FPU (0x1 << 0) #define KVM_LARCH_LSX (0x1 << 1) -#define KVM_LARCH_SWCSR_LATEST (0x1 << 2) -#define KVM_LARCH_HWCSR_USABLE (0x1 << 3) +#define KVM_LARCH_LASX (0x1 << 2) +#define KVM_LARCH_SWCSR_LATEST (0x1 << 3) +#define KVM_LARCH_HWCSR_USABLE (0x1 << 4) struct kvm_vcpu_arch { /* @@ -189,6 +190,11 @@ static inline bool kvm_guest_has_lsx(struct kvm_vcpu_arch *arch) return arch->cpucfg[2] & CPUCFG2_LSX; } +static inline bool kvm_guest_has_lasx(struct kvm_vcpu_arch *arch) +{ + return arch->cpucfg[2] & CPUCFG2_LASX; +} + /* Debug: dump vcpu state */ int kvm_arch_vcpu_dump_regs(struct kvm_vcpu *vcpu); diff --git a/arch/loongarch/include/asm/kvm_vcpu.h b/arch/loongarch/include/asm/kvm_vcpu.h index db08dd46b525..e71ceb88f29e 100644 --- a/arch/loongarch/include/asm/kvm_vcpu.h +++ b/arch/loongarch/include/asm/kvm_vcpu.h @@ -65,6 +65,16 @@ static inline void kvm_save_lsx(struct loongarch_fpu *fpu) { } static inline void kvm_restore_lsx(struct loongarch_fpu *fpu) { } #endif +#ifdef CONFIG_CPU_HAS_LASX +int kvm_own_lasx(struct kvm_vcpu *vcpu); +void kvm_save_lasx(struct loongarch_fpu *fpu); +void kvm_restore_lasx(struct loongarch_fpu *fpu); +#else +static inline int kvm_own_lasx(struct kvm_vcpu *vcpu) { } +static inline void kvm_save_lasx(struct loongarch_fpu *fpu) { } +static inline void kvm_restore_lasx(struct loongarch_fpu *fpu) { } +#endif + void kvm_init_timer(struct kvm_vcpu *vcpu, unsigned long hz); void kvm_reset_timer(struct kvm_vcpu *vcpu); void kvm_save_timer(struct kvm_vcpu *vcpu); diff --git a/arch/loongarch/kernel/fpu.S b/arch/loongarch/kernel/fpu.S index a400924c0348..4382e36ae3d4 100644 --- a/arch/loongarch/kernel/fpu.S +++ b/arch/loongarch/kernel/fpu.S @@ -385,6 +385,7 @@ SYM_FUNC_START(_restore_lasx_upper) lasx_restore_all_upper a0 t0 t1 jr ra SYM_FUNC_END(_restore_lasx_upper) +EXPORT_SYMBOL(_restore_lasx_upper) SYM_FUNC_START(_init_lasx_upper) lasx_init_all_upper t1 diff --git a/arch/loongarch/kvm/exit.c b/arch/loongarch/kvm/exit.c index 676f7a3a335c..ed1d89d53e2e 100644 --- a/arch/loongarch/kvm/exit.c +++ b/arch/loongarch/kvm/exit.c @@ -670,6 +670,21 @@ static int kvm_handle_lsx_disabled(struct kvm_vcpu *vcpu) return RESUME_GUEST; } +/* + * kvm_handle_lasx_disabled() - Guest used LASX while disabled in root. + * @vcpu: Virtual CPU context. + * + * Handle when the guest attempts to use LASX when it is disabled in the root + * context. + */ +static int kvm_handle_lasx_disabled(struct kvm_vcpu *vcpu) +{ + if (kvm_own_lasx(vcpu)) + kvm_queue_exception(vcpu, EXCCODE_INE, 0); + + return RESUME_GUEST; +} + /* * LoongArch KVM callback handling for unimplemented guest exiting */ @@ -699,6 +714,7 @@ static exit_handle_fn kvm_fault_tables[EXCCODE_INT_START] = { [EXCCODE_TLBM] = kvm_handle_write_fault, [EXCCODE_FPDIS] = kvm_handle_fpu_disabled, [EXCCODE_LSXDIS] = kvm_handle_lsx_disabled, + [EXCCODE_LASXDIS] = kvm_handle_lasx_disabled, [EXCCODE_GSPR] = kvm_handle_gspr, }; diff --git a/arch/loongarch/kvm/switch.S b/arch/loongarch/kvm/switch.S index 00fbf772d16f..ba976509bfe8 100644 --- a/arch/loongarch/kvm/switch.S +++ b/arch/loongarch/kvm/switch.S @@ -261,6 +261,21 @@ SYM_FUNC_START(kvm_restore_lsx) SYM_FUNC_END(kvm_restore_lsx) #endif +#ifdef CONFIG_CPU_HAS_LASX +SYM_FUNC_START(kvm_save_lasx) + fpu_save_csr a0 t1 + fpu_save_cc a0 t1 t2 + lasx_save_data a0 t1 + jr ra +SYM_FUNC_END(kvm_save_lasx) + +SYM_FUNC_START(kvm_restore_lasx) + lasx_restore_data a0 t1 + fpu_restore_cc a0 t1 t2 + fpu_restore_csr a0 t1 t2 + jr ra +SYM_FUNC_END(kvm_restore_lasx) +#endif .section ".rodata" SYM_DATA(kvm_exception_size, .quad kvm_exc_entry_end - kvm_exc_entry) SYM_DATA(kvm_enter_guest_size, .quad kvm_enter_guest_end - kvm_enter_guest) diff --git a/arch/loongarch/kvm/trace.h b/arch/loongarch/kvm/trace.h index 7da4e230e896..c2484ad4cffa 100644 --- a/arch/loongarch/kvm/trace.h +++ b/arch/loongarch/kvm/trace.h @@ -103,6 +103,7 @@ TRACE_EVENT(kvm_exit_gspr, #define KVM_TRACE_AUX_FPU 1 #define KVM_TRACE_AUX_LSX 2 +#define KVM_TRACE_AUX_LASX 3 #define kvm_trace_symbol_aux_op \ { KVM_TRACE_AUX_SAVE, "save" }, \ @@ -113,7 +114,8 @@ TRACE_EVENT(kvm_exit_gspr, #define kvm_trace_symbol_aux_state \ { KVM_TRACE_AUX_FPU, "FPU" }, \ - { KVM_TRACE_AUX_LSX, "LSX" } + { KVM_TRACE_AUX_LSX, "LSX" }, \ + { KVM_TRACE_AUX_LASX, "LASX" } TRACE_EVENT(kvm_aux, TP_PROTO(struct kvm_vcpu *vcpu, unsigned int op, diff --git a/arch/loongarch/kvm/vcpu.c b/arch/loongarch/kvm/vcpu.c index 80487d177ca4..27701991886d 100644 --- a/arch/loongarch/kvm/vcpu.c +++ b/arch/loongarch/kvm/vcpu.c @@ -317,6 +317,13 @@ static int _kvm_get_cpucfg(int id, u64 *v) */ if (cpu_has_lsx) *v |= CPUCFG2_LSX; + /* + * if LASX is supported by CPU, it is also supported by KVM, + * as we implement it. + */ + if (cpu_has_lasx) + *v |= CPUCFG2_LASX; + break; default: ret = -EINVAL; @@ -753,12 +760,54 @@ int kvm_own_lsx(struct kvm_vcpu *vcpu) } #endif +#ifdef CONFIG_CPU_HAS_LASX +/* Enable LASX and restore context */ +int kvm_own_lasx(struct kvm_vcpu *vcpu) +{ + if (!kvm_guest_has_fpu(&vcpu->arch) || !kvm_guest_has_lsx(&vcpu->arch) || !kvm_guest_has_lasx(&vcpu->arch)) + return -EINVAL; + + preempt_disable(); + + set_csr_euen(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN); + switch (vcpu->arch.aux_inuse & (KVM_LARCH_FPU | KVM_LARCH_LSX)) { + case KVM_LARCH_LSX: + case KVM_LARCH_LSX | KVM_LARCH_FPU: + /* Guest LSX state already loaded, only restore upper LASX state */ + _restore_lasx_upper(&vcpu->arch.fpu); + break; + case KVM_LARCH_FPU: + /* Guest FP state already loaded, only restore upper LSX & LASX state */ + _restore_lsx_upper(&vcpu->arch.fpu); + _restore_lasx_upper(&vcpu->arch.fpu); + break; + default: + /* Neither FP or LSX already active, restore full LASX state */ + kvm_restore_lasx(&vcpu->arch.fpu); + break; + } + + trace_kvm_aux(vcpu, KVM_TRACE_AUX_RESTORE, KVM_TRACE_AUX_LASX); + vcpu->arch.aux_inuse |= KVM_LARCH_LASX | KVM_LARCH_LSX | KVM_LARCH_FPU; + preempt_enable(); + + return 0; +} +#endif + /* Save context and disable FPU */ void kvm_lose_fpu(struct kvm_vcpu *vcpu) { preempt_disable(); - if (vcpu->arch.aux_inuse & KVM_LARCH_LSX) { + if (vcpu->arch.aux_inuse & KVM_LARCH_LASX) { + kvm_save_lasx(&vcpu->arch.fpu); + vcpu->arch.aux_inuse &= ~(KVM_LARCH_LSX | KVM_LARCH_FPU | KVM_LARCH_LASX); + trace_kvm_aux(vcpu, KVM_TRACE_AUX_SAVE, KVM_TRACE_AUX_LASX); + + /* Disable LASX & LSX & FPU */ + clear_csr_euen(CSR_EUEN_FPEN | CSR_EUEN_LSXEN | CSR_EUEN_LASXEN); + } else if (vcpu->arch.aux_inuse & KVM_LARCH_LSX) { kvm_save_lsx(&vcpu->arch.fpu); vcpu->arch.aux_inuse &= ~(KVM_LARCH_LSX | KVM_LARCH_FPU); trace_kvm_aux(vcpu, KVM_TRACE_AUX_SAVE, KVM_TRACE_AUX_LSX); -- cgit v1.2.3