From 3429055f0451cd3a281f8ed6691335ead626b136 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Wed, 20 Mar 2024 11:18:49 +0100 Subject: mm: kmsan: implement kmsan_memmove() Provide a hook that can be used by custom memcpy implementations to tell KMSAN that the metadata needs to be copied. Without that, false positive reports are possible in the cases where KMSAN fails to intercept memory initialization. Link: https://lore.kernel.org/all/3b7dbd88-0861-4638-b2d2-911c97a4cadf@I-love.SAKURA.ne.jp/ Link: https://lkml.kernel.org/r/20240320101851.2589698-1-glider@google.com Signed-off-by: Alexander Potapenko Suggested-by: Tetsuo Handa Reviewed-by: Marco Elver Cc: Dmitry Vyukov Cc: Linus Torvalds Cc: Tetsuo Handa Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- include/linux/kmsan-checks.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include') diff --git a/include/linux/kmsan-checks.h b/include/linux/kmsan-checks.h index c4cae333deec..e1082dc40abc 100644 --- a/include/linux/kmsan-checks.h +++ b/include/linux/kmsan-checks.h @@ -61,6 +61,17 @@ void kmsan_check_memory(const void *address, size_t size); void kmsan_copy_to_user(void __user *to, const void *from, size_t to_copy, size_t left); +/** + * kmsan_memmove() - Notify KMSAN about a data copy within kernel. + * @to: destination address in the kernel. + * @from: source address in the kernel. + * @to_copy: number of bytes to copy. + * + * Invoked after non-instrumented version (e.g. implemented using assembly + * code) of memmove()/memcpy() is called, in order to copy KMSAN's metadata. 
+ */ +void kmsan_memmove(void *to, const void *from, size_t to_copy); + #else static inline void kmsan_poison_memory(const void *address, size_t size, @@ -78,6 +89,10 @@ static inline void kmsan_copy_to_user(void __user *to, const void *from, { } +static inline void kmsan_memmove(void *to, const void *from, size_t to_copy) +{ +} + #endif #endif /* _LINUX_KMSAN_CHECKS_H */ -- cgit v1.2.3 From 922621a6828430ea3119b869336157d253489334 Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Wed, 20 Mar 2024 11:18:50 +0100 Subject: instrumented.h: add instrument_memcpy_before, instrument_memcpy_after Bug detection tools based on compiler instrumentation may miss memory accesses in custom memcpy implementations (such as copy_mc_to_kernel). Provide instrumentation hooks that tell KASAN, KCSAN, and KMSAN about such accesses. Link: https://lore.kernel.org/all/3b7dbd88-0861-4638-b2d2-911c97a4cadf@I-love.SAKURA.ne.jp/ Link: https://lkml.kernel.org/r/20240320101851.2589698-2-glider@google.com Signed-off-by: Alexander Potapenko Reviewed-by: Marco Elver Cc: Dmitry Vyukov Cc: Tetsuo Handa Cc: Linus Torvalds Cc: Thomas Gleixner Signed-off-by: Andrew Morton --- include/linux/instrumented.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include') diff --git a/include/linux/instrumented.h b/include/linux/instrumented.h index 1b608e00290a..711a1f0d1a73 100644 --- a/include/linux/instrumented.h +++ b/include/linux/instrumented.h @@ -147,6 +147,41 @@ instrument_copy_from_user_after(const void *to, const void __user *from, kmsan_unpoison_memory(to, n - left); } +/** + * instrument_memcpy_before - add instrumentation before non-instrumented memcpy + * @to: destination address + * @from: source address + * @n: number of bytes to copy + * + * Instrument memory accesses that happen in custom memcpy implementations. The + * instrumentation should be inserted before the memcpy call. 
+ */ +static __always_inline void instrument_memcpy_before(void *to, const void *from, + unsigned long n) +{ + kasan_check_write(to, n); + kasan_check_read(from, n); + kcsan_check_write(to, n); + kcsan_check_read(from, n); +} + +/** + * instrument_memcpy_after - add instrumentation after non-instrumented memcpy + * @to: destination address + * @from: source address + * @n: number of bytes to copy + * @left: number of bytes not copied (if known) + * + * Instrument memory accesses that happen in custom memcpy implementations. The + * instrumentation should be inserted after the memcpy call. + */ +static __always_inline void instrument_memcpy_after(void *to, const void *from, + unsigned long n, + unsigned long left) +{ + kmsan_memmove(to, from, n - left); +} + /** * instrument_get_user() - add instrumentation to get_user()-like macros * @to: destination variable, may not be address-taken -- cgit v1.2.3 From 055e09ac54ae9c8396c1086fe06a73e0ce9bdd10 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 17 Apr 2024 23:11:23 +0300 Subject: cpumask: delete unused reset_cpu_possible_mask() Link: https://lkml.kernel.org/r/20240417201123.2961-1-adobriyan@gmail.com Signed-off-by: Alexey Dobriyan Cc: Rasmus Villemoes Cc: Yury Norov Signed-off-by: Andrew Morton --- include/linux/cpumask.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 1c29947db848..04536a29f10f 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -1017,11 +1017,6 @@ void init_cpu_present(const struct cpumask *src); void init_cpu_possible(const struct cpumask *src); void init_cpu_online(const struct cpumask *src); -static inline void reset_cpu_possible_mask(void) -{ - bitmap_zero(cpumask_bits(&__cpu_possible_mask), NR_CPUS); -} - static inline void set_cpu_possible(unsigned int cpu, bool possible) { -- cgit v1.2.3 From f4af41bf177add167e39e4b0203460b1d0b531f6 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: 
Tue, 9 Apr 2024 12:22:38 +0800 Subject: kexec: fix the unexpected kexec_dprintk() macro Jiri reported that the current kexec_dprintk() always prints out debugging message whenever kexec/kdump loading is triggered. That is not wanted. The debugging message is supposed to be printed out when 'kexec -s -d' is specified for kexec/kdump loading. After investigating, the reason is the current kexec_dprintk() takes printk(KERN_INFO) or printk(KERN_DEBUG) depending on whether '-d' is specified. However, distros usually have default log level like below: [~]# cat /proc/sys/kernel/printk 7 4 1 7 So, even though '-d' is not specified, printk(KERN_DEBUG) also always prints out. I thought printk(KERN_DEBUG) is equal to pr_debug(), it's not. Fix it by changing to use pr_info() instead, which is expected to work. Link: https://lkml.kernel.org/r/20240409042238.1240462-1-bhe@redhat.com Fixes: cbc2fe9d9cb2 ("kexec_file: add kexec_file flag to control debug printing") Signed-off-by: Baoquan He Reported-by: Jiri Slaby Closes: https://lore.kernel.org/all/4c775fca-5def-4a2d-8437-7130b02722a2@kernel.org Reviewed-by: Dave Young Cc: Signed-off-by: Andrew Morton --- include/linux/kexec.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 060835bb82d5..f31bd304df45 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -461,10 +461,8 @@ static inline void arch_kexec_pre_free_pages(void *vaddr, unsigned int pages) { extern bool kexec_file_dbg_print; -#define kexec_dprintk(fmt, ...) \ - printk("%s" fmt, \ - kexec_file_dbg_print ? KERN_INFO : KERN_DEBUG, \ - ##__VA_ARGS__) +#define kexec_dprintk(fmt, arg...) 
\ + do { if (kexec_file_dbg_print) pr_info(fmt, ##arg); } while (0) #else /* !CONFIG_KEXEC_CORE */ struct pt_regs; -- cgit v1.2.3 From 22bcc915ae910bc823d8351542f6d9e7623fff24 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 23 Apr 2024 22:23:10 +0300 Subject: kfifo: don't use "proxy" headers Update header inclusions to follow IWYU (Include What You Use) principle. Link: https://lkml.kernel.org/r/20240423192529.3249134-4-andriy.shevchenko@linux.intel.com Signed-off-by: Andy Shevchenko Cc: Alain Volmat Cc: AngeloGioacchino Del Regno Cc: Chen-Yu Tsai Cc: Hans Verkuil Cc: Jernej Skrabec Cc: Matthias Brugger Cc: Mauro Carvalho Chehab Cc: Patrice Chotard Cc: Rob Herring Cc: Samuel Holland Cc: Sean Wang Cc: Sean Young Cc: Stefani Seibold Signed-off-by: Andrew Morton --- include/linux/kfifo.h | 9 +++++++-- lib/kfifo.c | 8 ++++---- samples/kfifo/dma-example.c | 3 ++- 3 files changed, 13 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 0b35a41440ff..6b28d642f332 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -36,10 +36,15 @@ * to lock the reader. */ -#include +#include #include #include -#include +#include + +#include +#include + +struct scatterlist; struct __kfifo { unsigned int in; diff --git a/lib/kfifo.c b/lib/kfifo.c index 12f5a347aa13..15acdee4a8f3 100644 --- a/lib/kfifo.c +++ b/lib/kfifo.c @@ -5,13 +5,13 @@ * Copyright (C) 2009/2010 Stefani Seibold */ -#include -#include -#include #include +#include +#include #include +#include +#include #include -#include /* * internal helper to calculate the unused elements in a fifo diff --git a/samples/kfifo/dma-example.c b/samples/kfifo/dma-example.c index 0cf27483cb36..74fe915b7ffe 100644 --- a/samples/kfifo/dma-example.c +++ b/samples/kfifo/dma-example.c @@ -6,8 +6,9 @@ */ #include -#include #include +#include +#include /* * This module shows how to handle fifo dma operations. 
-- cgit v1.2.3 From 393fb313a2e150b768e4850658679e2afff431e9 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 29 Apr 2024 23:02:36 -0700 Subject: watchdog: allow nmi watchdog to use raw perf event NMI watchdog permanently consumes one hardware counter per CPU on the system. For systems that use many hardware counters, this causes more aggressive time multiplexing of perf events. OTOH, some CPUs (mostly Intel) support "ref-cycles" event, which is rarely used. Add kernel cmdline arg nmi_watchdog=rNNN to configure the watchdog to use raw event. For example, on Intel CPUs, we can use "r300" to configure the watchdog to use ref-cycles event. If the raw event does not work, fall back to use "cycles". [akpm@linux-foundation.org: fix kerneldoc] Link: https://lkml.kernel.org/r/20240430060236.1878002-2-song@kernel.org Signed-off-by: Song Liu Cc: Peter Zijlstra Cc: "Matthew Wilcox (Oracle)" Signed-off-by: Andrew Morton --- Documentation/admin-guide/kernel-parameters.txt | 5 +-- include/linux/nmi.h | 2 ++ kernel/watchdog.c | 2 ++ kernel/watchdog_perf.c | 46 +++++++++++++++++++++++++ 4 files changed, 53 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 902ecd92a29f..1fa79a3d0d1a 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3773,10 +3773,12 @@ Format: [state][,regs][,debounce][,die] nmi_watchdog= [KNL,BUGS=X86] Debugging features for SMP kernels - Format: [panic,][nopanic,][num] + Format: [panic,][nopanic,][rNNN,][num] Valid num: 0 or 1 0 - turn hardlockup detector in nmi_watchdog off 1 - turn hardlockup detector in nmi_watchdog on + rNNN - configure the watchdog with raw perf event 0xNNN + When panic is specified, panic when an NMI watchdog timeout occurs (or 'nopanic' to not panic on an NMI watchdog, if CONFIG_BOOTPARAM_HARDLOCKUP_PANIC is set) @@ -7464,4 +7466,3 @@ memory, and other 
data can't be written using xmon commands. off xmon is disabled. - diff --git a/include/linux/nmi.h b/include/linux/nmi.h index f53438eae815..a8dfb38c9bb6 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -105,10 +105,12 @@ void watchdog_hardlockup_check(unsigned int cpu, struct pt_regs *regs); extern void hardlockup_detector_perf_stop(void); extern void hardlockup_detector_perf_restart(void); extern void hardlockup_detector_perf_cleanup(void); +extern void hardlockup_config_perf_event(const char *str); #else static inline void hardlockup_detector_perf_stop(void) { } static inline void hardlockup_detector_perf_restart(void) { } static inline void hardlockup_detector_perf_cleanup(void) { } +static inline void hardlockup_config_perf_event(const char *str) { } #endif void watchdog_hardlockup_stop(void); diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 7f54484de16f..ab0129b15f25 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -80,6 +80,8 @@ next: watchdog_hardlockup_user_enabled = 0; else if (!strncmp(str, "1", 1)) watchdog_hardlockup_user_enabled = 1; + else if (!strncmp(str, "r", 1)) + hardlockup_config_perf_event(str + 1); while (*(str++)) { if (*str == ',') { str++; diff --git a/kernel/watchdog_perf.c b/kernel/watchdog_perf.c index 8ea00c4a24b2..5f7d1f0d4268 100644 --- a/kernel/watchdog_perf.c +++ b/kernel/watchdog_perf.c @@ -90,6 +90,14 @@ static struct perf_event_attr wd_hw_attr = { .disabled = 1, }; +static struct perf_event_attr fallback_wd_hw_attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES, + .size = sizeof(struct perf_event_attr), + .pinned = 1, + .disabled = 1, +}; + /* Callback function for perf event subsystem */ static void watchdog_overflow_callback(struct perf_event *event, struct perf_sample_data *data, @@ -122,6 +130,13 @@ static int hardlockup_detector_event_create(void) /* Try to register using hardware perf events */ evt = perf_event_create_kernel_counter(wd_attr, cpu, NULL, 
watchdog_overflow_callback, NULL); + if (IS_ERR(evt)) { + wd_attr = &fallback_wd_hw_attr; + wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh); + evt = perf_event_create_kernel_counter(wd_attr, cpu, NULL, + watchdog_overflow_callback, NULL); + } + if (IS_ERR(evt)) { pr_debug("Perf event create on CPU %d failed with %ld\n", cpu, PTR_ERR(evt)); @@ -259,3 +274,34 @@ int __init watchdog_hardlockup_probe(void) } return ret; } + +/** + * hardlockup_config_perf_event - Overwrite config of wd_hw_attr. + * + * @str: number which identifies the raw perf event to use + */ +void __init hardlockup_config_perf_event(const char *str) +{ + u64 config; + char buf[24]; + char *comma = strchr(str, ','); + + if (!comma) { + if (kstrtoull(str, 16, &config)) + return; + } else { + unsigned int len = comma - str; + + if (len >= sizeof(buf)) + return; + + if (strscpy(buf, str, sizeof(buf)) < 0) + return; + buf[len] = 0; + if (kstrtoull(buf, 16, &config)) + return; + } + + wd_hw_attr.type = PERF_TYPE_RAW; + wd_hw_attr.config = config; +} -- cgit v1.2.3 From 33580d667bb20e00356fd06500f5197ef1baa1f5 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 7 May 2024 23:24:54 +0900 Subject: nilfs2: use __field_struct() for a bitwise field As one can see in include/trace/stages/stage4_event_fields.h, the implementation of __field() uses the is_signed_type() macro. As one can see in commit dcf8e5633e2e ("tracing: Define the is_signed_type() macro once"), there has been an attempt to not make is_signed_type() trigger sparse warnings for bitwise types. Despite that change, sparse complains when passing a bitwise type to is_signed_type(). 
The reason is that in its definition below, an inequality comparison will be made against bitwise types, which are random collections of bits (the casts to bitwise types themselves are semantically valid and not problematic): #define is_signed_type(type) (((type)(-1)) < (__force type)1) So, as a workaround, follow the example of and suppress the following sparse warnings by changing __field() into __field_struct() that doesn't use is_signed_type(): fs/nilfs2/segment.c: note: in included file (through include/trace/trace_events.h, include/trace/define_trace.h, include/trace/events/nilfs2.h): ./include/trace/events/nilfs2.h:191:1: warning: cast to restricted blk_opf_t ./include/trace/events/nilfs2.h:191:1: warning: restricted blk_opf_t degrades to integer ./include/trace/events/nilfs2.h:191:1: warning: restricted blk_opf_t degrades to integer [konishi.ryusuke: describe the reason for the warnings per Linus's explanation] Link: https://lkml.kernel.org/r/20240507222041.4876-1-konishi.ryusuke@gmail.com Link: https://lkml.kernel.org/r/20240507142454.3344-1-konishi.ryusuke@gmail.com Signed-off-by: Bart Van Assche Signed-off-by: Ryusuke Konishi Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202401092241.I4mm9OWl-lkp@intel.com/ Reported-by: Ryusuke Konishi Closes: https://lore.kernel.org/all/20240430080019.4242-2-konishi.ryusuke@gmail.com/ Cc: Linus Torvalds Cc: Rasmus Villemoes Signed-off-by: Andrew Morton --- include/trace/events/nilfs2.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/nilfs2.h b/include/trace/events/nilfs2.h index 8efc6236f57c..8880c11733dd 100644 --- a/include/trace/events/nilfs2.h +++ b/include/trace/events/nilfs2.h @@ -200,7 +200,11 @@ TRACE_EVENT(nilfs2_mdt_submit_block, __field(struct inode *, inode) __field(unsigned long, ino) __field(unsigned long, blkoff) - __field(enum req_op, mode) + /* + * Use field_struct() to avoid is_signed_type() on the + * 
bitwise type enum req_op. + */ + __field_struct(enum req_op, mode) ), TP_fast_assign( -- cgit v1.2.3