From 6e24628d78e4785385876125cba62315ca3b04b9 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 13 Feb 2020 23:51:29 -0800 Subject: lib: Introduce generic min-heap Supports push, pop and converting an array into a heap. If the sense of the compare function is inverted then it can provide a max-heap. Based-on-work-by: Peter Zijlstra (Intel) Signed-off-by: Ian Rogers Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20200214075133.181299-3-irogers@google.com --- lib/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/Makefile') diff --git a/lib/Makefile b/lib/Makefile index 611872c06926..09a8acb0cf92 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -67,6 +67,7 @@ CFLAGS_test_ubsan.o += $(call cc-disable-warning, vla) UBSAN_SANITIZE_test_ubsan.o := y obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o obj-$(CONFIG_TEST_LIST_SORT) += test_list_sort.o +obj-$(CONFIG_TEST_MIN_HEAP) += test_min_heap.o obj-$(CONFIG_TEST_LKM) += test_module.o obj-$(CONFIG_TEST_VMALLOC) += test_vmalloc.o obj-$(CONFIG_TEST_OVERFLOW) += test_overflow.o -- cgit v1.2.3 From 30428ef5d1e8caf78639cc70a802f1cb7b1cec04 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Mon, 6 Apr 2020 20:09:47 -0700 Subject: lib/test_lockup: test module to generate lockups CONFIG_TEST_LOCKUP=m adds module "test_lockup" that helps to make sure that watchdogs and lockup detectors are working properly. Depending on module parameters test_lockup could emulate soft or hard lockup, "hung task", hold an arbitrary lock, or allocate a bunch of pages. It could also generate a series of lockups with cooling-down periods; in this way it could be used as a "ping" for locks or the page allocator. The loop checks for pending signals between iterations, so it can be stopped by ^C. # modinfo test_lockup ... 
parm: time_secs:lockup time in seconds, default 0 (uint) parm: time_nsecs:nanoseconds part of lockup time, default 0 (uint) parm: cooldown_secs:cooldown time between iterations in seconds, default 0 (uint) parm: cooldown_nsecs:nanoseconds part of cooldown, default 0 (uint) parm: iterations:lockup iterations, default 1 (uint) parm: all_cpus:trigger lockup at all cpus at once (bool) parm: state:wait in 'R' running (default), 'D' uninterruptible, 'K' killable, 'S' interruptible state (charp) parm: use_hrtimer:use high-resolution timer for sleeping (bool) parm: iowait:account sleep time as iowait (bool) parm: lock_read:lock read-write locks for read (bool) parm: lock_single:acquire locks only at one cpu (bool) parm: reacquire_locks:release and reacquire locks/irq/preempt between iterations (bool) parm: touch_softlockup:touch soft-lockup watchdog between iterations (bool) parm: touch_hardlockup:touch hard-lockup watchdog between iterations (bool) parm: call_cond_resched:call cond_resched() between iterations (bool) parm: measure_lock_wait:measure lock wait time (bool) parm: lock_wait_threshold:print lock wait time longer than this in nanoseconds, default off (ulong) parm: disable_irq:disable interrupts: generate hard-lockups (bool) parm: disable_softirq:disable bottom-half irq handlers (bool) parm: disable_preempt:disable preemption: generate soft-lockups (bool) parm: lock_rcu:grab rcu_read_lock: generate rcu stalls (bool) parm: lock_mmap_sem:lock mm->mmap_sem: block procfs interfaces (bool) parm: lock_rwsem_ptr:lock rw_semaphore at address (ulong) parm: lock_mutex_ptr:lock mutex at address (ulong) parm: lock_spinlock_ptr:lock spinlock at address (ulong) parm: lock_rwlock_ptr:lock rwlock at address (ulong) parm: alloc_pages_nr:allocate and free pages under locks (uint) parm: alloc_pages_order:page order to allocate (uint) parm: alloc_pages_gfp:allocate pages with this gfp_mask, default GFP_KERNEL (uint) parm: alloc_pages_atomic:allocate pages with GFP_ATOMIC (bool) 
parm: reallocate_pages:free and allocate pages between iterations (bool) Parameters for locking by address are unsafe and taint the kernel. With CONFIG_DEBUG_SPINLOCK=y the module at least checks the magic values of the embedded spinlocks. Examples: task hang in D-state: modprobe test_lockup time_secs=1 iterations=60 state=D task hang in io-wait D-state: modprobe test_lockup time_secs=1 iterations=60 state=D iowait softlockup: modprobe test_lockup time_secs=1 iterations=60 state=R hardlockup: modprobe test_lockup time_secs=1 iterations=60 state=R disable_irq system-wide hardlockup: modprobe test_lockup time_secs=1 iterations=60 state=R \ disable_irq all_cpus rcu stall: modprobe test_lockup time_secs=1 iterations=60 state=R \ lock_rcu touch_softlockup lock mmap_sem / block procfs interfaces: modprobe test_lockup time_secs=1 iterations=60 state=S lock_mmap_sem lock tasklist_lock for read / block forks: TASKLIST_LOCK=$(awk '$3 == "tasklist_lock" {print "0x"$1}' /proc/kallsyms) modprobe test_lockup time_secs=1 iterations=60 state=R \ disable_irq lock_read lock_rwlock_ptr=$TASKLIST_LOCK lock namespace_sem / block vfs mount operations: NAMESPACE_SEM=$(awk '$3 == "namespace_sem" {print "0x"$1}' /proc/kallsyms) modprobe test_lockup time_secs=1 iterations=60 state=S \ lock_rwsem_ptr=$NAMESPACE_SEM lock cgroup mutex / block cgroup operations: CGROUP_MUTEX=$(awk '$3 == "cgroup_mutex" {print "0x"$1}' /proc/kallsyms) modprobe test_lockup time_secs=1 iterations=60 state=S \ lock_mutex_ptr=$CGROUP_MUTEX ping cgroup_mutex every second and measure maximum lock wait time: modprobe test_lockup cooldown_secs=1 iterations=60 state=S \ lock_mutex_ptr=$CGROUP_MUTEX reacquire_locks measure_lock_wait [linux@roeck-us.net: rename disable_irq to fix build error] Link: http://lkml.kernel.org/r/20200317133614.23152-1-linux@roeck-us.net Signed-off-by: Konstantin Khlebnikov Signed-off-by: Guenter Roeck Signed-off-by: Andrew Morton Cc: Sasha Levin Cc: Petr Mladek Cc: Kees Cook Cc: Peter Zijlstra Cc: Greg Kroah-Hartman 
Cc: Steven Rostedt Cc: Sergey Senozhatsky Cc: Dmitry Monakhov Cc: Guenter Roeck Link: http://lkml.kernel.org/r/158132859146.2797.525923171323227836.stgit@buzz Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 12 ++ lib/Makefile | 1 + lib/test_lockup.c | 554 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 567 insertions(+) create mode 100644 lib/test_lockup.c (limited to 'lib/Makefile') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 7f9a89847b65..ddcf000022ae 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -976,6 +976,18 @@ config WQ_WATCHDOG state. This can be configured through kernel parameter "workqueue.watchdog_thresh" and its sysfs counterpart. +config TEST_LOCKUP + tristate "Test module to generate lockups" + help + This builds the "test_lockup" module that helps to make sure + that watchdogs and lockup detectors are working properly. + + Depending on module parameters it could emulate soft or hard + lockup, "hung task", or locking arbitrary lock for a long time. + Also it could generate series of lockups with cooling-down periods. + + If unsure, say N. 
+ endmenu # "Debug lockups and hangs" menu "Scheduler Debugging" diff --git a/lib/Makefile b/lib/Makefile index 09a8acb0cf92..0fd125c4ad07 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -90,6 +90,7 @@ obj-$(CONFIG_TEST_OBJAGG) += test_objagg.o obj-$(CONFIG_TEST_STACKINIT) += test_stackinit.o obj-$(CONFIG_TEST_BLACKHOLE_DEV) += test_blackhole_dev.o obj-$(CONFIG_TEST_MEMINIT) += test_meminit.o +obj-$(CONFIG_TEST_LOCKUP) += test_lockup.o obj-$(CONFIG_TEST_LIVEPATCH) += livepatch/ diff --git a/lib/test_lockup.c b/lib/test_lockup.c new file mode 100644 index 000000000000..9e8b8a0be9af --- /dev/null +++ b/lib/test_lockup.c @@ -0,0 +1,554 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Test module to generate lockups + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned int time_secs; +module_param(time_secs, uint, 0600); +MODULE_PARM_DESC(time_secs, "lockup time in seconds, default 0"); + +static unsigned int time_nsecs; +module_param(time_nsecs, uint, 0600); +MODULE_PARM_DESC(time_nsecs, "nanoseconds part of lockup time, default 0"); + +static unsigned int cooldown_secs; +module_param(cooldown_secs, uint, 0600); +MODULE_PARM_DESC(cooldown_secs, "cooldown time between iterations in seconds, default 0"); + +static unsigned int cooldown_nsecs; +module_param(cooldown_nsecs, uint, 0600); +MODULE_PARM_DESC(cooldown_nsecs, "nanoseconds part of cooldown, default 0"); + +static unsigned int iterations = 1; +module_param(iterations, uint, 0600); +MODULE_PARM_DESC(iterations, "lockup iterations, default 1"); + +static bool all_cpus; +module_param(all_cpus, bool, 0400); +MODULE_PARM_DESC(all_cpus, "trigger lockup at all cpus at once"); + +static int wait_state; +static char *state = "R"; +module_param(state, charp, 0400); +MODULE_PARM_DESC(state, "wait in 'R' running (default), 'D' uninterruptible, 'K' killable, 'S' interruptible state"); + +static bool use_hrtimer; 
+module_param(use_hrtimer, bool, 0400); +MODULE_PARM_DESC(use_hrtimer, "use high-resolution timer for sleeping"); + +static bool iowait; +module_param(iowait, bool, 0400); +MODULE_PARM_DESC(iowait, "account sleep time as iowait"); + +static bool lock_read; +module_param(lock_read, bool, 0400); +MODULE_PARM_DESC(lock_read, "lock read-write locks for read"); + +static bool lock_single; +module_param(lock_single, bool, 0400); +MODULE_PARM_DESC(lock_single, "acquire locks only at one cpu"); + +static bool reacquire_locks; +module_param(reacquire_locks, bool, 0400); +MODULE_PARM_DESC(reacquire_locks, "release and reacquire locks/irq/preempt between iterations"); + +static bool touch_softlockup; +module_param(touch_softlockup, bool, 0600); +MODULE_PARM_DESC(touch_softlockup, "touch soft-lockup watchdog between iterations"); + +static bool touch_hardlockup; +module_param(touch_hardlockup, bool, 0600); +MODULE_PARM_DESC(touch_hardlockup, "touch hard-lockup watchdog between iterations"); + +static bool call_cond_resched; +module_param(call_cond_resched, bool, 0600); +MODULE_PARM_DESC(call_cond_resched, "call cond_resched() between iterations"); + +static bool measure_lock_wait; +module_param(measure_lock_wait, bool, 0400); +MODULE_PARM_DESC(measure_lock_wait, "measure lock wait time"); + +static unsigned long lock_wait_threshold = ULONG_MAX; +module_param(lock_wait_threshold, ulong, 0400); +MODULE_PARM_DESC(lock_wait_threshold, "print lock wait time longer than this in nanoseconds, default off"); + +static bool test_disable_irq; +module_param_named(disable_irq, test_disable_irq, bool, 0400); +MODULE_PARM_DESC(disable_irq, "disable interrupts: generate hard-lockups"); + +static bool disable_softirq; +module_param(disable_softirq, bool, 0400); +MODULE_PARM_DESC(disable_softirq, "disable bottom-half irq handlers"); + +static bool disable_preempt; +module_param(disable_preempt, bool, 0400); +MODULE_PARM_DESC(disable_preempt, "disable preemption: generate soft-lockups"); + 
+static bool lock_rcu; +module_param(lock_rcu, bool, 0400); +MODULE_PARM_DESC(lock_rcu, "grab rcu_read_lock: generate rcu stalls"); + +static bool lock_mmap_sem; +module_param(lock_mmap_sem, bool, 0400); +MODULE_PARM_DESC(lock_mmap_sem, "lock mm->mmap_sem: block procfs interfaces"); + +static unsigned long lock_rwsem_ptr; +module_param_unsafe(lock_rwsem_ptr, ulong, 0400); +MODULE_PARM_DESC(lock_rwsem_ptr, "lock rw_semaphore at address"); + +static unsigned long lock_mutex_ptr; +module_param_unsafe(lock_mutex_ptr, ulong, 0400); +MODULE_PARM_DESC(lock_mutex_ptr, "lock mutex at address"); + +static unsigned long lock_spinlock_ptr; +module_param_unsafe(lock_spinlock_ptr, ulong, 0400); +MODULE_PARM_DESC(lock_spinlock_ptr, "lock spinlock at address"); + +static unsigned long lock_rwlock_ptr; +module_param_unsafe(lock_rwlock_ptr, ulong, 0400); +MODULE_PARM_DESC(lock_rwlock_ptr, "lock rwlock at address"); + +static unsigned int alloc_pages_nr; +module_param_unsafe(alloc_pages_nr, uint, 0600); +MODULE_PARM_DESC(alloc_pages_nr, "allocate and free pages under locks"); + +static unsigned int alloc_pages_order; +module_param(alloc_pages_order, uint, 0400); +MODULE_PARM_DESC(alloc_pages_order, "page order to allocate"); + +static gfp_t alloc_pages_gfp = GFP_KERNEL; +module_param_unsafe(alloc_pages_gfp, uint, 0400); +MODULE_PARM_DESC(alloc_pages_gfp, "allocate pages with this gfp_mask, default GFP_KERNEL"); + +static bool alloc_pages_atomic; +module_param(alloc_pages_atomic, bool, 0400); +MODULE_PARM_DESC(alloc_pages_atomic, "allocate pages with GFP_ATOMIC"); + +static bool reallocate_pages; +module_param(reallocate_pages, bool, 0400); +MODULE_PARM_DESC(reallocate_pages, "free and allocate pages between iterations"); + +static atomic_t alloc_pages_failed = ATOMIC_INIT(0); + +static atomic64_t max_lock_wait = ATOMIC64_INIT(0); + +static struct task_struct *main_task; +static int master_cpu; + +static void test_lock(bool master, bool verbose) +{ + u64 uninitialized_var(wait_start); 
+ + if (measure_lock_wait) + wait_start = local_clock(); + + if (lock_mutex_ptr && master) { + if (verbose) + pr_notice("lock mutex %ps\n", (void *)lock_mutex_ptr); + mutex_lock((struct mutex *)lock_mutex_ptr); + } + + if (lock_rwsem_ptr && master) { + if (verbose) + pr_notice("lock rw_semaphore %ps\n", + (void *)lock_rwsem_ptr); + if (lock_read) + down_read((struct rw_semaphore *)lock_rwsem_ptr); + else + down_write((struct rw_semaphore *)lock_rwsem_ptr); + } + + if (lock_mmap_sem && master) { + if (verbose) + pr_notice("lock mmap_sem pid=%d\n", main_task->pid); + if (lock_read) + down_read(&main_task->mm->mmap_sem); + else + down_write(&main_task->mm->mmap_sem); + } + + if (test_disable_irq) + local_irq_disable(); + + if (disable_softirq) + local_bh_disable(); + + if (disable_preempt) + preempt_disable(); + + if (lock_rcu) + rcu_read_lock(); + + if (lock_spinlock_ptr && master) { + if (verbose) + pr_notice("lock spinlock %ps\n", + (void *)lock_spinlock_ptr); + spin_lock((spinlock_t *)lock_spinlock_ptr); + } + + if (lock_rwlock_ptr && master) { + if (verbose) + pr_notice("lock rwlock %ps\n", + (void *)lock_rwlock_ptr); + if (lock_read) + read_lock((rwlock_t *)lock_rwlock_ptr); + else + write_lock((rwlock_t *)lock_rwlock_ptr); + } + + if (measure_lock_wait) { + s64 cur_wait = local_clock() - wait_start; + s64 max_wait = atomic64_read(&max_lock_wait); + + do { + if (cur_wait < max_wait) + break; + max_wait = atomic64_cmpxchg(&max_lock_wait, + max_wait, cur_wait); + } while (max_wait != cur_wait); + + if (cur_wait > lock_wait_threshold) + pr_notice_ratelimited("lock wait %lld ns\n", cur_wait); + } +} + +static void test_unlock(bool master, bool verbose) +{ + if (lock_rwlock_ptr && master) { + if (lock_read) + read_unlock((rwlock_t *)lock_rwlock_ptr); + else + write_unlock((rwlock_t *)lock_rwlock_ptr); + if (verbose) + pr_notice("unlock rwlock %ps\n", + (void *)lock_rwlock_ptr); + } + + if (lock_spinlock_ptr && master) { + spin_unlock((spinlock_t *)lock_spinlock_ptr); 
+ if (verbose) + pr_notice("unlock spinlock %ps\n", + (void *)lock_spinlock_ptr); + } + + if (lock_rcu) + rcu_read_unlock(); + + if (disable_preempt) + preempt_enable(); + + if (disable_softirq) + local_bh_enable(); + + if (test_disable_irq) + local_irq_enable(); + + if (lock_mmap_sem && master) { + if (lock_read) + up_read(&main_task->mm->mmap_sem); + else + up_write(&main_task->mm->mmap_sem); + if (verbose) + pr_notice("unlock mmap_sem pid=%d\n", main_task->pid); + } + + if (lock_rwsem_ptr && master) { + if (lock_read) + up_read((struct rw_semaphore *)lock_rwsem_ptr); + else + up_write((struct rw_semaphore *)lock_rwsem_ptr); + if (verbose) + pr_notice("unlock rw_semaphore %ps\n", + (void *)lock_rwsem_ptr); + } + + if (lock_mutex_ptr && master) { + mutex_unlock((struct mutex *)lock_mutex_ptr); + if (verbose) + pr_notice("unlock mutex %ps\n", + (void *)lock_mutex_ptr); + } +} + +static void test_alloc_pages(struct list_head *pages) +{ + struct page *page; + unsigned int i; + + for (i = 0; i < alloc_pages_nr; i++) { + page = alloc_pages(alloc_pages_gfp, alloc_pages_order); + if (!page) { + atomic_inc(&alloc_pages_failed); + break; + } + list_add(&page->lru, pages); + } +} + +static void test_free_pages(struct list_head *pages) +{ + struct page *page, *next; + + list_for_each_entry_safe(page, next, pages, lru) + __free_pages(page, alloc_pages_order); + INIT_LIST_HEAD(pages); +} + +static void test_wait(unsigned int secs, unsigned int nsecs) +{ + if (wait_state == TASK_RUNNING) { + if (secs) + mdelay(secs * MSEC_PER_SEC); + if (nsecs) + ndelay(nsecs); + return; + } + + __set_current_state(wait_state); + if (use_hrtimer) { + ktime_t time; + + time = ns_to_ktime((u64)secs * NSEC_PER_SEC + nsecs); + schedule_hrtimeout(&time, HRTIMER_MODE_REL); + } else { + schedule_timeout(secs * HZ + nsecs_to_jiffies(nsecs)); + } +} + +static void test_lockup(bool master) +{ + u64 lockup_start = local_clock(); + unsigned int iter = 0; + LIST_HEAD(pages); + + pr_notice("Start on 
CPU%d\n", raw_smp_processor_id()); + + test_lock(master, true); + + test_alloc_pages(&pages); + + while (iter++ < iterations && !signal_pending(main_task)) { + + if (iowait) + current->in_iowait = 1; + + test_wait(time_secs, time_nsecs); + + if (iowait) + current->in_iowait = 0; + + if (reallocate_pages) + test_free_pages(&pages); + + if (reacquire_locks) + test_unlock(master, false); + + if (touch_softlockup) + touch_softlockup_watchdog(); + + if (touch_hardlockup) + touch_nmi_watchdog(); + + if (call_cond_resched) + cond_resched(); + + test_wait(cooldown_secs, cooldown_nsecs); + + if (reacquire_locks) + test_lock(master, false); + + if (reallocate_pages) + test_alloc_pages(&pages); + } + + pr_notice("Finish on CPU%d in %lld ns\n", raw_smp_processor_id(), + local_clock() - lockup_start); + + test_free_pages(&pages); + + test_unlock(master, true); +} + +DEFINE_PER_CPU(struct work_struct, test_works); + +static void test_work_fn(struct work_struct *work) +{ + test_lockup(!lock_single || + work == per_cpu_ptr(&test_works, master_cpu)); +} + +static bool test_kernel_ptr(unsigned long addr, int size) +{ + void *ptr = (void *)addr; + char buf; + + if (!addr) + return false; + + /* should be at least readable kernel address */ + if (access_ok(ptr, 1) || + access_ok(ptr + size - 1, 1) || + probe_kernel_address(ptr, buf) || + probe_kernel_address(ptr + size - 1, buf)) { + pr_err("invalid kernel ptr: %#lx\n", addr); + return true; + } + + return false; +} + +static bool __maybe_unused test_magic(unsigned long addr, int offset, + unsigned int expected) +{ + void *ptr = (void *)addr + offset; + unsigned int magic = 0; + + if (!addr) + return false; + + if (probe_kernel_address(ptr, magic) || magic != expected) { + pr_err("invalid magic at %#lx + %#x = %#x, expected %#x\n", + addr, offset, magic, expected); + return true; + } + + return false; +} + +static int __init test_lockup_init(void) +{ + u64 test_start = local_clock(); + + main_task = current; + + switch (state[0]) { + 
case 'S': + wait_state = TASK_INTERRUPTIBLE; + break; + case 'D': + wait_state = TASK_UNINTERRUPTIBLE; + break; + case 'K': + wait_state = TASK_KILLABLE; + break; + case 'R': + wait_state = TASK_RUNNING; + break; + default: + pr_err("unknown state=%s\n", state); + return -EINVAL; + } + + if (alloc_pages_atomic) + alloc_pages_gfp = GFP_ATOMIC; + + if (test_kernel_ptr(lock_spinlock_ptr, sizeof(spinlock_t)) || + test_kernel_ptr(lock_rwlock_ptr, sizeof(rwlock_t)) || + test_kernel_ptr(lock_mutex_ptr, sizeof(struct mutex)) || + test_kernel_ptr(lock_rwsem_ptr, sizeof(struct rw_semaphore))) + return -EINVAL; + +#ifdef CONFIG_DEBUG_SPINLOCK + if (test_magic(lock_spinlock_ptr, + offsetof(spinlock_t, rlock.magic), + SPINLOCK_MAGIC) || + test_magic(lock_rwlock_ptr, + offsetof(rwlock_t, magic), + RWLOCK_MAGIC) || + test_magic(lock_mutex_ptr, + offsetof(struct mutex, wait_lock.rlock.magic), + SPINLOCK_MAGIC) || + test_magic(lock_rwsem_ptr, + offsetof(struct rw_semaphore, wait_lock.magic), + SPINLOCK_MAGIC)) + return -EINVAL; +#endif + + if ((wait_state != TASK_RUNNING || + (call_cond_resched && !reacquire_locks) || + (alloc_pages_nr && gfpflags_allow_blocking(alloc_pages_gfp))) && + (test_disable_irq || disable_softirq || disable_preempt || + lock_rcu || lock_spinlock_ptr || lock_rwlock_ptr)) { + pr_err("refuse to sleep in atomic context\n"); + return -EINVAL; + } + + if (lock_mmap_sem && !main_task->mm) { + pr_err("no mm to lock mmap_sem\n"); + return -EINVAL; + } + + pr_notice("START pid=%d time=%u +%u ns cooldown=%u +%u ns iteraions=%u state=%s %s%s%s%s%s%s%s%s%s%s%s\n", + main_task->pid, time_secs, time_nsecs, + cooldown_secs, cooldown_nsecs, iterations, state, + all_cpus ? "all_cpus " : "", + iowait ? "iowait " : "", + test_disable_irq ? "disable_irq " : "", + disable_softirq ? "disable_softirq " : "", + disable_preempt ? "disable_preempt " : "", + lock_rcu ? "lock_rcu " : "", + lock_read ? "lock_read " : "", + touch_softlockup ? 
"touch_softlockup " : "", + touch_hardlockup ? "touch_hardlockup " : "", + call_cond_resched ? "call_cond_resched " : "", + reacquire_locks ? "reacquire_locks " : ""); + + if (alloc_pages_nr) + pr_notice("ALLOCATE PAGES nr=%u order=%u gfp=%pGg %s\n", + alloc_pages_nr, alloc_pages_order, &alloc_pages_gfp, + reallocate_pages ? "reallocate_pages " : ""); + + if (all_cpus) { + unsigned int cpu; + + cpus_read_lock(); + + preempt_disable(); + master_cpu = smp_processor_id(); + for_each_online_cpu(cpu) { + INIT_WORK(per_cpu_ptr(&test_works, cpu), test_work_fn); + queue_work_on(cpu, system_highpri_wq, + per_cpu_ptr(&test_works, cpu)); + } + preempt_enable(); + + for_each_online_cpu(cpu) + flush_work(per_cpu_ptr(&test_works, cpu)); + + cpus_read_unlock(); + } else { + test_lockup(true); + } + + if (measure_lock_wait) + pr_notice("Maximum lock wait: %lld ns\n", + atomic64_read(&max_lock_wait)); + + if (alloc_pages_nr) + pr_notice("Page allocation failed %u times\n", + atomic_read(&alloc_pages_failed)); + + pr_notice("FINISH in %llu ns\n", local_clock() - test_start); + + if (signal_pending(main_task)) + return -EINTR; + + return -EAGAIN; +} +module_init(test_lockup_init); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Konstantin Khlebnikov "); +MODULE_DESCRIPTION("Test module to generate lockups"); -- cgit v1.2.3 From 9cf016e6b49b53d6c15d4137c034178148149ef4 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 6 Apr 2020 20:10:12 -0700 Subject: lib: test_stackinit.c: XFAIL switch variable init tests The tests for initializing a variable defined between a switch statement's test and its first "case" statement are currently not initialized in Clang[1] nor the proposed auto-initialization feature in GCC. We should retain the test (so that we can evaluate compiler fixes), but mark it as an "expected fail". The rest of the kernel source will be adjusted to avoid this corner case. 
Also disable -Wswitch-unreachable for the test so that the intentionally broken code won't trigger warnings for GCC (nor future Clang) when initialization happens in this unhandled place. [1] https://bugs.llvm.org/show_bug.cgi?id=44916 Suggested-by: Alexander Potapenko Signed-off-by: Kees Cook Signed-off-by: Andrew Morton Cc: Jann Horn Cc: Ard Biesheuvel Link: http://lkml.kernel.org/r/202002191358.2897A07C6@keescook Signed-off-by: Linus Torvalds --- lib/Makefile | 1 + lib/test_stackinit.c | 28 ++++++++++++++++++---------- 2 files changed, 19 insertions(+), 10 deletions(-) (limited to 'lib/Makefile') diff --git a/lib/Makefile b/lib/Makefile index 0fd125c4ad07..93d05ff4f501 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -87,6 +87,7 @@ obj-$(CONFIG_TEST_KMOD) += test_kmod.o obj-$(CONFIG_TEST_DEBUG_VIRTUAL) += test_debug_virtual.o obj-$(CONFIG_TEST_MEMCAT_P) += test_memcat_p.o obj-$(CONFIG_TEST_OBJAGG) += test_objagg.o +CFLAGS_test_stackinit.o += $(call cc-disable-warning, switch-unreachable) obj-$(CONFIG_TEST_STACKINIT) += test_stackinit.o obj-$(CONFIG_TEST_BLACKHOLE_DEV) += test_blackhole_dev.o obj-$(CONFIG_TEST_MEMINIT) += test_meminit.o diff --git a/lib/test_stackinit.c b/lib/test_stackinit.c index 2d7d257a430e..f93b1e145ada 100644 --- a/lib/test_stackinit.c +++ b/lib/test_stackinit.c @@ -92,8 +92,9 @@ static bool range_contains(char *haystack_start, size_t haystack_size, * @var_type: type to be tested for zeroing initialization * @which: is this a SCALAR, STRING, or STRUCT type? * @init_level: what kind of initialization is performed + * @xfail: is this test expected to fail? */ -#define DEFINE_TEST_DRIVER(name, var_type, which) \ +#define DEFINE_TEST_DRIVER(name, var_type, which, xfail) \ /* Returns 0 on success, 1 on failure. 
*/ \ static noinline __init int test_ ## name (void) \ { \ @@ -139,13 +140,14 @@ static noinline __init int test_ ## name (void) \ for (sum = 0, i = 0; i < target_size; i++) \ sum += (check_buf[i] == 0xFF); \ \ - if (sum == 0) \ + if (sum == 0) { \ pr_info(#name " ok\n"); \ - else \ - pr_warn(#name " FAIL (uninit bytes: %d)\n", \ - sum); \ - \ - return (sum != 0); \ + return 0; \ + } else { \ + pr_warn(#name " %sFAIL (uninit bytes: %d)\n", \ + (xfail) ? "X" : "", sum); \ + return (xfail) ? 0 : 1; \ + } \ } #define DEFINE_TEST(name, var_type, which, init_level) \ /* no-op to force compiler into ignoring "uninitialized" vars */\ @@ -189,7 +191,7 @@ static noinline __init int leaf_ ## name(unsigned long sp, \ \ return (int)buf[0] | (int)buf[sizeof(buf) - 1]; \ } \ -DEFINE_TEST_DRIVER(name, var_type, which) +DEFINE_TEST_DRIVER(name, var_type, which, 0) /* Structure with no padding. */ struct test_packed { @@ -326,8 +328,14 @@ static noinline __init int leaf_switch_2_none(unsigned long sp, bool fill, return __leaf_switch_none(2, fill); } -DEFINE_TEST_DRIVER(switch_1_none, uint64_t, SCALAR); -DEFINE_TEST_DRIVER(switch_2_none, uint64_t, SCALAR); +/* + * These are expected to fail for most configurations because neither + * GCC nor Clang have a way to perform initialization of variables in + * non-code areas (i.e. in a switch statement before the first "case"). + * https://bugs.llvm.org/show_bug.cgi?id=44916 + */ +DEFINE_TEST_DRIVER(switch_1_none, uint64_t, SCALAR, 1); +DEFINE_TEST_DRIVER(switch_2_none, uint64_t, SCALAR, 1); static int __init test_stackinit_init(void) { -- cgit v1.2.3 From 7b65942fb2f0ac939be9c659bb889e78b399f84e Mon Sep 17 00:00:00 2001 From: Alexander Potapenko Date: Mon, 6 Apr 2020 20:10:19 -0700 Subject: lib/stackdepot.c: build with -fno-builtin Clang may replace stackdepot_memcmp() with a call to instrumented bcmp(), which is exactly what we wanted to avoid creating stackdepot_memcmp(). Building the file with -fno-builtin prevents such optimizations. 
This patch has been previously mailed as part of KMSAN RFC patch series. Signed-off-by: Alexander Potapenko Signed-off-by: Andrew Morton Cc: Vegard Nossum Cc: Dmitry Vyukov Cc: Marco Elver Cc: Andrey Konovalov Cc: Sergey Senozhatsky Cc: Arnd Bergmann Cc: Andrey Ryabinin Link: http://lkml.kernel.org/r/20200220141916.55455-2-glider@google.com Signed-off-by: Linus Torvalds --- lib/Makefile | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib/Makefile') diff --git a/lib/Makefile b/lib/Makefile index 93d05ff4f501..3fc06399295d 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -223,6 +223,10 @@ obj-$(CONFIG_MEMREGION) += memregion.o obj-$(CONFIG_STMP_DEVICE) += stmp_device.o obj-$(CONFIG_IRQ_POLL) += irq_poll.o +# stackdepot.c should not be instrumented or call instrumented functions. +# Prevent the compiler from calling builtins like memcmp() or bcmp() from this +# file. +CFLAGS_stackdepot.o += -fno-builtin obj-$(CONFIG_STACKDEPOT) += stackdepot.o KASAN_SANITIZE_stackdepot.o := n KCOV_INSTRUMENT_stackdepot.o := n -- cgit v1.2.3 From 0887a7ebc97770c7870abf3075a2e8cd502a7f52 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 6 Apr 2020 20:12:27 -0700 Subject: ubsan: add trap instrumentation option Patch series "ubsan: Split out bounds checker", v5. This splits out the bounds checker so it can be individually used. This is enabled in Android and hopefully for syzbot. Includes LKDTM tests for behavioral corner-cases (beyond just the bounds checker), and adjusts ubsan and kasan slightly for correct panic handling. This patch (of 6): The Undefined Behavior Sanitizer can operate in two modes: warning reporting mode via lib/ubsan.c handler calls, or trap mode, which uses __builtin_trap() as the handler. Using lib/ubsan.c means the kernel image is about 5% larger (due to all the debugging text and reporting structures to capture details about the warning conditions). Using the trap mode, the image size changes are much smaller, though at the loss of the "warning only" mode. 
In order to give greater flexibility to system builders that want minimal changes to image size and are prepared to deal with kernel code being aborted and potentially destabilizing the system, this introduces CONFIG_UBSAN_TRAP. The resulting image sizes comparison: text data bss dec hex filename 19533663 6183037 18554956 44271656 2a38828 vmlinux.stock 19991849 7618513 18874448 46484810 2c54d4a vmlinux.ubsan 19712181 6284181 18366540 44362902 2a4ec96 vmlinux.ubsan-trap CONFIG_UBSAN=y: image +4.8% (text +2.3%, data +18.9%) CONFIG_UBSAN_TRAP=y: image +0.2% (text +0.9%, data +1.6%) Additionally adjusts the CONFIG_UBSAN Kconfig help for clarity and removes the mention of non-existing boot param "ubsan_handle". Suggested-by: Elena Petrova Signed-off-by: Kees Cook Signed-off-by: Andrew Morton Acked-by: Dmitry Vyukov Cc: Andrey Ryabinin Cc: Andrey Konovalov Cc: Alexander Potapenko Cc: Dan Carpenter Cc: "Gustavo A. R. Silva" Cc: Arnd Bergmann Cc: Ard Biesheuvel Link: http://lkml.kernel.org/r/20200227193516.32566-2-keescook@chromium.org Signed-off-by: Linus Torvalds --- lib/Kconfig.ubsan | 22 ++++++++++++++++++---- lib/Makefile | 2 ++ scripts/Makefile.ubsan | 9 +++++++-- 3 files changed, 27 insertions(+), 6 deletions(-) (limited to 'lib/Makefile') diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan index 0e04fcb3ab3d..9deb655838b0 100644 --- a/lib/Kconfig.ubsan +++ b/lib/Kconfig.ubsan @@ -5,11 +5,25 @@ config ARCH_HAS_UBSAN_SANITIZE_ALL config UBSAN bool "Undefined behaviour sanity checker" help - This option enables undefined behaviour sanity checker + This option enables the Undefined Behaviour sanity checker. Compile-time instrumentation is used to detect various undefined - behaviours in runtime. Various types of checks may be enabled - via boot parameter ubsan_handle - (see: Documentation/dev-tools/ubsan.rst). + behaviours at runtime. 
For more details, see: + Documentation/dev-tools/ubsan.rst + +config UBSAN_TRAP + bool "On Sanitizer warnings, abort the running kernel code" + depends on UBSAN + depends on $(cc-option, -fsanitize-undefined-trap-on-error) + help + Building kernels with Sanitizer features enabled tends to grow + the kernel size by around 5%, due to adding all the debugging + text on failure paths. To avoid this, Sanitizer instrumentation + can just issue a trap. This reduces the kernel size overhead but + turns all warnings (including potentially harmless conditions) + into full exceptions that abort the running kernel code + (regardless of context, locks held, etc), which may destabilize + the system. For some system builders this is an acceptable + trade-off. config UBSAN_SANITIZE_ALL bool "Enable instrumentation for the entire kernel" diff --git a/lib/Makefile b/lib/Makefile index 3fc06399295d..685aee60de1d 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -286,7 +286,9 @@ quiet_cmd_build_OID_registry = GEN $@ clean-files += oid_registry_data.c obj-$(CONFIG_UCS2_STRING) += ucs2_string.o +ifneq ($(CONFIG_UBSAN_TRAP),y) obj-$(CONFIG_UBSAN) += ubsan.o +endif UBSAN_SANITIZE_ubsan.o := n KASAN_SANITIZE_ubsan.o := n diff --git a/scripts/Makefile.ubsan b/scripts/Makefile.ubsan index 019771b845c5..668a91510bfe 100644 --- a/scripts/Makefile.ubsan +++ b/scripts/Makefile.ubsan @@ -1,5 +1,10 @@ # SPDX-License-Identifier: GPL-2.0 ifdef CONFIG_UBSAN + +ifdef CONFIG_UBSAN_ALIGNMENT + CFLAGS_UBSAN += $(call cc-option, -fsanitize=alignment) +endif + CFLAGS_UBSAN += $(call cc-option, -fsanitize=shift) CFLAGS_UBSAN += $(call cc-option, -fsanitize=integer-divide-by-zero) CFLAGS_UBSAN += $(call cc-option, -fsanitize=unreachable) @@ -9,8 +14,8 @@ ifdef CONFIG_UBSAN CFLAGS_UBSAN += $(call cc-option, -fsanitize=bool) CFLAGS_UBSAN += $(call cc-option, -fsanitize=enum) -ifdef CONFIG_UBSAN_ALIGNMENT - CFLAGS_UBSAN += $(call cc-option, -fsanitize=alignment) +ifdef CONFIG_UBSAN_TRAP + CFLAGS_UBSAN += $(call 
cc-option, -fsanitize-undefined-trap-on-error) endif # -fsanitize=* options makes GCC less smart than usual and -- cgit v1.2.3