From ce041c43f22298485122bab15c14d062383fbc67 Mon Sep 17 00:00:00 2001 From: Thierry Escande Date: Wed, 13 Jun 2018 19:18:40 +0200 Subject: lib/test_printf.c: accept "ptrval" as valid result for plain 'p' tests If the test_printf module is loaded before the crng is initialized, the plain 'p' tests will fail because the printed address will not be hashed and the buffer will contain "(____ptrval____)" or "(ptrval)" instead (64-bit vs 32-bit). Since we cannot wait for the crng to be initialized for an undefined time, both plain 'p' tests now accept the strings "(____ptrval____)" or "(ptrval)" as a valid result and print a warning message. Link: http://lkml.kernel.org/r/20180613171840.29827-1-thierry.escande@linaro.org Fixes: ad67b74d2469d9b82 ("printk: hash addresses printed with %p") To: Andrew Morton To: David Miller Cc: Rasmus Villemoes Cc: "Tobin C . Harding" Cc: linux-kernel@vger.kernel.org Signed-off-by: Thierry Escande Reviewed-by: Andy Shevchenko Signed-off-by: Petr Mladek --- lib/test_printf.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/test_printf.c b/lib/test_printf.c index cea592f402ed..53527ea822b5 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -206,6 +206,7 @@ test_string(void) #define PTR_WIDTH 16 #define PTR ((void *)0xffff0123456789abUL) #define PTR_STR "ffff0123456789ab" +#define PTR_VAL_NO_CRNG "(____ptrval____)" #define ZEROS "00000000" /* hex 32 zero bits */ static int __init @@ -216,7 +217,16 @@ plain_format(void) nchars = snprintf(buf, PLAIN_BUF_SIZE, "%p", PTR); - if (nchars != PTR_WIDTH || strncmp(buf, ZEROS, strlen(ZEROS)) != 0) + if (nchars != PTR_WIDTH) + return -1; + + if (strncmp(buf, PTR_VAL_NO_CRNG, PTR_WIDTH) == 0) { + pr_warn("crng possibly not yet initialized. plain 'p' buffer contains \"%s\"", + PTR_VAL_NO_CRNG); + return 0; + } + + if (strncmp(buf, ZEROS, strlen(ZEROS)) != 0) return -1; return 0; @@ -227,6 +237,7 @@ plain_format(void) #define PTR_WIDTH 8 #define PTR ((void *)0x456789ab) #define PTR_STR "456789ab" +#define PTR_VAL_NO_CRNG "(ptrval)" static int __init plain_format(void) @@ -245,7 +256,16 @@ plain_hash(void) nchars = snprintf(buf, PLAIN_BUF_SIZE, "%p", PTR); - if (nchars != PTR_WIDTH || strncmp(buf, PTR_STR, PTR_WIDTH) == 0) + if (nchars != PTR_WIDTH) + return -1; + + if (strncmp(buf, PTR_VAL_NO_CRNG, PTR_WIDTH) == 0) { + pr_warn("crng possibly not yet initialized. plain 'p' buffer contains \"%s\"", + PTR_VAL_NO_CRNG); + return 0; + } + + if (strncmp(buf, PTR_STR, PTR_WIDTH) == 0) return -1; return 0; -- cgit v1.2.3 From 693ba15c9202fe0283404abe4066e1b986e284eb Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 12 Jun 2018 12:05:45 -0700 Subject: scsi: Remove percpu_ida With its one user gone, remove the library code. Signed-off-by: Matthew Wilcox Reviewed-by: Jens Axboe Signed-off-by: Martin K. 
Petersen --- include/linux/percpu_ida.h | 83 ---------- lib/Makefile | 2 +- lib/percpu_ida.c | 370 --------------------------------------------- 3 files changed, 1 insertion(+), 454 deletions(-) delete mode 100644 include/linux/percpu_ida.h delete mode 100644 lib/percpu_ida.c (limited to 'lib') diff --git a/include/linux/percpu_ida.h b/include/linux/percpu_ida.h deleted file mode 100644 index 07d78e4653bc..000000000000 --- a/include/linux/percpu_ida.h +++ /dev/null @@ -1,83 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef __PERCPU_IDA_H__ -#define __PERCPU_IDA_H__ - -#include -#include -#include -#include -#include -#include -#include - -struct percpu_ida_cpu; - -struct percpu_ida { - /* - * number of tags available to be allocated, as passed to - * percpu_ida_init() - */ - unsigned nr_tags; - unsigned percpu_max_size; - unsigned percpu_batch_size; - - struct percpu_ida_cpu __percpu *tag_cpu; - - /* - * Bitmap of cpus that (may) have tags on their percpu freelists: - * steal_tags() uses this to decide when to steal tags, and which cpus - * to try stealing from. - * - * It's ok for a freelist to be empty when its bit is set - steal_tags() - * will just keep looking - but the bitmap _must_ be set whenever a - * percpu freelist does have tags. - */ - cpumask_t cpus_have_tags; - - struct { - spinlock_t lock; - /* - * When we go to steal tags from another cpu (see steal_tags()), - * we want to pick a cpu at random. Cycling through them every - * time we steal is a bit easier and more or less equivalent: - */ - unsigned cpu_last_stolen; - - /* For sleeping on allocation failure */ - wait_queue_head_t wait; - - /* - * Global freelist - it's a stack where nr_free points to the - * top - */ - unsigned nr_free; - unsigned *freelist; - } ____cacheline_aligned_in_smp; -}; - -/* - * Number of tags we move between the percpu freelist and the global freelist at - * a time - */ -#define IDA_DEFAULT_PCPU_BATCH_MOVE 32U -/* Max size of percpu freelist, */ -#define IDA_DEFAULT_PCPU_SIZE ((IDA_DEFAULT_PCPU_BATCH_MOVE * 3) / 2) - -int percpu_ida_alloc(struct percpu_ida *pool, int state); -void percpu_ida_free(struct percpu_ida *pool, unsigned tag); - -void percpu_ida_destroy(struct percpu_ida *pool); -int __percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags, - unsigned long max_size, unsigned long batch_size); -static inline int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags) -{ - return __percpu_ida_init(pool, nr_tags, IDA_DEFAULT_PCPU_SIZE, - IDA_DEFAULT_PCPU_BATCH_MOVE); -} - -typedef int (*percpu_ida_cb)(unsigned, void *); -int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn, - void *data); - -unsigned percpu_ida_free_tags(struct percpu_ida *pool, int cpu); -#endif /* __PERCPU_IDA_H__ */ diff --git a/lib/Makefile b/lib/Makefile index 956b320292fe..055420101965 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -40,7 +40,7 @@ obj-y += bcd.o div64.o sort.o parser.o debug_locks.o random32.o \ bust_spinlocks.o kasprintf.o bitmap.o scatterlist.o \ gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \ bsearch.o find_bit.o llist.o memweight.o kfifo.o \ - percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o \ + percpu-refcount.o rhashtable.o reciprocal_div.o \ once.o refcount.o usercopy.o errseq.o bucket_locks.o obj-$(CONFIG_STRING_SELFTEST) += test_string.o obj-y += string_helpers.o diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c deleted file mode 100644 index 9bbd9c5d375a..000000000000 --- a/lib/percpu_ida.c +++ /dev/null @@ -1,370 +0,0 @@ 
-/* - * Percpu IDA library - * - * Copyright (C) 2013 Datera, Inc. Kent Overstreet - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2, or (at - * your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct percpu_ida_cpu { - /* - * Even though this is percpu, we need a lock for tag stealing by remote - * CPUs: - */ - spinlock_t lock; - - /* nr_free/freelist form a stack of free IDs */ - unsigned nr_free; - unsigned freelist[]; -}; - -static inline void move_tags(unsigned *dst, unsigned *dst_nr, - unsigned *src, unsigned *src_nr, - unsigned nr) -{ - *src_nr -= nr; - memcpy(dst + *dst_nr, src + *src_nr, sizeof(unsigned) * nr); - *dst_nr += nr; -} - -/* - * Try to steal tags from a remote cpu's percpu freelist. - * - * We first check how many percpu freelists have tags - * - * Then we iterate through the cpus until we find some tags - we don't attempt - * to find the "best" cpu to steal from, to keep cacheline bouncing to a - * minimum. - */ -static inline void steal_tags(struct percpu_ida *pool, - struct percpu_ida_cpu *tags) -{ - unsigned cpus_have_tags, cpu = pool->cpu_last_stolen; - struct percpu_ida_cpu *remote; - - for (cpus_have_tags = cpumask_weight(&pool->cpus_have_tags); - cpus_have_tags; cpus_have_tags--) { - cpu = cpumask_next(cpu, &pool->cpus_have_tags); - - if (cpu >= nr_cpu_ids) { - cpu = cpumask_first(&pool->cpus_have_tags); - if (cpu >= nr_cpu_ids) - BUG(); - } - - pool->cpu_last_stolen = cpu; - remote = per_cpu_ptr(pool->tag_cpu, cpu); - - cpumask_clear_cpu(cpu, &pool->cpus_have_tags); - - if (remote == tags) - continue; - - spin_lock(&remote->lock); - - if (remote->nr_free) { - memcpy(tags->freelist, - remote->freelist, - sizeof(unsigned) * remote->nr_free); - - tags->nr_free = remote->nr_free; - remote->nr_free = 0; - } - - spin_unlock(&remote->lock); - - if (tags->nr_free) - break; - } -} - -/* - * Pop up to IDA_PCPU_BATCH_MOVE IDs off the global freelist, and push them onto - * our percpu freelist: - */ -static inline void alloc_global_tags(struct percpu_ida *pool, - struct percpu_ida_cpu *tags) -{ - move_tags(tags->freelist, &tags->nr_free, - pool->freelist, &pool->nr_free, - min(pool->nr_free, pool->percpu_batch_size)); -} - -/** - * percpu_ida_alloc - allocate a tag - * @pool: pool to allocate from - * @state: task state for prepare_to_wait - * - * Returns a tag - an integer in the range [0..nr_tags) (passed to - * tag_pool_init()), or otherwise -ENOSPC on allocation failure. - * - * Safe to be called from interrupt context (assuming it isn't passed - * TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, of course). - * - * @gfp indicates whether or not to wait until a free id is available (it's not - * used for internal memory allocations); thus if passed __GFP_RECLAIM we may sleep - * however long it takes until another thread frees an id (same semantics as a - * mempool). - * - * Will not fail if passed TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE. 
- */ -int percpu_ida_alloc(struct percpu_ida *pool, int state) -{ - DEFINE_WAIT(wait); - struct percpu_ida_cpu *tags; - unsigned long flags; - int tag = -ENOSPC; - - tags = raw_cpu_ptr(pool->tag_cpu); - spin_lock_irqsave(&tags->lock, flags); - - /* Fastpath */ - if (likely(tags->nr_free >= 0)) { - tag = tags->freelist[--tags->nr_free]; - spin_unlock_irqrestore(&tags->lock, flags); - return tag; - } - spin_unlock_irqrestore(&tags->lock, flags); - - while (1) { - spin_lock_irqsave(&pool->lock, flags); - tags = this_cpu_ptr(pool->tag_cpu); - - /* - * prepare_to_wait() must come before steal_tags(), in case - * percpu_ida_free() on another cpu flips a bit in - * cpus_have_tags - * - * global lock held and irqs disabled, don't need percpu lock - */ - if (state != TASK_RUNNING) - prepare_to_wait(&pool->wait, &wait, state); - - if (!tags->nr_free) - alloc_global_tags(pool, tags); - if (!tags->nr_free) - steal_tags(pool, tags); - - if (tags->nr_free) { - tag = tags->freelist[--tags->nr_free]; - if (tags->nr_free) - cpumask_set_cpu(smp_processor_id(), - &pool->cpus_have_tags); - } - - spin_unlock_irqrestore(&pool->lock, flags); - - if (tag >= 0 || state == TASK_RUNNING) - break; - - if (signal_pending_state(state, current)) { - tag = -ERESTARTSYS; - break; - } - - schedule(); - } - if (state != TASK_RUNNING) - finish_wait(&pool->wait, &wait); - - return tag; -} -EXPORT_SYMBOL_GPL(percpu_ida_alloc); - -/** - * percpu_ida_free - free a tag - * @pool: pool @tag was allocated from - * @tag: a tag previously allocated with percpu_ida_alloc() - * - * Safe to be called from interrupt context. - */ -void percpu_ida_free(struct percpu_ida *pool, unsigned tag) -{ - struct percpu_ida_cpu *tags; - unsigned long flags; - unsigned nr_free; - - BUG_ON(tag >= pool->nr_tags); - - tags = raw_cpu_ptr(pool->tag_cpu); - - spin_lock_irqsave(&tags->lock, flags); - tags->freelist[tags->nr_free++] = tag; - - nr_free = tags->nr_free; - - if (nr_free == 1) { - cpumask_set_cpu(smp_processor_id(), - &pool->cpus_have_tags); - wake_up(&pool->wait); - } - spin_unlock_irqrestore(&tags->lock, flags); - - if (nr_free == pool->percpu_max_size) { - spin_lock_irqsave(&pool->lock, flags); - spin_lock(&tags->lock); - - if (tags->nr_free == pool->percpu_max_size) { - move_tags(pool->freelist, &pool->nr_free, - tags->freelist, &tags->nr_free, - pool->percpu_batch_size); - - wake_up(&pool->wait); - } - spin_unlock(&tags->lock); - spin_unlock_irqrestore(&pool->lock, flags); - } -} -EXPORT_SYMBOL_GPL(percpu_ida_free); - -/** - * percpu_ida_destroy - release a tag pool's resources - * @pool: pool to free - * - * Frees the resources allocated by percpu_ida_init(). - */ -void percpu_ida_destroy(struct percpu_ida *pool) -{ - free_percpu(pool->tag_cpu); - free_pages((unsigned long) pool->freelist, - get_order(pool->nr_tags * sizeof(unsigned))); -} -EXPORT_SYMBOL_GPL(percpu_ida_destroy); - -/** - * percpu_ida_init - initialize a percpu tag pool - * @pool: pool to initialize - * @nr_tags: number of tags that will be available for allocation - * - * Initializes @pool so that it can be used to allocate tags - integers in the - * range [0, nr_tags). Typically, they'll be used by driver code to refer to a - * preallocated array of tag structures. - * - * Allocation is percpu, but sharding is limited by nr_tags - for best - * performance, the workload should not span more cpus than nr_tags / 128. 
- */ -int __percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags, - unsigned long max_size, unsigned long batch_size) -{ - unsigned i, cpu, order; - - memset(pool, 0, sizeof(*pool)); - - init_waitqueue_head(&pool->wait); - spin_lock_init(&pool->lock); - pool->nr_tags = nr_tags; - pool->percpu_max_size = max_size; - pool->percpu_batch_size = batch_size; - - /* Guard against overflow */ - if (nr_tags > (unsigned) INT_MAX + 1) { - pr_err("percpu_ida_init(): nr_tags too large\n"); - return -EINVAL; - } - - order = get_order(nr_tags * sizeof(unsigned)); - pool->freelist = (void *) __get_free_pages(GFP_KERNEL, order); - if (!pool->freelist) - return -ENOMEM; - - for (i = 0; i < nr_tags; i++) - pool->freelist[i] = i; - - pool->nr_free = nr_tags; - - pool->tag_cpu = __alloc_percpu(sizeof(struct percpu_ida_cpu) + - pool->percpu_max_size * sizeof(unsigned), - sizeof(unsigned)); - if (!pool->tag_cpu) - goto err; - - for_each_possible_cpu(cpu) - spin_lock_init(&per_cpu_ptr(pool->tag_cpu, cpu)->lock); - - return 0; -err: - percpu_ida_destroy(pool); - return -ENOMEM; -} -EXPORT_SYMBOL_GPL(__percpu_ida_init); - -/** - * percpu_ida_for_each_free - iterate free ids of a pool - * @pool: pool to iterate - * @fn: interate callback function - * @data: parameter for @fn - * - * Note, this doesn't guarantee to iterate all free ids restrictly. Some free - * ids might be missed, some might be iterated duplicated, and some might - * be iterated and not free soon. - */ -int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn, - void *data) -{ - unsigned long flags; - struct percpu_ida_cpu *remote; - unsigned cpu, i, err = 0; - - for_each_possible_cpu(cpu) { - remote = per_cpu_ptr(pool->tag_cpu, cpu); - spin_lock_irqsave(&remote->lock, flags); - for (i = 0; i < remote->nr_free; i++) { - err = fn(remote->freelist[i], data); - if (err) - break; - } - spin_unlock_irqrestore(&remote->lock, flags); - if (err) - goto out; - } - - spin_lock_irqsave(&pool->lock, flags); - for (i = 0; i < pool->nr_free; i++) { - err = fn(pool->freelist[i], data); - if (err) - break; - } - spin_unlock_irqrestore(&pool->lock, flags); -out: - return err; -} -EXPORT_SYMBOL_GPL(percpu_ida_for_each_free); - -/** - * percpu_ida_free_tags - return free tags number of a specific cpu or global pool - * @pool: pool related - * @cpu: specific cpu or global pool if @cpu == nr_cpu_ids - * - * Note: this just returns a snapshot of free tags number. - */ -unsigned percpu_ida_free_tags(struct percpu_ida *pool, int cpu) -{ - struct percpu_ida_cpu *remote; - if (cpu == nr_cpu_ids) - return pool->nr_free; - remote = per_cpu_ptr(pool->tag_cpu, cpu); - return remote->nr_free; -} -EXPORT_SYMBOL_GPL(percpu_ida_free_tags); -- cgit v1.2.3 From 5a6cf77f5e35e7af35d36a1e7dc21a42f6412e4f Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 20 Jun 2018 01:05:07 +0900 Subject: kprobes: Remove jprobe API implementation Remove functionally empty jprobe API implementations and test cases. 
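As a hedged sketch of the migration path (illustrative only, not part of this patch; the probed symbol and register use are assumptions, x86-64 calling convention): where a jprobe handed its entry handler the probed function's argument list, a bare kprobe reads the same arguments out of pt_regs in its pre_handler:

	#include <linux/kprobes.h>

	/* Sketch: on x86-64 the first integer argument arrives in %rdi. */
	static int my_pre_handler(struct kprobe *p, struct pt_regs *regs)
	{
		pr_info("%s entered, arg0=0x%lx\n", p->symbol_name, regs->di);
		return 0;	/* continue normal execution of the probed code */
	}

	static struct kprobe kp = {
		.symbol_name = "kprobe_target",	/* hypothetical probe target */
		.pre_handler = my_pre_handler,
	};
	/* register_kprobe(&kp) in module init; unregister_kprobe(&kp) on exit. */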
Signed-off-by: Masami Hiramatsu Acked-by: Thomas Gleixner Cc: Ananth N Mavinakayanahalli Cc: Andrew Morton Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Cc: linux-arch@vger.kernel.org Link: https://lore.kernel.org/lkml/152942430705.15209.2307050500995264322.stgit@devbox Signed-off-by: Ingo Molnar --- include/linux/kprobes.h | 3 -- kernel/kprobes.c | 78 +--------------------------------------- kernel/test_kprobes.c | 94 ------------------------------------------------- lib/Kconfig.debug | 2 +- 4 files changed, 2 insertions(+), 175 deletions(-) (limited to 'lib') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 9440a2fc8893..b520baa65682 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -389,9 +389,6 @@ int register_kprobe(struct kprobe *p); void unregister_kprobe(struct kprobe *p); int register_kprobes(struct kprobe **kps, int num); void unregister_kprobes(struct kprobe **kps, int num); -int setjmp_pre_handler(struct kprobe *, struct pt_regs *); -int longjmp_break_handler(struct kprobe *, struct pt_regs *); -void jprobe_return(void); unsigned long arch_deref_entry_point(void *); int register_kretprobe(struct kretprobe *rp); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index ea619021d901..69de130595f7 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -1272,7 +1272,7 @@ NOKPROBE_SYMBOL(cleanup_rp_inst); /* * Add the new probe to ap->list. Fail if this is the -* second jprobe at the address - two jprobes can't coexist +* second break_handler at the address */ static int add_new_kprobe(struct kprobe *ap, struct kprobe *p) { @@ -1812,77 +1812,6 @@ unsigned long __weak arch_deref_entry_point(void *entry) return (unsigned long)entry; } -#if 0 -int register_jprobes(struct jprobe **jps, int num) -{ - int ret = 0, i; - - if (num <= 0) - return -EINVAL; - - for (i = 0; i < num; i++) { - ret = register_jprobe(jps[i]); - - if (ret < 0) { - if (i > 0) - unregister_jprobes(jps, i); - break; - } - } - - return ret; -} -EXPORT_SYMBOL_GPL(register_jprobes); - -int register_jprobe(struct jprobe *jp) -{ - unsigned long addr, offset; - struct kprobe *kp = &jp->kp; - - /* - * Verify probepoint as well as the jprobe handler are - * valid function entry points. - */ - addr = arch_deref_entry_point(jp->entry); - - if (kallsyms_lookup_size_offset(addr, NULL, &offset) && offset == 0 && - kprobe_on_func_entry(kp->addr, kp->symbol_name, kp->offset)) { - kp->pre_handler = setjmp_pre_handler; - kp->break_handler = longjmp_break_handler; - return register_kprobe(kp); - } - - return -EINVAL; -} -EXPORT_SYMBOL_GPL(register_jprobe); - -void unregister_jprobe(struct jprobe *jp) -{ - unregister_jprobes(&jp, 1); -} -EXPORT_SYMBOL_GPL(unregister_jprobe); - -void unregister_jprobes(struct jprobe **jps, int num) -{ - int i; - - if (num <= 0) - return; - mutex_lock(&kprobe_mutex); - for (i = 0; i < num; i++) - if (__unregister_kprobe_top(&jps[i]->kp) < 0) - jps[i]->kp.addr = NULL; - mutex_unlock(&kprobe_mutex); - - synchronize_sched(); - for (i = 0; i < num; i++) { - if (jps[i]->kp.addr) - __unregister_kprobe_bottom(&jps[i]->kp); - } -} -EXPORT_SYMBOL_GPL(unregister_jprobes); -#endif - #ifdef CONFIG_KRETPROBES /* * This kprobe pre_handler is registered with every kretprobe. 
When probe @@ -2329,8 +2258,6 @@ static void report_probe(struct seq_file *pi, struct kprobe *p, if (p->pre_handler == pre_handler_kretprobe) kprobe_type = "r"; - else if (p->pre_handler == setjmp_pre_handler) - kprobe_type = "j"; else kprobe_type = "k"; @@ -2637,6 +2564,3 @@ late_initcall(debugfs_kprobe_init); #endif /* CONFIG_DEBUG_FS */ module_init(init_kprobes); - -/* defined in arch/.../kernel/kprobes.c */ -EXPORT_SYMBOL_GPL(jprobe_return); diff --git a/kernel/test_kprobes.c b/kernel/test_kprobes.c index dd53e354f630..7bca480151b0 100644 --- a/kernel/test_kprobes.c +++ b/kernel/test_kprobes.c @@ -162,90 +162,6 @@ static int test_kprobes(void) } -#if 0 -static u32 jph_val; - -static u32 j_kprobe_target(u32 value) -{ - if (preemptible()) { - handler_errors++; - pr_err("jprobe-handler is preemptible\n"); - } - if (value != rand1) { - handler_errors++; - pr_err("incorrect value in jprobe handler\n"); - } - - jph_val = rand1; - jprobe_return(); - return 0; -} - -static struct jprobe jp = { - .entry = j_kprobe_target, - .kp.symbol_name = "kprobe_target" -}; - -static int test_jprobe(void) -{ - int ret; - - ret = register_jprobe(&jp); - if (ret < 0) { - pr_err("register_jprobe returned %d\n", ret); - return ret; - } - - ret = target(rand1); - unregister_jprobe(&jp); - if (jph_val == 0) { - pr_err("jprobe handler not called\n"); - handler_errors++; - } - - return 0; -} - -static struct jprobe jp2 = { - .entry = j_kprobe_target, - .kp.symbol_name = "kprobe_target2" -}; - -static int test_jprobes(void) -{ - int ret; - struct jprobe *jps[2] = {&jp, &jp2}; - - /* addr and flags should be cleard for reusing kprobe. */ - jp.kp.addr = NULL; - jp.kp.flags = 0; - ret = register_jprobes(jps, 2); - if (ret < 0) { - pr_err("register_jprobes returned %d\n", ret); - return ret; - } - - jph_val = 0; - ret = target(rand1); - if (jph_val == 0) { - pr_err("jprobe handler not called\n"); - handler_errors++; - } - - jph_val = 0; - ret = target2(rand1); - if (jph_val == 0) { - pr_err("jprobe handler2 not called\n"); - handler_errors++; - } - unregister_jprobes(jps, 2); - - return 0; -} -#else -#define test_jprobe() (0) -#define test_jprobes() (0) -#endif #ifdef CONFIG_KRETPROBES static u32 krph_val; @@ -383,16 +299,6 @@ int init_test_probes(void) if (ret < 0) errors++; - num_tests++; - ret = test_jprobe(); - if (ret < 0) - errors++; - - num_tests++; - ret = test_jprobes(); - if (ret < 0) - errors++; - #ifdef CONFIG_KRETPROBES num_tests++; ret = test_kretprobe(); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 8838d1158d19..0b066b3c9284 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1718,7 +1718,7 @@ config KPROBES_SANITY_TEST default n help This option provides for testing basic kprobes functionality on - boot. A sample kprobe, jprobe and kretprobe are inserted and + boot. Samples of kprobe and kretprobe are inserted and verified for functionality. Say N if you are unsure. -- cgit v1.2.3 From ade5ef9280c33993099199c51e2e27c2c4013afd Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 21 Jun 2018 13:13:07 +0100 Subject: atomics: Make conditional ops return 'bool' Some of the atomics return a status value, which is a boolean value describing whether the operation was performed. To make it clear that this is a boolean value, let's update the common fallbacks to return bool, fixing up the return values and comments likewise. At the same time, let's simplify the description of the operations in their respective comments. 
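As a minimal illustration (assumed caller code, not from this series), the conditional ops now read naturally as predicates:

	#include <linux/atomic.h>

	static atomic_t refs = ATOMIC_INIT(1);

	/* Take a reference only if the object is still live. */
	static bool try_get(void)
	{
		/* true iff @refs was non-zero and was therefore incremented */
		return atomic_inc_not_zero(&refs);
	}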
The instrumented atomics and generic atomic64 implementation are updated accordingly. Note that atomic64_dec_if_positive() doesn't follow the usual test op pattern, and returns the would-be decremented value. This is not changed. Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Cc: Boqun Feng Cc: Linus Torvalds Cc: Michael Ellerman Cc: Thomas Gleixner Link: https://lore.kernel.org/lkml/20180621121321.4761-5-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/asm-generic/atomic-instrumented.h | 2 +- include/asm-generic/atomic64.h | 3 ++- include/linux/atomic.h | 24 +++++++++++++----------- lib/atomic64.c | 6 +++--- 4 files changed, 19 insertions(+), 16 deletions(-) (limited to 'lib') diff --git a/include/asm-generic/atomic-instrumented.h b/include/asm-generic/atomic-instrumented.h index b8b14cc2df6c..497faa4a05e3 100644 --- a/include/asm-generic/atomic-instrumented.h +++ b/include/asm-generic/atomic-instrumented.h @@ -205,7 +205,7 @@ static __always_inline s64 atomic64_dec_return(atomic64_t *v) return arch_atomic64_dec_return(v); } -static __always_inline s64 atomic64_inc_not_zero(atomic64_t *v) +static __always_inline bool atomic64_inc_not_zero(atomic64_t *v) { kasan_check_write(v, sizeof(*v)); return arch_atomic64_inc_not_zero(v); diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h index 8d28eb010d0d..a951a721e1bb 100644 --- a/include/asm-generic/atomic64.h +++ b/include/asm-generic/atomic64.h @@ -11,6 +11,7 @@ */ #ifndef _ASM_GENERIC_ATOMIC64_H #define _ASM_GENERIC_ATOMIC64_H +#include typedef struct { long long counter; @@ -52,7 +53,7 @@ ATOMIC64_OPS(xor) extern long long atomic64_dec_if_positive(atomic64_t *v); extern long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n); extern long long atomic64_xchg(atomic64_t *v, long long new); -extern int atomic64_add_unless(atomic64_t *v, long long a, long long u); +extern bool atomic64_add_unless(atomic64_t *v, long long a, long long u); #define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0) #define atomic64_inc(v) atomic64_add(1LL, (v)) diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 5c5620ae5a35..307a7f6d619a 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -2,6 +2,8 @@ /* Atomic operations usable in machine independent code */ #ifndef _LINUX_ATOMIC_H #define _LINUX_ATOMIC_H +#include + #include #include @@ -525,10 +527,10 @@ * @a: the amount to add to v... * @u: ...unless v is equal to u. * - * Atomically adds @a to @v, so long as @v was not already @u. - * Returns non-zero if @v was not @u, and zero otherwise. + * Atomically adds @a to @v, if @v was not already @u. + * Returns true if the addition was done. */ -static inline int atomic_add_unless(atomic_t *v, int a, int u) +static inline bool atomic_add_unless(atomic_t *v, int a, int u) { return atomic_fetch_add_unless(v, a, u) != u; } @@ -537,8 +539,8 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) * atomic_inc_not_zero - increment unless the number is zero * @v: pointer of type atomic_t * - * Atomically increments @v by 1, so long as @v is non-zero. - * Returns non-zero if @v was non-zero, and zero otherwise. + * Atomically increments @v by 1, if @v is non-zero. + * Returns true if the increment was done. 
*/ #ifndef atomic_inc_not_zero #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) @@ -572,28 +574,28 @@ static inline int atomic_fetch_andnot_release(int i, atomic_t *v) #endif #ifndef atomic_inc_unless_negative -static inline int atomic_inc_unless_negative(atomic_t *p) +static inline bool atomic_inc_unless_negative(atomic_t *p) { int v, v1; for (v = 0; v >= 0; v = v1) { v1 = atomic_cmpxchg(p, v, v + 1); if (likely(v1 == v)) - return 1; + return true; } - return 0; + return false; } #endif #ifndef atomic_dec_unless_positive -static inline int atomic_dec_unless_positive(atomic_t *p) +static inline bool atomic_dec_unless_positive(atomic_t *p) { int v, v1; for (v = 0; v <= 0; v = v1) { v1 = atomic_cmpxchg(p, v, v - 1); if (likely(v1 == v)) - return 1; + return true; } - return 0; + return false; } #endif diff --git a/lib/atomic64.c b/lib/atomic64.c index 53c2d5edc826..4230f4b8906c 100644 --- a/lib/atomic64.c +++ b/lib/atomic64.c @@ -178,16 +178,16 @@ long long atomic64_xchg(atomic64_t *v, long long new) } EXPORT_SYMBOL(atomic64_xchg); -int atomic64_add_unless(atomic64_t *v, long long a, long long u) +bool atomic64_add_unless(atomic64_t *v, long long a, long long u) { unsigned long flags; raw_spinlock_t *lock = lock_addr(v); - int ret = 0; + bool ret = false; raw_spin_lock_irqsave(lock, flags); if (v->counter != u) { v->counter += a; - ret = 1; + ret = true; } raw_spin_unlock_irqrestore(lock, flags); return ret; -- cgit v1.2.3 From 00b808ab79ead372daf1a0682d1ef271599c0b55 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 21 Jun 2018 13:13:11 +0100 Subject: atomics/generic: Define atomic64_fetch_add_unless() As a step towards unifying the atomic/atomic64/atomic_long APIs, this patch converts the generic implementation of atomic64_add_unless() into a generic implementation of atomic64_fetch_add_unless(). A wrapper in <linux/atomic.h> will build atomic64_add_unless() atop of this, provided it is given a preprocessor definition. No functional change is intended as a result of this patch.
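A sketch of that layering, assuming it mirrors the 32-bit atomic_add_unless() wrapper shown earlier in this series:

	/* In <linux/atomic.h>, enabled by the atomic64_fetch_add_unless
	 * definition added below (illustrative sketch, not the patch itself): */
	#ifdef atomic64_fetch_add_unless
	static inline bool atomic64_add_unless(atomic64_t *v, long long a, long long u)
	{
		return atomic64_fetch_add_unless(v, a, u) != u;
	}
	#endif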
Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Acked-by: Peter Zijlstra (Intel) Cc: Arnd Bergmann Cc: Boqun Feng Cc: Linus Torvalds Cc: Thomas Gleixner Link: https://lore.kernel.org/lkml/20180621121321.4761-9-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/asm-generic/atomic64.h | 3 ++- lib/atomic64.c | 14 +++++++------- 2 files changed, 9 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/include/asm-generic/atomic64.h b/include/asm-generic/atomic64.h index 5105275ac825..49460107b29a 100644 --- a/include/asm-generic/atomic64.h +++ b/include/asm-generic/atomic64.h @@ -53,7 +53,8 @@ ATOMIC64_OPS(xor) extern long long atomic64_dec_if_positive(atomic64_t *v); extern long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n); extern long long atomic64_xchg(atomic64_t *v, long long new); -extern bool atomic64_add_unless(atomic64_t *v, long long a, long long u); +extern long long atomic64_fetch_add_unless(atomic64_t *v, long long a, long long u); +#define atomic64_fetch_add_unless atomic64_fetch_add_unless #define atomic64_add_negative(a, v) (atomic64_add_return((a), (v)) < 0) #define atomic64_inc(v) atomic64_add(1LL, (v)) diff --git a/lib/atomic64.c b/lib/atomic64.c index 4230f4b8906c..1d91e31eceec 100644 --- a/lib/atomic64.c +++ b/lib/atomic64.c @@ -178,18 +178,18 @@ long long atomic64_xchg(atomic64_t *v, long long new) } EXPORT_SYMBOL(atomic64_xchg); -bool atomic64_add_unless(atomic64_t *v, long long a, long long u) +long long atomic64_fetch_add_unless(atomic64_t *v, long long a, long long u) { unsigned long flags; raw_spinlock_t *lock = lock_addr(v); - bool ret = false; + long long val; raw_spin_lock_irqsave(lock, flags); - if (v->counter != u) { + val = v->counter; + if (val != u) v->counter += a; - ret = true; - } raw_spin_unlock_irqrestore(lock, flags); - return ret; + + return val; } -EXPORT_SYMBOL(atomic64_add_unless); +EXPORT_SYMBOL(atomic64_fetch_add_unless); -- cgit v1.2.3 From 75a040ff14d9a99fc041f5e1d8f09541cab13ba4 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 1 Apr 2018 01:00:36 +0300 Subject: locking/refcounts: Include fewer headers in <linux/refcount.h> Debloat <linux/refcount.h>'s dependencies: - <linux/kernel.h> is not needed, but <linux/compiler.h> is. - <linux/mutex.h> is not needed, only a forward declaration of "struct mutex". - <linux/spinlock.h> is not needed, <linux/spinlock_types.h> is enough. Signed-off-by: Alexey Dobriyan Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Will Deacon Link: https://lkml.kernel.org/lkml/20180331220036.GA7676@avx2 Signed-off-by: Ingo Molnar --- arch/x86/include/asm/refcount.h | 1 + include/linux/refcount.h | 7 ++++--- lib/refcount.c | 2 ++ 3 files changed, 7 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/arch/x86/include/asm/refcount.h b/arch/x86/include/asm/refcount.h index 4cf11d88d3b3..19b90521954c 100644 --- a/arch/x86/include/asm/refcount.h +++ b/arch/x86/include/asm/refcount.h @@ -5,6 +5,7 @@ * PaX/grsecurity.
*/ #include +#include /* * This is the first portion of the refcount error handling, which lives in diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 4193c41e383a..c36addd27dd5 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -3,9 +3,10 @@ #define _LINUX_REFCOUNT_H #include -#include -#include -#include +#include +#include + +struct mutex; /** * struct refcount_t - variant of atomic_t specialized for reference counts diff --git a/lib/refcount.c b/lib/refcount.c index 0eb48353abe3..4bd842f20749 100644 --- a/lib/refcount.c +++ b/lib/refcount.c @@ -35,7 +35,9 @@ * */ +#include #include +#include #include #ifdef CONFIG_REFCOUNT_FULL -- cgit v1.2.3 From 02361bc7788852f33dd0a05235a52b9ccf097916 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 31 May 2018 11:45:25 -0700 Subject: lib/bch: Remove VLA usage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the quest to remove all stack VLA usage from the kernel[1], this allocates a fixed size stack array to cover the range needed for bch. This was done instead of a preallocation on the SLAB due to performance reasons, shown by Ivan Djelic:

little-endian, type sizes: int=4 long=8 longlong=8
cpu: Intel(R) Core(TM) i5 CPU 650 @ 3.20GHz
calibration: iter=4.9143µs niter=2034 nsamples=200 m=13 t=4

 Buffer allocation |  Encoding throughput (Mbit/s)
---------------------------------------------------
 on-stack, VLA     |   3988
 on-stack, fixed   |   4494
 kmalloc           |   1967

So this change actually improves performance too, it seems. The resulting stack allocation can get rather large; without CONFIG_BCH_CONST_PARAMS, it will allocate 4096 bytes, which trips the stack size checking:

lib/bch.c: In function ‘encode_bch’:
lib/bch.c:261:1: warning: the frame size of 4432 bytes is larger than 2048 bytes [-Wframe-larger-than=]

Even the default case for "allmodconfig" (with CONFIG_BCH_CONST_M=14 and CONFIG_BCH_CONST_T=4) would have started throwing a warning:

lib/bch.c: In function ‘encode_bch’:
lib/bch.c:261:1: warning: the frame size of 2288 bytes is larger than 2048 bytes [-Wframe-larger-than=]

But this is how large it's always been; it was just hidden from the checker because it was a VLA. So the Makefile has been adjusted to silence this warning for anything smaller than 4500 bytes, which should provide room for normal cases, but still low enough to catch any future pathological situations.
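Where those figures come from, as a worked expansion of the new macros (arithmetic derived from the patch below, not part of the commit):

	/*
	 * Without CONFIG_BCH_CONST_PARAMS:
	 *   BCH_MAX_M         = 15
	 *   BCH_MAX_T         = ((1 << 15) - 1) / 15        = 2184
	 *   BCH_ECC_MAX_WORDS = DIV_ROUND_UP(15 * 2184, 32) = 1024
	 *   r[BCH_ECC_MAX_WORDS] is 1024 * sizeof(uint32_t) = 4096 bytes,
	 * the allocation quoted above; the remaining locals account for
	 * the 4432-byte frame.
	 *
	 * With CONFIG_BCH_CONST_M=14 and CONFIG_BCH_CONST_T=4:
	 *   BCH_MAX_T         = ((1 << 14) - 1) / 14        = 1170
	 *   BCH_ECC_MAX_WORDS = DIV_ROUND_UP(14 * 1170, 32) = 512
	 *   r[] is 512 * sizeof(uint32_t)                   = 2048 bytes,
	 * consistent with the 2288-byte frame in the second warning.
	 */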
[1] https://lkml.kernel.org/r/CA+55aFzCG-zNmZwX4A2FQpadafLfEzK6CC=qPXydAacU1RqZWA@mail.gmail.com Signed-off-by: Kees Cook Reviewed-by: Ivan Djelic Tested-by: Ivan Djelic Acked-by: Boris Brezillon Signed-off-by: Boris Brezillon --- lib/Makefile | 1 + lib/bch.c | 23 +++++++++++++++-------- 2 files changed, 16 insertions(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/Makefile b/lib/Makefile index 956b320292fe..e1ee1046abf0 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -123,6 +123,7 @@ obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/ obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/ obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ obj-$(CONFIG_BCH) += bch.o +CFLAGS_bch.o := $(call cc-option,-Wframe-larger-than=4500) obj-$(CONFIG_LZO_COMPRESS) += lzo/ obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ obj-$(CONFIG_LZ4_COMPRESS) += lz4/ diff --git a/lib/bch.c b/lib/bch.c index bc89dfe4d1b3..7b0f2006698b 100644 --- a/lib/bch.c +++ b/lib/bch.c @@ -78,15 +78,22 @@ #define GF_M(_p) (CONFIG_BCH_CONST_M) #define GF_T(_p) (CONFIG_BCH_CONST_T) #define GF_N(_p) ((1 << (CONFIG_BCH_CONST_M))-1) +#define BCH_MAX_M (CONFIG_BCH_CONST_M) #else #define GF_M(_p) ((_p)->m) #define GF_T(_p) ((_p)->t) #define GF_N(_p) ((_p)->n) +#define BCH_MAX_M 15 #endif +#define BCH_MAX_T (((1 << BCH_MAX_M) - 1) / BCH_MAX_M) + #define BCH_ECC_WORDS(_p) DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 32) #define BCH_ECC_BYTES(_p) DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 8) +#define BCH_ECC_MAX_WORDS DIV_ROUND_UP(BCH_MAX_M * BCH_MAX_T, 32) +#define BCH_ECC_MAX_BYTES DIV_ROUND_UP(BCH_MAX_M * BCH_MAX_T, 8) + #ifndef dbg #define dbg(_fmt, args...) do {} while (0) #endif @@ -187,7 +194,8 @@ void encode_bch(struct bch_control *bch, const uint8_t *data, const unsigned int l = BCH_ECC_WORDS(bch)-1; unsigned int i, mlen; unsigned long m; - uint32_t w, r[l+1]; + uint32_t w, r[BCH_ECC_MAX_WORDS]; + const size_t r_bytes = BCH_ECC_WORDS(bch) * sizeof(*r); const uint32_t * const tab0 = bch->mod8_tab; const uint32_t * const tab1 = tab0 + 256*(l+1); const uint32_t * const tab2 = tab1 + 256*(l+1); @@ -198,7 +206,7 @@ void encode_bch(struct bch_control *bch, const uint8_t *data, /* load ecc parity bytes into internal 32-bit buffer */ load_ecc8(bch, bch->ecc_buf, ecc); } else { - memset(bch->ecc_buf, 0, sizeof(r)); + memset(bch->ecc_buf, 0, r_bytes); } /* process first unaligned data bytes */ @@ -215,7 +223,7 @@ void encode_bch(struct bch_control *bch, const uint8_t *data, mlen = len/4; data += 4*mlen; len -= 4*mlen; - memcpy(r, bch->ecc_buf, sizeof(r)); + memcpy(r, bch->ecc_buf, r_bytes); /* * split each 32-bit word into 4 polynomials of weight 8 as follows: @@ -241,7 +249,7 @@ void encode_bch(struct bch_control *bch, const uint8_t *data, r[l] = p0[l]^p1[l]^p2[l]^p3[l]; } - memcpy(bch->ecc_buf, r, sizeof(r)); + memcpy(bch->ecc_buf, r, r_bytes); /* process last unaligned bytes */ if (len) @@ -434,7 +442,7 @@ static int solve_linear_system(struct bch_control *bch, unsigned int *rows, { const int m = GF_M(bch); unsigned int tmp, mask; - int rem, c, r, p, k, param[m]; + int rem, c, r, p, k, param[BCH_MAX_M]; k = 0; mask = 1 << m; @@ -1114,7 +1122,7 @@ static int build_deg2_base(struct bch_control *bch) { const int m = GF_M(bch); int i, j, r; - unsigned int sum, x, y, remaining, ak = 0, xi[m]; + unsigned int sum, x, y, remaining, ak = 0, xi[BCH_MAX_M]; /* find k s.t. 
Tr(a^k) = 1 and 0 <= k < m */ - for (i = 0; i < m; i++) { @@ -1254,7 +1262,6 @@ struct bch_control *init_bch(int m, int t, unsigned int prim_poly) struct bch_control *bch = NULL; const int min_m = 5; - const int max_m = 15; /* default primitive polynomials */ static const unsigned int prim_poly_tab[] = { @@ -1270,7 +1277,7 @@ struct bch_control *init_bch(int m, int t, unsigned int prim_poly) goto fail; } #endif - if ((m < min_m) || (m > max_m)) + if ((m < min_m) || (m > BCH_MAX_M)) /* * values of m greater than 15 are not currently supported; * supporting m > 15 would require changing table base type -- cgit v1.2.3 From cbab901296232b1247b46e6e127103d2f738d783 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 18 Jun 2018 12:52:50 +1000 Subject: rhashtable: silence RCU warning in rhashtable_test. print_ht in rhashtable_test calls rht_dereference() with neither RCU protection nor the mutex. This triggers an RCU warning. So take the mutex to silence the warning. Acked-by: Herbert Xu Signed-off-by: NeilBrown Signed-off-by: David S. Miller --- lib/test_rhashtable.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib') diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index fb6968109113..6ca59ffcacbe 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -501,6 +501,8 @@ static unsigned int __init print_ht(struct rhltable *rhlt) unsigned int i, cnt = 0; ht = &rhlt->ht; + /* Take the mutex to avoid RCU warning */ + mutex_lock(&ht->mutex); tbl = rht_dereference(ht->tbl, ht); for (i = 0; i < tbl->size; i++) { struct rhash_head *pos, *next; @@ -534,6 +536,7 @@ static unsigned int __init print_ht(struct rhltable *rhlt) } } printk(KERN_ERR "\n---- ht: ----%s\n-------------\n", buff); + mutex_unlock(&ht->mutex); return cnt; } -- cgit v1.2.3 From 0eb71a9da5796851fa87ddc1a534066c0fe54055 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 18 Jun 2018 12:52:50 +1000 Subject: rhashtable: split rhashtable.h Due to the use of rhashtables in net namespaces, rhashtable.h is included in lots of the kernel, so a small change can require a large recompilation. This makes development painful. This patch splits out rhashtable-types.h which just includes the major type declarations, and does not include (non-trivial) inline code. rhashtable.h is no longer included by anything in the include/ directory. Common include files only include rhashtable-types.h so a large recompilation is only triggered when that changes. Acked-by: Herbert Xu Signed-off-by: NeilBrown Signed-off-by: David S.
Miller --- MAINTAINERS | 2 + drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 1 + include/linux/ipc.h | 2 +- include/linux/ipc_namespace.h | 2 +- include/linux/mroute_base.h | 2 +- include/linux/rhashtable-types.h | 139 +++++++++++++++++++++++++++++ include/linux/rhashtable.h | 127 +------------------------- include/net/inet_frag.h | 2 +- include/net/netfilter/nf_flow_table.h | 2 +- include/net/sctp/structs.h | 2 +- include/net/seg6.h | 2 +- include/net/seg6_hmac.h | 2 +- ipc/msg.c | 1 + ipc/sem.c | 1 + ipc/shm.c | 1 + ipc/util.c | 1 + lib/rhashtable.c | 1 + net/ipv4/inet_fragment.c | 1 + net/ipv4/ipmr.c | 1 + net/ipv4/ipmr_base.c | 1 + net/ipv6/ip6mr.c | 1 + net/ipv6/seg6.c | 1 + net/ipv6/seg6_hmac.c | 1 + net/netfilter/nf_tables_api.c | 1 + net/sctp/input.c | 1 + net/sctp/socket.c | 1 + 26 files changed, 166 insertions(+), 133 deletions(-) create mode 100644 include/linux/rhashtable-types.h (limited to 'lib') diff --git a/MAINTAINERS b/MAINTAINERS index edf3cf5ea691..99e5cef8172e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -12162,7 +12162,9 @@ M: Herbert Xu L: netdev@vger.kernel.org S: Maintained F: lib/rhashtable.c +F: lib/test_rhashtable.c F: include/linux/rhashtable.h +F: include/linux/rhashtable-types.h RICOH R5C592 MEMORYSTICK DRIVER M: Maxim Levitsky diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index 0dbe2d9e22d6..1adb968b8354 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/ipc.h b/include/linux/ipc.h index 6cc2df7f7ac9..e1c9eea6015b 100644 --- a/include/linux/ipc.h +++ b/include/linux/ipc.h @@ -4,7 +4,7 @@ #include #include -#include +#include #include #include diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index b5630c8eb2f3..6cea726612b7 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -9,7 +9,7 @@ #include #include #include -#include +#include struct user_namespace; diff --git a/include/linux/mroute_base.h b/include/linux/mroute_base.h index d633f737b3c6..fd436cdd4725 100644 --- a/include/linux/mroute_base.h +++ b/include/linux/mroute_base.h @@ -2,7 +2,7 @@ #define __LINUX_MROUTE_BASE_H #include -#include +#include #include #include #include diff --git a/include/linux/rhashtable-types.h b/include/linux/rhashtable-types.h new file mode 100644 index 000000000000..9740063ff13b --- /dev/null +++ b/include/linux/rhashtable-types.h @@ -0,0 +1,139 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Resizable, Scalable, Concurrent Hash Table + * + * Simple structures that might be needed in include + * files. 
+ */ + +#ifndef _LINUX_RHASHTABLE_TYPES_H +#define _LINUX_RHASHTABLE_TYPES_H + +#include +#include +#include +#include + +struct rhash_head { + struct rhash_head __rcu *next; +}; + +struct rhlist_head { + struct rhash_head rhead; + struct rhlist_head __rcu *next; +}; + +struct bucket_table; + +/** + * struct rhashtable_compare_arg - Key for the function rhashtable_compare + * @ht: Hash table + * @key: Key to compare against + */ +struct rhashtable_compare_arg { + struct rhashtable *ht; + const void *key; +}; + +typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed); +typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 len, u32 seed); +typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg, + const void *obj); + +/** + * struct rhashtable_params - Hash table construction parameters + * @nelem_hint: Hint on number of elements, should be 75% of desired size + * @key_len: Length of key + * @key_offset: Offset of key in struct to be hashed + * @head_offset: Offset of rhash_head in struct to be hashed + * @max_size: Maximum size while expanding + * @min_size: Minimum size while shrinking + * @locks_mul: Number of bucket locks to allocate per cpu (default: 32) + * @automatic_shrinking: Enable automatic shrinking of tables + * @nulls_base: Base value to generate nulls marker + * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash) + * @obj_hashfn: Function to hash object + * @obj_cmpfn: Function to compare key with object + */ +struct rhashtable_params { + u16 nelem_hint; + u16 key_len; + u16 key_offset; + u16 head_offset; + unsigned int max_size; + u16 min_size; + bool automatic_shrinking; + u8 locks_mul; + u32 nulls_base; + rht_hashfn_t hashfn; + rht_obj_hashfn_t obj_hashfn; + rht_obj_cmpfn_t obj_cmpfn; +}; + +/** + * struct rhashtable - Hash table handle + * @tbl: Bucket table + * @key_len: Key length for hashfn + * @max_elems: Maximum number of elements in table + * @p: Configuration parameters + * @rhlist: True if this is an rhltable + * @run_work: Deferred worker to expand/shrink asynchronously + * @mutex: Mutex to protect current/future table swapping + * @lock: Spin lock to protect walker list + * @nelems: Number of elements in table + */ +struct rhashtable { + struct bucket_table __rcu *tbl; + unsigned int key_len; + unsigned int max_elems; + struct rhashtable_params p; + bool rhlist; + struct work_struct run_work; + struct mutex mutex; + spinlock_t lock; + atomic_t nelems; +}; + +/** + * struct rhltable - Hash table with duplicate objects in a list + * @ht: Underlying rhtable + */ +struct rhltable { + struct rhashtable ht; +}; + +/** + * struct rhashtable_walker - Hash table walker + * @list: List entry on list of walkers + * @tbl: The table that we were walking over + */ +struct rhashtable_walker { + struct list_head list; + struct bucket_table *tbl; +}; + +/** + * struct rhashtable_iter - Hash table iterator + * @ht: Table to iterate through + * @p: Current pointer + * @list: Current hash list pointer + * @walker: Associated rhashtable walker + * @slot: Current slot + * @skip: Number of entries to skip in slot + */ +struct rhashtable_iter { + struct rhashtable *ht; + struct rhash_head *p; + struct rhlist_head *list; + struct rhashtable_walker walker; + unsigned int slot; + unsigned int skip; + bool end_of_table; +}; + +int rhashtable_init(struct rhashtable *ht, + const struct rhashtable_params *params); +int rhltable_init(struct rhltable *hlt, + const struct rhashtable_params *params); + +#endif /* _LINUX_RHASHTABLE_TYPES_H */ diff --git 
a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 4e1f535c2034..48754ab07cdf 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Resizable, Scalable, Concurrent Hash Table * @@ -17,16 +18,14 @@ #ifndef _LINUX_RHASHTABLE_H #define _LINUX_RHASHTABLE_H -#include -#include #include #include #include #include #include -#include #include +#include /* * The end of the chain is marked with a special nulls marks which has * the following format: @@ -64,15 +63,6 @@ */ #define RHT_ELASTICITY 16u -struct rhash_head { - struct rhash_head __rcu *next; -}; - -struct rhlist_head { - struct rhash_head rhead; - struct rhlist_head __rcu *next; -}; - /** * struct bucket_table - Table of hash buckets * @size: Number of hash buckets @@ -102,114 +92,6 @@ struct bucket_table { struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; }; -/** - * struct rhashtable_compare_arg - Key for the function rhashtable_compare - * @ht: Hash table - * @key: Key to compare against - */ -struct rhashtable_compare_arg { - struct rhashtable *ht; - const void *key; -}; - -typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed); -typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 len, u32 seed); -typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg, - const void *obj); - -struct rhashtable; - -/** - * struct rhashtable_params - Hash table construction parameters - * @nelem_hint: Hint on number of elements, should be 75% of desired size - * @key_len: Length of key - * @key_offset: Offset of key in struct to be hashed - * @head_offset: Offset of rhash_head in struct to be hashed - * @max_size: Maximum size while expanding - * @min_size: Minimum size while shrinking - * @locks_mul: Number of bucket locks to allocate per cpu (default: 32) - * @automatic_shrinking: Enable automatic shrinking of tables - * @nulls_base: Base value to generate nulls marker - * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash) - * @obj_hashfn: Function to hash object - * @obj_cmpfn: Function to compare key with object - */ -struct rhashtable_params { - u16 nelem_hint; - u16 key_len; - u16 key_offset; - u16 head_offset; - unsigned int max_size; - u16 min_size; - bool automatic_shrinking; - u8 locks_mul; - u32 nulls_base; - rht_hashfn_t hashfn; - rht_obj_hashfn_t obj_hashfn; - rht_obj_cmpfn_t obj_cmpfn; -}; - -/** - * struct rhashtable - Hash table handle - * @tbl: Bucket table - * @key_len: Key length for hashfn - * @max_elems: Maximum number of elements in table - * @p: Configuration parameters - * @rhlist: True if this is an rhltable - * @run_work: Deferred worker to expand/shrink asynchronously - * @mutex: Mutex to protect current/future table swapping - * @lock: Spin lock to protect walker list - * @nelems: Number of elements in table - */ -struct rhashtable { - struct bucket_table __rcu *tbl; - unsigned int key_len; - unsigned int max_elems; - struct rhashtable_params p; - bool rhlist; - struct work_struct run_work; - struct mutex mutex; - spinlock_t lock; - atomic_t nelems; -}; - -/** - * struct rhltable - Hash table with duplicate objects in a list - * @ht: Underlying rhtable - */ -struct rhltable { - struct rhashtable ht; -}; - -/** - * struct rhashtable_walker - Hash table walker - * @list: List entry on list of walkers - * @tbl: The table that we were walking over - */ -struct rhashtable_walker { - struct list_head list; - struct bucket_table *tbl; -}; - -/** - * struct rhashtable_iter - Hash table 
iterator - * @ht: Table to iterate through - * @p: Current pointer - * @list: Current hash list pointer - * @walker: Associated rhashtable walker - * @slot: Current slot - * @skip: Number of entries to skip in slot - */ -struct rhashtable_iter { - struct rhashtable *ht; - struct rhash_head *p; - struct rhlist_head *list; - struct rhashtable_walker walker; - unsigned int slot; - unsigned int skip; - bool end_of_table; -}; - static inline unsigned long rht_marker(const struct rhashtable *ht, u32 hash) { return NULLS_MARKER(ht->p.nulls_base + hash); @@ -376,11 +258,6 @@ static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, } #endif /* CONFIG_PROVE_LOCKING */ -int rhashtable_init(struct rhashtable *ht, - const struct rhashtable_params *params); -int rhltable_init(struct rhltable *hlt, - const struct rhashtable_params *params); - void *rhashtable_insert_slow(struct rhashtable *ht, const void *key, struct rhash_head *obj); diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index ed07e3786d98..f4272a29dc44 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -2,7 +2,7 @@ #ifndef __NET_FRAG_H__ #define __NET_FRAG_H__ -#include +#include struct netns_frags { /* sysctls */ diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index ba9fa4592f2b..0e355f4a3d76 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index dbe1b911a24d..e0f962d27386 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -48,7 +48,7 @@ #define __sctp_structs_h__ #include -#include +#include #include /* linux/in.h needs this!! */ #include /* We get struct sockaddr_in. 
*/ #include /* We get struct in6_addr */ diff --git a/include/net/seg6.h b/include/net/seg6.h index e029e301faa5..2567941a2f32 100644 --- a/include/net/seg6.h +++ b/include/net/seg6.h @@ -18,7 +18,7 @@ #include #include #include -#include +#include static inline void update_csum_diff4(struct sk_buff *skb, __be32 from, __be32 to) diff --git a/include/net/seg6_hmac.h b/include/net/seg6_hmac.h index 69c3a106056b..7fda469e2758 100644 --- a/include/net/seg6_hmac.h +++ b/include/net/seg6_hmac.h @@ -22,7 +22,7 @@ #include #include #include -#include +#include #define SEG6_HMAC_MAX_DIGESTSIZE 160 #define SEG6_HMAC_RING_SIZE 256 diff --git a/ipc/msg.c b/ipc/msg.c index 3b6545302598..203281198079 100644 --- a/ipc/msg.c +++ b/ipc/msg.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include diff --git a/ipc/sem.c b/ipc/sem.c index 5af1943ad782..29c0347ef11d 100644 --- a/ipc/sem.c +++ b/ipc/sem.c @@ -86,6 +86,7 @@ #include #include #include +#include #include #include "util.h" diff --git a/ipc/shm.c b/ipc/shm.c index 051a3e1fb8df..d4daf78df6da 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -43,6 +43,7 @@ #include #include #include +#include #include diff --git a/ipc/util.c b/ipc/util.c index 4e81182fa0ac..fdffff41f65b 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -63,6 +63,7 @@ #include #include #include +#include #include diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 9427b5766134..c9fafea7dc6e 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -28,6 +28,7 @@ #include #include #include +#include #define HASH_DEFAULT_SIZE 64UL #define HASH_MIN_SIZE 4U diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index c9e35b81d093..316518f87294 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9f79b9803a16..82f914122f1b 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -60,6 +60,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c index cafb0506c8c9..1ad9aa62a97b 100644 --- a/net/ipv4/ipmr_base.c +++ b/net/ipv4/ipmr_base.c @@ -2,6 +2,7 @@ * Common logic shared by IPv4 [ipmr] and IPv6 [ip6mr] implementation */ +#include #include /* Sets everything common except 'dev', since that is done under locking */ diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 0d0f0053bb11..d0b7e0249c13 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 0fdf2a55e746..8d0ba757a46c 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c index 33fb35cbfac1..b1791129a875 100644 --- a/net/ipv6/seg6_hmac.c +++ b/net/ipv6/seg6_hmac.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 896d4a36081d..3f211e1025c1 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sctp/input.c b/net/sctp/input.c index ba8a6e6c36fa..9bbc5f92c941 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -56,6 +56,7 @@ #include #include #include +#include /* Forward declarations for internal helpers. 
*/ static int sctp_rcv_ootb(struct sk_buff *); diff --git a/net/sctp/socket.c b/net/sctp/socket.c index d20f7addee19..0e91e83eea5a 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -66,6 +66,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3 From 9f9a707738aa7a8b9f78a641b83927ada256a626 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 18 Jun 2018 12:52:50 +1000 Subject: rhashtable: remove nulls_base and related code. This "feature" is unused, undocumented, and untested and so doesn't really belong. A patch is under development to properly implement support for detecting when a search gets diverted down a different chain, which is the common purpose of nulls markers. This patch actually fixes a bug too. The table resizing allows a table to grow to 2^31 buckets, but the hash is truncated to 27 bits - any growth beyond 2^27 is wasteful and ineffective. This patch results in NULLS_MARKER(0) being used for all chains, and leaves the use of rht_is_a_nulls() to test for it. Acked-by: Herbert Xu Signed-off-by: NeilBrown Signed-off-by: David S. Miller --- include/linux/rhashtable-types.h | 2 -- include/linux/rhashtable.h | 33 +++------------------------------ lib/rhashtable.c | 8 -------- lib/test_rhashtable.c | 5 +---- net/core/xdp.c | 4 ++-- 5 files changed, 6 insertions(+), 46 deletions(-) (limited to 'lib') diff --git a/include/linux/rhashtable-types.h b/include/linux/rhashtable-types.h index 9740063ff13b..763d613ce2c2 100644 --- a/include/linux/rhashtable-types.h +++ b/include/linux/rhashtable-types.h @@ -50,7 +50,6 @@ typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg, * @min_size: Minimum size while shrinking * @locks_mul: Number of bucket locks to allocate per cpu (default: 32) * @automatic_shrinking: Enable automatic shrinking of tables - * @nulls_base: Base value to generate nulls marker * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash) * @obj_hashfn: Function to hash object * @obj_cmpfn: Function to compare key with object @@ -64,7 +63,6 @@ struct rhashtable_params { u16 min_size; bool automatic_shrinking; u8 locks_mul; - u32 nulls_base; rht_hashfn_t hashfn; rht_obj_hashfn_t obj_hashfn; rht_obj_cmpfn_t obj_cmpfn; }; diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 48754ab07cdf..d9f719af7936 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -28,25 +28,8 @@ #include /* * The end of the chain is marked with a special nulls marks which has - * the following format: - * - * +-------+-----------------------------------------------------+-+ - * | Base | Hash |1| - * +-------+-----------------------------------------------------+-+ - * - * Base (4 bits) : Reserved to distinguish between multiple tables. - * Specified via &struct rhashtable_params.nulls_base. - * Hash (27 bits): Full hash (unmasked) of first element added to bucket - * 1 (1 bit) : Nulls marker (always set) - * - * The remaining bits of the next pointer remain unused for now. + * the least significant bit set.
*/ -#define RHT_BASE_BITS 4 -#define RHT_HASH_BITS 27 -#define RHT_BASE_SHIFT RHT_HASH_BITS - -/* Base bits plus 1 bit for nulls marker */ -#define RHT_HASH_RESERVED_SPACE (RHT_BASE_BITS + 1) /* Maximum chain length before rehash * @@ -92,24 +75,14 @@ struct bucket_table { struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; }; -static inline unsigned long rht_marker(const struct rhashtable *ht, u32 hash) -{ - return NULLS_MARKER(ht->p.nulls_base + hash); -} - #define INIT_RHT_NULLS_HEAD(ptr, ht, hash) \ - ((ptr) = (typeof(ptr)) rht_marker(ht, hash)) + ((ptr) = (typeof(ptr)) NULLS_MARKER(0)) static inline bool rht_is_a_nulls(const struct rhash_head *ptr) { return ((unsigned long) ptr & 1); } -static inline unsigned long rht_get_nulls_value(const struct rhash_head *ptr) -{ - return ((unsigned long) ptr) >> 1; -} - static inline void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he) { @@ -119,7 +92,7 @@ static inline void *rht_obj(const struct rhashtable *ht, static inline unsigned int rht_bucket_index(const struct bucket_table *tbl, unsigned int hash) { - return (hash >> RHT_HASH_RESERVED_SPACE) & (tbl->size - 1); + return hash & (tbl->size - 1); } static inline unsigned int rht_key_get_hash(struct rhashtable *ht, diff --git a/lib/rhashtable.c b/lib/rhashtable.c index c9fafea7dc6e..688693c919be 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -995,7 +995,6 @@ static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed) * .key_offset = offsetof(struct test_obj, key), * .key_len = sizeof(int), * .hashfn = jhash, - * .nulls_base = (1U << RHT_BASE_SHIFT), * }; * * Configuration Example 2: Variable length keys @@ -1029,9 +1028,6 @@ int rhashtable_init(struct rhashtable *ht, (params->obj_hashfn && !params->obj_cmpfn)) return -EINVAL; - if (params->nulls_base && params->nulls_base < (1U << RHT_BASE_SHIFT)) - return -EINVAL; - memset(ht, 0, sizeof(*ht)); mutex_init(&ht->mutex); spin_lock_init(&ht->lock); @@ -1096,10 +1092,6 @@ int rhltable_init(struct rhltable *hlt, const struct rhashtable_params *params) { int err; - /* No rhlist NULLs marking for now. 
*/ - if (params->nulls_base) - return -EINVAL; - err = rhashtable_init(&hlt->ht, params); hlt->ht.rhlist = true; return err; diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index 6ca59ffcacbe..82ac39ce5310 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -83,7 +83,7 @@ static u32 my_hashfn(const void *data, u32 len, u32 seed) { const struct test_obj_rhl *obj = data; - return (obj->value.id % 10) << RHT_HASH_RESERVED_SPACE; + return (obj->value.id % 10); } static int my_cmpfn(struct rhashtable_compare_arg *arg, const void *obj) @@ -99,7 +99,6 @@ static struct rhashtable_params test_rht_params = { .key_offset = offsetof(struct test_obj, value), .key_len = sizeof(struct test_obj_val), .hashfn = jhash, - .nulls_base = (3U << RHT_BASE_SHIFT), }; static struct rhashtable_params test_rht_params_dup = { @@ -296,8 +295,6 @@ static int __init test_rhltable(unsigned int entries) if (!obj_in_table) goto out_free; - /* nulls_base not supported in rhlist interface */ - test_rht_params.nulls_base = 0; err = rhltable_init(&rhlt, &test_rht_params); if (WARN_ON(err)) goto out_free; diff --git a/net/core/xdp.c b/net/core/xdp.c index 9d1f22072d5d..31c58719b5a9 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -45,8 +45,8 @@ static u32 xdp_mem_id_hashfn(const void *data, u32 len, u32 seed) BUILD_BUG_ON(FIELD_SIZEOF(struct xdp_mem_allocator, mem.id) != sizeof(u32)); - /* Use cyclic increasing ID as direct hash key, see rht_bucket_index */ - return key << RHT_HASH_RESERVED_SPACE; + /* Use cyclic increasing ID as direct hash key */ + return key; } static int xdp_mem_id_cmp(struct rhashtable_compare_arg *arg, -- cgit v1.2.3 From 9b4f64a227b6f462482a8cc68c7134dc6e26f1c1 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 18 Jun 2018 12:52:50 +1000 Subject: rhashtable: simplify INIT_RHT_NULLS_HEAD() The 'ht' and 'hash' arguments to INIT_RHT_NULLS_HEAD() are no longer used - so drop them. This allows us to also remove the nhash argument from nested_table_alloc(). Acked-by: Herbert Xu Signed-off-by: NeilBrown Signed-off-by: David S. 
Miller --- include/linux/rhashtable.h | 2 +- lib/rhashtable.c | 15 ++++++--------- 2 files changed, 7 insertions(+), 10 deletions(-) (limited to 'lib') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index d9f719af7936..3f3a182bd0b4 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -75,7 +75,7 @@ struct bucket_table { struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; }; -#define INIT_RHT_NULLS_HEAD(ptr, ht, hash) \ +#define INIT_RHT_NULLS_HEAD(ptr) \ ((ptr) = (typeof(ptr)) NULLS_MARKER(0)) static inline bool rht_is_a_nulls(const struct rhash_head *ptr) diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 688693c919be..a81cd27d518c 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -116,8 +116,7 @@ static void bucket_table_free_rcu(struct rcu_head *head) static union nested_table *nested_table_alloc(struct rhashtable *ht, union nested_table __rcu **prev, - unsigned int shifted, - unsigned int nhash) + unsigned int shifted) { union nested_table *ntbl; int i; @@ -130,8 +129,7 @@ static union nested_table *nested_table_alloc(struct rhashtable *ht, if (ntbl && shifted) { for (i = 0; i < PAGE_SIZE / sizeof(ntbl[0].bucket); i++) - INIT_RHT_NULLS_HEAD(ntbl[i].bucket, ht, - (i << shifted) | nhash); + INIT_RHT_NULLS_HEAD(ntbl[i].bucket); } rcu_assign_pointer(*prev, ntbl); @@ -157,7 +155,7 @@ static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht, return NULL; if (!nested_table_alloc(ht, (union nested_table __rcu **)tbl->buckets, - 0, 0)) { + 0)) { kfree(tbl); return NULL; } @@ -207,7 +205,7 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, tbl->hash_rnd = get_random_u32(); for (i = 0; i < nbuckets; i++) - INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i); + INIT_RHT_NULLS_HEAD(tbl->buckets[i]); return tbl; } @@ -1217,7 +1215,7 @@ struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht, nhash = index; shifted = tbl->nest; ntbl = nested_table_alloc(ht, &ntbl[index].table, - size <= (1 << shift) ? shifted : 0, nhash); + size <= (1 << shift) ? shifted : 0); while (ntbl && size > (1 << shift)) { index = hash & ((1 << shift) - 1); @@ -1226,8 +1224,7 @@ struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht, nhash |= index << shifted; shifted += shift; ntbl = nested_table_alloc(ht, &ntbl[index].table, - size <= (1 << shift) ? shifted : 0, - nhash); + size <= (1 << shift) ? shifted : 0); } if (!ntbl) -- cgit v1.2.3 From 5af68ef7333c8606bfe6e400cb962081518c3acb Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 18 Jun 2018 12:52:50 +1000 Subject: rhashtable: simplify nested_table_alloc() and rht_bucket_nested_insert() Now that we don't use the hash value or shift in nested_table_alloc() there is room for simplification. We only need to pass a "is this a leaf" flag to nested_table_alloc(), and don't need to track as much information in rht_bucket_nested_insert(). Note there is another minor cleanup in nested_table_alloc() here. The number of elements in a page of "union nested_tables" is most naturally PAGE_SIZE / sizeof(ntbl[0]) The previous code had PAGE_SIZE / sizeof(ntbl[0].bucket) which happens to be the correct value only because the bucket uses all the space in the union. Acked-by: Herbert Xu Signed-off-by: NeilBrown Signed-off-by: David S. 
Miller --- lib/rhashtable.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'lib') diff --git a/lib/rhashtable.c b/lib/rhashtable.c index a81cd27d518c..2aa41c15df17 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -116,7 +116,7 @@ static void bucket_table_free_rcu(struct rcu_head *head) static union nested_table *nested_table_alloc(struct rhashtable *ht, union nested_table __rcu **prev, - unsigned int shifted) + bool leaf) { union nested_table *ntbl; int i; @@ -127,8 +127,8 @@ static union nested_table *nested_table_alloc(struct rhashtable *ht, ntbl = kzalloc(PAGE_SIZE, GFP_ATOMIC); - if (ntbl && shifted) { - for (i = 0; i < PAGE_SIZE / sizeof(ntbl[0].bucket); i++) + if (ntbl && leaf) { + for (i = 0; i < PAGE_SIZE / sizeof(ntbl[0]); i++) INIT_RHT_NULLS_HEAD(ntbl[i].bucket); } @@ -155,7 +155,7 @@ static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht, return NULL; if (!nested_table_alloc(ht, (union nested_table __rcu **)tbl->buckets, - 0)) { + false)) { kfree(tbl); return NULL; } @@ -1207,24 +1207,18 @@ struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht, unsigned int index = hash & ((1 << tbl->nest) - 1); unsigned int size = tbl->size >> tbl->nest; union nested_table *ntbl; - unsigned int shifted; - unsigned int nhash; ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]); hash >>= tbl->nest; - nhash = index; - shifted = tbl->nest; ntbl = nested_table_alloc(ht, &ntbl[index].table, - size <= (1 << shift) ? shifted : 0); + size <= (1 << shift)); while (ntbl && size > (1 << shift)) { index = hash & ((1 << shift) - 1); size >>= shift; hash >>= shift; - nhash |= index << shifted; - shifted += shift; ntbl = nested_table_alloc(ht, &ntbl[index].table, - size <= (1 << shift) ? shifted : 0); + size <= (1 << shift)); } if (!ntbl) -- cgit v1.2.3 From 0ad66449aa3cbaedbdeaf55bffce74084bb7e9f9 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 18 Jun 2018 12:52:50 +1000 Subject: rhashtable: use cmpxchg() to protect ->future_tbl. Rather than borrowing one of the bucket locks to protect ->future_tbl updates, use cmpxchg(). This gives more freedom to change how bucket locking is implemented. Acked-by: Herbert Xu Signed-off-by: NeilBrown Signed-off-by: David S. Miller --- lib/rhashtable.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 2aa41c15df17..52ec83212856 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -297,21 +297,14 @@ static int rhashtable_rehash_attach(struct rhashtable *ht, struct bucket_table *old_tbl, struct bucket_table *new_tbl) { - /* Protect future_tbl using the first bucket lock. */ - spin_lock_bh(old_tbl->locks); - - /* Did somebody beat us to it? */ - if (rcu_access_pointer(old_tbl->future_tbl)) { - spin_unlock_bh(old_tbl->locks); - return -EEXIST; - } - /* Make insertions go into the new, empty table right away. Deletions * and lookups will be attempted in both tables until we synchronize. + * As cmpxchg() provides strong barriers, we do not need + * rcu_assign_pointer(). */ - rcu_assign_pointer(old_tbl->future_tbl, new_tbl); - spin_unlock_bh(old_tbl->locks); + if (cmpxchg(&old_tbl->future_tbl, NULL, new_tbl) != NULL) + return -EEXIST; return 0; } -- cgit v1.2.3 From c0690016a73fe6bd456887bbbe6e10c7f0096554 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 18 Jun 2018 12:52:50 +1000 Subject: rhashtable: clean up dereference of ->future_tbl. 
Using rht_dereference_bucket() to dereference ->future_tbl looks like a type error, and could be confusing. Using rht_dereference_rcu() to test a pointer for NULL adds an unnecessary barrier - rcu_access_pointer() is preferred for NULL tests when no lock is held. This uses 3 different ways to access ->future_tbl. - if we know the mutex is held, use rht_dereference() - if we don't hold the mutex, and are only testing for NULL, use rcu_access_pointer() - otherwise (using RCU protection for true dereference), use rht_dereference_rcu(). Note that this includes a simplification of the call to rhashtable_last_table() - we don't do an extra dereference before the call any more. Acked-by: Herbert Xu Signed-off-by: NeilBrown Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 2 +- lib/rhashtable.c | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 3f3a182bd0b4..eb7111039247 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -595,7 +595,7 @@ static inline void *__rhashtable_insert_fast( lock = rht_bucket_lock(tbl, hash); spin_lock_bh(lock); - if (unlikely(rht_dereference_bucket(tbl->future_tbl, tbl, hash))) { + if (unlikely(rcu_access_pointer(tbl->future_tbl))) { slow_path: spin_unlock_bh(lock); rcu_read_unlock(); diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 52ec83212856..0e04947b7e0c 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -226,8 +226,7 @@ static struct bucket_table *rhashtable_last_table(struct rhashtable *ht, static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash) { struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); - struct bucket_table *new_tbl = rhashtable_last_table(ht, - rht_dereference_rcu(old_tbl->future_tbl, ht)); + struct bucket_table *new_tbl = rhashtable_last_table(ht, old_tbl); struct rhash_head __rcu **pprev = rht_bucket_var(old_tbl, old_hash); int err = -EAGAIN; struct rhash_head *head, *next, *entry; @@ -467,7 +466,7 @@ static int rhashtable_insert_rehash(struct rhashtable *ht, fail: /* Do not fail the insert if someone else did a rehash. */ - if (likely(rcu_dereference_raw(tbl->future_tbl))) + if (likely(rcu_access_pointer(tbl->future_tbl))) return 0; /* Schedule async rehash to retry allocation in process context. */ @@ -540,7 +539,7 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht, if (PTR_ERR(data) != -EAGAIN && PTR_ERR(data) != -ENOENT) return ERR_CAST(data); - new_tbl = rcu_dereference(tbl->future_tbl); + new_tbl = rht_dereference_rcu(tbl->future_tbl, ht); if (new_tbl) return new_tbl; @@ -599,7 +598,7 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key, break; spin_unlock_bh(lock); - tbl = rcu_dereference(tbl->future_tbl); + tbl = rht_dereference_rcu(tbl->future_tbl, ht); } data = rhashtable_lookup_one(ht, tbl, hash, key, obj); -- cgit v1.2.3 From 22eceb8bf3e8f1f9b2f566062d06b25807725d7f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 19 Jun 2018 13:57:26 +0200 Subject: printk: Make CONSOLE_LOGLEVEL_QUIET configurable The goal of passing the "quiet" option to the kernel is for the kernel to be quiet unless something really is wrong. So far, passing quiet has been (mostly) equivalent to passing loglevel=4 on the kernel commandline, which means showing any messages with a level of KERN_ERR or higher severity on the console.
In practice this often does not result in a quiet boot though, since there are many false-positive or otherwise harmless error messages printed, defeating the purpose of the quiet option. Especially the ACPICA code is really bad wrt this, but there are plenty of others too. This commit makes CONSOLE_LOGLEVEL_QUIET configurable. This for example will allow distros which want quiet to really mean quiet to set CONSOLE_LOGLEVEL_QUIET so that only messages with a higher severity than KERN_ERR (CRIT, ALERT, EMERG) get printed, avoiding an endless game of whack-a-mole silencing harmless error messages. Link: http://lkml.kernel.org/r/20180619115726.3098-1-hdegoede@redhat.com To: Petr Mladek To: Sergey Senozhatsky Cc: Hans de Goede Cc: Steven Rostedt Cc: linux-kernel@vger.kernel.org Signed-off-by: Hans de Goede Acked-by: Steven Rostedt (VMware) Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- include/linux/printk.h | 6 +++--- lib/Kconfig.debug | 11 +++++++++++ 2 files changed, 14 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/include/linux/printk.h b/include/linux/printk.h index 6d7e800affd8..18602bb3eca8 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -50,15 +50,15 @@ static inline const char *printk_skip_headers(const char *buffer) /* We show everything that is MORE important than this.. */ #define CONSOLE_LOGLEVEL_SILENT 0 /* Mum's the word */ #define CONSOLE_LOGLEVEL_MIN 1 /* Minimum loglevel we let people use */ -#define CONSOLE_LOGLEVEL_QUIET 4 /* Shhh ..., when booted with "quiet" */ #define CONSOLE_LOGLEVEL_DEBUG 10 /* issue debug messages */ #define CONSOLE_LOGLEVEL_MOTORMOUTH 15 /* You can't shut this one up */ /* - * Default used to be hard-coded at 7, we're now allowing it to be set from - * kernel config. + * Default used to be hard-coded at 7, quiet used to be hardcoded at 4, + * we're now allowing both to be set from kernel config. */ #define CONSOLE_LOGLEVEL_DEFAULT CONFIG_CONSOLE_LOGLEVEL_DEFAULT +#define CONSOLE_LOGLEVEL_QUIET CONFIG_CONSOLE_LOGLEVEL_QUIET extern int console_printk[]; diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 76555479ae36..a7ef03009e9e 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -30,6 +30,17 @@ config CONSOLE_LOGLEVEL_DEFAULT usage in the kernel. That is controlled by the MESSAGE_LOGLEVEL_DEFAULT option. +config CONSOLE_LOGLEVEL_QUIET + int "quiet console loglevel (1-15)" + range 1 15 + default "4" + help + loglevel to use when "quiet" is passed on the kernel commandline. + + When "quiet" is passed on the kernel commandline this loglevel + will be used as the loglevel. IOW passing "quiet" will be the + equivalent of passing "loglevel=<CONSOLE_LOGLEVEL_QUIET>" + config MESSAGE_LOGLEVEL_DEFAULT int "Default message log level (1-7)" range 1 7 -- cgit v1.2.3 From 0e2dc70e3d0d503b0cc9c5f74db3eb6db52c9e22 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 20 Jun 2018 08:58:30 +0200 Subject: bitfield: add tests Add tests for the bitfield helpers. The constant ones will all be folded to nothing by the compiler (if everything is correct in the header file), and the variable ones do some tests against open-coding the necessary shifts. A few test cases that should fail/warn compilation are provided under ifdef.
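For a concrete picture of the helpers these tests exercise, a minimal usage sketch (the field mask and values here are made up for illustration, not taken from the patch):

	#include <linux/bitfield.h>

	/* Hypothetical 4-bit field occupying bits 7:4 of a u32 register value. */
	#define EXAMPLE_FIELD 0x000000f0

	u32 reg = u32_encode_bits(0x5, EXAMPLE_FIELD);	/* reg == 0x00000050 */
	u32 val = u32_get_bits(reg, EXAMPLE_FIELD);	/* val == 0x5 */

The constant-folding property the tests check means that, with a compile-time constant mask, such calls should compile down to plain shifts and masks.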
Suggested-by: Andy Shevchenko Reviewed-by: Andy Shevchenko Signed-off-by: Johannes Berg Signed-off-by: Kalle Valo --- lib/Kconfig.debug | 7 +++ lib/Makefile | 1 + lib/test_bitfield.c | 168 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 176 insertions(+) create mode 100644 lib/test_bitfield.c (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 8838d1158d19..d3d82eccdfa5 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1802,6 +1802,13 @@ config TEST_BITMAP If unsure, say N. +config TEST_BITFIELD + tristate "Test bitfield functions at runtime" + help + Enable this option to test the bitfield functions at boot. + + If unsure, say N. + config TEST_UUID tristate "Test functions located in the uuid module at runtime" diff --git a/lib/Makefile b/lib/Makefile index 956b320292fe..701717a23d32 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -68,6 +68,7 @@ obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_keys.o obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o obj-$(CONFIG_TEST_PRINTF) += test_printf.o obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o +obj-$(CONFIG_TEST_BITFIELD) += test_bitfield.o obj-$(CONFIG_TEST_UUID) += test_uuid.o obj-$(CONFIG_TEST_PARMAN) += test_parman.o obj-$(CONFIG_TEST_KMOD) += test_kmod.o diff --git a/lib/test_bitfield.c b/lib/test_bitfield.c new file mode 100644 index 000000000000..5b8f4108662d --- /dev/null +++ b/lib/test_bitfield.c @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Test cases for bitfield helpers. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/bitfield.h> + +#define CHECK_ENC_GET_U(tp, v, field, res) do { \ + { \ + u##tp _res; \ + \ + _res = u##tp##_encode_bits(v, field); \ + if (_res != res) { \ + pr_warn("u" #tp "_encode_bits(" #v ", " #field ") is 0x%llx != " #res "\n",\ + (u64)_res); \ + return -EINVAL; \ + } \ + if (u##tp##_get_bits(_res, field) != v) \ + return -EINVAL; \ + } \ + } while (0) + +#define CHECK_ENC_GET_LE(tp, v, field, res) do { \ + { \ + __le##tp _res; \ + \ + _res = le##tp##_encode_bits(v, field); \ + if (_res != cpu_to_le##tp(res)) { \ + pr_warn("le" #tp "_encode_bits(" #v ", " #field ") is 0x%llx != 0x%llx\n",\ + (u64)le##tp##_to_cpu(_res), \ + (u64)(res)); \ + return -EINVAL; \ + } \ + if (le##tp##_get_bits(_res, field) != v) \ + return -EINVAL; \ + } \ + } while (0) + +#define CHECK_ENC_GET_BE(tp, v, field, res) do { \ + { \ + __be##tp _res; \ + \ + _res = be##tp##_encode_bits(v, field); \ + if (_res != cpu_to_be##tp(res)) { \ + pr_warn("be" #tp "_encode_bits(" #v ", " #field ") is 0x%llx != 0x%llx\n",\ + (u64)be##tp##_to_cpu(_res), \ + (u64)(res)); \ + return -EINVAL; \ + } \ + if (be##tp##_get_bits(_res, field) != v) \ + return -EINVAL; \ + } \ + } while (0) + +#define CHECK_ENC_GET(tp, v, field, res) do { \ + CHECK_ENC_GET_U(tp, v, field, res); \ + CHECK_ENC_GET_LE(tp, v, field, res); \ + CHECK_ENC_GET_BE(tp, v, field, res); \ + } while (0) + +static int test_constants(void) +{ + /* + * NOTE + * This whole function compiles (or at least should, if everything + * is going according to plan) to nothing after optimisation.
+ */ + + CHECK_ENC_GET(16, 1, 0x000f, 0x0001); + CHECK_ENC_GET(16, 3, 0x00f0, 0x0030); + CHECK_ENC_GET(16, 5, 0x0f00, 0x0500); + CHECK_ENC_GET(16, 7, 0xf000, 0x7000); + CHECK_ENC_GET(16, 14, 0x000f, 0x000e); + CHECK_ENC_GET(16, 15, 0x00f0, 0x00f0); + + CHECK_ENC_GET_U(8, 1, 0x0f, 0x01); + CHECK_ENC_GET_U(8, 3, 0xf0, 0x30); + CHECK_ENC_GET_U(8, 14, 0x0f, 0x0e); + CHECK_ENC_GET_U(8, 15, 0xf0, 0xf0); + + CHECK_ENC_GET(32, 1, 0x00000f00, 0x00000100); + CHECK_ENC_GET(32, 3, 0x0000f000, 0x00003000); + CHECK_ENC_GET(32, 5, 0x000f0000, 0x00050000); + CHECK_ENC_GET(32, 7, 0x00f00000, 0x00700000); + CHECK_ENC_GET(32, 14, 0x0f000000, 0x0e000000); + CHECK_ENC_GET(32, 15, 0xf0000000, 0xf0000000); + + CHECK_ENC_GET(64, 1, 0x00000f0000000000ull, 0x0000010000000000ull); + CHECK_ENC_GET(64, 3, 0x0000f00000000000ull, 0x0000300000000000ull); + CHECK_ENC_GET(64, 5, 0x000f000000000000ull, 0x0005000000000000ull); + CHECK_ENC_GET(64, 7, 0x00f0000000000000ull, 0x0070000000000000ull); + CHECK_ENC_GET(64, 14, 0x0f00000000000000ull, 0x0e00000000000000ull); + CHECK_ENC_GET(64, 15, 0xf000000000000000ull, 0xf000000000000000ull); + + return 0; +} + +#define CHECK(tp, mask) do { \ + u64 v; \ + \ + for (v = 0; v < 1 << hweight32(mask); v++) \ + if (tp##_encode_bits(v, mask) != v << __ffs64(mask)) \ + return -EINVAL; \ + } while (0) + +static int test_variables(void) +{ + CHECK(u8, 0x0f); + CHECK(u8, 0xf0); + CHECK(u8, 0x38); + + CHECK(u16, 0x0038); + CHECK(u16, 0x0380); + CHECK(u16, 0x3800); + CHECK(u16, 0x8000); + + CHECK(u32, 0x80000000); + CHECK(u32, 0x7f000000); + CHECK(u32, 0x07e00000); + CHECK(u32, 0x00018000); + + CHECK(u64, 0x8000000000000000ull); + CHECK(u64, 0x7f00000000000000ull); + CHECK(u64, 0x0001800000000000ull); + CHECK(u64, 0x0000000080000000ull); + CHECK(u64, 0x000000007f000000ull); + CHECK(u64, 0x0000000018000000ull); + CHECK(u64, 0x0000001f8000000ull); + + return 0; +} + +static int __init test_bitfields(void) +{ + int ret = test_constants(); + + if (ret) { + pr_warn("constant tests failed!\n"); + return ret; + } + + ret = test_variables(); + if (ret) { + pr_warn("variable tests failed!\n"); + return ret; + } + +#ifdef TEST_BITFIELD_COMPILE + /* these should fail compilation */ + CHECK_ENC_GET(16, 16, 0x0f00, 0x1000); + u32_encode_bits(7, 0x06000000); + + /* this should at least give a warning */ + u16_encode_bits(0, 0x60000); +#endif + + pr_info("tests passed\n"); + + return 0; +} +module_init(test_bitfields) + +MODULE_AUTHOR("Johannes Berg "); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 7861552cedd81a164c0d5d1c89fe2cb45a3ed41b Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 26 Jun 2018 12:39:18 -0700 Subject: netlink: Return extack message if attribute validation fails Have one extack message for parsing and validating. Signed-off-by: David Ahern Signed-off-by: David S. 
Miller --- lib/nlattr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/nlattr.c b/lib/nlattr.c index dfa55c873c13..e335bcafa9e4 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -253,8 +253,8 @@ int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, if (policy) { err = validate_nla(nla, maxtype, policy); if (err < 0) { - if (extack) - extack->bad_attr = nla; + NL_SET_ERR_MSG_ATTR(extack, nla, + "Attribute failed policy validation"); goto errout; } } -- cgit v1.2.3 From 624fa7790f80575a4ec28fbdb2034097dc18d051 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 22 Jun 2018 14:54:49 -0700 Subject: scsi: klist: Make it safe to use klists in atomic context In the scsi_transport_srp implementation, iterating over a klist from atomic context cannot be avoided when using the legacy block layer instead of blk-mq. Hence this patch, which makes it safe to use klists in atomic context. This patch prevents lockdep from reporting the following:

WARNING: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected

Possible interrupt unsafe locking scenario:

      CPU0                    CPU1
      ----                    ----
 lock(&(&k->k_lock)->rlock);
                             local_irq_disable();
                             lock(&(&q->__queue_lock)->rlock);
                             lock(&(&k->k_lock)->rlock);
 lock(&(&q->__queue_lock)->rlock);

stack backtrace:
Workqueue: kblockd blk_timeout_work
Call Trace:
 dump_stack+0xa4/0xf5
 check_usage+0x6e6/0x700
 __lock_acquire+0x185d/0x1b50
 lock_acquire+0xd2/0x260
 _raw_spin_lock+0x32/0x50
 klist_next+0x47/0x190
 device_for_each_child+0x8e/0x100
 srp_timed_out+0xaf/0x1d0 [scsi_transport_srp]
 scsi_times_out+0xd4/0x410 [scsi_mod]
 blk_rq_timed_out+0x36/0x70
 blk_timeout_work+0x1b5/0x220
 process_one_work+0x4fe/0xad0
 worker_thread+0x63/0x5a0
 kthread+0x1c1/0x1e0
 ret_from_fork+0x24/0x30

See also commit c9ddf73476ff ("scsi: scsi_transport_srp: Fix shost to rport translation"). Signed-off-by: Bart Van Assche Cc: Martin K. Petersen Cc: James Bottomley Acked-by: Greg Kroah-Hartman Signed-off-by: Martin K.
Petersen --- lib/klist.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/klist.c b/lib/klist.c index 0507fa5d84c5..f6b547812fe3 100644 --- a/lib/klist.c +++ b/lib/klist.c @@ -336,8 +336,9 @@ struct klist_node *klist_prev(struct klist_iter *i) void (*put)(struct klist_node *) = i->i_klist->put; struct klist_node *last = i->i_cur; struct klist_node *prev; + unsigned long flags; - spin_lock(&i->i_klist->k_lock); + spin_lock_irqsave(&i->i_klist->k_lock, flags); if (last) { prev = to_klist_node(last->n_node.prev); @@ -356,7 +357,7 @@ struct klist_node *klist_prev(struct klist_iter *i) prev = to_klist_node(prev->n_node.prev); } - spin_unlock(&i->i_klist->k_lock); + spin_unlock_irqrestore(&i->i_klist->k_lock, flags); if (put && last) put(last); @@ -377,8 +378,9 @@ struct klist_node *klist_next(struct klist_iter *i) void (*put)(struct klist_node *) = i->i_klist->put; struct klist_node *last = i->i_cur; struct klist_node *next; + unsigned long flags; - spin_lock(&i->i_klist->k_lock); + spin_lock_irqsave(&i->i_klist->k_lock, flags); if (last) { next = to_klist_node(last->n_node.next); @@ -397,7 +399,7 @@ struct klist_node *klist_next(struct klist_iter *i) next = to_klist_node(next->n_node.next); } - spin_unlock(&i->i_klist->k_lock); + spin_unlock_irqrestore(&i->i_klist->k_lock, flags); if (put && last) put(last); -- cgit v1.2.3 From 08295b3b5beec9aac0f7a9db86f0fc3792039da3 Mon Sep 17 00:00:00 2001 From: Thomas Hellstrom Date: Fri, 15 Jun 2018 10:17:38 +0200 Subject: locking: Implement an algorithm choice for Wound-Wait mutexes The current Wound-Wait mutex algorithm is actually not Wound-Wait but Wait-Die. Also implement Wound-Wait as a per-ww-class choice. Wound-Wait is, contrary to Wait-Die, a preemptive algorithm and is known to generate fewer backoffs. Testing reveals that this is true if the number of simultaneous contending transactions is small. As the number of simultaneous contending threads increases, Wound-Wait becomes inferior to Wait-Die in terms of elapsed time, possibly due to the larger number of locks held by sleeping transactions. Update documentation and callers. Timings using git://people.freedesktop.org/~thomash/ww_mutex_test tag patch-18-06-15. Each thread runs 100000 batches of lock / unlock 800 ww mutexes randomly chosen out of 100000. Four core Intel x86_64:

Algorithm    #threads   Rollbacks   time
Wound-Wait   4          ~100        ~17s.
Wait-Die     4          ~150000     ~19s.
Wound-Wait   16         ~360000     ~109s.
Wait-Die     16         ~450000     ~82s.

Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Gustavo Padovan Cc: Maarten Lankhorst Cc: Sean Paul Cc: David Airlie Cc: Davidlohr Bueso Cc: "Paul E.
McKenney" Cc: Josh Triplett Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Cc: Greg Kroah-Hartman Cc: linux-doc@vger.kernel.org Cc: linux-media@vger.kernel.org Cc: linaro-mm-sig@lists.linaro.org Co-authored-by: Peter Zijlstra Signed-off-by: Thomas Hellstrom Acked-by: Peter Zijlstra (Intel) Acked-by: Ingo Molnar --- Documentation/locking/ww-mutex-design.txt | 57 +++++++++-- drivers/dma-buf/reservation.c | 2 +- drivers/gpu/drm/drm_modeset_lock.c | 2 +- include/linux/ww_mutex.h | 17 ++- kernel/locking/locktorture.c | 2 +- kernel/locking/mutex.c | 165 +++++++++++++++++++++++++++--- kernel/locking/test-ww_mutex.c | 2 +- lib/locking-selftest.c | 2 +- 8 files changed, 213 insertions(+), 36 deletions(-) (limited to 'lib') diff --git a/Documentation/locking/ww-mutex-design.txt b/Documentation/locking/ww-mutex-design.txt index 2fd7f2a2af21..f0ed7c30e695 100644 --- a/Documentation/locking/ww-mutex-design.txt +++ b/Documentation/locking/ww-mutex-design.txt @@ -1,4 +1,4 @@ -Wait/Wound Deadlock-Proof Mutex Design +Wound/Wait Deadlock-Proof Mutex Design ====================================== Please read mutex-design.txt first, as it applies to wait/wound mutexes too. @@ -32,10 +32,26 @@ the oldest task) wins, and the one with the higher reservation id (i.e. the younger task) unlocks all of the buffers that it has already locked, and then tries again. -In the RDBMS literature this deadlock handling approach is called wait/die: -The older tasks waits until it can acquire the contended lock. The younger tasks -needs to back off and drop all the locks it is currently holding, i.e. the -younger task dies. +In the RDBMS literature, a reservation ticket is associated with a transaction. +and the deadlock handling approach is called Wait-Die. The name is based on +the actions of a locking thread when it encounters an already locked mutex. +If the transaction holding the lock is younger, the locking transaction waits. +If the transaction holding the lock is older, the locking transaction backs off +and dies. Hence Wait-Die. +There is also another algorithm called Wound-Wait: +If the transaction holding the lock is younger, the locking transaction +wounds the transaction holding the lock, requesting it to die. +If the transaction holding the lock is older, it waits for the other +transaction. Hence Wound-Wait. +The two algorithms are both fair in that a transaction will eventually succeed. +However, the Wound-Wait algorithm is typically stated to generate fewer backoffs +compared to Wait-Die, but is, on the other hand, associated with more work than +Wait-Die when recovering from a backoff. Wound-Wait is also a preemptive +algorithm in that transactions are wounded by other transactions, and that +requires a reliable way to pick up up the wounded condition and preempt the +running transaction. Note that this is not the same as process preemption. A +Wound-Wait transaction is considered preempted when it dies (returning +-EDEADLK) following a wound. Concepts -------- @@ -47,10 +63,12 @@ Acquire context: To ensure eventual forward progress it is important the a task trying to acquire locks doesn't grab a new reservation id, but keeps the one it acquired when starting the lock acquisition. This ticket is stored in the acquire context. Furthermore the acquire context keeps track of debugging state -to catch w/w mutex interface abuse. +to catch w/w mutex interface abuse. An acquire context is representing a +transaction. 
W/w class: In contrast to normal mutexes the lock class needs to be explicit for -w/w mutexes, since it is required to initialize the acquire context. +w/w mutexes, since it is required to initialize the acquire context. The lock +class also specifies what algorithm to use, Wound-Wait or Wait-Die. Furthermore there are three different class of w/w lock acquire functions: @@ -90,6 +108,12 @@ provided. Usage ----- +The algorithm (Wait-Die vs Wound-Wait) is chosen by using either +DEFINE_WW_CLASS() (Wound-Wait) or DEFINE_WD_CLASS() (Wait-Die) +As a rough rule of thumb, use Wound-Wait iff you +expect the number of simultaneous competing transactions to be typically small, +and you want to reduce the number of rollbacks. + Three different ways to acquire locks within the same w/w class. Common definitions for methods #1 and #2: @@ -312,12 +336,23 @@ Design: We maintain the following invariants for the wait list: (1) Waiters with an acquire context are sorted by stamp order; waiters without an acquire context are interspersed in FIFO order. - (2) Among waiters with contexts, only the first one can have other locks - acquired already (ctx->acquired > 0). Note that this waiter may come - after other waiters without contexts in the list. + (2) For Wait-Die, among waiters with contexts, only the first one can have + other locks acquired already (ctx->acquired > 0). Note that this waiter + may come after other waiters without contexts in the list. + + The Wound-Wait preemption is implemented with a lazy-preemption scheme: + The wounded status of the transaction is checked only when there is + contention for a new lock and hence a true chance of deadlock. In that + situation, if the transaction is wounded, it backs off, clears the + wounded status and retries. A great benefit of implementing preemption in + this way is that the wounded transaction can identify a contending lock to + wait for before restarting the transaction. Just blindly restarting the + transaction would likely make the transaction end up in a situation where + it would have to back off again. In general, not much contention is expected. The locks are typically used to - serialize access to resources for devices. + serialize access to resources for devices, and optimization focus should + therefore be directed towards the uncontended cases. Lockdep: Special care has been taken to warn for as many cases of api abuse diff --git a/drivers/dma-buf/reservation.c b/drivers/dma-buf/reservation.c index 314eb1071cce..20bf90f4ee63 100644 --- a/drivers/dma-buf/reservation.c +++ b/drivers/dma-buf/reservation.c @@ -46,7 +46,7 @@ * write-side updates. */ -DEFINE_WW_CLASS(reservation_ww_class); +DEFINE_WD_CLASS(reservation_ww_class); EXPORT_SYMBOL(reservation_ww_class); struct lock_class_key reservation_seqcount_class; diff --git a/drivers/gpu/drm/drm_modeset_lock.c b/drivers/gpu/drm/drm_modeset_lock.c index 8a5100685875..638be2eb67b4 100644 --- a/drivers/gpu/drm/drm_modeset_lock.c +++ b/drivers/gpu/drm/drm_modeset_lock.c @@ -70,7 +70,7 @@ * lists and lookup data structures. */ -static DEFINE_WW_CLASS(crtc_ww_class); +static DEFINE_WD_CLASS(crtc_ww_class); /** * drm_modeset_lock_all - take all modeset locks diff --git a/include/linux/ww_mutex.h b/include/linux/ww_mutex.h index f82fce2229c8..3af7c0e03be5 100644 --- a/include/linux/ww_mutex.h +++ b/include/linux/ww_mutex.h @@ -8,6 +8,8 @@ * * Wait/Die implementation: * Copyright (C) 2013 Canonical Ltd. + * Choice of algorithm: + * Copyright (C) 2018 WMWare Inc. 
* * This file contains the main data structure and API definitions. */ @@ -23,12 +25,15 @@ struct ww_class { struct lock_class_key mutex_key; const char *acquire_name; const char *mutex_name; + unsigned int is_wait_die; }; struct ww_acquire_ctx { struct task_struct *task; unsigned long stamp; unsigned int acquired; + unsigned short wounded; + unsigned short is_wait_die; #ifdef CONFIG_DEBUG_MUTEXES unsigned int done_acquire; struct ww_class *ww_class; @@ -58,17 +63,21 @@ struct ww_mutex { # define __WW_CLASS_MUTEX_INITIALIZER(lockname, class) #endif -#define __WW_CLASS_INITIALIZER(ww_class) \ +#define __WW_CLASS_INITIALIZER(ww_class, _is_wait_die) \ { .stamp = ATOMIC_LONG_INIT(0) \ , .acquire_name = #ww_class "_acquire" \ - , .mutex_name = #ww_class "_mutex" } + , .mutex_name = #ww_class "_mutex" \ + , .is_wait_die = _is_wait_die } #define __WW_MUTEX_INITIALIZER(lockname, class) \ { .base = __MUTEX_INITIALIZER(lockname.base) \ __WW_CLASS_MUTEX_INITIALIZER(lockname, class) } +#define DEFINE_WD_CLASS(classname) \ + struct ww_class classname = __WW_CLASS_INITIALIZER(classname, 1) + #define DEFINE_WW_CLASS(classname) \ - struct ww_class classname = __WW_CLASS_INITIALIZER(classname) + struct ww_class classname = __WW_CLASS_INITIALIZER(classname, 0) #define DEFINE_WW_MUTEX(mutexname, ww_class) \ struct ww_mutex mutexname = __WW_MUTEX_INITIALIZER(mutexname, ww_class) @@ -123,6 +132,8 @@ static inline void ww_acquire_init(struct ww_acquire_ctx *ctx, ctx->task = current; ctx->stamp = atomic_long_inc_return_relaxed(&ww_class->stamp); ctx->acquired = 0; + ctx->wounded = false; + ctx->is_wait_die = ww_class->is_wait_die; #ifdef CONFIG_DEBUG_MUTEXES ctx->ww_class = ww_class; ctx->done_acquire = 0; diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index 8402b3349dca..c28224347d69 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -365,7 +365,7 @@ static struct lock_torture_ops mutex_lock_ops = { }; #include -static DEFINE_WW_CLASS(torture_ww_class); +static DEFINE_WD_CLASS(torture_ww_class); static DEFINE_WW_MUTEX(torture_ww_mutex_0, &torture_ww_class); static DEFINE_WW_MUTEX(torture_ww_mutex_1, &torture_ww_class); static DEFINE_WW_MUTEX(torture_ww_mutex_2, &torture_ww_class); diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index cfe48419b7d0..1a81a1257b3f 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -173,6 +173,21 @@ static inline bool __mutex_waiter_is_first(struct mutex *lock, struct mutex_wait return list_first_entry(&lock->wait_list, struct mutex_waiter, list) == waiter; } +/* + * Add @waiter to a given location in the lock wait_list and set the + * FLAG_WAITERS flag if it's the first waiter. + */ +static void __sched +__mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, + struct list_head *list) +{ + debug_mutex_add_waiter(lock, waiter, current); + + list_add_tail(&waiter->list, list); + if (__mutex_waiter_is_first(lock, waiter)) + __mutex_set_flag(lock, MUTEX_FLAG_WAITERS); +} + /* * Give up ownership to a specific task, when @task = NULL, this is equivalent * to a regular unlock. Sets PICKUP on a handoff, clears HANDOF, preserves @@ -249,6 +264,11 @@ EXPORT_SYMBOL(mutex_lock); * The newer transactions are killed when: * It (the new transaction) makes a request for a lock being held * by an older transaction. + * + * Wound-Wait: + * The newer transactions are wounded when: + * An older transaction makes a request for a lock being held by + * the newer transaction. 
*/ /* @@ -320,6 +340,9 @@ static bool __sched __ww_mutex_die(struct mutex *lock, struct mutex_waiter *waiter, struct ww_acquire_ctx *ww_ctx) { + if (!ww_ctx->is_wait_die) + return false; + if (waiter->ww_ctx->acquired > 0 && __ww_ctx_stamp_after(waiter->ww_ctx, ww_ctx)) { debug_mutex_wake_waiter(lock, waiter); @@ -329,13 +352,65 @@ __ww_mutex_die(struct mutex *lock, struct mutex_waiter *waiter, return true; } +/* + * Wound-Wait; wound a younger @hold_ctx if it holds the lock. + * + * Wound the lock holder if there are waiters with older transactions than + * the lock holders. Even if multiple waiters may wound the lock holder, + * it's sufficient that only one does. + */ +static bool __ww_mutex_wound(struct mutex *lock, + struct ww_acquire_ctx *ww_ctx, + struct ww_acquire_ctx *hold_ctx) +{ + struct task_struct *owner = __mutex_owner(lock); + + lockdep_assert_held(&lock->wait_lock); + + /* + * Possible through __ww_mutex_add_waiter() when we race with + * ww_mutex_set_context_fastpath(). In that case we'll get here again + * through __ww_mutex_check_waiters(). + */ + if (!hold_ctx) + return false; + + /* + * Can have !owner because of __mutex_unlock_slowpath(), but if owner, + * it cannot go away because we'll have FLAG_WAITERS set and hold + * wait_lock. + */ + if (!owner) + return false; + + if (ww_ctx->acquired > 0 && __ww_ctx_stamp_after(hold_ctx, ww_ctx)) { + hold_ctx->wounded = 1; + + /* + * wake_up_process() paired with set_current_state() + * inserts sufficient barriers to make sure @owner either sees + * it's wounded in __ww_mutex_lock_check_stamp() or has a + * wakeup pending to re-read the wounded state. + */ + if (owner != current) + wake_up_process(owner); + + return true; + } + + return false; +} + /* * We just acquired @lock under @ww_ctx, if there are later contexts waiting - * behind us on the wait-list, check if they need to die. + * behind us on the wait-list, check if they need to die, or wound us. * * See __ww_mutex_add_waiter() for the list-order construction; basically the * list is ordered by stamp, smallest (oldest) first. * + * This relies on never mixing wait-die/wound-wait on the same wait-list; + * which is currently ensured by that being a ww_class property. + * * The current task must not be on the wait list. */ static void __sched @@ -349,7 +424,8 @@ __ww_mutex_check_waiters(struct mutex *lock, struct ww_acquire_ctx *ww_ctx) if (!cur->ww_ctx) continue; - if (__ww_mutex_die(lock, cur, ww_ctx)) + if (__ww_mutex_die(lock, cur, ww_ctx) || + __ww_mutex_wound(lock, cur->ww_ctx, ww_ctx)) break; } } @@ -370,17 +446,23 @@ ww_mutex_set_context_fastpath(struct ww_mutex *lock, struct ww_acquire_ctx *ctx) * and keep spinning, or it will acquire wait_lock, add itself * to waiter list and sleep. */ - smp_mb(); /* ^^^ */ + smp_mb(); /* See comments above and below. */ /* - * Check if lock is contended, if not there is nobody to wake up + * [W] ww->ctx = ctx [W] MUTEX_FLAG_WAITERS + * MB MB + * [R] MUTEX_FLAG_WAITERS [R] ww->ctx + * + * The memory barrier above pairs with the memory barrier in + * __ww_mutex_add_waiter() and makes sure we either observe ww->ctx + * and/or !empty list. */ if (likely(!(atomic_long_read(&lock->base.owner) & MUTEX_FLAG_WAITERS))) return; /* * Uh oh, we raced in fastpath, check if any of the waiters need to - * die. + * die or wound us. 
*/ spin_lock(&lock->base.wait_lock); __ww_mutex_check_waiters(&lock->base, ctx); @@ -682,7 +764,9 @@ __ww_mutex_kill(struct mutex *lock, struct ww_acquire_ctx *ww_ctx) /* - * Check whether we need to kill the transaction for the current lock acquire. + * Check the wound condition for the current lock acquire. + * + * Wound-Wait: If we're wounded, kill ourself. * * Wait-Die: If we're trying to acquire a lock already held by an older * context, kill ourselves. @@ -701,6 +785,13 @@ __ww_mutex_check_kill(struct mutex *lock, struct mutex_waiter *waiter, if (ctx->acquired == 0) return 0; + if (!ctx->is_wait_die) { + if (ctx->wounded) + return __ww_mutex_kill(lock, ctx); + + return 0; + } + if (hold_ctx && __ww_ctx_stamp_after(ctx, hold_ctx)) return __ww_mutex_kill(lock, ctx); @@ -727,7 +818,8 @@ __ww_mutex_check_kill(struct mutex *lock, struct mutex_waiter *waiter, * Waiters without context are interspersed in FIFO order. * * Furthermore, for Wait-Die kill ourself immediately when possible (there are - * older contexts already waiting) to avoid unnecessary waiting. + * older contexts already waiting) to avoid unnecessary waiting and for + * Wound-Wait ensure we wound the owning context when it is younger. */ static inline int __sched __ww_mutex_add_waiter(struct mutex_waiter *waiter, @@ -736,16 +828,21 @@ __ww_mutex_add_waiter(struct mutex_waiter *waiter, { struct mutex_waiter *cur; struct list_head *pos; + bool is_wait_die; if (!ww_ctx) { - list_add_tail(&waiter->list, &lock->wait_list); + __mutex_add_waiter(lock, waiter, &lock->wait_list); return 0; } + is_wait_die = ww_ctx->is_wait_die; + /* * Add the waiter before the first waiter with a higher stamp. * Waiters without a context are skipped to avoid starving - * them. Wait-Die waiters may die here. + * them. Wait-Die waiters may die here. Wound-Wait waiters + * never die here, but they are sorted in stamp order and + * may wound the lock holder. */ pos = &lock->wait_list; list_for_each_entry_reverse(cur, &lock->wait_list, list) { @@ -758,10 +855,12 @@ __ww_mutex_add_waiter(struct mutex_waiter *waiter, * is no point in queueing behind it, as we'd have to * die the moment it would acquire the lock. */ - int ret = __ww_mutex_kill(lock, ww_ctx); + if (is_wait_die) { + int ret = __ww_mutex_kill(lock, ww_ctx); - if (ret) - return ret; + if (ret) + return ret; + } break; } @@ -772,7 +871,23 @@ __ww_mutex_add_waiter(struct mutex_waiter *waiter, __ww_mutex_die(lock, cur, ww_ctx); } - list_add_tail(&waiter->list, pos); + __mutex_add_waiter(lock, waiter, pos); + + /* + * Wound-Wait: if we're blocking on a mutex owned by a younger context, + * wound that such that we might proceed. + */ + if (!is_wait_die) { + struct ww_mutex *ww = container_of(lock, struct ww_mutex, base); + + /* + * See ww_mutex_set_context_fastpath(). Orders setting + * MUTEX_FLAG_WAITERS vs the ww->ctx load, + * such that either we or the fastpath will wound @ww->ctx. + */ + smp_mb(); + __ww_mutex_wound(lock, ww_ctx, ww->ctx); + } return 0; } @@ -796,6 +911,14 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, if (use_ww_ctx && ww_ctx) { if (unlikely(ww_ctx == READ_ONCE(ww->ctx))) return -EALREADY; + + /* + * Reset the wounded flag after a kill. No other process can + * race and wound us here since they can't have a valid owner + * pointer if we don't have any locks held. 
+ */ + if (ww_ctx->acquired == 0) + ww_ctx->wounded = 0; } preempt_disable(); @@ -829,7 +952,8 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, if (!use_ww_ctx) { /* add waiting tasks to the end of the waitqueue (FIFO): */ - list_add_tail(&waiter.list, &lock->wait_list); + __mutex_add_waiter(lock, &waiter, &lock->wait_list); + #ifdef CONFIG_DEBUG_MUTEXES waiter.ww_ctx = MUTEX_POISON_WW_CTX; @@ -848,9 +972,6 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, waiter.task = current; - if (__mutex_waiter_is_first(lock, &waiter)) - __mutex_set_flag(lock, MUTEX_FLAG_WAITERS); - set_current_state(state); for (;;) { /* @@ -907,6 +1028,16 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, acquired: __set_current_state(TASK_RUNNING); + if (use_ww_ctx && ww_ctx) { + /* + * Wound-Wait; we stole the lock (!first_waiter), check the + * waiters as anyone might want to wound us. + */ + if (!ww_ctx->is_wait_die && + !__mutex_waiter_is_first(lock, &waiter)) + __ww_mutex_check_waiters(lock, ww_ctx); + } + mutex_remove_waiter(lock, &waiter, current); if (likely(list_empty(&lock->wait_list))) __mutex_clear_flag(lock, MUTEX_FLAGS); diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c index 0e4cd64ad2c0..5b915b370d5a 100644 --- a/kernel/locking/test-ww_mutex.c +++ b/kernel/locking/test-ww_mutex.c @@ -26,7 +26,7 @@ #include #include -static DEFINE_WW_CLASS(ww_class); +static DEFINE_WD_CLASS(ww_class); struct workqueue_struct *wq; struct test_mutex { diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c index b5c1293ce147..1e1bbf171eca 100644 --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c @@ -29,7 +29,7 @@ */ static unsigned int debug_locks_verbose; -static DEFINE_WW_CLASS(ww_lockdep); +static DEFINE_WD_CLASS(ww_lockdep); static int __init setup_debug_locks_verbose(char *str) { -- cgit v1.2.3 From ad5728ce21398926a1215ebb38570113fa8edd99 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 3 Jul 2018 12:52:21 -0700 Subject: RAID/s390: Remove VLA usage In the quest to remove all stack VLA usage from the kernel[1], this moves the "$#" replacement from being an argument to being inside the function, which avoids generating VLAs. 
[1] https://lkml.kernel.org/r/CA+55aFzCG-zNmZwX4A2FQpadafLfEzK6CC=qPXydAacU1RqZWA@mail.gmail.com Signed-off-by: Kees Cook Signed-off-by: Martin Schwidefsky --- lib/raid6/s390vx.uc | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) (limited to 'lib') diff --git a/lib/raid6/s390vx.uc b/lib/raid6/s390vx.uc index 140fa8bb5c23..914ebe98fc21 100644 --- a/lib/raid6/s390vx.uc +++ b/lib/raid6/s390vx.uc @@ -55,22 +55,24 @@ static inline void XOR(int x, int y, int z) asm volatile ("VX %0,%1,%2" : : "i" (x), "i" (y), "i" (z)); } -static inline void LOAD_DATA(int x, int n, u8 *ptr) +static inline void LOAD_DATA(int x, u8 *ptr) { - typedef struct { u8 _[16*n]; } addrtype; + typedef struct { u8 _[16 * $#]; } addrtype; register addrtype *__ptr asm("1") = (addrtype *) ptr; asm volatile ("VLM %2,%3,0,%r1" - : : "m" (*__ptr), "a" (__ptr), "i" (x), "i" (x + n - 1)); + : : "m" (*__ptr), "a" (__ptr), "i" (x), + "i" (x + $# - 1)); } -static inline void STORE_DATA(int x, int n, u8 *ptr) +static inline void STORE_DATA(int x, u8 *ptr) { - typedef struct { u8 _[16*n]; } addrtype; + typedef struct { u8 _[16 * $#]; } addrtype; register addrtype *__ptr asm("1") = (addrtype *) ptr; asm volatile ("VSTM %2,%3,0,1" - : "=m" (*__ptr) : "a" (__ptr), "i" (x), "i" (x + n - 1)); + : "=m" (*__ptr) : "a" (__ptr), "i" (x), + "i" (x + $# - 1)); } static inline void COPY_VEC(int x, int y) @@ -93,19 +95,19 @@ static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs) q = dptr[z0 + 2]; /* RS syndrome */ for (d = 0; d < bytes; d += $#*NSIZE) { - LOAD_DATA(0,$#,&dptr[z0][d]); + LOAD_DATA(0,&dptr[z0][d]); COPY_VEC(8+$$,0+$$); for (z = z0 - 1; z >= 0; z--) { MASK(16+$$,8+$$); AND(16+$$,16+$$,25); SHLBYTE(8+$$,8+$$); XOR(8+$$,8+$$,16+$$); - LOAD_DATA(16,$#,&dptr[z][d]); + LOAD_DATA(16,&dptr[z][d]); XOR(0+$$,0+$$,16+$$); XOR(8+$$,8+$$,16+$$); } - STORE_DATA(0,$#,&p[d]); - STORE_DATA(8,$#,&q[d]); + STORE_DATA(0,&p[d]); + STORE_DATA(8,&q[d]); } kernel_fpu_end(&vxstate, KERNEL_VXR); } @@ -127,14 +129,14 @@ static void raid6_s390vx$#_xor_syndrome(int disks, int start, int stop, for (d = 0; d < bytes; d += $#*NSIZE) { /* P/Q data pages */ - LOAD_DATA(0,$#,&dptr[z0][d]); + LOAD_DATA(0,&dptr[z0][d]); COPY_VEC(8+$$,0+$$); for (z = z0 - 1; z >= start; z--) { MASK(16+$$,8+$$); AND(16+$$,16+$$,25); SHLBYTE(8+$$,8+$$); XOR(8+$$,8+$$,16+$$); - LOAD_DATA(16,$#,&dptr[z][d]); + LOAD_DATA(16,&dptr[z][d]); XOR(0+$$,0+$$,16+$$); XOR(8+$$,8+$$,16+$$); } @@ -145,12 +147,12 @@ static void raid6_s390vx$#_xor_syndrome(int disks, int start, int stop, SHLBYTE(8+$$,8+$$); XOR(8+$$,8+$$,16+$$); } - LOAD_DATA(16,$#,&p[d]); + LOAD_DATA(16,&p[d]); XOR(16+$$,16+$$,0+$$); - STORE_DATA(16,$#,&p[d]); - LOAD_DATA(16,$#,&q[d]); + STORE_DATA(16,&p[d]); + LOAD_DATA(16,&q[d]); XOR(16+$$,16+$$,8+$$); - STORE_DATA(16,$#,&q[d]); + STORE_DATA(16,&q[d]); } kernel_fpu_end(&vxstate, KERNEL_VXR); } -- cgit v1.2.3 From 785a19f9d1dd8a4ab2d0633be4656653bd3de1fc Mon Sep 17 00:00:00 2001 From: Chintan Pandya Date: Wed, 27 Jun 2018 08:13:47 -0600 Subject: ioremap: Update pgtable free interfaces with addr The following kernel panic was observed on ARM64 platform due to a stale TLB entry. 1. ioremap with 4K size, a valid pte page table is set. 2. iounmap it, its pte entry is set to 0. 3. ioremap the same address with 2M size, update its pmd entry with a new value. 4. CPU may hit an exception because the old pmd entry is still in TLB, which leads to a kernel panic. 
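In driver terms, the four steps above correspond roughly to the following sketch (EXAMPLE_PHYS is a made-up physical address; this is an illustration, not code from the patch):

	#include <linux/io.h>
	#include <linux/sizes.h>

	void __iomem *p;

	p = ioremap(EXAMPLE_PHYS, SZ_4K);	/* 1. pte page table is set */
	iounmap(p);				/* 2. pte entry is cleared */
	p = ioremap(EXAMPLE_PHYS, SZ_2M);	/* 3. pmd entry is updated */
	/* 4. a stale pmd entry may still be hit in the TLB -> panic */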
Commit b6bdb7517c3d ("mm/vmalloc: add interfaces to free unmapped page table") has addressed this panic by falling back to pte mappings in the above case on ARM64. To support pmd mappings in all cases, TLB purge needs to be performed in this case on ARM64. Add a new arg, 'addr', to pud_free_pmd_page() and pmd_free_pte_page() so that TLB purge can be added later in separate patches. [toshi.kani@hpe.com: merge changes, rewrite patch description] Fixes: 28ee90fe6048 ("x86/mm: implement free pmd/pte page interfaces") Signed-off-by: Chintan Pandya Signed-off-by: Toshi Kani Signed-off-by: Thomas Gleixner Cc: mhocko@suse.com Cc: akpm@linux-foundation.org Cc: hpa@zytor.com Cc: linux-mm@kvack.org Cc: linux-arm-kernel@lists.infradead.org Cc: Will Deacon Cc: Joerg Roedel Cc: stable@vger.kernel.org Cc: Andrew Morton Cc: Michal Hocko Cc: "H. Peter Anvin" Cc: Link: https://lkml.kernel.org/r/20180627141348.21777-3-toshi.kani@hpe.com --- arch/arm64/mm/mmu.c | 4 ++-- arch/x86/mm/pgtable.c | 12 +++++++----- include/asm-generic/pgtable.h | 8 ++++---- lib/ioremap.c | 4 ++-- 4 files changed, 15 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 493ff75670ff..8ae5d7ae4af3 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -977,12 +977,12 @@ int pmd_clear_huge(pmd_t *pmdp) return 1; } -int pud_free_pmd_page(pud_t *pud) +int pud_free_pmd_page(pud_t *pud, unsigned long addr) { return pud_none(*pud); } -int pmd_free_pte_page(pmd_t *pmd) +int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) { return pmd_none(*pmd); } diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 1aeb7a5dbce5..fbd14e506758 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -723,11 +723,12 @@ int pmd_clear_huge(pmd_t *pmd) /** * pud_free_pmd_page - Clear pud entry and free pmd page. * @pud: Pointer to a PUD. + * @addr: Virtual address associated with pud. * * Context: The pud range has been unmapped and TLB purged. * Return: 1 if clearing the entry succeeded. 0 otherwise. */ -int pud_free_pmd_page(pud_t *pud) +int pud_free_pmd_page(pud_t *pud, unsigned long addr) { pmd_t *pmd; int i; @@ -738,7 +739,7 @@ int pud_free_pmd_page(pud_t *pud) pmd = (pmd_t *)pud_page_vaddr(*pud); for (i = 0; i < PTRS_PER_PMD; i++) - if (!pmd_free_pte_page(&pmd[i])) + if (!pmd_free_pte_page(&pmd[i], addr + (i * PMD_SIZE))) return 0; pud_clear(pud); @@ -750,11 +751,12 @@ int pud_free_pmd_page(pud_t *pud) /** * pmd_free_pte_page - Clear pmd entry and free pte page. * @pmd: Pointer to a PMD. + * @addr: Virtual address associated with pmd. * * Context: The pmd range has been unmapped and TLB purged. * Return: 1 if clearing the entry succeeded. 0 otherwise. */ -int pmd_free_pte_page(pmd_t *pmd) +int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) { pte_t *pte; @@ -770,7 +772,7 @@ int pmd_free_pte_page(pmd_t *pmd) #else /* !CONFIG_X86_64 */ -int pud_free_pmd_page(pud_t *pud) +int pud_free_pmd_page(pud_t *pud, unsigned long addr) { return pud_none(*pud); } /* * Disable free page handling on x86-PAE. This assures that ioremap() * does not update sync'd pmd entries. See vmalloc_sync_one().
*/ -int pmd_free_pte_page(pmd_t *pmd) +int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) { return pmd_none(*pmd); } diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index f59639afaa39..b081794ba135 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -1019,8 +1019,8 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot); int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot); int pud_clear_huge(pud_t *pud); int pmd_clear_huge(pmd_t *pmd); -int pud_free_pmd_page(pud_t *pud); -int pmd_free_pte_page(pmd_t *pmd); +int pud_free_pmd_page(pud_t *pud, unsigned long addr); +int pmd_free_pte_page(pmd_t *pmd, unsigned long addr); #else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */ static inline int p4d_set_huge(p4d_t *p4d, phys_addr_t addr, pgprot_t prot) { @@ -1046,11 +1046,11 @@ static inline int pmd_clear_huge(pmd_t *pmd) { return 0; } -static inline int pud_free_pmd_page(pud_t *pud) +static inline int pud_free_pmd_page(pud_t *pud, unsigned long addr) { return 0; } -static inline int pmd_free_pte_page(pmd_t *pmd) +static inline int pmd_free_pte_page(pmd_t *pmd, unsigned long addr) { return 0; } diff --git a/lib/ioremap.c b/lib/ioremap.c index 54e5bbaa3200..517f5853ffed 100644 --- a/lib/ioremap.c +++ b/lib/ioremap.c @@ -92,7 +92,7 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, if (ioremap_pmd_enabled() && ((next - addr) == PMD_SIZE) && IS_ALIGNED(phys_addr + addr, PMD_SIZE) && - pmd_free_pte_page(pmd)) { + pmd_free_pte_page(pmd, addr)) { if (pmd_set_huge(pmd, phys_addr + addr, prot)) continue; } @@ -119,7 +119,7 @@ static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr, if (ioremap_pud_enabled() && ((next - addr) == PUD_SIZE) && IS_ALIGNED(phys_addr + addr, PUD_SIZE) && - pud_free_pmd_page(pud)) { + pud_free_pmd_page(pud, addr)) { if (pud_set_huge(pud, phys_addr + addr, prot)) continue; } -- cgit v1.2.3 From 06ae48269d1e0324d806fca30fe77112f4a4a14a Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Fri, 6 Jul 2018 15:13:18 -0700 Subject: lib: reciprocal_div: implement the improved algorithm on the paper mentioned The new added "reciprocal_value_adv" implements the advanced version of the algorithm described in Figure 4.2 of the paper except when "divisor > (1U << 31)" whose ceil(log2(d)) result will be 32 which then requires u128 divide on host. The exception case could be easily handled before calling "reciprocal_value_adv". The advanced version requires more complex calculation to get the reciprocal multiplier and other control variables, but then could reduce the required emulation operations. It makes no sense to use this advanced version for host divide emulation, those extra complexities for calculating multiplier etc could completely waive our saving on emulation operations. However, it makes sense to use it for JIT divide code generation (for example eBPF JIT backends) for which we are willing to trade performance of JITed code with that of host. As shown by the following pseudo code, the required emulation operations could go down from 6 (the basic version) to 3 or 4. To use the result of "reciprocal_value_adv", suppose we want to calculate n/d, the C-style pseudo code will be the following, it could be easily changed to real code generation for other JIT targets. struct reciprocal_value_adv rvalue; u8 pre_shift, exp; // handle exception case. 
if (d >= (1U << 31)) { result = n >= d; return; } rvalue = reciprocal_value_adv(d, 32) exp = rvalue.exp; if (rvalue.is_wide_m && !(d & 1)) { // floor(log2(d & (2^32 -d))) pre_shift = fls(d & -d) - 1; rvalue = reciprocal_value_adv(d >> pre_shift, 32 - pre_shift); } else { pre_shift = 0; } // code generation starts. if (imm == 1U << exp) { result = n >> exp; } else if (rvalue.is_wide_m) { // pre_shift must be zero when reached here. t = (n * rvalue.m) >> 32; result = n - t; result >>= 1; result += t; result >>= rvalue.sh - 1; } else { if (pre_shift) result = n >> pre_shift; result = ((u64)result * rvalue.m) >> 32; result >>= rvalue.sh; } Signed-off-by: Jiong Wang Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- include/linux/reciprocal_div.h | 68 ++++++++++++++++++++++++++++++++++++++++++ lib/reciprocal_div.c | 41 +++++++++++++++++++++++++ 2 files changed, 109 insertions(+) (limited to 'lib') diff --git a/include/linux/reciprocal_div.h b/include/linux/reciprocal_div.h index e031e9f2f9d8..585ce89c0f33 100644 --- a/include/linux/reciprocal_div.h +++ b/include/linux/reciprocal_div.h @@ -25,6 +25,9 @@ struct reciprocal_value { u8 sh1, sh2; }; +/* "reciprocal_value" and "reciprocal_divide" together implement the basic + * version of the algorithm described in Figure 4.1 of the paper. + */ struct reciprocal_value reciprocal_value(u32 d); static inline u32 reciprocal_divide(u32 a, struct reciprocal_value R) @@ -33,4 +36,69 @@ static inline u32 reciprocal_divide(u32 a, struct reciprocal_value R) return (t + ((a - t) >> R.sh1)) >> R.sh2; } +struct reciprocal_value_adv { + u32 m; + u8 sh, exp; + bool is_wide_m; +}; + +/* "reciprocal_value_adv" implements the advanced version of the algorithm + * described in Figure 4.2 of the paper except when "divisor > (1U << 31)" whose + * ceil(log2(d)) result will be 32 which then requires u128 divide on host. The + * exception case could be easily handled before calling "reciprocal_value_adv". + * + * The advanced version requires more complex calculation to get the reciprocal + * multiplier and other control variables, but then could reduce the required + * emulation operations. + * + * It makes no sense to use this advanced version for host divide emulation, + * those extra complexities for calculating multiplier etc could completely + * waive our saving on emulation operations. + * + * However, it makes sense to use it for JIT divide code generation for which + * we are willing to trade performance of JITed code with that of host. As shown + * by the following pseudo code, the required emulation operations could go down + * from 6 (the basic version) to 3 or 4. + * + * To use the result of "reciprocal_value_adv", suppose we want to calculate + * n/d, the pseudo C code will be: + * + * struct reciprocal_value_adv rvalue; + * u8 pre_shift, exp; + * + * // handle exception case. + * if (d >= (1U << 31)) { + * result = n >= d; + * return; + * } + * + * rvalue = reciprocal_value_adv(d, 32) + * exp = rvalue.exp; + * if (rvalue.is_wide_m && !(d & 1)) { + * // floor(log2(d & (2^32 -d))) + * pre_shift = fls(d & -d) - 1; + * rvalue = reciprocal_value_adv(d >> pre_shift, 32 - pre_shift); + * } else { + * pre_shift = 0; + * } + * + * // code generation starts. + * if (imm == 1U << exp) { + * result = n >> exp; + * } else if (rvalue.is_wide_m) { + * // pre_shift must be zero when reached here. 
+ * t = (n * rvalue.m) >> 32; + * result = n - t; + * result >>= 1; + * result += t; + * result >>= rvalue.sh - 1; + * } else { + * if (pre_shift) + * result = n >> pre_shift; + * result = ((u64)result * rvalue.m) >> 32; + * result >>= rvalue.sh; + * } + */ +struct reciprocal_value_adv reciprocal_value_adv(u32 d, u8 prec); + #endif /* _LINUX_RECIPROCAL_DIV_H */ diff --git a/lib/reciprocal_div.c b/lib/reciprocal_div.c index fcb4ce682c6f..bf043258fa00 100644 --- a/lib/reciprocal_div.c +++ b/lib/reciprocal_div.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include <linux/bug.h> #include <linux/kernel.h> #include <asm/div64.h> #include <linux/reciprocal_div.h> @@ -26,3 +27,43 @@ struct reciprocal_value reciprocal_value(u32 d) return R; } EXPORT_SYMBOL(reciprocal_value); + +struct reciprocal_value_adv reciprocal_value_adv(u32 d, u8 prec) +{ + struct reciprocal_value_adv R; + u32 l, post_shift; + u64 mhigh, mlow; + + /* ceil(log2(d)) */ + l = fls(d - 1); + /* NOTE: mlow/mhigh could overflow u64 when l == 32. This case needs to + * be handled before calling "reciprocal_value_adv", please see the + * comment at include/linux/reciprocal_div.h. + */ + WARN(l == 32, + "ceil(log2(0x%08x)) == 32, %s doesn't support such divisor", + d, __func__); + post_shift = l; + mlow = 1ULL << (32 + l); + do_div(mlow, d); + mhigh = (1ULL << (32 + l)) + (1ULL << (32 + l - prec)); + do_div(mhigh, d); + + for (; post_shift > 0; post_shift--) { + u64 lo = mlow >> 1, hi = mhigh >> 1; + + if (lo >= hi) + break; + + mlow = lo; + mhigh = hi; + } + + R.m = (u32)mhigh; + R.sh = post_shift; + R.exp = l; + R.is_wide_m = mhigh > U32_MAX; + + return R; +} +EXPORT_SYMBOL(reciprocal_value_adv); -- cgit v1.2.3 From 03fc7f9c99c1e7ae2925d459e8487f1a6f199f79 Mon Sep 17 00:00:00 2001 From: Petr Mladek Date: Wed, 27 Jun 2018 16:20:28 +0200 Subject: printk/nmi: Prevent deadlock when accessing the main log buffer in NMI The commit 719f6a7040f1bdaf96 ("printk: Use the main logbuf in NMI when logbuf_lock is available") brought back the possible deadlocks in printk() and NMI. The check of logbuf_lock is done only in printk_nmi_enter() to prevent mixed output. But another CPU might take the lock later, enter NMI, and: + Both NMIs might be serialized by yet another lock, for example, the one in nmi_cpu_backtrace(). + The other CPU might get stopped in NMI, see smp_send_stop() in panic(). The only safe solution is to use trylock when storing the message into the main log buffer. It might cause reordering when some lines go to the main log buffer directly and others are delayed via the per-CPU buffer, which makes it unsuitable as a general solution. This patch replaces the problematic NMI deferred context with an NMI direct context. It can be used to mark code that might produce many messages in NMI, where the risk of losing them is more critical than problems with eventual reordering. The context is then used when dumping trace buffers on oops, which was the primary motivation for the original fix. The reordering is also an even smaller issue there because some traces have their own time stamps. Finally, nmi_cpu_backtrace() no longer needs to be serialized because it will always use the per-CPU buffers again.
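For illustration, the intended calling pattern looks roughly like the sketch below (dump_many_lines() is a hypothetical stand-in for any verbose printer; ftrace_dump() is the real user wired up by this patch):

	/*
	 * Sketch only: wrap a code block that may print a flood of lines
	 * from NMI context. Inside the direct context, printk() tries
	 * logbuf_lock with trylock and stores straight into the main
	 * logbuf, falling back to the per-CPU buffer when contended.
	 */
	printk_nmi_direct_enter();	/* only has effect inside NMI */
	dump_many_lines();		/* hypothetical verbose dump */
	printk_nmi_direct_exit();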
Fixes: 719f6a7040f1bdaf96 ("printk: Use the main logbuf in NMI when logbuf_lock is available") Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20180627142028.11259-1-pmladek@suse.com To: Steven Rostedt Cc: Peter Zijlstra Cc: Tetsuo Handa Cc: Sergey Senozhatsky Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org Acked-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- include/linux/printk.h | 4 ++++ kernel/printk/internal.h | 9 ++++++- kernel/printk/printk_safe.c | 58 +++++++++++++++++++++++++++++---------------- kernel/trace/trace.c | 4 +++- lib/nmi_backtrace.c | 3 --- 5 files changed, 52 insertions(+), 26 deletions(-) (limited to 'lib') diff --git a/include/linux/printk.h b/include/linux/printk.h index 6d7e800affd8..3ede9f46a494 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -148,9 +148,13 @@ void early_printk(const char *s, ...) { } #ifdef CONFIG_PRINTK_NMI extern void printk_nmi_enter(void); extern void printk_nmi_exit(void); +extern void printk_nmi_direct_enter(void); +extern void printk_nmi_direct_exit(void); #else static inline void printk_nmi_enter(void) { } static inline void printk_nmi_exit(void) { } +static inline void printk_nmi_direct_enter(void) { } +static inline void printk_nmi_direct_exit(void) { } #endif /* PRINTK_NMI */ #ifdef CONFIG_PRINTK diff --git a/kernel/printk/internal.h b/kernel/printk/internal.h index 2a7d04049af4..0f1898820cba 100644 --- a/kernel/printk/internal.h +++ b/kernel/printk/internal.h @@ -19,11 +19,16 @@ #ifdef CONFIG_PRINTK #define PRINTK_SAFE_CONTEXT_MASK 0x3fffffff -#define PRINTK_NMI_DEFERRED_CONTEXT_MASK 0x40000000 +#define PRINTK_NMI_DIRECT_CONTEXT_MASK 0x40000000 #define PRINTK_NMI_CONTEXT_MASK 0x80000000 extern raw_spinlock_t logbuf_lock; +__printf(5, 0) +int vprintk_store(int facility, int level, + const char *dict, size_t dictlen, + const char *fmt, va_list args); + __printf(1, 0) int vprintk_default(const char *fmt, va_list args); __printf(1, 0) int vprintk_deferred(const char *fmt, va_list args); __printf(1, 0) int vprintk_func(const char *fmt, va_list args); @@ -54,6 +59,8 @@ void __printk_safe_exit(void); local_irq_enable(); \ } while (0) +void defer_console_output(void); + #else __printf(1, 0) int vprintk_func(const char *fmt, va_list args) { return 0; } diff --git a/kernel/printk/printk_safe.c b/kernel/printk/printk_safe.c index d7d091309054..a0a74c533e4b 100644 --- a/kernel/printk/printk_safe.c +++ b/kernel/printk/printk_safe.c @@ -308,24 +308,33 @@ static __printf(1, 0) int vprintk_nmi(const char *fmt, va_list args) void printk_nmi_enter(void) { - /* - * The size of the extra per-CPU buffer is limited. Use it only when - * the main one is locked. If this CPU is not in the safe context, - * the lock must be taken on another CPU and we could wait for it. - */ - if ((this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK) && - raw_spin_is_locked(&logbuf_lock)) { - this_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK); - } else { - this_cpu_or(printk_context, PRINTK_NMI_DEFERRED_CONTEXT_MASK); - } + this_cpu_or(printk_context, PRINTK_NMI_CONTEXT_MASK); } void printk_nmi_exit(void) { - this_cpu_and(printk_context, - ~(PRINTK_NMI_CONTEXT_MASK | - PRINTK_NMI_DEFERRED_CONTEXT_MASK)); + this_cpu_and(printk_context, ~PRINTK_NMI_CONTEXT_MASK); +} + +/* + * Marks a code that might produce many messages in NMI context + * and the risk of losing them is more critical than eventual + * reordering. + * + * It has effect only when called in NMI context. 
Then printk() + * will try to store the messages into the main logbuf directly + * and use the per-CPU buffers only as a fallback when the lock + * is not available. + */ +void printk_nmi_direct_enter(void) +{ + if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK) + this_cpu_or(printk_context, PRINTK_NMI_DIRECT_CONTEXT_MASK); +} + +void printk_nmi_direct_exit(void) +{ + this_cpu_and(printk_context, ~PRINTK_NMI_DIRECT_CONTEXT_MASK); } #else @@ -363,6 +372,20 @@ void __printk_safe_exit(void) __printf(1, 0) int vprintk_func(const char *fmt, va_list args) { + /* + * Try to use the main logbuf even in NMI. But avoid calling console + * drivers that might have their own locks. + */ + if ((this_cpu_read(printk_context) & PRINTK_NMI_DIRECT_CONTEXT_MASK) && + raw_spin_trylock(&logbuf_lock)) { + int len; + + len = vprintk_store(0, LOGLEVEL_DEFAULT, NULL, 0, fmt, args); + raw_spin_unlock(&logbuf_lock); + defer_console_output(); + return len; + } + /* Use extra buffer in NMI when logbuf_lock is taken or in safe mode. */ if (this_cpu_read(printk_context) & PRINTK_NMI_CONTEXT_MASK) return vprintk_nmi(fmt, args); @@ -371,13 +394,6 @@ __printf(1, 0) int vprintk_func(const char *fmt, va_list args) if (this_cpu_read(printk_context) & PRINTK_SAFE_CONTEXT_MASK) return vprintk_safe(fmt, args); - /* - * Use the main logbuf when logbuf_lock is available in NMI. - * But avoid calling console drivers that might have their own locks. - */ - if (this_cpu_read(printk_context) & PRINTK_NMI_DEFERRED_CONTEXT_MASK) - return vprintk_deferred(fmt, args); - /* No obstacles. */ return vprintk_default(fmt, args); } diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index bcd93031d042..f106ad12f72f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -8265,6 +8265,7 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) tracing_off(); local_irq_save(flags); + printk_nmi_direct_enter(); /* Simulate the iterator */ trace_init_global_iter(&iter); @@ -8344,7 +8345,8 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) for_each_tracing_cpu(cpu) { atomic_dec(&per_cpu_ptr(iter.trace_buffer->data, cpu)->disabled); } - atomic_dec(&dump_running); + atomic_dec(&dump_running); + printk_nmi_direct_exit(); local_irq_restore(flags); } EXPORT_SYMBOL_GPL(ftrace_dump); diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c index 61a6b5aab07e..15ca78e1c7d4 100644 --- a/lib/nmi_backtrace.c +++ b/lib/nmi_backtrace.c @@ -87,11 +87,9 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask, bool nmi_cpu_backtrace(struct pt_regs *regs) { - static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED; int cpu = smp_processor_id(); if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { - arch_spin_lock(&lock); if (regs && cpu_in_idle(instruction_pointer(regs))) { pr_warn("NMI backtrace for cpu %d skipped: idling at %pS\n", cpu, (void *)instruction_pointer(regs)); @@ -102,7 +100,6 @@ bool nmi_cpu_backtrace(struct pt_regs *regs) else dump_stack(); } - arch_spin_unlock(&lock); cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); return true; } -- cgit v1.2.3 From 8dd99871f7acb776521b35a6107d3dd22f1522b8 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Tue, 3 Jul 2018 12:43:54 -0700 Subject: reed_solomon: Fix kernel-doc The current doc build warns: ./lib/reed_solomon/reed_solomon.c:287: WARNING: Unknown target name: "gfp". This is because it misinterprets the "GFP_" that is part of the description. Change the description to avoid the problem. 
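For background, kernel-doc comments are rendered through Sphinx/reST, where a word with a trailing underscore is parsed as a reference to a named target; a hedged sketch of the failure mode (example_alloc is a hypothetical helper used only for illustration):

	/**
	 * example_alloc - hypothetical helper, for illustration only
	 * @gfp: GFP_ flags for allocations
	 *
	 * In reST, "GFP_" above parses as a reference to a target named
	 * "GFP", which does not exist, hence the doc build warning.
	 * Rewording to "Memory allocation flags." sidesteps the markup.
	 */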
Signed-off-by: Matthew Wilcox Acked-by: Kees Cook Signed-off-by: Jonathan Corbet --- lib/reed_solomon/reed_solomon.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/reed_solomon/reed_solomon.c b/lib/reed_solomon/reed_solomon.c index d8bb1a1eba72..e5fdc8b9e856 100644 --- a/lib/reed_solomon/reed_solomon.c +++ b/lib/reed_solomon/reed_solomon.c @@ -283,7 +283,7 @@ out: * in index form * @prim: primitive element to generate polynomial roots * @nroots: RS code generator polynomial degree (number of roots) - * @gfp: GFP_ flags for allocations + * @gfp: Memory allocation flags. */ struct rs_control *init_rs_gfp(int symsize, int gfpoly, int fcr, int prim, int nroots, gfp_t gfp) -- cgit v1.2.3 From afed7bcf9487bb28e2e2b016a195085c07416c0b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 11 Jul 2018 10:36:07 +0100 Subject: locking/refcount: Always allow checked forms In many cases, it would be useful to be able to use the full sanity-checked refcount helpers regardless of CONFIG_REFCOUNT_FULL, as this would help to avoid duplicate warnings where callers try to sanity-check refcount manipulation. This patch refactors things such that the full refcount helpers are always built, as refcount_${op}_checked(), so that they can be used regardless of CONFIG_REFCOUNT_FULL. This will allow code which *always* wants a checked refcount to opt in, avoiding the need to duplicate the logic for warnings. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Reviewed-by: David Sterba Acked-by: Kees Cook Acked-by: Will Deacon Cc: Boqun Feng Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180711093607.1644-1-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/linux/refcount.h | 27 +++++++++++++++++------- lib/refcount.c | 53 +++++++++++++++++++++++------------------------- 2 files changed, 45 insertions(+), 35 deletions(-) (limited to 'lib') diff --git a/include/linux/refcount.h b/include/linux/refcount.h index c36addd27dd5..53c5eca24d83 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -43,17 +43,30 @@ static inline unsigned int refcount_read(const refcount_t *r) return atomic_read(&r->refs); } +extern __must_check bool refcount_add_not_zero_checked(unsigned int i, refcount_t *r); +extern void refcount_add_checked(unsigned int i, refcount_t *r); + +extern __must_check bool refcount_inc_not_zero_checked(refcount_t *r); +extern void refcount_inc_checked(refcount_t *r); + +extern __must_check bool refcount_sub_and_test_checked(unsigned int i, refcount_t *r); + +extern __must_check bool refcount_dec_and_test_checked(refcount_t *r); +extern void refcount_dec_checked(refcount_t *r); + #ifdef CONFIG_REFCOUNT_FULL -extern __must_check bool refcount_add_not_zero(unsigned int i, refcount_t *r); -extern void refcount_add(unsigned int i, refcount_t *r); -extern __must_check bool refcount_inc_not_zero(refcount_t *r); -extern void refcount_inc(refcount_t *r); +#define refcount_add_not_zero refcount_add_not_zero_checked +#define refcount_add refcount_add_checked + +#define refcount_inc_not_zero refcount_inc_not_zero_checked +#define refcount_inc refcount_inc_checked + +#define refcount_sub_and_test refcount_sub_and_test_checked -extern __must_check bool refcount_sub_and_test(unsigned int i, refcount_t *r); +#define refcount_dec_and_test refcount_dec_and_test_checked +#define refcount_dec refcount_dec_checked -extern __must_check bool refcount_dec_and_test(refcount_t *r);
-extern void refcount_dec(refcount_t *r); #else # ifdef CONFIG_ARCH_HAS_REFCOUNT # include diff --git a/lib/refcount.c b/lib/refcount.c index 4bd842f20749..5c4aaefc0682 100644 --- a/lib/refcount.c +++ b/lib/refcount.c @@ -40,10 +40,8 @@ #include #include -#ifdef CONFIG_REFCOUNT_FULL - /** - * refcount_add_not_zero - add a value to a refcount unless it is 0 + * refcount_add_not_zero_checked - add a value to a refcount unless it is 0 * @i: the value to add to the refcount * @r: the refcount * @@ -60,7 +58,7 @@ * * Return: false if the passed refcount is 0, true otherwise */ -bool refcount_add_not_zero(unsigned int i, refcount_t *r) +bool refcount_add_not_zero_checked(unsigned int i, refcount_t *r) { unsigned int new, val = atomic_read(&r->refs); @@ -81,10 +79,10 @@ bool refcount_add_not_zero(unsigned int i, refcount_t *r) return true; } -EXPORT_SYMBOL(refcount_add_not_zero); +EXPORT_SYMBOL(refcount_add_not_zero_checked); /** - * refcount_add - add a value to a refcount + * refcount_add_checked - add a value to a refcount * @i: the value to add to the refcount * @r: the refcount * @@ -99,14 +97,14 @@ EXPORT_SYMBOL(refcount_add_not_zero); * cases, refcount_inc(), or one of its variants, should instead be used to * increment a reference count. */ -void refcount_add(unsigned int i, refcount_t *r) +void refcount_add_checked(unsigned int i, refcount_t *r) { - WARN_ONCE(!refcount_add_not_zero(i, r), "refcount_t: addition on 0; use-after-free.\n"); + WARN_ONCE(!refcount_add_not_zero_checked(i, r), "refcount_t: addition on 0; use-after-free.\n"); } -EXPORT_SYMBOL(refcount_add); +EXPORT_SYMBOL(refcount_add_checked); /** - * refcount_inc_not_zero - increment a refcount unless it is 0 + * refcount_inc_not_zero_checked - increment a refcount unless it is 0 * @r: the refcount to increment * * Similar to atomic_inc_not_zero(), but will saturate at UINT_MAX and WARN. @@ -117,7 +115,7 @@ EXPORT_SYMBOL(refcount_add); * * Return: true if the increment was successful, false otherwise */ -bool refcount_inc_not_zero(refcount_t *r) +bool refcount_inc_not_zero_checked(refcount_t *r) { unsigned int new, val = atomic_read(&r->refs); @@ -136,10 +134,10 @@ bool refcount_inc_not_zero(refcount_t *r) return true; } -EXPORT_SYMBOL(refcount_inc_not_zero); +EXPORT_SYMBOL(refcount_inc_not_zero_checked); /** - * refcount_inc - increment a refcount + * refcount_inc_checked - increment a refcount * @r: the refcount to increment * * Similar to atomic_inc(), but will saturate at UINT_MAX and WARN. @@ -150,14 +148,14 @@ EXPORT_SYMBOL(refcount_inc_not_zero); * Will WARN if the refcount is 0, as this represents a possible use-after-free * condition. 
*/ -void refcount_inc(refcount_t *r) +void refcount_inc_checked(refcount_t *r) { - WARN_ONCE(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n"); + WARN_ONCE(!refcount_inc_not_zero_checked(r), "refcount_t: increment on 0; use-after-free.\n"); } -EXPORT_SYMBOL(refcount_inc); +EXPORT_SYMBOL(refcount_inc_checked); /** - * refcount_sub_and_test - subtract from a refcount and test if it is 0 + * refcount_sub_and_test_checked - subtract from a refcount and test if it is 0 * @i: amount to subtract from the refcount * @r: the refcount * @@ -176,7 +174,7 @@ EXPORT_SYMBOL(refcount_inc); * * Return: true if the resulting refcount is 0, false otherwise */ -bool refcount_sub_and_test(unsigned int i, refcount_t *r) +bool refcount_sub_and_test_checked(unsigned int i, refcount_t *r) { unsigned int new, val = atomic_read(&r->refs); @@ -194,10 +192,10 @@ bool refcount_sub_and_test(unsigned int i, refcount_t *r) return !new; } -EXPORT_SYMBOL(refcount_sub_and_test); +EXPORT_SYMBOL(refcount_sub_and_test_checked); /** - * refcount_dec_and_test - decrement a refcount and test if it is 0 + * refcount_dec_and_test_checked - decrement a refcount and test if it is 0 * @r: the refcount * * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to @@ -209,14 +207,14 @@ EXPORT_SYMBOL(refcount_sub_and_test); * * Return: true if the resulting refcount is 0, false otherwise */ -bool refcount_dec_and_test(refcount_t *r) +bool refcount_dec_and_test_checked(refcount_t *r) { - return refcount_sub_and_test(1, r); + return refcount_sub_and_test_checked(1, r); } -EXPORT_SYMBOL(refcount_dec_and_test); +EXPORT_SYMBOL(refcount_dec_and_test_checked); /** - * refcount_dec - decrement a refcount + * refcount_dec_checked - decrement a refcount * @r: the refcount * * Similar to atomic_dec(), it will WARN on underflow and fail to decrement @@ -225,12 +223,11 @@ EXPORT_SYMBOL(refcount_dec_and_test); * Provides release memory ordering, such that prior loads and stores are done * before. */ -void refcount_dec(refcount_t *r) +void refcount_dec_checked(refcount_t *r) { - WARN_ONCE(refcount_dec_and_test(r), "refcount_t: decrement hit 0; leaking memory.\n"); + WARN_ONCE(refcount_dec_and_test_checked(r), "refcount_t: decrement hit 0; leaking memory.\n"); } -EXPORT_SYMBOL(refcount_dec); -#endif /* CONFIG_REFCOUNT_FULL */ +EXPORT_SYMBOL(refcount_dec_checked); /** * refcount_dec_if_one - decrement a refcount if it is 1 -- cgit v1.2.3 From 1c4facb846c7f863bc65483394e80acdbacf671b Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Fri, 22 Jun 2018 09:15:33 +1000 Subject: vsprintf: Use hw RNG for ptr_key Currently we must wait for enough entropy to become available before hashed pointers can be printed. We can remove this wait by using the hw RNG if available. Use hw RNG to get keying material. Reviewed-by: Steven Rostedt (VMware) Suggested-by: Kees Cook Signed-off-by: Tobin C. Harding Signed-off-by: Theodore Ts'o --- lib/vsprintf.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index a48aaa79d352..6c1fb395bddf 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1675,8 +1675,16 @@ static struct random_ready_callback random_ready = { static int __init initialize_ptr_random(void) { - int ret = add_random_ready_callback(&random_ready); + int key_size = sizeof(ptr_key); + int ret; + + /* Use hw RNG if available. 
*/ + if (get_random_bytes_arch(&ptr_key, key_size) == key_size) { + static_branch_disable(&not_filled_random_ptr_key); + return 0; + } + ret = add_random_ready_callback(&random_ready); if (!ret) { return 0; } else if (ret == -EALREADY) { -- cgit v1.2.3 From 3672476edaa0660eb833f54fa9edeb505417b75c Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Fri, 22 Jun 2018 09:15:34 +1000 Subject: vsprintf: Add command line option debug_boot_weak_hash Currently printing [hashed] pointers requires enough entropy to be available. Early in the boot sequence this may not be the case, resulting in a dummy string '(____ptrval____)' being printed. This makes debugging the early boot sequence difficult. We can relax the requirement to use cryptographically secure hashing during debugging. This enables debugging while keeping development/production kernel behaviour the same. If the new command line option debug_boot_weak_hash is enabled, use cryptographically insecure hashing and hash the pointer value immediately. Reviewed-by: Steven Rostedt (VMware) Signed-off-by: Tobin C. Harding Signed-off-by: Theodore Ts'o --- Documentation/admin-guide/kernel-parameters.txt | 8 ++++++++ lib/vsprintf.c | 17 +++++++++++++++ 2 files changed, 25 insertions(+) (limited to 'lib') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index efc7aa7a0670..0c8f7889efa1 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -748,6 +748,14 @@ debug [KNL] Enable kernel debugging (events log level). + debug_boot_weak_hash + [KNL] Enable printing [hashed] pointers early in the + boot sequence. If enabled, we use a weak hash instead + of siphash to hash pointers. Use this option if you are + seeing instances of '(___ptrval___)') and need to see a + value (hashed pointer) instead. Cryptographically + insecure, please do not use on production kernels. + debug_locks_verbose= [KNL] verbose self-tests Format=<0|1> diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 6c1fb395bddf..1ee2829f3b54 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1651,6 +1651,17 @@ char *device_node_string(char *buf, char *end, struct device_node *dn, return widen_string(buf, buf - buf_start, end, spec); } +/* Make pointers available for printing early in the boot sequence. */ +static int debug_boot_weak_hash __ro_after_init; + +static int __init debug_boot_weak_hash_enable(char *str) +{ + debug_boot_weak_hash = 1; + pr_info("debug_boot_weak_hash enabled\n"); + return 0; +} +early_param("debug_boot_weak_hash", debug_boot_weak_hash_enable); + static DEFINE_STATIC_KEY_TRUE(not_filled_random_ptr_key); static siphash_key_t ptr_key __read_mostly; @@ -1703,6 +1714,12 @@ static char *ptr_to_id(char *buf, char *end, void *ptr, struct printf_spec spec) const char *str = sizeof(ptr) == 8 ? "(____ptrval____)" : "(ptrval)"; unsigned long hashval; + /* When debugging early boot use non-cryptographically secure hash.
*/ + if (unlikely(debug_boot_weak_hash)) { + hashval = hash_long((unsigned long)ptr, 32); + return pointer_string(buf, end, (const void *)hashval, spec); + } + if (static_branch_unlikely(&not_filled_random_ptr_key)) { spec.field_width = 2 * sizeof(ptr); /* string length must be less than default_width */ -- cgit v1.2.3 From 5f81880d5204ee2388fd9a75bb850ccd526885b7 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 20 Jul 2018 21:56:48 +0000 Subject: sysfs, kobject: allow creating kobject belonging to arbitrary users Normally kobjects and their sysfs representation belong to global root; however, that is not necessarily the case for objects in separate namespaces. For example, objects in a separate network namespace logically belong to the container's root and not global root. This change lays the groundwork for allowing ownership of network namespace objects to be transferred to the container's root user by defining a get_ownership() callback in the ktype structure and using it in sysfs code to retrieve the desired uid/gid when creating sysfs objects for a given kobject. Co-Developed-by: Tyler Hicks Signed-off-by: Dmitry Torokhov Signed-off-by: Tyler Hicks Signed-off-by: David S. Miller --- fs/sysfs/dir.c | 7 +++++-- fs/sysfs/file.c | 32 ++++++++++++++++++------------ fs/sysfs/group.c | 23 +++++++++++++++++------ fs/sysfs/sysfs.h | 5 ++--- include/linux/kobject.h | 4 ++++ lib/kobject.c | 19 +++++++++++++++++++ 6 files changed, 67 insertions(+), 23 deletions(-) (limited to 'lib') diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index e39b884f0867..feeae8081c22 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -40,6 +40,8 @@ void sysfs_warn_dup(struct kernfs_node *parent, const char *name) int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) { struct kernfs_node *parent, *kn; + kuid_t uid; + kgid_t gid; BUG_ON(!kobj); @@ -51,9 +53,10 @@ int sysfs_create_dir_ns(struct kobject *kobj, const void *ns) if (!parent) return -ENOENT; + kobject_get_ownership(kobj, &uid, &gid); + kn = kernfs_create_dir_ns(parent, kobject_name(kobj), - S_IRWXU | S_IRUGO | S_IXUGO, - GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, + S_IRWXU | S_IRUGO | S_IXUGO, uid, gid, kobj, ns); if (IS_ERR(kn)) { if (PTR_ERR(kn) == -EEXIST) diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c index 513fa691ecbd..fa46216523cf 100644 --- a/fs/sysfs/file.c +++ b/fs/sysfs/file.c @@ -245,7 +245,7 @@ static const struct kernfs_ops sysfs_bin_kfops_mmap = { int sysfs_add_file_mode_ns(struct kernfs_node *parent, const struct attribute *attr, bool is_bin, - umode_t mode, const void *ns) + umode_t mode, kuid_t uid, kgid_t gid, const void *ns) { struct lock_class_key *key = NULL; const struct kernfs_ops *ops; @@ -302,8 +302,8 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent, if (!attr->ignore_lockdep) key = attr->key ?: (struct lock_class_key *)&attr->skey; #endif - kn = __kernfs_create_file(parent, attr->name, - mode & 0777, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, + + kn = __kernfs_create_file(parent, attr->name, mode & 0777, uid, gid, size, ops, (void *)attr, ns, key); if (IS_ERR(kn)) { if (PTR_ERR(kn) == -EEXIST) @@ -313,12 +313,6 @@ int sysfs_add_file_mode_ns(struct kernfs_node *parent, return 0; } -int sysfs_add_file(struct kernfs_node *parent, const struct attribute *attr, - bool is_bin) -{ - return sysfs_add_file_mode_ns(parent, attr, is_bin, attr->mode, NULL); -} - /** * sysfs_create_file_ns - create an attribute file for an object with custom ns * @kobj: object we're creating for @@ -328,9 +322,14 @@ int sysfs_add_file(struct kernfs_node *parent, const struct attribute
*attr, int sysfs_create_file_ns(struct kobject *kobj, const struct attribute *attr, const void *ns) { + kuid_t uid; + kgid_t gid; + BUG_ON(!kobj || !kobj->sd || !attr); - return sysfs_add_file_mode_ns(kobj->sd, attr, false, attr->mode, ns); + kobject_get_ownership(kobj, &uid, &gid); + return sysfs_add_file_mode_ns(kobj->sd, attr, false, attr->mode, + uid, gid, ns); } EXPORT_SYMBOL_GPL(sysfs_create_file_ns); @@ -359,6 +358,8 @@ int sysfs_add_file_to_group(struct kobject *kobj, const struct attribute *attr, const char *group) { struct kernfs_node *parent; + kuid_t uid; + kgid_t gid; int error; if (group) { @@ -371,7 +372,9 @@ int sysfs_add_file_to_group(struct kobject *kobj, if (!parent) return -ENOENT; - error = sysfs_add_file(parent, attr, false); + kobject_get_ownership(kobj, &uid, &gid); + error = sysfs_add_file_mode_ns(kobj->sd, attr, false, + attr->mode, uid, gid, NULL); kernfs_put(parent); return error; @@ -487,9 +490,14 @@ EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group); int sysfs_create_bin_file(struct kobject *kobj, const struct bin_attribute *attr) { + kuid_t uid; + kgid_t gid; + BUG_ON(!kobj || !kobj->sd || !attr); - return sysfs_add_file(kobj->sd, &attr->attr, true); + kobject_get_ownership(kobj, &uid, &gid); + return sysfs_add_file_mode_ns(kobj->sd, &attr->attr, true, + attr->attr.mode, uid, gid, NULL); } EXPORT_SYMBOL_GPL(sysfs_create_bin_file); diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index 4802ec0e1e3a..c7a716c4acc9 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -31,6 +31,7 @@ static void remove_files(struct kernfs_node *parent, } static int create_files(struct kernfs_node *parent, struct kobject *kobj, + kuid_t uid, kgid_t gid, const struct attribute_group *grp, int update) { struct attribute *const *attr; @@ -60,7 +61,7 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj, mode &= SYSFS_PREALLOC | 0664; error = sysfs_add_file_mode_ns(parent, *attr, false, - mode, NULL); + mode, uid, gid, NULL); if (unlikely(error)) break; } @@ -90,7 +91,8 @@ static int create_files(struct kernfs_node *parent, struct kobject *kobj, mode &= SYSFS_PREALLOC | 0664; error = sysfs_add_file_mode_ns(parent, &(*bin_attr)->attr, true, - mode, NULL); + mode, + uid, gid, NULL); if (error) break; } @@ -106,6 +108,8 @@ static int internal_create_group(struct kobject *kobj, int update, const struct attribute_group *grp) { struct kernfs_node *kn; + kuid_t uid; + kgid_t gid; int error; BUG_ON(!kobj || (!update && !kobj->sd)); @@ -118,9 +122,11 @@ static int internal_create_group(struct kobject *kobj, int update, kobj->name, grp->name ?: ""); return -EINVAL; } + kobject_get_ownership(kobj, &uid, &gid); if (grp->name) { - kn = kernfs_create_dir(kobj->sd, grp->name, - S_IRWXU | S_IRUGO | S_IXUGO, kobj); + kn = kernfs_create_dir_ns(kobj->sd, grp->name, + S_IRWXU | S_IRUGO | S_IXUGO, + uid, gid, kobj, NULL); if (IS_ERR(kn)) { if (PTR_ERR(kn) == -EEXIST) sysfs_warn_dup(kobj->sd, grp->name); @@ -129,7 +135,7 @@ static int internal_create_group(struct kobject *kobj, int update, } else kn = kobj->sd; kernfs_get(kn); - error = create_files(kn, kobj, grp, update); + error = create_files(kn, kobj, uid, gid, grp, update); if (error) { if (grp->name) kernfs_remove(kn); @@ -281,6 +287,8 @@ int sysfs_merge_group(struct kobject *kobj, const struct attribute_group *grp) { struct kernfs_node *parent; + kuid_t uid; + kgid_t gid; int error = 0; struct attribute *const *attr; int i; @@ -289,8 +297,11 @@ int sysfs_merge_group(struct kobject *kobj, if (!parent) return -ENOENT; + 
kobject_get_ownership(kobj, &uid, &gid); + for ((i = 0, attr = grp->attrs); *attr && !error; (++i, ++attr)) - error = sysfs_add_file(parent, *attr, false); + error = sysfs_add_file_mode_ns(parent, *attr, false, + (*attr)->mode, uid, gid, NULL); if (error) { while (--i >= 0) kernfs_remove_by_name(parent, (*--attr)->name); diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h index d098e015fcc9..0050cc0c0236 100644 --- a/fs/sysfs/sysfs.h +++ b/fs/sysfs/sysfs.h @@ -27,11 +27,10 @@ void sysfs_warn_dup(struct kernfs_node *parent, const char *name); /* * file.c */ -int sysfs_add_file(struct kernfs_node *parent, - const struct attribute *attr, bool is_bin); int sysfs_add_file_mode_ns(struct kernfs_node *parent, const struct attribute *attr, bool is_bin, - umode_t amode, const void *ns); + umode_t amode, kuid_t uid, kgid_t gid, + const void *ns); /* * symlink.c diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 7f6f93c3df9c..b49ff230beba 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -26,6 +26,7 @@ #include #include #include +#include #define UEVENT_HELPER_PATH_LEN 256 #define UEVENT_NUM_ENVP 32 /* number of env pointers */ @@ -114,6 +115,8 @@ extern struct kobject * __must_check kobject_get_unless_zero( extern void kobject_put(struct kobject *kobj); extern const void *kobject_namespace(struct kobject *kobj); +extern void kobject_get_ownership(struct kobject *kobj, + kuid_t *uid, kgid_t *gid); extern char *kobject_get_path(struct kobject *kobj, gfp_t flag); struct kobj_type { @@ -122,6 +125,7 @@ struct kobj_type { struct attribute **default_attrs; const struct kobj_ns_type_operations *(*child_ns_type)(struct kobject *kobj); const void *(*namespace)(struct kobject *kobj); + void (*get_ownership)(struct kobject *kobj, kuid_t *uid, kgid_t *gid); }; struct kobj_uevent_env { diff --git a/lib/kobject.c b/lib/kobject.c index 18989b5b3b56..f2dc1f756007 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -35,6 +35,25 @@ const void *kobject_namespace(struct kobject *kobj) return kobj->ktype->namespace(kobj); } +/** + * kobject_get_ownership - get sysfs ownership data for @kobj + * @kobj: kobject in question + * @uid: kernel user ID for sysfs objects + * @gid: kernel group ID for sysfs objects + * + * Returns initial uid/gid pair that should be used when creating sysfs + * representation of given kobject. Normally used to adjust ownership of + * objects in a container. + */ +void kobject_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid) +{ + *uid = GLOBAL_ROOT_UID; + *gid = GLOBAL_ROOT_GID; + + if (kobj->ktype->get_ownership) + kobj->ktype->get_ownership(kobj, uid, gid); +} + /* * populate_dir - populate directory with attributes. * @kobj: object we're working on. -- cgit v1.2.3 From d028b6f703209dbe96201b2714ff46625877128e Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Fri, 20 Jul 2018 21:56:49 +0000 Subject: kobject: kset_create_and_add() - fetch ownership info from parent This change implements get_ownership() for ksets created with kset_create_and_add() call by fetching ownership data from parent kobject. This is done mostly for benefit of "queues" attribute of net devices so that corresponding directory belongs to container's root instead of global root for network devices in a container. Signed-off-by: Dmitry Torokhov Reviewed-by: Tyler Hicks Signed-off-by: David S. 
Miller --- lib/kobject.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/kobject.c b/lib/kobject.c index f2dc1f756007..389829d3a1d1 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -887,9 +887,16 @@ static void kset_release(struct kobject *kobj) kfree(kset); } +void kset_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid) +{ + if (kobj->parent) + kobject_get_ownership(kobj->parent, uid, gid); +} + static struct kobj_type kset_ktype = { .sysfs_ops = &kobj_sysfs_ops, - .release = kset_release, + .release = kset_release, + .get_ownership = kset_get_ownership, }; /** -- cgit v1.2.3 From 1fb2e3f276ddafee81073d884f599cd2574c31e2 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 17 Jul 2018 18:05:36 +0200 Subject: lib/crc: Move polynomial definition to separate header Allow other drivers and parts of kernel to use the same define for CRC32 polynomial, instead of duplicating it in many places. This code does not bring any functional changes, except moving existing code. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Herbert Xu --- include/linux/crc32poly.h | 20 ++++++++++++++++++++ lib/crc32.c | 1 + lib/crc32defs.h | 14 -------------- lib/gen_crc32table.c | 1 + 4 files changed, 22 insertions(+), 14 deletions(-) create mode 100644 include/linux/crc32poly.h (limited to 'lib') diff --git a/include/linux/crc32poly.h b/include/linux/crc32poly.h new file mode 100644 index 000000000000..7ad5aa92d3c7 --- /dev/null +++ b/include/linux/crc32poly.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_CRC32_POLY_H +#define _LINUX_CRC32_POLY_H + +/* + * There are multiple 16-bit CRC polynomials in common use, but this is + * *the* standard CRC-32 polynomial, first popularized by Ethernet. + * x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x^1+x^0 + */ +#define CRCPOLY_LE 0xedb88320 +#define CRCPOLY_BE 0x04c11db7 + +/* + * This is the CRC32c polynomial, as outlined by Castagnoli. + * x^32+x^28+x^27+x^26+x^25+x^23+x^22+x^20+x^19+x^18+x^14+x^13+x^11+x^10+x^9+ + * x^8+x^6+x^0 + */ +#define CRC32C_POLY_LE 0x82F63B78 + +#endif /* _LINUX_CRC32_POLY_H */ diff --git a/lib/crc32.c b/lib/crc32.c index 2ef20fe84b69..341c54cb4edf 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -27,6 +27,7 @@ /* see: Documentation/crc32.txt for a description of algorithms */ #include +#include #include #include #include diff --git a/lib/crc32defs.h b/lib/crc32defs.h index cb275a28a750..0c8fb5923e7e 100644 --- a/lib/crc32defs.h +++ b/lib/crc32defs.h @@ -1,18 +1,4 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* - * There are multiple 16-bit CRC polynomials in common use, but this is - * *the* standard CRC-32 polynomial, first popularized by Ethernet. - * x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x^1+x^0 - */ -#define CRCPOLY_LE 0xedb88320 -#define CRCPOLY_BE 0x04c11db7 - -/* - * This is the CRC32c polynomial, as outlined by Castagnoli. 
- * x^32+x^28+x^27+x^26+x^25+x^23+x^22+x^20+x^19+x^18+x^14+x^13+x^11+x^10+x^9+ - * x^8+x^6+x^0 - */ -#define CRC32C_POLY_LE 0x82F63B78 /* Try to choose an implementation variant via Kconfig */ #ifdef CONFIG_CRC32_SLICEBY8 diff --git a/lib/gen_crc32table.c b/lib/gen_crc32table.c index 8f26660ea10a..34c3bc826f45 100644 --- a/lib/gen_crc32table.c +++ b/lib/gen_crc32table.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include "../include/linux/crc32poly.h" #include "../include/generated/autoconf.h" #include "crc32defs.h" #include -- cgit v1.2.3 From e37f2f93afe594682702439ca34eb8130598cdf2 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 17 Jul 2018 18:05:37 +0200 Subject: lib/crc: Use consistent naming for CRC-32 polynomials Header was defining CRCPOLY_LE/BE and CRC32C_POLY_LE but in fact all of them are CRC-32 polynomials so use consistent naming. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Herbert Xu --- include/linux/crc32poly.h | 4 ++-- lib/crc32.c | 10 +++++----- lib/gen_crc32table.c | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/include/linux/crc32poly.h b/include/linux/crc32poly.h index 7ad5aa92d3c7..62c4b7790a28 100644 --- a/include/linux/crc32poly.h +++ b/include/linux/crc32poly.h @@ -7,8 +7,8 @@ * *the* standard CRC-32 polynomial, first popularized by Ethernet. * x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x^1+x^0 */ -#define CRCPOLY_LE 0xedb88320 -#define CRCPOLY_BE 0x04c11db7 +#define CRC32_POLY_LE 0xedb88320 +#define CRC32_POLY_BE 0x04c11db7 /* * This is the CRC32c polynomial, as outlined by Castagnoli. diff --git a/lib/crc32.c b/lib/crc32.c index 341c54cb4edf..a6c9afafc8c8 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -185,7 +185,7 @@ static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p, #if CRC_LE_BITS == 1 u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) { - return crc32_le_generic(crc, p, len, NULL, CRCPOLY_LE); + return crc32_le_generic(crc, p, len, NULL, CRC32_POLY_LE); } u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) { @@ -195,7 +195,7 @@ u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) { return crc32_le_generic(crc, p, len, - (const u32 (*)[256])crc32table_le, CRCPOLY_LE); + (const u32 (*)[256])crc32table_le, CRC32_POLY_LE); } u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) { @@ -269,7 +269,7 @@ static u32 __attribute_const__ crc32_generic_shift(u32 crc, size_t len, u32 __attribute_const__ crc32_le_shift(u32 crc, size_t len) { - return crc32_generic_shift(crc, len, CRCPOLY_LE); + return crc32_generic_shift(crc, len, CRC32_POLY_LE); } u32 __attribute_const__ __crc32c_le_shift(u32 crc, size_t len) @@ -331,13 +331,13 @@ static inline u32 __pure crc32_be_generic(u32 crc, unsigned char const *p, #if CRC_LE_BITS == 1 u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) { - return crc32_be_generic(crc, p, len, NULL, CRCPOLY_BE); + return crc32_be_generic(crc, p, len, NULL, CRC32_POLY_BE); } #else u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) { return crc32_be_generic(crc, p, len, - (const u32 (*)[256])crc32table_be, CRCPOLY_BE); + (const u32 (*)[256])crc32table_be, CRC32_POLY_BE); } #endif EXPORT_SYMBOL(crc32_be); diff --git a/lib/gen_crc32table.c b/lib/gen_crc32table.c index 34c3bc826f45..f755b997b967 100644 --- a/lib/gen_crc32table.c +++ b/lib/gen_crc32table.c @@ -58,7 +58,7 @@ static void 
crc32init_le_generic(const uint32_t polynomial, static void crc32init_le(void) { - crc32init_le_generic(CRCPOLY_LE, crc32table_le); + crc32init_le_generic(CRC32_POLY_LE, crc32table_le); } static void crc32cinit_le(void) @@ -77,7 +77,7 @@ static void crc32init_be(void) crc32table_be[0][0] = 0; for (i = 1; i < BE_TABLE_SIZE; i <<= 1) { - crc = (crc << 1) ^ ((crc & 0x80000000) ? CRCPOLY_BE : 0); + crc = (crc << 1) ^ ((crc & 0x80000000) ? CRC32_POLY_BE : 0); for (j = 0; j < i; j++) crc32table_be[0][i + j] = crc ^ crc32table_be[0][j]; } -- cgit v1.2.3 From faa16bc404d72a5afb857c924c83a5f691f83386 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 17 Jul 2018 18:05:41 +0200 Subject: lib: Use existing define with polynomial Do not define the polynomial again; use the header with the existing define. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Herbert Xu --- lib/decompress_bunzip2.c | 3 ++- lib/xz/xz_crc32.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c index 0234361b24b8..7c4932eed748 100644 --- a/lib/decompress_bunzip2.c +++ b/lib/decompress_bunzip2.c @@ -51,6 +51,7 @@ #endif /* STATIC */ #include <linux/decompress/mm.h> +#include <linux/crc32poly.h> #ifndef INT_MAX #define INT_MAX 0x7fffffff @@ -654,7 +655,7 @@ static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, long len, for (i = 0; i < 256; i++) { c = i << 24; for (j = 8; j; j--) - c = c&0x80000000 ? (c << 1)^0x04c11db7 : (c << 1); + c = c&0x80000000 ? (c << 1)^(CRC32_POLY_BE) : (c << 1); bd->crc32Table[i] = c; } diff --git a/lib/xz/xz_crc32.c b/lib/xz/xz_crc32.c index 34532d14fd4c..25a5d87e2e4c 100644 --- a/lib/xz/xz_crc32.c +++ b/lib/xz/xz_crc32.c @@ -15,6 +15,7 @@ * but they are bigger and use more memory for the lookup table. */ +#include <linux/crc32poly.h> #include "xz_private.h" /* @@ -29,7 +30,7 @@ STATIC_RW_DATA uint32_t xz_crc32_table[256]; XZ_EXTERN void xz_crc32_init(void) { - const uint32_t poly = 0xEDB88320; + const uint32_t poly = CRC32_POLY_LE; uint32_t i; uint32_t j; -- cgit v1.2.3 From fc91a3c4c27acdca0bc13af6fbb68c35cfd519f2 Mon Sep 17 00:00:00 2001 From: "Joel Fernandes (Google)" Date: Mon, 23 Jul 2018 14:25:31 -0700 Subject: debugobjects: Make stack check warning more informative While debugging an issue, debugobjects tracking warned about an annotation issue of an object on the stack. It turned out that the object in question was on a different stack, which was itself due to another issue. Thomas suggested printing the pointers and the location of the stack for the currently running task. This helped to figure out that the object was on the wrong stack. As this is generally useful information for debugging similar issues, make the error message more informative by printing the pointers.
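For reference, a minimal sketch of the annotation the tracker expects for on-stack objects (struct my_obj and my_obj_debug_descr are hypothetical; debug_object_init_on_stack() and debug_object_free() are the real debugobjects entry points):

	static struct debug_obj_descr my_obj_debug_descr;	/* hypothetical descriptor */

	void demo(void)
	{
		struct my_obj obj;	/* lives on the current task's stack */

		/* Tell debugobjects this object is on-stack, not static or heap. */
		debug_object_init_on_stack(&obj, &my_obj_debug_descr);
		/* ... use obj ... */
		/* Stop tracking before the stack frame goes away. */
		debug_object_free(&obj, &my_obj_debug_descr);
	}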
[ tglx: Massaged changelog ] Signed-off-by: Joel Fernandes (Google) Signed-off-by: Thomas Gleixner Acked-by: Waiman Long Acked-by: Yang Shi Cc: kernel-team@android.com Cc: Arnd Bergmann Cc: astrachan@google.com Link: https://lkml.kernel.org/r/20180723212531.202328-1-joel@joelfernandes.org --- lib/debugobjects.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 994be4805cec..24c1df0d7466 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -360,9 +360,12 @@ static void debug_object_is_on_stack(void *addr, int onstack) limit++; if (is_on_stack) - pr_warn("object is on stack, but not annotated\n"); + pr_warn("object %p is on stack %p, but NOT annotated.\n", addr, + task_stack_page(current)); else - pr_warn("object is not on stack, but annotated\n"); + pr_warn("object %p is NOT on stack %p, but annotated.\n", addr, + task_stack_page(current)); + WARN_ON(1); } -- cgit v1.2.3 From 06ec64b84c357693e9a5540de8eedfc775dbae12 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 31 Jul 2018 13:39:31 +0200 Subject: Kconfig: consolidate the "Kernel hacking" menu Move the source of lib/Kconfig.debug and arch/$(ARCH)/Kconfig.debug to the top-level Kconfig. For two architectures that means moving their arch-specific symbols in that menu into a new arch Kconfig.debug file, and for a few more creating a dummy file so that we can include it unconditionally. Also move the actual 'Kernel hacking' menu to lib/Kconfig.debug, where it belongs. Signed-off-by: Christoph Hellwig Signed-off-by: Masahiro Yamada --- Kconfig | 2 ++ arch/alpha/Kconfig | 2 -- arch/alpha/Kconfig.debug | 5 ----- arch/arc/Kconfig | 1 - arch/arc/Kconfig.debug | 5 ----- arch/arm/Kconfig | 2 -- arch/arm/Kconfig.debug | 5 ----- arch/arm64/Kconfig | 2 -- arch/arm64/Kconfig.debug | 5 ----- arch/c6x/Kconfig | 15 --------------- arch/c6x/Kconfig.debug | 10 ++++++++++ arch/h8300/Kconfig | 6 ------ arch/h8300/Kconfig.debug | 1 + arch/hexagon/Kconfig | 4 ---- arch/hexagon/Kconfig.debug | 1 + arch/ia64/Kconfig | 2 -- arch/ia64/Kconfig.debug | 5 ----- arch/m68k/Kconfig | 2 -- arch/m68k/Kconfig.debug | 5 ----- arch/microblaze/Kconfig | 2 -- arch/microblaze/Kconfig.debug | 6 ------ arch/mips/Kconfig | 2 -- arch/mips/Kconfig.debug | 5 ----- arch/nds32/Kconfig | 4 ---- arch/nds32/Kconfig.debug | 1 + arch/nios2/Kconfig | 2 -- arch/nios2/Kconfig.debug | 5 ----- arch/openrisc/Kconfig | 6 ------ arch/openrisc/Kconfig.debug | 1 + arch/parisc/Kconfig | 2 -- arch/parisc/Kconfig.debug | 5 ----- arch/powerpc/Kconfig | 2 -- arch/powerpc/Kconfig.debug | 5 ----- arch/riscv/Kconfig | 45 ------------------------------------------- arch/riscv/Kconfig.debug | 37 +++++++++++++++++++++++++++++++++++ arch/s390/Kconfig | 2 -- arch/s390/Kconfig.debug | 5 ----- arch/sh/Kconfig | 2 -- arch/sh/Kconfig.debug | 5 ----- arch/sparc/Kconfig | 2 -- arch/sparc/Kconfig.debug | 5 ----- arch/um/Kconfig | 2 -- arch/um/Kconfig.debug | 5 ----- arch/unicore32/Kconfig | 2 -- arch/unicore32/Kconfig.debug | 5 ----- arch/x86/Kconfig | 2 -- arch/x86/Kconfig.debug | 5 ----- arch/xtensa/Kconfig | 2 -- arch/xtensa/Kconfig.debug | 5 ----- lib/Kconfig.debug | 6 ++++++ 50 files changed, 59 insertions(+), 206 deletions(-) create mode 100644 arch/c6x/Kconfig.debug create mode 100644 arch/h8300/Kconfig.debug create mode 100644 arch/hexagon/Kconfig.debug create mode 100644 arch/nds32/Kconfig.debug create mode 100644 arch/openrisc/Kconfig.debug create mode 100644 arch/riscv/Kconfig.debug (limited to 'lib') diff 
--git a/Kconfig b/Kconfig index 1a5c0a5d4ea1..a5997d6c2029 100644 --- a/Kconfig +++ b/Kconfig @@ -30,3 +30,5 @@ source "security/Kconfig" source "crypto/Kconfig" source "lib/Kconfig" + +source "lib/Kconfig.debug" diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 50676152babd..e4334f017f8e 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -710,8 +710,6 @@ config SRM_ENV endmenu -source "arch/alpha/Kconfig.debug" - # DUMMY_CONSOLE may be defined in drivers/video/console/Kconfig # but we also need it if VGA_HOSE is set config DUMMY_CONSOLE diff --git a/arch/alpha/Kconfig.debug b/arch/alpha/Kconfig.debug index 5e93dffb818a..b88c7b641d72 100644 --- a/arch/alpha/Kconfig.debug +++ b/arch/alpha/Kconfig.debug @@ -1,7 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -menu "Kernel hacking" - -source "lib/Kconfig.debug" config EARLY_PRINTK bool @@ -39,5 +36,3 @@ config MATHEMU This option is required for IEEE compliant floating point arithmetic on the Alpha. The only time you would ever not say Y is to say M in order to debug the code. Say Y unless you know what you are doing. - -endmenu diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index d4a28c45c406..5aab069eba17 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -574,5 +574,4 @@ source "drivers/pci/Kconfig" endmenu -source "arch/arc/Kconfig.debug" source "kernel/power/Kconfig" diff --git a/arch/arc/Kconfig.debug b/arch/arc/Kconfig.debug index 03da1a6b3072..45add86decd5 100644 --- a/arch/arc/Kconfig.debug +++ b/arch/arc/Kconfig.debug @@ -1,7 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -menu "Kernel hacking" - -source "lib/Kconfig.debug" config 16KSTACKS bool "Use 16Kb for kernel stacks instead of 8Kb" @@ -11,5 +8,3 @@ config 16KSTACKS This increases the resident kernel footprint and will cause less threads to run on the system and also increase the pressure on the VM subsystem for higher order allocations. - -endmenu diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 46ee6669204b..9aaa23c25374 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -2191,8 +2191,6 @@ endmenu source "drivers/firmware/Kconfig" -source "arch/arm/Kconfig.debug" - if CRYPTO source "arch/arm/crypto/Kconfig" endif diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index 693f84392f1b..b48dc083d1b1 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -1,7 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -menu "Kernel hacking" - -source "lib/Kconfig.debug" config ARM_PTDUMP_CORE def_bool n @@ -1863,5 +1860,3 @@ config PID_IN_CONTEXTIDR are planning to use hardware trace tools with this kernel. 
source "drivers/hwtracing/coresight/Kconfig" - -endmenu diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index fabd797c8645..07d457ec417a 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1321,8 +1321,6 @@ source "drivers/acpi/Kconfig" source "arch/arm64/kvm/Kconfig" -source "arch/arm64/Kconfig.debug" - if CRYPTO source "arch/arm64/crypto/Kconfig" endif diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index cc6bd559af85..69c9170bdd24 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -1,6 +1,3 @@ -menu "Kernel hacking" - -source "lib/Kconfig.debug" config ARM64_PTDUMP_CORE def_bool n @@ -97,5 +94,3 @@ config ARM64_RELOC_TEST tristate "Relocation testing module" source "drivers/hwtracing/coresight/Kconfig" - -endmenu diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig index 441bbe089b96..904b3375331e 100644 --- a/arch/c6x/Kconfig +++ b/arch/c6x/Kconfig @@ -115,18 +115,3 @@ source "kernel/Kconfig.preempt" source "kernel/Kconfig.hz" endmenu - -menu "Kernel hacking" - -source "lib/Kconfig.debug" - -config ACCESS_CHECK - bool "Check the user pointer address" - default y - help - Usually the pointer transfer from user space is checked to see if its - address is in the kernel space. - - Say N here to disable that check to improve the performance. - -endmenu diff --git a/arch/c6x/Kconfig.debug b/arch/c6x/Kconfig.debug new file mode 100644 index 000000000000..c299e0d8eca3 --- /dev/null +++ b/arch/c6x/Kconfig.debug @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0 + +config ACCESS_CHECK + bool "Check the user pointer address" + default y + help + Usually the pointer transfer from user space is checked to see if its + address is in the kernel space. + + Say N here to disable that check to improve the performance. diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index e23cd887f8f9..c16e7cf732f7 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -55,9 +55,3 @@ menu "Kernel Features" source "kernel/Kconfig.preempt" endmenu - -menu "Kernel hacking" - -source "lib/Kconfig.debug" - -endmenu diff --git a/arch/h8300/Kconfig.debug b/arch/h8300/Kconfig.debug new file mode 100644 index 000000000000..22a162cd99e8 --- /dev/null +++ b/arch/h8300/Kconfig.debug @@ -0,0 +1 @@ +# dummy file, do not delete diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig index d2851f46a64b..fcdb6d9fcecc 100644 --- a/arch/hexagon/Kconfig +++ b/arch/hexagon/Kconfig @@ -149,7 +149,3 @@ endchoice source "kernel/Kconfig.hz" endmenu - -menu "Kernel hacking" -source "lib/Kconfig.debug" -endmenu diff --git a/arch/hexagon/Kconfig.debug b/arch/hexagon/Kconfig.debug new file mode 100644 index 000000000000..22a162cd99e8 --- /dev/null +++ b/arch/hexagon/Kconfig.debug @@ -0,0 +1 @@ +# dummy file, do not delete diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 6015d66fa521..34a8d24cffea 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -576,5 +576,3 @@ config MSPEC If you have an ia64 and you want to enable memory special operations support (formerly known as fetchop), say Y here, otherwise say N. 
- -source "arch/ia64/Kconfig.debug" diff --git a/arch/ia64/Kconfig.debug b/arch/ia64/Kconfig.debug index 677c409425df..1371efc9b005 100644 --- a/arch/ia64/Kconfig.debug +++ b/arch/ia64/Kconfig.debug @@ -1,7 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -menu "Kernel hacking" - -source "lib/Kconfig.debug" choice prompt "Physical memory granularity" @@ -56,5 +53,3 @@ config IA64_DEBUG_IRQ Selecting this option turns on bug checking for the IA-64 irq_save and restore instructions. It's useful for tracking down spinlock problems, but slow! If you're unsure, select N. - -endmenu diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index f76fb7bb371f..cf41ad45513f 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -147,5 +147,3 @@ endmenu endif source "arch/m68k/Kconfig.devices" - -source "arch/m68k/Kconfig.debug" diff --git a/arch/m68k/Kconfig.debug b/arch/m68k/Kconfig.debug index 04690b179a6e..f43643111eaf 100644 --- a/arch/m68k/Kconfig.debug +++ b/arch/m68k/Kconfig.debug @@ -1,7 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -menu "Kernel hacking" - -source "lib/Kconfig.debug" config BOOTPARAM bool 'Compiled-in Kernel Boot Parameter' @@ -51,5 +48,3 @@ config BDM_DISABLE Disable the ColdFire CPU's BDM signals. endif - -endmenu diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index fc31e047a43a..f4492e9478fe 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -285,5 +285,3 @@ config PCI_XILINX source "drivers/pci/Kconfig" endmenu - -source "arch/microblaze/Kconfig.debug" diff --git a/arch/microblaze/Kconfig.debug b/arch/microblaze/Kconfig.debug index 93a737c8d1a6..dc2e3c45e8a2 100644 --- a/arch/microblaze/Kconfig.debug +++ b/arch/microblaze/Kconfig.debug @@ -1,11 +1,5 @@ # For a description of the syntax of this configuration file, # see Documentation/kbuild/kconfig-language.txt. -menu "Kernel hacking" - config TRACE_IRQFLAGS_SUPPORT def_bool y - -source "lib/Kconfig.debug" - -endmenu diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index f586a3259f7f..ab02824c3976 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -3195,6 +3195,4 @@ endmenu source "drivers/firmware/Kconfig" -source "arch/mips/Kconfig.debug" - source "arch/mips/kvm/Kconfig" diff --git a/arch/mips/Kconfig.debug b/arch/mips/Kconfig.debug index 0749c3724543..0c86b2a2adfc 100644 --- a/arch/mips/Kconfig.debug +++ b/arch/mips/Kconfig.debug @@ -1,12 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 -menu "Kernel hacking" config TRACE_IRQFLAGS_SUPPORT bool default y -source "lib/Kconfig.debug" - config EARLY_PRINTK bool "Early printk" if EXPERT depends on SYS_HAS_EARLY_PRINTK @@ -155,5 +152,3 @@ config MIPS_CPS_NS16550_SHIFT adjacent ns16550 registers in the system. 
endif # MIPS_CPS_NS16550_BOOL - -endmenu diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig index a9c6d25a14f0..c03d0e5a591b 100644 --- a/arch/nds32/Kconfig +++ b/arch/nds32/Kconfig @@ -91,7 +91,3 @@ menu "Kernel Features" source "kernel/Kconfig.preempt" source "kernel/Kconfig.hz" endmenu - -menu "Kernel hacking" -source "lib/Kconfig.debug" -endmenu diff --git a/arch/nds32/Kconfig.debug b/arch/nds32/Kconfig.debug new file mode 100644 index 000000000000..22a162cd99e8 --- /dev/null +++ b/arch/nds32/Kconfig.debug @@ -0,0 +1 @@ +# dummy file, do not delete diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig index 2d813dfdb8cb..22d19febcc92 100644 --- a/arch/nios2/Kconfig +++ b/arch/nios2/Kconfig @@ -187,5 +187,3 @@ config NIOS2_IO_REGION_BASE default "0xe0000000" endmenu - -source "arch/nios2/Kconfig.debug" diff --git a/arch/nios2/Kconfig.debug b/arch/nios2/Kconfig.debug index edfeef049a51..7a49f0d28d14 100644 --- a/arch/nios2/Kconfig.debug +++ b/arch/nios2/Kconfig.debug @@ -1,11 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 -menu "Kernel hacking" config TRACE_IRQFLAGS_SUPPORT def_bool y -source "lib/Kconfig.debug" - config DEBUG_STACK_USAGE bool "Enable stack utilization instrumentation" depends on DEBUG_KERNEL @@ -24,5 +21,3 @@ config EARLY_PRINTK This is useful for kernel debugging when your machine crashes very early before the console code is initialized. You should normally say N here, unless you want to debug such a crash. - -endmenu diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index eb9f503c8972..c52cecd94d62 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -201,9 +201,3 @@ config OPENRISC_ESR_EXCEPTION_BUG_CHECK endmenu endmenu - -menu "Kernel hacking" - -source "lib/Kconfig.debug" - -endmenu diff --git a/arch/openrisc/Kconfig.debug b/arch/openrisc/Kconfig.debug new file mode 100644 index 000000000000..22a162cd99e8 --- /dev/null +++ b/arch/openrisc/Kconfig.debug @@ -0,0 +1 @@ +# dummy file, do not delete diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 64b832a19bdf..d1dd56ea297b 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -349,8 +349,6 @@ endmenu source "drivers/parisc/Kconfig" -source "arch/parisc/Kconfig.debug" - config SECCOMP def_bool y prompt "Enable seccomp to safely compute untrusted bytecode" diff --git a/arch/parisc/Kconfig.debug b/arch/parisc/Kconfig.debug index fb3507f9b14a..1478ded0e247 100644 --- a/arch/parisc/Kconfig.debug +++ b/arch/parisc/Kconfig.debug @@ -1,9 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -menu "Kernel hacking" - -source "lib/Kconfig.debug" config TRACE_IRQFLAGS_SUPPORT def_bool y - -endmenu diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 131132f161e5..c55e61302d57 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -1194,8 +1194,6 @@ endif config ARCH_RANDOM def_bool n -source "arch/powerpc/Kconfig.debug" - config PPC_LIB_RHEAP bool diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index c45424c64e19..fd63cd914a74 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -1,7 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -menu "Kernel hacking" - -source "lib/Kconfig.debug" config PPC_DISABLE_WERROR bool "Don't build arch/powerpc code with -Werror" @@ -379,5 +376,3 @@ config PPC_FAST_ENDIAN_SWITCH depends on DEBUG_KERNEL && PPC_BOOK3S_64 help If you're unsure what this is, say N. 
- -endmenu diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 082486cddf31..73c0e16793fa 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -245,48 +245,3 @@ menu "Power management options" source kernel/power/Kconfig endmenu - -menu "Kernel hacking" - -config CMDLINE_BOOL - bool "Built-in kernel command line" - help - For most platforms, it is firmware or second stage bootloader - that by default specifies the kernel command line options. - However, it might be necessary or advantageous to either override - the default kernel command line or add a few extra options to it. - For such cases, this option allows hardcoding command line options - directly into the kernel. - - For that, choose 'Y' here and fill in the extra boot parameters - in CONFIG_CMDLINE. - - The built-in options will be concatenated to the default command - line if CMDLINE_FORCE is set to 'N'. Otherwise, the default - command line will be ignored and replaced by the built-in string. - -config CMDLINE - string "Built-in kernel command string" - depends on CMDLINE_BOOL - default "" - help - Supply command-line options at build time by entering them here. - -config CMDLINE_FORCE - bool "Built-in command line overrides bootloader arguments" - depends on CMDLINE_BOOL - help - Set this option to 'Y' to have the kernel ignore the bootloader - or firmware command line. Instead, the built-in command line - will be used exclusively. - - If you don't know what to do here, say N. - -config EARLY_PRINTK - def_bool y - -source "lib/Kconfig.debug" - -config CMDLINE_BOOL - bool -endmenu diff --git a/arch/riscv/Kconfig.debug b/arch/riscv/Kconfig.debug new file mode 100644 index 000000000000..3224ff6ecf6e --- /dev/null +++ b/arch/riscv/Kconfig.debug @@ -0,0 +1,37 @@ + +config CMDLINE_BOOL + bool "Built-in kernel command line" + help + For most platforms, it is firmware or second stage bootloader + that by default specifies the kernel command line options. + However, it might be necessary or advantageous to either override + the default kernel command line or add a few extra options to it. + For such cases, this option allows hardcoding command line options + directly into the kernel. + + For that, choose 'Y' here and fill in the extra boot parameters + in CONFIG_CMDLINE. + + The built-in options will be concatenated to the default command + line if CMDLINE_FORCE is set to 'N'. Otherwise, the default + command line will be ignored and replaced by the built-in string. + +config CMDLINE + string "Built-in kernel command string" + depends on CMDLINE_BOOL + default "" + help + Supply command-line options at build time by entering them here. + +config CMDLINE_FORCE + bool "Built-in command line overrides bootloader arguments" + depends on CMDLINE_BOOL + help + Set this option to 'Y' to have the kernel ignore the bootloader + or firmware command line. Instead, the built-in command line + will be used exclusively. + + If you don't know what to do here, say N. 
+ +config EARLY_PRINTK + def_bool y diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index a94667712337..ac8925766d0a 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -828,8 +828,6 @@ config HAVE_PNETID tristate default (SMC || CCWGROUP) -source "arch/s390/Kconfig.debug" - menu "Virtualization" config PFAULT diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug index 2cfdfbf8d320..190527560b2c 100644 --- a/arch/s390/Kconfig.debug +++ b/arch/s390/Kconfig.debug @@ -1,11 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 -menu "Kernel hacking" config TRACE_IRQFLAGS_SUPPORT def_bool y -source "lib/Kconfig.debug" - config S390_PTDUMP bool "Export kernel pagetable layout to userspace via debugfs" depends on DEBUG_KERNEL @@ -20,5 +17,3 @@ config S390_PTDUMP config EARLY_PRINTK def_bool y - -endmenu diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index f291d2568cd4..c8460330eff7 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -885,5 +885,3 @@ source "kernel/power/Kconfig" source "drivers/cpuidle/Kconfig" endmenu - -source "arch/sh/Kconfig.debug" diff --git a/arch/sh/Kconfig.debug b/arch/sh/Kconfig.debug index d0767672640d..010b6c33bbba 100644 --- a/arch/sh/Kconfig.debug +++ b/arch/sh/Kconfig.debug @@ -1,11 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 -menu "Kernel hacking" config TRACE_IRQFLAGS_SUPPORT def_bool y -source "lib/Kconfig.debug" - config SH_STANDARD_BIOS bool "Use LinuxSH standard BIOS" depends on SUPERH32 @@ -88,5 +85,3 @@ config MCOUNT def_bool y depends on SUPERH32 depends on STACK_DEBUG || FUNCTION_TRACER - -endmenu diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 8a1e44d2e112..622695e8fa44 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -565,5 +565,3 @@ config SYSVIPC_COMPAT default y source "drivers/sbus/char/Kconfig" - -source "arch/sparc/Kconfig.debug" diff --git a/arch/sparc/Kconfig.debug b/arch/sparc/Kconfig.debug index 4aef29a11925..50a918d496c8 100644 --- a/arch/sparc/Kconfig.debug +++ b/arch/sparc/Kconfig.debug @@ -1,12 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 -menu "Kernel hacking" config TRACE_IRQFLAGS_SUPPORT bool default y -source "lib/Kconfig.debug" - config DEBUG_DCFLUSH bool "D-cache flush debugging" depends on SPARC64 && DEBUG_KERNEL @@ -21,5 +18,3 @@ config FRAME_POINTER bool depends on MCOUNT default y - -endmenu diff --git a/arch/um/Kconfig b/arch/um/Kconfig index a5cc2b19a960..3022d1bf9bf9 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -187,5 +187,3 @@ config SECCOMP endmenu source "arch/um/drivers/Kconfig" - -source "arch/um/Kconfig.debug" diff --git a/arch/um/Kconfig.debug b/arch/um/Kconfig.debug index 967d3109689f..2014597605ea 100644 --- a/arch/um/Kconfig.debug +++ b/arch/um/Kconfig.debug @@ -1,7 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -menu "Kernel hacking" - -source "lib/Kconfig.debug" config GPROF bool "Enable gprof support" @@ -37,5 +34,3 @@ config EARLY_PRINTK This is useful for kernel debugging when your machine crashes very early before the console code is initialized. 
- -endmenu diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig index 7fa7e61eb19f..6bfaa4a910e1 100644 --- a/arch/unicore32/Kconfig +++ b/arch/unicore32/Kconfig @@ -222,5 +222,3 @@ endmenu endif endif - -source "arch/unicore32/Kconfig.debug" diff --git a/arch/unicore32/Kconfig.debug b/arch/unicore32/Kconfig.debug index de8dae3abc0a..ca0ff97657ef 100644 --- a/arch/unicore32/Kconfig.debug +++ b/arch/unicore32/Kconfig.debug @@ -1,7 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -menu "Kernel hacking" - -source "lib/Kconfig.debug" config EARLY_PRINTK def_bool DEBUG_OCD @@ -30,5 +27,3 @@ config DEBUG_OCD help Say Y here if you want the debug print routines to direct their output to the UniCore On-Chip-Debugger channel using CP #1. - -endmenu diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6faefd1c9f13..41d28b430fef 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2931,6 +2931,4 @@ config HAVE_GENERIC_GUP source "drivers/firmware/Kconfig" -source "arch/x86/Kconfig.debug" - source "arch/x86/kvm/Kconfig" diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index c6dd1d980081..7d68f0c7cfb1 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -1,11 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 -menu "Kernel hacking" config TRACE_IRQFLAGS_SUPPORT def_bool y -source "lib/Kconfig.debug" - config EARLY_PRINTK_USB bool @@ -410,5 +407,3 @@ endchoice config FRAME_POINTER depends on !UNWINDER_ORC && !UNWINDER_GUESS bool - -endmenu diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index d8bac8be3a33..aae0e1800be7 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -567,5 +567,3 @@ menu "Power management options" source "kernel/power/Kconfig" endmenu - -source "arch/xtensa/Kconfig.debug" diff --git a/arch/xtensa/Kconfig.debug b/arch/xtensa/Kconfig.debug index f64c14adadb3..39de98e20018 100644 --- a/arch/xtensa/Kconfig.debug +++ b/arch/xtensa/Kconfig.debug @@ -1,7 +1,4 @@ # SPDX-License-Identifier: GPL-2.0 -menu "Kernel hacking" - -source "lib/Kconfig.debug" config DEBUG_TLB_SANITY bool "Debug TLB sanity" @@ -34,5 +31,3 @@ config S32C1I_SELFTEST It is easy to make wrong hardware configuration, this test should catch it early. Say 'N' on stable hardware. - -endmenu diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 8838d1158d19..26d3ff7e3cf4 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1,3 +1,5 @@ +menu "Kernel hacking" + menu "printk and dmesg options" config PRINTK_TIME @@ -2034,3 +2036,7 @@ config IO_STRICT_DEVMEM if the driver using a given range cannot be disabled. If in doubt, say Y. + +source "arch/$(SRCARCH)/Kconfig.debug" + +endmenu # Kernel hacking -- cgit v1.2.3 From 87a4c375995ed8eaa721b08825cf73d0b02b3145 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 31 Jul 2018 13:39:32 +0200 Subject: kconfig: include kernel/Kconfig.preempt from init/Kconfig Almost all architectures include it. Add an ARCH_NO_PREEMPT symbol to disable preempt support for alpha, hexagon, non-coldfire m68k and user mode Linux.
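[ Editorial note, not part of the original patch: with this change an architecture opts out of preemption with a single select instead of conditionally sourcing kernel/Kconfig.preempt itself, because the PREEMPT and PREEMPT_VOLUNTARY prompts below gain "depends on !ARCH_NO_PREEMPT". A minimal sketch for a hypothetical arch/foo/Kconfig:

config FOO
	def_bool y
	# Selecting the symbol hides the PREEMPT and PREEMPT_VOLUNTARY
	# prompts automatically; no per-arch conditional sourcing needed.
	select ARCH_NO_PREEMPT
]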
Signed-off-by: Christoph Hellwig Signed-off-by: Masahiro Yamada --- arch/Kconfig | 3 +++ arch/alpha/Kconfig | 1 + arch/arc/Kconfig | 2 -- arch/arm/Kconfig | 2 -- arch/arm64/Kconfig | 1 - arch/c6x/Kconfig | 2 -- arch/h8300/Kconfig | 6 ------ arch/hexagon/Kconfig | 1 + arch/ia64/Kconfig | 2 -- arch/m68k/Kconfig | 5 +---- arch/microblaze/Kconfig | 2 -- arch/mips/Kconfig | 2 -- arch/nds32/Kconfig | 1 - arch/nios2/Kconfig | 2 -- arch/openrisc/Kconfig | 1 - arch/parisc/Kconfig | 1 - arch/powerpc/Kconfig | 1 - arch/riscv/Kconfig | 2 -- arch/s390/Kconfig | 2 -- arch/sh/Kconfig | 2 -- arch/sparc/Kconfig | 2 -- arch/um/Kconfig | 1 + arch/unicore32/Kconfig | 2 -- arch/x86/Kconfig | 2 -- arch/xtensa/Kconfig | 2 -- init/Kconfig | 1 + kernel/Kconfig.preempt | 2 ++ lib/Kconfig.debug | 1 + 28 files changed, 11 insertions(+), 43 deletions(-) (limited to 'lib') diff --git a/arch/Kconfig b/arch/Kconfig index 09a561a70168..cddd0faae4cb 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -881,6 +881,9 @@ config COMPAT_32BIT_TIME config ARCH_NO_COHERENT_DMA_MMAP bool +config ARCH_NO_PREEMPT + bool + config CPU_NO_EFFICIENT_FFS def_bool n diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index e4334f017f8e..5b4f88363453 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -4,6 +4,7 @@ config ALPHA default y select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO + select ARCH_NO_PREEMPT select ARCH_USE_CMPXCHG_LOCKREF select HAVE_AOUT select HAVE_IDE diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 5aab069eba17..639ab1bed835 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -545,8 +545,6 @@ config ARC_BUILTIN_DTB_NAME Set the name of the DTB to embed in the vmlinux binary Leaving it blank selects the minimal "skeleton" dtb -source "kernel/Kconfig.preempt" - endmenu # "ARC Architecture Configuration" config FORCE_MAX_ZONEORDER diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 9aaa23c25374..1b1c21519039 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1481,8 +1481,6 @@ config ARCH_NR_GPIO If unsure, leave the default value. -source kernel/Kconfig.preempt - config HZ_FIXED int default 200 if ARCH_EBSA110 diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 07d457ec417a..daf59d363dd3 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -752,7 +752,6 @@ config HOLES_IN_ZONE def_bool y depends on NUMA -source kernel/Kconfig.preempt source kernel/Kconfig.hz config ARCH_SUPPORTS_DEBUG_PAGEALLOC diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig index 904b3375331e..a641b0bf1611 100644 --- a/arch/c6x/Kconfig +++ b/arch/c6x/Kconfig @@ -110,8 +110,6 @@ config KERNEL_RAM_BASE_ADDRESS default 0xe0000000 if SOC_TMS320C6472 default 0x80000000 -source "kernel/Kconfig.preempt" - source "kernel/Kconfig.hz" endmenu diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig index c16e7cf732f7..5e89d40be8cd 100644 --- a/arch/h8300/Kconfig +++ b/arch/h8300/Kconfig @@ -49,9 +49,3 @@ config NR_CPUS default 1 source "arch/h8300/Kconfig.cpu" - -menu "Kernel Features" - -source "kernel/Kconfig.preempt" - -endmenu diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig index fcdb6d9fcecc..89a4b22f34d9 100644 --- a/arch/hexagon/Kconfig +++ b/arch/hexagon/Kconfig @@ -4,6 +4,7 @@ comment "Linux Kernel Configuration for Hexagon" config HEXAGON def_bool y + select ARCH_NO_PREEMPT select HAVE_OPROFILE # Other pending projects/to-do items. 
# select HAVE_REGS_AND_STACK_ACCESS_API diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 34a8d24cffea..86bd377bc7c0 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -364,8 +364,6 @@ config FORCE_CPEI_RETARGET This option it useful to enable this feature on older BIOS's as well. You can also enable this by using boot command line option force_cpei=1. -source "kernel/Kconfig.preempt" - config ARCH_SELECT_MEMORY_MODEL def_bool y diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index cf41ad45513f..3e47f8df6504 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -4,6 +4,7 @@ config M68K default y select ARCH_MIGHT_HAVE_PC_PARPORT if ISA select ARCH_NO_COHERENT_DMA_MMAP if !MMU + select ARCH_NO_PREEMPT if !COLDFIRE select HAVE_IDE select HAVE_AOUT if MMU select HAVE_DEBUG_BUGVERBOSE @@ -129,10 +130,6 @@ endmenu menu "Kernel Features" -if COLDFIRE -source "kernel/Kconfig.preempt" -endif - endmenu if !MMU diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index f4492e9478fe..6163a39ddeb6 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -87,8 +87,6 @@ source "arch/microblaze/Kconfig.platform" menu "Processor type and features" -source "kernel/Kconfig.preempt" - source "kernel/Kconfig.hz" config MMU diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index ab02824c3976..06a633eb9777 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -2818,8 +2818,6 @@ config HZ config SCHED_HRTICK def_bool HIGH_RES_TIMERS -source "kernel/Kconfig.preempt" - config KEXEC bool "Kexec system call" select KEXEC_CORE diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig index c03d0e5a591b..541f16adfb06 100644 --- a/arch/nds32/Kconfig +++ b/arch/nds32/Kconfig @@ -88,6 +88,5 @@ config NDS32_BUILTIN_DTB endmenu menu "Kernel Features" -source "kernel/Kconfig.preempt" source "kernel/Kconfig.hz" endmenu diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig index 22d19febcc92..cbe1844b0657 100644 --- a/arch/nios2/Kconfig +++ b/arch/nios2/Kconfig @@ -47,8 +47,6 @@ config TRACE_IRQFLAGS_SUPPORT menu "Kernel features" -source "kernel/Kconfig.preempt" - source "kernel/Kconfig.hz" config FORCE_MAX_ZONEORDER diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index c52cecd94d62..42e3a0f2afab 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -143,7 +143,6 @@ config SMP If you don't know what to do here, say N. 
source kernel/Kconfig.hz -source kernel/Kconfig.preempt config OPENRISC_NO_SPR_SR_DSX bool "use SPR_SR_DSX software emulation" if OR1K_1200 diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index d1dd56ea297b..89496aa318da 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -323,7 +323,6 @@ config NODES_SHIFT default "3" depends on NEED_MULTIPLE_NODES -source "kernel/Kconfig.preempt" source "kernel/Kconfig.hz" config COMPAT diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index c55e61302d57..1c10ff0406f2 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -393,7 +393,6 @@ config HIGHMEM depends on PPC32 source kernel/Kconfig.hz -source kernel/Kconfig.preempt config HUGETLB_PAGE_SIZE_VARIABLE bool diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 73c0e16793fa..a344980287a5 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -212,8 +212,6 @@ endmenu menu "Kernel type" -source "kernel/Kconfig.preempt" - source "kernel/Kconfig.hz" endmenu diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index ac8925766d0a..8441bd8f7d70 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -511,8 +511,6 @@ config SCHED_TOPOLOGY making when dealing with machines that have multi-threading, multiple cores or multiple books. -source kernel/Kconfig.preempt - source kernel/Kconfig.hz config KEXEC diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index c8460330eff7..993f61a5961d 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -709,8 +709,6 @@ config HOTPLUG_CPU Say Y here to experiment with turning CPUs off and on. CPUs can be controlled through /sys/devices/system/cpu. -source "kernel/Kconfig.preempt" - config GUSA def_bool y depends on !SMP && SUPERH32 diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 622695e8fa44..2d58c26bff9a 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -349,8 +349,6 @@ config SCHED_MC making when dealing with multi-core CPU chips at a cost of slightly increased overhead in some places. If unsure say N here. -source "kernel/Kconfig.preempt" - config CMDLINE_BOOL bool "Default bootloader kernel arguments" depends on SPARC64 diff --git a/arch/um/Kconfig b/arch/um/Kconfig index 3022d1bf9bf9..6b9938919f0b 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -6,6 +6,7 @@ config UML bool default y select ARCH_HAS_KCOV + select ARCH_NO_PREEMPT select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_SECCOMP_FILTER select HAVE_UID16 diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig index 6bfaa4a910e1..60eae744d8fd 100644 --- a/arch/unicore32/Kconfig +++ b/arch/unicore32/Kconfig @@ -135,8 +135,6 @@ endmenu menu "Kernel Features" -source "kernel/Kconfig.preempt" - source "kernel/Kconfig.hz" config LEDS diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 41d28b430fef..98fd04cfa995 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1039,8 +1039,6 @@ config SCHED_MC_PRIO If unsure say Y here. -source "kernel/Kconfig.preempt" - config UP_LATE_INIT def_bool y depends on !SMP && X86_LOCAL_APIC diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index aae0e1800be7..801491e98890 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -173,8 +173,6 @@ config XTENSA_UNALIGNED_USER Say Y here to enable unaligned memory access in user space. 
-source "kernel/Kconfig.preempt" - config HAVE_SMP bool "System Supports SMP (MX)" depends on XTENSA_VARIANT_CUSTOM diff --git a/init/Kconfig b/init/Kconfig index 283f6bc796b1..29bbad9338d6 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -326,6 +326,7 @@ config AUDIT_TREE source "kernel/irq/Kconfig" source "kernel/time/Kconfig" +source "kernel/Kconfig.preempt" menu "CPU/Task time and stats accounting" diff --git a/kernel/Kconfig.preempt b/kernel/Kconfig.preempt index 3f9c97419f02..cd1655122ec0 100644 --- a/kernel/Kconfig.preempt +++ b/kernel/Kconfig.preempt @@ -18,6 +18,7 @@ config PREEMPT_NONE config PREEMPT_VOLUNTARY bool "Voluntary Kernel Preemption (Desktop)" + depends on !ARCH_NO_PREEMPT help This option reduces the latency of the kernel by adding more "explicit preemption points" to the kernel code. These new @@ -35,6 +36,7 @@ config PREEMPT_VOLUNTARY config PREEMPT bool "Preemptible Kernel (Low-Latency Desktop)" + depends on !ARCH_NO_PREEMPT select PREEMPT_COUNT select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK help diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 26d3ff7e3cf4..373ce9fecd7e 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1195,6 +1195,7 @@ config DEBUG_ATOMIC_SLEEP bool "Sleep inside atomic section checking" select PREEMPT_COUNT depends on DEBUG_KERNEL + depends on !ARCH_NO_PREEMPT help If you say Y here, various routines which may sleep will become very noisy if they are called inside atomic sections: when a spinlock is -- cgit v1.2.3 From 3ff4f80a74fd38398ae1bd8a458ba9c51aa0dd44 Mon Sep 17 00:00:00 2001 From: Zhong Jiang Date: Wed, 1 Aug 2018 00:24:58 +0800 Subject: debugobjects: Remove redundant NULL pointer check kmem_cache_destroy() has a built in NULL pointer check, so the one at the call can be removed. 
Signed-off-by: Zhong Jiang Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/1533054298-35824-1-git-send-email-zhongjiang@huawei.com --- lib/debugobjects.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 24c1df0d7466..70935ed91125 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -1188,8 +1188,7 @@ void __init debug_objects_mem_init(void) if (!obj_cache || debug_objects_replace_static_objects()) { debug_objects_enabled = 0; - if (obj_cache) - kmem_cache_destroy(obj_cache); + kmem_cache_destroy(obj_cache); pr_warn("out of memory.\n"); } else debug_objects_selftest(); -- cgit v1.2.3 From 554ec508653688c21d9b8024af73a1ffaa0164b9 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 6 Aug 2018 15:34:21 -0700 Subject: lib/vsprintf: Do not handle %pO[^F] as %px This patch prevents gcc from reporting the following warning when building with W=1: lib/vsprintf.c:1941:3: warning: this statement may fall through [-Wimplicit-fallthrough=] switch (fmt[1]) { ^~~~~~ Fixes: 7b1924a1d930eb2 ("vsprintf: add printk specifier %px") Link: http://lkml.kernel.org/r/20180806223421.11995-1-bart.vanassche@wdc.com Cc: linux-kernel@vger.kernel.org Cc: Bart Van Assche Cc: Pantelis Antoniou Cc: Joe Perches Cc: Rob Herring Cc: v4.15+ Signed-off-by: Bart Van Assche Signed-off-by: Petr Mladek --- lib/vsprintf.c | 1 + 1 file changed, 1 insertion(+) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index a48aaa79d352..cda186230287 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1942,6 +1942,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, case 'F': return device_node_string(buf, end, ptr, spec, fmt + 1); } + break; case 'x': return pointer_string(buf, end, ptr, spec); } -- cgit v1.2.3 From 6122bbbdc65630d202e5494d5b9678676f2883f8 Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 30 Jul 2018 09:59:36 +0100 Subject: lib/mpi: remove redundant variable esign Variable esign is assigned but never used, hence it is redundant and can be removed. Cleans up clang warning: warning: variable 'esign' set but not used [-Wunused-but-set-variable] Signed-off-by: Colin Ian King Signed-off-by: Herbert Xu --- lib/mpi/mpi-pow.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/mpi/mpi-pow.c b/lib/mpi/mpi-pow.c index 468fb7cd1221..a5c921e6d667 100644 --- a/lib/mpi/mpi-pow.c +++ b/lib/mpi/mpi-pow.c @@ -41,7 +41,7 @@ int mpi_powm(MPI res, MPI base, MPI exp, MPI mod) mpi_ptr_t tspace = NULL; mpi_ptr_t rp, ep, mp, bp; mpi_size_t esize, msize, bsize, rsize; - int esign, msign, bsign, rsign; + int msign, bsign, rsign; mpi_size_t size; int mod_shift_cnt; int negative_result; @@ -53,7 +53,6 @@ int mpi_powm(MPI res, MPI base, MPI exp, MPI mod) esize = exp->nlimbs; msize = mod->nlimbs; size = 2 * msize; - esign = exp->sign; msign = mod->sign; rp = res->d; -- cgit v1.2.3
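[ Editorial note on the %pO fall-through fix above, not part of the original patch: the gcc warning fires because a case label that returns only conditionally can silently fall into the next label. A standalone sketch of the bug class that the added break addresses (hypothetical function, not kernel code; compile with gcc -Wimplicit-fallthrough):

const char *classify(const char *fmt)
{
	switch (fmt[0]) {
	case 'O':
		if (fmt[1] == 'F')
			return "device-tree node";
		break;	/* without this break, plain 'O' fell through to 'x' */
	case 'x':
		return "raw address";
	}
	return "unknown";
}
]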
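[ Editorial note on the esign removal above, not part of the original patch: -Wunused-but-set-variable triggers on any variable that is written but never read, and the fix is to delete both the dead store and the declaration. A trivial standalone illustration (hypothetical code, the "before" state that provokes the warning):

int sum(const int *v, int n)
{
	int s = 0;
	int i;
	int last;		/* set but never read: triggers the warning */

	for (i = 0; i < n; i++) {
		s += v[i];
		last = v[i];	/* dead store; delete this line and 'last' */
	}
	return s;
}
]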