From ef12496022d5917cfe0b04cf8fd685fc6bc08400 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 17 Dec 2012 15:59:58 -0800 Subject: lib/vsprintf.c: fix handling of %zd when using ssize_t Documentation/printk-formats.txt says to use %zd for a ssize_t argument and some drivers do. Unfortunately this prints a positive number for negative values eg: tpm_tis 70030000.tpm_tis: tpm_transmit: tpm_send: error 4294967234 Add a case to va_args a ssize_t type if the interpretation should be signed. Tested on PPC32. Signed-off-by: Jason Gunthorpe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 39c99fea7c03..41da0741a663 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1485,7 +1485,10 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) num = va_arg(args, long); break; case FORMAT_TYPE_SIZE_T: - num = va_arg(args, size_t); + if (spec.flags & SIGN) + num = va_arg(args, ssize_t); + else + num = va_arg(args, size_t); break; case FORMAT_TYPE_PTRDIFF: num = va_arg(args, ptrdiff_t); -- cgit v1.2.3 From 35367ab28d024ef026dbd797b4076c8f008ec08c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 17 Dec 2012 16:01:21 -0800 Subject: lib: dynamic_debug: use kbasename() Remove the custom implementation of the functionality similar to kbasename(). Signed-off-by: Andy Shevchenko Cc: Jason Baron Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/dynamic_debug.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index e7f7d993357a..1db1fc660538 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -62,13 +62,6 @@ static LIST_HEAD(ddebug_tables); static int verbose = 0; module_param(verbose, int, 0644); -/* Return the last part of a pathname */ -static inline const char *basename(const char *path) -{ - const char *tail = strrchr(path, '/'); - return tail ? tail+1 : path; -} - /* Return the path relative to source root */ static inline const char *trim_prefix(const char *path) { @@ -154,7 +147,7 @@ static int ddebug_change(const struct ddebug_query *query, /* match against the source filename */ if (query->filename && strcmp(query->filename, dp->filename) && - strcmp(query->filename, basename(dp->filename)) && + strcmp(query->filename, kbasename(dp->filename)) && strcmp(query->filename, trim_prefix(dp->filename))) continue; -- cgit v1.2.3 From 53809751ac230a3611b5cdd375f3389f3207d471 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 17 Dec 2012 16:01:31 -0800 Subject: sscanf: don't ignore field widths for numeric conversions This is another step towards better standard conformance. Rather than adding a local buffer to store the specified portion of the string (with the need to enforce an arbitrary maximum supported width to limit the buffer size), do a maximum width conversion and then drop as much of it as is necessary to meet the caller's request. Also fail on negative field widths. Uses the deprecated simple_strto*() functions because kstrtoXX() fail on non-zero terminated strings. Signed-off-by: Jan Beulich Cc: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 96 ++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 53 insertions(+), 43 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 41da0741a663..292fcb174a32 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -23,12 +23,12 @@ #include #include #include +#include #include #include #include #include /* for PAGE_SIZE */ -#include #include /* for dereference_function_descriptor() */ #include "kstrtox.h" @@ -2016,7 +2016,11 @@ int vsscanf(const char *buf, const char *fmt, va_list args) char digit; int num = 0; u8 qualifier; - u8 base; + unsigned int base; + union { + long long s; + unsigned long long u; + } val; s16 field_width; bool is_sign; @@ -2056,8 +2060,11 @@ int vsscanf(const char *buf, const char *fmt, va_list args) /* get field width */ field_width = -1; - if (isdigit(*fmt)) + if (isdigit(*fmt)) { field_width = skip_atoi(&fmt); + if (field_width <= 0) + break; + } /* get conversion qualifier */ qualifier = -1; @@ -2157,58 +2164,61 @@ int vsscanf(const char *buf, const char *fmt, va_list args) || (base == 0 && !isdigit(digit))) break; + if (is_sign) + val.s = qualifier != 'L' ? + simple_strtol(str, &next, base) : + simple_strtoll(str, &next, base); + else + val.u = qualifier != 'L' ? + simple_strtoul(str, &next, base) : + simple_strtoull(str, &next, base); + + if (field_width > 0 && next - str > field_width) { + if (base == 0) + _parse_integer_fixup_radix(str, &base); + while (next - str > field_width) { + if (is_sign) + val.s = div_s64(val.s, base); + else + val.u = div_u64(val.u, base); + --next; + } + } + switch (qualifier) { case 'H': /* that's 'hh' in format */ - if (is_sign) { - signed char *s = (signed char *)va_arg(args, signed char *); - *s = (signed char)simple_strtol(str, &next, base); - } else { - unsigned char *s = (unsigned char *)va_arg(args, unsigned char *); - *s = (unsigned char)simple_strtoul(str, &next, base); - } + if (is_sign) + *va_arg(args, signed char *) = val.s; + else + *va_arg(args, unsigned char *) = val.u; break; case 'h': - if (is_sign) { - short *s = (short *)va_arg(args, short *); - *s = (short)simple_strtol(str, &next, base); - } else { - unsigned short *s = (unsigned short *)va_arg(args, unsigned short *); - *s = (unsigned short)simple_strtoul(str, &next, base); - } + if (is_sign) + *va_arg(args, short *) = val.s; + else + *va_arg(args, unsigned short *) = val.u; break; case 'l': - if (is_sign) { - long *l = (long *)va_arg(args, long *); - *l = simple_strtol(str, &next, base); - } else { - unsigned long *l = (unsigned long *)va_arg(args, unsigned long *); - *l = simple_strtoul(str, &next, base); - } + if (is_sign) + *va_arg(args, long *) = val.s; + else + *va_arg(args, unsigned long *) = val.u; break; case 'L': - if (is_sign) { - long long *l = (long long *)va_arg(args, long long *); - *l = simple_strtoll(str, &next, base); - } else { - unsigned long long *l = (unsigned long long *)va_arg(args, unsigned long long *); - *l = simple_strtoull(str, &next, base); - } + if (is_sign) + *va_arg(args, long long *) = val.s; + else + *va_arg(args, unsigned long long *) = val.u; break; case 'Z': case 'z': - { - size_t *s = (size_t *)va_arg(args, size_t *); - *s = (size_t)simple_strtoul(str, &next, base); - } - break; + *va_arg(args, size_t *) = val.u; + break; default: - if (is_sign) { - int *i = (int *)va_arg(args, int *); - *i = (int)simple_strtol(str, &next, base); - } else { - unsigned int *i = (unsigned int *)va_arg(args, unsigned int*); - *i = (unsigned int)simple_strtoul(str, &next, base); - } + if (is_sign) + *va_arg(args, int *) = val.s; + else + *va_arg(args, unsigned int *) = val.u; break; } num++; -- cgit v1.2.3 From a1fd3e24d8a484b3265a6d485202afe093c058f3 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 17 Dec 2012 16:01:32 -0800 Subject: percpu_rw_semaphore: reimplement to not block the readers unnecessarily Currently the writer does msleep() plus synchronize_sched() 3 times to acquire/release the semaphore, and during this time the readers are blocked completely. Even if the "write" section was not actually started or if it was already finished. With this patch down_write/up_write does synchronize_sched() twice and down_read/up_read are still possible during this time, just they use the slow path. percpu_down_write() first forces the readers to use rw_semaphore and increment the "slow" counter to take the lock for reading, then it takes that rw_semaphore for writing and blocks the readers. Also. With this patch the code relies on the documented behaviour of synchronize_sched(), it doesn't try to pair synchronize_sched() with barrier. Signed-off-by: Oleg Nesterov Reviewed-by: Paul E. McKenney Cc: Linus Torvalds Cc: Mikulas Patocka Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Srikar Dronamraju Cc: Ananth N Mavinakayanahalli Cc: Anton Arapov Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/percpu-rwsem.h | 83 ++++------------------- lib/Makefile | 2 +- lib/percpu-rwsem.c | 154 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 168 insertions(+), 71 deletions(-) create mode 100644 lib/percpu-rwsem.c (limited to 'lib') diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index bd1e86071e57..592f0d610d8e 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -2,82 +2,25 @@ #define _LINUX_PERCPU_RWSEM_H #include +#include #include -#include -#include +#include struct percpu_rw_semaphore { - unsigned __percpu *counters; - bool locked; - struct mutex mtx; + unsigned int __percpu *fast_read_ctr; + struct mutex writer_mutex; + struct rw_semaphore rw_sem; + atomic_t slow_read_ctr; + wait_queue_head_t write_waitq; }; -#define light_mb() barrier() -#define heavy_mb() synchronize_sched_expedited() +extern void percpu_down_read(struct percpu_rw_semaphore *); +extern void percpu_up_read(struct percpu_rw_semaphore *); -static inline void percpu_down_read(struct percpu_rw_semaphore *p) -{ - rcu_read_lock_sched(); - if (unlikely(p->locked)) { - rcu_read_unlock_sched(); - mutex_lock(&p->mtx); - this_cpu_inc(*p->counters); - mutex_unlock(&p->mtx); - return; - } - this_cpu_inc(*p->counters); - rcu_read_unlock_sched(); - light_mb(); /* A, between read of p->locked and read of data, paired with D */ -} +extern void percpu_down_write(struct percpu_rw_semaphore *); +extern void percpu_up_write(struct percpu_rw_semaphore *); -static inline void percpu_up_read(struct percpu_rw_semaphore *p) -{ - light_mb(); /* B, between read of the data and write to p->counter, paired with C */ - this_cpu_dec(*p->counters); -} - -static inline unsigned __percpu_count(unsigned __percpu *counters) -{ - unsigned total = 0; - int cpu; - - for_each_possible_cpu(cpu) - total += ACCESS_ONCE(*per_cpu_ptr(counters, cpu)); - - return total; -} - -static inline void percpu_down_write(struct percpu_rw_semaphore *p) -{ - mutex_lock(&p->mtx); - p->locked = true; - synchronize_sched_expedited(); /* make sure that all readers exit the rcu_read_lock_sched region */ - while (__percpu_count(p->counters)) - msleep(1); - heavy_mb(); /* C, between read of p->counter and write to data, paired with B */ -} - -static inline void percpu_up_write(struct percpu_rw_semaphore *p) -{ - heavy_mb(); /* D, between write to data and write to p->locked, paired with A */ - p->locked = false; - mutex_unlock(&p->mtx); -} - -static inline int percpu_init_rwsem(struct percpu_rw_semaphore *p) -{ - p->counters = alloc_percpu(unsigned); - if (unlikely(!p->counters)) - return -ENOMEM; - p->locked = false; - mutex_init(&p->mtx); - return 0; -} - -static inline void percpu_free_rwsem(struct percpu_rw_semaphore *p) -{ - free_percpu(p->counters); - p->counters = NULL; /* catch use after free bugs */ -} +extern int percpu_init_rwsem(struct percpu_rw_semaphore *); +extern void percpu_free_rwsem(struct percpu_rw_semaphore *); #endif diff --git a/lib/Makefile b/lib/Makefile index e2152fa7ff4d..e959c20efb24 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -9,7 +9,7 @@ endif lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o dump_stack.o timerqueue.o\ - idr.o int_sqrt.o extable.o \ + idr.o int_sqrt.o extable.o percpu-rwsem.o \ sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \ proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ diff --git a/lib/percpu-rwsem.c b/lib/percpu-rwsem.c new file mode 100644 index 000000000000..2e03bcfe48f9 --- /dev/null +++ b/lib/percpu-rwsem.c @@ -0,0 +1,154 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +int percpu_init_rwsem(struct percpu_rw_semaphore *brw) +{ + brw->fast_read_ctr = alloc_percpu(int); + if (unlikely(!brw->fast_read_ctr)) + return -ENOMEM; + + mutex_init(&brw->writer_mutex); + init_rwsem(&brw->rw_sem); + atomic_set(&brw->slow_read_ctr, 0); + init_waitqueue_head(&brw->write_waitq); + return 0; +} + +void percpu_free_rwsem(struct percpu_rw_semaphore *brw) +{ + free_percpu(brw->fast_read_ctr); + brw->fast_read_ctr = NULL; /* catch use after free bugs */ +} + +/* + * This is the fast-path for down_read/up_read, it only needs to ensure + * there is no pending writer (!mutex_is_locked() check) and inc/dec the + * fast per-cpu counter. The writer uses synchronize_sched_expedited() to + * serialize with the preempt-disabled section below. + * + * The nontrivial part is that we should guarantee acquire/release semantics + * in case when + * + * R_W: down_write() comes after up_read(), the writer should see all + * changes done by the reader + * or + * W_R: down_read() comes after up_write(), the reader should see all + * changes done by the writer + * + * If this helper fails the callers rely on the normal rw_semaphore and + * atomic_dec_and_test(), so in this case we have the necessary barriers. + * + * But if it succeeds we do not have any barriers, mutex_is_locked() or + * __this_cpu_add() below can be reordered with any LOAD/STORE done by the + * reader inside the critical section. See the comments in down_write and + * up_write below. + */ +static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val) +{ + bool success = false; + + preempt_disable(); + if (likely(!mutex_is_locked(&brw->writer_mutex))) { + __this_cpu_add(*brw->fast_read_ctr, val); + success = true; + } + preempt_enable(); + + return success; +} + +/* + * Like the normal down_read() this is not recursive, the writer can + * come after the first percpu_down_read() and create the deadlock. + */ +void percpu_down_read(struct percpu_rw_semaphore *brw) +{ + if (likely(update_fast_ctr(brw, +1))) + return; + + down_read(&brw->rw_sem); + atomic_inc(&brw->slow_read_ctr); + up_read(&brw->rw_sem); +} + +void percpu_up_read(struct percpu_rw_semaphore *brw) +{ + if (likely(update_fast_ctr(brw, -1))) + return; + + /* false-positive is possible but harmless */ + if (atomic_dec_and_test(&brw->slow_read_ctr)) + wake_up_all(&brw->write_waitq); +} + +static int clear_fast_ctr(struct percpu_rw_semaphore *brw) +{ + unsigned int sum = 0; + int cpu; + + for_each_possible_cpu(cpu) { + sum += per_cpu(*brw->fast_read_ctr, cpu); + per_cpu(*brw->fast_read_ctr, cpu) = 0; + } + + return sum; +} + +/* + * A writer takes ->writer_mutex to exclude other writers and to force the + * readers to switch to the slow mode, note the mutex_is_locked() check in + * update_fast_ctr(). + * + * After that the readers can only inc/dec the slow ->slow_read_ctr counter, + * ->fast_read_ctr is stable. Once the writer moves its sum into the slow + * counter it represents the number of active readers. + * + * Finally the writer takes ->rw_sem for writing and blocks the new readers, + * then waits until the slow counter becomes zero. + */ +void percpu_down_write(struct percpu_rw_semaphore *brw) +{ + /* also blocks update_fast_ctr() which checks mutex_is_locked() */ + mutex_lock(&brw->writer_mutex); + + /* + * 1. Ensures mutex_is_locked() is visible to any down_read/up_read + * so that update_fast_ctr() can't succeed. + * + * 2. Ensures we see the result of every previous this_cpu_add() in + * update_fast_ctr(). + * + * 3. Ensures that if any reader has exited its critical section via + * fast-path, it executes a full memory barrier before we return. + * See R_W case in the comment above update_fast_ctr(). + */ + synchronize_sched_expedited(); + + /* nobody can use fast_read_ctr, move its sum into slow_read_ctr */ + atomic_add(clear_fast_ctr(brw), &brw->slow_read_ctr); + + /* block the new readers completely */ + down_write(&brw->rw_sem); + + /* wait for all readers to complete their percpu_up_read() */ + wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr)); +} + +void percpu_up_write(struct percpu_rw_semaphore *brw) +{ + /* allow the new readers, but only the slow-path */ + up_write(&brw->rw_sem); + + /* + * Insert the barrier before the next fast-path in down_read, + * see W_R case in the comment above update_fast_ctr(). + */ + synchronize_sched_expedited(); + mutex_unlock(&brw->writer_mutex); +} -- cgit v1.2.3 From 9390ef0c85fd065f01045fef708b046c98cda04c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 17 Dec 2012 16:01:36 -0800 Subject: percpu_rw_semaphore: kill ->writer_mutex, add ->write_ctr percpu_rw_semaphore->writer_mutex was only added to simplify the initial rewrite, the only thing it protects is clear_fast_ctr() which otherwise could be called by multiple writers. ->rw_sem is enough to serialize the writers. Kill this mutex and add "atomic_t write_ctr" instead. The writers increment/decrement this counter, the readers check it is zero instead of mutex_is_locked(). Move atomic_add(clear_fast_ctr(), slow_read_ctr) under down_write() to avoid the race with other writers. This is a bit sub-optimal, only the first writer needs this and we do not need to exclude the readers at this stage. But this is simple, we do not want another internal lock until we add more features. And this speeds up the write-contended case. Before this patch the racing writers sleep in synchronize_sched_expedited() sequentially, with this patch multiple synchronize_sched_expedited's can "overlap" with each other. Note: we can do more optimizations, this is only the first step. Signed-off-by: Oleg Nesterov Cc: Anton Arapov Cc: Ingo Molnar Cc: Linus Torvalds Cc: Michal Marek Cc: Mikulas Patocka Cc: "Paul E. McKenney" Cc: Peter Zijlstra Cc: Srikar Dronamraju Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/percpu-rwsem.h | 4 ++-- lib/percpu-rwsem.c | 34 ++++++++++++++++------------------ 2 files changed, 18 insertions(+), 20 deletions(-) (limited to 'lib') diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index 592f0d610d8e..d2146a4f833e 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -1,14 +1,14 @@ #ifndef _LINUX_PERCPU_RWSEM_H #define _LINUX_PERCPU_RWSEM_H -#include +#include #include #include #include struct percpu_rw_semaphore { unsigned int __percpu *fast_read_ctr; - struct mutex writer_mutex; + atomic_t write_ctr; struct rw_semaphore rw_sem; atomic_t slow_read_ctr; wait_queue_head_t write_waitq; diff --git a/lib/percpu-rwsem.c b/lib/percpu-rwsem.c index 2e03bcfe48f9..ce92ab563a08 100644 --- a/lib/percpu-rwsem.c +++ b/lib/percpu-rwsem.c @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -13,8 +13,8 @@ int percpu_init_rwsem(struct percpu_rw_semaphore *brw) if (unlikely(!brw->fast_read_ctr)) return -ENOMEM; - mutex_init(&brw->writer_mutex); init_rwsem(&brw->rw_sem); + atomic_set(&brw->write_ctr, 0); atomic_set(&brw->slow_read_ctr, 0); init_waitqueue_head(&brw->write_waitq); return 0; @@ -28,7 +28,7 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *brw) /* * This is the fast-path for down_read/up_read, it only needs to ensure - * there is no pending writer (!mutex_is_locked() check) and inc/dec the + * there is no pending writer (atomic_read(write_ctr) == 0) and inc/dec the * fast per-cpu counter. The writer uses synchronize_sched_expedited() to * serialize with the preempt-disabled section below. * @@ -44,7 +44,7 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *brw) * If this helper fails the callers rely on the normal rw_semaphore and * atomic_dec_and_test(), so in this case we have the necessary barriers. * - * But if it succeeds we do not have any barriers, mutex_is_locked() or + * But if it succeeds we do not have any barriers, atomic_read(write_ctr) or * __this_cpu_add() below can be reordered with any LOAD/STORE done by the * reader inside the critical section. See the comments in down_write and * up_write below. @@ -54,7 +54,7 @@ static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val) bool success = false; preempt_disable(); - if (likely(!mutex_is_locked(&brw->writer_mutex))) { + if (likely(!atomic_read(&brw->write_ctr))) { __this_cpu_add(*brw->fast_read_ctr, val); success = true; } @@ -101,9 +101,8 @@ static int clear_fast_ctr(struct percpu_rw_semaphore *brw) } /* - * A writer takes ->writer_mutex to exclude other writers and to force the - * readers to switch to the slow mode, note the mutex_is_locked() check in - * update_fast_ctr(). + * A writer increments ->write_ctr to force the readers to switch to the + * slow mode, note the atomic_read() check in update_fast_ctr(). * * After that the readers can only inc/dec the slow ->slow_read_ctr counter, * ->fast_read_ctr is stable. Once the writer moves its sum into the slow @@ -114,11 +113,10 @@ static int clear_fast_ctr(struct percpu_rw_semaphore *brw) */ void percpu_down_write(struct percpu_rw_semaphore *brw) { - /* also blocks update_fast_ctr() which checks mutex_is_locked() */ - mutex_lock(&brw->writer_mutex); - + /* tell update_fast_ctr() there is a pending writer */ + atomic_inc(&brw->write_ctr); /* - * 1. Ensures mutex_is_locked() is visible to any down_read/up_read + * 1. Ensures that write_ctr != 0 is visible to any down_read/up_read * so that update_fast_ctr() can't succeed. * * 2. Ensures we see the result of every previous this_cpu_add() in @@ -130,25 +128,25 @@ void percpu_down_write(struct percpu_rw_semaphore *brw) */ synchronize_sched_expedited(); + /* exclude other writers, and block the new readers completely */ + down_write(&brw->rw_sem); + /* nobody can use fast_read_ctr, move its sum into slow_read_ctr */ atomic_add(clear_fast_ctr(brw), &brw->slow_read_ctr); - /* block the new readers completely */ - down_write(&brw->rw_sem); - /* wait for all readers to complete their percpu_up_read() */ wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr)); } void percpu_up_write(struct percpu_rw_semaphore *brw) { - /* allow the new readers, but only the slow-path */ + /* release the lock, but the readers can't use the fast-path */ up_write(&brw->rw_sem); - /* * Insert the barrier before the next fast-path in down_read, * see W_R case in the comment above update_fast_ctr(). */ synchronize_sched_expedited(); - mutex_unlock(&brw->writer_mutex); + /* the last writer unblocks update_fast_ctr() */ + atomic_dec(&brw->write_ctr); } -- cgit v1.2.3 From 8ebe34731a0d1a9d89b536430afd98d0fadec99b Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 17 Dec 2012 16:01:38 -0800 Subject: percpu_rw_semaphore: add lockdep annotations Add lockdep annotations. Not only this can help to find the potential problems, we do not want the false warnings if, say, the task takes two different percpu_rw_semaphore's for reading. IOW, at least ->rw_sem should not use a single class. This patch exposes this internal lock to lockdep so that it represents the whole percpu_rw_semaphore. This way we do not need to add another "fake" ->lockdep_map and lock_class_key. More importantly, this also makes the output from lockdep much more understandable if it finds the problem. In short, with this patch from lockdep pov percpu_down_read() and percpu_up_read() acquire/release ->rw_sem for reading, this matches the actual semantics. This abuses __up_read() but I hope this is fine and in fact I'd like to have down_read_no_lockdep() as well, percpu_down_read_recursive_readers() will need it. Signed-off-by: Oleg Nesterov Cc: Anton Arapov Cc: Ingo Molnar Cc: Linus Torvalds Cc: Michal Marek Cc: Mikulas Patocka Cc: "Paul E. McKenney" Cc: Peter Zijlstra Cc: Srikar Dronamraju Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/percpu-rwsem.h | 10 +++++++++- lib/percpu-rwsem.c | 21 +++++++++++++++++---- 2 files changed, 26 insertions(+), 5 deletions(-) (limited to 'lib') diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index d2146a4f833e..3e88c9a7d57f 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -5,6 +5,7 @@ #include #include #include +#include struct percpu_rw_semaphore { unsigned int __percpu *fast_read_ctr; @@ -20,7 +21,14 @@ extern void percpu_up_read(struct percpu_rw_semaphore *); extern void percpu_down_write(struct percpu_rw_semaphore *); extern void percpu_up_write(struct percpu_rw_semaphore *); -extern int percpu_init_rwsem(struct percpu_rw_semaphore *); +extern int __percpu_init_rwsem(struct percpu_rw_semaphore *, + const char *, struct lock_class_key *); extern void percpu_free_rwsem(struct percpu_rw_semaphore *); +#define percpu_init_rwsem(brw) \ +({ \ + static struct lock_class_key rwsem_key; \ + __percpu_init_rwsem(brw, #brw, &rwsem_key); \ +}) + #endif diff --git a/lib/percpu-rwsem.c b/lib/percpu-rwsem.c index ce92ab563a08..652a8ee8efe9 100644 --- a/lib/percpu-rwsem.c +++ b/lib/percpu-rwsem.c @@ -2,18 +2,21 @@ #include #include #include +#include #include #include #include #include -int percpu_init_rwsem(struct percpu_rw_semaphore *brw) +int __percpu_init_rwsem(struct percpu_rw_semaphore *brw, + const char *name, struct lock_class_key *rwsem_key) { brw->fast_read_ctr = alloc_percpu(int); if (unlikely(!brw->fast_read_ctr)) return -ENOMEM; - init_rwsem(&brw->rw_sem); + /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */ + __init_rwsem(&brw->rw_sem, name, rwsem_key); atomic_set(&brw->write_ctr, 0); atomic_set(&brw->slow_read_ctr, 0); init_waitqueue_head(&brw->write_waitq); @@ -66,19 +69,29 @@ static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val) /* * Like the normal down_read() this is not recursive, the writer can * come after the first percpu_down_read() and create the deadlock. + * + * Note: returns with lock_is_held(brw->rw_sem) == T for lockdep, + * percpu_up_read() does rwsem_release(). This pairs with the usage + * of ->rw_sem in percpu_down/up_write(). */ void percpu_down_read(struct percpu_rw_semaphore *brw) { - if (likely(update_fast_ctr(brw, +1))) + might_sleep(); + if (likely(update_fast_ctr(brw, +1))) { + rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 0, _RET_IP_); return; + } down_read(&brw->rw_sem); atomic_inc(&brw->slow_read_ctr); - up_read(&brw->rw_sem); + /* avoid up_read()->rwsem_release() */ + __up_read(&brw->rw_sem); } void percpu_up_read(struct percpu_rw_semaphore *brw) { + rwsem_release(&brw->rw_sem.dep_map, 1, _RET_IP_); + if (likely(update_fast_ctr(brw, -1))) return; -- cgit v1.2.3 From 22b361d1df7bc25b558e52f22e779286a3d323e4 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 17 Dec 2012 16:01:39 -0800 Subject: percpu_rw_semaphore: introduce CONFIG_PERCPU_RWSEM Currently only block_dev and uprobes use percpu_rw_semaphore, add the config option selected by BLOCK || UPROBES. Signed-off-by: Oleg Nesterov Cc: Anton Arapov Cc: Ingo Molnar Cc: Linus Torvalds Cc: Michal Marek Cc: Mikulas Patocka Cc: "Paul E. McKenney" Cc: Peter Zijlstra Cc: Srikar Dronamraju Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/Kconfig | 1 + block/Kconfig | 1 + lib/Kconfig | 3 +++ lib/Makefile | 3 ++- 4 files changed, 7 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/arch/Kconfig b/arch/Kconfig index 34884faf98cd..54ffd0f9df21 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -80,6 +80,7 @@ config UPROBES bool "Transparent user-space probes (EXPERIMENTAL)" depends on UPROBE_EVENT && PERF_EVENTS default n + select PERCPU_RWSEM help Uprobes is the user-space counterpart to kprobes: they enable instrumentation applications (such as 'perf probe') diff --git a/block/Kconfig b/block/Kconfig index a7e40a7c8214..4a85ccf8d4cf 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -4,6 +4,7 @@ menuconfig BLOCK bool "Enable the block layer" if EXPERT default y + select PERCPU_RWSEM help Provide block layer support for the kernel. diff --git a/lib/Kconfig b/lib/Kconfig index 4b31a46fb307..75cdb77fa49d 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -42,6 +42,9 @@ config GENERIC_IO config STMP_DEVICE bool +config PERCPU_RWSEM + boolean + config CRC_CCITT tristate "CRC-CCITT functions" help diff --git a/lib/Makefile b/lib/Makefile index e959c20efb24..5558e35170cd 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -9,7 +9,7 @@ endif lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o dump_stack.o timerqueue.o\ - idr.o int_sqrt.o extable.o percpu-rwsem.o \ + idr.o int_sqrt.o extable.o \ sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \ proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ @@ -40,6 +40,7 @@ obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o +lib-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS)) obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o -- cgit v1.2.3 From 918854a65e856574523d94763ef2a2b48ad55a25 Mon Sep 17 00:00:00 2001 From: Cong Ding Date: Mon, 17 Dec 2012 16:01:43 -0800 Subject: lib/rbtree_test.c: fix uninitialized variable warning Fix this warning: lib/rbtree_test.c: In function `check': lib/rbtree_test.c:121: warning: `blacks' may be used uninitialized in this function Signed-off-by: Cong Ding Cc: Michel Lespinasse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/rbtree_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c index 268b23951fec..d7f491a54579 100644 --- a/lib/rbtree_test.c +++ b/lib/rbtree_test.c @@ -118,7 +118,7 @@ static void check(int nr_nodes) { struct rb_node *rb; int count = 0; - int blacks; + int blacks = 0; u32 prev_key = 0; for (rb = rb_first(&root); rb; rb = rb_next(rb)) { -- cgit v1.2.3 From 4c925d6031f719fad6ea8b1c94a636f4c0fea39b Mon Sep 17 00:00:00 2001 From: Eldad Zack Date: Mon, 17 Dec 2012 16:03:04 -0800 Subject: kstrto*: add documentation As Bruce Fields pointed out, kstrto* is currently lacking kerneldoc comments. This patch adds kerneldoc comments to common variants of kstrto*: kstrto(u)l, kstrto(u)ll and kstrto(u)int. Signed-off-by: Eldad Zack Cc: J. Bruce Fields Cc: Joe Perches Cc: Randy Dunlap Cc: Alexey Dobriyan Cc: Rob Landley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/DocBook/kernel-api.tmpl | 3 ++ include/linux/kernel.h | 33 ++++++++++++++++++ lib/kstrtox.c | 64 +++++++++++++++++++++++++++++++++++ 3 files changed, 100 insertions(+) (limited to 'lib') diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl index 00687ee9d363..f75ab4c1b281 100644 --- a/Documentation/DocBook/kernel-api.tmpl +++ b/Documentation/DocBook/kernel-api.tmpl @@ -58,6 +58,9 @@ String Conversions !Elib/vsprintf.c +!Finclude/linux/kernel.h kstrtol +!Finclude/linux/kernel.h kstrtoul +!Elib/kstrtox.c String Manipulation