From 3577af70a2ce4853d58e57d832e687d739281479 Mon Sep 17 00:00:00 2001
From: Cong Wang
Date: Tue, 2 Sep 2014 15:27:20 -0700
Subject: perf: Fix a race condition in perf_remove_from_context()

We saw a kernel soft lockup in perf_remove_from_context(); it looks like
the `perf` process, when exiting, could not get out of the retry loop.
Meanwhile, the target process was forking a child, so either the target
process should have executed the SMP function call to deactivate the
event (if it was running), or it should have done a context switch, which
deactivates the event. It seems we optimize out a context switch in
perf_event_context_sched_out(), and, more importantly, we still test an
obsolete task pointer when retrying, so no one would actually deactivate
that event in this situation.

Fix it directly by reloading the task pointer in
perf_remove_from_context(). This should cure the above soft lockup.

Signed-off-by: Cong Wang
Signed-off-by: Peter Zijlstra
Cc: Paul Mackerras
Cc: Arnaldo Carvalho de Melo
Cc: Linus Torvalds
Cc:
Link: http://lkml.kernel.org/r/1409696840-843-1-git-send-email-xiyou.wangcong@gmail.com
Signed-off-by: Ingo Molnar
---
 kernel/events/core.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'kernel')

diff --git a/kernel/events/core.c b/kernel/events/core.c
index f9c1ed002dbc..d640a8b4dcbc 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1524,6 +1524,11 @@ retry:
 	 */
 	if (ctx->is_active) {
 		raw_spin_unlock_irq(&ctx->lock);
+		/*
+		 * Reload the task pointer, it might have been changed by
+		 * a concurrent perf_event_context_sched_out().
+		 */
+		task = ctx->task;
 		goto retry;
 	}
 
@@ -1967,6 +1972,11 @@ retry:
 	 */
 	if (ctx->is_active) {
 		raw_spin_unlock_irq(&ctx->lock);
+		/*
+		 * Reload the task pointer, it might have been changed by
+		 * a concurrent perf_event_context_sched_out().
+		 */
+		task = ctx->task;
 		goto retry;
 	}
 
-- cgit v1.2.3
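The pattern the patch above repairs -- cache a pointer, drop the lock,
retry -- only works if the cached pointer is refreshed on every pass.
Below is a compressed userspace sketch of that reload-before-retry idiom,
not the kernel code: struct ctx, switcher() and remove_from_context() are
invented stand-ins for the perf context, perf_event_context_sched_out()
and perf_remove_from_context().

/* sketch.c -- illustrative only; build with: gcc -pthread sketch.c */
#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

struct task { int id; };

struct ctx {
	pthread_mutex_t lock;
	struct task *task;	/* current owner; swapped at "context switch" */
	int is_active;
};

static struct task t1 = { 1 }, t2 = { 2 };
static struct ctx ctx = { PTHREAD_MUTEX_INITIALIZER, &t1, 1 };

/* Stand-in for a concurrent sched-out: migrate the context to another
 * task, then deactivate it a little later. */
static void *switcher(void *unused)
{
	(void)unused;
	pthread_mutex_lock(&ctx.lock);
	ctx.task = &t2;		/* any pointer cached before this is now stale */
	pthread_mutex_unlock(&ctx.lock);

	usleep(1000);

	pthread_mutex_lock(&ctx.lock);
	ctx.is_active = 0;	/* lets the remover complete */
	pthread_mutex_unlock(&ctx.lock);
	return NULL;
}

static void remove_from_context(void)
{
	struct task *task = ctx.task;	/* cached once, like the old code */

retry:
	pthread_mutex_lock(&ctx.lock);
	if (ctx.is_active) {
		pthread_mutex_unlock(&ctx.lock);
		/*
		 * The fix: reload the task pointer before retrying, since
		 * it might have been changed by a concurrent switch.
		 */
		task = ctx.task;
		goto retry;
	}
	/* safe: the context is inactive and 'task' is its real owner */
	printf("removed event; owner is task %d\n", task->id);
	pthread_mutex_unlock(&ctx.lock);
}

int main(void)
{
	pthread_t th;

	pthread_create(&th, NULL, switcher, NULL);
	remove_from_context();
	pthread_join(th, NULL);
	return 0;
}

Dropping the marked reload line leaves 'task' pointing at the pre-switch
owner -- the analogue of the obsolete pointer described above. Here it
merely misreports the owner; in the kernel, acting on the stale task kept
the retry loop from ever terminating.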
Wysocki" Date: Thu, 25 Sep 2014 00:53:44 +0200 Subject: Revert "PM / Hibernate: Iterate over set bits instead of PFNs in swsusp_free()" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Revert commit 6efde38f0769 (PM / Hibernate: Iterate over set bits instead of PFNs in swsusp_free()) that introduced a NULL pointer dereference during system resume from hibernation: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] swsusp_free+0x21/0x190 PGD b39c2067 PUD b39c1067 PMD 0 Oops: 0000 [#1] SMP Modules linked in: CPU: 1 PID: 4898 Comm: s2disk Tainted: G C 3.17-rc5-amd64 #1 Debian 3.17~rc5-1~exp1 Hardware name: LENOVO 2776LEG/2776LEG, BIOS 6EET55WW (3.15 ) 12/19/2011 task: ffff88023155ea40 ti: ffff8800b3b14000 task.ti: ffff8800b3b14000 RIP: 0010:[] [] swsusp_free+0x21/0x190 RSP: 0018:ffff8800b3b17ea8 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff8800b39bab00 RCX: 0000000000000001 RDX: ffff8800b39bab10 RSI: ffff8800b39bab00 RDI: 0000000000000000 RBP: 0000000000000010 R08: 0000000000000000 R09: 0000000000000000 R10: ffff8800b39bab10 R11: 0000000000000246 R12: ffffea0000000000 R13: ffff880232f485a0 R14: ffff88023ac27cd8 R15: ffff880232927590 FS: 00007f406d83b700(0000) GS:ffff88023bc80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000000 CR3: 00000000b3a62000 CR4: 00000000000007e0 Stack: ffff8800b39bab00 0000000000000010 ffff880232927590 ffffffff810acb4a ffff8800b39bab00 ffffffff811a955a ffff8800b39bab10 0000000000000000 ffff88023155f098 ffffffff81a6b8c0 ffff88023155ea40 0000000000000007 Call Trace: [] ? snapshot_release+0x2a/0xb0 [] ? __fput+0xca/0x1d0 [] ? task_work_run+0x97/0xd0 [] ? do_notify_resume+0x69/0xa0 [] ? int_signal+0x12/0x17 Code: 66 2e 0f 1f 84 00 00 00 00 00 66 66 66 66 90 41 54 48 8b 05 ba 62 9c 00 49 bc 00 00 00 00 00 ea ff ff 48 8b 3d a1 62 9c 00 55 53 <48> 8b 10 48 89 50 18 48 8b 52 20 48 c7 40 28 00 00 00 00 c7 40 RIP [] swsusp_free+0x21/0x190 RSP CR2: 0000000000000000 ---[ end trace f02be86a1ec0cccb ]--- due to forbidden_pages_map being NULL in swsusp_free(). Fixes: 6efde38f0769 "PM / Hibernate: Iterate over set bits instead of PFNs in swsusp_free()" Reported-by: Bjørn Mork Signed-off-by: Rafael J. Wysocki --- kernel/power/snapshot.c | 50 +++++++++++++++---------------------------------- 1 file changed, 15 insertions(+), 35 deletions(-) (limited to 'kernel') diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index c4b8093c80b3..f1604d8cf489 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -725,14 +725,6 @@ static void memory_bm_clear_bit(struct memory_bitmap *bm, unsigned long pfn) clear_bit(bit, addr); } -static void memory_bm_clear_current(struct memory_bitmap *bm) -{ - int bit; - - bit = max(bm->cur.node_bit - 1, 0); - clear_bit(bit, bm->cur.node->data); -} - static int memory_bm_test_bit(struct memory_bitmap *bm, unsigned long pfn) { void *addr; @@ -1341,35 +1333,23 @@ static struct memory_bitmap copy_bm; void swsusp_free(void) { - unsigned long fb_pfn, fr_pfn; - - memory_bm_position_reset(forbidden_pages_map); - memory_bm_position_reset(free_pages_map); - -loop: - fr_pfn = memory_bm_next_pfn(free_pages_map); - fb_pfn = memory_bm_next_pfn(forbidden_pages_map); - - /* - * Find the next bit set in both bitmaps. This is guaranteed to - * terminate when fb_pfn == fr_pfn == BM_END_OF_MAP. 
-	 */
-	do {
-		if (fb_pfn < fr_pfn)
-			fb_pfn = memory_bm_next_pfn(forbidden_pages_map);
-		if (fr_pfn < fb_pfn)
-			fr_pfn = memory_bm_next_pfn(free_pages_map);
-	} while (fb_pfn != fr_pfn);
-
-	if (fr_pfn != BM_END_OF_MAP && pfn_valid(fr_pfn)) {
-		struct page *page = pfn_to_page(fr_pfn);
+	struct zone *zone;
+	unsigned long pfn, max_zone_pfn;
 
-		memory_bm_clear_current(forbidden_pages_map);
-		memory_bm_clear_current(free_pages_map);
-		__free_page(page);
-		goto loop;
+	for_each_populated_zone(zone) {
+		max_zone_pfn = zone_end_pfn(zone);
+		for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++)
+			if (pfn_valid(pfn)) {
+				struct page *page = pfn_to_page(pfn);
+
+				if (swsusp_page_is_forbidden(page) &&
+				    swsusp_page_is_free(page)) {
+					swsusp_unset_page_forbidden(page);
+					swsusp_unset_page_free(page);
+					__free_page(page);
+				}
+			}
 	}
-
 	nr_copy_pages = 0;
 	nr_meta_pages = 0;
 	restore_pblist = NULL;
-- cgit v1.2.3

From 2ad654bc5e2b211e92f66da1d819e47d79a866f0 Mon Sep 17 00:00:00 2001
From: Zefan Li
Date: Thu, 25 Sep 2014 09:41:02 +0800
Subject: cpuset: PF_SPREAD_PAGE and PF_SPREAD_SLAB should be atomic flags

When we change cpuset.memory_spread_{page,slab}, cpuset flips the
PF_SPREAD_{PAGE,SLAB} bit of tsk->flags for each task in that cpuset.
This should be done using atomic bitops, but currently it isn't, which
is broken.

Tetsuo reported a hard-to-reproduce kernel crash on RHEL6, which
happened when one thread tried to clear PF_USED_MATH while, at the same
time, another thread tried to flip PF_SPREAD_PAGE/PF_SPREAD_SLAB. Both
operated on the flags of the same task.

Here's the full report:
https://lkml.org/lkml/2014/9/19/230

To fix this, we make PF_SPREAD_PAGE and PF_SPREAD_SLAB atomic flags.

v4:
- updated mm/slab.c. (Fengguang Wu)
- updated Documentation.

Cc: Peter Zijlstra
Cc: Ingo Molnar
Cc: Miao Xie
Cc: Kees Cook
Fixes: 950592f7b991 ("cpusets: update tasks' page/slab spread flags in time")
Cc:  # 2.6.31+
Reported-by: Tetsuo Handa
Signed-off-by: Zefan Li
Signed-off-by: Tejun Heo
---
 Documentation/cgroups/cpusets.txt |  6 +++---
 include/linux/cpuset.h            |  4 ++--
 include/linux/sched.h             | 13 +++++++++++--
 kernel/cpuset.c                   |  9 +++++----
 mm/slab.c                         |  4 ++--
 5 files changed, 23 insertions(+), 13 deletions(-)

(limited to 'kernel')

diff --git a/Documentation/cgroups/cpusets.txt b/Documentation/cgroups/cpusets.txt
index 7740038d82bc..3c94ff3f9693 100644
--- a/Documentation/cgroups/cpusets.txt
+++ b/Documentation/cgroups/cpusets.txt
@@ -345,14 +345,14 @@ the named feature on.
 The implementation is simple.
 
 Setting the flag 'cpuset.memory_spread_page' turns on a per-process flag
-PF_SPREAD_PAGE for each task that is in that cpuset or subsequently
+PFA_SPREAD_PAGE for each task that is in that cpuset or subsequently
 joins that cpuset.  The page allocation calls for the page cache
-is modified to perform an inline check for this PF_SPREAD_PAGE task
+is modified to perform an inline check for this PFA_SPREAD_PAGE task
 flag, and if set, a call to a new routine cpuset_mem_spread_node()
 returns the node to prefer for the allocation.
 
 Similarly, setting 'cpuset.memory_spread_slab' turns on the flag
-PF_SPREAD_SLAB, and appropriately marked slab caches will allocate
+PFA_SPREAD_SLAB, and appropriately marked slab caches will allocate
 pages from the node returned by cpuset_mem_spread_node().
 
 The cpuset_mem_spread_node() routine is also simple.  It uses the
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index ade2390ffe92..6e39c9bb0dae 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -93,12 +93,12 @@ extern int cpuset_slab_spread_node(void);
 
 static inline int cpuset_do_page_mem_spread(void)
 {
-	return current->flags & PF_SPREAD_PAGE;
+	return task_spread_page(current);
 }
 
 static inline int cpuset_do_slab_mem_spread(void)
 {
-	return current->flags & PF_SPREAD_SLAB;
+	return task_spread_slab(current);
 }
 
 extern int current_cpuset_is_being_rebound(void);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 5630763956d9..7b1cafefb05e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1903,8 +1903,6 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut,
 #define PF_KTHREAD	0x00200000	/* I am a kernel thread */
 #define PF_RANDOMIZE	0x00400000	/* randomize virtual address space */
 #define PF_SWAPWRITE	0x00800000	/* Allowed to write to swap */
-#define PF_SPREAD_PAGE	0x01000000	/* Spread page cache over cpuset */
-#define PF_SPREAD_SLAB	0x02000000	/* Spread some slab caches over cpuset */
 #define PF_NO_SETAFFINITY 0x04000000	/* Userland is not allowed to meddle with cpus_allowed */
 #define PF_MCE_EARLY	0x08000000	/* Early kill for mce process policy */
 #define PF_MUTEX_TESTER	0x20000000	/* Thread belongs to the rt mutex tester */
@@ -1958,6 +1956,9 @@ static inline void memalloc_noio_restore(unsigned int flags)
 
 /* Per-process atomic flags. */
 #define PFA_NO_NEW_PRIVS 0	/* May not gain new privileges. */
+#define PFA_SPREAD_PAGE  1	/* Spread page cache over cpuset */
+#define PFA_SPREAD_SLAB  2	/* Spread some slab caches over cpuset */
+
 
 #define TASK_PFA_TEST(name, func)					\
 	static inline bool task_##func(struct task_struct *p)		\
@@ -1972,6 +1973,14 @@ static inline void memalloc_noio_restore(unsigned int flags)
 TASK_PFA_TEST(NO_NEW_PRIVS, no_new_privs)
 TASK_PFA_SET(NO_NEW_PRIVS, no_new_privs)
 
+TASK_PFA_TEST(SPREAD_PAGE, spread_page)
+TASK_PFA_SET(SPREAD_PAGE, spread_page)
+TASK_PFA_CLEAR(SPREAD_PAGE, spread_page)
+
+TASK_PFA_TEST(SPREAD_SLAB, spread_slab)
+TASK_PFA_SET(SPREAD_SLAB, spread_slab)
+TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)
+
 /*
  * task->jobctl flags
  */
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 22874d7cf2c0..52cb04c993b7 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -365,13 +365,14 @@ static void cpuset_update_task_spread_flag(struct cpuset *cs,
 					struct task_struct *tsk)
 {
 	if (is_spread_page(cs))
-		tsk->flags |= PF_SPREAD_PAGE;
+		task_set_spread_page(tsk);
 	else
-		tsk->flags &= ~PF_SPREAD_PAGE;
+		task_clear_spread_page(tsk);
+
 	if (is_spread_slab(cs))
-		tsk->flags |= PF_SPREAD_SLAB;
+		task_set_spread_slab(tsk);
 	else
-		tsk->flags &= ~PF_SPREAD_SLAB;
+		task_clear_spread_slab(tsk);
 }
 
 /*
diff --git a/mm/slab.c b/mm/slab.c
index a467b308c682..881951e67f12 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2994,7 +2994,7 @@ out:
 
 #ifdef CONFIG_NUMA
 /*
- * Try allocating on another node if PF_SPREAD_SLAB is a mempolicy is set.
+ * Try allocating on another node if PFA_SPREAD_SLAB is a mempolicy is set.
  *
  * If we are in_interrupt, then process context, including cpusets and
  * mempolicy, may not apply and should not be used for allocation policy.
@@ -3226,7 +3226,7 @@ __do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
 {
 	void *objp;
 
-	if (current->mempolicy || unlikely(current->flags & PF_SPREAD_SLAB)) {
+	if (current->mempolicy || cpuset_do_slab_mem_spread()) {
 		objp = alternate_node_alloc(cache, flags);
 		if (objp)
 			goto out;
-- cgit v1.2.3
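Why plain |= and &= on a shared flags word are broken, as the commit
message above describes: two concurrent read-modify-write sequences can
silently undo each other's bits. A runnable userspace demonstration of
that failure class -- not the kernel code; the flag names, iteration
count and the deliberate data race are all illustrative (build with
gcc -pthread):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define FLAG_USED_MATH   (1u << 0)	/* flipped by the task itself */
#define FLAG_SPREAD_PAGE (1u << 1)	/* flipped externally, like cpuset */
#define ITERS 5000000L

static volatile unsigned int plain_flags;	/* plain RMW: the bug */
static atomic_uint atomic_flags;		/* atomic bitops: the fix */

/* stand-in for cpuset_update_task_spread_flag() running on another CPU */
static void *flipper(void *unused)
{
	(void)unused;
	for (long i = 0; i < ITERS; i++) {
		plain_flags |= FLAG_SPREAD_PAGE;	/* racy RMW */
		plain_flags &= ~FLAG_SPREAD_PAGE;	/* racy RMW */
		atomic_fetch_or(&atomic_flags, FLAG_SPREAD_PAGE);
		atomic_fetch_and(&atomic_flags, ~FLAG_SPREAD_PAGE);
	}
	return NULL;
}

int main(void)
{
	pthread_t th;
	long plain_lost = 0, atomic_lost = 0;

	pthread_create(&th, NULL, flipper, NULL);
	for (long i = 0; i < ITERS; i++) {
		plain_flags |= FLAG_USED_MATH;
		if (!(plain_flags & FLAG_USED_MATH))
			plain_lost++;		/* bit clobbered by flipper */
		plain_flags &= ~FLAG_USED_MATH;

		atomic_fetch_or(&atomic_flags, FLAG_USED_MATH);
		if (!(atomic_load(&atomic_flags) & FLAG_USED_MATH))
			atomic_lost++;		/* stays zero */
		atomic_fetch_and(&atomic_flags, ~FLAG_USED_MATH);
	}
	pthread_join(th, NULL);
	printf("plain RMW lost the flag %ld times, atomic bitops %ld times\n",
	       plain_lost, atomic_lost);
	return 0;
}

On most machines a few runs show a nonzero count for the plain word and
zero for the atomic one. The patch is stricter still: rather than making
every update of tsk->flags atomic, it moves the externally-modified bits
into a separate atomic_flags field, so the owner's non-atomic updates to
flags no longer share a word with cpuset's updates.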
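The TASK_PFA_{TEST,SET,CLEAR} macros in the sched.h hunk generate one
accessor per flag. A simplified userspace rendering of that expansion --
an assumed approximation using shifted-mask C11 atomics where the kernel
uses test_bit()/set_bit()/clear_bit() on p->atomic_flags:

#include <stdatomic.h>
#include <stdbool.h>

struct task_struct {
	atomic_ulong atomic_flags;	/* stand-in for the kernel field */
};

#define PFA_SPREAD_PAGE 1		/* bit number, as in the hunk */

#define TASK_PFA_TEST(name, func)					\
	static inline bool task_##func(struct task_struct *p)		\
	{ return atomic_load(&p->atomic_flags) & (1UL << PFA_##name); }

#define TASK_PFA_SET(name, func)					\
	static inline void task_set_##func(struct task_struct *p)	\
	{ atomic_fetch_or(&p->atomic_flags, 1UL << PFA_##name); }

#define TASK_PFA_CLEAR(name, func)					\
	static inline void task_clear_##func(struct task_struct *p)	\
	{ atomic_fetch_and(&p->atomic_flags, ~(1UL << PFA_##name)); }

/* one line per flag yields task_spread_page(), task_set_spread_page()
 * and task_clear_spread_page() -- the helpers kernel/cpuset.c calls */
TASK_PFA_TEST(SPREAD_PAGE, spread_page)
TASK_PFA_SET(SPREAD_PAGE, spread_page)
TASK_PFA_CLEAR(SPREAD_PAGE, spread_page)

int main(void)
{
	struct task_struct t = { 0 };
	bool on;

	task_set_spread_page(&t);
	on = task_spread_page(&t);		/* true */
	task_clear_spread_page(&t);
	return on && !task_spread_page(&t) ? 0 : 1;
}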
From 75c349062a666deab57bdca8b5bd0779c9fb0d58 Mon Sep 17 00:00:00 2001
From: Vincent Sanders
Date: Thu, 18 Sep 2014 20:39:15 +0100
Subject: ARM: 8153/1: Enable gcov support on the ARM architecture

Enable gcov support for ARM based on original patches by David
Singleton and George G. Davis.

Riku - updated the patch to the current mainline kernel. The patch has
been submitted in 2010 and 2012 - for symmetry, now in 2014 too.

https://lwn.net/Articles/390419/
http://marc.info/?l=linux-arm-kernel&m=133823081813044

v2: remove arch/arm/kernel from gcov disabled files

Cc: Andrey Ryabinin
Cc: Naresh Kamboju
Acked-by: Arnd Bergmann
Signed-off-by: Riku Voipio
Signed-off-by: Vincent Sanders
Signed-off-by: Russell King
---
 arch/arm/boot/bootp/Makefile      | 2 ++
 arch/arm/boot/compressed/Makefile | 2 ++
 kernel/gcov/Kconfig               | 2 +-
 3 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/arch/arm/boot/bootp/Makefile b/arch/arm/boot/bootp/Makefile
index c394e305447c..5761f0039133 100644
--- a/arch/arm/boot/bootp/Makefile
+++ b/arch/arm/boot/bootp/Makefile
@@ -5,6 +5,8 @@
 # architecture-specific flags and dependencies.
 #
 
+GCOV_PROFILE := n
+
 LDFLAGS_bootp	:=-p --no-undefined -X \
 	--defsym initrd_phys=$(INITRD_PHYS) \
 	--defsym params_phys=$(PARAMS_PHYS) -T
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 76a50ecae1c3..3ea230aa94b7 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -37,6 +37,8 @@ ifeq ($(CONFIG_ARM_VIRT_EXT),y)
 OBJS		+= hyp-stub.o
 endif
 
+GCOV_PROFILE := n
+
 #
 # Architecture dependencies
 #
diff --git a/kernel/gcov/Kconfig b/kernel/gcov/Kconfig
index d04ce8ac4399..cf66c5c8458e 100644
--- a/kernel/gcov/Kconfig
+++ b/kernel/gcov/Kconfig
@@ -35,7 +35,7 @@ config GCOV_KERNEL
 config GCOV_PROFILE_ALL
 	bool "Profile entire Kernel"
 	depends on GCOV_KERNEL
-	depends on SUPERH || S390 || X86 || PPC || MICROBLAZE
+	depends on SUPERH || S390 || X86 || PPC || MICROBLAZE || ARM
 	default n
 	---help---
 	This options activates profiling for the entire kernel.
-- cgit v1.2.3