summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--MAINTAINERS3
-rw-r--r--arch/Kconfig4
-rw-r--r--fs/ocfs2/refcounttree.c14
-rw-r--r--fs/proc/kcore.c23
-rw-r--r--include/linux/oom.h2
-rw-r--r--include/linux/rbtree_augmented.h1
-rw-r--r--include/linux/rbtree_latch.h1
-rw-r--r--init/main.c7
-rw-r--r--kernel/module.c5
-rw-r--r--lib/find_bit_benchmark.c7
-rw-r--r--mm/migrate.c4
-rw-r--r--mm/mmap.c44
-rw-r--r--mm/oom_kill.c81
-rw-r--r--mm/sparse.c2
-rw-r--r--mm/vmstat.c6
-rw-r--r--mm/z3fold.c42
-rwxr-xr-xscripts/faddr2line5
17 files changed, 164 insertions, 87 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index f913c80c8c38..58b9861ccf99 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3691,7 +3691,6 @@ F: drivers/cpufreq/arm_big_little_dt.c
CPU POWER MONITORING SUBSYSTEM
M: Thomas Renninger <trenn@suse.com>
-M: Shuah Khan <shuahkh@osg.samsung.com>
M: Shuah Khan <shuah@kernel.org>
L: linux-pm@vger.kernel.org
S: Maintained
@@ -7696,7 +7695,6 @@ F: include/linux/sunrpc/
F: include/uapi/linux/sunrpc/
KERNEL SELFTEST FRAMEWORK
-M: Shuah Khan <shuahkh@osg.samsung.com>
M: Shuah Khan <shuah@kernel.org>
L: linux-kselftest@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git
@@ -14650,7 +14648,6 @@ F: drivers/usb/common/usb-otg-fsm.c
USB OVER IP DRIVER
M: Valentina Manea <valentina.manea.m@gmail.com>
-M: Shuah Khan <shuahkh@osg.samsung.com>
M: Shuah Khan <shuah@kernel.org>
L: linux-usb@vger.kernel.org
S: Maintained
diff --git a/arch/Kconfig b/arch/Kconfig
index 8e0d665c8d53..75dd23acf133 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -464,6 +464,10 @@ config GCC_PLUGIN_LATENT_ENTROPY
config GCC_PLUGIN_STRUCTLEAK
bool "Force initialization of variables containing userspace addresses"
depends on GCC_PLUGINS
+ # Currently STRUCTLEAK inserts initialization out of live scope of
+ # variables from KASAN point of view. This leads to KASAN false
+ # positive reports. Prohibit this combination for now.
+ depends on !KASAN_EXTRA
help
This plugin zero-initializes any structures containing a
__user attribute. This can prevent some classes of information
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 01c6b3894406..7869622af22a 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -4250,10 +4250,11 @@ out:
static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
struct dentry *new_dentry, bool preserve)
{
- int error;
+ int error, had_lock;
struct inode *inode = d_inode(old_dentry);
struct buffer_head *old_bh = NULL;
struct inode *new_orphan_inode = NULL;
+ struct ocfs2_lock_holder oh;
if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)))
return -EOPNOTSUPP;
@@ -4295,6 +4296,14 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
goto out;
}
+ had_lock = ocfs2_inode_lock_tracker(new_orphan_inode, NULL, 1,
+ &oh);
+ if (had_lock < 0) {
+ error = had_lock;
+ mlog_errno(error);
+ goto out;
+ }
+
/* If the security isn't preserved, we need to re-initialize them. */
if (!preserve) {
error = ocfs2_init_security_and_acl(dir, new_orphan_inode,
@@ -4302,14 +4311,15 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
if (error)
mlog_errno(error);
}
-out:
if (!error) {
error = ocfs2_mv_orphaned_inode_to_new(dir, new_orphan_inode,
new_dentry);
if (error)
mlog_errno(error);
}
+ ocfs2_inode_unlock_tracker(new_orphan_inode, 1, &oh, had_lock);
+out:
if (new_orphan_inode) {
/*
* We need to open_unlock the inode no matter whether we
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index d1e82761de81..e64ecb9f2720 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -209,25 +209,34 @@ kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg)
{
struct list_head *head = (struct list_head *)arg;
struct kcore_list *ent;
+ struct page *p;
+
+ if (!pfn_valid(pfn))
+ return 1;
+
+ p = pfn_to_page(pfn);
+ if (!memmap_valid_within(pfn, p, page_zone(p)))
+ return 1;
ent = kmalloc(sizeof(*ent), GFP_KERNEL);
if (!ent)
return -ENOMEM;
- ent->addr = (unsigned long)__va((pfn << PAGE_SHIFT));
+ ent->addr = (unsigned long)page_to_virt(p);
ent->size = nr_pages << PAGE_SHIFT;
- /* Sanity check: Can happen in 32bit arch...maybe */
- if (ent->addr < (unsigned long) __va(0))
+ if (!virt_addr_valid(ent->addr))
goto free_out;
/* cut not-mapped area. ....from ppc-32 code. */
if (ULONG_MAX - ent->addr < ent->size)
ent->size = ULONG_MAX - ent->addr;
- /* cut when vmalloc() area is higher than direct-map area */
- if (VMALLOC_START > (unsigned long)__va(0)) {
- if (ent->addr > VMALLOC_START)
- goto free_out;
+ /*
+ * We've already checked virt_addr_valid so we know this address
+ * is a valid pointer, therefore we can check against it to determine
+ * if we need to trim
+ */
+ if (VMALLOC_START > ent->addr) {
if (VMALLOC_START - ent->addr < ent->size)
ent->size = VMALLOC_START - ent->addr;
}
diff --git a/include/linux/oom.h b/include/linux/oom.h
index 5bad038ac012..6adac113e96d 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -95,6 +95,8 @@ static inline int check_stable_address_space(struct mm_struct *mm)
return 0;
}
+void __oom_reap_task_mm(struct mm_struct *mm);
+
extern unsigned long oom_badness(struct task_struct *p,
struct mem_cgroup *memcg, const nodemask_t *nodemask,
unsigned long totalpages);
diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h
index 6bfd2b581f75..af8a61be2d8d 100644
--- a/include/linux/rbtree_augmented.h
+++ b/include/linux/rbtree_augmented.h
@@ -26,6 +26,7 @@
#include <linux/compiler.h>
#include <linux/rbtree.h>
+#include <linux/rcupdate.h>
/*
* Please note - only struct rb_augment_callbacks and the prototypes for
diff --git a/include/linux/rbtree_latch.h b/include/linux/rbtree_latch.h
index ece43e882b56..7d012faa509a 100644
--- a/include/linux/rbtree_latch.h
+++ b/include/linux/rbtree_latch.h
@@ -35,6 +35,7 @@
#include <linux/rbtree.h>
#include <linux/seqlock.h>
+#include <linux/rcupdate.h>
struct latch_tree_node {
struct rb_node node[2];
diff --git a/init/main.c b/init/main.c
index a404936d85d8..fd37315835b4 100644
--- a/init/main.c
+++ b/init/main.c
@@ -1034,6 +1034,13 @@ __setup("rodata=", set_debug_rodata);
static void mark_readonly(void)
{
if (rodata_enabled) {
+ /*
+ * load_module() results in W+X mappings, which are cleaned up
+ * with call_rcu_sched(). Let's make sure that queued work is
+ * flushed so that we don't hit false positives looking for
+ * insecure pages which are W+X.
+ */
+ rcu_barrier_sched();
mark_rodata_ro();
rodata_test();
} else
diff --git a/kernel/module.c b/kernel/module.c
index ce8066b88178..c9bea7f2b43e 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3517,6 +3517,11 @@ static noinline int do_init_module(struct module *mod)
* walking this with preempt disabled. In all the failure paths, we
* call synchronize_sched(), but we don't want to slow down the success
* path, so use actual RCU here.
+ * Note that module_alloc() on most architectures creates W+X page
+ * mappings which won't be cleaned up until do_free_init() runs. Any
+ * code such as mark_rodata_ro() which depends on those mappings to
+ * be cleaned up needs to sync with the queued work - ie
+ * rcu_barrier_sched()
*/
call_rcu_sched(&freeinit->rcu, do_free_init);
mutex_unlock(&module_mutex);
diff --git a/lib/find_bit_benchmark.c b/lib/find_bit_benchmark.c
index 5985a25e6cbc..5367ffa5c18f 100644
--- a/lib/find_bit_benchmark.c
+++ b/lib/find_bit_benchmark.c
@@ -132,7 +132,12 @@ static int __init find_bit_test(void)
test_find_next_bit(bitmap, BITMAP_LEN);
test_find_next_zero_bit(bitmap, BITMAP_LEN);
test_find_last_bit(bitmap, BITMAP_LEN);
- test_find_first_bit(bitmap, BITMAP_LEN);
+
+ /*
+ * test_find_first_bit() may take some time, so
+ * traverse only part of bitmap to avoid soft lockup.
+ */
+ test_find_first_bit(bitmap, BITMAP_LEN / 10);
test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN);
pr_err("\nStart testing find_bit() with sparse bitmap\n");
diff --git a/mm/migrate.c b/mm/migrate.c
index 568433023831..8c0af0f7cab1 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -528,14 +528,12 @@ int migrate_page_move_mapping(struct address_space *mapping,
int i;
int index = page_index(page);
- for (i = 0; i < HPAGE_PMD_NR; i++) {
+ for (i = 1; i < HPAGE_PMD_NR; i++) {
pslot = radix_tree_lookup_slot(&mapping->i_pages,
index + i);
radix_tree_replace_slot(&mapping->i_pages, pslot,
newpage + i);
}
- } else {
- radix_tree_replace_slot(&mapping->i_pages, pslot, newpage);
}
/*
diff --git a/mm/mmap.c b/mm/mmap.c
index 6fc435760086..78e14facdb6e 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -3056,6 +3056,32 @@ void exit_mmap(struct mm_struct *mm)
/* mm's last user has gone, and its about to be pulled down */
mmu_notifier_release(mm);
+ if (unlikely(mm_is_oom_victim(mm))) {
+ /*
+ * Manually reap the mm to free as much memory as possible.
+ * Then, as the oom reaper does, set MMF_OOM_SKIP to disregard
+ * this mm from further consideration. Taking mm->mmap_sem for
+ * write after setting MMF_OOM_SKIP will guarantee that the oom
+ * reaper will not run on this mm again after mmap_sem is
+ * dropped.
+ *
+ * Nothing can be holding mm->mmap_sem here and the above call
+ * to mmu_notifier_release(mm) ensures mmu notifier callbacks in
+ * __oom_reap_task_mm() will not block.
+ *
+ * This needs to be done before calling munlock_vma_pages_all(),
+ * which clears VM_LOCKED, otherwise the oom reaper cannot
+ * reliably test it.
+ */
+ mutex_lock(&oom_lock);
+ __oom_reap_task_mm(mm);
+ mutex_unlock(&oom_lock);
+
+ set_bit(MMF_OOM_SKIP, &mm->flags);
+ down_write(&mm->mmap_sem);
+ up_write(&mm->mmap_sem);
+ }
+
if (mm->locked_vm) {
vma = mm->mmap;
while (vma) {
@@ -3077,24 +3103,6 @@ void exit_mmap(struct mm_struct *mm)
/* update_hiwater_rss(mm) here? but nobody should be looking */
/* Use -1 here to ensure all VMAs in the mm are unmapped */
unmap_vmas(&tlb, vma, 0, -1);
-
- if (unlikely(mm_is_oom_victim(mm))) {
- /*
- * Wait for oom_reap_task() to stop working on this
- * mm. Because MMF_OOM_SKIP is already set before
- * calling down_read(), oom_reap_task() will not run
- * on this "mm" post up_write().
- *
- * mm_is_oom_victim() cannot be set from under us
- * either because victim->mm is already set to NULL
- * under task_lock before calling mmput and oom_mm is
- * set not NULL by the OOM killer only if victim->mm
- * is found not NULL while holding the task_lock.
- */
- set_bit(MMF_OOM_SKIP, &mm->flags);
- down_write(&mm->mmap_sem);
- up_write(&mm->mmap_sem);
- }
free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
tlb_finish_mmu(&tlb, 0, -1);
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index ff992fa8760a..8ba6cb88cf58 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -469,7 +469,6 @@ bool process_shares_mm(struct task_struct *p, struct mm_struct *mm)
return false;
}
-
#ifdef CONFIG_MMU
/*
* OOM Reaper kernel thread which tries to reap the memory used by the OOM
@@ -480,16 +479,54 @@ static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait);
static struct task_struct *oom_reaper_list;
static DEFINE_SPINLOCK(oom_reaper_lock);
-static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
+void __oom_reap_task_mm(struct mm_struct *mm)
{
- struct mmu_gather tlb;
struct vm_area_struct *vma;
+
+ /*
+ * Tell all users of get_user/copy_from_user etc... that the content
+ * is no longer stable. No barriers really needed because unmapping
+ * should imply barriers already and the reader would hit a page fault
+ * if it stumbled over a reaped memory.
+ */
+ set_bit(MMF_UNSTABLE, &mm->flags);
+
+ for (vma = mm->mmap ; vma; vma = vma->vm_next) {
+ if (!can_madv_dontneed_vma(vma))
+ continue;
+
+ /*
+ * Only anonymous pages have a good chance to be dropped
+ * without additional steps which we cannot afford as we
+ * are OOM already.
+ *
+ * We do not even care about fs backed pages because all
+ * which are reclaimable have already been reclaimed and
+ * we do not want to block exit_mmap by keeping mm ref
+ * count elevated without a good reason.
+ */
+ if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) {
+ const unsigned long start = vma->vm_start;
+ const unsigned long end = vma->vm_end;
+ struct mmu_gather tlb;
+
+ tlb_gather_mmu(&tlb, mm, start, end);
+ mmu_notifier_invalidate_range_start(mm, start, end);
+ unmap_page_range(&tlb, vma, start, end, NULL);
+ mmu_notifier_invalidate_range_end(mm, start, end);
+ tlb_finish_mmu(&tlb, start, end);
+ }
+ }
+}
+
+static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
+{
bool ret = true;
/*
* We have to make sure to not race with the victim exit path
* and cause premature new oom victim selection:
- * __oom_reap_task_mm exit_mm
+ * oom_reap_task_mm exit_mm
* mmget_not_zero
* mmput
* atomic_dec_and_test
@@ -534,39 +571,8 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
trace_start_task_reaping(tsk->pid);
- /*
- * Tell all users of get_user/copy_from_user etc... that the content
- * is no longer stable. No barriers really needed because unmapping
- * should imply barriers already and the reader would hit a page fault
- * if it stumbled over a reaped memory.
- */
- set_bit(MMF_UNSTABLE, &mm->flags);
-
- for (vma = mm->mmap ; vma; vma = vma->vm_next) {
- if (!can_madv_dontneed_vma(vma))
- continue;
+ __oom_reap_task_mm(mm);
- /*
- * Only anonymous pages have a good chance to be dropped
- * without additional steps which we cannot afford as we
- * are OOM already.
- *
- * We do not even care about fs backed pages because all
- * which are reclaimable have already been reclaimed and
- * we do not want to block exit_mmap by keeping mm ref
- * count elevated without a good reason.
- */
- if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) {
- const unsigned long start = vma->vm_start;
- const unsigned long end = vma->vm_end;
-
- tlb_gather_mmu(&tlb, mm, start, end);
- mmu_notifier_invalidate_range_start(mm, start, end);
- unmap_page_range(&tlb, vma, start, end, NULL);
- mmu_notifier_invalidate_range_end(mm, start, end);
- tlb_finish_mmu(&tlb, start, end);
- }
- }
pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n",
task_pid_nr(tsk), tsk->comm,
K(get_mm_counter(mm, MM_ANONPAGES)),
@@ -587,14 +593,13 @@ static void oom_reap_task(struct task_struct *tsk)
struct mm_struct *mm = tsk->signal->oom_mm;
/* Retry the down_read_trylock(mmap_sem) a few times */
- while (attempts++ < MAX_OOM_REAP_RETRIES && !__oom_reap_task_mm(tsk, mm))
+ while (attempts++ < MAX_OOM_REAP_RETRIES && !oom_reap_task_mm(tsk, mm))
schedule_timeout_idle(HZ/10);
if (attempts <= MAX_OOM_REAP_RETRIES ||
test_bit(MMF_OOM_SKIP, &mm->flags))
goto done;
-
pr_info("oom_reaper: unable to reap pid:%d (%s)\n",
task_pid_nr(tsk), tsk->comm);
debug_show_all_locks();
diff --git a/mm/sparse.c b/mm/sparse.c
index 62eef264a7bd..73dc2fcc0eab 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -629,7 +629,7 @@ void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
unsigned long pfn;
for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
- unsigned long section_nr = pfn_to_section_nr(start_pfn);
+ unsigned long section_nr = pfn_to_section_nr(pfn);
struct mem_section *ms;
/*
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 536332e988b8..a2b9518980ce 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1161,7 +1161,7 @@ const char * const vmstat_text[] = {
"nr_vmscan_immediate_reclaim",
"nr_dirtied",
"nr_written",
- "nr_indirectly_reclaimable",
+ "", /* nr_indirectly_reclaimable */
/* enum writeback_stat_item counters */
"nr_dirty_threshold",
@@ -1740,6 +1740,10 @@ static int vmstat_show(struct seq_file *m, void *arg)
unsigned long *l = arg;
unsigned long off = l - (unsigned long *)m->private;
+ /* Skip hidden vmstat items. */
+ if (*vmstat_text[off] == '\0')
+ return 0;
+
seq_puts(m, vmstat_text[off]);
seq_put_decimal_ull(m, " ", *l);
seq_putc(m, '\n');
diff --git a/mm/z3fold.c b/mm/z3fold.c
index c0bca6153b95..4b366d181f35 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -144,7 +144,8 @@ enum z3fold_page_flags {
PAGE_HEADLESS = 0,
MIDDLE_CHUNK_MAPPED,
NEEDS_COMPACTING,
- PAGE_STALE
+ PAGE_STALE,
+ UNDER_RECLAIM
};
/*****************
@@ -173,6 +174,7 @@ static struct z3fold_header *init_z3fold_page(struct page *page,
clear_bit(MIDDLE_CHUNK_MAPPED, &page->private);
clear_bit(NEEDS_COMPACTING, &page->private);
clear_bit(PAGE_STALE, &page->private);
+ clear_bit(UNDER_RECLAIM, &page->private);
spin_lock_init(&zhdr->page_lock);
kref_init(&zhdr->refcount);
@@ -756,6 +758,10 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle)
atomic64_dec(&pool->pages_nr);
return;
}
+ if (test_bit(UNDER_RECLAIM, &page->private)) {
+ z3fold_page_unlock(zhdr);
+ return;
+ }
if (test_and_set_bit(NEEDS_COMPACTING, &page->private)) {
z3fold_page_unlock(zhdr);
return;
@@ -840,6 +846,8 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
kref_get(&zhdr->refcount);
list_del_init(&zhdr->buddy);
zhdr->cpu = -1;
+ set_bit(UNDER_RECLAIM, &page->private);
+ break;
}
list_del_init(&page->lru);
@@ -887,25 +895,35 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
goto next;
}
next:
- spin_lock(&pool->lock);
if (test_bit(PAGE_HEADLESS, &page->private)) {
if (ret == 0) {
- spin_unlock(&pool->lock);
free_z3fold_page(page);
return 0;
}
- } else if (kref_put(&zhdr->refcount, release_z3fold_page)) {
- atomic64_dec(&pool->pages_nr);
+ spin_lock(&pool->lock);
+ list_add(&page->lru, &pool->lru);
+ spin_unlock(&pool->lock);
+ } else {
+ z3fold_page_lock(zhdr);
+ clear_bit(UNDER_RECLAIM, &page->private);
+ if (kref_put(&zhdr->refcount,
+ release_z3fold_page_locked)) {
+ atomic64_dec(&pool->pages_nr);
+ return 0;
+ }
+ /*
+ * if we are here, the page is still not completely
+ * free. Take the global pool lock then to be able
+ * to add it back to the lru list
+ */
+ spin_lock(&pool->lock);
+ list_add(&page->lru, &pool->lru);
spin_unlock(&pool->lock);
- return 0;
+ z3fold_page_unlock(zhdr);
}
- /*
- * Add to the beginning of LRU.
- * Pool lock has to be kept here to ensure the page has
- * not already been released
- */
- list_add(&page->lru, &pool->lru);
+ /* We started off locked to we need to lock the pool back */
+ spin_lock(&pool->lock);
}
spin_unlock(&pool->lock);
return -EAGAIN;
diff --git a/scripts/faddr2line b/scripts/faddr2line
index 9e5735a4d3a5..1876a741087c 100755
--- a/scripts/faddr2line
+++ b/scripts/faddr2line
@@ -170,7 +170,10 @@ __faddr2line() {
echo "$file_lines" | while read -r line
do
echo $line
- eval $(echo $line | awk -F "[ :]" '{printf("n1=%d;n2=%d;f=%s",$NF-5, $NF+5, $(NF-1))}')
+ n=$(echo $line | sed 's/.*:\([0-9]\+\).*/\1/g')
+ n1=$[$n-5]
+ n2=$[$n+5]
+ f=$(echo $line | sed 's/.*at \(.\+\):.*/\1/g')
awk 'NR>=strtonum("'$n1'") && NR<=strtonum("'$n2'") {printf("%d\t%s\n", NR, $0)}' $f
done