 Documentation/ABI/testing/sysfs-kernel-mm-memory-tiers |  4
 MAINTAINERS                                             |  2
 arch/x86/include/asm/string_64.h                        | 11
 arch/x86/include/asm/uaccess.h                          | 13
 arch/x86/purgatory/Makefile                             |  1
 fs/exec.c                                               |  2
 fs/ext4/super.c                                         |  4
 fs/squashfs/file.c                                      | 23
 fs/squashfs/page_actor.c                                |  3
 fs/squashfs/page_actor.h                                |  6
 include/linux/fortify-string.h                          | 17
 include/linux/kmsan_string.h                            | 21
 include/linux/userfaultfd_k.h                           |  6
 ipc/msg.c                                               |  4
 lib/Kconfig.debug                                       |  3
 lib/maple_tree.c                                        |  4
 mm/huge_memory.c                                        |  2
 mm/kmemleak.c                                           | 61
 mm/kmsan/instrumentation.c                              |  1
 mm/kmsan/shadow.c                                       |  1
 mm/madvise.c                                            | 12
 mm/memory-tiers.c                                       |  8
 mm/migrate.c                                            |  7
 mm/mmap.c                                               |  3
 mm/page_alloc.c                                         |  1
 mm/page_isolation.c                                     |  2
 mm/shmem.c                                              | 17
 mm/userfaultfd.c                                        | 25
 28 files changed, 196 insertions(+), 68 deletions(-)
diff --git a/Documentation/ABI/testing/sysfs-kernel-mm-memory-tiers b/Documentation/ABI/testing/sysfs-kernel-mm-memory-tiers
index 45985e411f13..721a05b90109 100644
--- a/Documentation/ABI/testing/sysfs-kernel-mm-memory-tiers
+++ b/Documentation/ABI/testing/sysfs-kernel-mm-memory-tiers
@@ -10,7 +10,7 @@ Description: A collection of all the memory tiers allocated.
What: /sys/devices/virtual/memory_tiering/memory_tierN/
- /sys/devices/virtual/memory_tiering/memory_tierN/nodes
+ /sys/devices/virtual/memory_tiering/memory_tierN/nodelist
Date: August 2022
Contact: Linux memory management mailing list <linux-mm@kvack.org>
Description: Directory with details of a specific memory tier
@@ -21,5 +21,5 @@ Description: Directory with details of a specific memory tier
A smaller value of N implies a higher (faster) memory tier in the
hierarchy.
- nodes: NUMA nodes that are part of this memory tier.
+ nodelist: NUMA nodes that are part of this memory tier.
diff --git a/MAINTAINERS b/MAINTAINERS
index 2034e7d26684..3d877de556ad 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14523,7 +14523,7 @@ L: linux-nilfs@vger.kernel.org
S: Supported
W: https://nilfs.sourceforge.io/
W: https://nilfs.osdn.jp/
-T: git git://github.com/konis/nilfs2.git
+T: git https://github.com/konis/nilfs2.git
F: Documentation/filesystems/nilfs2.rst
F: fs/nilfs2/
F: include/trace/events/nilfs2.h
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 3b87d889b6e1..888731ccf1f6 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -10,10 +10,13 @@
/* Even with __builtin_ the compiler may decide to use the out of line
function. */
+#if defined(__SANITIZE_MEMORY__) && defined(__NO_FORTIFY)
+#include <linux/kmsan_string.h>
+#endif
+
#define __HAVE_ARCH_MEMCPY 1
-#if defined(__SANITIZE_MEMORY__)
+#if defined(__SANITIZE_MEMORY__) && defined(__NO_FORTIFY)
#undef memcpy
-void *__msan_memcpy(void *dst, const void *src, size_t size);
#define memcpy __msan_memcpy
#else
extern void *memcpy(void *to, const void *from, size_t len);
@@ -21,7 +24,7 @@ extern void *memcpy(void *to, const void *from, size_t len);
extern void *__memcpy(void *to, const void *from, size_t len);
#define __HAVE_ARCH_MEMSET
-#if defined(__SANITIZE_MEMORY__)
+#if defined(__SANITIZE_MEMORY__) && defined(__NO_FORTIFY)
extern void *__msan_memset(void *s, int c, size_t n);
#undef memset
#define memset __msan_memset
@@ -67,7 +70,7 @@ static inline void *memset64(uint64_t *s, uint64_t v, size_t n)
}
#define __HAVE_ARCH_MEMMOVE
-#if defined(__SANITIZE_MEMORY__)
+#if defined(__SANITIZE_MEMORY__) && defined(__NO_FORTIFY)
#undef memmove
void *__msan_memmove(void *dest, const void *src, size_t len);
#define memmove __msan_memmove
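The redirection above works by compile-time interposition: when KMSAN is enabled (and fortify is not), memcpy/memset/memmove become macros expanding to the instrumented __msan_* variants, so the sanitizer observes every call site. A minimal userspace sketch of the same macro-redirection idea follows; traced_memcpy and the trace output are illustrative, not kernel API:

/* Sketch: redirect call sites to a wrapper via a macro, as the header
 * above does with "#define memcpy __msan_memcpy". Illustrative only. */
#include <stdio.h>
#include <string.h>

static void *traced_memcpy(void *dst, const void *src, size_t n)
{
        /* A real sanitizer would also update shadow (initializedness)
         * state here; we only log the call. */
        fprintf(stderr, "memcpy of %zu bytes\n", n);
        return memcpy(dst, src, n);     /* macro not yet defined: real memcpy */
}

#undef memcpy
#define memcpy traced_memcpy            /* every later call site is redirected */

int main(void)
{
        char a[8], b[8] = "hello";

        memcpy(a, b, sizeof(b));        /* expands to traced_memcpy(...) */
        puts(a);
        return 0;
}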
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index 8bc614cfe21b..1cc756eafa44 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -254,24 +254,25 @@ extern void __put_user_nocheck_8(void);
#define __put_user_size(x, ptr, size, label) \
do { \
__typeof__(*(ptr)) __x = (x); /* eval x once */ \
- __chk_user_ptr(ptr); \
+ __typeof__(ptr) __ptr = (ptr); /* eval ptr once */ \
+ __chk_user_ptr(__ptr); \
switch (size) { \
case 1: \
- __put_user_goto(__x, ptr, "b", "iq", label); \
+ __put_user_goto(__x, __ptr, "b", "iq", label); \
break; \
case 2: \
- __put_user_goto(__x, ptr, "w", "ir", label); \
+ __put_user_goto(__x, __ptr, "w", "ir", label); \
break; \
case 4: \
- __put_user_goto(__x, ptr, "l", "ir", label); \
+ __put_user_goto(__x, __ptr, "l", "ir", label); \
break; \
case 8: \
- __put_user_goto_u64(__x, ptr, label); \
+ __put_user_goto_u64(__x, __ptr, label); \
break; \
default: \
__put_user_bad(); \
} \
- instrument_put_user(__x, ptr, size); \
+ instrument_put_user(__x, __ptr, size); \
} while (0)
#ifdef CONFIG_CC_HAS_ASM_GOTO_OUTPUT
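The uaccess.h change is macro hygiene: the old macro expanded ptr once per use, so an argument with side effects (say, p++) was evaluated several times; the new __ptr temporary evaluates it exactly once. A toy demonstration with made-up PUT_BAD/PUT_GOOD macros (__typeof__ is the GCC extension the kernel itself uses):

/* Toy illustration of macro double evaluation; names are invented. */
#include <stdio.h>

#define PUT_BAD(x, p)   (*(p) = (x), *(p) = (x))        /* expands p twice */

#define PUT_GOOD(x, p)                                  \
        do {                                            \
                __typeof__(p) __p = (p); /* eval p once */ \
                *__p = (x);                             \
        } while (0)

int main(void)
{
        int buf[4] = { 0 };
        int *q = buf;

        PUT_BAD(1, q++);        /* both expansions run: q advances twice */
        printf("after PUT_BAD:  q - buf = %d\n", (int)(q - buf));

        q = buf;
        PUT_GOOD(1, q++);       /* argument evaluated exactly once */
        printf("after PUT_GOOD: q - buf = %d\n", (int)(q - buf));
        return 0;
}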
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 58a200dc762d..17f09dc26381 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -26,6 +26,7 @@ GCOV_PROFILE := n
KASAN_SANITIZE := n
UBSAN_SANITIZE := n
KCSAN_SANITIZE := n
+KMSAN_SANITIZE := n
KCOV_INSTRUMENT := n
# These are adjustments to the compiler flags used for objects that
diff --git a/fs/exec.c b/fs/exec.c
index 32dc8cf5fceb..a0b1f0337a62 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1012,7 +1012,6 @@ static int exec_mmap(struct mm_struct *mm)
active_mm = tsk->active_mm;
tsk->active_mm = mm;
tsk->mm = mm;
- lru_gen_add_mm(mm);
/*
* This prevents preemption while active_mm is being loaded and
* it and mm are being updated, which could cause problems for
@@ -1025,6 +1024,7 @@ static int exec_mmap(struct mm_struct *mm)
activate_mm(active_mm, mm);
if (IS_ENABLED(CONFIG_ARCH_WANT_IRQS_OFF_ACTIVATE_MM))
local_irq_enable();
+ lru_gen_add_mm(mm);
task_unlock(tsk);
lru_gen_use_mm(mm);
if (old_mm) {
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 989365b878a6..7950904fbf04 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1741,10 +1741,6 @@ static const struct fs_parameter_spec ext4_param_specs[] = {
#define DEFAULT_JOURNAL_IOPRIO (IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, 3))
-static const char deprecated_msg[] =
- "Mount option \"%s\" will be removed by %s\n"
- "Contact linux-ext4@vger.kernel.org if you think we should keep it.\n";
-
#define MOPT_SET 0x0001
#define MOPT_CLEAR 0x0002
#define MOPT_NOSUPPORT 0x0004
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index e56510964b22..8ba8c4c50770 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -506,8 +506,9 @@ static int squashfs_readahead_fragment(struct page **page,
squashfs_i(inode)->fragment_size);
struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
unsigned int n, mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1;
+ int error = buffer->error;
- if (buffer->error)
+ if (error)
goto out;
expected += squashfs_i(inode)->fragment_offset;
@@ -529,7 +530,7 @@ static int squashfs_readahead_fragment(struct page **page,
out:
squashfs_cache_put(buffer);
- return buffer->error;
+ return error;
}
static void squashfs_readahead(struct readahead_control *ractl)
@@ -557,6 +558,13 @@ static void squashfs_readahead(struct readahead_control *ractl)
int res, bsize;
u64 block = 0;
unsigned int expected;
+ struct page *last_page;
+
+ expected = start >> msblk->block_log == file_end ?
+ (i_size_read(inode) & (msblk->block_size - 1)) :
+ msblk->block_size;
+
+ max_pages = (expected + PAGE_SIZE - 1) >> PAGE_SHIFT;
nr_pages = __readahead_batch(ractl, pages, max_pages);
if (!nr_pages)
@@ -566,13 +574,10 @@ static void squashfs_readahead(struct readahead_control *ractl)
goto skip_pages;
index = pages[0]->index >> shift;
+
if ((pages[nr_pages - 1]->index >> shift) != index)
goto skip_pages;
- expected = index == file_end ?
- (i_size_read(inode) & (msblk->block_size - 1)) :
- msblk->block_size;
-
if (index == file_end && squashfs_i(inode)->fragment_block !=
SQUASHFS_INVALID_BLK) {
res = squashfs_readahead_fragment(pages, nr_pages,
@@ -593,15 +598,15 @@ static void squashfs_readahead(struct readahead_control *ractl)
res = squashfs_read_data(inode->i_sb, block, bsize, NULL, actor);
- squashfs_page_actor_free(actor);
+ last_page = squashfs_page_actor_free(actor);
if (res == expected) {
int bytes;
/* Last page (if present) may have trailing bytes not filled */
bytes = res % PAGE_SIZE;
- if (pages[nr_pages - 1]->index == file_end && bytes)
- memzero_page(pages[nr_pages - 1], bytes,
+ if (index == file_end && bytes && last_page)
+ memzero_page(last_page, bytes,
PAGE_SIZE - bytes);
for (i = 0; i < nr_pages; i++) {
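The fix computes expected from the readahead start position before batching pages, so the final, partial block of a file is sized and zero-padded correctly even when fewer pages than max_pages come back. A standalone sketch of the tail-block arithmetic (names illustrative; like squashfs, it assumes a power-of-two block size, with the last block holding the file size modulo the block size):

#include <stdio.h>

static unsigned int expected_bytes(unsigned long long file_size,
                                   unsigned long long block,
                                   unsigned long long last_block,
                                   unsigned int block_size)
{
        /* Full blocks everywhere except (possibly) the last one. */
        return block == last_block ?
                (unsigned int)(file_size & (block_size - 1)) : block_size;
}

int main(void)
{
        unsigned int bs = 1u << 17;                     /* 128 KiB blocks */
        unsigned long long size = 3ull * bs + 1000;     /* 3 full blocks + tail */

        printf("block 0: %u bytes\n", expected_bytes(size, 0, size >> 17, bs));
        printf("block 3: %u bytes\n", expected_bytes(size, 3, size >> 17, bs));
        return 0;
}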
diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c
index 54b93bf4a25c..81af6c4ca115 100644
--- a/fs/squashfs/page_actor.c
+++ b/fs/squashfs/page_actor.c
@@ -71,11 +71,13 @@ static void *handle_next_page(struct squashfs_page_actor *actor)
(actor->next_index != actor->page[actor->next_page]->index)) {
actor->next_index++;
actor->returned_pages++;
+ actor->last_page = NULL;
return actor->alloc_buffer ? actor->tmp_buffer : ERR_PTR(-ENOMEM);
}
actor->next_index++;
actor->returned_pages++;
+ actor->last_page = actor->page[actor->next_page];
return actor->pageaddr = kmap_local_page(actor->page[actor->next_page++]);
}
@@ -125,6 +127,7 @@ struct squashfs_page_actor *squashfs_page_actor_init_special(struct squashfs_sb_
actor->returned_pages = 0;
actor->next_index = page[0]->index & ~((1 << (msblk->block_log - PAGE_SHIFT)) - 1);
actor->pageaddr = NULL;
+ actor->last_page = NULL;
actor->alloc_buffer = msblk->decompressor->alloc_buffer;
actor->squashfs_first_page = direct_first_page;
actor->squashfs_next_page = direct_next_page;
diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h
index 95ffbb543d91..97d4983559b1 100644
--- a/fs/squashfs/page_actor.h
+++ b/fs/squashfs/page_actor.h
@@ -16,6 +16,7 @@ struct squashfs_page_actor {
void *(*squashfs_first_page)(struct squashfs_page_actor *);
void *(*squashfs_next_page)(struct squashfs_page_actor *);
void (*squashfs_finish_page)(struct squashfs_page_actor *);
+ struct page *last_page;
int pages;
int length;
int next_page;
@@ -29,10 +30,13 @@ extern struct squashfs_page_actor *squashfs_page_actor_init(void **buffer,
extern struct squashfs_page_actor *squashfs_page_actor_init_special(
struct squashfs_sb_info *msblk,
struct page **page, int pages, int length);
-static inline void squashfs_page_actor_free(struct squashfs_page_actor *actor)
+static inline struct page *squashfs_page_actor_free(struct squashfs_page_actor *actor)
{
+ struct page *last_page = actor->last_page;
+
kfree(actor->tmp_buffer);
kfree(actor);
+ return last_page;
}
static inline void *squashfs_first_page(struct squashfs_page_actor *actor)
{
diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h
index 4029fe368a4f..18a31b125f9d 100644
--- a/include/linux/fortify-string.h
+++ b/include/linux/fortify-string.h
@@ -43,11 +43,24 @@ extern __kernel_size_t __underlying_strlen(const char *p) __RENAME(strlen);
extern char *__underlying_strncat(char *p, const char *q, __kernel_size_t count) __RENAME(strncat);
extern char *__underlying_strncpy(char *p, const char *q, __kernel_size_t size) __RENAME(strncpy);
#else
-#define __underlying_memchr __builtin_memchr
-#define __underlying_memcmp __builtin_memcmp
+
+#if defined(__SANITIZE_MEMORY__)
+/*
+ * For KMSAN builds all memcpy/memset/memmove calls should be replaced by the
+ * corresponding __msan_XXX functions.
+ */
+#include <linux/kmsan_string.h>
+#define __underlying_memcpy __msan_memcpy
+#define __underlying_memmove __msan_memmove
+#define __underlying_memset __msan_memset
+#else
#define __underlying_memcpy __builtin_memcpy
#define __underlying_memmove __builtin_memmove
#define __underlying_memset __builtin_memset
+#endif
+
+#define __underlying_memchr __builtin_memchr
+#define __underlying_memcmp __builtin_memcmp
#define __underlying_strcat __builtin_strcat
#define __underlying_strcpy __builtin_strcpy
#define __underlying_strlen __builtin_strlen
diff --git a/include/linux/kmsan_string.h b/include/linux/kmsan_string.h
new file mode 100644
index 000000000000..7287da6f52ef
--- /dev/null
+++ b/include/linux/kmsan_string.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * KMSAN string functions API used in other headers.
+ *
+ * Copyright (C) 2022 Google LLC
+ * Author: Alexander Potapenko <glider@google.com>
+ *
+ */
+#ifndef _LINUX_KMSAN_STRING_H
+#define _LINUX_KMSAN_STRING_H
+
+/*
+ * KMSAN overrides the default memcpy/memset/memmove implementations in the
+ * kernel, which requires having __msan_XXX function prototypes in several other
+ * headers. Keep them in one place instead of open-coding.
+ */
+void *__msan_memcpy(void *dst, const void *src, size_t size);
+void *__msan_memset(void *s, int c, size_t n);
+void *__msan_memmove(void *dest, const void *src, size_t len);
+
+#endif /* _LINUX_KMSAN_STRING_H */
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index f07e6998bb68..9df0b9a762cc 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -146,9 +146,9 @@ static inline bool userfaultfd_armed(struct vm_area_struct *vma)
static inline bool vma_can_userfault(struct vm_area_struct *vma,
unsigned long vm_flags)
{
- if (vm_flags & VM_UFFD_MINOR)
- return is_vm_hugetlb_page(vma) || vma_is_shmem(vma);
-
+ if ((vm_flags & VM_UFFD_MINOR) &&
+ (!is_vm_hugetlb_page(vma) && !vma_is_shmem(vma)))
+ return false;
#ifndef CONFIG_PTE_MARKER_UFFD_WP
/*
* If user requested uffd-wp but not enabled pte markers for
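This hunk is not a pure rewrite: the old early return for VM_UFFD_MINOR skipped everything below it, including the CONFIG_PTE_MARKER_UFFD_WP check that follows; the new form rejects only the invalid combination and falls through, so the remaining checks apply to minor-fault registrations too. A toy model of that control-flow difference (both predicates are stand-ins):

#include <stdbool.h>
#include <stdio.h>

static bool minor_ok = true;            /* hugetlb or shmem VMA */
static bool later_check_ok = false;     /* e.g. uffd-wp without pte markers */

static bool can_userfault_old(bool minor)
{
        if (minor)
                return minor_ok;        /* early return skips later checks */
        return later_check_ok;
}

static bool can_userfault_new(bool minor)
{
        if (minor && !minor_ok)
                return false;           /* reject, otherwise fall through */
        return later_check_ok;          /* later checks now apply to minor */
}

int main(void)
{
        /* old: 1, new: 0 -- the later check is no longer bypassed */
        printf("old: %d, new: %d\n",
               can_userfault_old(true), can_userfault_new(true));
        return 0;
}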
diff --git a/ipc/msg.c b/ipc/msg.c
index e4e0990e08f7..fd08b3cb36d7 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -1329,11 +1329,11 @@ fail_msg_bytes:
#ifdef CONFIG_IPC_NS
void msg_exit_ns(struct ipc_namespace *ns)
{
- percpu_counter_destroy(&ns->percpu_msg_bytes);
- percpu_counter_destroy(&ns->percpu_msg_hdrs);
free_ipcs(ns, &msg_ids(ns), freeque);
idr_destroy(&ns->ids[IPC_MSG_IDS].ipcs_idr);
rhashtable_destroy(&ns->ids[IPC_MSG_IDS].key_ht);
+ percpu_counter_destroy(&ns->percpu_msg_bytes);
+ percpu_counter_destroy(&ns->percpu_msg_hdrs);
}
#endif
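The reordering matters because freeque(), invoked through free_ipcs(), still subtracts each queue's messages from the percpu counters; destroying the counters first meant updating freed state. The general rule is to tear down in reverse dependency order: a resource's last user goes before the resource itself. A minimal sketch of the principle, with illustrative names:

#include <stdio.h>
#include <stdlib.h>

struct counter { long *val; };

static void free_queues(struct counter *c)
{
        (*c->val)--;    /* still uses the counter: it must outlive us */
}

static void counter_destroy(struct counter *c)
{
        free(c->val);
        c->val = NULL;
}

int main(void)
{
        struct counter c = { .val = calloc(1, sizeof(long)) };

        *c.val = 1;
        free_queues(&c);        /* users of the counter go first... */
        counter_destroy(&c);    /* ...the counter itself goes last */
        puts("teardown ok");
        return 0;
}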
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 3fc7abffc7aa..29280072dc0e 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -400,8 +400,9 @@ config FRAME_WARN
default 1536 if (!64BIT && XTENSA)
default 1024 if !64BIT
default 2048 if 64BIT
+ default 0 if KMSAN
help
- Tell gcc to warn at build time for stack frames larger than this.
+ Tell the compiler to warn at build time for stack frames larger than this.
Setting this too low will cause a lot of warnings.
Setting it to 0 disables the warning.
diff --git a/lib/maple_tree.c b/lib/maple_tree.c
index e1743803c851..fbde494444b8 100644
--- a/lib/maple_tree.c
+++ b/lib/maple_tree.c
@@ -2903,8 +2903,8 @@ static inline void *mtree_range_walk(struct ma_state *mas)
unsigned long max, min;
unsigned long prev_max, prev_min;
- last = next = mas->node;
- prev_min = min = mas->min;
+ next = mas->node;
+ min = mas->min;
max = mas->max;
do {
offset = 0;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 03fc7e5edf07..561a42567477 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -2462,7 +2462,7 @@ static void __split_huge_page_tail(struct page *head, int tail,
* Fix up and warn once if private is unexpectedly set.
*/
if (!folio_test_swapcache(page_folio(head))) {
- VM_WARN_ON_ONCE_PAGE(page_tail->private != 0, head);
+ VM_WARN_ON_ONCE_PAGE(page_tail->private != 0, page_tail);
page_tail->private = 0;
}
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 37af2dc8dac9..646e2979641f 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1461,6 +1461,27 @@ static void scan_gray_list(void)
}
/*
 * Conditionally call resched() in an object iteration loop while making sure
+ * that the given object won't go away without RCU read lock by performing a
+ * get_object() if !pinned.
+ *
+ * Return: false if can't do a cond_resched() due to get_object() failure
+ * true otherwise
+ */
+static bool kmemleak_cond_resched(struct kmemleak_object *object, bool pinned)
+{
+ if (!pinned && !get_object(object))
+ return false;
+
+ rcu_read_unlock();
+ cond_resched();
+ rcu_read_lock();
+ if (!pinned)
+ put_object(object);
+ return true;
+}
+
+/*
* Scan data sections and all the referenced memory blocks allocated via the
* kernel's standard allocators. This function must be called with the
* scan_mutex held.
@@ -1471,7 +1492,7 @@ static void kmemleak_scan(void)
struct zone *zone;
int __maybe_unused i;
int new_leaks = 0;
- int loop1_cnt = 0;
+ int loop_cnt = 0;
jiffies_last_scan = jiffies;
@@ -1480,7 +1501,6 @@ static void kmemleak_scan(void)
list_for_each_entry_rcu(object, &object_list, object_list) {
bool obj_pinned = false;
- loop1_cnt++;
raw_spin_lock_irq(&object->lock);
#ifdef DEBUG
/*
@@ -1514,24 +1534,11 @@ static void kmemleak_scan(void)
raw_spin_unlock_irq(&object->lock);
/*
- * Do a cond_resched() to avoid soft lockup every 64k objects.
- * Make sure a reference has been taken so that the object
- * won't go away without RCU read lock.
+ * Do a cond_resched() every 64k objects to avoid soft lockup.
*/
- if (!(loop1_cnt & 0xffff)) {
- if (!obj_pinned && !get_object(object)) {
- /* Try the next object instead */
- loop1_cnt--;
- continue;
- }
-
- rcu_read_unlock();
- cond_resched();
- rcu_read_lock();
-
- if (!obj_pinned)
- put_object(object);
- }
+ if (!(++loop_cnt & 0xffff) &&
+ !kmemleak_cond_resched(object, obj_pinned))
+ loop_cnt--; /* Try again on next object */
}
rcu_read_unlock();
@@ -1598,8 +1605,16 @@ static void kmemleak_scan(void)
* scan and color them gray until the next scan.
*/
rcu_read_lock();
+ loop_cnt = 0;
list_for_each_entry_rcu(object, &object_list, object_list) {
/*
+ * Do a cond_resched() every 64k objects to avoid soft lockup.
+ */
+ if (!(++loop_cnt & 0xffff) &&
+ !kmemleak_cond_resched(object, false))
+ loop_cnt--; /* Try again on next object */
+
+ /*
* This is racy but we can save the overhead of lock/unlock
* calls. The missed objects, if any, should be caught in
* the next scan.
@@ -1632,8 +1647,16 @@ static void kmemleak_scan(void)
* Scanning result reporting.
*/
rcu_read_lock();
+ loop_cnt = 0;
list_for_each_entry_rcu(object, &object_list, object_list) {
/*
+ * Do a cond_resched() every 64k objects to avoid soft lockup.
+ */
+ if (!(++loop_cnt & 0xffff) &&
+ !kmemleak_cond_resched(object, false))
+ loop_cnt--; /* Try again on next object */
+
+ /*
* This is racy but we can save the overhead of lock/unlock
* calls. The missed objects, if any, should be caught in
* the next scan.
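kmemleak_cond_resched() pins the object with a reference before dropping the RCU read lock, so the object cannot be freed while the CPU is yielded; the loops then only reschedule every 64k objects to keep the overhead negligible. A userspace sketch of the same pin-yield-unpin pattern, where a plain (single-threaded) refcount stands in for get_object()/put_object() and sched_yield() for cond_resched():

#include <sched.h>
#include <stdbool.h>
#include <stdio.h>

struct object { int refcount; };

static bool get_object(struct object *o)
{
        if (o->refcount <= 0)
                return false;           /* being freed: can't pin it */
        o->refcount++;
        return true;
}

static void put_object(struct object *o)
{
        o->refcount--;
}

static bool cond_yield(struct object *o, bool pinned)
{
        if (!pinned && !get_object(o))
                return false;           /* caller retries on the next object */

        sched_yield();                  /* safe: the object is pinned */

        if (!pinned)
                put_object(o);
        return true;
}

int main(void)
{
        struct object o = { .refcount = 1 };

        printf("yielded: %d\n", cond_yield(&o, false));
        return 0;
}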
diff --git a/mm/kmsan/instrumentation.c b/mm/kmsan/instrumentation.c
index 280d15413268..271f135f97a1 100644
--- a/mm/kmsan/instrumentation.c
+++ b/mm/kmsan/instrumentation.c
@@ -14,6 +14,7 @@
#include "kmsan.h"
#include <linux/gfp.h>
+#include <linux/kmsan_string.h>
#include <linux/mm.h>
#include <linux/uaccess.h>
diff --git a/mm/kmsan/shadow.c b/mm/kmsan/shadow.c
index 21e3e196ec3c..a787c04e9583 100644
--- a/mm/kmsan/shadow.c
+++ b/mm/kmsan/shadow.c
@@ -167,6 +167,7 @@ void kmsan_copy_page_meta(struct page *dst, struct page *src)
__memcpy(origin_ptr_for(dst), origin_ptr_for(src), PAGE_SIZE);
kmsan_leave_runtime();
}
+EXPORT_SYMBOL(kmsan_copy_page_meta);
void kmsan_alloc_page(struct page *page, unsigned int order, gfp_t flags)
{
diff --git a/mm/madvise.c b/mm/madvise.c
index 2baa93ca2310..c7105ec6d08c 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -813,7 +813,14 @@ static bool madvise_dontneed_free_valid_vma(struct vm_area_struct *vma,
if (start & ~huge_page_mask(hstate_vma(vma)))
return false;
- *end = ALIGN(*end, huge_page_size(hstate_vma(vma)));
+ /*
+ * Madvise callers expect the length to be rounded up to PAGE_SIZE
+ * boundaries, and may be unaware that this VMA uses huge pages.
+ * Avoid unexpected data loss by rounding down the number of
+ * huge pages freed.
+ */
+ *end = ALIGN_DOWN(*end, huge_page_size(hstate_vma(vma)));
+
return true;
}
@@ -828,6 +835,9 @@ static long madvise_dontneed_free(struct vm_area_struct *vma,
if (!madvise_dontneed_free_valid_vma(vma, start, &end, behavior))
return -EINVAL;
+ if (start == end)
+ return 0;
+
if (!userfaultfd_remove(vma, start, end)) {
*prev = NULL; /* mmap_lock has been dropped, prev is stale */
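Rounding end up could wipe up to a full huge page beyond what the caller asked to free; rounding down frees only huge pages the request fully covers, and the new start == end check turns an empty range into a no-op success. A small demonstration of the difference, using 2 MiB as a stand-in huge page size:

#include <stdio.h>

#define ALIGN(x, a)             (((x) + (a) - 1) & ~((a) - 1))
#define ALIGN_DOWN(x, a)        ((x) & ~((a) - 1))

int main(void)
{
        unsigned long huge = 2UL << 20;         /* 2 MiB huge pages */
        unsigned long start = 0, end = 4096;    /* caller frees one 4 KiB page */

        /* Old behaviour: the range grows to a whole huge page (data loss). */
        printf("ALIGN:      [%lu, %lu)\n", start, ALIGN(end, huge));

        /* New behaviour: nothing is fully covered, so nothing is freed. */
        printf("ALIGN_DOWN: [%lu, %lu)\n", start, ALIGN_DOWN(end, huge));
        return 0;
}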
diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c
index f116b7b6333e..fa8c9d07f9ce 100644
--- a/mm/memory-tiers.c
+++ b/mm/memory-tiers.c
@@ -131,8 +131,8 @@ static void memory_tier_device_release(struct device *dev)
kfree(tier);
}
-static ssize_t nodes_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t nodelist_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
int ret;
nodemask_t nmask;
@@ -143,10 +143,10 @@ static ssize_t nodes_show(struct device *dev,
mutex_unlock(&memory_tier_lock);
return ret;
}
-static DEVICE_ATTR_RO(nodes);
+static DEVICE_ATTR_RO(nodelist);
static struct attribute *memtier_dev_attrs[] = {
- &dev_attr_nodes.attr,
+ &dev_attr_nodelist.attr,
NULL
};
diff --git a/mm/migrate.c b/mm/migrate.c
index 1379e1912772..dff333593a8a 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1582,6 +1582,13 @@ out:
*/
list_splice(&ret_pages, from);
+ /*
+ * Return 0 in case all subpages of fail-to-migrate THPs are
+ * migrated successfully.
+ */
+ if (list_empty(from))
+ rc = 0;
+
count_vm_events(PGMIGRATE_SUCCESS, nr_succeeded);
count_vm_events(PGMIGRATE_FAIL, nr_failed_pages);
count_vm_events(THP_MIGRATION_SUCCESS, nr_thp_succeeded);
diff --git a/mm/mmap.c b/mm/mmap.c
index e270057ed04e..2def55555e05 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2852,6 +2852,9 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size,
if (next->vm_flags != vma->vm_flags)
goto out;
+ if (start + size <= next->vm_end)
+ break;
+
prev = next;
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b5a6c815ae28..218b28ee49ed 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -807,6 +807,7 @@ static void prep_compound_tail(struct page *head, int tail_idx)
p->mapping = TAIL_MAPPING;
set_compound_head(p, head);
+ set_page_private(p, 0);
}
void prep_compound_page(struct page *page, unsigned int order)
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 04141a9bea70..47fbc1696466 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -330,7 +330,7 @@ static int isolate_single_pageblock(unsigned long boundary_pfn, int flags,
zone->zone_start_pfn);
if (skip_isolation) {
- int mt = get_pageblock_migratetype(pfn_to_page(isolate_pageblock));
+ int mt __maybe_unused = get_pageblock_migratetype(pfn_to_page(isolate_pageblock));
VM_BUG_ON(!is_migrate_isolate(mt));
} else {
diff --git a/mm/shmem.c b/mm/shmem.c
index 8280a5cb48df..c1d8b8a1aa3b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -2424,9 +2424,26 @@ int shmem_mfill_atomic_pte(struct mm_struct *dst_mm,
if (!zeropage) { /* COPY */
page_kaddr = kmap_local_folio(folio, 0);
+ /*
+ * The read mmap_lock is held here. Despite the
+	 * mmap_lock being read-recursive, a deadlock is still
+ * possible if a writer has taken a lock. For example:
+ *
+ * process A thread 1 takes read lock on own mmap_lock
+ * process A thread 2 calls mmap, blocks taking write lock
+ * process B thread 1 takes page fault, read lock on own mmap lock
+ * process B thread 2 calls mmap, blocks taking write lock
+ * process A thread 1 blocks taking read lock on process B
+ * process B thread 1 blocks taking read lock on process A
+ *
+ * Disable page faults to prevent potential deadlock
+ * and retry the copy outside the mmap_lock.
+ */
+ pagefault_disable();
ret = copy_from_user(page_kaddr,
(const void __user *)src_addr,
PAGE_SIZE);
+ pagefault_enable();
kunmap_local(page_kaddr);
/* fallback to copy_from_user outside mmap_lock */
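The shape of the fix is a fast path that must not sleep (pagefault_disable() around the copy while mmap_lock is held) plus a retry outside the lock when the fast path fails. A loose userspace analogue, assuming POSIX threads, where a trylock stands in for the no-sleeping constraint; every name here is illustrative:

#include <pthread.h>
#include <stdio.h>
#include <string.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static char dst[16];

static int copy_fast(const char *src)
{
        /* Must not block here (kernel: pagefault_disable() around
         * copy_from_user()); fail and let the caller fall back. */
        if (pthread_mutex_trylock(&lock) != 0)
                return -1;
        strcpy(dst, src);
        pthread_mutex_unlock(&lock);
        return 0;
}

static int copy_slow(const char *src)
{
        /* Retry where blocking is fine (kernel: after dropping
         * mmap_lock, where taking page faults is safe). */
        pthread_mutex_lock(&lock);
        strcpy(dst, src);
        pthread_mutex_unlock(&lock);
        return 0;
}

int main(void)
{
        if (copy_fast("hello") != 0)
                copy_slow("hello");
        puts(dst);
        return 0;
}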
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index e24e8a47ce8a..3d0fef3980b3 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -157,11 +157,28 @@ static int mcopy_atomic_pte(struct mm_struct *dst_mm,
if (!page)
goto out;
- page_kaddr = kmap_atomic(page);
+ page_kaddr = kmap_local_page(page);
+ /*
+ * The read mmap_lock is held here. Despite the
+	 * mmap_lock being read-recursive, a deadlock is still
+ * possible if a writer has taken a lock. For example:
+ *
+ * process A thread 1 takes read lock on own mmap_lock
+ * process A thread 2 calls mmap, blocks taking write lock
+ * process B thread 1 takes page fault, read lock on own mmap lock
+ * process B thread 2 calls mmap, blocks taking write lock
+ * process A thread 1 blocks taking read lock on process B
+ * process B thread 1 blocks taking read lock on process A
+ *
+ * Disable page faults to prevent potential deadlock
+ * and retry the copy outside the mmap_lock.
+ */
+ pagefault_disable();
ret = copy_from_user(page_kaddr,
(const void __user *) src_addr,
PAGE_SIZE);
- kunmap_atomic(page_kaddr);
+ pagefault_enable();
+ kunmap_local(page_kaddr);
/* fallback to copy_from_user outside mmap_lock */
if (unlikely(ret)) {
@@ -646,11 +663,11 @@ retry:
mmap_read_unlock(dst_mm);
BUG_ON(!page);
- page_kaddr = kmap(page);
+ page_kaddr = kmap_local_page(page);
err = copy_from_user(page_kaddr,
(const void __user *) src_addr,
PAGE_SIZE);
- kunmap(page);
+ kunmap_local(page_kaddr);
if (unlikely(err)) {
err = -EFAULT;
goto out;