summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.mailmap4
-rw-r--r--Documentation/vm/hwpoison.rst3
-rw-r--r--MAINTAINERS21
-rw-r--r--drivers/base/memory.c2
-rw-r--r--fs/hugetlbfs/inode.c68
-rw-r--r--include/linux/mm.h3
-rw-r--r--kernel/kthread.c14
-rw-r--r--mm/damon/reclaim.c8
-rw-r--r--mm/hwpoison-inject.c2
-rw-r--r--mm/kfence/core.c7
-rw-r--r--mm/madvise.c2
-rw-r--r--mm/memcontrol.c2
-rw-r--r--mm/memory-failure.c12
-rw-r--r--mm/page_isolation.c2
-rw-r--r--mm/swap.c2
15 files changed, 117 insertions, 35 deletions
diff --git a/.mailmap b/.mailmap
index 825fae8e6b7b..2ed1cf869175 100644
--- a/.mailmap
+++ b/.mailmap
@@ -10,6 +10,8 @@
# Please keep this list dictionary sorted.
#
Aaron Durbin <adurbin@google.com>
+Abel Vesa <abelvesa@kernel.org> <abel.vesa@nxp.com>
+Abel Vesa <abelvesa@kernel.org> <abelvesa@gmail.com>
Abhinav Kumar <quic_abhinavk@quicinc.com> <abhinavk@codeaurora.org>
Adam Oldham <oldhamca@gmail.com>
Adam Radford <aradford@gmail.com>
@@ -85,6 +87,7 @@ Christian Borntraeger <borntraeger@linux.ibm.com> <borntrae@de.ibm.com>
Christian Brauner <brauner@kernel.org> <christian@brauner.io>
Christian Brauner <brauner@kernel.org> <christian.brauner@canonical.com>
Christian Brauner <brauner@kernel.org> <christian.brauner@ubuntu.com>
+Christian Marangi <ansuelsmth@gmail.com>
Christophe Ricard <christophe.ricard@gmail.com>
Christoph Hellwig <hch@lst.de>
Colin Ian King <colin.king@intel.com> <colin.king@canonical.com>
@@ -165,6 +168,7 @@ Jan Glauber <jan.glauber@gmail.com> <jang@de.ibm.com>
Jan Glauber <jan.glauber@gmail.com> <jang@linux.vnet.ibm.com>
Jan Glauber <jan.glauber@gmail.com> <jglauber@cavium.com>
Jarkko Sakkinen <jarkko@kernel.org> <jarkko.sakkinen@linux.intel.com>
+Jarkko Sakkinen <jarkko@kernel.org> <jarkko@profian.com>
Jason Gunthorpe <jgg@ziepe.ca> <jgg@mellanox.com>
Jason Gunthorpe <jgg@ziepe.ca> <jgg@nvidia.com>
Jason Gunthorpe <jgg@ziepe.ca> <jgunthorpe@obsidianresearch.com>
diff --git a/Documentation/vm/hwpoison.rst b/Documentation/vm/hwpoison.rst
index c742de1769d1..b9d5253c1305 100644
--- a/Documentation/vm/hwpoison.rst
+++ b/Documentation/vm/hwpoison.rst
@@ -120,7 +120,8 @@ Testing
unpoison-pfn
Software-unpoison page at PFN echoed into this file. This way
a page can be reused again. This only works for Linux
- injected failures, not for real memory failures.
+ injected failures, not for real memory failures. Once any hardware
+ memory failure happens, this feature is disabled.
Note these injection interfaces are not stable and might change between
kernel versions
diff --git a/MAINTAINERS b/MAINTAINERS
index 063332472ca8..146308aeab71 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9152,6 +9152,7 @@ F: drivers/media/platform/st/sti/hva
HWPOISON MEMORY FAILURE HANDLING
M: Naoya Horiguchi <naoya.horiguchi@nec.com>
+R: Miaohe Lin <linmiaohe@huawei.com>
L: linux-mm@kvack.org
S: Maintained
F: mm/hwpoison-inject.c
@@ -11623,6 +11624,7 @@ F: drivers/gpu/drm/bridge/lontium-lt8912b.c
LOONGARCH
M: Huacai Chen <chenhuacai@kernel.org>
R: WANG Xuerui <kernel@xen0n.name>
+L: loongarch@lists.linux.dev
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson.git
F: arch/loongarch/
@@ -12879,9 +12881,8 @@ M: Andrew Morton <akpm@linux-foundation.org>
L: linux-mm@kvack.org
S: Maintained
W: http://www.linux-mm.org
-T: quilt https://ozlabs.org/~akpm/mmotm/
-T: quilt https://ozlabs.org/~akpm/mmots/
-T: git git://github.com/hnaz/linux-mm.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
+T: quilt git://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new
F: include/linux/gfp.h
F: include/linux/memory_hotplug.h
F: include/linux/mm.h
@@ -12891,6 +12892,18 @@ F: include/linux/vmalloc.h
F: mm/
F: tools/testing/selftests/vm/
+MEMORY HOT(UN)PLUG
+M: David Hildenbrand <david@redhat.com>
+M: Oscar Salvador <osalvador@suse.de>
+L: linux-mm@kvack.org
+S: Maintained
+F: Documentation/admin-guide/mm/memory-hotplug.rst
+F: Documentation/core-api/memory-hotplug.rst
+F: drivers/base/memory.c
+F: include/linux/memory_hotplug.h
+F: mm/memory_hotplug.c
+F: tools/testing/selftests/memory-hotplug/
+
MEMORY TECHNOLOGY DEVICES (MTD)
M: Miquel Raynal <miquel.raynal@bootlin.com>
M: Richard Weinberger <richard@nod.at>
@@ -14295,7 +14308,7 @@ F: drivers/iio/gyro/fxas21002c_i2c.c
F: drivers/iio/gyro/fxas21002c_spi.c
NXP i.MX CLOCK DRIVERS
-M: Abel Vesa <abel.vesa@nxp.com>
+M: Abel Vesa <abelvesa@kernel.org>
L: linux-clk@vger.kernel.org
L: linux-imx@nxp.com
S: Maintained
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index 084d67fd55cc..bc60c9cd3230 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -558,7 +558,7 @@ static ssize_t hard_offline_page_store(struct device *dev,
if (kstrtoull(buf, 0, &pfn) < 0)
return -EINVAL;
pfn >>= PAGE_SHIFT;
- ret = memory_failure(pfn, 0);
+ ret = memory_failure(pfn, MF_SW_SIMULATED);
if (ret == -EOPNOTSUPP)
ret = 0;
return ret ? ret : count;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 62408047e8d7..02eb72351b15 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -600,41 +600,79 @@ static void hugetlb_vmtruncate(struct inode *inode, loff_t offset)
remove_inode_hugepages(inode, offset, LLONG_MAX);
}
+static void hugetlbfs_zero_partial_page(struct hstate *h,
+ struct address_space *mapping,
+ loff_t start,
+ loff_t end)
+{
+ pgoff_t idx = start >> huge_page_shift(h);
+ struct folio *folio;
+
+ folio = filemap_lock_folio(mapping, idx);
+ if (!folio)
+ return;
+
+ start = start & ~huge_page_mask(h);
+ end = end & ~huge_page_mask(h);
+ if (!end)
+ end = huge_page_size(h);
+
+ folio_zero_segment(folio, (size_t)start, (size_t)end);
+
+ folio_unlock(folio);
+ folio_put(folio);
+}
+
static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
{
+ struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
+ struct address_space *mapping = inode->i_mapping;
struct hstate *h = hstate_inode(inode);
loff_t hpage_size = huge_page_size(h);
loff_t hole_start, hole_end;
/*
- * For hole punch round up the beginning offset of the hole and
- * round down the end.
+ * hole_start and hole_end indicate the full pages within the hole.
*/
hole_start = round_up(offset, hpage_size);
hole_end = round_down(offset + len, hpage_size);
- if (hole_end > hole_start) {
- struct address_space *mapping = inode->i_mapping;
- struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
+ inode_lock(inode);
- inode_lock(inode);
+ /* protected by i_rwsem */
+ if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) {
+ inode_unlock(inode);
+ return -EPERM;
+ }
- /* protected by i_rwsem */
- if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) {
- inode_unlock(inode);
- return -EPERM;
- }
+ i_mmap_lock_write(mapping);
+
+ /* If range starts before first full page, zero partial page. */
+ if (offset < hole_start)
+ hugetlbfs_zero_partial_page(h, mapping,
+ offset, min(offset + len, hole_start));
- i_mmap_lock_write(mapping);
+ /* Unmap users of full pages in the hole. */
+ if (hole_end > hole_start) {
if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
hugetlb_vmdelete_list(&mapping->i_mmap,
hole_start >> PAGE_SHIFT,
hole_end >> PAGE_SHIFT, 0);
- i_mmap_unlock_write(mapping);
- remove_inode_hugepages(inode, hole_start, hole_end);
- inode_unlock(inode);
}
+ /* If range extends beyond last full page, zero partial page. */
+ if ((offset + len) > hole_end && (offset + len) > hole_start)
+ hugetlbfs_zero_partial_page(h, mapping,
+ hole_end, offset + len);
+
+ i_mmap_unlock_write(mapping);
+
+ /* Remove full pages from the file. */
+ if (hole_end > hole_start)
+ remove_inode_hugepages(inode, hole_start, hole_end);
+
+ inode_unlock(inode);
+
return 0;
}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index bc8f326be0ce..cf3d0d673f6b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1600,7 +1600,7 @@ static inline bool is_pinnable_page(struct page *page)
if (mt == MIGRATE_CMA || mt == MIGRATE_ISOLATE)
return false;
#endif
- return !(is_zone_movable_page(page) || is_zero_pfn(page_to_pfn(page)));
+ return !is_zone_movable_page(page) || is_zero_pfn(page_to_pfn(page));
}
#else
static inline bool is_pinnable_page(struct page *page)
@@ -3232,6 +3232,7 @@ enum mf_flags {
MF_MUST_KILL = 1 << 2,
MF_SOFT_OFFLINE = 1 << 3,
MF_UNPOISON = 1 << 4,
+ MF_SW_SIMULATED = 1 << 5,
};
extern int memory_failure(unsigned long pfn, int flags);
extern void memory_failure_queue(unsigned long pfn, int flags);
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 544fd4097406..3c677918d8f2 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -340,7 +340,7 @@ static int kthread(void *_create)
self = to_kthread(current);
- /* If user was SIGKILLed, I release the structure. */
+ /* Release the structure when caller killed by a fatal signal. */
done = xchg(&create->done, NULL);
if (!done) {
kfree(create);
@@ -398,7 +398,7 @@ static void create_kthread(struct kthread_create_info *create)
/* We want our own signal handler (we take no signals by default). */
pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD);
if (pid < 0) {
- /* If user was SIGKILLed, I release the structure. */
+ /* Release the structure when caller killed by a fatal signal. */
struct completion *done = xchg(&create->done, NULL);
if (!done) {
@@ -440,9 +440,9 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
*/
if (unlikely(wait_for_completion_killable(&done))) {
/*
- * If I was SIGKILLed before kthreadd (or new kernel thread)
- * calls complete(), leave the cleanup of this structure to
- * that thread.
+ * If I was killed by a fatal signal before kthreadd (or new
+ * kernel thread) calls complete(), leave the cleanup of this
+ * structure to that thread.
*/
if (xchg(&create->done, NULL))
return ERR_PTR(-EINTR);
@@ -876,7 +876,7 @@ fail_task:
*
* Returns a pointer to the allocated worker on success, ERR_PTR(-ENOMEM)
* when the needed structures could not get allocated, and ERR_PTR(-EINTR)
- * when the worker was SIGKILLed.
+ * when the caller was killed by a fatal signal.
*/
struct kthread_worker *
kthread_create_worker(unsigned int flags, const char namefmt[], ...)
@@ -925,7 +925,7 @@ EXPORT_SYMBOL(kthread_create_worker);
* Return:
* The pointer to the allocated worker on success, ERR_PTR(-ENOMEM)
* when the needed structures could not get allocated, and ERR_PTR(-EINTR)
- * when the worker was SIGKILLed.
+ * when the caller was killed by a fatal signal.
*/
struct kthread_worker *
kthread_create_worker_on_cpu(int cpu, unsigned int flags,
diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c
index 8efbfb24f3a1..4b07c29effe9 100644
--- a/mm/damon/reclaim.c
+++ b/mm/damon/reclaim.c
@@ -374,6 +374,8 @@ static void damon_reclaim_timer_fn(struct work_struct *work)
}
static DECLARE_DELAYED_WORK(damon_reclaim_timer, damon_reclaim_timer_fn);
+static bool damon_reclaim_initialized;
+
static int enabled_store(const char *val,
const struct kernel_param *kp)
{
@@ -382,6 +384,10 @@ static int enabled_store(const char *val,
if (rc < 0)
return rc;
+ /* system_wq might not be initialized yet */
+ if (!damon_reclaim_initialized)
+ return rc;
+
if (enabled)
schedule_delayed_work(&damon_reclaim_timer, 0);
@@ -449,6 +455,8 @@ static int __init damon_reclaim_init(void)
damon_add_target(ctx, target);
schedule_delayed_work(&damon_reclaim_timer, 0);
+
+ damon_reclaim_initialized = true;
return 0;
}
diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c
index 5c0cddd81505..65e242b5a432 100644
--- a/mm/hwpoison-inject.c
+++ b/mm/hwpoison-inject.c
@@ -48,7 +48,7 @@ static int hwpoison_inject(void *data, u64 val)
inject:
pr_info("Injecting memory failure at pfn %#lx\n", pfn);
- err = memory_failure(pfn, 0);
+ err = memory_failure(pfn, MF_SW_SIMULATED);
return (err == -EOPNOTSUPP) ? 0 : err;
}
diff --git a/mm/kfence/core.c b/mm/kfence/core.c
index 4e7cd4c8e687..4b5e5a3d3a63 100644
--- a/mm/kfence/core.c
+++ b/mm/kfence/core.c
@@ -360,6 +360,9 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g
unsigned long flags;
struct slab *slab;
void *addr;
+ const bool random_right_allocate = prandom_u32_max(2);
+ const bool random_fault = CONFIG_KFENCE_STRESS_TEST_FAULTS &&
+ !prandom_u32_max(CONFIG_KFENCE_STRESS_TEST_FAULTS);
/* Try to obtain a free object. */
raw_spin_lock_irqsave(&kfence_freelist_lock, flags);
@@ -404,7 +407,7 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g
* is that the out-of-bounds accesses detected are deterministic for
* such allocations.
*/
- if (prandom_u32_max(2)) {
+ if (random_right_allocate) {
/* Allocate on the "right" side, re-calculate address. */
meta->addr += PAGE_SIZE - size;
meta->addr = ALIGN_DOWN(meta->addr, cache->align);
@@ -444,7 +447,7 @@ static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t g
if (cache->ctor)
cache->ctor(addr);
- if (CONFIG_KFENCE_STRESS_TEST_FAULTS && !prandom_u32_max(CONFIG_KFENCE_STRESS_TEST_FAULTS))
+ if (random_fault)
kfence_protect(meta->addr); /* Random "faults" by protecting the object. */
atomic_long_inc(&counters[KFENCE_COUNTER_ALLOCATED]);
diff --git a/mm/madvise.c b/mm/madvise.c
index d7b4f2602949..0316bbc6441b 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -1112,7 +1112,7 @@ static int madvise_inject_error(int behavior,
} else {
pr_info("Injecting memory failure for pfn %#lx at process virtual address %#lx\n",
pfn, start);
- ret = memory_failure(pfn, MF_COUNT_INCREASED);
+ ret = memory_failure(pfn, MF_COUNT_INCREASED | MF_SW_SIMULATED);
if (ret == -EOPNOTSUPP)
ret = 0;
}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index abec50f31fe6..618c366a2f07 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -4859,7 +4859,7 @@ static int mem_cgroup_slab_show(struct seq_file *m, void *p)
{
/*
* Deprecated.
- * Please, take a look at tools/cgroup/slabinfo.py .
+ * Please, take a look at tools/cgroup/memcg_slabinfo.py .
*/
return 0;
}
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index b85661cbdc4a..da39ec8afca8 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -69,6 +69,8 @@ int sysctl_memory_failure_recovery __read_mostly = 1;
atomic_long_t num_poisoned_pages __read_mostly = ATOMIC_LONG_INIT(0);
+static bool hw_memory_failure __read_mostly = false;
+
static bool __page_handle_poison(struct page *page)
{
int ret;
@@ -1768,6 +1770,9 @@ int memory_failure(unsigned long pfn, int flags)
mutex_lock(&mf_mutex);
+ if (!(flags & MF_SW_SIMULATED))
+ hw_memory_failure = true;
+
p = pfn_to_online_page(pfn);
if (!p) {
res = arch_memory_failure(pfn, flags);
@@ -2103,6 +2108,13 @@ int unpoison_memory(unsigned long pfn)
mutex_lock(&mf_mutex);
+ if (hw_memory_failure) {
+ unpoison_pr_info("Unpoison: Disabled after HW memory failure %#lx\n",
+ pfn, &unpoison_rs);
+ ret = -EOPNOTSUPP;
+ goto unlock_mutex;
+ }
+
if (!PageHWPoison(p)) {
unpoison_pr_info("Unpoison: Page was already unpoisoned %#lx\n",
pfn, &unpoison_rs);
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index d200d41ad0d3..9d73dc38e3d7 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -286,6 +286,8 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
* @flags: isolation flags
* @gfp_flags: GFP flags used for migrating pages
* @isolate_before: isolate the pageblock before the boundary_pfn
+ * @skip_isolation: the flag to skip the pageblock isolation in the second
+ * isolate_single_pageblock()
*
* Free and in-use pages can be as big as MAX_ORDER-1 and contain more than one
* pageblock. When not all pageblocks within a page are isolated at the same
diff --git a/mm/swap.c b/mm/swap.c
index f3922a96b2e9..034bb24879a3 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -881,7 +881,7 @@ void lru_cache_disable(void)
* lru_disable_count = 0 will have exited the critical
* section when synchronize_rcu() returns.
*/
- synchronize_rcu();
+ synchronize_rcu_expedited();
#ifdef CONFIG_SMP
__lru_add_drain_all(true);
#else