summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/Kconfig.debug6
-rw-r--r--mm/backing-dev.c25
-rw-r--r--mm/compaction.c7
-rw-r--r--mm/filemap.c18
-rw-r--r--mm/internal.h4
-rw-r--r--mm/kasan/generic.c22
-rw-r--r--mm/memblock.c5
-rw-r--r--mm/mmap.c10
-rw-r--r--mm/page_alloc.c32
-rw-r--r--mm/shmem.c52
-rw-r--r--mm/slab.h11
-rw-r--r--mm/slab_common.c29
-rw-r--r--mm/slub.c116
-rw-r--r--mm/swapfile.c22
-rw-r--r--mm/userfaultfd.c6
-rw-r--r--mm/util.c17
-rw-r--r--mm/vmalloc.c83
-rw-r--r--mm/vmscan.c5
-rw-r--r--mm/workingset.c1
19 files changed, 284 insertions, 187 deletions
diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug
index 321ab379994f..afc72fde0f03 100644
--- a/mm/Kconfig.debug
+++ b/mm/Kconfig.debug
@@ -64,11 +64,11 @@ config SLUB_DEBUG_ON
help
Boot with debugging on by default. SLUB boots by default with
the runtime debug capabilities switched off. Enabling this is
- equivalent to specifying the "slub_debug" parameter on boot.
+ equivalent to specifying the "slab_debug" parameter on boot.
There is no support for more fine grained debug control like
- possible with slub_debug=xxx. SLUB debugging may be switched
+ possible with slab_debug=xxx. SLUB debugging may be switched
off in a kernel built with CONFIG_SLUB_DEBUG_ON by specifying
- "slub_debug=-".
+ "slab_debug=-".
config PAGE_OWNER
bool "Track page owner"
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index e039d05304dd..5f2be8c8df11 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -372,31 +372,6 @@ static int __init default_bdi_init(void)
}
subsys_initcall(default_bdi_init);
-/*
- * This function is used when the first inode for this wb is marked dirty. It
- * wakes-up the corresponding bdi thread which should then take care of the
- * periodic background write-out of dirty inodes. Since the write-out would
- * starts only 'dirty_writeback_interval' centisecs from now anyway, we just
- * set up a timer which wakes the bdi thread up later.
- *
- * Note, we wouldn't bother setting up the timer, but this function is on the
- * fast-path (used by '__mark_inode_dirty()'), so we save few context switches
- * by delaying the wake-up.
- *
- * We have to be careful not to postpone flush work if it is scheduled for
- * earlier. Thus we use queue_delayed_work().
- */
-void wb_wakeup_delayed(struct bdi_writeback *wb)
-{
- unsigned long timeout;
-
- timeout = msecs_to_jiffies(dirty_writeback_interval * 10);
- spin_lock_irq(&wb->work_lock);
- if (test_bit(WB_registered, &wb->state))
- queue_delayed_work(bdi_wq, &wb->dwork, timeout);
- spin_unlock_irq(&wb->work_lock);
-}
-
static void wb_update_bandwidth_workfn(struct work_struct *work)
{
struct bdi_writeback *wb = container_of(to_delayed_work(work),
diff --git a/mm/compaction.c b/mm/compaction.c
index 1faeffb28720..807b58e6eb68 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -2827,16 +2827,11 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
unsigned int alloc_flags, const struct alloc_context *ac,
enum compact_priority prio, struct page **capture)
{
- int may_perform_io = (__force int)(gfp_mask & __GFP_IO);
struct zoneref *z;
struct zone *zone;
enum compact_result rc = COMPACT_SKIPPED;
- /*
- * Check if the GFP flags allow compaction - GFP_NOIO is really
- * tricky context because the migration might require IO
- */
- if (!may_perform_io)
+ if (!gfp_compaction_allowed(gfp_mask))
return COMPACT_SKIPPED;
trace_mm_compaction_try_to_compact_pages(order, gfp_mask, prio);
diff --git a/mm/filemap.c b/mm/filemap.c
index 31ab455c4537..7437b2bd75c1 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -124,6 +124,15 @@
* ->private_lock (zap_pte_range->block_dirty_folio)
*/
+static void mapping_set_update(struct xa_state *xas,
+ struct address_space *mapping)
+{
+ if (dax_mapping(mapping) || shmem_mapping(mapping))
+ return;
+ xas_set_update(xas, workingset_update_node);
+ xas_set_lru(xas, &shadow_nodes);
+}
+
static void page_cache_delete(struct address_space *mapping,
struct folio *folio, void *shadow)
{
@@ -2607,15 +2616,6 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
end_offset = min_t(loff_t, isize, iocb->ki_pos + iter->count);
/*
- * Pairs with a barrier in
- * block_write_end()->mark_buffer_dirty() or other page
- * dirtying routines like iomap_write_end() to ensure
- * changes to page contents are visible before we see
- * increased inode size.
- */
- smp_rmb();
-
- /*
* Once we start copying data, we don't want to be touching any
* cachelines that might be contended:
*/
diff --git a/mm/internal.h b/mm/internal.h
index d1c69119b24f..7e486f2c502c 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -1371,4 +1371,8 @@ static inline void shrinker_debugfs_remove(struct dentry *debugfs_entry,
}
#endif /* CONFIG_SHRINKER_DEBUG */
+/* Only track the nodes of mappings with shadow entries */
+void workingset_update_node(struct xa_node *node);
+extern struct list_lru shadow_nodes;
+
#endif /* __MM_INTERNAL_H */
diff --git a/mm/kasan/generic.c b/mm/kasan/generic.c
index 1900f8576034..6310a180278b 100644
--- a/mm/kasan/generic.c
+++ b/mm/kasan/generic.c
@@ -334,14 +334,6 @@ DEFINE_ASAN_SET_SHADOW(f3);
DEFINE_ASAN_SET_SHADOW(f5);
DEFINE_ASAN_SET_SHADOW(f8);
-/* Only allow cache merging when no per-object metadata is present. */
-slab_flags_t kasan_never_merge(void)
-{
- if (!kasan_requires_meta())
- return 0;
- return SLAB_KASAN;
-}
-
/*
* Adaptive redzone policy taken from the userspace AddressSanitizer runtime.
* For larger allocations larger redzones are used.
@@ -370,15 +362,13 @@ void kasan_cache_create(struct kmem_cache *cache, unsigned int *size,
return;
/*
- * SLAB_KASAN is used to mark caches that are sanitized by KASAN
- * and that thus have per-object metadata.
- * Currently this flag is used in two places:
- * 1. In slab_ksize() to account for per-object metadata when
- * calculating the size of the accessible memory within the object.
- * 2. In slab_common.c via kasan_never_merge() to prevent merging of
- * caches with per-object metadata.
+ * SLAB_KASAN is used to mark caches that are sanitized by KASAN and
+ * that thus have per-object metadata. Currently, this flag is used in
+ * slab_ksize() to account for per-object metadata when calculating the
+ * size of the accessible memory within the object. Additionally, we use
+ * SLAB_NO_MERGE to prevent merging of caches with per-object metadata.
*/
- *flags |= SLAB_KASAN;
+ *flags |= SLAB_KASAN | SLAB_NO_MERGE;
ok_size = *size;
diff --git a/mm/memblock.c b/mm/memblock.c
index d9f4b82cbffe..d09136e040d3 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -180,8 +180,9 @@ static inline phys_addr_t memblock_cap_size(phys_addr_t base, phys_addr_t *size)
/*
* Address comparison utilities
*/
-static unsigned long __init_memblock memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1,
- phys_addr_t base2, phys_addr_t size2)
+unsigned long __init_memblock
+memblock_addrs_overlap(phys_addr_t base1, phys_addr_t size1, phys_addr_t base2,
+ phys_addr_t size2)
{
return ((base1 < (base2 + size2)) && (base2 < (base1 + size1)));
}
diff --git a/mm/mmap.c b/mm/mmap.c
index ccf377ee319f..04da02114c6f 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -955,13 +955,21 @@ static struct vm_area_struct
} else if (merge_prev) { /* case 2 */
if (curr) {
vma_start_write(curr);
- err = dup_anon_vma(prev, curr, &anon_dup);
if (end == curr->vm_end) { /* case 7 */
+ /*
+ * can_vma_merge_after() assumed we would not be
+ * removing prev vma, so it skipped the check
+ * for vm_ops->close, but we are removing curr
+ */
+ if (curr->vm_ops && curr->vm_ops->close)
+ err = -EINVAL;
remove = curr;
} else { /* case 5 */
adjust = curr;
adj_start = (end - curr->vm_start);
}
+ if (!err)
+ err = dup_anon_vma(prev, curr, &anon_dup);
}
} else { /* merge_next */
vma_start_write(next);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index cc7f9b322193..14d39f34d336 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4047,6 +4047,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
struct alloc_context *ac)
{
bool can_direct_reclaim = gfp_mask & __GFP_DIRECT_RECLAIM;
+ bool can_compact = gfp_compaction_allowed(gfp_mask);
const bool costly_order = order > PAGE_ALLOC_COSTLY_ORDER;
struct page *page = NULL;
unsigned int alloc_flags;
@@ -4117,7 +4118,7 @@ restart:
* Don't try this for allocations that are allowed to ignore
* watermarks, as the ALLOC_NO_WATERMARKS attempt didn't yet happen.
*/
- if (can_direct_reclaim &&
+ if (can_direct_reclaim && can_compact &&
(costly_order ||
(order > 0 && ac->migratetype != MIGRATE_MOVABLE))
&& !gfp_pfmemalloc_allowed(gfp_mask)) {
@@ -4215,9 +4216,10 @@ retry:
/*
* Do not retry costly high order allocations unless they are
- * __GFP_RETRY_MAYFAIL
+ * __GFP_RETRY_MAYFAIL and we can compact
*/
- if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL))
+ if (costly_order && (!can_compact ||
+ !(gfp_mask & __GFP_RETRY_MAYFAIL)))
goto nopage;
if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags,
@@ -4230,7 +4232,7 @@ retry:
* implementation of the compaction depends on the sufficient amount
* of free memory (see __compaction_suitable)
*/
- if (did_some_progress > 0 &&
+ if (did_some_progress > 0 && can_compact &&
should_compact_retry(ac, order, alloc_flags,
compact_result, &compact_priority,
&compaction_retries))
@@ -4691,8 +4693,8 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
gfp_t gfp = gfp_mask;
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
- gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY |
- __GFP_NOMEMALLOC;
+ gfp_mask = (gfp_mask & ~__GFP_DIRECT_RECLAIM) | __GFP_COMP |
+ __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC;
page = alloc_pages_node(NUMA_NO_NODE, gfp_mask,
PAGE_FRAG_CACHE_MAX_ORDER);
nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE;
@@ -4705,6 +4707,16 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
return page;
}
+void page_frag_cache_drain(struct page_frag_cache *nc)
+{
+ if (!nc->va)
+ return;
+
+ __page_frag_cache_drain(virt_to_head_page(nc->va), nc->pagecnt_bias);
+ nc->va = NULL;
+}
+EXPORT_SYMBOL(page_frag_cache_drain);
+
void __page_frag_cache_drain(struct page *page, unsigned int count)
{
VM_BUG_ON_PAGE(page_ref_count(page) == 0, page);
@@ -4714,9 +4726,9 @@ void __page_frag_cache_drain(struct page *page, unsigned int count)
}
EXPORT_SYMBOL(__page_frag_cache_drain);
-void *page_frag_alloc_align(struct page_frag_cache *nc,
- unsigned int fragsz, gfp_t gfp_mask,
- unsigned int align_mask)
+void *__page_frag_alloc_align(struct page_frag_cache *nc,
+ unsigned int fragsz, gfp_t gfp_mask,
+ unsigned int align_mask)
{
unsigned int size = PAGE_SIZE;
struct page *page;
@@ -4785,7 +4797,7 @@ refill:
return nc->va + offset;
}
-EXPORT_SYMBOL(page_frag_alloc_align);
+EXPORT_SYMBOL(__page_frag_alloc_align);
/*
* Frees a page fragment allocated out of either a compound or order 0 page.
diff --git a/mm/shmem.c b/mm/shmem.c
index 30c9dc862505..0aad0d9a621b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -254,7 +254,7 @@ static void shmem_inode_unacct_blocks(struct inode *inode, long pages)
}
static const struct super_operations shmem_ops;
-const struct address_space_operations shmem_aops;
+static const struct address_space_operations shmem_aops;
static const struct file_operations shmem_file_operations;
static const struct inode_operations shmem_inode_operations;
static const struct inode_operations shmem_dir_inode_operations;
@@ -263,6 +263,12 @@ static const struct vm_operations_struct shmem_vm_ops;
static const struct vm_operations_struct shmem_anon_vm_ops;
static struct file_system_type shmem_fs_type;
+bool shmem_mapping(struct address_space *mapping)
+{
+ return mapping->a_ops == &shmem_aops;
+}
+EXPORT_SYMBOL_GPL(shmem_mapping);
+
bool vma_is_anon_shmem(struct vm_area_struct *vma)
{
return vma->vm_ops == &shmem_anon_vm_ops;
@@ -311,7 +317,7 @@ static void shmem_disable_quotas(struct super_block *sb)
dquot_quota_off(sb, type);
}
-static struct dquot **shmem_get_dquots(struct inode *inode)
+static struct dquot __rcu **shmem_get_dquots(struct inode *inode)
{
return SHMEM_I(inode)->i_dquot;
}
@@ -1966,6 +1972,9 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
int error;
bool alloced;
+ if (WARN_ON_ONCE(!shmem_mapping(inode->i_mapping)))
+ return -EINVAL;
+
if (index > (MAX_LFS_FILESIZE >> PAGE_SHIFT))
return -EFBIG;
repeat:
@@ -2128,12 +2137,36 @@ unlock:
return error;
}
+/**
+ * shmem_get_folio - find, and lock a shmem folio.
+ * @inode: inode to search
+ * @index: the page index.
+ * @foliop: pointer to the folio if found
+ * @sgp: SGP_* flags to control behavior
+ *
+ * Looks up the page cache entry at @inode & @index. If a folio is
+ * present, it is returned locked with an increased refcount.
+ *
+ * If the caller modifies data in the folio, it must call folio_mark_dirty()
+ * before unlocking the folio to ensure that the folio is not reclaimed.
+ * There is no need to reserve space before calling folio_mark_dirty().
+ *
+ * When no folio is found, the behavior depends on @sgp:
+ * - for SGP_READ, *@foliop is %NULL and 0 is returned
+ * - for SGP_NOALLOC, *@foliop is %NULL and -ENOENT is returned
+ * - for all other flags a new folio is allocated, inserted into the
+ * page cache and returned locked in @foliop.
+ *
+ * Context: May sleep.
+ * Return: 0 if successful, else a negative error code.
+ */
int shmem_get_folio(struct inode *inode, pgoff_t index, struct folio **foliop,
enum sgp_type sgp)
{
return shmem_get_folio_gfp(inode, index, foliop, sgp,
mapping_gfp_mask(inode->i_mapping), NULL, NULL);
}
+EXPORT_SYMBOL_GPL(shmem_get_folio);
/*
* This is like autoremove_wake_function, but it removes the wait queue
@@ -3374,7 +3407,7 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry)
static int shmem_rmdir(struct inode *dir, struct dentry *dentry)
{
- if (!simple_empty(dentry))
+ if (!simple_offset_empty(dentry))
return -ENOTEMPTY;
drop_nlink(d_inode(dentry));
@@ -3431,7 +3464,7 @@ static int shmem_rename2(struct mnt_idmap *idmap,
return simple_offset_rename_exchange(old_dir, old_dentry,
new_dir, new_dentry);
- if (!simple_empty(new_dentry))
+ if (!simple_offset_empty(new_dentry))
return -ENOTEMPTY;
if (flags & RENAME_WHITEOUT) {
@@ -3500,10 +3533,10 @@ static int shmem_symlink(struct mnt_idmap *idmap, struct inode *dir,
inode->i_op = &shmem_short_symlink_operations;
} else {
inode_nohighmem(inode);
+ inode->i_mapping->a_ops = &shmem_aops;
error = shmem_get_folio(inode, 0, &folio, SGP_WRITE);
if (error)
goto out_remove_offset;
- inode->i_mapping->a_ops = &shmem_aops;
inode->i_op = &shmem_symlink_inode_operations;
memcpy(folio_address(folio), symname, len);
folio_mark_uptodate(folio);
@@ -4373,7 +4406,9 @@ static int shmem_fill_super(struct super_block *sb, struct fs_context *fc)
#ifdef CONFIG_TMPFS_POSIX_ACL
sb->s_flags |= SB_POSIXACL;
#endif
- uuid_gen(&sb->s_uuid);
+ uuid_t uuid;
+ uuid_gen(&uuid);
+ super_set_uuid(sb, uuid.b, sizeof(uuid));
#ifdef CONFIG_TMPFS_QUOTA
if (ctx->seen & SHMEM_SEEN_QUOTA) {
@@ -4484,7 +4519,7 @@ static int shmem_error_remove_folio(struct address_space *mapping,
return 0;
}
-const struct address_space_operations shmem_aops = {
+static const struct address_space_operations shmem_aops = {
.writepage = shmem_writepage,
.dirty_folio = noop_dirty_folio,
#ifdef CONFIG_TMPFS
@@ -4496,7 +4531,6 @@ const struct address_space_operations shmem_aops = {
#endif
.error_remove_folio = shmem_error_remove_folio,
};
-EXPORT_SYMBOL(shmem_aops);
static const struct file_operations shmem_file_operations = {
.mmap = shmem_mmap,
@@ -4851,6 +4885,7 @@ struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned lon
{
return __shmem_file_setup(shm_mnt, name, size, flags, S_PRIVATE);
}
+EXPORT_SYMBOL_GPL(shmem_kernel_file_setup);
/**
* shmem_file_setup - get an unlinked file living in tmpfs
@@ -4928,7 +4963,6 @@ struct folio *shmem_read_folio_gfp(struct address_space *mapping,
struct folio *folio;
int error;
- BUG_ON(!shmem_mapping(mapping));
error = shmem_get_folio_gfp(inode, index, &folio, SGP_CACHE,
gfp, NULL, NULL);
if (error)
diff --git a/mm/slab.h b/mm/slab.h
index 54deeb0428c6..d2bc9b191222 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -363,7 +363,6 @@ static inline int objs_per_slab(const struct kmem_cache *cache,
enum slab_state {
DOWN, /* No slab functionality yet */
PARTIAL, /* SLUB: kmem_cache_node available */
- PARTIAL_NODE, /* SLAB: kmalloc size for node struct available */
UP, /* Slab caches usable but not all extras yet */
FULL /* Everything is working */
};
@@ -387,7 +386,7 @@ extern const struct kmalloc_info_struct {
/* Kmalloc array related functions */
void setup_kmalloc_cache_index_table(void);
-void create_kmalloc_caches(slab_flags_t);
+void create_kmalloc_caches(void);
extern u8 kmalloc_size_index[24];
@@ -422,8 +421,6 @@ gfp_t kmalloc_fix_flags(gfp_t flags);
int __kmem_cache_create(struct kmem_cache *, slab_flags_t flags);
void __init kmem_cache_init(void);
-void __init new_kmalloc_cache(int idx, enum kmalloc_cache_type type,
- slab_flags_t flags);
extern void create_boot_cache(struct kmem_cache *, const char *name,
unsigned int size, slab_flags_t flags,
unsigned int useroffset, unsigned int usersize);
@@ -435,8 +432,7 @@ struct kmem_cache *
__kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
slab_flags_t flags, void (*ctor)(void *));
-slab_flags_t kmem_cache_flags(unsigned int object_size,
- slab_flags_t flags, const char *name);
+slab_flags_t kmem_cache_flags(slab_flags_t flags, const char *name);
static inline bool is_kmalloc_cache(struct kmem_cache *s)
{
@@ -469,7 +465,6 @@ static inline bool is_kmalloc_cache(struct kmem_cache *s)
SLAB_STORE_USER | \
SLAB_TRACE | \
SLAB_CONSISTENCY_CHECKS | \
- SLAB_MEM_SPREAD | \
SLAB_NOLEAKTRACE | \
SLAB_RECLAIM_ACCOUNT | \
SLAB_TEMPORARY | \
@@ -528,7 +523,7 @@ static inline bool __slub_debug_enabled(void)
#endif
/*
- * Returns true if any of the specified slub_debug flags is enabled for the
+ * Returns true if any of the specified slab_debug flags is enabled for the
* cache. Use only for flags parsed by setup_slub_debug() as it also enables
* the static key.
*/
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 238293b1dbe1..23af762148ca 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -50,7 +50,7 @@ static DECLARE_WORK(slab_caches_to_rcu_destroy_work,
*/
#define SLAB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
SLAB_TRACE | SLAB_TYPESAFE_BY_RCU | SLAB_NOLEAKTRACE | \
- SLAB_FAILSLAB | SLAB_NO_MERGE | kasan_never_merge())
+ SLAB_FAILSLAB | SLAB_NO_MERGE)
#define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
SLAB_CACHE_DMA32 | SLAB_ACCOUNT)
@@ -172,7 +172,7 @@ struct kmem_cache *find_mergeable(unsigned int size, unsigned int align,
size = ALIGN(size, sizeof(void *));
align = calculate_alignment(flags, align, size);
size = ALIGN(size, align);
- flags = kmem_cache_flags(size, flags, name);
+ flags = kmem_cache_flags(flags, name);
if (flags & SLAB_NEVER_MERGE)
return NULL;
@@ -282,7 +282,7 @@ kmem_cache_create_usercopy(const char *name,
#ifdef CONFIG_SLUB_DEBUG
/*
- * If no slub_debug was enabled globally, the static key is not yet
+ * If no slab_debug was enabled globally, the static key is not yet
* enabled by setup_slub_debug(). Enable it if the cache is being
* created with any of the debugging flags passed explicitly.
* It's also possible that this is the first cache created with
@@ -404,8 +404,12 @@ EXPORT_SYMBOL(kmem_cache_create);
*/
static void kmem_cache_release(struct kmem_cache *s)
{
- sysfs_slab_unlink(s);
- sysfs_slab_release(s);
+ if (slab_state >= FULL) {
+ sysfs_slab_unlink(s);
+ sysfs_slab_release(s);
+ } else {
+ slab_kmem_cache_release(s);
+ }
}
#else
static void kmem_cache_release(struct kmem_cache *s)
@@ -766,7 +770,7 @@ EXPORT_SYMBOL(kmalloc_size_roundup);
}
/*
- * kmalloc_info[] is to make slub_debug=,kmalloc-xx option work at boot time.
+ * kmalloc_info[] is to make slab_debug=,kmalloc-xx option work at boot time.
* kmalloc_index() supports up to 2^21=2MB, so the final entry of the table is
* kmalloc-2M.
*/
@@ -853,9 +857,10 @@ static unsigned int __kmalloc_minalign(void)
return max(minalign, arch_slab_minalign());
}
-void __init
-new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
+static void __init
+new_kmalloc_cache(int idx, enum kmalloc_cache_type type)
{
+ slab_flags_t flags = 0;
unsigned int minalign = __kmalloc_minalign();
unsigned int aligned_size = kmalloc_info[idx].size;
int aligned_idx = idx;
@@ -902,7 +907,7 @@ new_kmalloc_cache(int idx, enum kmalloc_cache_type type, slab_flags_t flags)
* may already have been created because they were needed to
* enable allocations for slab creation.
*/
-void __init create_kmalloc_caches(slab_flags_t flags)
+void __init create_kmalloc_caches(void)
{
int i;
enum kmalloc_cache_type type;
@@ -913,7 +918,7 @@ void __init create_kmalloc_caches(slab_flags_t flags)
for (type = KMALLOC_NORMAL; type < NR_KMALLOC_TYPES; type++) {
for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
if (!kmalloc_caches[type][i])
- new_kmalloc_cache(i, type, flags);
+ new_kmalloc_cache(i, type);
/*
* Caches that are not of the two-to-the-power-of size.
@@ -922,10 +927,10 @@ void __init create_kmalloc_caches(slab_flags_t flags)
*/
if (KMALLOC_MIN_SIZE <= 32 && i == 6 &&
!kmalloc_caches[type][1])
- new_kmalloc_cache(1, type, flags);
+ new_kmalloc_cache(1, type);
if (KMALLOC_MIN_SIZE <= 64 && i == 7 &&
!kmalloc_caches[type][2])
- new_kmalloc_cache(2, type, flags);
+ new_kmalloc_cache(2, type);
}
}
#ifdef CONFIG_RANDOM_KMALLOC_CACHES
diff --git a/mm/slub.c b/mm/slub.c
index 2ef88bbf56a3..1bb2a93cf7b6 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -295,7 +295,7 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
/*
* Debugging flags that require metadata to be stored in the slab. These get
- * disabled when slub_debug=O is used and a cache's min order increases with
+ * disabled when slab_debug=O is used and a cache's min order increases with
* metadata.
*/
#define DEBUG_METADATA_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
@@ -306,13 +306,13 @@ static inline bool kmem_cache_has_cpu_partial(struct kmem_cache *s)
/* Internal SLUB flags */
/* Poison object */
-#define __OBJECT_POISON ((slab_flags_t __force)0x80000000U)
+#define __OBJECT_POISON __SLAB_FLAG_BIT(_SLAB_OBJECT_POISON)
/* Use cmpxchg_double */
#ifdef system_has_freelist_aba
-#define __CMPXCHG_DOUBLE ((slab_flags_t __force)0x40000000U)
+#define __CMPXCHG_DOUBLE __SLAB_FLAG_BIT(_SLAB_CMPXCHG_DOUBLE)
#else
-#define __CMPXCHG_DOUBLE ((slab_flags_t __force)0U)
+#define __CMPXCHG_DOUBLE __SLAB_FLAG_UNUSED
#endif
/*
@@ -391,7 +391,7 @@ struct kmem_cache_cpu {
};
struct slab *slab; /* The slab from which we are allocating */
#ifdef CONFIG_SLUB_CPU_PARTIAL
- struct slab *partial; /* Partially allocated frozen slabs */
+ struct slab *partial; /* Partially allocated slabs */
#endif
local_lock_t lock; /* Protects the fields above */
#ifdef CONFIG_SLUB_STATS
@@ -1498,16 +1498,8 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
{
struct kmem_cache_node *n = get_node(s, node);
- /*
- * May be called early in order to allocate a slab for the
- * kmem_cache_node structure. Solve the chicken-egg
- * dilemma by deferring the increment of the count during
- * bootstrap (see early_kmem_cache_node_alloc).
- */
- if (likely(n)) {
- atomic_long_inc(&n->nr_slabs);
- atomic_long_add(objects, &n->total_objects);
- }
+ atomic_long_inc(&n->nr_slabs);
+ atomic_long_add(objects, &n->total_objects);
}
static inline void dec_slabs_node(struct kmem_cache *s, int node, int objects)
{
@@ -1616,7 +1608,7 @@ static inline int free_consistency_checks(struct kmem_cache *s,
}
/*
- * Parse a block of slub_debug options. Blocks are delimited by ';'
+ * Parse a block of slab_debug options. Blocks are delimited by ';'
*
* @str: start of block
* @flags: returns parsed flags, or DEBUG_DEFAULT_FLAGS if none specified
@@ -1677,7 +1669,7 @@ parse_slub_debug_flags(char *str, slab_flags_t *flags, char **slabs, bool init)
break;
default:
if (init)
- pr_err("slub_debug option '%c' unknown. skipped\n", *str);
+ pr_err("slab_debug option '%c' unknown. skipped\n", *str);
}
}
check_slabs:
@@ -1736,7 +1728,7 @@ static int __init setup_slub_debug(char *str)
/*
* For backwards compatibility, a single list of flags with list of
* slabs means debugging is only changed for those slabs, so the global
- * slub_debug should be unchanged (0 or DEBUG_DEFAULT_FLAGS, depending
+ * slab_debug should be unchanged (0 or DEBUG_DEFAULT_FLAGS, depending
* on CONFIG_SLUB_DEBUG_ON). We can extended that to multiple lists as
* long as there is no option specifying flags without a slab list.
*/
@@ -1760,21 +1752,20 @@ out:
return 1;
}
-__setup("slub_debug", setup_slub_debug);
+__setup("slab_debug", setup_slub_debug);
+__setup_param("slub_debug", slub_debug, setup_slub_debug, 0);
/*
* kmem_cache_flags - apply debugging options to the cache
- * @object_size: the size of an object without meta data
* @flags: flags to set
* @name: name of the cache
*
* Debug option(s) are applied to @flags. In addition to the debug
* option(s), if a slab name (or multiple) is specified i.e.
- * slub_debug=<Debug-Options>,<slab name1>,<slab name2> ...
+ * slab_debug=<Debug-Options>,<slab name1>,<slab name2> ...
* then only the select slabs will receive the debug option(s).
*/
-slab_flags_t kmem_cache_flags(unsigned int object_size,
- slab_flags_t flags, const char *name)
+slab_flags_t kmem_cache_flags(slab_flags_t flags, const char *name)
{
char *iter;
size_t len;
@@ -1850,8 +1841,7 @@ static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
struct slab *slab) {}
static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n,
struct slab *slab) {}
-slab_flags_t kmem_cache_flags(unsigned int object_size,
- slab_flags_t flags, const char *name)
+slab_flags_t kmem_cache_flags(slab_flags_t flags, const char *name)
{
return flags;
}
@@ -2038,11 +2028,6 @@ void memcg_slab_alloc_error_hook(struct kmem_cache *s, int objects,
obj_cgroup_uncharge(objcg, objects * obj_full_size(s));
}
#else /* CONFIG_MEMCG_KMEM */
-static inline struct mem_cgroup *memcg_from_slab_obj(void *ptr)
-{
- return NULL;
-}
-
static inline void memcg_free_slab_cgroups(struct slab *slab)
{
}
@@ -2243,7 +2228,7 @@ static void __init init_freelist_randomization(void)
}
/* Get the next entry on the pre-computed freelist randomized */
-static void *next_freelist_entry(struct kmem_cache *s, struct slab *slab,
+static void *next_freelist_entry(struct kmem_cache *s,
unsigned long *pos, void *start,
unsigned long page_limit,
unsigned long freelist_count)
@@ -2282,13 +2267,12 @@ static bool shuffle_freelist(struct kmem_cache *s, struct slab *slab)
start = fixup_red_left(s, slab_address(slab));
/* First entry is used as the base of the freelist */
- cur = next_freelist_entry(s, slab, &pos, start, page_limit,
- freelist_count);
+ cur = next_freelist_entry(s, &pos, start, page_limit, freelist_count);
cur = setup_object(s, cur);
slab->freelist = cur;
for (idx = 1; idx < slab->objects; idx++) {
- next = next_freelist_entry(s, slab, &pos, start, page_limit,
+ next = next_freelist_entry(s, &pos, start, page_limit,
freelist_count);
next = setup_object(s, next);
set_freepointer(s, cur, next);
@@ -3263,7 +3247,7 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
oo_order(s->min));
if (oo_order(s->min) > get_order(s->object_size))
- pr_warn(" %s debugging increased min order, use slub_debug=O to disable.\n",
+ pr_warn(" %s debugging increased min order, use slab_debug=O to disable.\n",
s->name);
for_each_kmem_cache_node(s, node, n) {
@@ -3326,7 +3310,6 @@ static inline void *get_freelist(struct kmem_cache *s, struct slab *slab)
counters = slab->counters;
new.counters = counters;
- VM_BUG_ON(!new.frozen);
new.inuse = slab->objects;
new.frozen = freelist != NULL;
@@ -3498,18 +3481,20 @@ new_slab:
slab = slub_percpu_partial(c);
slub_set_percpu_partial(c, slab);
- local_unlock_irqrestore(&s->cpu_slab->lock, flags);
- stat(s, CPU_PARTIAL_ALLOC);
- if (unlikely(!node_match(slab, node) ||
- !pfmemalloc_match(slab, gfpflags))) {
- slab->next = NULL;
- __put_partials(s, slab);
- continue;
+ if (likely(node_match(slab, node) &&
+ pfmemalloc_match(slab, gfpflags))) {
+ c->slab = slab;
+ freelist = get_freelist(s, slab);
+ VM_BUG_ON(!freelist);
+ stat(s, CPU_PARTIAL_ALLOC);
+ goto load_freelist;
}
- freelist = freeze_slab(s, slab);
- goto retry_load_slab;
+ local_unlock_irqrestore(&s->cpu_slab->lock, flags);
+
+ slab->next = NULL;
+ __put_partials(s, slab);
}
#endif
@@ -3792,11 +3777,11 @@ void slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg,
zero_size = orig_size;
/*
- * When slub_debug is enabled, avoid memory initialization integrated
+ * When slab_debug is enabled, avoid memory initialization integrated
* into KASAN and instead zero out the memory via the memset below with
* the proper size. Otherwise, KASAN might overwrite SLUB redzones and
* cause false-positive reports. This does not lead to a performance
- * penalty on production builds, as slub_debug is not intended to be
+ * penalty on production builds, as slab_debug is not intended to be
* enabled there.
*/
if (__slub_debug_enabled())
@@ -4187,7 +4172,6 @@ static void __slab_free(struct kmem_cache *s, struct slab *slab,
* then add it.
*/
if (!kmem_cache_has_cpu_partial(s) && unlikely(!prior)) {
- remove_full(s, n, slab);
add_partial(n, slab, DEACTIVATE_TO_TAIL);
stat(s, FREE_ADD_PARTIAL);
}
@@ -4201,9 +4185,6 @@ slab_empty:
*/
remove_partial(n, slab);
stat(s, FREE_REMOVE_PARTIAL);
- } else {
- /* Slab must be on the full list */
- remove_full(s, n, slab);
}
spin_unlock_irqrestore(&n->list_lock, flags);
@@ -4702,8 +4683,8 @@ static unsigned int slub_min_objects;
* activity on the partial lists which requires taking the list_lock. This is
* less a concern for large slabs though which are rarely used.
*
- * slub_max_order specifies the order where we begin to stop considering the
- * number of objects in a slab as critical. If we reach slub_max_order then
+ * slab_max_order specifies the order where we begin to stop considering the
+ * number of objects in a slab as critical. If we reach slab_max_order then
* we try to keep the page order as low as possible. So we accept more waste
* of space in favor of a small page order.
*
@@ -4770,14 +4751,14 @@ static inline int calculate_order(unsigned int size)
* and backing off gradually.
*
* We start with accepting at most 1/16 waste and try to find the
- * smallest order from min_objects-derived/slub_min_order up to
- * slub_max_order that will satisfy the constraint. Note that increasing
+ * smallest order from min_objects-derived/slab_min_order up to
+ * slab_max_order that will satisfy the constraint. Note that increasing
* the order can only result in same or less fractional waste, not more.
*
* If that fails, we increase the acceptable fraction of waste and try
* again. The last iteration with fraction of 1/2 would effectively
* accept any waste and give us the order determined by min_objects, as
- * long as at least single object fits within slub_max_order.
+ * long as at least single object fits within slab_max_order.
*/
for (unsigned int fraction = 16; fraction > 1; fraction /= 2) {
order = calc_slab_order(size, min_order, slub_max_order,
@@ -4787,7 +4768,7 @@ static inline int calculate_order(unsigned int size)
}
/*
- * Doh this slab cannot be placed using slub_max_order.
+ * Doh this slab cannot be placed using slab_max_order.
*/
order = get_order(size);
if (order <= MAX_PAGE_ORDER)
@@ -4857,7 +4838,6 @@ static void early_kmem_cache_node_alloc(int node)
slab = new_slab(kmem_cache_node, GFP_NOWAIT, node);
BUG_ON(!slab);
- inc_slabs_node(kmem_cache_node, slab_nid(slab), slab->objects);
if (slab_nid(slab) != node) {
pr_err("SLUB: Unable to allocate memory from node %d\n", node);
pr_err("SLUB: Allocating a useless per node structure in order to be able to continue\n");
@@ -5104,7 +5084,7 @@ static int calculate_sizes(struct kmem_cache *s)
static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
{
- s->flags = kmem_cache_flags(s->size, flags, s->name);
+ s->flags = kmem_cache_flags(flags, s->name);
#ifdef CONFIG_SLAB_FREELIST_HARDENED
s->random = get_random_long();
#endif
@@ -5313,7 +5293,9 @@ static int __init setup_slub_min_order(char *str)
return 1;
}
-__setup("slub_min_order=", setup_slub_min_order);
+__setup("slab_min_order=", setup_slub_min_order);
+__setup_param("slub_min_order=", slub_min_order, setup_slub_min_order, 0);
+
static int __init setup_slub_max_order(char *str)
{
@@ -5326,7 +5308,8 @@ static int __init setup_slub_max_order(char *str)
return 1;
}
-__setup("slub_max_order=", setup_slub_max_order);
+__setup("slab_max_order=", setup_slub_max_order);
+__setup_param("slub_max_order=", slub_max_order, setup_slub_max_order, 0);
static int __init setup_slub_min_objects(char *str)
{
@@ -5335,7 +5318,8 @@ static int __init setup_slub_min_objects(char *str)
return 1;
}
-__setup("slub_min_objects=", setup_slub_min_objects);
+__setup("slab_min_objects=", setup_slub_min_objects);
+__setup_param("slub_min_objects=", slub_min_objects, setup_slub_min_objects, 0);
#ifdef CONFIG_HARDENED_USERCOPY
/*
@@ -5663,7 +5647,7 @@ void __init kmem_cache_init(void)
/* Now we can use the kmem_cache to allocate kmalloc slabs */
setup_kmalloc_cache_index_table();
- create_kmalloc_caches(0);
+ create_kmalloc_caches();
/* Setup random freelists for each cache */
init_freelist_randomization();
@@ -6792,14 +6776,12 @@ out_del_kobj:
void sysfs_slab_unlink(struct kmem_cache *s)
{
- if (slab_state >= FULL)
- kobject_del(&s->kobj);
+ kobject_del(&s->kobj);
}
void sysfs_slab_release(struct kmem_cache *s)
{
- if (slab_state >= FULL)
- kobject_put(&s->kobj);
+ kobject_put(&s->kobj);
}
/*
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 1155a6304119..4919423cce76 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2552,10 +2552,10 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
exit_swap_address_space(p->type);
inode = mapping->host;
- if (p->bdev_handle) {
+ if (p->bdev_file) {
set_blocksize(p->bdev, old_block_size);
- bdev_release(p->bdev_handle);
- p->bdev_handle = NULL;
+ fput(p->bdev_file);
+ p->bdev_file = NULL;
}
inode_lock(inode);
@@ -2785,14 +2785,14 @@ static int claim_swapfile(struct swap_info_struct *p, struct inode *inode)
int error;
if (S_ISBLK(inode->i_mode)) {
- p->bdev_handle = bdev_open_by_dev(inode->i_rdev,
+ p->bdev_file = bdev_file_open_by_dev(inode->i_rdev,
BLK_OPEN_READ | BLK_OPEN_WRITE, p, NULL);
- if (IS_ERR(p->bdev_handle)) {
- error = PTR_ERR(p->bdev_handle);
- p->bdev_handle = NULL;
+ if (IS_ERR(p->bdev_file)) {
+ error = PTR_ERR(p->bdev_file);
+ p->bdev_file = NULL;
return error;
}
- p->bdev = p->bdev_handle->bdev;
+ p->bdev = file_bdev(p->bdev_file);
p->old_block_size = block_size(p->bdev);
error = set_blocksize(p->bdev, PAGE_SIZE);
if (error < 0)
@@ -3234,10 +3234,10 @@ bad_swap:
p->percpu_cluster = NULL;
free_percpu(p->cluster_next_cpu);
p->cluster_next_cpu = NULL;
- if (p->bdev_handle) {
+ if (p->bdev_file) {
set_blocksize(p->bdev, p->old_block_size);
- bdev_release(p->bdev_handle);
- p->bdev_handle = NULL;
+ fput(p->bdev_file);
+ p->bdev_file = NULL;
}
inode = NULL;
destroy_swap_extents(p);
diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c
index a0331ba9ae2a..712160cd41ec 100644
--- a/mm/userfaultfd.c
+++ b/mm/userfaultfd.c
@@ -1017,9 +1017,6 @@ static int move_present_pte(struct mm_struct *mm,
goto out;
}
- folio_move_anon_rmap(src_folio, dst_vma);
- WRITE_ONCE(src_folio->index, linear_page_index(dst_vma, dst_addr));
-
orig_src_pte = ptep_clear_flush(src_vma, src_addr, src_pte);
/* Folio got pinned from under us. Put it back and fail the move. */
if (folio_maybe_dma_pinned(src_folio)) {
@@ -1028,6 +1025,9 @@ static int move_present_pte(struct mm_struct *mm,
goto out;
}
+ folio_move_anon_rmap(src_folio, dst_vma);
+ WRITE_ONCE(src_folio->index, linear_page_index(dst_vma, dst_addr));
+
orig_dst_pte = mk_pte(&src_folio->page, dst_vma->vm_page_prot);
/* Follow mremap() behavior and treat the entry dirty after the move */
orig_dst_pte = pte_mkwrite(pte_mkdirty(orig_dst_pte), dst_vma);
diff --git a/mm/util.c b/mm/util.c
index 6710efaf1802..669397235787 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -136,6 +136,23 @@ void *kmemdup(const void *src, size_t len, gfp_t gfp)
EXPORT_SYMBOL(kmemdup);
/**
+ * kmemdup_array - duplicate a given array.
+ *
+ * @src: array to duplicate.
+ * @element_size: size of each element of array.
+ * @count: number of elements to duplicate from array.
+ * @gfp: GFP mask to use.
+ *
+ * Return: duplicated array of @src or %NULL in case of error,
+ * result is physically contiguous. Use kfree() to free.
+ */
+void *kmemdup_array(const void *src, size_t element_size, size_t count, gfp_t gfp)
+{
+ return kmemdup(src, size_mul(element_size, count), gfp);
+}
+EXPORT_SYMBOL(kmemdup_array);
+
+/**
* kvmemdup - duplicate region of memory
*
* @src: memory region to duplicate
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 25a8df497255..22aa63f4ef63 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -304,8 +304,8 @@ static int vmap_range_noflush(unsigned long addr, unsigned long end,
return err;
}
-int ioremap_page_range(unsigned long addr, unsigned long end,
- phys_addr_t phys_addr, pgprot_t prot)
+int vmap_page_range(unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot)
{
int err;
@@ -318,6 +318,26 @@ int ioremap_page_range(unsigned long addr, unsigned long end,
return err;
}
+int ioremap_page_range(unsigned long addr, unsigned long end,
+ phys_addr_t phys_addr, pgprot_t prot)
+{
+ struct vm_struct *area;
+
+ area = find_vm_area((void *)addr);
+ if (!area || !(area->flags & VM_IOREMAP)) {
+ WARN_ONCE(1, "vm_area at addr %lx is not marked as VM_IOREMAP\n", addr);
+ return -EINVAL;
+ }
+ if (addr != (unsigned long)area->addr ||
+ (void *)end != area->addr + get_vm_area_size(area)) {
+ WARN_ONCE(1, "ioremap request [%lx,%lx) doesn't match vm_area [%lx, %lx)\n",
+ addr, end, (long)area->addr,
+ (long)area->addr + get_vm_area_size(area));
+ return -ERANGE;
+ }
+ return vmap_page_range(addr, end, phys_addr, prot);
+}
+
static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
pgtbl_mod_mask *mask)
{
@@ -635,6 +655,58 @@ static int vmap_pages_range(unsigned long addr, unsigned long end,
return err;
}
+static int check_sparse_vm_area(struct vm_struct *area, unsigned long start,
+ unsigned long end)
+{
+ might_sleep();
+ if (WARN_ON_ONCE(area->flags & VM_FLUSH_RESET_PERMS))
+ return -EINVAL;
+ if (WARN_ON_ONCE(area->flags & VM_NO_GUARD))
+ return -EINVAL;
+ if (WARN_ON_ONCE(!(area->flags & VM_SPARSE)))
+ return -EINVAL;
+ if ((end - start) >> PAGE_SHIFT > totalram_pages())
+ return -E2BIG;
+ if (start < (unsigned long)area->addr ||
+ (void *)end > area->addr + get_vm_area_size(area))
+ return -ERANGE;
+ return 0;
+}
+
+/**
+ * vm_area_map_pages - map pages inside given sparse vm_area
+ * @area: vm_area
+ * @start: start address inside vm_area
+ * @end: end address inside vm_area
+ * @pages: pages to map (always PAGE_SIZE pages)
+ */
+int vm_area_map_pages(struct vm_struct *area, unsigned long start,
+ unsigned long end, struct page **pages)
+{
+ int err;
+
+ err = check_sparse_vm_area(area, start, end);
+ if (err)
+ return err;
+
+ return vmap_pages_range(start, end, PAGE_KERNEL, pages, PAGE_SHIFT);
+}
+
+/**
+ * vm_area_unmap_pages - unmap pages inside given sparse vm_area
+ * @area: vm_area
+ * @start: start address inside vm_area
+ * @end: end address inside vm_area
+ */
+void vm_area_unmap_pages(struct vm_struct *area, unsigned long start,
+ unsigned long end)
+{
+ if (check_sparse_vm_area(area, start, end))
+ return;
+
+ vunmap_range(start, end);
+}
+
int is_vmalloc_or_module_addr(const void *x)
{
/*
@@ -4220,9 +4292,9 @@ long vread_iter(struct iov_iter *iter, const char *addr, size_t count)
if (flags & VMAP_RAM)
copied = vmap_ram_vread_iter(iter, addr, n, flags);
- else if (!(vm && (vm->flags & VM_IOREMAP)))
+ else if (!(vm && (vm->flags & (VM_IOREMAP | VM_SPARSE))))
copied = aligned_vread_iter(iter, addr, n);
- else /* IOREMAP area is treated as memory hole */
+ else /* IOREMAP | SPARSE area is treated as memory hole */
copied = zero_iter(iter, n);
addr += copied;
@@ -4810,6 +4882,9 @@ static int vmalloc_info_show(struct seq_file *m, void *p)
if (v->flags & VM_IOREMAP)
seq_puts(m, " ioremap");
+ if (v->flags & VM_SPARSE)
+ seq_puts(m, " sparse");
+
if (v->flags & VM_ALLOC)
seq_puts(m, " vmalloc");
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 741f16929e72..3ef654addd44 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -5759,7 +5759,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
/* Use reclaim/compaction for costly allocs or under memory pressure */
static bool in_reclaim_compaction(struct scan_control *sc)
{
- if (IS_ENABLED(CONFIG_COMPACTION) && sc->order &&
+ if (gfp_compaction_allowed(sc->gfp_mask) && sc->order &&
(sc->order > PAGE_ALLOC_COSTLY_ORDER ||
sc->priority < DEF_PRIORITY - 2))
return true;
@@ -6006,6 +6006,9 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
{
unsigned long watermark;
+ if (!gfp_compaction_allowed(sc->gfp_mask))
+ return false;
+
/* Allocation can already succeed, nothing to do */
if (zone_watermark_ok(zone, sc->order, min_wmark_pages(zone),
sc->reclaim_idx, 0))
diff --git a/mm/workingset.c b/mm/workingset.c
index 226012974328..f2a0ecaf708d 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -16,6 +16,7 @@
#include <linux/dax.h>
#include <linux/fs.h>
#include <linux/mm.h>
+#include "internal.h"
/*
* Double CLOCK lists