Diffstat (limited to 'include/linux/mm.h'):
 -rw-r--r--  include/linux/mm.h | 194
 1 file changed, 111 insertions(+), 83 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5a323422d783..dc7b87310c10 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -15,6 +15,7 @@
#include <linux/atomic.h>
#include <linux/debug_locks.h>
#include <linux/mm_types.h>
+#include <linux/mmap_lock.h>
#include <linux/range.h>
#include <linux/pfn.h>
#include <linux/percpu-refcount.h>
@@ -28,6 +29,7 @@
#include <linux/overflow.h>
#include <linux/sizes.h>
#include <linux/sched.h>
+#include <linux/pgtable.h>
struct mempolicy;
struct anon_vma;
@@ -92,7 +94,6 @@ extern int mmap_rnd_compat_bits __read_mostly;
#endif
#include <asm/page.h>
-#include <asm/pgtable.h>
#include <asm/processor.h>
/*
@@ -201,10 +202,10 @@ extern int sysctl_overcommit_memory;
extern int sysctl_overcommit_ratio;
extern unsigned long sysctl_overcommit_kbytes;
-extern int overcommit_ratio_handler(struct ctl_table *, int, void __user *,
- size_t *, loff_t *);
-extern int overcommit_kbytes_handler(struct ctl_table *, int, void __user *,
- size_t *, loff_t *);
+int overcommit_ratio_handler(struct ctl_table *, int, void *, size_t *,
+ loff_t *);
+int overcommit_kbytes_handler(struct ctl_table *, int, void *, size_t *,
+ loff_t *);
#define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
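
The handler signature change above reflects a tree-wide sysctl rework: proc handlers now receive a kernel-space buffer (plain void *) instead of a void __user * pointer. A minimal sketch of a handler with the new signature, assuming the usual proc_dointvec() forwarding pattern (the body is illustrative, not the exact mm/util.c implementation):

static int example_overcommit_handler(struct ctl_table *table, int write,
				      void *buffer, size_t *lenp, loff_t *ppos)
{
	/* buffer is already in kernel space; no copy_from_user() needed */
	int ret = proc_dointvec(table, write, buffer, lenp, ppos);

	if (ret == 0 && write) {
		/* react to the newly written value here */
	}
	return ret;
}
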
@@ -325,17 +326,13 @@ extern unsigned int kobjsize(const void *objp);
#elif defined(CONFIG_SPARC64)
# define VM_SPARC_ADI VM_ARCH_1 /* Uses ADI tag for access control */
# define VM_ARCH_CLEAR VM_SPARC_ADI
+#elif defined(CONFIG_ARM64)
+# define VM_ARM64_BTI VM_ARCH_1 /* BTI guarded page, a.k.a. GP bit */
+# define VM_ARCH_CLEAR VM_ARM64_BTI
#elif !defined(CONFIG_MMU)
# define VM_MAPPED_COPY VM_ARCH_1 /* T if mapped copy of data (nommu mmap) */
#endif
-#if defined(CONFIG_X86_INTEL_MPX)
-/* MPX specific bounds table or bounds directory */
-# define VM_MPX VM_HIGH_ARCH_4
-#else
-# define VM_MPX VM_NONE
-#endif
-
#ifndef VM_GROWSUP
# define VM_GROWSUP VM_NONE
#endif
@@ -405,7 +402,7 @@ extern pgprot_t protection_map[16];
* @FAULT_FLAG_WRITE: Fault was a write fault.
* @FAULT_FLAG_MKWRITE: Fault was mkwrite of existing PTE.
* @FAULT_FLAG_ALLOW_RETRY: Allow to retry the fault if blocked.
- * @FAULT_FLAG_RETRY_NOWAIT: Don't drop mmap_sem and wait when retrying.
+ * @FAULT_FLAG_RETRY_NOWAIT: Don't drop mmap_lock and wait when retrying.
* @FAULT_FLAG_KILLABLE: The fault task is in SIGKILL killable region.
* @FAULT_FLAG_TRIED: The fault has been tried once.
* @FAULT_FLAG_USER: The fault originated in userspace.
@@ -455,10 +452,10 @@ extern pgprot_t protection_map[16];
* fault_flag_allow_retry_first - check ALLOW_RETRY the first time
*
* This is mostly used for places where we want to try to avoid taking
- * the mmap_sem for too long a time when waiting for another condition
+ * the mmap_lock for too long a time when waiting for another condition
* to change, in which case we can try to be polite to release the
- * mmap_sem in the first round to avoid potential starvation of other
- * processes that would also want the mmap_sem.
+ * mmap_lock in the first round to avoid potential starvation of other
+ * processes that would also want the mmap_lock.
*
* Return: true if the page fault allows retry and this is the first
* attempt of the fault handling; false otherwise.
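
In the header itself this comment sits above a trivial predicate; a sketch consistent with the description (assumed, since the function body falls outside the hunk's context):

static inline bool fault_flag_allow_retry_first(unsigned int flags)
{
	return (flags & FAULT_FLAG_ALLOW_RETRY) &&
	       (!(flags & FAULT_FLAG_TRIED));
}
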
@@ -505,7 +502,6 @@ struct vm_fault {
pte_t orig_pte; /* Value of PTE at the time of fault */
struct page *cow_page; /* Page handler may use for COW fault */
- struct mem_cgroup *memcg; /* Cgroup cow_page belongs to */
struct page *page; /* ->fault handlers should return a
* page here, unless VM_FAULT_NOPAGE
* is set (which is also implied by
@@ -586,7 +582,7 @@ struct vm_operations_struct {
* (vma,addr) marked as MPOL_SHARED. The shared policy infrastructure
* in mm/mempolicy.c will do this automatically.
* get_policy() must NOT add a ref if the policy at (vma,addr) is not
- * marked as MPOL_SHARED. vma policies are protected by the mmap_sem.
+ * marked as MPOL_SHARED. vma policies are protected by the mmap_lock.
* If no [shared/vma] mempolicy exists at the addr, get_policy() op
* must return NULL--i.e., do not "fallback" to task or system default
* policy.
@@ -781,7 +777,13 @@ static inline void *kvcalloc(size_t n, size_t size, gfp_t flags)
}
extern void kvfree(const void *addr);
+extern void kvfree_sensitive(const void *addr, size_t len);
+/*
+ * Mapcount of the compound page as a whole; does not include mapped sub-pages.
+ *
+ * Must only be called for compound pages or any of their tail sub-pages.
+ */
static inline int compound_mapcount(struct page *page)
{
VM_BUG_ON_PAGE(!PageCompound(page), page);
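
The kvfree_sensitive() declaration added above pairs a memzero with the free, for buffers that held secrets (e.g. key payloads). A hedged usage sketch; 'payload' and 'len' are hypothetical:

void *payload = kvmalloc(len, GFP_KERNEL);

if (payload) {
	/* ... fill with key material, use it ... */
	kvfree_sensitive(payload, len);	/* zeroes len bytes, then kvfree() */
}
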
@@ -801,10 +803,16 @@ static inline void page_mapcount_reset(struct page *page)
int __page_mapcount(struct page *page);
+/*
+ * Mapcount of a 0-order page; when called on a compound sub-page, this
+ * includes the compound_mapcount().
+ *
+ * The result is undefined for pages which cannot be mapped into userspace,
+ * e.g. SLAB pages and other special page types; see page_has_type().
+ * Such pages use this field of struct page differently.
+ */
static inline int page_mapcount(struct page *page)
{
- VM_BUG_ON_PAGE(PageSlab(page), page);
-
if (unlikely(PageCompound(page)))
return __page_mapcount(page);
return atomic_read(&page->_mapcount) + 1;
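
Note that _mapcount is stored biased by -1, so an unmapped page reads back as 0 after the +1 above. A small sketch of a caller-level check built on this (page_mapped_more_than_once() is a hypothetical helper, not a kernel API):

static inline bool page_mapped_more_than_once(struct page *page)
{
	/* >1 means the page is mapped in two or more places */
	return page_mapcount(page) > 1;
}
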
@@ -860,7 +868,7 @@ enum compound_dtor_id {
#endif
NR_COMPOUND_DTORS,
};
-extern compound_page_dtor * const compound_page_dtors[];
+extern compound_page_dtor * const compound_page_dtors[NR_COMPOUND_DTORS];
static inline void set_compound_page_dtor(struct page *page,
enum compound_dtor_id compound_dtor)
@@ -869,10 +877,10 @@ static inline void set_compound_page_dtor(struct page *page,
page[1].compound_dtor = compound_dtor;
}
-static inline compound_page_dtor *get_compound_page_dtor(struct page *page)
+static inline void destroy_compound_page(struct page *page)
{
VM_BUG_ON_PAGE(page[1].compound_dtor >= NR_COMPOUND_DTORS, page);
- return compound_page_dtors[page[1].compound_dtor];
+ compound_page_dtors[page[1].compound_dtor](page);
}
static inline unsigned int compound_order(struct page *page)
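
Callers that previously fetched the destructor and invoked it themselves now make one call, and the bounds check on compound_dtor stays inside the helper. A before/after sketch of the calling convention:

/* Before: two-step lookup and call. */
compound_page_dtor *dtor = get_compound_page_dtor(page);
(*dtor)(page);

/* After: lookup, bounds check and call folded into one helper. */
destroy_compound_page(page);
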
@@ -939,8 +947,7 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
return pte;
}
-vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
- struct page *page);
+vm_fault_t alloc_set_pte(struct vm_fault *vmf, struct page *page);
vm_fault_t finish_fault(struct vm_fault *vmf);
vm_fault_t finish_mkwrite_fault(struct vm_fault *vmf);
#endif
@@ -1219,7 +1226,7 @@ void unpin_user_pages(struct page **pages, unsigned long npages);
 * used to track the pincount (instead of using the GUP_PIN_COUNTING_BIAS
* scheme).
*
- * For more information, please see Documentation/vm/pin_user_pages.rst.
+ * For more information, please see Documentation/core-api/pin_user_pages.rst.
*
* @page: pointer to page to be queried.
 * Return: True if it is likely that the page has been "dma-pinned".
@@ -1367,7 +1374,7 @@ static inline int cpu_pid_to_cpupid(int nid, int pid)
static inline bool cpupid_pid_unset(int cpupid)
{
- return 1;
+ return true;
}
static inline void page_cpupid_reset_last(struct page *page)
@@ -1700,8 +1707,12 @@ long pin_user_pages(unsigned long start, unsigned long nr_pages,
struct vm_area_struct **vmas);
long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages, int *locked);
+long pin_user_pages_locked(unsigned long start, unsigned long nr_pages,
+ unsigned int gup_flags, struct page **pages, int *locked);
long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
struct page **pages, unsigned int gup_flags);
+long pin_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
+ struct page **pages, unsigned int gup_flags);
int get_user_pages_fast(unsigned long start, int nr_pages,
unsigned int gup_flags, struct page **pages);
@@ -1718,7 +1729,7 @@ struct frame_vector {
unsigned int nr_frames; /* Number of frames stored in ptrs array */
bool got_ref; /* Did we pin pages by getting page ref? */
bool is_pfns; /* Does array contain pages or pfns? */
- void *ptrs[0]; /* Array of pinned pfns / pages. Use
+ void *ptrs[]; /* Array of pinned pfns / pages. Use
* pfns_vector_pages() or pfns_vector_pfns()
* for access */
};
@@ -1816,8 +1827,16 @@ extern int mprotect_fixup(struct vm_area_struct *vma,
/*
 * These variants don't attempt to fault and will return short.
*/
-int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
- struct page **pages);
+int get_user_pages_fast_only(unsigned long start, int nr_pages,
+ unsigned int gup_flags, struct page **pages);
+int pin_user_pages_fast_only(unsigned long start, int nr_pages,
+ unsigned int gup_flags, struct page **pages);
+
+static inline bool get_user_page_fast_only(unsigned long addr,
+ unsigned int gup_flags, struct page **pagep)
+{
+ return get_user_pages_fast_only(addr, 1, gup_flags, pagep) == 1;
+}
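
Because the _fast_only variants never fall back to the sleeping slow path, they are usable from contexts that cannot fault (IRQ handlers, sequence-lock retries, and the like). A hedged sketch of the single-page pattern; 'addr' is hypothetical:

struct page *page;

if (get_user_page_fast_only(addr, FOLL_WRITE, &page)) {
	/* ... use the writable page ... */
	put_page(page);	/* drop the reference the lookup took */
}
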
/*
* per-process(per-mm_struct) statistics.
*/
@@ -2060,11 +2079,6 @@ int __pte_alloc_kernel(pmd_t *pmd);
#if defined(CONFIG_MMU)
-/*
- * The following ifdef needed to get the 5level-fixup.h header to work.
- * Remove it when 5level-fixup.h has been removed.
- */
-#ifndef __ARCH_HAS_5LEVEL_HACK
static inline p4d_t *p4d_alloc(struct mm_struct *mm, pgd_t *pgd,
unsigned long address)
{
@@ -2078,13 +2092,52 @@ static inline pud_t *pud_alloc(struct mm_struct *mm, p4d_t *p4d,
return (unlikely(p4d_none(*p4d)) && __pud_alloc(mm, p4d, address)) ?
NULL : pud_offset(p4d, address);
}
-#endif /* !__ARCH_HAS_5LEVEL_HACK */
+
+static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
+ unsigned long address,
+ pgtbl_mod_mask *mod_mask)
+{
+ if (unlikely(pgd_none(*pgd))) {
+ if (__p4d_alloc(mm, pgd, address))
+ return NULL;
+ *mod_mask |= PGTBL_PGD_MODIFIED;
+ }
+
+ return p4d_offset(pgd, address);
+}
+
+static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
+ unsigned long address,
+ pgtbl_mod_mask *mod_mask)
+{
+ if (unlikely(p4d_none(*p4d))) {
+ if (__pud_alloc(mm, p4d, address))
+ return NULL;
+ *mod_mask |= PGTBL_P4D_MODIFIED;
+ }
+
+ return pud_offset(p4d, address);
+}
static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address)
{
return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?
NULL: pmd_offset(pud, address);
}
+
+static inline pmd_t *pmd_alloc_track(struct mm_struct *mm, pud_t *pud,
+ unsigned long address,
+ pgtbl_mod_mask *mod_mask)
+{
+ if (unlikely(pud_none(*pud))) {
+ if (__pmd_alloc(mm, pud, address))
+ return NULL;
+ *mod_mask |= PGTBL_PUD_MODIFIED;
+ }
+
+ return pmd_offset(pud, address);
+}
#endif /* CONFIG_MMU */
#if USE_SPLIT_PTE_PTLOCKS
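
The *_alloc_track() variants record in *mod_mask which page-table levels they had to populate, so the caller can run arch-specific synchronization afterwards (this series pairs the mask with arch_sync_kernel_mappings()). A sketch of a top-down walk chaining the three levels; walk_alloc_example() is illustrative only:

static int walk_alloc_example(struct mm_struct *mm, unsigned long addr)
{
	pgtbl_mod_mask mask = 0;
	pgd_t *pgd = pgd_offset(mm, addr);
	p4d_t *p4d;
	pud_t *pud;
	pmd_t *pmd;

	p4d = p4d_alloc_track(mm, pgd, addr, &mask);
	if (!p4d)
		return -ENOMEM;
	pud = pud_alloc_track(mm, p4d, addr, &mask);
	if (!pud)
		return -ENOMEM;
	pmd = pmd_alloc_track(mm, pud, addr, &mask);
	if (!pmd)
		return -ENOMEM;

	/* mask now records which of PGTBL_{PGD,P4D,PUD}_MODIFIED occurred */
	return 0;
}
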
@@ -2200,6 +2253,11 @@ static inline void pgtable_pte_page_dtor(struct page *page)
((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd))? \
NULL: pte_offset_kernel(pmd, address))
+#define pte_alloc_kernel_track(pmd, address, mask) \
+ ((unlikely(pmd_none(*(pmd))) && \
+ (__pte_alloc_kernel(pmd) || ({*(mask)|=PGTBL_PMD_MODIFIED;0;})))?\
+ NULL: pte_offset_kernel(pmd, address))
+
#if USE_SPLIT_PMD_PTLOCKS
static struct page *pmd_to_page(pmd_t *pmd)
@@ -2272,9 +2330,7 @@ static inline spinlock_t *pud_lock(struct mm_struct *mm, pud_t *pud)
}
extern void __init pagecache_init(void);
-extern void free_area_init(unsigned long * zones_size);
-extern void __init free_area_init_node(int nid, unsigned long * zones_size,
- unsigned long zone_start_pfn, unsigned long *zholes_size);
+extern void __init free_area_init_memoryless_node(int nid);
extern void free_initmem(void);
/*
@@ -2344,34 +2400,26 @@ static inline unsigned long get_num_physpages(void)
return phys_pages;
}
-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
/*
- * With CONFIG_HAVE_MEMBLOCK_NODE_MAP set, an architecture may initialise its
- * zones, allocate the backing mem_map and account for memory holes in a more
- * architecture independent manner. This is a substitute for creating the
- * zone_sizes[] and zholes_size[] arrays and passing them to
- * free_area_init_node()
+ * Using memblock node mappings, an architecture may initialise its
+ * zones, allocate the backing mem_map and account for memory holes in an
+ * architecture independent manner.
*
 * An architecture is expected to register the ranges of page frames backed by
* physical memory with memblock_add[_node]() before calling
- * free_area_init_nodes() passing in the PFN each zone ends at. At a basic
+ * free_area_init(), passing in the PFN at which each zone ends. In basic
 * usage, an architecture is expected to do something like:
*
* unsigned long max_zone_pfns[MAX_NR_ZONES] = {max_dma, max_normal_pfn,
* max_highmem_pfn};
* for_each_valid_physical_page_range()
* memblock_add_node(base, size, nid)
- * free_area_init_nodes(max_zone_pfns);
+ * free_area_init(max_zone_pfns);
*
- * free_bootmem_with_active_regions() calls free_bootmem_node() for each
- * registered physical page range. Similarly
* sparse_memory_present_with_active_regions() calls memory_present() for
* each range when SPARSEMEM is enabled.
- *
- * See mm/page_alloc.c for more information on each function exposed by
- * CONFIG_HAVE_MEMBLOCK_NODE_MAP.
*/
-extern void free_area_init_nodes(unsigned long *max_zone_pfn);
+void free_area_init(unsigned long *max_zone_pfn);
unsigned long node_map_pfn_alignment(void);
unsigned long __absent_pages_in_range(int nid, unsigned long start_pfn,
unsigned long end_pfn);
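
Concretely, the registration pattern described in the comment above ends in a call like this (a hedged sketch; the zone cutoff variables are per-architecture and named hypothetically here):

static void __init zone_sizes_init(void)
{
	unsigned long max_zone_pfns[MAX_NR_ZONES] = { 0 };

	/* each slot holds the PFN at which that zone ends */
	max_zone_pfns[ZONE_DMA32]  = min(max_dma32_pfn, max_low_pfn);
	max_zone_pfns[ZONE_NORMAL] = max_low_pfn;

	free_area_init(max_zone_pfns);
}
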
@@ -2380,16 +2428,10 @@ extern unsigned long absent_pages_in_range(unsigned long start_pfn,
extern void get_pfn_range_for_nid(unsigned int nid,
unsigned long *start_pfn, unsigned long *end_pfn);
extern unsigned long find_min_pfn_with_active_regions(void);
-extern void free_bootmem_with_active_regions(int nid,
- unsigned long max_low_pfn);
extern void sparse_memory_present_with_active_regions(int nid);
-#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
-
-#if !defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP) && \
- !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID)
-static inline int __early_pfn_to_nid(unsigned long pfn,
- struct mminit_pfnnid_cache *state)
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+static inline int early_pfn_to_nid(unsigned long pfn)
{
return 0;
}
@@ -2425,6 +2467,7 @@ extern void setup_per_cpu_pageset(void);
extern int min_free_kbytes;
extern int watermark_boost_factor;
extern int watermark_scale_factor;
+extern bool arch_has_descending_max_zone_pfns(void);
/* nommu.c */
extern atomic_long_t mmap_pages_allocated;
@@ -2601,25 +2644,6 @@ extern vm_fault_t filemap_page_mkwrite(struct vm_fault *vmf);
int __must_check write_one_page(struct page *page);
void task_dirty_inc(struct task_struct *tsk);
-/* readahead.c */
-#define VM_READAHEAD_PAGES (SZ_128K / PAGE_SIZE)
-
-int force_page_cache_readahead(struct address_space *mapping, struct file *filp,
- pgoff_t offset, unsigned long nr_to_read);
-
-void page_cache_sync_readahead(struct address_space *mapping,
- struct file_ra_state *ra,
- struct file *filp,
- pgoff_t offset,
- unsigned long size);
-
-void page_cache_async_readahead(struct address_space *mapping,
- struct file_ra_state *ra,
- struct file *filp,
- struct page *pg,
- pgoff_t offset,
- unsigned long size);
-
extern unsigned long stack_guard_gap;
/* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */
extern int expand_stack(struct vm_area_struct *vma, unsigned long address);
@@ -2780,6 +2804,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
#define FOLL_LONGTERM 0x10000 /* mapping lifetime is indefinite: see below */
#define FOLL_SPLIT_PMD 0x20000 /* split huge pmd before returning */
#define FOLL_PIN 0x40000 /* pages must be released via unpin_user_page */
+#define FOLL_FAST_ONLY 0x80000 /* gup_fast: prevent fall-back to slow gup */
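
FOLL_FAST_ONLY is what the new *_fast_only() entry points use to refuse the slow path. A sketch of the expected wiring in mm/gup.c; treat internal_get_user_pages_fast() and the exact flag mix as assumptions rather than the verbatim implementation:

int get_user_pages_fast_only(unsigned long start, int nr_pages,
			     unsigned int gup_flags, struct page **pages)
{
	/* fail fast instead of dropping into the faulting slow path */
	gup_flags |= FOLL_GET | FOLL_FAST_ONLY;

	return internal_get_user_pages_fast(start, nr_pages, gup_flags, pages);
}
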
/*
* FOLL_PIN and FOLL_LONGTERM may be used in various combinations with each
@@ -2834,7 +2859,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
* releasing pages: get_user_pages*() pages must be released via put_page(),
* while pin_user_pages*() pages must be released via unpin_user_page().
*
- * Please see Documentation/vm/pin_user_pages.rst for more information.
+ * Please see Documentation/core-api/pin_user_pages.rst for more information.
*/
static inline int vm_fault_to_errno(vm_fault_t vm_fault, int foll_flags)
@@ -2957,8 +2982,8 @@ extern bool process_shares_mm(struct task_struct *p, struct mm_struct *mm);
#ifdef CONFIG_SYSCTL
extern int sysctl_drop_caches;
-int drop_caches_sysctl_handler(struct ctl_table *, int,
- void __user *, size_t *, loff_t *);
+int drop_caches_sysctl_handler(struct ctl_table *, int, void *, size_t *,
+ loff_t *);
#endif
void drop_slab(void);
@@ -3012,6 +3037,7 @@ enum mf_flags {
};
extern int memory_failure(unsigned long pfn, int flags);
extern void memory_failure_queue(unsigned long pfn, int flags);
+extern void memory_failure_queue_kick(int cpu);
extern int unpoison_memory(unsigned long pfn);
extern int get_hwpoison_page(struct page *page);
#define put_hwpoison_page(page) put_page(page)
@@ -3140,5 +3166,7 @@ unsigned long wp_shared_mapping_range(struct address_space *mapping,
pgoff_t first_index, pgoff_t nr);
#endif
+extern int sysctl_nr_trim_pages;
+
#endif /* __KERNEL__ */
#endif /* _LINUX_MM_H */