diff options
Diffstat (limited to 'include/linux/mm_types.h')
-rw-r--r-- | include/linux/mm_types.h | 229 |
1 files changed, 152 insertions, 77 deletions
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 9757067c3053..0722859c3647 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -140,30 +140,6 @@ struct page { }; struct { /* Tail pages of compound page */ unsigned long compound_head; /* Bit zero is set */ - - /* First tail page only */ - unsigned char compound_dtor; - unsigned char compound_order; - atomic_t compound_mapcount; - atomic_t subpages_mapcount; - atomic_t compound_pincount; -#ifdef CONFIG_64BIT - unsigned int compound_nr; /* 1 << compound_order */ -#endif - }; - struct { /* Second tail page of transparent huge page */ - unsigned long _compound_pad_1; /* compound_head */ - unsigned long _compound_pad_2; - /* For both global and memcg */ - struct list_head deferred_list; - }; - struct { /* Second tail page of hugetlb page */ - unsigned long _hugetlb_pad_1; /* compound_head */ - void *hugetlb_subpool; - void *hugetlb_cgroup; - void *hugetlb_cgroup_rsvd; - void *hugetlb_hwpoison; - /* No more space on 32-bit: use third tail if more */ }; struct { /* Page table pages */ unsigned long _pt_pad_1; /* compound_head */ @@ -302,20 +278,17 @@ static inline struct page *encoded_page_ptr(struct encoded_page *page) * @_refcount: Do not access this member directly. Use folio_ref_count() * to find how many references there are to this folio. * @memcg_data: Memory Control Group data. - * @_flags_1: For large folios, additional page flags. - * @_head_1: Points to the folio. Do not use. * @_folio_dtor: Which destructor to use for this folio. * @_folio_order: Do not use directly, call folio_order(). - * @_compound_mapcount: Do not use directly, call folio_entire_mapcount(). - * @_subpages_mapcount: Do not use directly, call folio_mapcount(). + * @_entire_mapcount: Do not use directly, call folio_entire_mapcount(). + * @_nr_pages_mapped: Do not use directly, call folio_mapcount(). * @_pincount: Do not use directly, call folio_maybe_dma_pinned(). * @_folio_nr_pages: Do not use directly, call folio_nr_pages(). - * @_flags_2: For alignment. Do not use. - * @_head_2: Points to the folio. Do not use. * @_hugetlb_subpool: Do not use directly, use accessor in hugetlb.h. * @_hugetlb_cgroup: Do not use directly, use accessor in hugetlb_cgroup.h. * @_hugetlb_cgroup_rsvd: Do not use directly, use accessor in hugetlb_cgroup.h. * @_hugetlb_hwpoison: Do not use directly, call raw_hwp_list_head(). + * @_deferred_list: Folios to be split under memory pressure. * * A folio is a physically, virtually and logically contiguous set * of bytes. It is a power-of-two in size, and it is aligned to that @@ -358,14 +331,16 @@ struct folio { struct { unsigned long _flags_1; unsigned long _head_1; + /* public: */ unsigned char _folio_dtor; unsigned char _folio_order; - atomic_t _compound_mapcount; - atomic_t _subpages_mapcount; + atomic_t _entire_mapcount; + atomic_t _nr_pages_mapped; atomic_t _pincount; #ifdef CONFIG_64BIT unsigned int _folio_nr_pages; #endif + /* private: the union with struct page is transitional */ }; struct page __page_1; }; @@ -373,10 +348,19 @@ struct folio { struct { unsigned long _flags_2; unsigned long _head_2; + /* public: */ void *_hugetlb_subpool; void *_hugetlb_cgroup; void *_hugetlb_cgroup_rsvd; void *_hugetlb_hwpoison; + /* private: the union with struct page is transitional */ + }; + struct { + unsigned long _flags_2a; + unsigned long _head_2a; + /* public: */ + struct list_head _deferred_list; + /* private: the union with struct page is transitional */ }; struct page __page_2; }; @@ -401,53 +385,14 @@ FOLIO_MATCH(memcg_data, memcg_data); offsetof(struct page, pg) + sizeof(struct page)) FOLIO_MATCH(flags, _flags_1); FOLIO_MATCH(compound_head, _head_1); -FOLIO_MATCH(compound_dtor, _folio_dtor); -FOLIO_MATCH(compound_order, _folio_order); -FOLIO_MATCH(compound_mapcount, _compound_mapcount); -FOLIO_MATCH(subpages_mapcount, _subpages_mapcount); -FOLIO_MATCH(compound_pincount, _pincount); -#ifdef CONFIG_64BIT -FOLIO_MATCH(compound_nr, _folio_nr_pages); -#endif #undef FOLIO_MATCH #define FOLIO_MATCH(pg, fl) \ static_assert(offsetof(struct folio, fl) == \ offsetof(struct page, pg) + 2 * sizeof(struct page)) FOLIO_MATCH(flags, _flags_2); FOLIO_MATCH(compound_head, _head_2); -FOLIO_MATCH(hugetlb_subpool, _hugetlb_subpool); -FOLIO_MATCH(hugetlb_cgroup, _hugetlb_cgroup); -FOLIO_MATCH(hugetlb_cgroup_rsvd, _hugetlb_cgroup_rsvd); -FOLIO_MATCH(hugetlb_hwpoison, _hugetlb_hwpoison); #undef FOLIO_MATCH -static inline atomic_t *folio_mapcount_ptr(struct folio *folio) -{ - struct page *tail = &folio->page + 1; - return &tail->compound_mapcount; -} - -static inline atomic_t *folio_subpages_mapcount_ptr(struct folio *folio) -{ - struct page *tail = &folio->page + 1; - return &tail->subpages_mapcount; -} - -static inline atomic_t *compound_mapcount_ptr(struct page *page) -{ - return &page[1].compound_mapcount; -} - -static inline atomic_t *subpages_mapcount_ptr(struct page *page) -{ - return &page[1].subpages_mapcount; -} - -static inline atomic_t *compound_pincount_ptr(struct page *page) -{ - return &page[1].compound_pincount; -} - /* * Used for sizing the vmemmap region on some architectures */ @@ -546,7 +491,15 @@ struct vm_area_struct { * See vmf_insert_mixed_prot() for discussion. */ pgprot_t vm_page_prot; - unsigned long vm_flags; /* Flags, see mm.h. */ + + /* + * Flags, see mm.h. + * To modify use vm_flags_{init|reset|set|clear|mod} functions. + */ + union { + const vm_flags_t vm_flags; + vm_flags_t __private __vm_flags; + }; /* * For areas with an address space and backing store, @@ -645,9 +598,20 @@ struct mm_struct { * &struct mm_struct is freed. */ atomic_t mm_count; - +#ifdef CONFIG_SCHED_MM_CID + /** + * @cid_lock: Protect cid bitmap updates vs lookups. + * + * Prevent situations where updates to the cid bitmap happen + * concurrently with lookups. Those can lead to situations + * where a lookup cannot find a free bit simply because it was + * unlucky enough to load, non-atomically, bitmap words as they + * were being concurrently updated by the updaters. + */ + raw_spinlock_t cid_lock; +#endif #ifdef CONFIG_MMU - atomic_long_t pgtables_bytes; /* PTE page table pages */ + atomic_long_t pgtables_bytes; /* size of all page tables */ #endif int map_count; /* number of VMAs */ @@ -904,11 +868,39 @@ struct vma_iterator { static inline void vma_iter_init(struct vma_iterator *vmi, struct mm_struct *mm, unsigned long addr) { - vmi->mas.tree = &mm->mm_mt; - vmi->mas.index = addr; - vmi->mas.node = MAS_START; + mas_init(&vmi->mas, &mm->mm_mt, addr); +} + +#ifdef CONFIG_SCHED_MM_CID +/* Accessor for struct mm_struct's cidmask. */ +static inline cpumask_t *mm_cidmask(struct mm_struct *mm) +{ + unsigned long cid_bitmap = (unsigned long)mm; + + cid_bitmap += offsetof(struct mm_struct, cpu_bitmap); + /* Skip cpu_bitmap */ + cid_bitmap += cpumask_size(); + return (struct cpumask *)cid_bitmap; +} + +static inline void mm_init_cid(struct mm_struct *mm) +{ + raw_spin_lock_init(&mm->cid_lock); + cpumask_clear(mm_cidmask(mm)); } +static inline unsigned int mm_cid_size(void) +{ + return cpumask_size(); +} +#else /* CONFIG_SCHED_MM_CID */ +static inline void mm_init_cid(struct mm_struct *mm) { } +static inline unsigned int mm_cid_size(void) +{ + return 0; +} +#endif /* CONFIG_SCHED_MM_CID */ + struct mmu_gather; extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm); extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm); @@ -1085,4 +1077,87 @@ enum fault_flag { typedef unsigned int __bitwise zap_flags_t; +/* + * FOLL_PIN and FOLL_LONGTERM may be used in various combinations with each + * other. Here is what they mean, and how to use them: + * + * + * FIXME: For pages which are part of a filesystem, mappings are subject to the + * lifetime enforced by the filesystem and we need guarantees that longterm + * users like RDMA and V4L2 only establish mappings which coordinate usage with + * the filesystem. Ideas for this coordination include revoking the longterm + * pin, delaying writeback, bounce buffer page writeback, etc. As FS DAX was + * added after the problem with filesystems was found FS DAX VMAs are + * specifically failed. Filesystem pages are still subject to bugs and use of + * FOLL_LONGTERM should be avoided on those pages. + * + * In the CMA case: long term pins in a CMA region would unnecessarily fragment + * that region. And so, CMA attempts to migrate the page before pinning, when + * FOLL_LONGTERM is specified. + * + * FOLL_PIN indicates that a special kind of tracking (not just page->_refcount, + * but an additional pin counting system) will be invoked. This is intended for + * anything that gets a page reference and then touches page data (for example, + * Direct IO). This lets the filesystem know that some non-file-system entity is + * potentially changing the pages' data. In contrast to FOLL_GET (whose pages + * are released via put_page()), FOLL_PIN pages must be released, ultimately, by + * a call to unpin_user_page(). + * + * FOLL_PIN is similar to FOLL_GET: both of these pin pages. They use different + * and separate refcounting mechanisms, however, and that means that each has + * its own acquire and release mechanisms: + * + * FOLL_GET: get_user_pages*() to acquire, and put_page() to release. + * + * FOLL_PIN: pin_user_pages*() to acquire, and unpin_user_pages to release. + * + * FOLL_PIN and FOLL_GET are mutually exclusive for a given function call. + * (The underlying pages may experience both FOLL_GET-based and FOLL_PIN-based + * calls applied to them, and that's perfectly OK. This is a constraint on the + * callers, not on the pages.) + * + * FOLL_PIN should be set internally by the pin_user_pages*() APIs, never + * directly by the caller. That's in order to help avoid mismatches when + * releasing pages: get_user_pages*() pages must be released via put_page(), + * while pin_user_pages*() pages must be released via unpin_user_page(). + * + * Please see Documentation/core-api/pin_user_pages.rst for more information. + */ + +enum { + /* check pte is writable */ + FOLL_WRITE = 1 << 0, + /* do get_page on page */ + FOLL_GET = 1 << 1, + /* give error on hole if it would be zero */ + FOLL_DUMP = 1 << 2, + /* get_user_pages read/write w/o permission */ + FOLL_FORCE = 1 << 3, + /* + * if a disk transfer is needed, start the IO and return without waiting + * upon it + */ + FOLL_NOWAIT = 1 << 4, + /* do not fault in pages */ + FOLL_NOFAULT = 1 << 5, + /* check page is hwpoisoned */ + FOLL_HWPOISON = 1 << 6, + /* don't do file mappings */ + FOLL_ANON = 1 << 7, + /* + * FOLL_LONGTERM indicates that the page will be held for an indefinite + * time period _often_ under userspace control. This is in contrast to + * iov_iter_get_pages(), whose usages are transient. + */ + FOLL_LONGTERM = 1 << 8, + /* split huge pmd before returning */ + FOLL_SPLIT_PMD = 1 << 9, + /* allow returning PCI P2PDMA pages */ + FOLL_PCI_P2PDMA = 1 << 10, + /* allow interrupts from generic signals */ + FOLL_INTERRUPTIBLE = 1 << 11, + + /* See also internal only FOLL flags in mm/internal.h */ +}; + #endif /* _LINUX_MM_TYPES_H */ |