From 2f0584f3f4bd60bcc8735172981fb0bff86e74e0 Mon Sep 17 00:00:00 2001 From: Rick Edgecombe Date: Mon, 12 Jun 2023 17:10:27 -0700 Subject: mm: Rename arch pte_mkwrite()'s to pte_mkwrite_novma() The x86 Shadow stack feature includes a new type of memory called shadow stack. This shadow stack memory has some unusual properties, which requires some core mm changes to function properly. One of these unusual properties is that shadow stack memory is writable, but only in limited ways. These limits are applied via a specific PTE bit combination. Nevertheless, the memory is writable, and core mm code will need to apply the writable permissions in the typical paths that call pte_mkwrite(). The goal is to make pte_mkwrite() take a VMA, so that the x86 implementation of it can know whether to create regular writable or shadow stack mappings. But there are a couple of challenges to this. Modifying the signatures of each arch pte_mkwrite() implementation would be error prone because some are generated with macros and would need to be re-implemented. Also, some pte_mkwrite() callers operate on kernel memory without a VMA. So this can be done in a three step process. First pte_mkwrite() can be renamed to pte_mkwrite_novma() in each arch, with a generic pte_mkwrite() added that just calls pte_mkwrite_novma(). Next callers without a VMA can be moved to pte_mkwrite_novma(). And lastly, pte_mkwrite() and all callers can be changed to take/pass a VMA. Start the process by renaming pte_mkwrite() to pte_mkwrite_novma() and adding the pte_mkwrite() wrapper in linux/pgtable.h. Apply the same pattern for pmd_mkwrite(). Since not all archs have a pmd_mkwrite_novma(), create a new arch config HAS_HUGE_PAGE that can be used to tell if pmd_mkwrite() should be defined. Otherwise in the !HAS_HUGE_PAGE cases the compiler would not be able to find pmd_mkwrite_novma(). No functional change. Suggested-by: Linus Torvalds Signed-off-by: Rick Edgecombe Signed-off-by: Dave Hansen Reviewed-by: Mike Rapoport (IBM) Acked-by: Geert Uytterhoeven Acked-by: David Hildenbrand Link: https://lore.kernel.org/lkml/CAHk-=wiZjSu7c9sFYZb3q04108stgHff2wfbokGCCgW7riz+8Q@mail.gmail.com/ Link: https://lore.kernel.org/all/20230613001108.3040476-2-rick.p.edgecombe%40intel.com --- arch/s390/Kconfig | 1 + arch/s390/include/asm/hugetlb.h | 2 +- arch/s390/include/asm/pgtable.h | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/s390') diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 5b39918b7042..91514d535c46 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -128,6 +128,7 @@ config S390 select ARCH_WANT_DEFAULT_BPF_JIT select ARCH_WANT_IPC_PARSE_VERSION select ARCH_WANT_OPTIMIZE_VMEMMAP + select ARCH_WANT_KERNEL_PMD_MKWRITE select BUILDTIME_TABLE_SORT select CLONE_BACKWARDS2 select DMA_OPS if PCI diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h index ccdbccfde148..f07267875a19 100644 --- a/arch/s390/include/asm/hugetlb.h +++ b/arch/s390/include/asm/hugetlb.h @@ -104,7 +104,7 @@ static inline int huge_pte_dirty(pte_t pte) static inline pte_t huge_pte_mkwrite(pte_t pte) { - return pte_mkwrite(pte); + return pte_mkwrite_novma(pte); } static inline pte_t huge_pte_mkdirty(pte_t pte) diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index c55f3c3365af..83c5da39237d 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1002,7 +1002,7 @@ static inline pte_t pte_wrprotect(pte_t pte) return set_pte_bit(pte, __pgprot(_PAGE_PROTECT)); } -static inline pte_t pte_mkwrite(pte_t pte) +static inline pte_t pte_mkwrite_novma(pte_t pte) { pte = set_pte_bit(pte, __pgprot(_PAGE_WRITE)); if (pte_val(pte) & _PAGE_DIRTY) @@ -1485,7 +1485,7 @@ static inline pmd_t pmd_wrprotect(pmd_t pmd) return set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_PROTECT)); } -static inline pmd_t pmd_mkwrite(pmd_t pmd) +static inline pmd_t pmd_mkwrite_novma(pmd_t pmd) { pmd = set_pmd_bit(pmd, __pgprot(_SEGMENT_ENTRY_WRITE)); if (pmd_val(pmd) & _SEGMENT_ENTRY_DIRTY) -- cgit v1.2.3 From 6ecc21bb432dab7241bcbd766ecd1b15620c75c3 Mon Sep 17 00:00:00 2001 From: Rick Edgecombe Date: Mon, 12 Jun 2023 17:10:28 -0700 Subject: mm: Move pte/pmd_mkwrite() callers with no VMA to _novma() The x86 Shadow stack feature includes a new type of memory called shadow stack. This shadow stack memory has some unusual properties, which requires some core mm changes to function properly. One of these unusual properties is that shadow stack memory is writable, but only in limited ways. These limits are applied via a specific PTE bit combination. Nevertheless, the memory is writable, and core mm code will need to apply the writable permissions in the typical paths that call pte_mkwrite(). Future patches will make pte_mkwrite() take a VMA, so that the x86 implementation of it can know whether to create regular writable or shadow stack mappings. But there are a couple of challenges to this. Modifying the signatures of each arch pte_mkwrite() implementation would be error prone because some are generated with macros and would need to be re-implemented. Also, some pte_mkwrite() callers operate on kernel memory without a VMA. So this can be done in a three step process. First pte_mkwrite() can be renamed to pte_mkwrite_novma() in each arch, with a generic pte_mkwrite() added that just calls pte_mkwrite_novma(). Next callers without a VMA can be moved to pte_mkwrite_novma(). And lastly, pte_mkwrite() and all callers can be changed to take/pass a VMA. Earlier work did the first step, so next move the callers that don't have a VMA to pte_mkwrite_novma(). Also do the same for pmd_mkwrite(). This will be ok for the shadow stack feature, as these callers are on kernel memory which will not need to be made shadow stack, and the other architectures only currently support one type of memory in pte_mkwrite() Signed-off-by: Rick Edgecombe Signed-off-by: Dave Hansen Reviewed-by: Mike Rapoport (IBM) Acked-by: David Hildenbrand Link: https://lore.kernel.org/all/20230613001108.3040476-3-rick.p.edgecombe%40intel.com --- arch/arm64/mm/trans_pgd.c | 4 ++-- arch/s390/mm/pageattr.c | 4 ++-- arch/x86/xen/mmu_pv.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/s390') diff --git a/arch/arm64/mm/trans_pgd.c b/arch/arm64/mm/trans_pgd.c index 4ea2eefbc053..a01493f3a06f 100644 --- a/arch/arm64/mm/trans_pgd.c +++ b/arch/arm64/mm/trans_pgd.c @@ -40,7 +40,7 @@ static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr) * read only (code, rodata). Clear the RDONLY bit from * the temporary mappings we use during restore. */ - set_pte(dst_ptep, pte_mkwrite(pte)); + set_pte(dst_ptep, pte_mkwrite_novma(pte)); } else if (debug_pagealloc_enabled() && !pte_none(pte)) { /* * debug_pagealloc will removed the PTE_VALID bit if @@ -53,7 +53,7 @@ static void _copy_pte(pte_t *dst_ptep, pte_t *src_ptep, unsigned long addr) */ BUG_ON(!pfn_valid(pte_pfn(pte))); - set_pte(dst_ptep, pte_mkpresent(pte_mkwrite(pte))); + set_pte(dst_ptep, pte_mkpresent(pte_mkwrite_novma(pte))); } } diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c index ca5a418c58a8..e5ec76271b16 100644 --- a/arch/s390/mm/pageattr.c +++ b/arch/s390/mm/pageattr.c @@ -98,7 +98,7 @@ static int walk_pte_level(pmd_t *pmdp, unsigned long addr, unsigned long end, if (flags & SET_MEMORY_RO) new = pte_wrprotect(new); else if (flags & SET_MEMORY_RW) - new = pte_mkwrite(pte_mkdirty(new)); + new = pte_mkwrite_novma(pte_mkdirty(new)); if (flags & SET_MEMORY_NX) new = set_pte_bit(new, __pgprot(_PAGE_NOEXEC)); else if (flags & SET_MEMORY_X) @@ -156,7 +156,7 @@ static void modify_pmd_page(pmd_t *pmdp, unsigned long addr, if (flags & SET_MEMORY_RO) new = pmd_wrprotect(new); else if (flags & SET_MEMORY_RW) - new = pmd_mkwrite(pmd_mkdirty(new)); + new = pmd_mkwrite_novma(pmd_mkdirty(new)); if (flags & SET_MEMORY_NX) new = set_pmd_bit(new, __pgprot(_SEGMENT_ENTRY_NOEXEC)); else if (flags & SET_MEMORY_X) diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index e0a975165de7..ccf3d308c874 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -166,7 +166,7 @@ void make_lowmem_page_readwrite(void *vaddr) if (pte == NULL) return; /* vaddr missing */ - ptev = pte_mkwrite(*pte); + ptev = pte_mkwrite_novma(*pte); if (HYPERVISOR_update_va_mapping(address, ptev, 0)) BUG(); -- cgit v1.2.3