summaryrefslogtreecommitdiff
path: root/arch/x86/mm/pat
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/mm/pat')
-rw-r--r--arch/x86/mm/pat/memtype.c73
-rw-r--r--arch/x86/mm/pat/set_memory.c68
2 files changed, 113 insertions, 28 deletions
diff --git a/arch/x86/mm/pat/memtype.c b/arch/x86/mm/pat/memtype.c
index 0d72183b5dd0..bdc2a240c2aa 100644
--- a/arch/x86/mm/pat/memtype.c
+++ b/arch/x86/mm/pat/memtype.c
@@ -39,6 +39,7 @@
#include <linux/pfn_t.h>
#include <linux/slab.h>
#include <linux/mm.h>
+#include <linux/highmem.h>
#include <linux/fs.h>
#include <linux/rbtree.h>
@@ -947,6 +948,61 @@ static void free_pfn_range(u64 paddr, unsigned long size)
memtype_free(paddr, paddr + size);
}
+static int follow_phys(struct vm_area_struct *vma, unsigned long *prot,
+ resource_size_t *phys)
+{
+ pte_t *ptep, pte;
+ spinlock_t *ptl;
+
+ if (follow_pte(vma, vma->vm_start, &ptep, &ptl))
+ return -EINVAL;
+
+ pte = ptep_get(ptep);
+
+ /* Never return PFNs of anon folios in COW mappings. */
+ if (vm_normal_folio(vma, vma->vm_start, pte)) {
+ pte_unmap_unlock(ptep, ptl);
+ return -EINVAL;
+ }
+
+ *prot = pgprot_val(pte_pgprot(pte));
+ *phys = (resource_size_t)pte_pfn(pte) << PAGE_SHIFT;
+ pte_unmap_unlock(ptep, ptl);
+ return 0;
+}
+
+static int get_pat_info(struct vm_area_struct *vma, resource_size_t *paddr,
+ pgprot_t *pgprot)
+{
+ unsigned long prot;
+
+ VM_WARN_ON_ONCE(!(vma->vm_flags & VM_PAT));
+
+ /*
+ * We need the starting PFN and cachemode used for track_pfn_remap()
+ * that covered the whole VMA. For most mappings, we can obtain that
+ * information from the page tables. For COW mappings, we might now
+ * suddenly have anon folios mapped and follow_phys() will fail.
+ *
+ * Fallback to using vma->vm_pgoff, see remap_pfn_range_notrack(), to
+ * detect the PFN. If we need the cachemode as well, we're out of luck
+ * for now and have to fail fork().
+ */
+ if (!follow_phys(vma, &prot, paddr)) {
+ if (pgprot)
+ *pgprot = __pgprot(prot);
+ return 0;
+ }
+ if (is_cow_mapping(vma->vm_flags)) {
+ if (pgprot)
+ return -EINVAL;
+ *paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
+ return 0;
+ }
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+}
+
/*
* track_pfn_copy is called when vma that is covering the pfnmap gets
* copied through copy_page_range().
@@ -957,20 +1013,13 @@ static void free_pfn_range(u64 paddr, unsigned long size)
int track_pfn_copy(struct vm_area_struct *vma)
{
resource_size_t paddr;
- unsigned long prot;
unsigned long vma_size = vma->vm_end - vma->vm_start;
pgprot_t pgprot;
if (vma->vm_flags & VM_PAT) {
- /*
- * reserve the whole chunk covered by vma. We need the
- * starting address and protection from pte.
- */
- if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
- WARN_ON_ONCE(1);
+ if (get_pat_info(vma, &paddr, &pgprot))
return -EINVAL;
- }
- pgprot = __pgprot(prot);
+ /* reserve the whole chunk covered by vma. */
return reserve_pfn_range(paddr, vma_size, &pgprot, 1);
}
@@ -1045,7 +1094,6 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
unsigned long size, bool mm_wr_locked)
{
resource_size_t paddr;
- unsigned long prot;
if (vma && !(vma->vm_flags & VM_PAT))
return;
@@ -1053,11 +1101,8 @@ void untrack_pfn(struct vm_area_struct *vma, unsigned long pfn,
/* free the chunk starting from pfn or the whole chunk */
paddr = (resource_size_t)pfn << PAGE_SHIFT;
if (!paddr && !size) {
- if (follow_phys(vma, vma->vm_start, 0, &prot, &paddr)) {
- WARN_ON_ONCE(1);
+ if (get_pat_info(vma, &paddr, NULL))
return;
- }
-
size = vma->vm_end - vma->vm_start;
}
free_pfn_range(paddr, size);
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 80c9037ffadf..19fdfbb171ed 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -619,7 +619,8 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long start,
* Validate strict W^X semantics.
*/
static inline pgprot_t verify_rwx(pgprot_t old, pgprot_t new, unsigned long start,
- unsigned long pfn, unsigned long npg)
+ unsigned long pfn, unsigned long npg,
+ bool nx, bool rw)
{
unsigned long end;
@@ -641,6 +642,10 @@ static inline pgprot_t verify_rwx(pgprot_t old, pgprot_t new, unsigned long star
if ((pgprot_val(new) & (_PAGE_RW | _PAGE_NX)) != _PAGE_RW)
return new;
+ /* Non-leaf translation entries can disable writing or execution. */
+ if (!rw || nx)
+ return new;
+
end = start + npg * PAGE_SIZE - 1;
WARN_ONCE(1, "CPA detected W^X violation: %016llx -> %016llx range: 0x%016lx - 0x%016lx PFN %lx\n",
(unsigned long long)pgprot_val(old),
@@ -657,20 +662,26 @@ static inline pgprot_t verify_rwx(pgprot_t old, pgprot_t new, unsigned long star
/*
* Lookup the page table entry for a virtual address in a specific pgd.
- * Return a pointer to the entry and the level of the mapping.
+ * Return a pointer to the entry, the level of the mapping, and the effective
+ * NX and RW bits of all page table levels.
*/
-pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
- unsigned int *level)
+pte_t *lookup_address_in_pgd_attr(pgd_t *pgd, unsigned long address,
+ unsigned int *level, bool *nx, bool *rw)
{
p4d_t *p4d;
pud_t *pud;
pmd_t *pmd;
*level = PG_LEVEL_NONE;
+ *nx = false;
+ *rw = true;
if (pgd_none(*pgd))
return NULL;
+ *nx |= pgd_flags(*pgd) & _PAGE_NX;
+ *rw &= pgd_flags(*pgd) & _PAGE_RW;
+
p4d = p4d_offset(pgd, address);
if (p4d_none(*p4d))
return NULL;
@@ -679,6 +690,9 @@ pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
if (p4d_leaf(*p4d) || !p4d_present(*p4d))
return (pte_t *)p4d;
+ *nx |= p4d_flags(*p4d) & _PAGE_NX;
+ *rw &= p4d_flags(*p4d) & _PAGE_RW;
+
pud = pud_offset(p4d, address);
if (pud_none(*pud))
return NULL;
@@ -687,6 +701,9 @@ pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
if (pud_leaf(*pud) || !pud_present(*pud))
return (pte_t *)pud;
+ *nx |= pud_flags(*pud) & _PAGE_NX;
+ *rw &= pud_flags(*pud) & _PAGE_RW;
+
pmd = pmd_offset(pud, address);
if (pmd_none(*pmd))
return NULL;
@@ -695,12 +712,27 @@ pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
if (pmd_leaf(*pmd) || !pmd_present(*pmd))
return (pte_t *)pmd;
+ *nx |= pmd_flags(*pmd) & _PAGE_NX;
+ *rw &= pmd_flags(*pmd) & _PAGE_RW;
+
*level = PG_LEVEL_4K;
return pte_offset_kernel(pmd, address);
}
/*
+ * Lookup the page table entry for a virtual address in a specific pgd.
+ * Return a pointer to the entry and the level of the mapping.
+ */
+pte_t *lookup_address_in_pgd(pgd_t *pgd, unsigned long address,
+ unsigned int *level)
+{
+ bool nx, rw;
+
+ return lookup_address_in_pgd_attr(pgd, address, level, &nx, &rw);
+}
+
+/*
* Lookup the page table entry for a virtual address. Return a pointer
* to the entry and the level of the mapping.
*
@@ -715,13 +747,16 @@ pte_t *lookup_address(unsigned long address, unsigned int *level)
EXPORT_SYMBOL_GPL(lookup_address);
static pte_t *_lookup_address_cpa(struct cpa_data *cpa, unsigned long address,
- unsigned int *level)
+ unsigned int *level, bool *nx, bool *rw)
{
- if (cpa->pgd)
- return lookup_address_in_pgd(cpa->pgd + pgd_index(address),
- address, level);
+ pgd_t *pgd;
+
+ if (!cpa->pgd)
+ pgd = pgd_offset_k(address);
+ else
+ pgd = cpa->pgd + pgd_index(address);
- return lookup_address(address, level);
+ return lookup_address_in_pgd_attr(pgd, address, level, nx, rw);
}
/*
@@ -849,12 +884,13 @@ static int __should_split_large_page(pte_t *kpte, unsigned long address,
pgprot_t old_prot, new_prot, req_prot, chk_prot;
pte_t new_pte, *tmp;
enum pg_level level;
+ bool nx, rw;
/*
* Check for races, another CPU might have split this page
* up already:
*/
- tmp = _lookup_address_cpa(cpa, address, &level);
+ tmp = _lookup_address_cpa(cpa, address, &level, &nx, &rw);
if (tmp != kpte)
return 1;
@@ -965,7 +1001,8 @@ static int __should_split_large_page(pte_t *kpte, unsigned long address,
new_prot = static_protections(req_prot, lpaddr, old_pfn, numpages,
psize, CPA_DETECT);
- new_prot = verify_rwx(old_prot, new_prot, lpaddr, old_pfn, numpages);
+ new_prot = verify_rwx(old_prot, new_prot, lpaddr, old_pfn, numpages,
+ nx, rw);
/*
* If there is a conflict, split the large page.
@@ -1046,6 +1083,7 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
pte_t *pbase = (pte_t *)page_address(base);
unsigned int i, level;
pgprot_t ref_prot;
+ bool nx, rw;
pte_t *tmp;
spin_lock(&pgd_lock);
@@ -1053,7 +1091,7 @@ __split_large_page(struct cpa_data *cpa, pte_t *kpte, unsigned long address,
* Check for races, another CPU might have split this page
* up for us already:
*/
- tmp = _lookup_address_cpa(cpa, address, &level);
+ tmp = _lookup_address_cpa(cpa, address, &level, &nx, &rw);
if (tmp != kpte) {
spin_unlock(&pgd_lock);
return 1;
@@ -1594,10 +1632,11 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
int do_split, err;
unsigned int level;
pte_t *kpte, old_pte;
+ bool nx, rw;
address = __cpa_addr(cpa, cpa->curpage);
repeat:
- kpte = _lookup_address_cpa(cpa, address, &level);
+ kpte = _lookup_address_cpa(cpa, address, &level, &nx, &rw);
if (!kpte)
return __cpa_process_fault(cpa, address, primary);
@@ -1619,7 +1658,8 @@ repeat:
new_prot = static_protections(new_prot, address, pfn, 1, 0,
CPA_PROTECT);
- new_prot = verify_rwx(old_prot, new_prot, address, pfn, 1);
+ new_prot = verify_rwx(old_prot, new_prot, address, pfn, 1,
+ nx, rw);
new_prot = pgprot_clear_protnone_bits(new_prot);