From c5034945ce59abacdd02c5eff29f4f54df197880 Mon Sep 17 00:00:00 2001
From: Heiko Carstens
Date: Mon, 10 Sep 2012 16:14:33 +0200
Subject: s390/mm,gmap: implement gmap_translate()

Implement gmap_translate() function which translates a guest absolute
address to a user space process address without establishing the guest
page table entries. This is useful for kvm guest address translations
where no memory access is expected to happen soon (e.g. tprot exception
handler).

Signed-off-by: Heiko Carstens
Reviewed-by: Christian Borntraeger
Signed-off-by: Martin Schwidefsky
---
 arch/s390/mm/pgtable.c | 107 +++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 85 insertions(+), 22 deletions(-)

diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index ae44d2a34313..2accf7113d13 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -379,45 +379,108 @@ out_unmap:
 }
 EXPORT_SYMBOL_GPL(gmap_map_segment);
 
+static unsigned long *gmap_table_walk(unsigned long address, struct gmap *gmap)
+{
+	unsigned long *table;
+
+	table = gmap->table + ((address >> 53) & 0x7ff);
+	if (unlikely(*table & _REGION_ENTRY_INV))
+		return ERR_PTR(-EFAULT);
+	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+	table = table + ((address >> 42) & 0x7ff);
+	if (unlikely(*table & _REGION_ENTRY_INV))
+		return ERR_PTR(-EFAULT);
+	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+	table = table + ((address >> 31) & 0x7ff);
+	if (unlikely(*table & _REGION_ENTRY_INV))
+		return ERR_PTR(-EFAULT);
+	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
+	table = table + ((address >> 20) & 0x7ff);
+	return table;
+}
+
+/**
+ * __gmap_translate - translate a guest address to a user space address
+ * @address: guest address
+ * @gmap: pointer to guest mapping meta data structure
+ *
+ * Returns user space address which corresponds to the guest address or
+ * -EFAULT if no such mapping exists.
+ * This function does not establish potentially missing page table entries.
+ * The mmap_sem of the mm that belongs to the address space must be held
+ * when this function gets called.
+ */
+unsigned long __gmap_translate(unsigned long address, struct gmap *gmap)
+{
+	unsigned long *segment_ptr, vmaddr, segment;
+	struct gmap_pgtable *mp;
+	struct page *page;
+
+	current->thread.gmap_addr = address;
+	segment_ptr = gmap_table_walk(address, gmap);
+	if (IS_ERR(segment_ptr))
+		return PTR_ERR(segment_ptr);
+	/* Convert the gmap address to an mm address. */
+	segment = *segment_ptr;
+	if (!(segment & _SEGMENT_ENTRY_INV)) {
+		page = pfn_to_page(segment >> PAGE_SHIFT);
+		mp = (struct gmap_pgtable *) page->index;
+		return mp->vmaddr | (address & ~PMD_MASK);
+	} else if (segment & _SEGMENT_ENTRY_RO) {
+		vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
+		return vmaddr | (address & ~PMD_MASK);
+	}
+	return -EFAULT;
+}
+EXPORT_SYMBOL_GPL(__gmap_translate);
+
+/**
+ * gmap_translate - translate a guest address to a user space address
+ * @address: guest address
+ * @gmap: pointer to guest mapping meta data structure
+ *
+ * Returns user space address which corresponds to the guest address or
+ * -EFAULT if no such mapping exists.
+ * This function does not establish potentially missing page table entries.
+ */
+unsigned long gmap_translate(unsigned long address, struct gmap *gmap)
+{
+	unsigned long rc;
+
+	down_read(&gmap->mm->mmap_sem);
+	rc = __gmap_translate(address, gmap);
+	up_read(&gmap->mm->mmap_sem);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_translate);
+
 /*
  * this function is assumed to be called with mmap_sem held
  */
 unsigned long __gmap_fault(unsigned long address, struct gmap *gmap)
 {
-	unsigned long *table, vmaddr, segment;
-	struct mm_struct *mm;
+	unsigned long *segment_ptr, vmaddr, segment;
+	struct vm_area_struct *vma;
 	struct gmap_pgtable *mp;
 	struct gmap_rmap *rmap;
-	struct vm_area_struct *vma;
+	struct mm_struct *mm;
 	struct page *page;
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
 
 	current->thread.gmap_addr = address;
-	mm = gmap->mm;
-	/* Walk the gmap address space page table */
-	table = gmap->table + ((address >> 53) & 0x7ff);
-	if (unlikely(*table & _REGION_ENTRY_INV))
-		return -EFAULT;
-	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
-	table = table + ((address >> 42) & 0x7ff);
-	if (unlikely(*table & _REGION_ENTRY_INV))
+	segment_ptr = gmap_table_walk(address, gmap);
+	if (IS_ERR(segment_ptr))
 		return -EFAULT;
-	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
-	table = table + ((address >> 31) & 0x7ff);
-	if (unlikely(*table & _REGION_ENTRY_INV))
-		return -EFAULT;
-	table = (unsigned long *)(*table & _REGION_ENTRY_ORIGIN);
-	table = table + ((address >> 20) & 0x7ff);
 	/* Convert the gmap address to an mm address. */
-	segment = *table;
-	if (likely(!(segment & _SEGMENT_ENTRY_INV))) {
+	segment = *segment_ptr;
+	if (!(segment & _SEGMENT_ENTRY_INV)) {
 		page = pfn_to_page(segment >> PAGE_SHIFT);
 		mp = (struct gmap_pgtable *) page->index;
 		return mp->vmaddr | (address & ~PMD_MASK);
 	} else if (segment & _SEGMENT_ENTRY_RO) {
+		mm = gmap->mm;
 		vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
 		vma = find_vma(mm, vmaddr);
 		if (!vma || vma->vm_start > vmaddr)
@@ -441,12 +504,12 @@ unsigned long __gmap_fault(unsigned long address, struct gmap *gmap)
 		/* Link gmap segment table entry location to page table. */
 		page = pmd_page(*pmd);
 		mp = (struct gmap_pgtable *) page->index;
-		rmap->entry = table;
+		rmap->entry = segment_ptr;
 		spin_lock(&mm->page_table_lock);
 		list_add(&rmap->list, &mp->mapper);
 		spin_unlock(&mm->page_table_lock);
 		/* Set gmap segment table entry to page table. */
-		*table = pmd_val(*pmd) & PAGE_MASK;
+		*segment_ptr = pmd_val(*pmd) & PAGE_MASK;
 		return vmaddr | (address & ~PMD_MASK);
 	}
 	return -EFAULT;

From ab8e5235868f99dfc779e4eaff28f53d63714ce4 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky
Date: Tue, 16 Apr 2013 13:37:46 +0200
Subject: s390/mm,gmap: segment mapping race

The gmap_map_segment function creates a special invalid segment table
entry with the address of the requested target location in the process
address space. The first access will create the connection between the
gmap segment table and the target page table of the main process. If
two threads do this concurrently, both will walk the page tables and
allocate a gmap_rmap structure for the same segment table entry. To
avoid the race, recheck the segment table entry after taking the page
table lock.

Signed-off-by: Martin Schwidefsky
---
 arch/s390/mm/pgtable.c | 160 ++++++++++++++++++++++++++++---------------------
 1 file changed, 91 insertions(+), 69 deletions(-)

diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 2accf7113d13..bd954e96f51c 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -454,12 +454,11 @@ unsigned long gmap_translate(unsigned long address, struct gmap *gmap)
 }
 EXPORT_SYMBOL_GPL(gmap_translate);
 
-/*
- * this function is assumed to be called with mmap_sem held
- */
-unsigned long __gmap_fault(unsigned long address, struct gmap *gmap)
+static int gmap_connect_pgtable(unsigned long segment,
+				unsigned long *segment_ptr,
+				struct gmap *gmap)
 {
-	unsigned long *segment_ptr, vmaddr, segment;
+	unsigned long vmaddr;
 	struct vm_area_struct *vma;
 	struct gmap_pgtable *mp;
 	struct gmap_rmap *rmap;
@@ -469,48 +468,94 @@ unsigned long __gmap_fault(unsigned long address, struct gmap *gmap)
 	pud_t *pud;
 	pmd_t *pmd;
 
+	mm = gmap->mm;
+	vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
+	vma = find_vma(mm, vmaddr);
+	if (!vma || vma->vm_start > vmaddr)
+		return -EFAULT;
+	/* Walk the parent mm page table */
+	pgd = pgd_offset(mm, vmaddr);
+	pud = pud_alloc(mm, pgd, vmaddr);
+	if (!pud)
+		return -ENOMEM;
+	pmd = pmd_alloc(mm, pud, vmaddr);
+	if (!pmd)
+		return -ENOMEM;
+	if (!pmd_present(*pmd) &&
+	    __pte_alloc(mm, vma, pmd, vmaddr))
+		return -ENOMEM;
+	/* pmd now points to a valid segment table entry. */
+	rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT);
+	if (!rmap)
+		return -ENOMEM;
+	/* Link gmap segment table entry location to page table. */
+	page = pmd_page(*pmd);
+	mp = (struct gmap_pgtable *) page->index;
+	rmap->entry = segment_ptr;
+	spin_lock(&mm->page_table_lock);
+	if (*segment_ptr == segment) {
+		list_add(&rmap->list, &mp->mapper);
+		/* Set gmap segment table entry to page table. */
+		*segment_ptr = pmd_val(*pmd) & PAGE_MASK;
+		rmap = NULL;
+	}
+	spin_unlock(&mm->page_table_lock);
+	kfree(rmap);
+	return 0;
+}
+
+static void gmap_disconnect_pgtable(struct mm_struct *mm, unsigned long *table)
+{
+	struct gmap_rmap *rmap, *next;
+	struct gmap_pgtable *mp;
+	struct page *page;
+	int flush;
+
+	flush = 0;
+	spin_lock(&mm->page_table_lock);
+	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+	mp = (struct gmap_pgtable *) page->index;
+	list_for_each_entry_safe(rmap, next, &mp->mapper, list) {
+		*rmap->entry =
+			_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr;
+		list_del(&rmap->list);
+		kfree(rmap);
+		flush = 1;
+	}
+	spin_unlock(&mm->page_table_lock);
+	if (flush)
+		__tlb_flush_global();
+}
+
+/*
+ * this function is assumed to be called with mmap_sem held
+ */
+unsigned long __gmap_fault(unsigned long address, struct gmap *gmap)
+{
+	unsigned long *segment_ptr, segment;
+	struct gmap_pgtable *mp;
+	struct page *page;
+	int rc;
+
 	current->thread.gmap_addr = address;
 	segment_ptr = gmap_table_walk(address, gmap);
 	if (IS_ERR(segment_ptr))
 		return -EFAULT;
 	/* Convert the gmap address to an mm address. */
-	segment = *segment_ptr;
-	if (!(segment & _SEGMENT_ENTRY_INV)) {
-		page = pfn_to_page(segment >> PAGE_SHIFT);
-		mp = (struct gmap_pgtable *) page->index;
-		return mp->vmaddr | (address & ~PMD_MASK);
-	} else if (segment & _SEGMENT_ENTRY_RO) {
-		mm = gmap->mm;
-		vmaddr = segment & _SEGMENT_ENTRY_ORIGIN;
-		vma = find_vma(mm, vmaddr);
-		if (!vma || vma->vm_start > vmaddr)
-			return -EFAULT;
-
-		/* Walk the parent mm page table */
-		pgd = pgd_offset(mm, vmaddr);
-		pud = pud_alloc(mm, pgd, vmaddr);
-		if (!pud)
-			return -ENOMEM;
-		pmd = pmd_alloc(mm, pud, vmaddr);
-		if (!pmd)
-			return -ENOMEM;
-		if (!pmd_present(*pmd) &&
-		    __pte_alloc(mm, vma, pmd, vmaddr))
-			return -ENOMEM;
-		/* pmd now points to a valid segment table entry. */
-		rmap = kmalloc(sizeof(*rmap), GFP_KERNEL|__GFP_REPEAT);
-		if (!rmap)
-			return -ENOMEM;
-		/* Link gmap segment table entry location to page table. */
-		page = pmd_page(*pmd);
-		mp = (struct gmap_pgtable *) page->index;
-		rmap->entry = segment_ptr;
-		spin_lock(&mm->page_table_lock);
-		list_add(&rmap->list, &mp->mapper);
-		spin_unlock(&mm->page_table_lock);
-		/* Set gmap segment table entry to page table. */
-		*segment_ptr = pmd_val(*pmd) & PAGE_MASK;
-		return vmaddr | (address & ~PMD_MASK);
+	while (1) {
+		segment = *segment_ptr;
+		if (!(segment & _SEGMENT_ENTRY_INV)) {
+			/* Page table is present */
+			page = pfn_to_page(segment >> PAGE_SHIFT);
+			mp = (struct gmap_pgtable *) page->index;
+			return mp->vmaddr | (address & ~PMD_MASK);
+		}
+		if (!(segment & _SEGMENT_ENTRY_RO))
+			/* Nothing mapped in the gmap address space. */
+			break;
+		rc = gmap_connect_pgtable(segment, segment_ptr, gmap);
+		if (rc)
+			return rc;
 	}
 	return -EFAULT;
 }
@@ -574,29 +619,6 @@ void gmap_discard(unsigned long from, unsigned long to, struct gmap *gmap)
 }
 EXPORT_SYMBOL_GPL(gmap_discard);
 
-void gmap_unmap_notifier(struct mm_struct *mm, unsigned long *table)
-{
-	struct gmap_rmap *rmap, *next;
-	struct gmap_pgtable *mp;
-	struct page *page;
-	int flush;
-
-	flush = 0;
-	spin_lock(&mm->page_table_lock);
-	page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
-	mp = (struct gmap_pgtable *) page->index;
-	list_for_each_entry_safe(rmap, next, &mp->mapper, list) {
-		*rmap->entry =
-			_SEGMENT_ENTRY_INV | _SEGMENT_ENTRY_RO | mp->vmaddr;
-		list_del(&rmap->list);
-		kfree(rmap);
-		flush = 1;
-	}
-	spin_unlock(&mm->page_table_lock);
-	if (flush)
-		__tlb_flush_global();
-}
-
 static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm,
 						    unsigned long vmaddr)
 {
@@ -649,8 +671,8 @@ static inline void page_table_free_pgste(unsigned long *table)
 {
 }
 
-static inline void gmap_unmap_notifier(struct mm_struct *mm,
-				       unsigned long *table)
+static inline void gmap_disconnect_pgtable(struct mm_struct *mm,
+					   unsigned long *table)
 {
 }
 
@@ -716,7 +738,7 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
 	unsigned int bit, mask;
 
 	if (mm_has_pgste(mm)) {
-		gmap_unmap_notifier(mm, table);
+		gmap_disconnect_pgtable(mm, table);
 		return page_table_free_pgste(table);
 	}
 	/* Free 1K/2K page table fragment of a 4K page */
@@ -759,7 +781,7 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
 
 	mm = tlb->mm;
 	if (mm_has_pgste(mm)) {
-		gmap_unmap_notifier(mm, table);
+		gmap_disconnect_pgtable(mm, table);
 		table = (unsigned long *) (__pa(table) | FRAG_MASK);
 		tlb_remove_table(tlb, table);
 		return;
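
The first patch above adds gmap_translate() but does not show a caller.
What follows is a minimal illustrative sketch of how a KVM intercept
handler might use it; it is not code from these commits, and the
check_guest_mapping() helper and the vcpu->arch.gmap field are assumed
names standing in for whatever caller (e.g. the tprot handler mentioned
in the commit message) would consume the new interface.

/* Hypothetical caller sketch, assuming a vcpu with an attached gmap. */
static int check_guest_mapping(struct kvm_vcpu *vcpu, unsigned long gaddr)
{
	unsigned long uaddr;

	/* gmap_translate() takes and releases gmap->mm->mmap_sem itself. */
	uaddr = gmap_translate(gaddr, vcpu->arch.gmap);
	if (IS_ERR_VALUE(uaddr))
		return -EFAULT;	/* no mapping exists; none is established */
	/* uaddr is the user space address that backs guest address gaddr. */
	return 0;
}

A caller that already holds mmap_sem would use __gmap_translate()
directly instead of the locking wrapper.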
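The race fix in the second patch is an instance of the common
"allocate outside the lock, recheck inside the lock" idiom: both
threads may walk the page tables and allocate a gmap_rmap, but only
the thread that still sees the original segment table entry under
mm->page_table_lock publishes its allocation, setting rmap to NULL so
the unconditional kfree() only frees the loser's copy. A generic,
self-contained userspace sketch of the same idiom, with hypothetical
names, not taken from the kernel:

#include <stdlib.h>
#include <pthread.h>

struct slot {
	void *ptr;
	pthread_mutex_t lock;
};

static int slot_install(struct slot *s)
{
	void *new = malloc(64);	/* allocate before taking the lock */

	if (!new)
		return -1;
	pthread_mutex_lock(&s->lock);
	if (s->ptr == NULL) {	/* recheck: did another thread win the race? */
		s->ptr = new;	/* no: publish our allocation */
		new = NULL;	/* ownership transferred to the slot */
	}
	pthread_mutex_unlock(&s->lock);
	free(new);		/* loser frees its copy; free(NULL) is a no-op */
	return 0;
}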