diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_svm.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 217 |
1 files changed, 138 insertions, 79 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index bb16b795d1bc..f4038b33c404 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -158,12 +158,13 @@ svm_is_valid_dma_mapping_addr(struct device *dev, dma_addr_t dma_addr) static int svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange, unsigned long offset, unsigned long npages, - unsigned long *hmm_pfns, uint32_t gpuidx) + unsigned long *hmm_pfns, uint32_t gpuidx, uint64_t *vram_pages) { enum dma_data_direction dir = DMA_BIDIRECTIONAL; dma_addr_t *addr = prange->dma_addr[gpuidx]; struct device *dev = adev->dev; struct page *page; + uint64_t vram_pages_dev; int i, r; if (!addr) { @@ -173,6 +174,7 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange, prange->dma_addr[gpuidx] = addr; } + vram_pages_dev = 0; addr += offset; for (i = 0; i < npages; i++) { if (svm_is_valid_dma_mapping_addr(dev, addr[i])) @@ -182,6 +184,7 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange, if (is_zone_device_page(page)) { struct amdgpu_device *bo_adev = prange->svm_bo->node->adev; + vram_pages_dev++; addr[i] = (hmm_pfns[i] << PAGE_SHIFT) + bo_adev->vm_manager.vram_base_offset - bo_adev->kfd.pgmap.range.start; @@ -198,13 +201,14 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange, pr_debug_ratelimited("dma mapping 0x%llx for page addr 0x%lx\n", addr[i] >> PAGE_SHIFT, page_to_pfn(page)); } + *vram_pages = vram_pages_dev; return 0; } static int svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap, unsigned long offset, unsigned long npages, - unsigned long *hmm_pfns) + unsigned long *hmm_pfns, uint64_t *vram_pages) { struct kfd_process *p; uint32_t gpuidx; @@ -223,7 +227,7 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap, } r = svm_range_dma_map_dev(pdd->dev->adev, prange, offset, npages, - hmm_pfns, gpuidx); + hmm_pfns, gpuidx, vram_pages); if (r) break; } @@ -231,7 +235,7 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap, return r; } -void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr, +void svm_range_dma_unmap_dev(struct device *dev, dma_addr_t *dma_addr, unsigned long offset, unsigned long npages) { enum dma_data_direction dir = DMA_BIDIRECTIONAL; @@ -249,7 +253,7 @@ void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr, } } -void svm_range_free_dma_mappings(struct svm_range *prange, bool unmap_dma) +void svm_range_dma_unmap(struct svm_range *prange) { struct kfd_process_device *pdd; dma_addr_t *dma_addr; @@ -270,10 +274,8 @@ void svm_range_free_dma_mappings(struct svm_range *prange, bool unmap_dma) continue; } dev = &pdd->dev->adev->pdev->dev; - if (unmap_dma) - svm_range_dma_unmap(dev, dma_addr, 0, prange->npages); - kvfree(dma_addr); - prange->dma_addr[gpuidx] = NULL; + + svm_range_dma_unmap_dev(dev, dma_addr, 0, prange->npages); } } @@ -281,18 +283,29 @@ static void svm_range_free(struct svm_range *prange, bool do_unmap) { uint64_t size = (prange->last - prange->start + 1) << PAGE_SHIFT; struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms); + uint32_t gpuidx; pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange, prange->start, prange->last); svm_range_vram_node_free(prange); - svm_range_free_dma_mappings(prange, do_unmap); + if (do_unmap) + svm_range_dma_unmap(prange); if (do_unmap && !p->xnack_enabled) { pr_debug("unreserve prange 0x%p size: 0x%llx\n", prange, size); amdgpu_amdkfd_unreserve_mem_limit(NULL, size, KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0); } + + /* free dma_addr array for each gpu */ + for (gpuidx = 0; gpuidx < MAX_GPU_INSTANCE; gpuidx++) { + if (prange->dma_addr[gpuidx]) { + kvfree(prange->dma_addr[gpuidx]); + prange->dma_addr[gpuidx] = NULL; + } + } + mutex_destroy(&prange->lock); mutex_destroy(&prange->migrate_mutex); kfree(prange); @@ -340,6 +353,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start, INIT_LIST_HEAD(&prange->child_list); atomic_set(&prange->invalid, 0); prange->validate_timestamp = 0; + prange->vram_pages = 0; mutex_init(&prange->migrate_mutex); mutex_init(&prange->lock); @@ -386,6 +400,8 @@ static void svm_range_bo_release(struct kref *kref) prange->start, prange->last); mutex_lock(&prange->lock); prange->svm_bo = NULL; + /* prange should not hold vram page now */ + WARN_ON(prange->actual_loc); mutex_unlock(&prange->lock); spin_lock(&svm_bo->list_lock); @@ -495,11 +511,11 @@ svm_range_validate_svm_bo(struct kfd_node *node, struct svm_range *prange) /* We need a new svm_bo. Spin-loop to wait for concurrent * svm_range_bo_release to finish removing this range from - * its range list. After this, it is safe to reuse the - * svm_bo pointer and svm_bo_list head. + * its range list and set prange->svm_bo to null. After this, + * it is safe to reuse the svm_bo pointer and svm_bo_list head. */ - while (!list_empty_careful(&prange->svm_bo_list)) - ; + while (!list_empty_careful(&prange->svm_bo_list) || prange->svm_bo) + cond_resched(); return false; } @@ -628,8 +644,15 @@ create_bo_failed: void svm_range_vram_node_free(struct svm_range *prange) { - svm_range_bo_unref(prange->svm_bo); - prange->ttm_res = NULL; + /* serialize prange->svm_bo unref */ + mutex_lock(&prange->lock); + /* prange->svm_bo has not been unref */ + if (prange->ttm_res) { + prange->ttm_res = NULL; + mutex_unlock(&prange->lock); + svm_range_bo_unref(prange->svm_bo); + } else + mutex_unlock(&prange->lock); } struct kfd_node * @@ -820,7 +843,7 @@ svm_range_is_same_attrs(struct kfd_process *p, struct svm_range *prange, } } - return !prange->is_error_flag; + return true; } /** @@ -959,6 +982,11 @@ svm_range_split_nodes(struct svm_range *new, struct svm_range *old, new->svm_bo = svm_range_bo_ref(old->svm_bo); new->ttm_res = old->ttm_res; + /* set new's vram_pages as old range's now, the acurate vram_pages + * will be updated during mapping + */ + new->vram_pages = min(old->vram_pages, new->npages); + spin_lock(&new->svm_bo->list_lock); list_add(&new->svm_bo_list, &new->svm_bo->range_list); spin_unlock(&new->svm_bo->list_lock); @@ -1189,14 +1217,15 @@ svm_range_get_pte_flags(struct kfd_node *node, uint32_t mapping_flags = 0; uint64_t pte_flags; bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN); - bool coherent = flags & KFD_IOCTL_SVM_FLAG_COHERENT; + bool coherent = flags & (KFD_IOCTL_SVM_FLAG_COHERENT | KFD_IOCTL_SVM_FLAG_EXT_COHERENT); + bool ext_coherent = flags & KFD_IOCTL_SVM_FLAG_EXT_COHERENT; bool uncached = false; /*flags & KFD_IOCTL_SVM_FLAG_UNCACHED;*/ unsigned int mtype_local; if (domain == SVM_RANGE_VRAM_DOMAIN) bo_node = prange->svm_bo->node; - switch (node->adev->ip_versions[GC_HWIP][0]) { + switch (amdgpu_ip_version(node->adev, GC_HWIP, 0)) { case IP_VERSION(9, 4, 1): if (domain == SVM_RANGE_VRAM_DOMAIN) { if (bo_node == node) { @@ -1233,7 +1262,8 @@ svm_range_get_pte_flags(struct kfd_node *node, break; case IP_VERSION(9, 4, 3): mtype_local = amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC : - (amdgpu_mtype_local == 2 ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW); + (amdgpu_mtype_local == 2 || ext_coherent ? + AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW); snoop = true; if (uncached) { mapping_flags |= AMDGPU_VM_MTYPE_UC; @@ -1242,10 +1272,12 @@ svm_range_get_pte_flags(struct kfd_node *node, if (bo_node->adev == node->adev && (!bo_node->xcp || !node->xcp || bo_node->xcp->mem_id == node->xcp->mem_id)) mapping_flags |= mtype_local; - /* local HBM region far from partition or remote XGMI GPU */ - else if (svm_nodes_in_same_hive(bo_node, node)) + /* local HBM region far from partition or remote XGMI GPU + * with regular system scope coherence + */ + else if (svm_nodes_in_same_hive(bo_node, node) && !ext_coherent) mapping_flags |= AMDGPU_VM_MTYPE_NC; - /* PCIe P2P */ + /* PCIe P2P or extended system scope coherence */ else mapping_flags |= AMDGPU_VM_MTYPE_UC; /* system memory accessed by the APU */ @@ -1592,6 +1624,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm, struct svm_validate_context *ctx; unsigned long start, end, addr; struct kfd_process *p; + uint64_t vram_pages; void *owner; int32_t idx; int r = 0; @@ -1660,75 +1693,85 @@ static int svm_range_validate_and_map(struct mm_struct *mm, } } + vram_pages = 0; start = prange->start << PAGE_SHIFT; end = (prange->last + 1) << PAGE_SHIFT; - for (addr = start; addr < end && !r; ) { + for (addr = start; !r && addr < end; ) { struct hmm_range *hmm_range; struct vm_area_struct *vma; - unsigned long next; + uint64_t vram_pages_vma; + unsigned long next = 0; unsigned long offset; unsigned long npages; bool readonly; vma = vma_lookup(mm, addr); - if (!vma) { + if (vma) { + readonly = !(vma->vm_flags & VM_WRITE); + + next = min(vma->vm_end, end); + npages = (next - addr) >> PAGE_SHIFT; + WRITE_ONCE(p->svms.faulting_task, current); + r = amdgpu_hmm_range_get_pages(&prange->notifier, addr, npages, + readonly, owner, NULL, + &hmm_range); + WRITE_ONCE(p->svms.faulting_task, NULL); + if (r) { + pr_debug("failed %d to get svm range pages\n", r); + if (r == -EBUSY) + r = -EAGAIN; + } + } else { r = -EFAULT; - goto unreserve_out; - } - readonly = !(vma->vm_flags & VM_WRITE); - - next = min(vma->vm_end, end); - npages = (next - addr) >> PAGE_SHIFT; - WRITE_ONCE(p->svms.faulting_task, current); - r = amdgpu_hmm_range_get_pages(&prange->notifier, addr, npages, - readonly, owner, NULL, - &hmm_range); - WRITE_ONCE(p->svms.faulting_task, NULL); - if (r) { - pr_debug("failed %d to get svm range pages\n", r); - if (r == -EBUSY) - r = -EAGAIN; - goto unreserve_out; } - offset = (addr - start) >> PAGE_SHIFT; - r = svm_range_dma_map(prange, ctx->bitmap, offset, npages, - hmm_range->hmm_pfns); - if (r) { - pr_debug("failed %d to dma map range\n", r); - goto unreserve_out; + if (!r) { + offset = (addr - start) >> PAGE_SHIFT; + r = svm_range_dma_map(prange, ctx->bitmap, offset, npages, + hmm_range->hmm_pfns, &vram_pages_vma); + if (r) + pr_debug("failed %d to dma map range\n", r); + else + vram_pages += vram_pages_vma; } svm_range_lock(prange); - if (amdgpu_hmm_range_get_pages_done(hmm_range)) { + if (!r && amdgpu_hmm_range_get_pages_done(hmm_range)) { pr_debug("hmm update the range, need validate again\n"); r = -EAGAIN; - goto unlock_out; } - if (!list_empty(&prange->child_list)) { + + if (!r && !list_empty(&prange->child_list)) { pr_debug("range split by unmap in parallel, validate again\n"); r = -EAGAIN; - goto unlock_out; } - r = svm_range_map_to_gpus(prange, offset, npages, readonly, - ctx->bitmap, wait, flush_tlb); + if (!r) + r = svm_range_map_to_gpus(prange, offset, npages, readonly, + ctx->bitmap, wait, flush_tlb); + + if (!r && next == end) + prange->mapped_to_gpu = true; -unlock_out: svm_range_unlock(prange); addr = next; } if (addr == end) { - prange->validated_once = true; - prange->mapped_to_gpu = true; + prange->vram_pages = vram_pages; + + /* if prange does not include any vram page and it + * has not released svm_bo drop its svm_bo reference + * and set its actaul_loc to sys ram + */ + if (!vram_pages && prange->ttm_res) { + prange->actual_loc = 0; + svm_range_vram_node_free(prange); + } } -unreserve_out: svm_range_unreserve_bos(ctx); - - prange->is_error_flag = !!r; if (!r) prange->validate_timestamp = ktime_get_boottime(); @@ -1980,6 +2023,7 @@ static struct svm_range *svm_range_clone(struct svm_range *old) new->actual_loc = old->actual_loc; new->granularity = old->granularity; new->mapped_to_gpu = old->mapped_to_gpu; + new->vram_pages = old->vram_pages; bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE); bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE); @@ -2097,7 +2141,8 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size, next = interval_tree_iter_next(node, start, last); next_start = min(node->last, last) + 1; - if (svm_range_is_same_attrs(p, prange, nattr, attrs)) { + if (svm_range_is_same_attrs(p, prange, nattr, attrs) && + prange->mapped_to_gpu) { /* nothing to do */ } else if (node->start < start || node->last > last) { /* node intersects the update range and its attributes @@ -2884,6 +2929,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, uint32_t vmid, uint32_t node_id, uint64_t addr, bool write_fault) { + unsigned long start, last, size; struct mm_struct *mm = NULL; struct svm_range_list *svms; struct svm_range *prange; @@ -3019,32 +3065,35 @@ retry_write_locked: kfd_smi_event_page_fault_start(node, p->lead_thread->pid, addr, write_fault, timestamp); - if (prange->actual_loc != best_loc) { + if (prange->actual_loc != 0 || best_loc != 0) { migration = true; + /* Align migration range start and size to granularity size */ + size = 1UL << prange->granularity; + start = max_t(unsigned long, ALIGN_DOWN(addr, size), prange->start); + last = min_t(unsigned long, ALIGN(addr + 1, size) - 1, prange->last); + if (best_loc) { - r = svm_migrate_to_vram(prange, best_loc, mm, - KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU); + r = svm_migrate_to_vram(prange, best_loc, start, last, + mm, KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU); if (r) { pr_debug("svm_migrate_to_vram failed (%d) at %llx, falling back to system memory\n", r, addr); /* Fallback to system memory if migration to * VRAM failed */ - if (prange->actual_loc) - r = svm_migrate_vram_to_ram(prange, mm, - KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, - NULL); + if (prange->actual_loc && prange->actual_loc != best_loc) + r = svm_migrate_vram_to_ram(prange, mm, start, last, + KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, NULL); else r = 0; } } else { - r = svm_migrate_vram_to_ram(prange, mm, - KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, - NULL); + r = svm_migrate_vram_to_ram(prange, mm, start, last, + KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, NULL); } if (r) { pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n", - r, svms, prange->start, prange->last); + r, svms, start, last); goto out_unlock_range; } } @@ -3398,18 +3447,24 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange, *migrated = false; best_loc = svm_range_best_prefetch_location(prange); - if (best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED || - best_loc == prange->actual_loc) + /* when best_loc is a gpu node and same as prange->actual_loc + * we still need do migration as prange->actual_loc !=0 does + * not mean all pages in prange are vram. hmm migrate will pick + * up right pages during migration. + */ + if ((best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED) || + (best_loc == 0 && prange->actual_loc == 0)) return 0; if (!best_loc) { - r = svm_migrate_vram_to_ram(prange, mm, + r = svm_migrate_vram_to_ram(prange, mm, prange->start, prange->last, KFD_MIGRATE_TRIGGER_PREFETCH, NULL); *migrated = !r; return r; } - r = svm_migrate_to_vram(prange, best_loc, mm, KFD_MIGRATE_TRIGGER_PREFETCH); + r = svm_migrate_to_vram(prange, best_loc, prange->start, prange->last, + mm, KFD_MIGRATE_TRIGGER_PREFETCH); *migrated = !r; return r; @@ -3464,7 +3519,11 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work) mutex_lock(&prange->migrate_mutex); do { + /* migrate all vram pages in this prange to sys ram + * after that prange->actual_loc should be zero + */ r = svm_migrate_vram_to_ram(prange, mm, + prange->start, prange->last, KFD_MIGRATE_TRIGGER_TTM_EVICTION, NULL); } while (!r && prange->actual_loc && --retries); @@ -3507,7 +3566,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm, struct svm_range *next; bool update_mapping = false; bool flush_tlb; - int r = 0; + int r, ret = 0; pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] pages 0x%llx\n", p->pasid, &p->svms, start, start + size - 1, size); @@ -3595,7 +3654,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm, out_unlock_range: mutex_unlock(&prange->migrate_mutex); if (r) - break; + ret = r; } dynamic_svm_range_dump(svms); @@ -3608,7 +3667,7 @@ out: pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] done, r=%d\n", p->pasid, &p->svms, start, start + size - 1, r); - return r; + return ret ? ret : r; } static int |