author     Dave Airlie <airlied@redhat.com>  2023-10-18 09:08:07 +0300
committer  Dave Airlie <airlied@redhat.com>  2023-10-18 09:08:07 +0300
commit     27442758e9b4e083bef3f164a1739475c01f3202
tree       58f4a82f1b9e41c11b6247d8f7b9c3c9b8b52711  /drivers/gpu/drm/amd/amdkfd/kfd_svm.c
parent     08057253366d916a73e62bafb913d9b659228cc1
parent     cd90511557fdfb394bb4ac4c3b539b007383914c
Merge tag 'amd-drm-next-6.7-2023-10-13' of https://gitlab.freedesktop.org/agd5f/linux into drm-next
amd-drm-next-6.7-2023-10-13:
amdgpu:
- DC replay fixes
- Misc code cleanups and spelling fixes
- Documentation updates
- RAS EEPROM Updates
- FRU EEPROM Updates
- IP discovery updates
- SR-IOV fixes
- RAS updates
- DC PQ fixes
- SMU 13.0.6 updates
- GC 11.5 Support
- NBIO 7.11 Support
- GMC 11 Updates
- Reset fixes
- SMU 11.5 Updates
- SMU 13.0 OD support
- Use flexible arrays for bo list handling
- W=1 Fixes
- SubVP fixes
- DPIA fixes
- DCN 3.5 Support
- Devcoredump fixes
- VPE 6.1 support
- VCN 4.0 Updates
- S/G display fixes
- DML fixes
- DML2 Support
- MST fixes
- VRR fixes
- Enable seamless boot in more cases
- Enable content type property for HDMI
- OLED fixes
- Rework and clean up GPUVM TLB flushing
- DC ODM fixes
- DP 2.x fixes
- AGP aperture fixes
- SDMA firmware loading cleanups
- Cyan Skillfish GPU clock counter fix
- GC 11 GART fix
- Cache GPU fault info for userspace queries
- DC cursor check fixes
- eDP fixes
- DC FP handling fixes
- Variable sized array fixes
- SMU 13.0.x fixes
- IB start and size alignment fixes for VCN
- SMU 14 Support
- Suspend and resume sequence rework
- vkms fix
amdkfd:
- GC 11 fixes
- GC 10 fixes
- Doorbell fixes
- CWSR fixes
- SVM fixes
- Clean up GC info enumeration
- Rework memory limit handling
- Coherent memory handling fixes
- Use partial migrations in GPU faults
- TLB flush fixes
- DMA unmap fixes
- GC 9.4.3 fixes
- SQ interrupt fix
- GTT mapping fix
- GC 11.5 Support
radeon:
- Misc code cleanups
- W=1 Fixes
- Fix possible buffer overflow
- Fix possible NULL pointer dereference
UAPI:
- Add EXT_COHERENT memory allocation flags. These allow for system-scope atomics (a usage sketch follows the commit message below).
Proposed userspace: https://github.com/RadeonOpenCompute/ROCT-Thunk-Interface/pull/88
- Add support for the new VPE engine. This is a memory-to-memory copy engine with advanced scaling, CSC, and color management features.
Proposed mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/25713
- Add INFO IOCTL interface to query GPU faults
Proposed Mesa MR: https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/23238
Proposed libdrm MR: https://gitlab.freedesktop.org/mesa/drm/-/merge_requests/298
Signed-off-by: Dave Airlie <airlied@redhat.com>
From: Alex Deucher <alexander.deucher@amd.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20231013175758.1735031-1-alexander.deucher@amd.com
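
The EXT_COHERENT flag also shows up in the kfd_svm.c diff below as KFD_IOCTL_SVM_FLAG_EXT_COHERENT. As a rough illustration of how userspace could request extended-scope coherence on an SVM range, the sketch below uses the KFD SVM ioctl; only the EXT_COHERENT flag is new in this pull, and the struct kfd_ioctl_svm_args, KFD_IOCTL_SVM_OP_SET_ATTR, KFD_IOCTL_SVM_ATTR_SET_FLAGS and AMDKFD_IOC_SVM names are assumed from the existing linux/kfd_ioctl.h UAPI rather than confirmed by this patch set.

/* Hedged sketch, not part of this patch: request extended-scope ("EXT")
 * coherence, i.e. system-scope atomics, on an SVM range via the KFD SVM
 * ioctl. UAPI names other than KFD_IOCTL_SVM_FLAG_EXT_COHERENT are
 * assumptions taken from the current linux/kfd_ioctl.h.
 */
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/kfd_ioctl.h>

static int svm_set_ext_coherent(int kfd_fd, void *addr, uint64_t size)
{
	struct kfd_ioctl_svm_args *args;
	size_t sz = sizeof(*args) + sizeof(struct kfd_ioctl_svm_attribute);
	int r;

	args = calloc(1, sz);	/* header plus one attribute entry */
	if (!args)
		return -1;

	args->start_addr = (uint64_t)(uintptr_t)addr;
	args->size = size;
	args->op = KFD_IOCTL_SVM_OP_SET_ATTR;
	args->nattr = 1;
	args->attrs[0].type = KFD_IOCTL_SVM_ATTR_SET_FLAGS;
	args->attrs[0].value = KFD_IOCTL_SVM_FLAG_EXT_COHERENT;

	r = ioctl(kfd_fd, AMDKFD_IOC_SVM, args);	/* 0 on success */
	free(args);
	return r;
}

As the svm_range_get_pte_flags() hunks below show, on GC 9.4.3 such ranges are mapped with AMDGPU_VM_MTYPE_CC for local memory and fall back to AMDGPU_VM_MTYPE_UC for remote XGMI peers, rather than the NC mapping used for regular system-scope coherence.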
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_svm.c')
 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 217
 1 file changed, 138 insertions(+), 79 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index bb16b795d1bc..f4038b33c404 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -158,12 +158,13 @@ svm_is_valid_dma_mapping_addr(struct device *dev, dma_addr_t dma_addr)
 static int
 svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
 		      unsigned long offset, unsigned long npages,
-		      unsigned long *hmm_pfns, uint32_t gpuidx)
+		      unsigned long *hmm_pfns, uint32_t gpuidx, uint64_t *vram_pages)
 {
 	enum dma_data_direction dir = DMA_BIDIRECTIONAL;
 	dma_addr_t *addr = prange->dma_addr[gpuidx];
 	struct device *dev = adev->dev;
 	struct page *page;
+	uint64_t vram_pages_dev;
 	int i, r;
 
 	if (!addr) {
@@ -173,6 +174,7 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
 		prange->dma_addr[gpuidx] = addr;
 	}
 
+	vram_pages_dev = 0;
 	addr += offset;
 	for (i = 0; i < npages; i++) {
 		if (svm_is_valid_dma_mapping_addr(dev, addr[i]))
@@ -182,6 +184,7 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
 		if (is_zone_device_page(page)) {
 			struct amdgpu_device *bo_adev = prange->svm_bo->node->adev;
 
+			vram_pages_dev++;
 			addr[i] = (hmm_pfns[i] << PAGE_SHIFT) +
 				   bo_adev->vm_manager.vram_base_offset -
 				   bo_adev->kfd.pgmap.range.start;
@@ -198,13 +201,14 @@ svm_range_dma_map_dev(struct amdgpu_device *adev, struct svm_range *prange,
 		pr_debug_ratelimited("dma mapping 0x%llx for page addr 0x%lx\n",
 				     addr[i] >> PAGE_SHIFT, page_to_pfn(page));
 	}
+	*vram_pages = vram_pages_dev;
 	return 0;
 }
 
 static int
 svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
 		  unsigned long offset, unsigned long npages,
-		  unsigned long *hmm_pfns)
+		  unsigned long *hmm_pfns, uint64_t *vram_pages)
 {
 	struct kfd_process *p;
 	uint32_t gpuidx;
@@ -223,7 +227,7 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
 		}
 
 		r = svm_range_dma_map_dev(pdd->dev->adev, prange, offset, npages,
-					  hmm_pfns, gpuidx);
+					  hmm_pfns, gpuidx, vram_pages);
 		if (r)
 			break;
 	}
@@ -231,7 +235,7 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap,
 	return r;
 }
 
-void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
+void svm_range_dma_unmap_dev(struct device *dev, dma_addr_t *dma_addr,
 			 unsigned long offset, unsigned long npages)
 {
 	enum dma_data_direction dir = DMA_BIDIRECTIONAL;
@@ -249,7 +253,7 @@ void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
 	}
 }
 
-void svm_range_free_dma_mappings(struct svm_range *prange, bool unmap_dma)
+void svm_range_dma_unmap(struct svm_range *prange)
 {
 	struct kfd_process_device *pdd;
 	dma_addr_t *dma_addr;
@@ -270,10 +274,8 @@ void svm_range_free_dma_mappings(struct svm_range *prange, bool unmap_dma)
 			continue;
 		}
 		dev = &pdd->dev->adev->pdev->dev;
-		if (unmap_dma)
-			svm_range_dma_unmap(dev, dma_addr, 0, prange->npages);
-		kvfree(dma_addr);
-		prange->dma_addr[gpuidx] = NULL;
+
+		svm_range_dma_unmap_dev(dev, dma_addr, 0, prange->npages);
 	}
 }
 
@@ -281,18 +283,29 @@ static void svm_range_free(struct svm_range *prange, bool do_unmap)
 {
 	uint64_t size = (prange->last - prange->start + 1) << PAGE_SHIFT;
 	struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
+	uint32_t gpuidx;
 
 	pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx]\n", prange->svms, prange,
 		 prange->start, prange->last);
 
 	svm_range_vram_node_free(prange);
-	svm_range_free_dma_mappings(prange, do_unmap);
+	if (do_unmap)
+		svm_range_dma_unmap(prange);
 
 	if (do_unmap && !p->xnack_enabled) {
 		pr_debug("unreserve prange 0x%p size: 0x%llx\n", prange, size);
 		amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
 					KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
 	}
+
+	/* free dma_addr array for each gpu */
+	for (gpuidx = 0; gpuidx < MAX_GPU_INSTANCE; gpuidx++) {
+		if (prange->dma_addr[gpuidx]) {
+			kvfree(prange->dma_addr[gpuidx]);
+			prange->dma_addr[gpuidx] = NULL;
+		}
+	}
+
 	mutex_destroy(&prange->lock);
 	mutex_destroy(&prange->migrate_mutex);
 	kfree(prange);
@@ -340,6 +353,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
 	INIT_LIST_HEAD(&prange->child_list);
 	atomic_set(&prange->invalid, 0);
 	prange->validate_timestamp = 0;
+	prange->vram_pages = 0;
 	mutex_init(&prange->migrate_mutex);
 	mutex_init(&prange->lock);
 
@@ -386,6 +400,8 @@ static void svm_range_bo_release(struct kref *kref)
 			 prange->start, prange->last);
 		mutex_lock(&prange->lock);
 		prange->svm_bo = NULL;
+		/* prange should not hold vram page now */
+		WARN_ON(prange->actual_loc);
 		mutex_unlock(&prange->lock);
 
 		spin_lock(&svm_bo->list_lock);
@@ -495,11 +511,11 @@ svm_range_validate_svm_bo(struct kfd_node *node, struct svm_range *prange)
 
 	/* We need a new svm_bo. Spin-loop to wait for concurrent
 	 * svm_range_bo_release to finish removing this range from
-	 * its range list. After this, it is safe to reuse the
-	 * svm_bo pointer and svm_bo_list head.
+	 * its range list and set prange->svm_bo to null. After this,
+	 * it is safe to reuse the svm_bo pointer and svm_bo_list head.
 	 */
-	while (!list_empty_careful(&prange->svm_bo_list))
-		;
+	while (!list_empty_careful(&prange->svm_bo_list) || prange->svm_bo)
+		cond_resched();
 
 	return false;
 }
@@ -628,8 +644,15 @@ create_bo_failed:
 
 void svm_range_vram_node_free(struct svm_range *prange)
 {
-	svm_range_bo_unref(prange->svm_bo);
-	prange->ttm_res = NULL;
+	/* serialize prange->svm_bo unref */
+	mutex_lock(&prange->lock);
+	/* prange->svm_bo has not been unref */
+	if (prange->ttm_res) {
+		prange->ttm_res = NULL;
+		mutex_unlock(&prange->lock);
+		svm_range_bo_unref(prange->svm_bo);
+	} else
+		mutex_unlock(&prange->lock);
 }
 
 struct kfd_node *
@@ -820,7 +843,7 @@ svm_range_is_same_attrs(struct kfd_process *p, struct svm_range *prange,
 		}
 	}
 
-	return !prange->is_error_flag;
+	return true;
 }
 
 /**
@@ -959,6 +982,11 @@ svm_range_split_nodes(struct svm_range *new, struct svm_range *old,
 	new->svm_bo = svm_range_bo_ref(old->svm_bo);
 	new->ttm_res = old->ttm_res;
 
+	/* set new's vram_pages as old range's now, the acurate vram_pages
+	 * will be updated during mapping
+	 */
+	new->vram_pages = min(old->vram_pages, new->npages);
+
 	spin_lock(&new->svm_bo->list_lock);
 	list_add(&new->svm_bo_list, &new->svm_bo->range_list);
 	spin_unlock(&new->svm_bo->list_lock);
@@ -1189,14 +1217,15 @@ svm_range_get_pte_flags(struct kfd_node *node,
 	uint32_t mapping_flags = 0;
 	uint64_t pte_flags;
 	bool snoop = (domain != SVM_RANGE_VRAM_DOMAIN);
-	bool coherent = flags & KFD_IOCTL_SVM_FLAG_COHERENT;
+	bool coherent = flags & (KFD_IOCTL_SVM_FLAG_COHERENT | KFD_IOCTL_SVM_FLAG_EXT_COHERENT);
+	bool ext_coherent = flags & KFD_IOCTL_SVM_FLAG_EXT_COHERENT;
 	bool uncached = false; /*flags & KFD_IOCTL_SVM_FLAG_UNCACHED;*/
 	unsigned int mtype_local;
 
 	if (domain == SVM_RANGE_VRAM_DOMAIN)
 		bo_node = prange->svm_bo->node;
 
-	switch (node->adev->ip_versions[GC_HWIP][0]) {
+	switch (amdgpu_ip_version(node->adev, GC_HWIP, 0)) {
 	case IP_VERSION(9, 4, 1):
 		if (domain == SVM_RANGE_VRAM_DOMAIN) {
 			if (bo_node == node) {
@@ -1233,7 +1262,8 @@ svm_range_get_pte_flags(struct kfd_node *node,
 		break;
 	case IP_VERSION(9, 4, 3):
 		mtype_local = amdgpu_mtype_local == 1 ? AMDGPU_VM_MTYPE_NC :
-			      (amdgpu_mtype_local == 2 ? AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW);
+			      (amdgpu_mtype_local == 2 || ext_coherent ?
+					AMDGPU_VM_MTYPE_CC : AMDGPU_VM_MTYPE_RW);
 		snoop = true;
 		if (uncached) {
 			mapping_flags |= AMDGPU_VM_MTYPE_UC;
@@ -1242,10 +1272,12 @@ svm_range_get_pte_flags(struct kfd_node *node,
 			if (bo_node->adev == node->adev &&
 			    (!bo_node->xcp || !node->xcp || bo_node->xcp->mem_id == node->xcp->mem_id))
 				mapping_flags |= mtype_local;
-			/* local HBM region far from partition or remote XGMI GPU */
-			else if (svm_nodes_in_same_hive(bo_node, node))
+			/* local HBM region far from partition or remote XGMI GPU
+			 * with regular system scope coherence
+			 */
+			else if (svm_nodes_in_same_hive(bo_node, node) && !ext_coherent)
 				mapping_flags |= AMDGPU_VM_MTYPE_NC;
-			/* PCIe P2P */
+			/* PCIe P2P or extended system scope coherence */
 			else
 				mapping_flags |= AMDGPU_VM_MTYPE_UC;
 		/* system memory accessed by the APU */
@@ -1592,6 +1624,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 	struct svm_validate_context *ctx;
 	unsigned long start, end, addr;
 	struct kfd_process *p;
+	uint64_t vram_pages;
 	void *owner;
 	int32_t idx;
 	int r = 0;
@@ -1660,75 +1693,85 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 		}
 	}
 
+	vram_pages = 0;
 	start = prange->start << PAGE_SHIFT;
 	end = (prange->last + 1) << PAGE_SHIFT;
-	for (addr = start; addr < end && !r; ) {
+	for (addr = start; !r && addr < end; ) {
 		struct hmm_range *hmm_range;
 		struct vm_area_struct *vma;
-		unsigned long next;
+		uint64_t vram_pages_vma;
+		unsigned long next = 0;
 		unsigned long offset;
 		unsigned long npages;
 		bool readonly;
 
 		vma = vma_lookup(mm, addr);
-		if (!vma) {
+		if (vma) {
+			readonly = !(vma->vm_flags & VM_WRITE);
+
+			next = min(vma->vm_end, end);
+			npages = (next - addr) >> PAGE_SHIFT;
+			WRITE_ONCE(p->svms.faulting_task, current);
+			r = amdgpu_hmm_range_get_pages(&prange->notifier, addr, npages,
+						       readonly, owner, NULL,
+						       &hmm_range);
+			WRITE_ONCE(p->svms.faulting_task, NULL);
+			if (r) {
+				pr_debug("failed %d to get svm range pages\n", r);
+				if (r == -EBUSY)
+					r = -EAGAIN;
+			}
+		} else {
 			r = -EFAULT;
-			goto unreserve_out;
-		}
-		readonly = !(vma->vm_flags & VM_WRITE);
-
-		next = min(vma->vm_end, end);
-		npages = (next - addr) >> PAGE_SHIFT;
-		WRITE_ONCE(p->svms.faulting_task, current);
-		r = amdgpu_hmm_range_get_pages(&prange->notifier, addr, npages,
-					       readonly, owner, NULL,
-					       &hmm_range);
-		WRITE_ONCE(p->svms.faulting_task, NULL);
-		if (r) {
-			pr_debug("failed %d to get svm range pages\n", r);
-			if (r == -EBUSY)
-				r = -EAGAIN;
-			goto unreserve_out;
 		}
 
-		offset = (addr - start) >> PAGE_SHIFT;
-		r = svm_range_dma_map(prange, ctx->bitmap, offset, npages,
-				      hmm_range->hmm_pfns);
-		if (r) {
-			pr_debug("failed %d to dma map range\n", r);
-			goto unreserve_out;
+		if (!r) {
+			offset = (addr - start) >> PAGE_SHIFT;
+			r = svm_range_dma_map(prange, ctx->bitmap, offset, npages,
					      hmm_range->hmm_pfns, &vram_pages_vma);
+			if (r)
+				pr_debug("failed %d to dma map range\n", r);
+			else
+				vram_pages += vram_pages_vma;
 		}
 
 		svm_range_lock(prange);
-		if (amdgpu_hmm_range_get_pages_done(hmm_range)) {
+		if (!r && amdgpu_hmm_range_get_pages_done(hmm_range)) {
 			pr_debug("hmm update the range, need validate again\n");
 			r = -EAGAIN;
-			goto unlock_out;
 		}
-		if (!list_empty(&prange->child_list)) {
+
+		if (!r && !list_empty(&prange->child_list)) {
 			pr_debug("range split by unmap in parallel, validate again\n");
 			r = -EAGAIN;
-			goto unlock_out;
 		}
 
-		r = svm_range_map_to_gpus(prange, offset, npages, readonly,
-					  ctx->bitmap, wait, flush_tlb);
+		if (!r)
+			r = svm_range_map_to_gpus(prange, offset, npages, readonly,
						  ctx->bitmap, wait, flush_tlb);
+
+		if (!r && next == end)
+			prange->mapped_to_gpu = true;
 
-unlock_out:
 		svm_range_unlock(prange);
 
 		addr = next;
 	}
 
 	if (addr == end) {
-		prange->validated_once = true;
-		prange->mapped_to_gpu = true;
+		prange->vram_pages = vram_pages;
+
+		/* if prange does not include any vram page and it
+		 * has not released svm_bo drop its svm_bo reference
+		 * and set its actaul_loc to sys ram
+		 */
+		if (!vram_pages && prange->ttm_res) {
+			prange->actual_loc = 0;
+			svm_range_vram_node_free(prange);
+		}
 	}
 
-unreserve_out:
 	svm_range_unreserve_bos(ctx);
-
-	prange->is_error_flag = !!r;
 	if (!r)
 		prange->validate_timestamp = ktime_get_boottime();
 
@@ -1980,6 +2023,7 @@ static struct svm_range *svm_range_clone(struct svm_range *old)
 	new->actual_loc = old->actual_loc;
 	new->granularity = old->granularity;
 	new->mapped_to_gpu = old->mapped_to_gpu;
+	new->vram_pages = old->vram_pages;
 	bitmap_copy(new->bitmap_access, old->bitmap_access, MAX_GPU_INSTANCE);
 	bitmap_copy(new->bitmap_aip, old->bitmap_aip, MAX_GPU_INSTANCE);
 
@@ -2097,7 +2141,8 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
 		next = interval_tree_iter_next(node, start, last);
 		next_start = min(node->last, last) + 1;
 
-		if (svm_range_is_same_attrs(p, prange, nattr, attrs)) {
+		if (svm_range_is_same_attrs(p, prange, nattr, attrs) &&
+		    prange->mapped_to_gpu) {
 			/* nothing to do */
 		} else if (node->start < start || node->last > last) {
 			/* node intersects the update range and its attributes
@@ -2884,6 +2929,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
 			uint32_t vmid, uint32_t node_id,
 			uint64_t addr, bool write_fault)
 {
+	unsigned long start, last, size;
 	struct mm_struct *mm = NULL;
 	struct svm_range_list *svms;
 	struct svm_range *prange;
@@ -3019,32 +3065,35 @@ retry_write_locked:
 	kfd_smi_event_page_fault_start(node, p->lead_thread->pid, addr,
 				       write_fault, timestamp);
 
-	if (prange->actual_loc != best_loc) {
+	if (prange->actual_loc != 0 || best_loc != 0) {
 		migration = true;
+		/* Align migration range start and size to granularity size */
+		size = 1UL << prange->granularity;
+		start = max_t(unsigned long, ALIGN_DOWN(addr, size), prange->start);
+		last = min_t(unsigned long, ALIGN(addr + 1, size) - 1, prange->last);
+
 		if (best_loc) {
-			r = svm_migrate_to_vram(prange, best_loc, mm,
-					KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
+			r = svm_migrate_to_vram(prange, best_loc, start, last,
+					mm, KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU);
 			if (r) {
 				pr_debug("svm_migrate_to_vram failed (%d) at %llx, falling back to system memory\n",
 					 r, addr);
 				/* Fallback to system memory if migration to
 				 * VRAM failed
 				 */
-				if (prange->actual_loc)
-					r = svm_migrate_vram_to_ram(prange, mm,
-					   KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU,
-					   NULL);
+				if (prange->actual_loc && prange->actual_loc != best_loc)
+					r = svm_migrate_vram_to_ram(prange, mm, start, last,
						KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, NULL);
 				else
 					r = 0;
 			}
 		} else {
-			r = svm_migrate_vram_to_ram(prange, mm,
-					KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU,
-					NULL);
+			r = svm_migrate_vram_to_ram(prange, mm, start, last,
					KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, NULL);
 		}
 		if (r) {
 			pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n",
-				 r, svms, prange->start, prange->last);
+				 r, svms, start, last);
 			goto out_unlock_range;
 		}
 	}
@@ -3398,18 +3447,24 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange,
 	*migrated = false;
 	best_loc = svm_range_best_prefetch_location(prange);
 
-	if (best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED ||
-	    best_loc == prange->actual_loc)
+	/* when best_loc is a gpu node and same as prange->actual_loc
+	 * we still need do migration as prange->actual_loc !=0 does
+	 * not mean all pages in prange are vram. hmm migrate will pick
+	 * up right pages during migration.
+	 */
+	if ((best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED) ||
+	    (best_loc == 0 && prange->actual_loc == 0))
 		return 0;
 
 	if (!best_loc) {
-		r = svm_migrate_vram_to_ram(prange, mm,
+		r = svm_migrate_vram_to_ram(prange, mm, prange->start, prange->last,
					KFD_MIGRATE_TRIGGER_PREFETCH, NULL);
 		*migrated = !r;
 		return r;
 	}
 
-	r = svm_migrate_to_vram(prange, best_loc, mm, KFD_MIGRATE_TRIGGER_PREFETCH);
+	r = svm_migrate_to_vram(prange, best_loc, prange->start, prange->last,
				mm, KFD_MIGRATE_TRIGGER_PREFETCH);
 	*migrated = !r;
 
 	return r;
@@ -3464,7 +3519,11 @@ static void svm_range_evict_svm_bo_worker(struct work_struct *work)
 
 		mutex_lock(&prange->migrate_mutex);
 		do {
+			/* migrate all vram pages in this prange to sys ram
+			 * after that prange->actual_loc should be zero
+			 */
 			r = svm_migrate_vram_to_ram(prange, mm,
+					prange->start, prange->last,
 					KFD_MIGRATE_TRIGGER_TTM_EVICTION, NULL);
 		} while (!r && prange->actual_loc && --retries);
 
@@ -3507,7 +3566,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
 	struct svm_range *next;
 	bool update_mapping = false;
 	bool flush_tlb;
-	int r = 0;
+	int r, ret = 0;
 
 	pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] pages 0x%llx\n",
 		 p->pasid, &p->svms, start, start + size - 1, size);
@@ -3595,7 +3654,7 @@ svm_range_set_attr(struct kfd_process *p, struct mm_struct *mm,
 out_unlock_range:
 		mutex_unlock(&prange->migrate_mutex);
 		if (r)
-			break;
+			ret = r;
 	}
 
 	dynamic_svm_range_dump(svms);
@@ -3608,7 +3667,7 @@ out:
 	pr_debug("pasid 0x%x svms 0x%p [0x%llx 0x%llx] done, r=%d\n",
 		 p->pasid, &p->svms, start, start + size - 1, r);
 
-	return r;
+	return ret ? ret : r;
 }
 
 static int
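
One functional change worth calling out from the diff above: GPU page faults now migrate a granularity-aligned window around the faulting address instead of the whole prange (the svm_range_restore_pages hunk). The standalone sketch below reproduces that window computation outside the kernel for illustration only; the helper name is made up, the kernel's ALIGN/ALIGN_DOWN/max_t/min_t macros are replaced with plain arithmetic, and all values are page numbers, mirroring prange->start/prange->last.

#include <stdint.h>
#include <stdio.h>

/* Illustration of the migration window computed in svm_range_restore_pages():
 * expand the faulting page to a 2^granularity-page window, then clamp it to
 * the range bounds.
 */
static void fault_migration_window(uint64_t addr, uint64_t range_start,
				   uint64_t range_last, unsigned int granularity,
				   uint64_t *start, uint64_t *last)
{
	uint64_t size = 1ULL << granularity;			/* window size in pages */
	uint64_t aligned_start = addr & ~(size - 1);		/* ALIGN_DOWN(addr, size) */
	uint64_t aligned_last = ((addr + size) & ~(size - 1)) - 1; /* ALIGN(addr + 1, size) - 1 */

	*start = aligned_start > range_start ? aligned_start : range_start;
	*last = aligned_last < range_last ? aligned_last : range_last;
}

int main(void)
{
	uint64_t start, last;

	/* Fault at page 0x1234 in a range [0x1000, 0x1fff] with the default
	 * granularity of 9 (512 pages, i.e. 2 MiB with 4 KiB pages).
	 */
	fault_migration_window(0x1234, 0x1000, 0x1fff, 9, &start, &last);
	printf("migrate pages [0x%llx 0x%llx]\n",
	       (unsigned long long)start, (unsigned long long)last);
	/* prints: migrate pages [0x1200 0x13ff] */
	return 0;
}

With the default granularity this limits a fault-triggered migration to a 512-page neighborhood clamped to the range, which is what the new start/last parameters of svm_migrate_to_vram() and svm_migrate_vram_to_ram() carry in the hunks above.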