From 814ab9930cfd709768439799eae3c7ef0a658b54 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Fri, 7 Feb 2020 15:34:33 -0500 Subject: drm/amdkfd: register HMM device private zone Register vram memory as MEMORY_DEVICE_PRIVATE type resource, to allocate vram backing pages for page migration. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.h | 49 ++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_migrate.h (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_migrate.h') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h new file mode 100644 index 000000000000..89392548ec44 --- /dev/null +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright 2020-2021 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef KFD_MIGRATE_H_ +#define KFD_MIGRATE_H_ + +#include +#include +#include +#include +#include +#include "kfd_priv.h" +#include "kfd_svm.h" + +#if defined(CONFIG_DEVICE_PRIVATE) +int svm_migrate_init(struct amdgpu_device *adev); +void svm_migrate_fini(struct amdgpu_device *adev); + +#else +static inline int svm_migrate_init(struct amdgpu_device *adev) +{ + DRM_WARN_ONCE("DEVICE_PRIVATE kernel config option is not enabled, " + "add CONFIG_DEVICE_PRIVATE=y in config file to fix\n"); + return -ENODEV; +} +static inline void svm_migrate_fini(struct amdgpu_device *adev) {} +#endif +#endif /* KFD_MIGRATE_H_ */ -- cgit v1.2.3 From 50ea50cf6f6d31d3235ad1853c5dbea766a3ed11 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Fri, 7 Feb 2020 17:08:04 -0500 Subject: drm/amdkfd: copy memory through gart table Use sdma linear copy to migrate data between ram and vram. The sdma linear copy command uses kernel buffer function queue to access system memory through gart table. Use reserved gart table window 0 to map system page address, and vram page address is direct mapping. Use the same kernel buffer function to fill in gart table mapping, so this is serialized with memory copy by sdma job submit. We only need wait for the last memory copy sdma fence for larger buffer migration. 
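For illustration only (not part of the commit): a minimal sketch of how a caller is expected to use the two helpers this patch adds, assuming the signatures shown in the diff below; adev, sys_dma_addrs, vram_addrs and npages are placeholder names. Because the GART update and the copy share the buffer-functions ring, the chunked copies are serialized and only the fence of the last chunk needs to be waited on.

	struct dma_fence *mfence = NULL;
	int r;

	/* queue one or more GTT_MAX_PAGES-sized SDMA copies; the fence of
	 * the last chunk is returned in mfence
	 */
	r = svm_migrate_copy_memory_gart(adev, sys_dma_addrs, vram_addrs,
					 npages, FROM_RAM_TO_VRAM, &mfence);
	if (!r)
		/* a single wait covers the whole migration */
		r = svm_migrate_copy_done(adev, mfence);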
Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 172 +++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_migrate.h | 5 + 2 files changed, 177 insertions(+) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_migrate.h') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index d8cec5ebe1d4..74b38856cce3 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -33,6 +33,178 @@ #include "kfd_svm.h" #include "kfd_migrate.h" +static uint64_t +svm_migrate_direct_mapping_addr(struct amdgpu_device *adev, uint64_t addr) +{ + return addr + amdgpu_ttm_domain_start(adev, TTM_PL_VRAM); +} + +static int +svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages, + dma_addr_t *addr, uint64_t *gart_addr, uint64_t flags) +{ + struct amdgpu_device *adev = ring->adev; + struct amdgpu_job *job; + unsigned int num_dw, num_bytes; + struct dma_fence *fence; + uint64_t src_addr, dst_addr; + uint64_t pte_flags; + void *cpu_addr; + int r; + + /* use gart window 0 */ + *gart_addr = adev->gmc.gart_start; + + num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8); + num_bytes = npages * 8; + + r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes, + AMDGPU_IB_POOL_DELAYED, &job); + if (r) + return r; + + src_addr = num_dw * 4; + src_addr += job->ibs[0].gpu_addr; + + dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo); + amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, + dst_addr, num_bytes, false); + + amdgpu_ring_pad_ib(ring, &job->ibs[0]); + WARN_ON(job->ibs[0].length_dw > num_dw); + + pte_flags = AMDGPU_PTE_VALID | AMDGPU_PTE_READABLE; + pte_flags |= AMDGPU_PTE_SYSTEM | AMDGPU_PTE_SNOOPED; + if (!(flags & KFD_IOCTL_SVM_FLAG_GPU_RO)) + pte_flags |= AMDGPU_PTE_WRITEABLE; + pte_flags |= adev->gart.gart_pte_flags; + + cpu_addr = &job->ibs[0].ptr[num_dw]; + + r = amdgpu_gart_map(adev, 0, npages, addr, pte_flags, cpu_addr); + if (r) + goto error_free; + + r = amdgpu_job_submit(job, &adev->mman.entity, + AMDGPU_FENCE_OWNER_UNDEFINED, &fence); + if (r) + goto error_free; + + dma_fence_put(fence); + + return r; + +error_free: + amdgpu_job_free(job); + return r; +} + +/** + * svm_migrate_copy_memory_gart - sdma copy data between ram and vram + * + * @adev: amdgpu device the sdma ring running + * @src: source page address array + * @dst: destination page address array + * @npages: number of pages to copy + * @direction: enum MIGRATION_COPY_DIR + * @mfence: output, sdma fence to signal after sdma is done + * + * ram address uses GART table continuous entries mapping to ram pages, + * vram address uses direct mapping of vram pages, which must have npages + * number of continuous pages. + * GART update and sdma uses same buf copy function ring, sdma is splited to + * multiple GTT_MAX_PAGES transfer, all sdma operations are serialized, wait for + * the last sdma finish fence which is returned to check copy memory is done. 
+ * + * Context: Process context, takes and releases gtt_window_lock + * + * Return: + * 0 - OK, otherwise error code + */ + +static int +svm_migrate_copy_memory_gart(struct amdgpu_device *adev, dma_addr_t *sys, + uint64_t *vram, uint64_t npages, + enum MIGRATION_COPY_DIR direction, + struct dma_fence **mfence) +{ + const uint64_t GTT_MAX_PAGES = AMDGPU_GTT_MAX_TRANSFER_SIZE; + struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; + uint64_t gart_s, gart_d; + struct dma_fence *next; + uint64_t size; + int r; + + mutex_lock(&adev->mman.gtt_window_lock); + + while (npages) { + size = min(GTT_MAX_PAGES, npages); + + if (direction == FROM_VRAM_TO_RAM) { + gart_s = svm_migrate_direct_mapping_addr(adev, *vram); + r = svm_migrate_gart_map(ring, size, sys, &gart_d, 0); + + } else if (direction == FROM_RAM_TO_VRAM) { + r = svm_migrate_gart_map(ring, size, sys, &gart_s, + KFD_IOCTL_SVM_FLAG_GPU_RO); + gart_d = svm_migrate_direct_mapping_addr(adev, *vram); + } + if (r) { + pr_debug("failed %d to create gart mapping\n", r); + goto out_unlock; + } + + r = amdgpu_copy_buffer(ring, gart_s, gart_d, size * PAGE_SIZE, + NULL, &next, false, true, false); + if (r) { + pr_debug("failed %d to copy memory\n", r); + goto out_unlock; + } + + dma_fence_put(*mfence); + *mfence = next; + npages -= size; + if (npages) { + sys += size; + vram += size; + } + } + +out_unlock: + mutex_unlock(&adev->mman.gtt_window_lock); + + return r; +} + +/** + * svm_migrate_copy_done - wait for memory copy sdma is done + * + * @adev: amdgpu device the sdma memory copy is executing on + * @mfence: migrate fence + * + * Wait for dma fence is signaled, if the copy ssplit into multiple sdma + * operations, this is the last sdma operation fence. + * + * Context: called after svm_migrate_copy_memory + * + * Return: + * 0 - success + * otherwise - error code from dma fence signal + */ +int +svm_migrate_copy_done(struct amdgpu_device *adev, struct dma_fence *mfence) +{ + int r = 0; + + if (mfence) { + r = dma_fence_wait(mfence, false); + dma_fence_put(mfence); + pr_debug("sdma copy memory fence done\n"); + } + + return r; +} + static void svm_migrate_page_free(struct page *page) { } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h index 89392548ec44..df84e4143e25 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h @@ -33,6 +33,11 @@ #include "kfd_priv.h" #include "kfd_svm.h" +enum MIGRATION_COPY_DIR { + FROM_RAM_TO_VRAM = 0, + FROM_VRAM_TO_RAM +}; + #if defined(CONFIG_DEVICE_PRIVATE) int svm_migrate_init(struct amdgpu_device *adev); void svm_migrate_fini(struct amdgpu_device *adev); -- cgit v1.2.3 From 0b0e518d61af8e1cb73cbbfb313b215640c8a6f3 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 24 Feb 2021 20:40:20 -0500 Subject: drm/amdkfd: HMM migrate ram to vram Register svm range with same address and size but perferred_location is changed from CPU to GPU or from GPU to CPU, trigger migration the svm range from ram to vram or from vram to ram. If svm range prefetch location is GPU with flags KFD_IOCTL_SVM_FLAG_HOST_ACCESS, validate the svm range on ram first, then migrate it from ram to vram. After migrating to vram is done, CPU access will have cpu page fault, page fault handler migrate it back to ram and resume cpu access. Migration steps: 1. migrate_vma_pages get svm range ram pages, notify the interval is invalidated and unmap from CPU page table, HMM interval notifier callback evict process queues 2. 
Allocate new pages in vram using TTM 3. Use svm copy memory to sdma copy data from ram to vram 4. migrate_vma_pages copy ram pages structure to vram pages structure 5. migrate_vma_finalize put ram pages to free ram pages and memory 6. Restore work wait for migration is finished, then update GPUs page table mapping to new vram pages, resume process queues If migrate_vma_setup failed to collect all ram pages of range, retry 3 times until success to start migration. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 305 +++++++++++++++++++++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_migrate.h | 2 + drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 201 ++++++++++++++++++-- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 7 + 4 files changed, 502 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_migrate.h') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 74b38856cce3..7b025c169935 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -205,6 +205,311 @@ svm_migrate_copy_done(struct amdgpu_device *adev, struct dma_fence *mfence) return r; } +static uint64_t +svm_migrate_node_physical_addr(struct amdgpu_device *adev, + struct drm_mm_node **mm_node, uint64_t *offset) +{ + struct drm_mm_node *node = *mm_node; + uint64_t pos = *offset; + + if (node->start == AMDGPU_BO_INVALID_OFFSET) { + pr_debug("drm node is not validated\n"); + return 0; + } + + pr_debug("vram node start 0x%llx npages 0x%llx\n", node->start, + node->size); + + if (pos >= node->size) { + do { + pos -= node->size; + node++; + } while (pos >= node->size); + + *mm_node = node; + *offset = pos; + } + + return (node->start + pos) << PAGE_SHIFT; +} + +unsigned long +svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr) +{ + return (addr + adev->kfd.dev->pgmap.range.start) >> PAGE_SHIFT; +} + +static void +svm_migrate_get_vram_page(struct svm_range *prange, unsigned long pfn) +{ + struct page *page; + + page = pfn_to_page(pfn); + page->zone_device_data = prange; + get_page(page); + lock_page(page); +} + +static void +svm_migrate_put_vram_page(struct amdgpu_device *adev, unsigned long addr) +{ + struct page *page; + + page = pfn_to_page(svm_migrate_addr_to_pfn(adev, addr)); + unlock_page(page); + put_page(page); +} + + +static int +svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, + struct migrate_vma *migrate, struct dma_fence **mfence, + dma_addr_t *scratch) +{ + uint64_t npages = migrate->cpages; + struct device *dev = adev->dev; + struct drm_mm_node *node; + dma_addr_t *src; + uint64_t *dst; + uint64_t vram_addr; + uint64_t offset; + uint64_t i, j; + int r = -ENOMEM; + + pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start, + prange->last); + + src = scratch; + dst = (uint64_t *)(scratch + npages); + + r = svm_range_vram_node_new(adev, prange, true); + if (r) { + pr_debug("failed %d get 0x%llx pages from vram\n", r, npages); + goto out; + } + + node = prange->ttm_res->mm_node; + offset = prange->offset; + vram_addr = svm_migrate_node_physical_addr(adev, &node, &offset); + if (!vram_addr) { + WARN_ONCE(1, "vram node address is 0\n"); + r = -ENOMEM; + goto out; + } + + for (i = j = 0; i < npages; i++) { + struct page *spage; + + dst[i] = vram_addr + (j << PAGE_SHIFT); + migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]); + svm_migrate_get_vram_page(prange, 
migrate->dst[i]); + + migrate->dst[i] = migrate_pfn(migrate->dst[i]); + migrate->dst[i] |= MIGRATE_PFN_LOCKED; + + if (migrate->src[i] & MIGRATE_PFN_VALID) { + spage = migrate_pfn_to_page(migrate->src[i]); + src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE, + DMA_TO_DEVICE); + r = dma_mapping_error(dev, src[i]); + if (r) { + pr_debug("failed %d dma_map_page\n", r); + goto out_free_vram_pages; + } + } else { + if (j) { + r = svm_migrate_copy_memory_gart( + adev, src + i - j, + dst + i - j, j, + FROM_RAM_TO_VRAM, + mfence); + if (r) + goto out_free_vram_pages; + offset += j; + vram_addr = (node->start + offset) << PAGE_SHIFT; + j = 0; + } else { + offset++; + vram_addr += PAGE_SIZE; + } + if (offset >= node->size) { + node++; + pr_debug("next node size 0x%llx\n", node->size); + vram_addr = node->start << PAGE_SHIFT; + offset = 0; + } + continue; + } + + pr_debug("dma mapping src to 0x%llx, page_to_pfn 0x%lx\n", + src[i] >> PAGE_SHIFT, page_to_pfn(spage)); + + if (j + offset >= node->size - 1 && i < npages - 1) { + r = svm_migrate_copy_memory_gart(adev, src + i - j, + dst + i - j, j + 1, + FROM_RAM_TO_VRAM, + mfence); + if (r) + goto out_free_vram_pages; + + node++; + pr_debug("next node size 0x%llx\n", node->size); + vram_addr = node->start << PAGE_SHIFT; + offset = 0; + j = 0; + } else { + j++; + } + } + + r = svm_migrate_copy_memory_gart(adev, src + i - j, dst + i - j, j, + FROM_RAM_TO_VRAM, mfence); + +out_free_vram_pages: + if (r) { + pr_debug("failed %d to copy memory to vram\n", r); + while (i--) { + svm_migrate_put_vram_page(adev, dst[i]); + migrate->dst[i] = 0; + } + } + +out: + return r; +} + +static int +svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange, + struct vm_area_struct *vma, uint64_t start, + uint64_t end) +{ + uint64_t npages = (end - start) >> PAGE_SHIFT; + struct dma_fence *mfence = NULL; + struct migrate_vma migrate; + dma_addr_t *scratch; + size_t size; + void *buf; + int r = -ENOMEM; + int retry = 0; + + memset(&migrate, 0, sizeof(migrate)); + migrate.vma = vma; + migrate.start = start; + migrate.end = end; + migrate.flags = MIGRATE_VMA_SELECT_SYSTEM; + migrate.pgmap_owner = adev; + + size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t); + size *= npages; + buf = kvmalloc(size, GFP_KERNEL | __GFP_ZERO); + if (!buf) + goto out; + + migrate.src = buf; + migrate.dst = migrate.src + npages; + scratch = (dma_addr_t *)(migrate.dst + npages); + +retry: + r = migrate_vma_setup(&migrate); + if (r) { + pr_debug("failed %d prepare migrate svms 0x%p [0x%lx 0x%lx]\n", + r, prange->svms, prange->start, prange->last); + goto out_free; + } + if (migrate.cpages != npages) { + pr_debug("collect 0x%lx/0x%llx pages, retry\n", migrate.cpages, + npages); + migrate_vma_finalize(&migrate); + if (retry++ >= 3) { + r = -ENOMEM; + pr_debug("failed %d migrate svms 0x%p [0x%lx 0x%lx]\n", + r, prange->svms, prange->start, prange->last); + goto out_free; + } + + goto retry; + } + + if (migrate.cpages) { + svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, + scratch); + migrate_vma_pages(&migrate); + svm_migrate_copy_done(adev, mfence); + migrate_vma_finalize(&migrate); + } + + svm_range_dma_unmap(adev->dev, scratch, 0, npages); + svm_range_free_dma_mappings(prange); + +out_free: + kvfree(buf); +out: + return r; +} + +/** + * svm_migrate_ram_to_vram - migrate svm range from system to device + * @prange: range structure + * @best_loc: the device to migrate to + * + * Context: Process context, caller hold mmap read lock, svms lock, prange lock + * + 
* Return: + * 0 - OK, otherwise error code + */ +int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc) +{ + unsigned long addr, start, end; + struct vm_area_struct *vma; + struct amdgpu_device *adev; + struct mm_struct *mm; + int r = 0; + + if (prange->actual_loc == best_loc) { + pr_debug("svms 0x%p [0x%lx 0x%lx] already on best_loc 0x%x\n", + prange->svms, prange->start, prange->last, best_loc); + return 0; + } + + adev = svm_range_get_adev_by_id(prange, best_loc); + if (!adev) { + pr_debug("failed to get device by id 0x%x\n", best_loc); + return -ENODEV; + } + + pr_debug("svms 0x%p [0x%lx 0x%lx] to gpu 0x%x\n", prange->svms, + prange->start, prange->last, best_loc); + + /* FIXME: workaround for page locking bug with invalid pages */ + svm_range_prefault(prange, mm); + + start = prange->start << PAGE_SHIFT; + end = (prange->last + 1) << PAGE_SHIFT; + + mm = current->mm; + + for (addr = start; addr < end;) { + unsigned long next; + + vma = find_vma(mm, addr); + if (!vma || addr < vma->vm_start) + break; + + next = min(vma->vm_end, end); + r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next); + if (r) { + pr_debug("failed to migrate\n"); + break; + } + addr = next; + } + + if (!r) + prange->actual_loc = best_loc; + + return r; +} + static void svm_migrate_page_free(struct page *page) { } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h index df84e4143e25..d9cee0f6285a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h @@ -38,6 +38,8 @@ enum MIGRATION_COPY_DIR { FROM_VRAM_TO_RAM }; +int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc); + #if defined(CONFIG_DEVICE_PRIVATE) int svm_migrate_init(struct amdgpu_device *adev); void svm_migrate_fini(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 7a70f5e92f18..c49fb8513b2b 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -31,6 +31,7 @@ #include "amdgpu_xgmi.h" #include "kfd_priv.h" #include "kfd_svm.h" +#include "kfd_migrate.h" #define AMDGPU_SVM_RANGE_RESTORE_DELAY_MS 1 @@ -177,8 +178,8 @@ svm_range_dma_map(struct svm_range *prange, unsigned long *bitmap, return r; } -static void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr, - unsigned long offset, unsigned long npages) +void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr, + unsigned long offset, unsigned long npages) { enum dma_data_direction dir = DMA_BIDIRECTIONAL; int i; @@ -195,7 +196,7 @@ static void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr, } } -static void svm_range_free_dma_mappings(struct svm_range *prange) +void svm_range_free_dma_mappings(struct svm_range *prange) { struct kfd_process_device *pdd; dma_addr_t *dma_addr; @@ -230,6 +231,7 @@ static void svm_range_free(struct svm_range *prange) svm_range_vram_node_free(prange); svm_range_free_dma_mappings(prange); mutex_destroy(&prange->lock); + mutex_destroy(&prange->migrate_mutex); kfree(prange); } @@ -266,6 +268,7 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start, INIT_LIST_HEAD(&prange->deferred_list); INIT_LIST_HEAD(&prange->child_list); atomic_set(&prange->invalid, 0); + mutex_init(&prange->migrate_mutex); mutex_init(&prange->lock); svm_range_set_default_attributes(&prange->preferred_loc, &prange->prefetch_loc, @@ -1238,6 +1241,8 @@ static int svm_range_validate_and_map(struct mm_struct *mm, 
pr_debug("failed %d to dma map range\n", r); goto unreserve_out; } + + prange->validated_once = true; } svm_range_lock(prange); @@ -1329,21 +1334,28 @@ static void svm_range_restore_work(struct work_struct *work) prange->svms, prange, prange->start, prange->last, invalid); + /* + * If range is migrating, wait for migration is done. + */ + mutex_lock(&prange->migrate_mutex); + r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE, false, true); - if (r) { + if (r) pr_debug("failed %d to map 0x%lx to gpus\n", r, prange->start); - goto unlock_out; - } + + mutex_unlock(&prange->migrate_mutex); + if (r) + goto out_reschedule; if (atomic_cmpxchg(&prange->invalid, invalid, 0) != invalid) - goto unlock_out; + goto out_reschedule; } if (atomic_cmpxchg(&svms->evicted_ranges, evicted_ranges, 0) != evicted_ranges) - goto unlock_out; + goto out_reschedule; evicted_ranges = 0; @@ -1357,7 +1369,7 @@ static void svm_range_restore_work(struct work_struct *work) pr_debug("restore svm ranges successfully\n"); -unlock_out: +out_reschedule: mutex_unlock(&svms->lock); mmap_write_unlock(mm); mutex_unlock(&process_info->lock); @@ -1649,6 +1661,7 @@ static void svm_range_deferred_list_work(struct work_struct *work) list_del_init(&prange->deferred_list); spin_unlock(&svms->deferred_list_lock); + mutex_lock(&prange->migrate_mutex); while (!list_empty(&prange->child_list)) { struct svm_range *pchild; @@ -1659,6 +1672,7 @@ static void svm_range_deferred_list_work(struct work_struct *work) list_del_init(&pchild->child_list); svm_range_handle_list_op(svms, pchild); } + mutex_unlock(&prange->migrate_mutex); svm_range_handle_list_op(svms, prange); mutex_unlock(&svms->lock); @@ -1957,6 +1971,151 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size, return 0; } +/* svm_range_best_location - decide the best actual location + * @prange: svm range structure + * + * For xnack off: + * If range map to single GPU, the best acutal location is prefetch loc, which + * can be CPU or GPU. + * + * If range map to multiple GPUs, only if mGPU connection on xgmi same hive, + * the best actual location could be prefetch_loc GPU. If mGPU connection on + * PCIe, the best actual location is always CPU, because GPU cannot access vram + * of other GPUs, assuming PCIe small bar (large bar support is not upstream). + * + * For xnack on: + * The best actual location is prefetch location. If mGPU connection on xgmi + * same hive, range map to multiple GPUs. Otherwise, the range only map to + * actual location GPU. Other GPU access vm fault will trigger migration. 
+ * + * Context: Process context + * + * Return: + * 0 for CPU or GPU id + */ +static uint32_t svm_range_best_location(struct svm_range *prange) +{ + DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE); + uint32_t best_loc = prange->prefetch_loc; + struct kfd_process_device *pdd; + struct amdgpu_device *bo_adev; + struct amdgpu_device *adev; + struct kfd_process *p; + uint32_t gpuidx; + + p = container_of(prange->svms, struct kfd_process, svms); + + /* xnack on */ + if (p->xnack_enabled) + goto out; + + /* xnack off */ + if (!best_loc || best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED) + goto out; + + bo_adev = svm_range_get_adev_by_id(prange, best_loc); + bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip, + MAX_GPU_INSTANCE); + + for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) { + pdd = kfd_process_device_from_gpuidx(p, gpuidx); + if (!pdd) { + pr_debug("failed to get device by idx 0x%x\n", gpuidx); + continue; + } + adev = (struct amdgpu_device *)pdd->dev->kgd; + + if (adev == bo_adev) + continue; + + if (!amdgpu_xgmi_same_hive(adev, bo_adev)) { + best_loc = 0; + break; + } + } + +out: + pr_debug("xnack %d svms 0x%p [0x%lx 0x%lx] best loc 0x%x\n", + p->xnack_enabled, &p->svms, prange->start, prange->last, + best_loc); + + return best_loc; +} + +/* FIXME: This is a workaround for page locking bug when some pages are + * invalid during migration to VRAM + */ +void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm) +{ + struct hmm_range *hmm_range; + int r; + + if (prange->validated_once) + return; + + r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL, + prange->start << PAGE_SHIFT, + prange->npages, &hmm_range, + false, true); + if (!r) { + amdgpu_hmm_range_get_pages_done(hmm_range); + prange->validated_once = true; + } +} + +/* svm_range_trigger_migration - start page migration if prefetch loc changed + * @mm: current process mm_struct + * @prange: svm range structure + * @migrated: output, true if migration is triggered + * + * If range perfetch_loc is GPU, actual loc is cpu 0, then migrate the range + * from ram to vram. + * If range prefetch_loc is cpu 0, actual loc is GPU, then migrate the range + * from vram to ram. + * + * If GPU vm fault retry is not enabled, migration interact with MMU notifier + * and restore work: + * 1. migrate_vma_setup invalidate pages, MMU notifier callback svm_range_evict + * stops all queues, schedule restore work + * 2. svm_range_restore_work wait for migration is done by + * a. svm_range_validate_vram takes prange->migrate_mutex + * b. svm_range_validate_ram HMM get pages wait for CPU fault handle returns + * 3. restore work update mappings of GPU, resume all queues. 
+ * + * Context: Process context + * + * Return: + * 0 - OK, otherwise - error code of migration + */ +static int +svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange, + bool *migrated) +{ + uint32_t best_loc; + int r = 0; + + *migrated = false; + best_loc = svm_range_best_location(prange); + + if (best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED || + best_loc == prange->actual_loc) + return 0; + + if (best_loc && !prange->actual_loc && + !(prange->flags & KFD_IOCTL_SVM_FLAG_HOST_ACCESS)) + return 0; + + if (best_loc) { + pr_debug("migrate from ram to vram\n"); + r = svm_migrate_ram_to_vram(prange, best_loc); + + if (!r) + *migrated = true; + } + + return r; +} + static int svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size, uint32_t nattr, struct kfd_ioctl_svm_attribute *attrs) @@ -2027,13 +2186,29 @@ svm_range_set_attr(struct kfd_process *p, uint64_t start, uint64_t size, * case because the rollback wouldn't be guaranteed to work either. */ list_for_each_entry(prange, &update_list, update_list) { + bool migrated; + + mutex_lock(&prange->migrate_mutex); + + r = svm_range_trigger_migration(mm, prange, &migrated); + if (r) + goto out_unlock_range; + + if (migrated) { + pr_debug("restore_work will update mappings of GPUs\n"); + mutex_unlock(&prange->migrate_mutex); + continue; + } + r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE, true, true); - if (r) { - pr_debug("failed %d to map 0x%lx to gpus\n", r, - prange->start); + if (r) + pr_debug("failed %d to map svm range\n", r); + +out_unlock_range: + mutex_unlock(&prange->migrate_mutex); + if (r) break; - } } svm_range_debug_dump(svms); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index 0aab88c71855..34214a44b099 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -56,6 +56,7 @@ struct svm_work_list_item { * struct svm_range - shared virtual memory range * * @svms: list of svm ranges, structure defined in kfd_process + * @migrate_mutex: to serialize range migration, validation and mapping update * @start: range start address in pages * @last: range last address in pages * @it_node: node [start, last] stored in interval tree, start, last are page @@ -92,6 +93,7 @@ struct svm_work_list_item { */ struct svm_range { struct svm_range_list *svms; + struct mutex migrate_mutex; unsigned long start; unsigned long last; struct interval_tree_node it_node; @@ -120,6 +122,7 @@ struct svm_range { struct list_head child_list; DECLARE_BITMAP(bitmap_access, MAX_GPU_INSTANCE); DECLARE_BITMAP(bitmap_aip, MAX_GPU_INSTANCE); + bool validated_once; }; static inline void svm_range_lock(struct svm_range *prange) @@ -144,5 +147,9 @@ struct amdgpu_device *svm_range_get_adev_by_id(struct svm_range *prange, int svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange, bool clear); void svm_range_vram_node_free(struct svm_range *prange); +void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr, + unsigned long offset, unsigned long npages); +void svm_range_free_dma_mappings(struct svm_range *prange); +void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm); #endif /* KFD_SVM_H_ */ -- cgit v1.2.3 From 48ff079b28d82dbce000cc45c0fd35b6ae9ffbda Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 17 Mar 2021 00:24:12 -0400 Subject: drm/amdkfd: HMM migrate vram to ram If CPU page fault happens, HMM pgmap_ops callback migrate_to_ram start migrate memory from vram to ram in steps: 1. 
migrate_vma_pages get vram pages, and notify HMM to invalidate the pages, HMM interval notifier callback evict process queues 2. Allocate system memory pages 3. Use svm copy memory to migrate data from vram to ram 4. migrate_vma_pages copy pages structure from vram pages to ram pages 5. Return VM_FAULT_SIGBUS if migration failed, to notify application 6. migrate_vma_finalize put vram pages, page_free callback free vram pages and vram nodes 7. Restore work wait for migration is finished, then update GPU page table mapping to system memory, and resume process queues Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 302 ++++++++++++++++++++++++++++++- drivers/gpu/drm/amd/amdkfd/kfd_migrate.h | 3 + drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 127 ++++++++++++- drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 10 + 4 files changed, 429 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_migrate.h') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 7b025c169935..73c10dad0489 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -191,7 +191,7 @@ out_unlock: * 0 - success * otherwise - error code from dma fence signal */ -int +static int svm_migrate_copy_done(struct amdgpu_device *adev, struct dma_fence *mfence) { int r = 0; @@ -260,6 +260,35 @@ svm_migrate_put_vram_page(struct amdgpu_device *adev, unsigned long addr) put_page(page); } +static unsigned long +svm_migrate_addr(struct amdgpu_device *adev, struct page *page) +{ + unsigned long addr; + + addr = page_to_pfn(page) << PAGE_SHIFT; + return (addr - adev->kfd.dev->pgmap.range.start); +} + +static struct page * +svm_migrate_get_sys_page(struct vm_area_struct *vma, unsigned long addr) +{ + struct page *page; + + page = alloc_page_vma(GFP_HIGHUSER, vma, addr); + if (page) + lock_page(page); + + return page; +} + +void svm_migrate_put_sys_page(unsigned long addr) +{ + struct page *page; + + page = pfn_to_page(addr >> PAGE_SHIFT); + unlock_page(page); + put_page(page); +} static int svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange, @@ -512,13 +541,213 @@ int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc) static void svm_migrate_page_free(struct page *page) { + /* Keep this function to avoid warning */ +} + +static int +svm_migrate_copy_to_ram(struct amdgpu_device *adev, struct svm_range *prange, + struct migrate_vma *migrate, struct dma_fence **mfence, + dma_addr_t *scratch) +{ + uint64_t npages = migrate->cpages; + struct device *dev = adev->dev; + uint64_t *src; + dma_addr_t *dst; + struct page *dpage; + uint64_t i = 0, j; + uint64_t addr; + int r = 0; + + pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start, + prange->last); + + addr = prange->start << PAGE_SHIFT; + + src = (uint64_t *)(scratch + npages); + dst = scratch; + + for (i = 0, j = 0; i < npages; i++, j++, addr += PAGE_SIZE) { + struct page *spage; + + spage = migrate_pfn_to_page(migrate->src[i]); + if (!spage) { + pr_debug("failed get spage svms 0x%p [0x%lx 0x%lx]\n", + prange->svms, prange->start, prange->last); + r = -ENOMEM; + goto out_oom; + } + src[i] = svm_migrate_addr(adev, spage); + if (i > 0 && src[i] != src[i - 1] + PAGE_SIZE) { + r = svm_migrate_copy_memory_gart(adev, dst + i - j, + src + i - j, j, + FROM_VRAM_TO_RAM, + mfence); + if (r) + goto out_oom; + j = 0; + } + + dpage = 
svm_migrate_get_sys_page(migrate->vma, addr); + if (!dpage) { + pr_debug("failed get page svms 0x%p [0x%lx 0x%lx]\n", + prange->svms, prange->start, prange->last); + r = -ENOMEM; + goto out_oom; + } + + dst[i] = dma_map_page(dev, dpage, 0, PAGE_SIZE, DMA_FROM_DEVICE); + r = dma_mapping_error(dev, dst[i]); + if (r) { + pr_debug("failed %d dma_map_page\n", r); + goto out_oom; + } + + pr_debug("dma mapping dst to 0x%llx, page_to_pfn 0x%lx\n", + dst[i] >> PAGE_SHIFT, page_to_pfn(dpage)); + + migrate->dst[i] = migrate_pfn(page_to_pfn(dpage)); + migrate->dst[i] |= MIGRATE_PFN_LOCKED; + } + + r = svm_migrate_copy_memory_gart(adev, dst + i - j, src + i - j, j, + FROM_VRAM_TO_RAM, mfence); + +out_oom: + if (r) { + pr_debug("failed %d copy to ram\n", r); + while (i--) { + svm_migrate_put_sys_page(dst[i]); + migrate->dst[i] = 0; + } + } + + return r; +} + +static int +svm_migrate_vma_to_ram(struct amdgpu_device *adev, struct svm_range *prange, + struct vm_area_struct *vma, uint64_t start, uint64_t end) +{ + uint64_t npages = (end - start) >> PAGE_SHIFT; + struct dma_fence *mfence = NULL; + struct migrate_vma migrate; + dma_addr_t *scratch; + size_t size; + void *buf; + int r = -ENOMEM; + + memset(&migrate, 0, sizeof(migrate)); + migrate.vma = vma; + migrate.start = start; + migrate.end = end; + migrate.flags = MIGRATE_VMA_SELECT_DEVICE_PRIVATE; + migrate.pgmap_owner = adev; + + size = 2 * sizeof(*migrate.src) + sizeof(uint64_t) + sizeof(dma_addr_t); + size *= npages; + buf = kvmalloc(size, GFP_KERNEL | __GFP_ZERO); + if (!buf) + goto out; + + migrate.src = buf; + migrate.dst = migrate.src + npages; + scratch = (dma_addr_t *)(migrate.dst + npages); + + r = migrate_vma_setup(&migrate); + if (r) { + pr_debug("failed %d prepare migrate svms 0x%p [0x%lx 0x%lx]\n", + r, prange->svms, prange->start, prange->last); + goto out_free; + } + + pr_debug("cpages %ld\n", migrate.cpages); + + if (migrate.cpages) { + svm_migrate_copy_to_ram(adev, prange, &migrate, &mfence, + scratch); + migrate_vma_pages(&migrate); + svm_migrate_copy_done(adev, mfence); + migrate_vma_finalize(&migrate); + } else { + pr_debug("failed collect migrate device pages [0x%lx 0x%lx]\n", + prange->start, prange->last); + } + + svm_range_dma_unmap(adev->dev, scratch, 0, npages); + +out_free: + kvfree(buf); +out: + return r; +} + +/** + * svm_migrate_vram_to_ram - migrate svm range from device to system + * @prange: range structure + * @mm: process mm, use current->mm if NULL + * + * Context: Process context, caller hold mmap read lock, svms lock, prange lock + * + * Return: + * 0 - OK, otherwise error code + */ +int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm) +{ + struct amdgpu_device *adev; + struct vm_area_struct *vma; + unsigned long addr; + unsigned long start; + unsigned long end; + int r = 0; + + if (!prange->actual_loc) { + pr_debug("[0x%lx 0x%lx] already migrated to ram\n", + prange->start, prange->last); + return 0; + } + + adev = svm_range_get_adev_by_id(prange, prange->actual_loc); + if (!adev) { + pr_debug("failed to get device by id 0x%x\n", + prange->actual_loc); + return -ENODEV; + } + + pr_debug("svms 0x%p prange 0x%p [0x%lx 0x%lx] from gpu 0x%x to ram\n", + prange->svms, prange, prange->start, prange->last, + prange->actual_loc); + + start = prange->start << PAGE_SHIFT; + end = (prange->last + 1) << PAGE_SHIFT; + + for (addr = start; addr < end;) { + unsigned long next; + + vma = find_vma(mm, addr); + if (!vma || addr < vma->vm_start) + break; + + next = min(vma->vm_end, end); + r = 
svm_migrate_vma_to_ram(adev, prange, vma, addr, next); + if (r) { + pr_debug("failed %d to migrate\n", r); + break; + } + addr = next; + } + + if (!r) { + svm_range_vram_node_free(prange); + prange->actual_loc = 0; + } + return r; } /** * svm_migrate_to_ram - CPU page fault handler * @vmf: CPU vm fault vma, address * - * Context: vm fault handler, mm->mmap_sem is taken + * Context: vm fault handler, caller holds the mmap read lock * * Return: * 0 - OK @@ -526,7 +755,74 @@ static void svm_migrate_page_free(struct page *page) */ static vm_fault_t svm_migrate_to_ram(struct vm_fault *vmf) { - return VM_FAULT_SIGBUS; + unsigned long addr = vmf->address; + struct vm_area_struct *vma; + enum svm_work_list_ops op; + struct svm_range *parent; + struct svm_range *prange; + struct kfd_process *p; + struct mm_struct *mm; + int r = 0; + + vma = vmf->vma; + mm = vma->vm_mm; + + p = kfd_lookup_process_by_mm(vma->vm_mm); + if (!p) { + pr_debug("failed find process at fault address 0x%lx\n", addr); + return VM_FAULT_SIGBUS; + } + addr >>= PAGE_SHIFT; + pr_debug("CPU page fault svms 0x%p address 0x%lx\n", &p->svms, addr); + + mutex_lock(&p->svms.lock); + + prange = svm_range_from_addr(&p->svms, addr, &parent); + if (!prange) { + pr_debug("cannot find svm range at 0x%lx\n", addr); + r = -EFAULT; + goto out; + } + + mutex_lock(&parent->migrate_mutex); + if (prange != parent) + mutex_lock_nested(&prange->migrate_mutex, 1); + + if (!prange->actual_loc) + goto out_unlock_prange; + + svm_range_lock(parent); + if (prange != parent) + mutex_lock_nested(&prange->lock, 1); + r = svm_range_split_by_granularity(p, mm, addr, parent, prange); + if (prange != parent) + mutex_unlock(&prange->lock); + svm_range_unlock(parent); + if (r) { + pr_debug("failed %d to split range by granularity\n", r); + goto out_unlock_prange; + } + + r = svm_migrate_vram_to_ram(prange, mm); + if (r) + pr_debug("failed %d migrate 0x%p [0x%lx 0x%lx] to ram\n", r, + prange, prange->start, prange->last); + + op = SVM_OP_UPDATE_RANGE_NOTIFIER; + svm_range_add_list_work(&p->svms, parent, mm, op); + schedule_deferred_list_work(&p->svms); + +out_unlock_prange: + if (prange != parent) + mutex_unlock(&prange->migrate_mutex); + mutex_unlock(&parent->migrate_mutex); +out: + mutex_unlock(&p->svms.lock); + kfd_unref_process(p); + + pr_debug("CPU fault svms 0x%p address 0x%lx done\n", &p->svms, addr); + + return r ? 
VM_FAULT_SIGBUS : 0; } static const struct dev_pagemap_ops svm_migrate_pgmap_ops = { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h index d9cee0f6285a..082b9bb22270 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h @@ -39,6 +39,9 @@ enum MIGRATION_COPY_DIR { }; int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc); +int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm); +unsigned long +svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr); #if defined(CONFIG_DEVICE_PRIVATE) int svm_migrate_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index c49fb8513b2b..6fcfb9fa1b37 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -861,6 +861,60 @@ svm_range_add_child(struct svm_range *prange, struct mm_struct *mm, list_add_tail(&pchild->child_list, &prange->child_list); } +/** + * svm_range_split_by_granularity - collect ranges within granularity boundary + * + * @p: the process with svms list + * @mm: mm structure + * @addr: the vm fault address in pages, to split the prange + * @parent: parent range if prange is from child list + * @prange: prange to split + * + * Trims @prange to be a single aligned block of prange->granularity if + * possible. The head and tail are added to the child_list in @parent. + * + * Context: caller must hold mmap_read_lock and prange->lock + * + * Return: + * 0 - OK, otherwise error code + */ +int +svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm, + unsigned long addr, struct svm_range *parent, + struct svm_range *prange) +{ + struct svm_range *head, *tail; + unsigned long start, last, size; + int r; + + /* Align splited range start and size to granularity size, then a single + * PTE will be used for whole range, this reduces the number of PTE + * updated and the L1 TLB space used for translation. 
+ */ + size = 1UL << prange->granularity; + start = ALIGN_DOWN(addr, size); + last = ALIGN(addr + 1, size) - 1; + + pr_debug("svms 0x%p split [0x%lx 0x%lx] to [0x%lx 0x%lx] size 0x%lx\n", + prange->svms, prange->start, prange->last, start, last, size); + + if (start > prange->start) { + r = svm_range_split(prange, start, prange->last, &head); + if (r) + return r; + svm_range_add_child(parent, mm, head, SVM_OP_ADD_RANGE); + } + + if (last < prange->last) { + r = svm_range_split(prange, prange->start, last, &tail); + if (r) + return r; + svm_range_add_child(parent, mm, tail, SVM_OP_ADD_RANGE); + } + + return 0; +} + static uint64_t svm_range_get_pte_flags(struct amdgpu_device *adev, struct svm_range *prange) { @@ -1685,7 +1739,7 @@ static void svm_range_deferred_list_work(struct work_struct *work) pr_debug("exit svms 0x%p\n", svms); } -static void +void svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange, struct mm_struct *mm, enum svm_work_list_ops op) { @@ -1708,7 +1762,7 @@ svm_range_add_list_work(struct svm_range_list *svms, struct svm_range *prange, spin_unlock(&svms->deferred_list_lock); } -static void schedule_deferred_list_work(struct svm_range_list *svms) +void schedule_deferred_list_work(struct svm_range_list *svms) { spin_lock(&svms->deferred_list_lock); if (!list_empty(&svms->deferred_range_list)) @@ -1798,12 +1852,19 @@ svm_range_unmap_from_cpu(struct mm_struct *mm, struct svm_range *prange, /** * svm_range_cpu_invalidate_pagetables - interval notifier callback * - * MMU range unmap notifier to remove svm ranges + * If event is MMU_NOTIFY_UNMAP, this is from CPU unmap range, otherwise, it + * is from migration, or CPU page invalidation callback. + * + * For unmap event, unmap range from GPUs, remove prange from svms in a delayed + * work thread, and split prange if only part of prange is unmapped. + * + * For invalidation event, if GPU retry fault is not enabled, evict the queues, + * then schedule svm_range_restore_work to update GPU mapping and resume queues. + * If GPU retry fault is enabled, unmap the svm range from GPU, retry fault will + * update GPU mapping to recover. * - * If GPU vm fault retry is not enabled, evict the svm range, then restore - * work will update GPU mapping. - * If GPU vm fault retry is enabled, unmap the svm range from GPU, vm fault - * will update GPU mapping. 
+ * Context: mmap lock, notifier_invalidate_start lock are held + * for invalidate event, prange lock is held if this is from migration */ static bool svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, @@ -1846,6 +1907,49 @@ svm_range_cpu_invalidate_pagetables(struct mmu_interval_notifier *mni, return true; } +/** + * svm_range_from_addr - find svm range from fault address + * @svms: svm range list header + * @addr: address to search range interval tree, in pages + * @parent: parent range if range is on child list + * + * Context: The caller must hold svms->lock + * + * Return: the svm_range found or NULL + */ +struct svm_range * +svm_range_from_addr(struct svm_range_list *svms, unsigned long addr, + struct svm_range **parent) +{ + struct interval_tree_node *node; + struct svm_range *prange; + struct svm_range *pchild; + + node = interval_tree_iter_first(&svms->objects, addr, addr); + if (!node) + return NULL; + + prange = container_of(node, struct svm_range, it_node); + pr_debug("address 0x%lx prange [0x%lx 0x%lx] node [0x%lx 0x%lx]\n", + addr, prange->start, prange->last, node->start, node->last); + + if (addr >= prange->start && addr <= prange->last) { + if (parent) + *parent = prange; + return prange; + } + list_for_each_entry(pchild, &prange->child_list, child_list) + if (addr >= pchild->start && addr <= pchild->last) { + pr_debug("found address 0x%lx pchild [0x%lx 0x%lx]\n", + addr, pchild->start, pchild->last); + if (parent) + *parent = prange; + return pchild; + } + + return NULL; +} + void svm_range_list_fini(struct kfd_process *p) { struct svm_range *prange; @@ -2108,11 +2212,14 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange, if (best_loc) { pr_debug("migrate from ram to vram\n"); r = svm_migrate_ram_to_vram(prange, best_loc); - - if (!r) - *migrated = true; + } else { + pr_debug("migrate from vram to ram\n"); + r = svm_migrate_vram_to_ram(prange, current->mm); } + if (!r) + *migrated = true; + return r; } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index 34214a44b099..37cfa1689c4f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -142,11 +142,21 @@ void svm_range_list_fini(struct kfd_process *p); int svm_ioctl(struct kfd_process *p, enum kfd_ioctl_svm_op op, uint64_t start, uint64_t size, uint32_t nattrs, struct kfd_ioctl_svm_attribute *attrs); +struct svm_range *svm_range_from_addr(struct svm_range_list *svms, + unsigned long addr, + struct svm_range **parent); struct amdgpu_device *svm_range_get_adev_by_id(struct svm_range *prange, uint32_t id); int svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange, bool clear); void svm_range_vram_node_free(struct svm_range *prange); +int svm_range_split_by_granularity(struct kfd_process *p, struct mm_struct *mm, + unsigned long addr, struct svm_range *parent, + struct svm_range *prange); +void svm_range_add_list_work(struct svm_range_list *svms, + struct svm_range *prange, struct mm_struct *mm, + enum svm_work_list_ops op); +void schedule_deferred_list_work(struct svm_range_list *svms); void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr, unsigned long offset, unsigned long npages); void svm_range_free_dma_mappings(struct svm_range *prange); -- cgit v1.2.3 From cda0f85bfa5e5fddc51b94cfd6680c6697707a89 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 24 Feb 2021 23:46:28 -0500 Subject: drm/amdkfd: refine migration policy with xnack on With xnack on, GPU 
vm fault handler decide the best restore location, then migrate range to the best restore location and update GPU mapping to recover the GPU vm fault. Signed-off-by: Philip Yang Signed-off-by: Alex Sierra Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 7 +- drivers/gpu/drm/amd/amdkfd/kfd_migrate.h | 3 +- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 3 + drivers/gpu/drm/amd/amdkfd/kfd_process.c | 16 ++++ drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 136 ++++++++++++++++++++++++++++--- 5 files changed, 150 insertions(+), 15 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_migrate.h') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 2a32c423a393..3726a671d7d8 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -480,18 +480,19 @@ out: * svm_migrate_ram_to_vram - migrate svm range from system to device * @prange: range structure * @best_loc: the device to migrate to + * @mm: the process mm structure * * Context: Process context, caller hold mmap read lock, svms lock, prange lock * * Return: * 0 - OK, otherwise error code */ -int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc) +int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, + struct mm_struct *mm) { unsigned long addr, start, end; struct vm_area_struct *vma; struct amdgpu_device *adev; - struct mm_struct *mm; int r = 0; if (prange->actual_loc == best_loc) { @@ -515,8 +516,6 @@ int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc) start = prange->start << PAGE_SHIFT; end = (prange->last + 1) << PAGE_SHIFT; - mm = current->mm; - for (addr = start; addr < end;) { unsigned long next; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h index 082b9bb22270..53c899b80b85 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h @@ -38,7 +38,8 @@ enum MIGRATION_COPY_DIR { FROM_VRAM_TO_RAM }; -int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc); +int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, + struct mm_struct *mm); int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm); unsigned long svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index a58bea31e23c..a1ddcf6446db 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -864,6 +864,9 @@ struct kfd_process *kfd_lookup_process_by_pasid(u32 pasid); struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm); int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id); +int kfd_process_gpuid_from_kgd(struct kfd_process *p, + struct amdgpu_device *adev, uint32_t *gpuid, + uint32_t *gpuidx); static inline int kfd_process_gpuid_from_gpuidx(struct kfd_process *p, uint32_t gpuidx, uint32_t *gpuid) { return gpuidx < p->n_pdds ? 
p->pdds[gpuidx]->dev->id : -EINVAL; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index b8db509e2bbd..d7006ef2388f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -1676,6 +1676,22 @@ int kfd_process_gpuidx_from_gpuid(struct kfd_process *p, uint32_t gpu_id) return -EINVAL; } +int +kfd_process_gpuid_from_kgd(struct kfd_process *p, struct amdgpu_device *adev, + uint32_t *gpuid, uint32_t *gpuidx) +{ + struct kgd_dev *kgd = (struct kgd_dev *)adev; + int i; + + for (i = 0; i < p->n_pdds; i++) + if (p->pdds[i] && p->pdds[i]->dev->kgd == kgd) { + *gpuid = p->pdds[i]->dev->id; + *gpuidx = i; + return 0; + } + return -EINVAL; +} + static void evict_process_worker(struct work_struct *work) { int ret; diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index c771532420dc..7206e0b7b422 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -1317,6 +1317,24 @@ static int svm_range_validate_and_map(struct mm_struct *mm, if (gpuidx < MAX_GPU_INSTANCE) { bitmap_zero(ctx.bitmap, MAX_GPU_INSTANCE); bitmap_set(ctx.bitmap, gpuidx, 1); + } else if (ctx.process->xnack_enabled) { + bitmap_copy(ctx.bitmap, prange->bitmap_aip, MAX_GPU_INSTANCE); + + /* If prefetch range to GPU, or GPU retry fault migrate range to + * GPU, which has ACCESS attribute to the range, create mapping + * on that GPU. + */ + if (prange->actual_loc) { + gpuidx = kfd_process_gpuidx_from_gpuid(ctx.process, + prange->actual_loc); + if (gpuidx < 0) { + WARN_ONCE(1, "failed get device by id 0x%x\n", + prange->actual_loc); + return -EINVAL; + } + if (test_bit(gpuidx, prange->bitmap_access)) + bitmap_set(ctx.bitmap, gpuidx, 1); + } } else { bitmap_or(ctx.bitmap, prange->bitmap_access, prange->bitmap_aip, MAX_GPU_INSTANCE); @@ -2046,15 +2064,75 @@ svm_range_from_addr(struct svm_range_list *svms, unsigned long addr, return NULL; } +/* svm_range_best_restore_location - decide the best fault restore location + * @prange: svm range structure + * @adev: the GPU on which vm fault happened + * + * This is only called when xnack is on, to decide the best location to restore + * the range mapping after GPU vm fault. Caller uses the best location to do + * migration if actual loc is not best location, then update GPU page table + * mapping to the best location. + * + * If vm fault gpu is range preferred loc, the best_loc is preferred loc. + * If vm fault gpu idx is on range ACCESSIBLE bitmap, best_loc is vm fault gpu + * If vm fault gpu idx is on range ACCESSIBLE_IN_PLACE bitmap, then + * if range actual loc is cpu, best_loc is cpu + * if vm fault gpu is on xgmi same hive of range actual loc gpu, best_loc is + * range actual loc. + * Otherwise, GPU no access, best_loc is -1. 
+ * + * Return: + * -1 means vm fault GPU no access + * 0 for CPU or GPU id + */ +static int32_t +svm_range_best_restore_location(struct svm_range *prange, + struct amdgpu_device *adev, + int32_t *gpuidx) +{ + struct amdgpu_device *bo_adev; + struct kfd_process *p; + uint32_t gpuid; + int r; + + p = container_of(prange->svms, struct kfd_process, svms); + + r = kfd_process_gpuid_from_kgd(p, adev, &gpuid, gpuidx); + if (r < 0) { + pr_debug("failed to get gpuid from kgd\n"); + return -1; + } + + if (prange->preferred_loc == gpuid) + return prange->preferred_loc; + + if (test_bit(*gpuidx, prange->bitmap_access)) + return gpuid; + + if (test_bit(*gpuidx, prange->bitmap_aip)) { + if (!prange->actual_loc) + return 0; + + bo_adev = svm_range_get_adev_by_id(prange, prange->actual_loc); + if (amdgpu_xgmi_same_hive(adev, bo_adev)) + return prange->actual_loc; + else + return 0; + } + + return -1; +} + int svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, uint64_t addr) { - int r = 0; struct mm_struct *mm = NULL; - struct svm_range *prange; struct svm_range_list *svms; + struct svm_range *prange; struct kfd_process *p; + int32_t best_loc, gpuidx; + int r = 0; p = kfd_lookup_process_by_pasid(pasid); if (!p) { @@ -2089,11 +2167,48 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, mutex_lock(&prange->migrate_mutex); - r = svm_range_validate_and_map(mm, prange, MAX_GPU_INSTANCE, false, false); - if (r) - pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpu\n", r, + best_loc = svm_range_best_restore_location(prange, adev, &gpuidx); + if (best_loc == -1) { + pr_debug("svms %p failed get best restore loc [0x%lx 0x%lx]\n", svms, prange->start, prange->last); + r = -EACCES; + goto out_unlock_range; + } + + pr_debug("svms %p [0x%lx 0x%lx] best restore 0x%x, actual loc 0x%x\n", + svms, prange->start, prange->last, best_loc, + prange->actual_loc); + if (prange->actual_loc != best_loc) { + if (best_loc) { + r = svm_migrate_ram_to_vram(prange, best_loc, mm); + if (r) { + pr_debug("svm_migrate_to_vram failed (%d) at %llx, falling back to system memory\n", + r, addr); + /* Fallback to system memory if migration to + * VRAM failed + */ + if (prange->actual_loc) + r = svm_migrate_vram_to_ram(prange, mm); + else + r = 0; + } + } else { + r = svm_migrate_vram_to_ram(prange, mm); + } + if (r) { + pr_debug("failed %d to migrate svms %p [0x%lx 0x%lx]\n", + r, svms, prange->start, prange->last); + goto out_unlock_range; + } + } + + r = svm_range_validate_and_map(mm, prange, gpuidx, false, false); + if (r) + pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] to gpus\n", + r, svms, prange->start, prange->last); + +out_unlock_range: mutex_unlock(&prange->migrate_mutex); out_unlock_svms: mutex_unlock(&svms->lock); @@ -2230,7 +2345,7 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size, return 0; } -/* svm_range_best_location - decide the best actual location +/* svm_range_best_prefetch_location - decide the best prefetch location * @prange: svm range structure * * For xnack off: @@ -2252,7 +2367,8 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size, * Return: * 0 for CPU or GPU id */ -static uint32_t svm_range_best_location(struct svm_range *prange) +static uint32_t +svm_range_best_prefetch_location(struct svm_range *prange) { DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE); uint32_t best_loc = prange->prefetch_loc; @@ -2354,7 +2470,7 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange, int r = 0; *migrated = false; - 
best_loc = svm_range_best_location(prange); + best_loc = svm_range_best_prefetch_location(prange); if (best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED || best_loc == prange->actual_loc) @@ -2366,10 +2482,10 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange, if (best_loc) { pr_debug("migrate from ram to vram\n"); - r = svm_migrate_ram_to_vram(prange, best_loc); + r = svm_migrate_ram_to_vram(prange, best_loc, mm); } else { pr_debug("migrate from vram to ram\n"); - r = svm_migrate_vram_to_ram(prange, current->mm); + r = svm_migrate_vram_to_ram(prange, mm); } if (!r) -- cgit v1.2.3 From 1a3b2b5dca1924f2e7eae618ad79471c4a253236 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Wed, 24 Feb 2021 23:57:33 -0500 Subject: drm/amdkfd: multiple gpu migrate vram to vram If prefetch range to gpu with acutal location is another gpu, or GPU retry fault restore pages to migrate the range with acutal location is gpu, then migrate from one gpu to another gpu. Use system memory as bridge because sdma engine may not able to access another gpu vram, use sdma of source gpu to migrate to system memory, then use sdma of destination gpu to migrate from system memory to gpu. Print out gpuid or gpuidx in debug messages. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 47 +++++++++++++++++++++++++++-- drivers/gpu/drm/amd/amdkfd/kfd_migrate.h | 4 +-- drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 51 +++++++++++++++++++++++++------- 3 files changed, 87 insertions(+), 15 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_migrate.h') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c index 3726a671d7d8..d44a46eb00d6 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c @@ -487,8 +487,9 @@ out: * Return: * 0 - OK, otherwise error code */ -int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, - struct mm_struct *mm) +static int +svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, + struct mm_struct *mm) { unsigned long addr, start, end; struct vm_area_struct *vma; @@ -742,6 +743,48 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm) return r; } +/** + * svm_migrate_vram_to_vram - migrate svm range from device to device + * @prange: range structure + * @best_loc: the device to migrate to + * @mm: process mm, use current->mm if NULL + * + * Context: Process context, caller hold mmap read lock, svms lock, prange lock + * + * Return: + * 0 - OK, otherwise error code + */ +static int +svm_migrate_vram_to_vram(struct svm_range *prange, uint32_t best_loc, + struct mm_struct *mm) +{ + int r; + + /* + * TODO: for both devices with PCIe large bar or on same xgmi hive, skip + * system memory as migration bridge + */ + + pr_debug("from gpu 0x%x to gpu 0x%x\n", prange->actual_loc, best_loc); + + r = svm_migrate_vram_to_ram(prange, mm); + if (r) + return r; + + return svm_migrate_ram_to_vram(prange, best_loc, mm); +} + +int +svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc, + struct mm_struct *mm) +{ + if (!prange->actual_loc) + return svm_migrate_ram_to_vram(prange, best_loc, mm); + else + return svm_migrate_vram_to_vram(prange, best_loc, mm); + +} + /** * svm_migrate_to_ram - CPU page fault handler * @vmf: CPU vm fault vma, address diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h index 
53c899b80b85..37ad99cb073d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h @@ -38,8 +38,8 @@ enum MIGRATION_COPY_DIR { FROM_VRAM_TO_RAM }; -int svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc, - struct mm_struct *mm); +int svm_migrate_to_vram(struct svm_range *prange, uint32_t best_loc, + struct mm_struct *mm); int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm); unsigned long svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c index 0694211a118b..0e0b4ffd20ab 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c @@ -348,8 +348,11 @@ static void svm_range_bo_unref(struct svm_range_bo *svm_bo) kref_put(&svm_bo->kref, svm_range_bo_release); } -static bool svm_range_validate_svm_bo(struct svm_range *prange) +static bool +svm_range_validate_svm_bo(struct amdgpu_device *adev, struct svm_range *prange) { + struct amdgpu_device *bo_adev; + mutex_lock(&prange->lock); if (!prange->svm_bo) { mutex_unlock(&prange->lock); @@ -361,6 +364,22 @@ static bool svm_range_validate_svm_bo(struct svm_range *prange) return true; } if (svm_bo_ref_unless_zero(prange->svm_bo)) { + /* + * Migrate from GPU to GPU, remove range from source bo_adev + * svm_bo range list, and return false to allocate svm_bo from + * destination adev. + */ + bo_adev = amdgpu_ttm_adev(prange->svm_bo->bo->tbo.bdev); + if (bo_adev != adev) { + mutex_unlock(&prange->lock); + + spin_lock(&prange->svm_bo->list_lock); + list_del_init(&prange->svm_bo_list); + spin_unlock(&prange->svm_bo->list_lock); + + svm_range_bo_unref(prange->svm_bo); + return false; + } if (READ_ONCE(prange->svm_bo->evicting)) { struct dma_fence *f; struct svm_range_bo *svm_bo; @@ -434,7 +453,7 @@ svm_range_vram_node_new(struct amdgpu_device *adev, struct svm_range *prange, pr_debug("pasid: %x svms 0x%p [0x%lx 0x%lx]\n", p->pasid, prange->svms, prange->start, prange->last); - if (svm_range_validate_svm_bo(prange)) + if (svm_range_validate_svm_bo(adev, prange)) return 0; svm_bo = svm_range_bo_new(); @@ -1173,6 +1192,7 @@ static int svm_range_map_to_gpus(struct svm_range *prange, p = container_of(prange->svms, struct kfd_process, svms); for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) { + pr_debug("mapping to gpu idx 0x%x\n", gpuidx); pdd = kfd_process_device_from_gpuidx(p, gpuidx); if (!pdd) { pr_debug("failed to find device idx %d\n", gpuidx); @@ -2198,7 +2218,7 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, if (prange->actual_loc != best_loc) { if (best_loc) { - r = svm_migrate_ram_to_vram(prange, best_loc, mm); + r = svm_migrate_to_vram(prange, best_loc, mm); if (r) { pr_debug("svm_migrate_to_vram failed (%d) at %llx, falling back to system memory\n", r, addr); @@ -2406,6 +2426,11 @@ svm_range_best_prefetch_location(struct svm_range *prange) goto out; bo_adev = svm_range_get_adev_by_id(prange, best_loc); + if (!bo_adev) { + WARN_ONCE(1, "failed to get device by id 0x%x\n", best_loc); + best_loc = 0; + goto out; + } bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip, MAX_GPU_INSTANCE); @@ -2493,20 +2518,24 @@ svm_range_trigger_migration(struct mm_struct *mm, struct svm_range *prange, best_loc == prange->actual_loc) return 0; + /* + * Prefetch to GPU without host access flag, set actual_loc to gpu, then + * validate on gpu and map to gpus will be handled afterwards. 
+ */ if (best_loc && !prange->actual_loc && - !(prange->flags & KFD_IOCTL_SVM_FLAG_HOST_ACCESS)) + !(prange->flags & KFD_IOCTL_SVM_FLAG_HOST_ACCESS)) { + prange->actual_loc = best_loc; return 0; + } - if (best_loc) { - pr_debug("migrate from ram to vram\n"); - r = svm_migrate_ram_to_vram(prange, best_loc, mm); - } else { - pr_debug("migrate from vram to ram\n"); + if (!best_loc) { r = svm_migrate_vram_to_ram(prange, mm); + *migrated = !r; + return r; } - if (!r) - *migrated = true; + r = svm_migrate_to_vram(prange, best_loc, mm); + *migrated = !r; return r; } -- cgit v1.2.3 From 4ab159d2547c26b34a4ff4770598b72660da1461 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Mon, 29 Mar 2021 18:49:12 -0400 Subject: drm/amdkfd: Add CONFIG_HSA_AMD_SVM Control whether to build SVM support into amdgpu with a Kconfig option. This makes it easier to disable it in production kernels if this new feature causes problems in production environments. Use "depends on" instead of "select" for DEVICE_PRIVATE, as is recommended for visible options. Reviewed-by: Philip Yang Signed-off-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/Kconfig | 15 +++++++++++++-- drivers/gpu/drm/amd/amdkfd/Makefile | 9 ++++++--- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 7 +++++++ drivers/gpu/drm/amd/amdkfd/kfd_migrate.h | 17 +++++++++++------ drivers/gpu/drm/amd/amdkfd/kfd_svm.h | 30 ++++++++++++++++++++++++++++++ 5 files changed, 67 insertions(+), 11 deletions(-) (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_migrate.h') diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig index fb8d85716599..8cc0a76ddf9f 100644 --- a/drivers/gpu/drm/amd/amdkfd/Kconfig +++ b/drivers/gpu/drm/amd/amdkfd/Kconfig @@ -8,9 +8,20 @@ config HSA_AMD depends on DRM_AMDGPU && (X86_64 || ARM64 || PPC64) imply AMD_IOMMU_V2 if X86_64 select HMM_MIRROR - select ZONE_DEVICE - select DEVICE_PRIVATE select MMU_NOTIFIER select DRM_AMDGPU_USERPTR help Enable this if you want to use HSA features on AMD GPU devices. + +config HSA_AMD_SVM + bool "Enable HMM-based shared virtual memory manager" + depends on HSA_AMD && DEVICE_PRIVATE + default y + select HMM_MIRROR + select MMU_NOTIFIER + help + Enable this to use unified memory and managed memory in HIP. This + memory manager supports two modes of operation. One based on + preemptions and one based on page faults. To enable page fault + based memory management on most GFXv9 GPUs, set the module + parameter amdgpu.noretry=0. 
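
As a rough guide to building with this series, the options involved can be
summarized in an illustrative .config fragment. This is not part of the patch;
CONFIG_ZONE_DEVICE is listed because DEVICE_PRIVATE normally depends on it
upstream, and the exact dependencies may differ on your tree:

# illustrative .config fragment (assumed names beyond the two options
# touched by this patch are standard upstream options)
CONFIG_DRM_AMDGPU=m
CONFIG_HSA_AMD=y
CONFIG_ZONE_DEVICE=y
CONFIG_DEVICE_PRIVATE=y
CONFIG_HSA_AMD_SVM=y

For the page-fault-based mode on most GFXv9 GPUs, additionally boot with the
module parameter amdgpu.noretry=0, as the help text above notes.
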
diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile index a93301dbc464..c4f3aff11072 100644 --- a/drivers/gpu/drm/amd/amdkfd/Makefile +++ b/drivers/gpu/drm/amd/amdkfd/Makefile @@ -54,9 +54,7 @@ AMDKFD_FILES := $(AMDKFD_PATH)/kfd_module.o \ $(AMDKFD_PATH)/kfd_dbgdev.o \ $(AMDKFD_PATH)/kfd_dbgmgr.o \ $(AMDKFD_PATH)/kfd_smi_events.o \ - $(AMDKFD_PATH)/kfd_crat.o \ - $(AMDKFD_PATH)/kfd_svm.o \ - $(AMDKFD_PATH)/kfd_migrate.o + $(AMDKFD_PATH)/kfd_crat.o ifneq ($(CONFIG_AMD_IOMMU_V2),) AMDKFD_FILES += $(AMDKFD_PATH)/kfd_iommu.o @@ -65,3 +63,8 @@ endif ifneq ($(CONFIG_DEBUG_FS),) AMDKFD_FILES += $(AMDKFD_PATH)/kfd_debugfs.o endif + +ifneq ($(CONFIG_HSA_AMD_SVM),) +AMDKFD_FILES += $(AMDKFD_PATH)/kfd_svm.o \ + $(AMDKFD_PATH)/kfd_migrate.o +endif diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 913cc29d8857..059c3f1ca27d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -1770,6 +1770,7 @@ static int kfd_ioctl_set_xnack_mode(struct file *filep, return r; } +#if IS_ENABLED(CONFIG_HSA_AMD_SVM) static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data) { struct kfd_ioctl_svm_args *args = data; @@ -1795,6 +1796,12 @@ static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data) return r; } +#else +static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data) +{ + return -EPERM; +} +#endif #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \ [_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h index 37ad99cb073d..0de76b5d4973 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.h @@ -25,6 +25,8 @@ #ifndef KFD_MIGRATE_H_ #define KFD_MIGRATE_H_ +#if IS_ENABLED(CONFIG_HSA_AMD_SVM) + #include #include #include @@ -44,17 +46,20 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm); unsigned long svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr); -#if defined(CONFIG_DEVICE_PRIVATE) int svm_migrate_init(struct amdgpu_device *adev); void svm_migrate_fini(struct amdgpu_device *adev); #else + static inline int svm_migrate_init(struct amdgpu_device *adev) { - DRM_WARN_ONCE("DEVICE_PRIVATE kernel config option is not enabled, " - "add CONFIG_DEVICE_PRIVATE=y in config file to fix\n"); - return -ENODEV; + return 0; +} +static inline void svm_migrate_fini(struct amdgpu_device *adev) +{ + /* empty */ } -static inline void svm_migrate_fini(struct amdgpu_device *adev) {} -#endif + +#endif /* IS_ENABLED(CONFIG_HSA_AMD_SVM) */ + #endif /* KFD_MIGRATE_H_ */ diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h index aa829b3c992d..573f984b81fe 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h @@ -25,6 +25,8 @@ #ifndef KFD_SVM_H_ #define KFD_SVM_H_ +#if IS_ENABLED(CONFIG_HSA_AMD_SVM) + #include #include #include @@ -173,4 +175,32 @@ void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr, void svm_range_free_dma_mappings(struct svm_range *prange); void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm); +#else + +struct kfd_process; + +static inline int svm_range_list_init(struct kfd_process *p) +{ + return 0; +} +static inline void svm_range_list_fini(struct kfd_process *p) +{ + /* empty */ +} + +static inline int 
svm_range_restore_pages(struct amdgpu_device *adev, + unsigned int pasid, uint64_t addr) +{ + return -EFAULT; +} + +static inline int svm_range_schedule_evict_svm_bo( + struct amdgpu_amdkfd_fence *fence) +{ + WARN_ONCE(1, "SVM eviction fence triggered, but SVM is disabled"); + return -EINVAL; +} + +#endif /* IS_ENABLED(CONFIG_HSA_AMD_SVM) */ + #endif /* KFD_SVM_H_ */ -- cgit v1.2.3
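
The kfd_migrate.h and kfd_svm.h hunks above use a common kernel pattern: when
CONFIG_HSA_AMD_SVM is compiled out, the header provides static inline stubs
with the same signatures, so call sites need no #ifdef of their own. Below is
a minimal, self-contained sketch of that pattern in plain C; the names are
invented for illustration and are not taken from the driver:

/* stub_pattern.c - flip HAVE_SVM_FEATURE between 1 and 0 to mimic
 * CONFIG_HSA_AMD_SVM=y or =n. All identifiers here are hypothetical.
 */
#include <stdio.h>

#define HAVE_SVM_FEATURE 1	/* stand-in for IS_ENABLED(CONFIG_HSA_AMD_SVM) */

#if HAVE_SVM_FEATURE
/* "Real" implementations, built only when the feature is enabled. */
static int svm_feature_init(void)
{
	printf("svm feature: real init\n");
	return 0;
}

static void svm_feature_fini(void)
{
	printf("svm feature: real fini\n");
}
#else
/* Compiled-out variant: harmless stubs with identical signatures. */
static inline int svm_feature_init(void)
{
	return 0;
}

static inline void svm_feature_fini(void)
{
	/* empty */
}
#endif

int main(void)
{
	/* Call sites are written once, with no #ifdef at the call site. */
	if (svm_feature_init())
		return 1;
	svm_feature_fini();
	return 0;
}

Note how the patch applies the same idea selectively: svm_migrate_init() and
svm_migrate_fini() become quiet no-ops when SVM is disabled, while
kfd_ioctl_svm() returns -EPERM and the svm_range_restore_pages() stub returns
-EFAULT, because silently succeeding would be wrong for operations that user
space or the GPU page-fault handler explicitly requested.
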