Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_svm.c'):

 drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 133 ++++++++++++++++++++++---------
 1 file changed, 84 insertions(+), 49 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 5ff1a5a89d96..011561605983 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -23,7 +23,10 @@
#include <linux/types.h>
#include <linux/sched/task.h>
+#include <linux/dynamic_debug.h>
#include <drm/ttm/ttm_tt.h>
+#include <drm/drm_exec.h>
+
#include "amdgpu_sync.h"
#include "amdgpu_object.h"
#include "amdgpu_vm.h"
@@ -46,6 +49,13 @@
* page table is updated.
*/
#define AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING (2UL * NSEC_PER_MSEC)
+#if IS_ENABLED(CONFIG_DYNAMIC_DEBUG)
+#define dynamic_svm_range_dump(svms) \
+ _dynamic_func_call_no_desc("svm_range_dump", svm_range_debug_dump, svms)
+#else
+#define dynamic_svm_range_dump(svms) \
+ do { if (0) svm_range_debug_dump(svms); } while (0)
+#endif
/* Giant svm range split into smaller ranges based on this, it is decided using
* minimum of all dGPU/APU 1/32 VRAM size, between 2MB to 1GB and alignment to
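
The dynamic_svm_range_dump() wrapper makes the expensive range dump free unless explicitly enabled: with CONFIG_DYNAMIC_DEBUG the call site can be toggled at runtime through /sys/kernel/debug/dynamic_debug/control, and without it the "if (0)" arm still parses and type-checks the argument while letting the compiler discard the call entirely. A minimal userspace sketch of the same compile-out idiom (the names DEBUG_DUMPS, debug_dump and maybe_dump are illustrative, not kernel API):

	#include <stdio.h>

	static void debug_dump(const char *tag)
	{
		printf("dump: %s\n", tag);
	}

	#ifdef DEBUG_DUMPS
	#define maybe_dump(tag) debug_dump(tag)
	#else
	/* The argument is still type-checked, but the dead branch is
	 * eliminated, so the call costs nothing at runtime. */
	#define maybe_dump(tag) do { if (0) debug_dump(tag); } while (0)
	#endif

	int main(void)
	{
		maybe_dump("ranges");	/* no-op unless built with -DDEBUG_DUMPS */
		return 0;
	}
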
@@ -239,7 +249,7 @@ void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
}
}
-void svm_range_free_dma_mappings(struct svm_range *prange)
+void svm_range_free_dma_mappings(struct svm_range *prange, bool unmap_dma)
{
struct kfd_process_device *pdd;
dma_addr_t *dma_addr;
@@ -260,13 +270,14 @@ void svm_range_free_dma_mappings(struct svm_range *prange)
continue;
}
dev = &pdd->dev->adev->pdev->dev;
- svm_range_dma_unmap(dev, dma_addr, 0, prange->npages);
+ if (unmap_dma)
+ svm_range_dma_unmap(dev, dma_addr, 0, prange->npages);
kvfree(dma_addr);
prange->dma_addr[gpuidx] = NULL;
}
}
-static void svm_range_free(struct svm_range *prange, bool update_mem_usage)
+static void svm_range_free(struct svm_range *prange, bool do_unmap)
{
uint64_t size = (prange->last - prange->start + 1) << PAGE_SHIFT;
struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
@@ -275,9 +286,9 @@ static void svm_range_free(struct svm_range *prange, bool update_mem_usage)
prange->start, prange->last);
svm_range_vram_node_free(prange);
- svm_range_free_dma_mappings(prange);
+ svm_range_free_dma_mappings(prange, do_unmap);
- if (update_mem_usage && !p->xnack_enabled) {
+ if (do_unmap && !p->xnack_enabled) {
pr_debug("unreserve prange 0x%p size: 0x%llx\n", prange, size);
amdgpu_amdkfd_unreserve_mem_limit(NULL, size,
KFD_IOC_ALLOC_MEM_FLAGS_USERPTR, 0);
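
The new unmap_dma/do_unmap flag lets callers free a range's bookkeeping without tearing down the DMA mappings themselves. Judging by the svm_range_clone() changes further down, the point is the split path: the surviving clone inherits the DMA address arrays, so the stale copy must be freed with do_unmap=false to leave the device mappings (and the memory-limit accounting) untouched.
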
@@ -849,6 +860,37 @@ static void svm_range_debug_dump(struct svm_range_list *svms)
}
}
+static void *
+svm_range_copy_array(void *psrc, size_t size, uint64_t num_elements,
+ uint64_t offset)
+{
+ unsigned char *dst;
+
+ dst = kvmalloc_array(num_elements, size, GFP_KERNEL);
+ if (!dst)
+ return NULL;
+ memcpy(dst, (unsigned char *)psrc + offset, num_elements * size);
+
+ return (void *)dst;
+}
+
+static int
+svm_range_copy_dma_addrs(struct svm_range *dst, struct svm_range *src)
+{
+ int i;
+
+ for (i = 0; i < MAX_GPU_INSTANCE; i++) {
+ if (!src->dma_addr[i])
+ continue;
+ dst->dma_addr[i] = svm_range_copy_array(src->dma_addr[i],
+ sizeof(*src->dma_addr[i]), src->npages, 0);
+ if (!dst->dma_addr[i])
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
static int
svm_range_split_array(void *ppnew, void *ppold, size_t size,
uint64_t old_start, uint64_t old_n,
@@ -863,22 +905,16 @@ svm_range_split_array(void *ppnew, void *ppold, size_t size,
if (!pold)
return 0;
- new = kvmalloc_array(new_n, size, GFP_KERNEL);
+ d = (new_start - old_start) * size;
+ new = svm_range_copy_array(pold, size, new_n, d);
if (!new)
return -ENOMEM;
-
- d = (new_start - old_start) * size;
- memcpy(new, pold + d, new_n * size);
-
- old = kvmalloc_array(old_n, size, GFP_KERNEL);
+ d = (new_start == old_start) ? new_n * size : 0;
+ old = svm_range_copy_array(pold, size, old_n, d);
if (!old) {
kvfree(new);
return -ENOMEM;
}
-
- d = (new_start == old_start) ? new_n * size : 0;
- memcpy(old, pold + d, old_n * size);
-
kvfree(pold);
*(void **)ppold = old;
*(void **)ppnew = new;
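
The refactor folds both memcpy paths of svm_range_split_array() into svm_range_copy_array(); only the byte offset differs. The new array always covers [new_start, new_start + new_n), so it copies from offset (new_start - old_start) * size; the surviving old array keeps the complement: the tail (offset new_n * size) when the new piece took the head, the head (offset 0) otherwise. For example, splitting a 10-element array with new_start == old_start and new_n = 4 gives the new array elements 0-3 and replaces the old pointer with a copy of elements 4-9. A userspace sketch of the same arithmetic, assuming plain malloc in place of kvmalloc_array:

	#include <errno.h>
	#include <stdlib.h>
	#include <string.h>

	/* Mirror of svm_range_copy_array(): duplicate n elements of src
	 * starting at a byte offset. */
	static void *copy_array(const void *src, size_t size, size_t n,
				size_t offset)
	{
		void *dst = malloc(n * size);

		if (!dst)
			return NULL;
		memcpy(dst, (const char *)src + offset, n * size);
		return dst;
	}

	/* Split *pold so that *pnew covers [new_start, new_start + new_n)
	 * and *pold keeps the remaining old_n elements. */
	static int split_array(void **pnew, void **pold, size_t size,
			       size_t old_start, size_t old_n,
			       size_t new_start, size_t new_n)
	{
		void *new_arr = copy_array(*pold, size, new_n,
					   (new_start - old_start) * size);
		void *old_arr;

		if (!new_arr)
			return -ENOMEM;
		/* Head split keeps the tail; tail split keeps the head. */
		old_arr = copy_array(*pold, size, old_n,
				     new_start == old_start ? new_n * size : 0);
		if (!old_arr) {
			free(new_arr);
			return -ENOMEM;
		}
		free(*pold);
		*pold = old_arr;
		*pnew = new_arr;
		return 0;
	}
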
@@ -1455,37 +1491,34 @@ struct svm_validate_context {
struct svm_range *prange;
bool intr;
DECLARE_BITMAP(bitmap, MAX_GPU_INSTANCE);
- struct ttm_validate_buffer tv[MAX_GPU_INSTANCE];
- struct list_head validate_list;
- struct ww_acquire_ctx ticket;
+ struct drm_exec exec;
};
-static int svm_range_reserve_bos(struct svm_validate_context *ctx)
+static int svm_range_reserve_bos(struct svm_validate_context *ctx, bool intr)
{
struct kfd_process_device *pdd;
struct amdgpu_vm *vm;
uint32_t gpuidx;
int r;
- INIT_LIST_HEAD(&ctx->validate_list);
- for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
- pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx);
- if (!pdd) {
- pr_debug("failed to find device idx %d\n", gpuidx);
- return -EINVAL;
- }
- vm = drm_priv_to_vm(pdd->drm_priv);
-
- ctx->tv[gpuidx].bo = &vm->root.bo->tbo;
- ctx->tv[gpuidx].num_shared = 4;
- list_add(&ctx->tv[gpuidx].head, &ctx->validate_list);
- }
+ drm_exec_init(&ctx->exec, intr ? DRM_EXEC_INTERRUPTIBLE_WAIT : 0);
+ drm_exec_until_all_locked(&ctx->exec) {
+ for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
+ pdd = kfd_process_device_from_gpuidx(ctx->process, gpuidx);
+ if (!pdd) {
+ pr_debug("failed to find device idx %d\n", gpuidx);
+ r = -EINVAL;
+ goto unreserve_out;
+ }
+ vm = drm_priv_to_vm(pdd->drm_priv);
- r = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->validate_list,
- ctx->intr, NULL);
- if (r) {
- pr_debug("failed %d to reserve bo\n", r);
- return r;
+ r = amdgpu_vm_lock_pd(vm, &ctx->exec, 2);
+ drm_exec_retry_on_contention(&ctx->exec);
+ if (unlikely(r)) {
+ pr_debug("failed %d to reserve bo\n", r);
+ goto unreserve_out;
+ }
+ }
}
for_each_set_bit(gpuidx, ctx->bitmap, MAX_GPU_INSTANCE) {
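
This replaces the ttm_validate_buffer array plus ttm_eu_reserve_buffers()/ttm_eu_backoff_reservation() with a drm_exec loop: drm_exec_until_all_locked() re-runs its body after dropping all locks whenever drm_exec_retry_on_contention() hits ww-mutex contention, so the per-GPU page-directory reservations either all succeed or are all released. The general shape of the pattern (a sketch, not the exact amdkfd code; "obj" stands in for whatever GEM object needs locking, here the per-VM page directory via amdgpu_vm_lock_pd()):

	struct drm_exec exec;
	int r = 0;

	drm_exec_init(&exec, intr ? DRM_EXEC_INTERRUPTIBLE_WAIT : 0);
	drm_exec_until_all_locked(&exec) {
		r = drm_exec_lock_obj(&exec, obj);
		/* On contention all locks are dropped and the body re-runs. */
		drm_exec_retry_on_contention(&exec);
		if (unlikely(r))
			goto out;
	}
	/* All reservations are held here; do the actual work. */
out:
	drm_exec_fini(&exec);	/* drops every lock taken above */
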
@@ -1508,13 +1541,13 @@ static int svm_range_reserve_bos(struct svm_validate_context *ctx)
return 0;
unreserve_out:
- ttm_eu_backoff_reservation(&ctx->ticket, &ctx->validate_list);
+ drm_exec_fini(&ctx->exec);
return r;
}
static void svm_range_unreserve_bos(struct svm_validate_context *ctx)
{
- ttm_eu_backoff_reservation(&ctx->ticket, &ctx->validate_list);
+ drm_exec_fini(&ctx->exec);
}
static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
@@ -1522,6 +1555,8 @@ static void *kfd_svm_page_owner(struct kfd_process *p, int32_t gpuidx)
struct kfd_process_device *pdd;
pdd = kfd_process_device_from_gpuidx(p, gpuidx);
+ if (!pdd)
+ return NULL;
return SVM_ADEV_PGMAP_OWNER(pdd->dev->adev);
}
@@ -1596,12 +1631,12 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
}
if (bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) {
- if (!prange->mapped_to_gpu) {
+ bitmap_copy(ctx->bitmap, prange->bitmap_access, MAX_GPU_INSTANCE);
+ if (!prange->mapped_to_gpu ||
+ bitmap_empty(ctx->bitmap, MAX_GPU_INSTANCE)) {
r = 0;
goto free_ctx;
}
-
- bitmap_copy(ctx->bitmap, prange->bitmap_access, MAX_GPU_INSTANCE);
}
if (prange->actual_loc && !prange->ttm_res) {
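
Moving the bitmap_copy() ahead of the check tightens the early-out: previously a range with mapped_to_gpu set but an empty bitmap_access fell through, and the later find_first_bit() on the empty bitmap yields MAX_GPU_INSTANCE, for which kfd_process_device_from_gpuidx() finds no device. Together with the new NULL-pdd check in kfd_svm_page_owner() above, this closes that path.
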
@@ -1613,7 +1648,7 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
goto free_ctx;
}
- svm_range_reserve_bos(ctx);
+ svm_range_reserve_bos(ctx, intr);
p = container_of(prange->svms, struct kfd_process, svms);
owner = kfd_svm_page_owner(p, find_first_bit(ctx->bitmap,
@@ -1925,7 +1960,10 @@ static struct svm_range *svm_range_clone(struct svm_range *old)
new = svm_range_new(old->svms, old->start, old->last, false);
if (!new)
return NULL;
-
+ if (svm_range_copy_dma_addrs(new, old)) {
+ svm_range_free(new, false);
+ return NULL;
+ }
if (old->svm_bo) {
new->ttm_res = old->ttm_res;
new->offset = old->offset;
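
Cloning the DMA address arrays is the counterpart of the do_unmap flag introduced earlier: when a mapped range is split, the clone keeps valid DMA addresses rather than losing them, and the error path frees the half-built clone with svm_range_free(new, false) so nothing that was never mapped gets unmapped or has its accounting released.
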
@@ -2621,10 +2659,7 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
return -EFAULT;
}
- *is_heap_stack = (vma->vm_start <= vma->vm_mm->brk &&
- vma->vm_end >= vma->vm_mm->start_brk) ||
- (vma->vm_start <= vma->vm_mm->start_stack &&
- vma->vm_end >= vma->vm_mm->start_stack);
+ *is_heap_stack = vma_is_initial_heap(vma) || vma_is_initial_stack(vma);
start_limit = max(vma->vm_start >> PAGE_SHIFT,
(unsigned long)ALIGN_DOWN(addr, 2UL << 8));
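
vma_is_initial_heap() and vma_is_initial_stack() are generic mm helpers that encapsulate exactly the open-coded brk/start_stack overlap tests removed here; their definitions in include/linux/mm.h are essentially:

	static inline bool vma_is_initial_heap(const struct vm_area_struct *vma)
	{
		return vma->vm_start <= vma->vm_mm->brk &&
		       vma->vm_end >= vma->vm_mm->start_brk;
	}

	static inline bool vma_is_initial_stack(const struct vm_area_struct *vma)
	{
		return vma->vm_start <= vma->vm_mm->start_stack &&
		       vma->vm_end >= vma->vm_mm->start_stack;
	}
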
@@ -3561,7 +3596,7 @@ out_unlock_range:
break;
}
- svm_range_debug_dump(svms);
+ dynamic_svm_range_dump(svms);
mutex_unlock(&svms->lock);
mmap_read_unlock(mm);
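
With the dynamic-debug wrapper in place, the dump no longer runs unconditionally on every set-attr call. Assuming it follows the usual dynamic-debug convention of matching on the format string passed to _dynamic_func_call_no_desc(), it can be enabled at runtime with something like:

	echo 'format "svm_range_dump" +p' > /sys/kernel/debug/dynamic_debug/control
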