Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_svm.c')
-rw-r--r--  drivers/gpu/drm/amd/amdkfd/kfd_svm.c | 43
1 file changed, 27 insertions(+), 16 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 7df69b77bc5c..ecc390c2f28c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -2675,22 +2675,26 @@ svm_range_add(struct kfd_process *p, uint64_t start, uint64_t size,
return 0;
}
-/* svm_range_best_prefetch_location - decide the best prefetch location
+/**
+ * svm_range_best_prefetch_location - decide the best prefetch location
* @prange: svm range structure
*
* For xnack off:
- * If range map to single GPU, the best acutal location is prefetch loc, which
+ * If the range maps to a single GPU, the best prefetch location is prefetch_loc, which
* can be CPU or GPU.
*
- * If range map to multiple GPUs, only if mGPU connection on xgmi same hive,
- * the best actual location could be prefetch_loc GPU. If mGPU connection on
- * PCIe, the best actual location is always CPU, because GPU cannot access vram
- * of other GPUs, assuming PCIe small bar (large bar support is not upstream).
+ * If range is ACCESS or ACCESS_IN_PLACE by mGPUs, only if mGPU connection on
+ * XGMI same hive, the best prefetch location is prefetch_loc GPU, otherwise
+ * the best prefetch location is always CPU, because a GPU cannot coherently
+ * map the VRAM of other GPUs even with a large-BAR PCIe connection.
*
* For xnack on:
- * The best actual location is prefetch location. If mGPU connection on xgmi
- * same hive, range map to multiple GPUs. Otherwise, the range only map to
- * actual location GPU. Other GPU access vm fault will trigger migration.
+ * If range is not ACCESS_IN_PLACE by mGPUs, the best prefetch location is
+ * prefetch_loc; other GPU access generates a vm fault and triggers migration.
+ *
+ * If range is ACCESS_IN_PLACE by mGPUs, only if mGPU connection on XGMI same
+ * hive, the best prefetch location is prefetch_loc GPU, otherwise the best
+ * prefetch location is always CPU.
*
* Context: Process context
*
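Read together, the new kernel-doc text boils down to a small decision tree: with XNACK off every GPU that maps the range must be able to reach the prefetch location coherently, with XNACK on only ACCESS_IN_PLACE mappings constrain it, and cross-GPU coherence is only assumed over XGMI within one hive. The standalone C sketch below is a simplified illustration of that tree; struct range_info, its boolean fields and best_prefetch_location() are invented for this example and are not the kernel's actual data structures, which work on per-GPU bitmaps as the following hunks show.

#include <stdbool.h>
#include <stdint.h>

#define LOCATION_SYSMEM 0u  /* 0 means system memory (CPU) in the SVM ioctl interface */

/* Hypothetical, flattened view of an SVM range, for illustration only. */
struct range_info {
	uint32_t prefetch_loc;        /* requested prefetch location: GPU id, or 0 for CPU */
	bool     mapped_by_multi_gpu; /* ACCESS or ACCESS_IN_PLACE on more than one GPU */
	bool     aip_by_multi_gpu;    /* ACCESS_IN_PLACE on more than one GPU */
	bool     all_gpus_same_hive;  /* every mapping GPU shares an XGMI hive with prefetch_loc */
};

/* Decision tree described by the kernel-doc comment above. */
static uint32_t best_prefetch_location(const struct range_info *r, bool xnack_enabled)
{
	if (!xnack_enabled) {
		/* XNACK off: all mapping GPUs must reach the memory coherently,
		 * which only works across XGMI, not across PCIe. */
		if (r->mapped_by_multi_gpu && !r->all_gpus_same_hive)
			return LOCATION_SYSMEM;
		return r->prefetch_loc;
	}

	/* XNACK on: retryable faults can migrate the range later, so only
	 * ACCESS_IN_PLACE mappings constrain the prefetch location. */
	if (r->aip_by_multi_gpu && !r->all_gpus_same_hive)
		return LOCATION_SYSMEM;
	return r->prefetch_loc;
}

int main(void)
{
	struct range_info r = {
		.prefetch_loc = 1, .mapped_by_multi_gpu = true,
		.aip_by_multi_gpu = false, .all_gpus_same_hive = false,
	};
	/* XNACK off falls back to system memory, XNACK on keeps GPU 1. */
	return !(best_prefetch_location(&r, false) == LOCATION_SYSMEM &&
		 best_prefetch_location(&r, true) == 1);
}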
@@ -2710,11 +2714,6 @@ svm_range_best_prefetch_location(struct svm_range *prange)
p = container_of(prange->svms, struct kfd_process, svms);
- /* xnack on */
- if (p->xnack_enabled)
- goto out;
-
- /* xnack off */
if (!best_loc || best_loc == KFD_IOCTL_SVM_LOCATION_UNDEFINED)
goto out;
@@ -2724,8 +2723,12 @@ svm_range_best_prefetch_location(struct svm_range *prange)
best_loc = 0;
goto out;
}
- bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip,
- MAX_GPU_INSTANCE);
+
+ if (p->xnack_enabled)
+ bitmap_copy(bitmap, prange->bitmap_aip, MAX_GPU_INSTANCE);
+ else
+ bitmap_or(bitmap, prange->bitmap_access, prange->bitmap_aip,
+ MAX_GPU_INSTANCE);
for_each_set_bit(gpuidx, bitmap, MAX_GPU_INSTANCE) {
pdd = kfd_process_device_from_gpuidx(p, gpuidx);
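The functional change in this hunk is which GPU set gets validated against the prefetch location: with p->xnack_enabled only prange->bitmap_aip is considered, otherwise the union of bitmap_access and bitmap_aip is used, and the unchanged for_each_set_bit() loop then checks each selected GPU for same-hive XGMI connectivity. A minimal userspace sketch of that selection, using plain 64-bit masks instead of the kernel's bitmap_copy()/bitmap_or() helpers (gpus_to_check() and its mask parameters are illustrative assumptions):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Pick the GPUs whose connectivity to the prefetch location must be checked.
 * Plain 64-bit masks stand in for the kernel's MAX_GPU_INSTANCE-sized bitmaps. */
static uint64_t gpus_to_check(uint64_t access_mask, uint64_t aip_mask,
			      bool xnack_enabled)
{
	if (xnack_enabled)
		return aip_mask;               /* only ACCESS_IN_PLACE GPUs matter */
	return access_mask | aip_mask;         /* every GPU that maps the range */
}

int main(void)
{
	uint64_t access = 0x3;  /* GPUs 0 and 1 have ACCESS */
	uint64_t aip    = 0x4;  /* GPU 2 has ACCESS_IN_PLACE */

	printf("xnack off: 0x%llx\n",
	       (unsigned long long)gpus_to_check(access, aip, false)); /* 0x7 */
	printf("xnack on:  0x%llx\n",
	       (unsigned long long)gpus_to_check(access, aip, true));  /* 0x4 */
	return 0;
}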
@@ -3027,6 +3030,14 @@ svm_range_get_attr(struct kfd_process *p, uint64_t start, uint64_t size,
pr_debug("svms 0x%p [0x%llx 0x%llx] nattr 0x%x\n", &p->svms, start,
start + size - 1, nattr);
+ /* Flush pending deferred work to avoid racing with deferred actions from
+ * previous memory map changes (e.g. munmap). Concurrent memory map changes
+ * can still race with get_attr because we don't hold the mmap lock. But that
+ * would be a race condition in the application anyway, and undefined
+ * behaviour is acceptable in that case.
+ */
+ flush_work(&p->svms.deferred_list_work);
+
mmap_read_lock(mm);
if (!svm_range_is_valid(mm, start, size)) {
pr_debug("invalid range\n");