summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c227
1 files changed, 156 insertions, 71 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 15a28578d458..1f036af85ba6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -44,6 +44,7 @@
#include <linux/debugfs.h>
#include <linux/iommu.h>
#include "amdgpu.h"
+#include "amdgpu_object.h"
#include "amdgpu_trace.h"
#include "bif/bif_4_1_d.h"
@@ -209,7 +210,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
placement->num_busy_placement = 1;
return;
}
- abo = container_of(bo, struct amdgpu_bo, tbo);
+ abo = ttm_to_amdgpu_bo(bo);
switch (bo->mem.mem_type) {
case TTM_PL_VRAM:
if (adev->mman.buffer_funcs &&
@@ -257,7 +258,7 @@ gtt:
static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
{
- struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo);
+ struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
if (amdgpu_ttm_tt_get_usermm(bo->ttm))
return -EPERM;
@@ -289,97 +290,177 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
return addr;
}
-static int amdgpu_move_blit(struct ttm_buffer_object *bo,
- bool evict, bool no_wait_gpu,
- struct ttm_mem_reg *new_mem,
- struct ttm_mem_reg *old_mem)
+/**
+ * amdgpu_find_mm_node - Helper function finds the drm_mm_node
+ * corresponding to @offset. It also modifies the offset to be
+ * within the drm_mm_node returned
+ */
+static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem,
+ unsigned long *offset)
{
- struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
- struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+ struct drm_mm_node *mm_node = mem->mm_node;
- struct drm_mm_node *old_mm, *new_mm;
- uint64_t old_start, old_size, new_start, new_size;
- unsigned long num_pages;
- struct dma_fence *fence = NULL;
- int r;
+ while (*offset >= (mm_node->size << PAGE_SHIFT)) {
+ *offset -= (mm_node->size << PAGE_SHIFT);
+ ++mm_node;
+ }
+ return mm_node;
+}
- BUILD_BUG_ON((PAGE_SIZE % AMDGPU_GPU_PAGE_SIZE) != 0);
+/**
+ * amdgpu_copy_ttm_mem_to_mem - Helper function for copy
+ *
+ * The function copies @size bytes from {src->mem + src->offset} to
+ * {dst->mem + dst->offset}. src->bo and dst->bo could be same BO for a
+ * move and different for a BO to BO copy.
+ *
+ * @f: Returns the last fence if multiple jobs are submitted.
+ */
+int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
+ struct amdgpu_copy_mem *src,
+ struct amdgpu_copy_mem *dst,
+ uint64_t size,
+ struct reservation_object *resv,
+ struct dma_fence **f)
+{
+ struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
+ struct drm_mm_node *src_mm, *dst_mm;
+ uint64_t src_node_start, dst_node_start, src_node_size,
+ dst_node_size, src_page_offset, dst_page_offset;
+ struct dma_fence *fence = NULL;
+ int r = 0;
+ const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
+ AMDGPU_GPU_PAGE_SIZE);
if (!ring->ready) {
DRM_ERROR("Trying to move memory with ring turned off.\n");
return -EINVAL;
}
- old_mm = old_mem->mm_node;
- old_size = old_mm->size;
- old_start = amdgpu_mm_node_addr(bo, old_mm, old_mem);
+ src_mm = amdgpu_find_mm_node(src->mem, &src->offset);
+ src_node_start = amdgpu_mm_node_addr(src->bo, src_mm, src->mem) +
+ src->offset;
+ src_node_size = (src_mm->size << PAGE_SHIFT) - src->offset;
+ src_page_offset = src_node_start & (PAGE_SIZE - 1);
- new_mm = new_mem->mm_node;
- new_size = new_mm->size;
- new_start = amdgpu_mm_node_addr(bo, new_mm, new_mem);
+ dst_mm = amdgpu_find_mm_node(dst->mem, &dst->offset);
+ dst_node_start = amdgpu_mm_node_addr(dst->bo, dst_mm, dst->mem) +
+ dst->offset;
+ dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst->offset;
+ dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
- num_pages = new_mem->num_pages;
mutex_lock(&adev->mman.gtt_window_lock);
- while (num_pages) {
- unsigned long cur_pages = min(min(old_size, new_size),
- (u64)AMDGPU_GTT_MAX_TRANSFER_SIZE);
- uint64_t from = old_start, to = new_start;
+
+ while (size) {
+ unsigned long cur_size;
+ uint64_t from = src_node_start, to = dst_node_start;
struct dma_fence *next;
- if (old_mem->mem_type == TTM_PL_TT &&
- !amdgpu_gtt_mgr_is_allocated(old_mem)) {
- r = amdgpu_map_buffer(bo, old_mem, cur_pages,
- old_start, 0, ring, &from);
+ /* Copy size cannot exceed GTT_MAX_BYTES. So if src or dst
+ * begins at an offset, then adjust the size accordingly
+ */
+ cur_size = min3(min(src_node_size, dst_node_size), size,
+ GTT_MAX_BYTES);
+ if (cur_size + src_page_offset > GTT_MAX_BYTES ||
+ cur_size + dst_page_offset > GTT_MAX_BYTES)
+ cur_size -= max(src_page_offset, dst_page_offset);
+
+ /* Map only what needs to be accessed. Map src to window 0 and
+ * dst to window 1
+ */
+ if (src->mem->mem_type == TTM_PL_TT &&
+ !amdgpu_gtt_mgr_is_allocated(src->mem)) {
+ r = amdgpu_map_buffer(src->bo, src->mem,
+ PFN_UP(cur_size + src_page_offset),
+ src_node_start, 0, ring,
+ &from);
if (r)
goto error;
+ /* Adjust the offset because amdgpu_map_buffer returns
+ * start of mapped page
+ */
+ from += src_page_offset;
}
- if (new_mem->mem_type == TTM_PL_TT &&
- !amdgpu_gtt_mgr_is_allocated(new_mem)) {
- r = amdgpu_map_buffer(bo, new_mem, cur_pages,
- new_start, 1, ring, &to);
+ if (dst->mem->mem_type == TTM_PL_TT &&
+ !amdgpu_gtt_mgr_is_allocated(dst->mem)) {
+ r = amdgpu_map_buffer(dst->bo, dst->mem,
+ PFN_UP(cur_size + dst_page_offset),
+ dst_node_start, 1, ring,
+ &to);
if (r)
goto error;
+ to += dst_page_offset;
}
- r = amdgpu_copy_buffer(ring, from, to,
- cur_pages * PAGE_SIZE,
- bo->resv, &next, false, true);
+ r = amdgpu_copy_buffer(ring, from, to, cur_size,
+ resv, &next, false, true);
if (r)
goto error;
dma_fence_put(fence);
fence = next;
- num_pages -= cur_pages;
- if (!num_pages)
+ size -= cur_size;
+ if (!size)
break;
- old_size -= cur_pages;
- if (!old_size) {
- old_start = amdgpu_mm_node_addr(bo, ++old_mm, old_mem);
- old_size = old_mm->size;
+ src_node_size -= cur_size;
+ if (!src_node_size) {
+ src_node_start = amdgpu_mm_node_addr(src->bo, ++src_mm,
+ src->mem);
+ src_node_size = (src_mm->size << PAGE_SHIFT);
} else {
- old_start += cur_pages * PAGE_SIZE;
+ src_node_start += cur_size;
+ src_page_offset = src_node_start & (PAGE_SIZE - 1);
}
-
- new_size -= cur_pages;
- if (!new_size) {
- new_start = amdgpu_mm_node_addr(bo, ++new_mm, new_mem);
- new_size = new_mm->size;
+ dst_node_size -= cur_size;
+ if (!dst_node_size) {
+ dst_node_start = amdgpu_mm_node_addr(dst->bo, ++dst_mm,
+ dst->mem);
+ dst_node_size = (dst_mm->size << PAGE_SHIFT);
} else {
- new_start += cur_pages * PAGE_SIZE;
+ dst_node_start += cur_size;
+ dst_page_offset = dst_node_start & (PAGE_SIZE - 1);
}
}
+error:
mutex_unlock(&adev->mman.gtt_window_lock);
+ if (f)
+ *f = dma_fence_get(fence);
+ dma_fence_put(fence);
+ return r;
+}
+
+
+static int amdgpu_move_blit(struct ttm_buffer_object *bo,
+ bool evict, bool no_wait_gpu,
+ struct ttm_mem_reg *new_mem,
+ struct ttm_mem_reg *old_mem)
+{
+ struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
+ struct amdgpu_copy_mem src, dst;
+ struct dma_fence *fence = NULL;
+ int r;
+
+ src.bo = bo;
+ dst.bo = bo;
+ src.mem = old_mem;
+ dst.mem = new_mem;
+ src.offset = 0;
+ dst.offset = 0;
+
+ r = amdgpu_ttm_copy_mem_to_mem(adev, &src, &dst,
+ new_mem->num_pages << PAGE_SHIFT,
+ bo->resv, &fence);
+ if (r)
+ goto error;
r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
dma_fence_put(fence);
return r;
error:
- mutex_unlock(&adev->mman.gtt_window_lock);
-
if (fence)
dma_fence_wait(fence, false);
dma_fence_put(fence);
@@ -484,7 +565,7 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo,
int r;
/* Can't move a pinned BO */
- abo = container_of(bo, struct amdgpu_bo, tbo);
+ abo = ttm_to_amdgpu_bo(bo);
if (WARN_ON_ONCE(abo->pin_count > 0))
return -EINVAL;
@@ -582,13 +663,12 @@ static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_re
static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
unsigned long page_offset)
{
- struct drm_mm_node *mm = bo->mem.mm_node;
- uint64_t size = mm->size;
- uint64_t offset = page_offset;
+ struct drm_mm_node *mm;
+ unsigned long offset = (page_offset << PAGE_SHIFT);
- page_offset = do_div(offset, size);
- mm += offset;
- return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start + page_offset;
+ mm = amdgpu_find_mm_node(&bo->mem, &offset);
+ return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start +
+ (offset >> PAGE_SHIFT);
}
/*
@@ -829,7 +909,8 @@ int amdgpu_ttm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *bo_mem)
placement.busy_placement = &placements;
placements.fpfn = 0;
placements.lpfn = adev->mc.gart_size >> PAGE_SHIFT;
- placements.flags = bo->mem.placement | TTM_PL_FLAG_TT;
+ placements.flags = (bo->mem.placement & ~TTM_PL_MASK_MEM) |
+ TTM_PL_FLAG_TT;
r = ttm_bo_mem_space(bo, &placement, &tmp, true, false);
if (unlikely(r))
@@ -1112,9 +1193,6 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
unsigned long num_pages = bo->mem.num_pages;
struct drm_mm_node *node = bo->mem.mm_node;
- if (bo->mem.start != AMDGPU_BO_INVALID_OFFSET)
- return ttm_bo_eviction_valuable(bo, place);
-
switch (bo->mem.mem_type) {
case TTM_PL_TT:
return true;
@@ -1129,7 +1207,7 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
num_pages -= node->size;
++node;
}
- break;
+ return false;
default:
break;
@@ -1142,9 +1220,9 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
unsigned long offset,
void *buf, int len, int write)
{
- struct amdgpu_bo *abo = container_of(bo, struct amdgpu_bo, tbo);
+ struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
- struct drm_mm_node *nodes = abo->tbo.mem.mm_node;
+ struct drm_mm_node *nodes;
uint32_t value = 0;
int ret = 0;
uint64_t pos;
@@ -1153,10 +1231,7 @@ static int amdgpu_ttm_access_memory(struct ttm_buffer_object *bo,
if (bo->mem.mem_type != TTM_PL_VRAM)
return -EIO;
- while (offset >= (nodes->size << PAGE_SHIFT)) {
- offset -= nodes->size << PAGE_SHIFT;
- ++nodes;
- }
+ nodes = amdgpu_find_mm_node(&abo->tbo.mem, &offset);
pos = (nodes->start << PAGE_SHIFT) + offset;
while (len && pos < adev->mc.mc_vram_size) {
@@ -1255,6 +1330,15 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
/* Change the size here instead of the init above so only lpfn is affected */
amdgpu_ttm_set_active_vram_size(adev, adev->mc.visible_vram_size);
+ /*
+ *The reserved vram for firmware must be pinned to the specified
+ *place on the VRAM, so reserve it early.
+ */
+ r = amdgpu_fw_reserve_vram_init(adev);
+ if (r) {
+ return r;
+ }
+
r = amdgpu_bo_create_kernel(adev, adev->mc.stolen_size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM,
&adev->stolen_vga_memory,
@@ -1479,7 +1563,8 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
job->vm_needs_flush = vm_needs_flush;
if (resv) {
r = amdgpu_sync_resv(adev, &job->sync, resv,
- AMDGPU_FENCE_OWNER_UNDEFINED);
+ AMDGPU_FENCE_OWNER_UNDEFINED,
+ false);
if (r) {
DRM_ERROR("sync failed (%d).\n", r);
goto error_free;
@@ -1571,7 +1656,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
if (resv) {
r = amdgpu_sync_resv(adev, &job->sync, resv,
- AMDGPU_FENCE_OWNER_UNDEFINED);
+ AMDGPU_FENCE_OWNER_UNDEFINED, false);
if (r) {
DRM_ERROR("sync failed (%d).\n", r);
goto error_free;