summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c186
1 files changed, 111 insertions, 75 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index e59c01a83dac..e11c5d69843d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -58,6 +58,7 @@
#include "amdgpu_amdkfd.h"
#include "amdgpu_sdma.h"
#include "amdgpu_ras.h"
+#include "amdgpu_atomfirmware.h"
#include "bif/bif_4_1_d.h"
#define AMDGPU_TTM_VRAM_MAX_DW_READ (size_t)128
@@ -91,15 +92,13 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
case TTM_PL_TT:
/* GTT memory */
man->func = &amdgpu_gtt_mgr_func;
- man->gpu_offset = adev->gmc.gart_start;
man->available_caching = TTM_PL_MASK_CACHING;
man->default_caching = TTM_PL_FLAG_CACHED;
- man->flags = TTM_MEMTYPE_FLAG_MAPPABLE | TTM_MEMTYPE_FLAG_CMA;
+ man->flags = TTM_MEMTYPE_FLAG_MAPPABLE;
break;
case TTM_PL_VRAM:
/* "On-card" video ram */
man->func = &amdgpu_vram_mgr_func;
- man->gpu_offset = adev->gmc.vram_start;
man->flags = TTM_MEMTYPE_FLAG_FIXED |
TTM_MEMTYPE_FLAG_MAPPABLE;
man->available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC;
@@ -110,8 +109,7 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
case AMDGPU_PL_OA:
/* On-chip GDS memory*/
man->func = &ttm_bo_manager_func;
- man->gpu_offset = 0;
- man->flags = TTM_MEMTYPE_FLAG_FIXED | TTM_MEMTYPE_FLAG_CMA;
+ man->flags = TTM_MEMTYPE_FLAG_FIXED;
man->available_caching = TTM_PL_FLAG_UNCACHED;
man->default_caching = TTM_PL_FLAG_UNCACHED;
break;
@@ -258,7 +256,8 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
if (mm_node->start != AMDGPU_BO_INVALID_OFFSET) {
addr = mm_node->start << PAGE_SHIFT;
- addr += bo->bdev->man[mem->mem_type].gpu_offset;
+ addr += amdgpu_ttm_domain_start(amdgpu_ttm_adev(bo->bdev),
+ mem->mem_type);
}
return addr;
}
@@ -431,12 +430,22 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
}
src_offset = src->offset;
- src_mm = amdgpu_find_mm_node(src->mem, &src_offset);
- src_node_size = (src_mm->size << PAGE_SHIFT) - src_offset;
+ if (src->mem->mm_node) {
+ src_mm = amdgpu_find_mm_node(src->mem, &src_offset);
+ src_node_size = (src_mm->size << PAGE_SHIFT) - src_offset;
+ } else {
+ src_mm = NULL;
+ src_node_size = ULLONG_MAX;
+ }
dst_offset = dst->offset;
- dst_mm = amdgpu_find_mm_node(dst->mem, &dst_offset);
- dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst_offset;
+ if (dst->mem->mm_node) {
+ dst_mm = amdgpu_find_mm_node(dst->mem, &dst_offset);
+ dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst_offset;
+ } else {
+ dst_mm = NULL;
+ dst_node_size = ULLONG_MAX;
+ }
mutex_lock(&adev->mman.gtt_window_lock);
@@ -828,10 +837,6 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_
return 0;
}
-static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
-{
-}
-
static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
unsigned long page_offset)
{
@@ -843,6 +848,27 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
(offset >> PAGE_SHIFT);
}
+/**
+ * amdgpu_ttm_domain_start - Returns GPU start address
+ * @adev: amdgpu device object
+ * @type: type of the memory
+ *
+ * Returns:
+ * GPU start address of a memory domain
+ */
+
+uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type)
+{
+ switch (type) {
+ case TTM_PL_TT:
+ return adev->gmc.gart_start;
+ case TTM_PL_VRAM:
+ return adev->gmc.vram_start;
+ }
+
+ return 0;
+}
+
/*
* TTM backend functions.
*/
@@ -1024,7 +1050,6 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
struct amdgpu_ttm_tt *gtt = (void *)ttm;
- unsigned nents;
int r;
int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
@@ -1039,9 +1064,8 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
goto release_sg;
/* Map SG to device */
- r = -ENOMEM;
- nents = dma_map_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
- if (nents == 0)
+ r = dma_map_sgtable(adev->dev, ttm->sg, direction, 0);
+ if (r)
goto release_sg;
/* convert SG to linear array of pages and dma addresses */
@@ -1072,8 +1096,7 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
return;
/* unmap the pages mapped to the device */
- dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction);
-
+ dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0);
sg_free_table(ttm->sg);
#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
@@ -1091,7 +1114,7 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm)
#endif
}
-int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
+static int amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
struct ttm_buffer_object *tbo,
uint64_t flags)
{
@@ -1239,9 +1262,6 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo)
bo->mem = tmp;
}
- bo->offset = (bo->mem.start << PAGE_SHIFT) +
- bo->bdev->man[bo->mem.mem_type].gpu_offset;
-
return 0;
}
@@ -1272,7 +1292,7 @@ int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo)
* Called by ttm_tt_unbind() on behalf of ttm_bo_move_ttm() and
* ttm_tt_destroy().
*/
-static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
+static void amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev);
struct amdgpu_ttm_tt *gtt = (void *)ttm;
@@ -1283,14 +1303,13 @@ static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm)
amdgpu_ttm_tt_unpin_userptr(ttm);
if (gtt->offset == AMDGPU_BO_INVALID_OFFSET)
- return 0;
+ return;
/* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */
r = amdgpu_gart_unbind(adev, gtt->offset, ttm->num_pages);
if (r)
DRM_ERROR("failed to unbind %lu pages at 0x%08llX\n",
gtt->ttm.ttm.num_pages, gtt->offset);
- return r;
}
static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm)
@@ -1731,7 +1750,6 @@ static struct ttm_bo_driver amdgpu_bo_driver = {
.release_notify = &amdgpu_bo_release_notify,
.fault_reserve_notify = &amdgpu_bo_fault_reserve_notify,
.io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
- .io_mem_free = &amdgpu_ttm_io_mem_free,
.io_mem_pfn = amdgpu_ttm_io_mem_pfn,
.access_memory = &amdgpu_ttm_access_memory,
.del_from_lru_notify = &amdgpu_vm_del_from_lru_notify
@@ -1801,54 +1819,86 @@ static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev)
return 0;
}
-static u64 amdgpu_ttm_training_get_c2p_offset(u64 vram_size)
+static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev)
{
- if ((vram_size & (SZ_1M - 1)) < (SZ_4K + 1) )
- vram_size -= SZ_1M;
+ struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
+
+ memset(ctx, 0, sizeof(*ctx));
- return ALIGN(vram_size, SZ_1M);
+ ctx->c2p_train_data_offset =
+ ALIGN((adev->gmc.mc_vram_size - adev->discovery_tmr_size - SZ_1M), SZ_1M);
+ ctx->p2c_train_data_offset =
+ (adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET);
+ ctx->train_data_size =
+ GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES;
+
+ DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",
+ ctx->train_data_size,
+ ctx->p2c_train_data_offset,
+ ctx->c2p_train_data_offset);
}
-/**
- * amdgpu_ttm_training_reserve_vram_init - create bo vram reservation from memory training
- *
- * @adev: amdgpu_device pointer
- *
- * create bo vram reservation from memory training.
+/*
+ * reserve TMR memory at the top of VRAM which holds
+ * IP Discovery data and is protected by PSP.
*/
-static int amdgpu_ttm_training_reserve_vram_init(struct amdgpu_device *adev)
+static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
{
int ret;
struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx;
+ bool mem_train_support = false;
- memset(ctx, 0, sizeof(*ctx));
- if (!adev->fw_vram_usage.mem_train_support) {
- DRM_DEBUG("memory training does not support!\n");
- return 0;
+ if (!amdgpu_sriov_vf(adev)) {
+ ret = amdgpu_mem_train_support(adev);
+ if (ret == 1)
+ mem_train_support = true;
+ else if (ret == -1)
+ return -EINVAL;
+ else
+ DRM_DEBUG("memory training does not support!\n");
}
- ctx->c2p_train_data_offset = amdgpu_ttm_training_get_c2p_offset(adev->gmc.mc_vram_size);
- ctx->p2c_train_data_offset = (adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET);
- ctx->train_data_size = GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES;
-
- DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n",
- ctx->train_data_size,
- ctx->p2c_train_data_offset,
- ctx->c2p_train_data_offset);
-
- ret = amdgpu_bo_create_kernel_at(adev,
+ /*
+ * Query reserved tmr size through atom firmwareinfo for Sienna_Cichlid and onwards for all
+ * the use cases (IP discovery/G6 memory training/profiling/diagnostic data.etc)
+ *
+ * Otherwise, fallback to legacy approach to check and reserve tmr block for ip
+ * discovery data and G6 memory training data respectively
+ */
+ adev->discovery_tmr_size =
+ amdgpu_atomfirmware_get_fw_reserved_fb_size(adev);
+ if (!adev->discovery_tmr_size)
+ adev->discovery_tmr_size = DISCOVERY_TMR_OFFSET;
+
+ if (mem_train_support) {
+ /* reserve vram for mem train according to TMR location */
+ amdgpu_ttm_training_data_block_init(adev);
+ ret = amdgpu_bo_create_kernel_at(adev,
ctx->c2p_train_data_offset,
ctx->train_data_size,
AMDGPU_GEM_DOMAIN_VRAM,
&ctx->c2p_bo,
NULL);
+ if (ret) {
+ DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret);
+ amdgpu_ttm_training_reserve_vram_fini(adev);
+ return ret;
+ }
+ ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS;
+ }
+
+ ret = amdgpu_bo_create_kernel_at(adev,
+ adev->gmc.real_vram_size - adev->discovery_tmr_size,
+ adev->discovery_tmr_size,
+ AMDGPU_GEM_DOMAIN_VRAM,
+ &adev->discovery_memory,
+ NULL);
if (ret) {
- DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret);
- amdgpu_ttm_training_reserve_vram_fini(adev);
+ DRM_ERROR("alloc tmr failed(%d)!\n", ret);
+ amdgpu_bo_free_kernel(&adev->discovery_memory, NULL, NULL);
return ret;
}
- ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS;
return 0;
}
@@ -1916,11 +1966,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
}
/*
- *The reserved vram for memory training must be pinned to the specified
- *place on the VRAM, so reserve it early.
+ * only NAVI10 and onwards ASIC support for IP discovery.
+ * If IP discovery enabled, a block of memory should be
+ * reserved for IP discovey.
*/
- if (!amdgpu_sriov_vf(adev)) {
- r = amdgpu_ttm_training_reserve_vram_init(adev);
+ if (adev->discovery_bin) {
+ r = amdgpu_ttm_reserve_tmr(adev);
if (r)
return r;
}
@@ -1936,21 +1987,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
if (r)
return r;
- /*
- * reserve TMR memory at the top of VRAM which holds
- * IP Discovery data and is protected by PSP.
- */
- if (adev->discovery_tmr_size > 0) {
- r = amdgpu_bo_create_kernel_at(adev,
- adev->gmc.real_vram_size - adev->discovery_tmr_size,
- adev->discovery_tmr_size,
- AMDGPU_GEM_DOMAIN_VRAM,
- &adev->discovery_memory,
- NULL);
- if (r)
- return r;
- }
-
DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
(unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));