diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c')
-rw-r--r-- | drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 186 |
1 files changed, 111 insertions, 75 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index e59c01a83dac..e11c5d69843d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -58,6 +58,7 @@ #include "amdgpu_amdkfd.h" #include "amdgpu_sdma.h" #include "amdgpu_ras.h" +#include "amdgpu_atomfirmware.h" #include "bif/bif_4_1_d.h" #define AMDGPU_TTM_VRAM_MAX_DW_READ (size_t)128 @@ -91,15 +92,13 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, case TTM_PL_TT: /* GTT memory */ man->func = &amdgpu_gtt_mgr_func; - man->gpu_offset = adev->gmc.gart_start; man->available_caching = TTM_PL_MASK_CACHING; man->default_caching = TTM_PL_FLAG_CACHED; - man->flags = TTM_MEMTYPE_FLAG_MAPPABLE | TTM_MEMTYPE_FLAG_CMA; + man->flags = TTM_MEMTYPE_FLAG_MAPPABLE; break; case TTM_PL_VRAM: /* "On-card" video ram */ man->func = &amdgpu_vram_mgr_func; - man->gpu_offset = adev->gmc.vram_start; man->flags = TTM_MEMTYPE_FLAG_FIXED | TTM_MEMTYPE_FLAG_MAPPABLE; man->available_caching = TTM_PL_FLAG_UNCACHED | TTM_PL_FLAG_WC; @@ -110,8 +109,7 @@ static int amdgpu_init_mem_type(struct ttm_bo_device *bdev, uint32_t type, case AMDGPU_PL_OA: /* On-chip GDS memory*/ man->func = &ttm_bo_manager_func; - man->gpu_offset = 0; - man->flags = TTM_MEMTYPE_FLAG_FIXED | TTM_MEMTYPE_FLAG_CMA; + man->flags = TTM_MEMTYPE_FLAG_FIXED; man->available_caching = TTM_PL_FLAG_UNCACHED; man->default_caching = TTM_PL_FLAG_UNCACHED; break; @@ -258,7 +256,8 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo, if (mm_node->start != AMDGPU_BO_INVALID_OFFSET) { addr = mm_node->start << PAGE_SHIFT; - addr += bo->bdev->man[mem->mem_type].gpu_offset; + addr += amdgpu_ttm_domain_start(amdgpu_ttm_adev(bo->bdev), + mem->mem_type); } return addr; } @@ -431,12 +430,22 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, } src_offset = src->offset; - src_mm = amdgpu_find_mm_node(src->mem, &src_offset); - src_node_size = (src_mm->size << PAGE_SHIFT) - src_offset; + if (src->mem->mm_node) { + src_mm = amdgpu_find_mm_node(src->mem, &src_offset); + src_node_size = (src_mm->size << PAGE_SHIFT) - src_offset; + } else { + src_mm = NULL; + src_node_size = ULLONG_MAX; + } dst_offset = dst->offset; - dst_mm = amdgpu_find_mm_node(dst->mem, &dst_offset); - dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst_offset; + if (dst->mem->mm_node) { + dst_mm = amdgpu_find_mm_node(dst->mem, &dst_offset); + dst_node_size = (dst_mm->size << PAGE_SHIFT) - dst_offset; + } else { + dst_mm = NULL; + dst_node_size = ULLONG_MAX; + } mutex_lock(&adev->mman.gtt_window_lock); @@ -828,10 +837,6 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_ return 0; } -static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem) -{ -} - static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo, unsigned long page_offset) { @@ -843,6 +848,27 @@ static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo, (offset >> PAGE_SHIFT); } +/** + * amdgpu_ttm_domain_start - Returns GPU start address + * @adev: amdgpu device object + * @type: type of the memory + * + * Returns: + * GPU start address of a memory domain + */ + +uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type) +{ + switch (type) { + case TTM_PL_TT: + return adev->gmc.gart_start; + case TTM_PL_VRAM: + return adev->gmc.vram_start; + } + + return 0; +} + /* * TTM backend functions. */ @@ -1024,7 +1050,6 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) { struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); struct amdgpu_ttm_tt *gtt = (void *)ttm; - unsigned nents; int r; int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY); @@ -1039,9 +1064,8 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm) goto release_sg; /* Map SG to device */ - r = -ENOMEM; - nents = dma_map_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction); - if (nents == 0) + r = dma_map_sgtable(adev->dev, ttm->sg, direction, 0); + if (r) goto release_sg; /* convert SG to linear array of pages and dma addresses */ @@ -1072,8 +1096,7 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) return; /* unmap the pages mapped to the device */ - dma_unmap_sg(adev->dev, ttm->sg->sgl, ttm->sg->nents, direction); - + dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0); sg_free_table(ttm->sg); #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR) @@ -1091,7 +1114,7 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_tt *ttm) #endif } -int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, +static int amdgpu_ttm_gart_bind(struct amdgpu_device *adev, struct ttm_buffer_object *tbo, uint64_t flags) { @@ -1239,9 +1262,6 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) bo->mem = tmp; } - bo->offset = (bo->mem.start << PAGE_SHIFT) + - bo->bdev->man[bo->mem.mem_type].gpu_offset; - return 0; } @@ -1272,7 +1292,7 @@ int amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo) * Called by ttm_tt_unbind() on behalf of ttm_bo_move_ttm() and * ttm_tt_destroy(). */ -static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm) +static void amdgpu_ttm_backend_unbind(struct ttm_tt *ttm) { struct amdgpu_device *adev = amdgpu_ttm_adev(ttm->bdev); struct amdgpu_ttm_tt *gtt = (void *)ttm; @@ -1283,14 +1303,13 @@ static int amdgpu_ttm_backend_unbind(struct ttm_tt *ttm) amdgpu_ttm_tt_unpin_userptr(ttm); if (gtt->offset == AMDGPU_BO_INVALID_OFFSET) - return 0; + return; /* unbind shouldn't be done for GDS/GWS/OA in ttm_bo_clean_mm */ r = amdgpu_gart_unbind(adev, gtt->offset, ttm->num_pages); if (r) DRM_ERROR("failed to unbind %lu pages at 0x%08llX\n", gtt->ttm.ttm.num_pages, gtt->offset); - return r; } static void amdgpu_ttm_backend_destroy(struct ttm_tt *ttm) @@ -1731,7 +1750,6 @@ static struct ttm_bo_driver amdgpu_bo_driver = { .release_notify = &amdgpu_bo_release_notify, .fault_reserve_notify = &amdgpu_bo_fault_reserve_notify, .io_mem_reserve = &amdgpu_ttm_io_mem_reserve, - .io_mem_free = &amdgpu_ttm_io_mem_free, .io_mem_pfn = amdgpu_ttm_io_mem_pfn, .access_memory = &amdgpu_ttm_access_memory, .del_from_lru_notify = &amdgpu_vm_del_from_lru_notify @@ -1801,54 +1819,86 @@ static int amdgpu_ttm_training_reserve_vram_fini(struct amdgpu_device *adev) return 0; } -static u64 amdgpu_ttm_training_get_c2p_offset(u64 vram_size) +static void amdgpu_ttm_training_data_block_init(struct amdgpu_device *adev) { - if ((vram_size & (SZ_1M - 1)) < (SZ_4K + 1) ) - vram_size -= SZ_1M; + struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx; + + memset(ctx, 0, sizeof(*ctx)); - return ALIGN(vram_size, SZ_1M); + ctx->c2p_train_data_offset = + ALIGN((adev->gmc.mc_vram_size - adev->discovery_tmr_size - SZ_1M), SZ_1M); + ctx->p2c_train_data_offset = + (adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET); + ctx->train_data_size = + GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES; + + DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n", + ctx->train_data_size, + ctx->p2c_train_data_offset, + ctx->c2p_train_data_offset); } -/** - * amdgpu_ttm_training_reserve_vram_init - create bo vram reservation from memory training - * - * @adev: amdgpu_device pointer - * - * create bo vram reservation from memory training. +/* + * reserve TMR memory at the top of VRAM which holds + * IP Discovery data and is protected by PSP. */ -static int amdgpu_ttm_training_reserve_vram_init(struct amdgpu_device *adev) +static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev) { int ret; struct psp_memory_training_context *ctx = &adev->psp.mem_train_ctx; + bool mem_train_support = false; - memset(ctx, 0, sizeof(*ctx)); - if (!adev->fw_vram_usage.mem_train_support) { - DRM_DEBUG("memory training does not support!\n"); - return 0; + if (!amdgpu_sriov_vf(adev)) { + ret = amdgpu_mem_train_support(adev); + if (ret == 1) + mem_train_support = true; + else if (ret == -1) + return -EINVAL; + else + DRM_DEBUG("memory training does not support!\n"); } - ctx->c2p_train_data_offset = amdgpu_ttm_training_get_c2p_offset(adev->gmc.mc_vram_size); - ctx->p2c_train_data_offset = (adev->gmc.mc_vram_size - GDDR6_MEM_TRAINING_OFFSET); - ctx->train_data_size = GDDR6_MEM_TRAINING_DATA_SIZE_IN_BYTES; - - DRM_DEBUG("train_data_size:%llx,p2c_train_data_offset:%llx,c2p_train_data_offset:%llx.\n", - ctx->train_data_size, - ctx->p2c_train_data_offset, - ctx->c2p_train_data_offset); - - ret = amdgpu_bo_create_kernel_at(adev, + /* + * Query reserved tmr size through atom firmwareinfo for Sienna_Cichlid and onwards for all + * the use cases (IP discovery/G6 memory training/profiling/diagnostic data.etc) + * + * Otherwise, fallback to legacy approach to check and reserve tmr block for ip + * discovery data and G6 memory training data respectively + */ + adev->discovery_tmr_size = + amdgpu_atomfirmware_get_fw_reserved_fb_size(adev); + if (!adev->discovery_tmr_size) + adev->discovery_tmr_size = DISCOVERY_TMR_OFFSET; + + if (mem_train_support) { + /* reserve vram for mem train according to TMR location */ + amdgpu_ttm_training_data_block_init(adev); + ret = amdgpu_bo_create_kernel_at(adev, ctx->c2p_train_data_offset, ctx->train_data_size, AMDGPU_GEM_DOMAIN_VRAM, &ctx->c2p_bo, NULL); + if (ret) { + DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret); + amdgpu_ttm_training_reserve_vram_fini(adev); + return ret; + } + ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS; + } + + ret = amdgpu_bo_create_kernel_at(adev, + adev->gmc.real_vram_size - adev->discovery_tmr_size, + adev->discovery_tmr_size, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->discovery_memory, + NULL); if (ret) { - DRM_ERROR("alloc c2p_bo failed(%d)!\n", ret); - amdgpu_ttm_training_reserve_vram_fini(adev); + DRM_ERROR("alloc tmr failed(%d)!\n", ret); + amdgpu_bo_free_kernel(&adev->discovery_memory, NULL, NULL); return ret; } - ctx->init = PSP_MEM_TRAIN_RESERVE_SUCCESS; return 0; } @@ -1916,11 +1966,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) } /* - *The reserved vram for memory training must be pinned to the specified - *place on the VRAM, so reserve it early. + * only NAVI10 and onwards ASIC support for IP discovery. + * If IP discovery enabled, a block of memory should be + * reserved for IP discovey. */ - if (!amdgpu_sriov_vf(adev)) { - r = amdgpu_ttm_training_reserve_vram_init(adev); + if (adev->discovery_bin) { + r = amdgpu_ttm_reserve_tmr(adev); if (r) return r; } @@ -1936,21 +1987,6 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) if (r) return r; - /* - * reserve TMR memory at the top of VRAM which holds - * IP Discovery data and is protected by PSP. - */ - if (adev->discovery_tmr_size > 0) { - r = amdgpu_bo_create_kernel_at(adev, - adev->gmc.real_vram_size - adev->discovery_tmr_size, - adev->discovery_tmr_size, - AMDGPU_GEM_DOMAIN_VRAM, - &adev->discovery_memory, - NULL); - if (r) - return r; - } - DRM_INFO("amdgpu: %uM of VRAM memory ready\n", (unsigned) (adev->gmc.real_vram_size / (1024 * 1024))); |