From ccf979ee330b195961116f74cd75ba677fdc4cd7 Mon Sep 17 00:00:00 2001 From: Moti Haimovski Date: Mon, 5 Oct 2020 17:59:29 +0300 Subject: habanalabs: refactor MMU to support dual residency MMU This commit refactors the MMU code to support PCI MMU page tables residing on host and DCORE MMU residing on the device DRAM at the same time. This is needed for future devices as on GAUDI and GOYA we have a single MMU where its page tables always reside on DRAM. Signed-off-by: Moti Haimovski Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/mmu_v1.c | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) (limited to 'drivers/misc/habanalabs/common/mmu_v1.c') diff --git a/drivers/misc/habanalabs/common/mmu_v1.c b/drivers/misc/habanalabs/common/mmu_v1.c index 8d1eb5265419..ec7e8a3c37b8 100644 --- a/drivers/misc/habanalabs/common/mmu_v1.c +++ b/drivers/misc/habanalabs/common/mmu_v1.c @@ -29,7 +29,7 @@ static void _free_hop(struct hl_ctx *ctx, struct pgt_info *pgt_info) { struct hl_device *hdev = ctx->hdev; - gen_pool_free(hdev->mmu_priv.mmu_pgt_pool, pgt_info->phys_addr, + gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, pgt_info->phys_addr, hdev->asic_prop.mmu_hop_table_size); hash_del(&pgt_info->node); kfree((u64 *) (uintptr_t) pgt_info->shadow_addr); @@ -54,7 +54,7 @@ static u64 alloc_hop(struct hl_ctx *ctx) if (!pgt_info) return ULLONG_MAX; - phys_addr = (u64) gen_pool_alloc(hdev->mmu_priv.mmu_pgt_pool, + phys_addr = (u64) gen_pool_alloc(hdev->mmu_priv.dr.mmu_pgt_pool, prop->mmu_hop_table_size); if (!phys_addr) { dev_err(hdev->dev, "failed to allocate page\n"); @@ -75,7 +75,7 @@ static u64 alloc_hop(struct hl_ctx *ctx) return shadow_addr; shadow_err: - gen_pool_free(hdev->mmu_priv.mmu_pgt_pool, phys_addr, + gen_pool_free(hdev->mmu_priv.dr.mmu_pgt_pool, phys_addr, prop->mmu_hop_table_size); pool_add_err: kfree(pgt_info); @@ -91,7 +91,7 @@ static inline u64 get_phys_hop0_addr(struct hl_ctx *ctx) static inline u64 get_hop0_addr(struct hl_ctx *ctx) { - return (u64) (uintptr_t) ctx->hdev->mmu_priv.mmu_shadow_hop0 + + return (u64) (uintptr_t) ctx->hdev->mmu_priv.dr.mmu_shadow_hop0 + (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size); } @@ -419,15 +419,15 @@ static int hl_mmu_v1_init(struct hl_device *hdev) struct asic_fixed_properties *prop = &hdev->asic_prop; int rc; - hdev->mmu_priv.mmu_pgt_pool = + hdev->mmu_priv.dr.mmu_pgt_pool = gen_pool_create(__ffs(prop->mmu_hop_table_size), -1); - if (!hdev->mmu_priv.mmu_pgt_pool) { + if (!hdev->mmu_priv.dr.mmu_pgt_pool) { dev_err(hdev->dev, "Failed to create page gen pool\n"); return -ENOMEM; } - rc = gen_pool_add(hdev->mmu_priv.mmu_pgt_pool, prop->mmu_pgt_addr + + rc = gen_pool_add(hdev->mmu_priv.dr.mmu_pgt_pool, prop->mmu_pgt_addr + prop->mmu_hop0_tables_total_size, prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size, -1); @@ -436,10 +436,10 @@ static int hl_mmu_v1_init(struct hl_device *hdev) goto err_pool_add; } - hdev->mmu_priv.mmu_shadow_hop0 = kvmalloc_array(prop->max_asid, + hdev->mmu_priv.dr.mmu_shadow_hop0 = kvmalloc_array(prop->max_asid, prop->mmu_hop_table_size, GFP_KERNEL | __GFP_ZERO); - if (ZERO_OR_NULL_PTR(hdev->mmu_priv.mmu_shadow_hop0)) { + if (ZERO_OR_NULL_PTR(hdev->mmu_priv.dr.mmu_shadow_hop0)) { rc = -ENOMEM; goto err_pool_add; } @@ -449,7 +449,7 @@ static int hl_mmu_v1_init(struct hl_device *hdev) return 0; err_pool_add: - gen_pool_destroy(hdev->mmu_priv.mmu_pgt_pool); + gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool); return rc; } @@ -468,8 +468,8 @@ static void hl_mmu_v1_fini(struct hl_device *hdev) { /* MMU H/W fini was already done in device hw_fini() */ - kvfree(hdev->mmu_priv.mmu_shadow_hop0); - gen_pool_destroy(hdev->mmu_priv.mmu_pgt_pool); + kvfree(hdev->mmu_priv.dr.mmu_shadow_hop0); + gen_pool_destroy(hdev->mmu_priv.dr.mmu_pgt_pool); } /** @@ -847,10 +847,8 @@ static void hl_mmu_v1_swap_in(struct hl_ctx *ctx) * * @hdev: pointer to the device structure */ -void hl_mmu_v1_set_funcs(struct hl_device *hdev) +void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu) { - struct hl_mmu_funcs *mmu = &hdev->mmu_func; - mmu->init = hl_mmu_v1_init; mmu->fini = hl_mmu_v1_fini; mmu->ctx_init = hl_mmu_v1_ctx_init; -- cgit v1.2.3 From ba7e389c30c6bfb78857ba7a6f8d2cd4bbf5bde7 Mon Sep 17 00:00:00 2001 From: Tomer Tayar Date: Sun, 25 Oct 2020 17:47:22 +0200 Subject: habanalabs: Move repeatedly included headers to habanalabs.h Several header files are repeatedly included in many files. Move these files to habanalabs.h which is included by all. Signed-off-by: Tomer Tayar Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/command_buffer.c | 1 - drivers/misc/habanalabs/common/device.c | 1 - drivers/misc/habanalabs/common/firmware_if.c | 2 -- drivers/misc/habanalabs/common/habanalabs.h | 4 ++++ drivers/misc/habanalabs/common/memory.c | 1 - drivers/misc/habanalabs/common/mmu_v1.c | 1 - drivers/misc/habanalabs/gaudi/gaudi.c | 2 -- drivers/misc/habanalabs/gaudi/gaudi_coresight.c | 2 -- drivers/misc/habanalabs/goya/goya.c | 2 -- drivers/misc/habanalabs/goya/goya_coresight.c | 2 -- 10 files changed, 4 insertions(+), 14 deletions(-) (limited to 'drivers/misc/habanalabs/common/mmu_v1.c') diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c index ada570f35a41..075679626c7b 100644 --- a/drivers/misc/habanalabs/common/command_buffer.c +++ b/drivers/misc/habanalabs/common/command_buffer.c @@ -11,7 +11,6 @@ #include #include #include -#include static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb) { diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 20572224099a..421e37123b03 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -10,7 +10,6 @@ #include "habanalabs.h" #include -#include #include #include diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c index d84a70ec0ce1..fb9d901d5059 100644 --- a/drivers/misc/habanalabs/common/firmware_if.c +++ b/drivers/misc/habanalabs/common/firmware_if.c @@ -9,8 +9,6 @@ #include "../include/common/hl_boot_if.h" #include -#include -#include #include #define FW_FILE_MAX_SIZE 0x1400000 /* maximum size of 20MB */ diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 033f6809980f..f00891ddc3ad 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -19,6 +19,10 @@ #include #include #include +#include +#include +#include +#include #define HL_NAME "habanalabs" diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index 75e269bc42a7..5c1dae6aaf4d 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -11,7 +11,6 @@ #include #include -#include #define HL_MMU_DEBUG 0 diff --git a/drivers/misc/habanalabs/common/mmu_v1.c b/drivers/misc/habanalabs/common/mmu_v1.c index ec7e8a3c37b8..ac3784523baa 100644 --- a/drivers/misc/habanalabs/common/mmu_v1.c +++ b/drivers/misc/habanalabs/common/mmu_v1.c @@ -8,7 +8,6 @@ #include "habanalabs.h" #include "../include/hw_ip/mmu/mmu_general.h" -#include #include static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr); diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 65b7e20b8f4a..ecfcfdfdd6b0 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -17,8 +17,6 @@ #include #include #include -#include -#include #include #include diff --git a/drivers/misc/habanalabs/gaudi/gaudi_coresight.c b/drivers/misc/habanalabs/gaudi/gaudi_coresight.c index 3d2b0f0f4650..2e3612e1ee28 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi_coresight.c +++ b/drivers/misc/habanalabs/gaudi/gaudi_coresight.c @@ -11,8 +11,6 @@ #include "../include/gaudi/gaudi_masks.h" #include -#include - #define SPMU_SECTION_SIZE MME0_ACC_SPMU_MAX_OFFSET #define SPMU_EVENT_TYPES_OFFSET 0x400 #define SPMU_MAX_COUNTERS 6 diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 16046dd31e72..ab467943cef3 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -12,9 +12,7 @@ #include "../include/goya/goya_reg_map.h" #include -#include #include -#include #include #include diff --git a/drivers/misc/habanalabs/goya/goya_coresight.c b/drivers/misc/habanalabs/goya/goya_coresight.c index 4027a6a334d7..6fa03933b438 100644 --- a/drivers/misc/habanalabs/goya/goya_coresight.c +++ b/drivers/misc/habanalabs/goya/goya_coresight.c @@ -12,8 +12,6 @@ #include -#include - #define GOYA_PLDM_CORESIGHT_TIMEOUT_USEC (CORESIGHT_TIMEOUT_USEC * 100) #define SPMU_SECTION_SIZE DMA_CH_0_CS_SPMU_MAX_OFFSET -- cgit v1.2.3 From 7f070c913c361ed79dd29d9256b486b1b105d7f0 Mon Sep 17 00:00:00 2001 From: Oded Gabbay Date: Mon, 9 Nov 2020 09:48:31 +0200 Subject: habanalabs: move asic property to correct structure Whether an ASIC has MMU towards its DRAM is an ASIC property, so move it to the asic fixed properties structure. Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/debugfs.c | 2 +- drivers/misc/habanalabs/common/habanalabs.h | 4 ++-- drivers/misc/habanalabs/common/memory.c | 6 +++--- drivers/misc/habanalabs/common/mmu_v1.c | 4 ++-- drivers/misc/habanalabs/gaudi/gaudi.c | 3 +-- drivers/misc/habanalabs/goya/goya.c | 2 +- 6 files changed, 10 insertions(+), 11 deletions(-) (limited to 'drivers/misc/habanalabs/common/mmu_v1.c') diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c index 104b9686e57b..3d9bd86312e2 100644 --- a/drivers/misc/habanalabs/common/debugfs.c +++ b/drivers/misc/habanalabs/common/debugfs.c @@ -598,7 +598,7 @@ static bool hl_is_device_va(struct hl_device *hdev, u64 addr) if (!hdev->mmu_enable) goto out; - if (hdev->dram_supports_virtual_memory && + if (prop->dram_supports_virtual_memory && (addr >= prop->dmmu.start_addr && addr < prop->dmmu.end_addr)) return true; diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 822f90087e04..a1d82de60ef6 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -411,6 +411,7 @@ struct hl_mmu_properties { * false otherwise * @fw_security_status_valid: security status bits are valid and can be fetched * from BOOT_DEV_STS0 + * @dram_supports_virtual_memory: is there an MMU towards the DRAM */ struct asic_fixed_properties { struct hw_queue_properties *hw_queues_props; @@ -467,6 +468,7 @@ struct asic_fixed_properties { u8 completion_queues_count; u8 fw_security_disabled; u8 fw_security_status_valid; + u8 dram_supports_virtual_memory; }; /** @@ -1772,7 +1774,6 @@ struct hl_mmu_funcs { * @heartbeat: is heartbeat sanity check towards CPU-CP enabled. * @reset_on_lockup: true if a reset should be done in case of stuck CS, false * otherwise. - * @dram_supports_virtual_memory: is MMU enabled towards DRAM. * @dram_default_page_mapping: is DRAM default page mapping enabled. * @memory_scrub: true to perform device memory scrub in various locations, * such as context-switch, context close, page free, etc. @@ -1886,7 +1887,6 @@ struct hl_device { u8 hard_reset_pending; u8 heartbeat; u8 reset_on_lockup; - u8 dram_supports_virtual_memory; u8 dram_default_page_mapping; u8 memory_scrub; u8 pmmu_huge_range; diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index 01e93e83d3af..f27ca80d3c3c 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -1351,7 +1351,7 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data) * the user's input, the driver can't ensure the validity * of this accounting. */ - if (!hdev->dram_supports_virtual_memory) { + if (!hdev->asic_prop.dram_supports_virtual_memory) { atomic64_add(args->in.alloc.mem_size, &ctx->dram_phys_mem); atomic64_add(args->in.alloc.mem_size, @@ -1380,7 +1380,7 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data) * the user's input, the driver can't ensure the validity * of this accounting. */ - if (!hdev->dram_supports_virtual_memory) { + if (!hdev->asic_prop.dram_supports_virtual_memory) { atomic64_sub(args->in.alloc.mem_size, &ctx->dram_phys_mem); atomic64_sub(args->in.alloc.mem_size, @@ -1915,7 +1915,7 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx) * because the user notifies us on allocations. If the user is no more, * all DRAM is available */ - if (!ctx->hdev->dram_supports_virtual_memory) + if (!ctx->hdev->asic_prop.dram_supports_virtual_memory) atomic64_set(&ctx->hdev->dram_used_mem, 0); } diff --git a/drivers/misc/habanalabs/common/mmu_v1.c b/drivers/misc/habanalabs/common/mmu_v1.c index ac3784523baa..5f62cb158eef 100644 --- a/drivers/misc/habanalabs/common/mmu_v1.c +++ b/drivers/misc/habanalabs/common/mmu_v1.c @@ -262,7 +262,7 @@ static int dram_default_mapping_init(struct hl_ctx *ctx) hop2_pte_addr, hop3_pte_addr, pte_val; int rc, i, j, hop3_allocated = 0; - if ((!hdev->dram_supports_virtual_memory) || + if ((!prop->dram_supports_virtual_memory) || (!hdev->dram_default_page_mapping) || (ctx->asid == HL_KERNEL_ASID_ID)) return 0; @@ -362,7 +362,7 @@ static void dram_default_mapping_fini(struct hl_ctx *ctx) hop2_pte_addr, hop3_pte_addr; int i, j; - if ((!hdev->dram_supports_virtual_memory) || + if ((!prop->dram_supports_virtual_memory) || (!hdev->dram_default_page_mapping) || (ctx->asid == HL_KERNEL_ASID_ID)) return; diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index e2b5160130d7..fe2e2b3d2a04 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -462,6 +462,7 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev) prop->mmu_hop_table_size = HOP_TABLE_SIZE; prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE; prop->dram_page_size = PAGE_SIZE_2MB; + prop->dram_supports_virtual_memory = false; prop->pmmu.hop0_shift = HOP0_SHIFT; prop->pmmu.hop1_shift = HOP1_SHIFT; @@ -3562,8 +3563,6 @@ static int gaudi_mmu_init(struct hl_device *hdev) if (gaudi->hw_cap_initialized & HW_CAP_MMU) return 0; - hdev->dram_supports_virtual_memory = false; - for (i = 0 ; i < prop->max_asid ; i++) { hop0_addr = prop->mmu_pgt_addr + (i * prop->mmu_hop_table_size); diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index 002fc53fb39d..bf21f05f7849 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -410,6 +410,7 @@ int goya_get_fixed_properties(struct hl_device *hdev) prop->mmu_hop_table_size = HOP_TABLE_SIZE; prop->mmu_hop0_tables_total_size = HOP0_TABLES_TOTAL_SIZE; prop->dram_page_size = PAGE_SIZE_2MB; + prop->dram_supports_virtual_memory = true; prop->dmmu.hop0_shift = HOP0_SHIFT; prop->dmmu.hop1_shift = HOP1_SHIFT; @@ -2481,7 +2482,6 @@ int goya_mmu_init(struct hl_device *hdev) if (goya->hw_cap_initialized & HW_CAP_MMU) return 0; - hdev->dram_supports_virtual_memory = true; hdev->dram_default_page_mapping = true; for (i = 0 ; i < prop->max_asid ; i++) { -- cgit v1.2.3 From fe2bc2d249539c117cc887dc1f960ffcbba52d1e Mon Sep 17 00:00:00 2001 From: Moti Haimovski Date: Tue, 27 Oct 2020 11:03:32 +0200 Subject: habanalabs: share a single ctx-mutex between all MMUs Multiple locks are usually a source of problems, which in the MMU case can be avoided since it is relatively rare that both MMU tables are updated at the same time. Therefore, use a single shared lock instead of two separate ones. Signed-off-by: Moti Haimovski Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/mmu.c | 4 ++++ drivers/misc/habanalabs/common/mmu_v1.c | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/misc/habanalabs/common/mmu_v1.c') diff --git a/drivers/misc/habanalabs/common/mmu.c b/drivers/misc/habanalabs/common/mmu.c index 6f535c81478d..f4b3d02fe0d8 100644 --- a/drivers/misc/habanalabs/common/mmu.c +++ b/drivers/misc/habanalabs/common/mmu.c @@ -81,6 +81,8 @@ int hl_mmu_ctx_init(struct hl_ctx *ctx) if (!hdev->mmu_enable) return 0; + mutex_init(&ctx->mmu_lock); + if (hdev->mmu_func[MMU_DR_PGT].ctx_init != NULL) { rc = hdev->mmu_func[MMU_DR_PGT].ctx_init(ctx); if (rc) @@ -115,6 +117,8 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx) if (hdev->mmu_func[MMU_HR_PGT].ctx_fini != NULL) hdev->mmu_func[MMU_HR_PGT].ctx_fini(ctx); + + mutex_destroy(&ctx->mmu_lock); } /* diff --git a/drivers/misc/habanalabs/common/mmu_v1.c b/drivers/misc/habanalabs/common/mmu_v1.c index 5f62cb158eef..92b22298bb5c 100644 --- a/drivers/misc/habanalabs/common/mmu_v1.c +++ b/drivers/misc/habanalabs/common/mmu_v1.c @@ -481,9 +481,7 @@ static void hl_mmu_v1_fini(struct hl_device *hdev) */ static int hl_mmu_v1_ctx_init(struct hl_ctx *ctx) { - mutex_init(&ctx->mmu_lock); hash_init(ctx->mmu_shadow_hash); - return dram_default_mapping_init(ctx); } @@ -516,8 +514,6 @@ static void hl_mmu_v1_ctx_fini(struct hl_ctx *ctx) pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes); _free_hop(ctx, pgt_info); } - - mutex_destroy(&ctx->mmu_lock); } static int _hl_mmu_v1_unmap(struct hl_ctx *ctx, -- cgit v1.2.3 From 00e1b59c8b155d524cf416155607c3e52c54c32a Mon Sep 17 00:00:00 2001 From: Moti Haimovski Date: Tue, 27 Oct 2020 10:55:42 +0200 Subject: habanalabs: fix MMU debugfs operations After the MMU-code refactoring, the existing MMU debugfs operations are no longer working so we need to fix them. In addition, remove the duplicate code that was in the debugfs code and use the already existing MMU-code. Signed-off-by: Moti Haimovski Reviewed-by: Oded Gabbay Signed-off-by: Oded Gabbay --- drivers/misc/habanalabs/common/debugfs.c | 282 ++-------------------------- drivers/misc/habanalabs/common/habanalabs.h | 35 ++++ drivers/misc/habanalabs/common/mmu.c | 50 +++++ drivers/misc/habanalabs/common/mmu_v1.c | 102 ++++++++++ 4 files changed, 206 insertions(+), 263 deletions(-) (limited to 'drivers/misc/habanalabs/common/mmu_v1.c') diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c index 3d9bd86312e2..1db804de45ba 100644 --- a/drivers/misc/habanalabs/common/debugfs.c +++ b/drivers/misc/habanalabs/common/debugfs.c @@ -301,93 +301,15 @@ static int vm_show(struct seq_file *s, void *data) return 0; } -/* these inline functions are copied from mmu.c */ -static inline u64 get_hop0_addr(struct hl_ctx *ctx) -{ - return ctx->hdev->asic_prop.mmu_pgt_addr + - (ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size); -} - -static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr, - u64 virt_addr, u64 mask, u64 shift) -{ - return hop_addr + ctx->hdev->asic_prop.mmu_pte_size * - ((virt_addr & mask) >> shift); -} - -static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx, - struct hl_mmu_properties *mmu_specs, - u64 hop_addr, u64 vaddr) -{ - return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop0_mask, - mmu_specs->hop0_shift); -} - -static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx, - struct hl_mmu_properties *mmu_specs, - u64 hop_addr, u64 vaddr) -{ - return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop1_mask, - mmu_specs->hop1_shift); -} - -static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx, - struct hl_mmu_properties *mmu_specs, - u64 hop_addr, u64 vaddr) -{ - return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop2_mask, - mmu_specs->hop2_shift); -} - -static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx, - struct hl_mmu_properties *mmu_specs, - u64 hop_addr, u64 vaddr) -{ - return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop3_mask, - mmu_specs->hop3_shift); -} - -static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, - struct hl_mmu_properties *mmu_specs, - u64 hop_addr, u64 vaddr) -{ - return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop4_mask, - mmu_specs->hop4_shift); -} - -static inline u64 get_hop5_pte_addr(struct hl_ctx *ctx, - struct hl_mmu_properties *mmu_specs, - u64 hop_addr, u64 vaddr) -{ - return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_specs->hop5_mask, - mmu_specs->hop5_shift); -} - -static inline u64 get_next_hop_addr(u64 curr_pte) -{ - if (curr_pte & PAGE_PRESENT_MASK) - return curr_pte & HOP_PHYS_ADDR_MASK; - else - return ULLONG_MAX; -} - static int mmu_show(struct seq_file *s, void *data) { struct hl_debugfs_entry *entry = s->private; struct hl_dbg_device_entry *dev_entry = entry->dev_entry; struct hl_device *hdev = dev_entry->hdev; - struct asic_fixed_properties *prop = &hdev->asic_prop; - struct hl_mmu_properties *mmu_prop; struct hl_ctx *ctx; - bool is_dram_addr; - - u64 hop0_addr = 0, hop0_pte_addr = 0, hop0_pte = 0, - hop1_addr = 0, hop1_pte_addr = 0, hop1_pte = 0, - hop2_addr = 0, hop2_pte_addr = 0, hop2_pte = 0, - hop3_addr = 0, hop3_pte_addr = 0, hop3_pte = 0, - hop4_addr = 0, hop4_pte_addr = 0, hop4_pte = 0, - hop5_addr = 0, hop5_pte_addr = 0, hop5_pte = 0, - virt_addr = dev_entry->mmu_addr; + struct hl_mmu_hop_info hops_info; + u64 virt_addr = dev_entry->mmu_addr; + int i; if (!hdev->mmu_enable) return 0; @@ -402,132 +324,24 @@ static int mmu_show(struct seq_file *s, void *data) return 0; } - is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, - prop->dmmu.start_addr, - prop->dmmu.end_addr); - - /* shifts and masks are the same in PMMU and HPMMU, use one of them */ - mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; - - mutex_lock(&ctx->mmu_lock); - - /* the following lookup is copied from unmap() in mmu.c */ - - hop0_addr = get_hop0_addr(ctx); - hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr); - hop0_pte = hdev->asic_funcs->read_pte(hdev, hop0_pte_addr); - hop1_addr = get_next_hop_addr(hop0_pte); - - if (hop1_addr == ULLONG_MAX) - goto not_mapped; - - hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr); - hop1_pte = hdev->asic_funcs->read_pte(hdev, hop1_pte_addr); - hop2_addr = get_next_hop_addr(hop1_pte); - - if (hop2_addr == ULLONG_MAX) - goto not_mapped; - - hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr); - hop2_pte = hdev->asic_funcs->read_pte(hdev, hop2_pte_addr); - hop3_addr = get_next_hop_addr(hop2_pte); - - if (hop3_addr == ULLONG_MAX) - goto not_mapped; - - hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr); - hop3_pte = hdev->asic_funcs->read_pte(hdev, hop3_pte_addr); - - if (mmu_prop->num_hops == MMU_ARCH_5_HOPS) { - if (!(hop3_pte & LAST_MASK)) { - hop4_addr = get_next_hop_addr(hop3_pte); - - if (hop4_addr == ULLONG_MAX) - goto not_mapped; - - hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, - hop4_addr, virt_addr); - hop4_pte = hdev->asic_funcs->read_pte(hdev, - hop4_pte_addr); - if (!(hop4_pte & PAGE_PRESENT_MASK)) - goto not_mapped; - } else { - if (!(hop3_pte & PAGE_PRESENT_MASK)) - goto not_mapped; - } - } else { - hop4_addr = get_next_hop_addr(hop3_pte); - - if (hop4_addr == ULLONG_MAX) - goto not_mapped; - - hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, - hop4_addr, virt_addr); - hop4_pte = hdev->asic_funcs->read_pte(hdev, - hop4_pte_addr); - if (!(hop4_pte & LAST_MASK)) { - hop5_addr = get_next_hop_addr(hop4_pte); - - if (hop5_addr == ULLONG_MAX) - goto not_mapped; - - hop5_pte_addr = get_hop5_pte_addr(ctx, mmu_prop, - hop5_addr, virt_addr); - hop5_pte = hdev->asic_funcs->read_pte(hdev, - hop5_pte_addr); - if (!(hop5_pte & PAGE_PRESENT_MASK)) - goto not_mapped; - } else { - if (!(hop4_pte & PAGE_PRESENT_MASK)) - goto not_mapped; - } + if (hl_mmu_get_tlb_info(ctx, virt_addr, &hops_info)) { + dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n", + virt_addr); + return 0; } seq_printf(s, "asid: %u, virt_addr: 0x%llx\n", dev_entry->mmu_asid, dev_entry->mmu_addr); - seq_printf(s, "hop0_addr: 0x%llx\n", hop0_addr); - seq_printf(s, "hop0_pte_addr: 0x%llx\n", hop0_pte_addr); - seq_printf(s, "hop0_pte: 0x%llx\n", hop0_pte); - - seq_printf(s, "hop1_addr: 0x%llx\n", hop1_addr); - seq_printf(s, "hop1_pte_addr: 0x%llx\n", hop1_pte_addr); - seq_printf(s, "hop1_pte: 0x%llx\n", hop1_pte); - - seq_printf(s, "hop2_addr: 0x%llx\n", hop2_addr); - seq_printf(s, "hop2_pte_addr: 0x%llx\n", hop2_pte_addr); - seq_printf(s, "hop2_pte: 0x%llx\n", hop2_pte); - - seq_printf(s, "hop3_addr: 0x%llx\n", hop3_addr); - seq_printf(s, "hop3_pte_addr: 0x%llx\n", hop3_pte_addr); - seq_printf(s, "hop3_pte: 0x%llx\n", hop3_pte); - - if (mmu_prop->num_hops == MMU_ARCH_5_HOPS) { - if (!(hop3_pte & LAST_MASK)) { - seq_printf(s, "hop4_addr: 0x%llx\n", hop4_addr); - seq_printf(s, "hop4_pte_addr: 0x%llx\n", hop4_pte_addr); - seq_printf(s, "hop4_pte: 0x%llx\n", hop4_pte); - } - } else { - seq_printf(s, "hop4_addr: 0x%llx\n", hop4_addr); - seq_printf(s, "hop4_pte_addr: 0x%llx\n", hop4_pte_addr); - seq_printf(s, "hop4_pte: 0x%llx\n", hop4_pte); - - if (!(hop4_pte & LAST_MASK)) { - seq_printf(s, "hop5_addr: 0x%llx\n", hop5_addr); - seq_printf(s, "hop5_pte_addr: 0x%llx\n", hop5_pte_addr); - seq_printf(s, "hop5_pte: 0x%llx\n", hop5_pte); - } + for (i = 0 ; i < hops_info.used_hops ; i++) { + seq_printf(s, "hop%d_addr: 0x%llx\n", + i, hops_info.hop_info[i].hop_addr); + seq_printf(s, "hop%d_pte_addr: 0x%llx\n", + i, hops_info.hop_info[i].hop_pte_addr); + seq_printf(s, "hop%d_pte: 0x%llx\n", + i, hops_info.hop_info[i].hop_pte_val); } - goto out; - -not_mapped: - dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n", - virt_addr); -out: - mutex_unlock(&ctx->mmu_lock); - return 0; } @@ -617,78 +431,20 @@ static int device_va_to_pa(struct hl_device *hdev, u64 virt_addr, u64 *phys_addr) { struct hl_ctx *ctx = hdev->compute_ctx; - struct asic_fixed_properties *prop = &hdev->asic_prop; - struct hl_mmu_properties *mmu_prop; - u64 hop_addr, hop_pte_addr, hop_pte; - u64 offset_mask = HOP4_MASK | FLAGS_MASK; int rc = 0; - bool is_dram_addr; if (!ctx) { dev_err(hdev->dev, "no ctx available\n"); return -EINVAL; } - is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, - prop->dmmu.start_addr, - prop->dmmu.end_addr); - - /* shifts and masks are the same in PMMU and HPMMU, use one of them */ - mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; - - mutex_lock(&ctx->mmu_lock); - - /* hop 0 */ - hop_addr = get_hop0_addr(ctx); - hop_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); - hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); - - /* hop 1 */ - hop_addr = get_next_hop_addr(hop_pte); - if (hop_addr == ULLONG_MAX) - goto not_mapped; - hop_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); - hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); - - /* hop 2 */ - hop_addr = get_next_hop_addr(hop_pte); - if (hop_addr == ULLONG_MAX) - goto not_mapped; - hop_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); - hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); - - /* hop 3 */ - hop_addr = get_next_hop_addr(hop_pte); - if (hop_addr == ULLONG_MAX) - goto not_mapped; - hop_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); - hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); - - if (!(hop_pte & LAST_MASK)) { - /* hop 4 */ - hop_addr = get_next_hop_addr(hop_pte); - if (hop_addr == ULLONG_MAX) - goto not_mapped; - hop_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop_addr, - virt_addr); - hop_pte = hdev->asic_funcs->read_pte(hdev, hop_pte_addr); - - offset_mask = FLAGS_MASK; + rc = hl_mmu_va_to_pa(ctx, virt_addr, phys_addr); + if (rc) { + dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n", + virt_addr); + rc = -EINVAL; } - if (!(hop_pte & PAGE_PRESENT_MASK)) - goto not_mapped; - - *phys_addr = (hop_pte & ~offset_mask) | (virt_addr & offset_mask); - - goto out; - -not_mapped: - dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n", - virt_addr); - rc = -EINVAL; -out: - mutex_unlock(&ctx->mmu_lock); return rc; } diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 76ab9741895d..b8ad5bfa6359 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -10,6 +10,7 @@ #include "../include/common/cpucp_if.h" #include "../include/common/qman_if.h" +#include "../include/hw_ip/mmu/mmu_general.h" #include #include @@ -1661,6 +1662,32 @@ struct hl_mmu_priv { struct hl_mmu_hr_priv hr; }; +/** + * struct hl_mmu_per_hop_info - A structure describing one TLB HOP and its entry + * that was created in order to translate a virtual address to a + * physical one. + * @hop_addr: The address of the hop. + * @hop_pte_addr: The address of the hop entry. + * @hop_pte_val: The value in the hop entry. + */ +struct hl_mmu_per_hop_info { + u64 hop_addr; + u64 hop_pte_addr; + u64 hop_pte_val; +}; + +/** + * struct hl_mmu_hop_info - A structure describing the TLB hops and their + * hop-entries that were created in order to translate a virtual address to a + * physical one. + * @hop_info: Array holding the per-hop information used for the translation. + * @used_hops: The number of hops used for the translation. + */ +struct hl_mmu_hop_info { + struct hl_mmu_per_hop_info hop_info[MMU_ARCH_5_HOPS]; + u32 used_hops; +}; + /** * struct hl_mmu_funcs - Device related MMU functions. * @init: initialize the MMU module. @@ -1672,6 +1699,9 @@ struct hl_mmu_priv { * @flush: flush all writes from all cores to reach device MMU. * @swap_out: marks all mapping of the given context as swapped out. * @swap_in: marks all mapping of the given context as swapped in. + * @get_tlb_info: returns the list of hops and hop-entries used that were + * created in order to translate the giver virtual address to a + * physical one. */ struct hl_mmu_funcs { int (*init)(struct hl_device *hdev); @@ -1686,6 +1716,8 @@ struct hl_mmu_funcs { void (*flush)(struct hl_ctx *ctx); void (*swap_out)(struct hl_ctx *ctx); void (*swap_in)(struct hl_ctx *ctx); + int (*get_tlb_info)(struct hl_ctx *ctx, + u64 virt_addr, struct hl_mmu_hop_info *hops); }; /** @@ -2138,6 +2170,9 @@ void hl_mmu_swap_out(struct hl_ctx *ctx); void hl_mmu_swap_in(struct hl_ctx *ctx); int hl_mmu_if_set_funcs(struct hl_device *hdev); void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu); +int hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr); +int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, + struct hl_mmu_hop_info *hops); int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name, void __iomem *dst, u32 src_offset, u32 size); diff --git a/drivers/misc/habanalabs/common/mmu.c b/drivers/misc/habanalabs/common/mmu.c index f4b3d02fe0d8..7279c83cc081 100644 --- a/drivers/misc/habanalabs/common/mmu.c +++ b/drivers/misc/habanalabs/common/mmu.c @@ -342,6 +342,56 @@ void hl_mmu_swap_in(struct hl_ctx *ctx) hdev->mmu_func[MMU_HR_PGT].swap_in(ctx); } +int hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr) +{ + struct hl_mmu_hop_info hops; + u64 tmp_addr; + int rc; + + rc = hl_mmu_get_tlb_info(ctx, virt_addr, &hops); + if (rc) + return rc; + + /* last hop holds the phys address and flags */ + tmp_addr = hops.hop_info[hops.used_hops - 1].hop_pte_val; + *phys_addr = (tmp_addr & HOP_PHYS_ADDR_MASK) | (virt_addr & FLAGS_MASK); + + return 0; +} + +int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, + struct hl_mmu_hop_info *hops) +{ + struct hl_device *hdev = ctx->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_mmu_properties *mmu_prop; + int rc; + bool is_dram_addr; + + if (!hdev->mmu_enable) + return -EOPNOTSUPP; + + is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, + prop->dmmu.start_addr, + prop->dmmu.end_addr); + + /* host-residency is the same in PMMU and HPMMU, use one of them */ + mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu; + + mutex_lock(&ctx->mmu_lock); + + if (mmu_prop->host_resident) + rc = hdev->mmu_func[MMU_HR_PGT].get_tlb_info(ctx, + virt_addr, hops); + else + rc = hdev->mmu_func[MMU_DR_PGT].get_tlb_info(ctx, + virt_addr, hops); + + mutex_unlock(&ctx->mmu_lock); + + return rc; +} + int hl_mmu_if_set_funcs(struct hl_device *hdev) { if (!hdev->mmu_enable) diff --git a/drivers/misc/habanalabs/common/mmu_v1.c b/drivers/misc/habanalabs/common/mmu_v1.c index 92b22298bb5c..2ce6ea89d4fa 100644 --- a/drivers/misc/habanalabs/common/mmu_v1.c +++ b/drivers/misc/habanalabs/common/mmu_v1.c @@ -837,6 +837,107 @@ static void hl_mmu_v1_swap_in(struct hl_ctx *ctx) } +static inline u64 get_hop_pte_addr(struct hl_ctx *ctx, + struct hl_mmu_properties *mmu_prop, + int hop_num, u64 hop_addr, u64 virt_addr) +{ + switch (hop_num) { + case 0: + return get_hop0_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); + case 1: + return get_hop1_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); + case 2: + return get_hop2_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); + case 3: + return get_hop3_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); + case 4: + return get_hop4_pte_addr(ctx, mmu_prop, hop_addr, virt_addr); + default: + break; + } + return U64_MAX; +} + +static int hl_mmu_v1_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr, + struct hl_mmu_hop_info *hops) +{ + struct hl_device *hdev = ctx->hdev; + struct asic_fixed_properties *prop = &hdev->asic_prop; + struct hl_mmu_properties *mmu_prop; + bool is_dram_addr, is_pmmu_addr, is_pmmu_h_addr, is_huge; + int i, used_hops; + + is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size, + prop->dmmu.start_addr, + prop->dmmu.end_addr); + is_pmmu_addr = hl_mem_area_inside_range(virt_addr, prop->pmmu.page_size, + prop->pmmu.start_addr, + prop->pmmu.end_addr); + is_pmmu_h_addr = hl_mem_area_inside_range(virt_addr, + prop->pmmu_huge.page_size, + prop->pmmu_huge.start_addr, + prop->pmmu_huge.end_addr); + if (is_dram_addr) { + mmu_prop = &prop->dmmu; + is_huge = true; + } else if (is_pmmu_addr) { + mmu_prop = &prop->pmmu; + is_huge = false; + } else if (is_pmmu_h_addr) { + mmu_prop = &prop->pmmu_huge; + is_huge = true; + } else { + return -EINVAL; + } + + used_hops = mmu_prop->num_hops; + + /* huge pages use lesser hops */ + if (is_huge) + used_hops--; + + hops->hop_info[0].hop_addr = get_phys_hop0_addr(ctx); + hops->hop_info[0].hop_pte_addr = + get_hop_pte_addr(ctx, mmu_prop, 0, + hops->hop_info[0].hop_addr, virt_addr); + hops->hop_info[0].hop_pte_val = + hdev->asic_funcs->read_pte(hdev, + hops->hop_info[0].hop_pte_addr); + + for (i = 1 ; i < used_hops ; i++) { + hops->hop_info[i].hop_addr = + get_next_hop_addr(ctx, + hops->hop_info[i - 1].hop_pte_val); + if (hops->hop_info[i].hop_addr == ULLONG_MAX) + return -EFAULT; + + hops->hop_info[i].hop_pte_addr = + get_hop_pte_addr(ctx, mmu_prop, i, + hops->hop_info[i].hop_addr, + virt_addr); + hops->hop_info[i].hop_pte_val = + hdev->asic_funcs->read_pte(hdev, + hops->hop_info[i].hop_pte_addr); + + if (!(hops->hop_info[i].hop_pte_val & PAGE_PRESENT_MASK)) + return -EFAULT; + + if (hops->hop_info[i].hop_pte_val & LAST_MASK) + break; + } + + /* if passed over all hops then no last hop was found */ + if (i == mmu_prop->num_hops) + return -EFAULT; + + if (!(hops->hop_info[i].hop_pte_val & PAGE_PRESENT_MASK)) + return -EFAULT; + + hops->used_hops = i + 1; + + return 0; +} + /* * hl_mmu_v1_prepare - prepare mmu for working with mmu v1 * @@ -853,4 +954,5 @@ void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu) mmu->flush = flush; mmu->swap_out = hl_mmu_v1_swap_out; mmu->swap_in = hl_mmu_v1_swap_in; + mmu->get_tlb_info = hl_mmu_v1_get_tlb_info; } -- cgit v1.2.3