summaryrefslogtreecommitdiff
path: root/drivers/misc/habanalabs/common
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/misc/habanalabs/common')
-rw-r--r--drivers/misc/habanalabs/common/command_buffer.c127
-rw-r--r--drivers/misc/habanalabs/common/command_submission.c75
-rw-r--r--drivers/misc/habanalabs/common/debugfs.c35
-rw-r--r--drivers/misc/habanalabs/common/device.c147
-rw-r--r--drivers/misc/habanalabs/common/firmware_if.c184
-rw-r--r--drivers/misc/habanalabs/common/habanalabs.h171
-rw-r--r--drivers/misc/habanalabs/common/habanalabs_drv.c44
-rw-r--r--drivers/misc/habanalabs/common/habanalabs_ioctl.c123
-rw-r--r--drivers/misc/habanalabs/common/hw_queue.c4
-rw-r--r--drivers/misc/habanalabs/common/hwmon.c24
-rw-r--r--drivers/misc/habanalabs/common/memory.c57
-rw-r--r--drivers/misc/habanalabs/common/memory_mgr.c10
-rw-r--r--drivers/misc/habanalabs/common/mmu/mmu.c31
-rw-r--r--drivers/misc/habanalabs/common/sysfs.c10
14 files changed, 689 insertions, 353 deletions
diff --git a/drivers/misc/habanalabs/common/command_buffer.c b/drivers/misc/habanalabs/common/command_buffer.c
index b027f66f8bd4..2b332991ac6a 100644
--- a/drivers/misc/habanalabs/common/command_buffer.c
+++ b/drivers/misc/habanalabs/common/command_buffer.c
@@ -12,20 +12,18 @@
#include <linux/slab.h>
#include <linux/uaccess.h>
+#define CB_VA_POOL_SIZE (4UL * SZ_1G)
+
static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb)
{
struct hl_device *hdev = ctx->hdev;
struct asic_fixed_properties *prop = &hdev->asic_prop;
- struct hl_vm_va_block *va_block, *tmp;
- dma_addr_t bus_addr;
- u64 virt_addr;
u32 page_size = prop->pmmu.page_size;
- s32 offset;
int rc;
if (!hdev->supports_cb_mapping) {
dev_err_ratelimited(hdev->dev,
- "Cannot map CB because no VA range is allocated for CB mapping\n");
+ "Mapping a CB to the device's MMU is not supported\n");
return -EINVAL;
}
@@ -35,106 +33,45 @@ static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb)
return -EINVAL;
}
- INIT_LIST_HEAD(&cb->va_block_list);
-
- for (bus_addr = cb->bus_address;
- bus_addr < cb->bus_address + cb->size;
- bus_addr += page_size) {
-
- virt_addr = (u64) gen_pool_alloc(ctx->cb_va_pool, page_size);
- if (!virt_addr) {
- dev_err(hdev->dev,
- "Failed to allocate device virtual address for CB\n");
- rc = -ENOMEM;
- goto err_va_pool_free;
- }
+ if (cb->is_mmu_mapped)
+ return 0;
- va_block = kzalloc(sizeof(*va_block), GFP_KERNEL);
- if (!va_block) {
- rc = -ENOMEM;
- gen_pool_free(ctx->cb_va_pool, virt_addr, page_size);
- goto err_va_pool_free;
- }
+ cb->roundup_size = roundup(cb->size, page_size);
- va_block->start = virt_addr;
- va_block->end = virt_addr + page_size - 1;
- va_block->size = page_size;
- list_add_tail(&va_block->node, &cb->va_block_list);
+ cb->virtual_addr = (u64) gen_pool_alloc(ctx->cb_va_pool, cb->roundup_size);
+ if (!cb->virtual_addr) {
+ dev_err(hdev->dev, "Failed to allocate device virtual address for CB\n");
+ return -ENOMEM;
}
- mutex_lock(&ctx->mmu_lock);
-
- bus_addr = cb->bus_address;
- offset = 0;
- list_for_each_entry(va_block, &cb->va_block_list, node) {
- rc = hl_mmu_map_page(ctx, va_block->start, bus_addr,
- va_block->size, list_is_last(&va_block->node,
- &cb->va_block_list));
- if (rc) {
- dev_err(hdev->dev, "Failed to map VA %#llx to CB\n",
- va_block->start);
- goto err_va_umap;
- }
-
- bus_addr += va_block->size;
- offset += va_block->size;
+ mutex_lock(&hdev->mmu_lock);
+ rc = hl_mmu_map_contiguous(ctx, cb->virtual_addr, cb->bus_address, cb->roundup_size);
+ if (rc) {
+ dev_err(hdev->dev, "Failed to map VA %#llx to CB\n", cb->virtual_addr);
+ goto err_va_umap;
}
-
rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV);
-
- mutex_unlock(&ctx->mmu_lock);
+ mutex_unlock(&hdev->mmu_lock);
cb->is_mmu_mapped = true;
-
return rc;
err_va_umap:
- list_for_each_entry(va_block, &cb->va_block_list, node) {
- if (offset <= 0)
- break;
- hl_mmu_unmap_page(ctx, va_block->start, va_block->size,
- offset <= va_block->size);
- offset -= va_block->size;
- }
-
- rc = hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
-
- mutex_unlock(&ctx->mmu_lock);
-
-err_va_pool_free:
- list_for_each_entry_safe(va_block, tmp, &cb->va_block_list, node) {
- gen_pool_free(ctx->cb_va_pool, va_block->start, va_block->size);
- list_del(&va_block->node);
- kfree(va_block);
- }
-
+ mutex_unlock(&hdev->mmu_lock);
+ gen_pool_free(ctx->cb_va_pool, cb->virtual_addr, cb->roundup_size);
return rc;
}
static void cb_unmap_mem(struct hl_ctx *ctx, struct hl_cb *cb)
{
struct hl_device *hdev = ctx->hdev;
- struct hl_vm_va_block *va_block, *tmp;
-
- mutex_lock(&ctx->mmu_lock);
-
- list_for_each_entry(va_block, &cb->va_block_list, node)
- if (hl_mmu_unmap_page(ctx, va_block->start, va_block->size,
- list_is_last(&va_block->node,
- &cb->va_block_list)))
- dev_warn_ratelimited(hdev->dev,
- "Failed to unmap CB's va 0x%llx\n",
- va_block->start);
+ mutex_lock(&hdev->mmu_lock);
+ hl_mmu_unmap_contiguous(ctx, cb->virtual_addr, cb->roundup_size);
hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
+ mutex_unlock(&hdev->mmu_lock);
- mutex_unlock(&ctx->mmu_lock);
-
- list_for_each_entry_safe(va_block, tmp, &cb->va_block_list, node) {
- gen_pool_free(ctx->cb_va_pool, va_block->start, va_block->size);
- list_del(&va_block->node);
- kfree(va_block);
- }
+ gen_pool_free(ctx->cb_va_pool, cb->virtual_addr, cb->roundup_size);
}
static void cb_fini(struct hl_device *hdev, struct hl_cb *cb)
@@ -376,7 +313,6 @@ int hl_cb_destroy(struct hl_mem_mgr *mmg, u64 cb_handle)
static int hl_cb_info(struct hl_mem_mgr *mmg,
u64 handle, u32 flags, u32 *usage_cnt, u64 *device_va)
{
- struct hl_vm_va_block *va_block;
struct hl_cb *cb;
int rc = 0;
@@ -388,9 +324,8 @@ static int hl_cb_info(struct hl_mem_mgr *mmg,
}
if (flags & HL_CB_FLAGS_GET_DEVICE_VA) {
- va_block = list_first_entry(&cb->va_block_list, struct hl_vm_va_block, node);
- if (va_block) {
- *device_va = va_block->start;
+ if (cb->is_mmu_mapped) {
+ *device_va = cb->virtual_addr;
} else {
dev_err(mmg->dev, "CB is not mapped to the device's MMU\n");
rc = -EINVAL;
@@ -566,16 +501,23 @@ int hl_cb_va_pool_init(struct hl_ctx *ctx)
return -ENOMEM;
}
- rc = gen_pool_add(ctx->cb_va_pool, prop->cb_va_start_addr,
- prop->cb_va_end_addr - prop->cb_va_start_addr, -1);
+ ctx->cb_va_pool_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST,
+ CB_VA_POOL_SIZE, HL_MMU_VA_ALIGNMENT_NOT_NEEDED);
+ if (!ctx->cb_va_pool_base) {
+ rc = -ENOMEM;
+ goto err_pool_destroy;
+ }
+ rc = gen_pool_add(ctx->cb_va_pool, ctx->cb_va_pool_base, CB_VA_POOL_SIZE, -1);
if (rc) {
dev_err(hdev->dev,
"Failed to add memory to VA gen pool for CB mapping\n");
- goto err_pool_destroy;
+ goto err_unreserve_va_block;
}
return 0;
+err_unreserve_va_block:
+ hl_unreserve_va_block(hdev, ctx, ctx->cb_va_pool_base, CB_VA_POOL_SIZE);
err_pool_destroy:
gen_pool_destroy(ctx->cb_va_pool);
@@ -590,4 +532,5 @@ void hl_cb_va_pool_fini(struct hl_ctx *ctx)
return;
gen_pool_destroy(ctx->cb_va_pool);
+ hl_unreserve_va_block(hdev, ctx, ctx->cb_va_pool_base, CB_VA_POOL_SIZE);
}
diff --git a/drivers/misc/habanalabs/common/command_submission.c b/drivers/misc/habanalabs/common/command_submission.c
index 90a4574cbe2d..fa05770865c6 100644
--- a/drivers/misc/habanalabs/common/command_submission.c
+++ b/drivers/misc/habanalabs/common/command_submission.c
@@ -12,7 +12,9 @@
#include <linux/slab.h>
#define HL_CS_FLAGS_TYPE_MASK (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \
- HL_CS_FLAGS_COLLECTIVE_WAIT)
+ HL_CS_FLAGS_COLLECTIVE_WAIT | HL_CS_FLAGS_RESERVE_SIGNALS_ONLY | \
+ HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY | HL_CS_FLAGS_ENGINE_CORE_COMMAND)
+
#define MAX_TS_ITER_NUM 10
@@ -824,10 +826,10 @@ static void cs_timedout(struct work_struct *work)
}
/* Save only the first CS timeout parameters */
- rc = atomic_cmpxchg(&hdev->last_error.cs_timeout.write_enable, 1, 0);
+ rc = atomic_cmpxchg(&hdev->captured_err_info.cs_timeout.write_enable, 1, 0);
if (rc) {
- hdev->last_error.cs_timeout.timestamp = ktime_get();
- hdev->last_error.cs_timeout.seq = cs->sequence;
+ hdev->captured_err_info.cs_timeout.timestamp = ktime_get();
+ hdev->captured_err_info.cs_timeout.seq = cs->sequence;
event_mask = device_reset ? (HL_NOTIFIER_EVENT_CS_TIMEOUT |
HL_NOTIFIER_EVENT_DEVICE_RESET) : HL_NOTIFIER_EVENT_CS_TIMEOUT;
@@ -1242,6 +1244,8 @@ static enum hl_cs_type hl_cs_get_cs_type(u32 cs_type_flags)
return CS_RESERVE_SIGNALS;
else if (cs_type_flags & HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY)
return CS_UNRESERVE_SIGNALS;
+ else if (cs_type_flags & HL_CS_FLAGS_ENGINE_CORE_COMMAND)
+ return CS_TYPE_ENGINE_CORE;
else
return CS_TYPE_DEFAULT;
}
@@ -1253,6 +1257,7 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
u32 cs_type_flags, num_chunks;
enum hl_device_status status;
enum hl_cs_type cs_type;
+ bool is_sync_stream;
if (!hl_device_operational(hdev, &status)) {
return -EBUSY;
@@ -1276,9 +1281,10 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
cs_type = hl_cs_get_cs_type(cs_type_flags);
num_chunks = args->in.num_chunks_execute;
- if (unlikely((cs_type == CS_TYPE_SIGNAL || cs_type == CS_TYPE_WAIT ||
- cs_type == CS_TYPE_COLLECTIVE_WAIT) &&
- !hdev->supports_sync_stream)) {
+ is_sync_stream = (cs_type == CS_TYPE_SIGNAL || cs_type == CS_TYPE_WAIT ||
+ cs_type == CS_TYPE_COLLECTIVE_WAIT);
+
+ if (unlikely(is_sync_stream && !hdev->supports_sync_stream)) {
dev_err(hdev->dev, "Sync stream CS is not supported\n");
return -EINVAL;
}
@@ -1288,7 +1294,7 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
dev_err(hdev->dev, "Got execute CS with 0 chunks, context %d\n", ctx->asid);
return -EINVAL;
}
- } else if (num_chunks != 1) {
+ } else if (is_sync_stream && num_chunks != 1) {
dev_err(hdev->dev,
"Sync stream CS mandates one chunk only, context %d\n",
ctx->asid);
@@ -1584,13 +1590,14 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
struct hl_device *hdev = hpriv->hdev;
struct hl_ctx *ctx = hpriv->ctx;
bool need_soft_reset = false;
- int rc = 0, do_ctx_switch;
+ int rc = 0, do_ctx_switch = 0;
void __user *chunks;
u32 num_chunks, tmp;
u16 sob_count;
int ret;
- do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
+ if (hdev->supports_ctx_switch)
+ do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);
if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
mutex_lock(&hpriv->restore_phase_mutex);
@@ -1661,9 +1668,10 @@ wait_again:
}
}
- ctx->thread_ctx_switch_wait_token = 1;
+ if (hdev->supports_ctx_switch)
+ ctx->thread_ctx_switch_wait_token = 1;
- } else if (!ctx->thread_ctx_switch_wait_token) {
+ } else if (hdev->supports_ctx_switch && !ctx->thread_ctx_switch_wait_token) {
rc = hl_poll_timeout_memory(hdev,
&ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
100, jiffies_to_usecs(hdev->timeout_jiffies), false);
@@ -2351,6 +2359,41 @@ out:
return rc;
}
+static int cs_ioctl_engine_cores(struct hl_fpriv *hpriv, u64 engine_cores,
+ u32 num_engine_cores, u32 core_command)
+{
+ int rc;
+ struct hl_device *hdev = hpriv->hdev;
+ void __user *engine_cores_arr;
+ u32 *cores;
+
+ if (!num_engine_cores || num_engine_cores > hdev->asic_prop.num_engine_cores) {
+ dev_err(hdev->dev, "Number of engine cores %d is invalid\n", num_engine_cores);
+ return -EINVAL;
+ }
+
+ if (core_command != HL_ENGINE_CORE_RUN && core_command != HL_ENGINE_CORE_HALT) {
+ dev_err(hdev->dev, "Engine core command is invalid\n");
+ return -EINVAL;
+ }
+
+ engine_cores_arr = (void __user *) (uintptr_t) engine_cores;
+ cores = kmalloc_array(num_engine_cores, sizeof(u32), GFP_KERNEL);
+ if (!cores)
+ return -ENOMEM;
+
+ if (copy_from_user(cores, engine_cores_arr, num_engine_cores * sizeof(u32))) {
+ dev_err(hdev->dev, "Failed to copy core-ids array from user\n");
+ kfree(cores);
+ return -EFAULT;
+ }
+
+ rc = hdev->asic_funcs->set_engine_cores(hdev, cores, num_engine_cores, core_command);
+ kfree(cores);
+
+ return rc;
+}
+
int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
{
union hl_cs_args *args = data;
@@ -2403,6 +2446,10 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
rc = cs_ioctl_unreserve_signals(hpriv,
args->in.encaps_sig_handle_id);
break;
+ case CS_TYPE_ENGINE_CORE:
+ rc = cs_ioctl_engine_cores(hpriv, args->in.engine_cores,
+ args->in.num_engine_cores, args->in.core_command);
+ break;
default:
rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq,
args->in.cs_flags,
@@ -2524,7 +2571,7 @@ static int hl_cs_poll_fences(struct multi_cs_data *mcs_data, struct multi_cs_com
ktime_t max_ktime, first_cs_time;
enum hl_cs_wait_status status;
- memset(fence_ptr, 0, arr_len * sizeof(*fence_ptr));
+ memset(fence_ptr, 0, arr_len * sizeof(struct hl_fence *));
/* get all fences under the same lock */
rc = hl_ctx_get_fences(mcs_data->ctx, seq_arr, fence_ptr, arr_len);
@@ -2826,7 +2873,7 @@ static int hl_multi_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
}
/* allocate array for the fences */
- fence_arr = kmalloc_array(seq_arr_len, sizeof(*fence_arr), GFP_KERNEL);
+ fence_arr = kmalloc_array(seq_arr_len, sizeof(struct hl_fence *), GFP_KERNEL);
if (!fence_arr) {
rc = -ENOMEM;
goto free_seq_arr;
diff --git a/drivers/misc/habanalabs/common/debugfs.c b/drivers/misc/habanalabs/common/debugfs.c
index 64439f33a19b..48d3ec8b5c82 100644
--- a/drivers/misc/habanalabs/common/debugfs.c
+++ b/drivers/misc/habanalabs/common/debugfs.c
@@ -291,14 +291,16 @@ static int vm_show(struct seq_file *s, void *data)
if (ctx->asid != HL_KERNEL_ASID_ID &&
!list_empty(&ctx->hw_block_mem_list)) {
seq_puts(s, "\nhw_block mappings:\n\n");
- seq_puts(s, " virtual address size HW block id\n");
- seq_puts(s, "-------------------------------------------\n");
+ seq_puts(s,
+ " virtual address block size mapped size HW block id\n");
+ seq_puts(s,
+ "---------------------------------------------------------------\n");
mutex_lock(&ctx->hw_block_list_lock);
- list_for_each_entry(lnode, &ctx->hw_block_mem_list,
- node) {
+ list_for_each_entry(lnode, &ctx->hw_block_mem_list, node) {
seq_printf(s,
- " 0x%-14lx %-6u %-9u\n",
- lnode->vaddr, lnode->size, lnode->id);
+ " 0x%-14lx %-6u %-6u %-9u\n",
+ lnode->vaddr, lnode->block_size, lnode->mapped_size,
+ lnode->id);
}
mutex_unlock(&ctx->hw_block_list_lock);
}
@@ -591,6 +593,7 @@ static int engines_show(struct seq_file *s, void *data)
struct hl_debugfs_entry *entry = s->private;
struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
struct hl_device *hdev = dev_entry->hdev;
+ struct engines_data eng_data;
if (hdev->reset_info.in_reset) {
dev_warn_ratelimited(hdev->dev,
@@ -598,7 +601,25 @@ static int engines_show(struct seq_file *s, void *data)
return 0;
}
- hdev->asic_funcs->is_device_idle(hdev, NULL, 0, s);
+ eng_data.actual_size = 0;
+ eng_data.allocated_buf_size = HL_ENGINES_DATA_MAX_SIZE;
+ eng_data.buf = vmalloc(eng_data.allocated_buf_size);
+ if (!eng_data.buf)
+ return -ENOMEM;
+
+ hdev->asic_funcs->is_device_idle(hdev, NULL, 0, &eng_data);
+
+ if (eng_data.actual_size > eng_data.allocated_buf_size) {
+ dev_err(hdev->dev,
+ "Engines data size (%d Bytes) is bigger than allocated size (%u Bytes)\n",
+ eng_data.actual_size, eng_data.allocated_buf_size);
+ vfree(eng_data.buf);
+ return -ENOMEM;
+ }
+
+ seq_write(s, eng_data.buf, eng_data.actual_size);
+
+ vfree(eng_data.buf);
return 0;
}
diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c
index b30aeb1c657f..233d8b46c831 100644
--- a/drivers/misc/habanalabs/common/device.c
+++ b/drivers/misc/habanalabs/common/device.c
@@ -13,6 +13,8 @@
#include <linux/pci.h>
#include <linux/hwmon.h>
+#include <trace/events/habanalabs.h>
+
#define HL_RESET_DELAY_USEC 10000 /* 10ms */
enum dma_alloc_type {
@@ -26,8 +28,9 @@ enum dma_alloc_type {
/*
* hl_set_dram_bar- sets the bar to allow later access to address
*
- * @hdev: pointer to habanalabs device structure
+ * @hdev: pointer to habanalabs device structure.
* @addr: the address the caller wants to access.
+ * @region: the PCI region.
*
* @return: the old BAR base address on success, U64_MAX for failure.
* The caller should set it back to the old address after use.
@@ -37,58 +40,64 @@ enum dma_alloc_type {
* This function can be called also if the bar doesn't need to be set,
* in that case it just won't change the base.
*/
-static uint64_t hl_set_dram_bar(struct hl_device *hdev, u64 addr)
+static u64 hl_set_dram_bar(struct hl_device *hdev, u64 addr, struct pci_mem_region *region)
{
struct asic_fixed_properties *prop = &hdev->asic_prop;
- u64 bar_base_addr;
+ u64 bar_base_addr, old_base;
- bar_base_addr = addr & ~(prop->dram_pci_bar_size - 0x1ull);
+ if (is_power_of_2(prop->dram_pci_bar_size))
+ bar_base_addr = addr & ~(prop->dram_pci_bar_size - 0x1ull);
+ else
+ bar_base_addr = DIV_ROUND_DOWN_ULL(addr, prop->dram_pci_bar_size) *
+ prop->dram_pci_bar_size;
- return hdev->asic_funcs->set_dram_bar_base(hdev, bar_base_addr);
-}
+ old_base = hdev->asic_funcs->set_dram_bar_base(hdev, bar_base_addr);
+ /* in case of success we need to update the new BAR base */
+ if (old_base != U64_MAX)
+ region->region_base = bar_base_addr;
+
+ return old_base;
+}
static int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val,
enum debugfs_access_type acc_type, enum pci_region region_type)
{
struct pci_mem_region *region = &hdev->pci_mem_region[region_type];
+ void __iomem *acc_addr;
u64 old_base = 0, rc;
if (region_type == PCI_REGION_DRAM) {
- old_base = hl_set_dram_bar(hdev, addr);
+ old_base = hl_set_dram_bar(hdev, addr, region);
if (old_base == U64_MAX)
return -EIO;
}
+ acc_addr = hdev->pcie_bar[region->bar_id] + addr - region->region_base +
+ region->offset_in_bar;
switch (acc_type) {
case DEBUGFS_READ8:
- *val = readb(hdev->pcie_bar[region->bar_id] +
- addr - region->region_base + region->offset_in_bar);
+ *val = readb(acc_addr);
break;
case DEBUGFS_WRITE8:
- writeb(*val, hdev->pcie_bar[region->bar_id] +
- addr - region->region_base + region->offset_in_bar);
+ writeb(*val, acc_addr);
break;
case DEBUGFS_READ32:
- *val = readl(hdev->pcie_bar[region->bar_id] +
- addr - region->region_base + region->offset_in_bar);
+ *val = readl(acc_addr);
break;
case DEBUGFS_WRITE32:
- writel(*val, hdev->pcie_bar[region->bar_id] +
- addr - region->region_base + region->offset_in_bar);
+ writel(*val, acc_addr);
break;
case DEBUGFS_READ64:
- *val = readq(hdev->pcie_bar[region->bar_id] +
- addr - region->region_base + region->offset_in_bar);
+ *val = readq(acc_addr);
break;
case DEBUGFS_WRITE64:
- writeq(*val, hdev->pcie_bar[region->bar_id] +
- addr - region->region_base + region->offset_in_bar);
+ writeq(*val, acc_addr);
break;
}
if (region_type == PCI_REGION_DRAM) {
- rc = hl_set_dram_bar(hdev, old_base);
+ rc = hl_set_dram_bar(hdev, old_base, region);
if (rc == U64_MAX)
return -EIO;
}
@@ -97,9 +106,10 @@ static int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val
}
static void *hl_dma_alloc_common(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle,
- gfp_t flag, enum dma_alloc_type alloc_type)
+ gfp_t flag, enum dma_alloc_type alloc_type,
+ const char *caller)
{
- void *ptr;
+ void *ptr = NULL;
switch (alloc_type) {
case DMA_ALLOC_COHERENT:
@@ -113,11 +123,16 @@ static void *hl_dma_alloc_common(struct hl_device *hdev, size_t size, dma_addr_t
break;
}
+ if (trace_habanalabs_dma_alloc_enabled() && !ZERO_OR_NULL_PTR(ptr))
+ trace_habanalabs_dma_alloc(hdev->dev, (u64) (uintptr_t) ptr, *dma_handle, size,
+ caller);
+
return ptr;
}
static void hl_asic_dma_free_common(struct hl_device *hdev, size_t size, void *cpu_addr,
- dma_addr_t dma_handle, enum dma_alloc_type alloc_type)
+ dma_addr_t dma_handle, enum dma_alloc_type alloc_type,
+ const char *caller)
{
switch (alloc_type) {
case DMA_ALLOC_COHERENT:
@@ -130,39 +145,44 @@ static void hl_asic_dma_free_common(struct hl_device *hdev, size_t size, void *c
hdev->asic_funcs->asic_dma_pool_free(hdev, cpu_addr, dma_handle);
break;
}
+
+ trace_habanalabs_dma_free(hdev->dev, (u64) (uintptr_t) cpu_addr, dma_handle, size, caller);
}
-void *hl_asic_dma_alloc_coherent(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle,
- gfp_t flag)
+void *hl_asic_dma_alloc_coherent_caller(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle,
+ gfp_t flag, const char *caller)
{
- return hl_dma_alloc_common(hdev, size, dma_handle, flag, DMA_ALLOC_COHERENT);
+ return hl_dma_alloc_common(hdev, size, dma_handle, flag, DMA_ALLOC_COHERENT, caller);
}
-void hl_asic_dma_free_coherent(struct hl_device *hdev, size_t size, void *cpu_addr,
- dma_addr_t dma_handle)
+void hl_asic_dma_free_coherent_caller(struct hl_device *hdev, size_t size, void *cpu_addr,
+ dma_addr_t dma_handle, const char *caller)
{
- hl_asic_dma_free_common(hdev, size, cpu_addr, dma_handle, DMA_ALLOC_COHERENT);
+ hl_asic_dma_free_common(hdev, size, cpu_addr, dma_handle, DMA_ALLOC_COHERENT, caller);
}
-void *hl_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle)
+void *hl_cpu_accessible_dma_pool_alloc_caller(struct hl_device *hdev, size_t size,
+ dma_addr_t *dma_handle, const char *caller)
{
- return hl_dma_alloc_common(hdev, size, dma_handle, 0, DMA_ALLOC_CPU_ACCESSIBLE);
+ return hl_dma_alloc_common(hdev, size, dma_handle, 0, DMA_ALLOC_CPU_ACCESSIBLE, caller);
}
-void hl_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr)
+void hl_cpu_accessible_dma_pool_free_caller(struct hl_device *hdev, size_t size, void *vaddr,
+ const char *caller)
{
- hl_asic_dma_free_common(hdev, size, vaddr, 0, DMA_ALLOC_CPU_ACCESSIBLE);
+ hl_asic_dma_free_common(hdev, size, vaddr, 0, DMA_ALLOC_CPU_ACCESSIBLE, caller);
}
-void *hl_asic_dma_pool_zalloc(struct hl_device *hdev, size_t size, gfp_t mem_flags,
- dma_addr_t *dma_handle)
+void *hl_asic_dma_pool_zalloc_caller(struct hl_device *hdev, size_t size, gfp_t mem_flags,
+ dma_addr_t *dma_handle, const char *caller)
{
- return hl_dma_alloc_common(hdev, size, dma_handle, mem_flags, DMA_ALLOC_POOL);
+ return hl_dma_alloc_common(hdev, size, dma_handle, mem_flags, DMA_ALLOC_POOL, caller);
}
-void hl_asic_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr)
+void hl_asic_dma_pool_free_caller(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr,
+ const char *caller)
{
- hl_asic_dma_free_common(hdev, 0, vaddr, dma_addr, DMA_ALLOC_POOL);
+ hl_asic_dma_free_common(hdev, 0, vaddr, dma_addr, DMA_ALLOC_POOL, caller);
}
int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir)
@@ -267,6 +287,30 @@ int hl_access_dev_mem(struct hl_device *hdev, enum pci_region region_type,
return 0;
}
+void hl_engine_data_sprintf(struct engines_data *e, const char *fmt, ...)
+{
+ va_list args;
+ int str_size;
+
+ va_start(args, fmt);
+ /* Calculate formatted string length. Assuming each string is null terminated, hence
+ * increment result by 1
+ */
+ str_size = vsnprintf(NULL, 0, fmt, args) + 1;
+ va_end(args);
+
+ if ((e->actual_size + str_size) < e->allocated_buf_size) {
+ va_start(args, fmt);
+ vsnprintf(e->buf + e->actual_size, str_size, fmt, args);
+ va_end(args);
+ }
+
+ /* Need to update the size even when not updating destination buffer to get the exact size
+ * of all input strings
+ */
+ e->actual_size += str_size;
+}
+
enum hl_device_status hl_device_status(struct hl_device *hdev)
{
enum hl_device_status status;
@@ -322,6 +366,8 @@ static void hpriv_release(struct kref *ref)
hdev = hpriv->hdev;
+ hdev->asic_funcs->send_device_activity(hdev, false);
+
put_pid(hpriv->taskpid);
hl_debugfs_remove_file(hpriv);
@@ -673,7 +719,7 @@ static int device_early_init(struct hl_device *hdev)
if (hdev->asic_prop.completion_queues_count) {
hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count,
- sizeof(*hdev->cq_wq),
+ sizeof(struct workqueue_struct *),
GFP_KERNEL);
if (!hdev->cq_wq) {
rc = -ENOMEM;
@@ -1091,7 +1137,9 @@ int hl_device_resume(struct hl_device *hdev)
/* 'in_reset' was set to true during suspend, now we must clear it in order
* for hard reset to be performed
*/
+ spin_lock(&hdev->reset_info.lock);
hdev->reset_info.in_reset = 0;
+ spin_unlock(&hdev->reset_info.lock);
rc = hl_device_reset(hdev, HL_DRV_RESET_HARD);
if (rc) {
@@ -1518,6 +1566,13 @@ kill_processes:
*/
hdev->disabled = false;
+ /* F/W security enabled indication might be updated after hard-reset */
+ if (hard_reset) {
+ rc = hl_fw_read_preboot_status(hdev);
+ if (rc)
+ goto out_err;
+ }
+
rc = hdev->asic_funcs->hw_init(hdev);
if (rc) {
dev_err(hdev->dev, "failed to initialize the H/W after reset\n");
@@ -1556,7 +1611,7 @@ kill_processes:
if (!hdev->asic_prop.fw_security_enabled)
hl_fw_set_max_power(hdev);
} else {
- rc = hdev->asic_funcs->non_hard_reset_late_init(hdev);
+ rc = hdev->asic_funcs->compute_reset_late_init(hdev);
if (rc) {
if (reset_upon_device_release)
dev_err(hdev->dev,
@@ -1704,7 +1759,9 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
char *name;
bool add_cdev_sysfs_on_err = false;
- name = kasprintf(GFP_KERNEL, "hl%d", hdev->id / 2);
+ hdev->cdev_idx = hdev->id / 2;
+
+ name = kasprintf(GFP_KERNEL, "hl%d", hdev->cdev_idx);
if (!name) {
rc = -ENOMEM;
goto out_disabled;
@@ -1719,7 +1776,7 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
if (rc)
goto out_disabled;
- name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->id / 2);
+ name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->cdev_idx);
if (!name) {
rc = -ENOMEM;
goto free_dev;
@@ -1806,7 +1863,7 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
}
hdev->shadow_cs_queue = kcalloc(hdev->asic_prop.max_pending_cs,
- sizeof(*hdev->shadow_cs_queue), GFP_KERNEL);
+ sizeof(struct hl_cs *), GFP_KERNEL);
if (!hdev->shadow_cs_queue) {
rc = -ENOMEM;
goto cq_fini;
@@ -1997,10 +2054,10 @@ out_disabled:
if (hdev->pdev)
dev_err(&hdev->pdev->dev,
"Failed to initialize hl%d. Device is NOT usable !\n",
- hdev->id / 2);
+ hdev->cdev_idx);
else
pr_err("Failed to initialize hl%d. Device is NOT usable !\n",
- hdev->id / 2);
+ hdev->cdev_idx);
return rc;
}
diff --git a/drivers/misc/habanalabs/common/firmware_if.c b/drivers/misc/habanalabs/common/firmware_if.c
index 608ca67527a5..2de6a9bd564d 100644
--- a/drivers/misc/habanalabs/common/firmware_if.c
+++ b/drivers/misc/habanalabs/common/firmware_if.c
@@ -15,14 +15,6 @@
#define FW_FILE_MAX_SIZE 0x1400000 /* maximum size of 20MB */
-struct fw_binning_conf {
- u64 tpc_binning;
- u32 dec_binning;
- u32 hbm_binning;
- u32 edma_binning;
- u32 mme_redundancy;
-};
-
static char *extract_fw_ver_from_str(const char *fw_str)
{
char *str, *fw_ver, *whitespace;
@@ -260,7 +252,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
struct cpucp_packet *pkt;
dma_addr_t pkt_dma_addr;
struct hl_bd *sent_bd;
- u32 tmp, expected_ack_val, pi;
+ u32 tmp, expected_ack_val, pi, opcode;
int rc;
pkt = hl_cpu_accessible_dma_pool_alloc(hdev, len, &pkt_dma_addr);
@@ -327,8 +319,35 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
rc = (tmp & CPUCP_PKT_CTL_RC_MASK) >> CPUCP_PKT_CTL_RC_SHIFT;
if (rc) {
- dev_dbg(hdev->dev, "F/W ERROR %d for CPU packet %d\n",
- rc, (tmp & CPUCP_PKT_CTL_OPCODE_MASK) >> CPUCP_PKT_CTL_OPCODE_SHIFT);
+ opcode = (tmp & CPUCP_PKT_CTL_OPCODE_MASK) >> CPUCP_PKT_CTL_OPCODE_SHIFT;
+
+ if (!prop->supports_advanced_cpucp_rc) {
+ dev_dbg(hdev->dev, "F/W ERROR %d for CPU packet %d\n", rc, opcode);
+ goto scrub_descriptor;
+ }
+
+ switch (rc) {
+ case cpucp_packet_invalid:
+ dev_err(hdev->dev,
+ "CPU packet %d is not supported by F/W\n", opcode);
+ break;
+ case cpucp_packet_fault:
+ dev_err(hdev->dev,
+ "F/W failed processing CPU packet %d\n", opcode);
+ break;
+ case cpucp_packet_invalid_pkt:
+ dev_dbg(hdev->dev,
+ "CPU packet %d is not supported by F/W\n", opcode);
+ break;
+ case cpucp_packet_invalid_params:
+ dev_err(hdev->dev,
+ "F/W reports invalid parameters for CPU packet %d\n", opcode);
+ break;
+
+ default:
+ dev_err(hdev->dev,
+ "Unknown F/W ERROR %d for CPU packet %d\n", rc, opcode);
+ }
/* propagate the return code from the f/w to the callers who want to check it */
if (result)
@@ -340,6 +359,7 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
*result = le64_to_cpu(pkt->result);
}
+scrub_descriptor:
/* Scrub previous buffer descriptor 'ctl' field which contains the
* previous PI value written during packet submission.
* We must do this or else F/W can read an old value upon queue wraparound.
@@ -462,6 +482,21 @@ void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
size);
}
+int hl_fw_send_device_activity(struct hl_device *hdev, bool open)
+{
+ struct cpucp_packet pkt;
+ int rc;
+
+ memset(&pkt, 0, sizeof(pkt));
+ pkt.ctl = cpu_to_le32(CPUCP_PACKET_ACTIVE_STATUS_SET << CPUCP_PKT_CTL_OPCODE_SHIFT);
+ pkt.value = cpu_to_le64(open);
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL);
+ if (rc)
+ dev_err(hdev->dev, "failed to send device activity msg(%u)\n", open);
+
+ return rc;
+}
+
int hl_fw_send_heartbeat(struct hl_device *hdev)
{
struct cpucp_packet hb_pkt;
@@ -581,6 +616,15 @@ static bool fw_report_boot_dev0(struct hl_device *hdev, u32 err_val,
dev_dbg(hdev->dev, "Device status0 %#x\n", sts_val);
/* All warnings should go here in order not to reach the unknown error validation */
+ if (err_val & CPU_BOOT_ERR0_EEPROM_FAIL) {
+ dev_warn(hdev->dev,
+ "Device boot warning - EEPROM failure detected, default settings applied\n");
+ /* This is a warning so we don't want it to disable the
+ * device
+ */
+ err_val &= ~CPU_BOOT_ERR0_EEPROM_FAIL;
+ }
+
if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED) {
dev_warn(hdev->dev,
"Device boot warning - Skipped DRAM initialization\n");
@@ -1476,6 +1520,8 @@ static void hl_fw_preboot_update_state(struct hl_device *hdev)
*/
prop->hard_reset_done_by_fw = !!(cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN);
+ prop->fw_security_enabled = !!(cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_SECURITY_EN);
+
dev_dbg(hdev->dev, "Firmware preboot boot device status0 %#x\n",
cpu_boot_dev_sts0);
@@ -1514,7 +1560,7 @@ int hl_fw_read_preboot_status(struct hl_device *hdev)
hdev->asic_funcs->init_firmware_preload_params(hdev);
/*
- * In order to determine boot method (static VS dymanic) we need to
+ * In order to determine boot method (static VS dynamic) we need to
* read the boot caps register
*/
rc = hl_fw_read_preboot_caps(hdev);
@@ -1781,7 +1827,7 @@ int hl_fw_dynamic_send_protocol_cmd(struct hl_device *hdev,
*
* @return the CRC32 result
*
- * NOTE: kernel's CRC32 differ's from standard CRC32 calculation.
+ * NOTE: kernel's CRC32 differs from standard CRC32 calculation.
* in order to be aligned we need to flip the bits of both the input
* initial CRC and kernel's CRC32 result.
* in addition both sides use initial CRC of 0,
@@ -1798,7 +1844,7 @@ static u32 hl_fw_compat_crc32(u8 *data, size_t size)
*
* @hdev: pointer to the habanalabs device structure
* @addr: device address of memory transfer
- * @size: memory transter size
+ * @size: memory transfer size
* @region: PCI memory region
*
* @return 0 on success, otherwise non-zero error code
@@ -1854,50 +1900,36 @@ static int hl_fw_dynamic_validate_descriptor(struct hl_device *hdev,
u64 addr;
int rc;
- if (le32_to_cpu(fw_desc->header.magic) != HL_COMMS_DESC_MAGIC) {
- dev_err(hdev->dev, "Invalid magic for dynamic FW descriptor (%x)\n",
+ if (le32_to_cpu(fw_desc->header.magic) != HL_COMMS_DESC_MAGIC)
+ dev_warn(hdev->dev, "Invalid magic for dynamic FW descriptor (%x)\n",
fw_desc->header.magic);
- return -EIO;
- }
- if (fw_desc->header.version != HL_COMMS_DESC_VER) {
- dev_err(hdev->dev, "Invalid version for dynamic FW descriptor (%x)\n",
+ if (fw_desc->header.version != HL_COMMS_DESC_VER)
+ dev_warn(hdev->dev, "Invalid version for dynamic FW descriptor (%x)\n",
fw_desc->header.version);
- return -EIO;
- }
/*
- * calc CRC32 of data without header.
+ * Calc CRC32 of data without header. use the size of the descriptor
+ * reported by firmware, without calculating it ourself, to allow adding
+ * more fields to the lkd_fw_comms_desc structure.
* note that no alignment/stride address issues here as all structures
- * are 64 bit padded
+ * are 64 bit padded.
*/
- data_size = sizeof(struct lkd_fw_comms_desc) -
- sizeof(struct comms_desc_header);
data_ptr = (u8 *)fw_desc + sizeof(struct comms_desc_header);
-
- if (le16_to_cpu(fw_desc->header.size) != data_size) {
- dev_err(hdev->dev,
- "Invalid descriptor size 0x%x, expected size 0x%zx\n",
- le16_to_cpu(fw_desc->header.size), data_size);
- return -EIO;
- }
+ data_size = le16_to_cpu(fw_desc->header.size);
data_crc32 = hl_fw_compat_crc32(data_ptr, data_size);
-
if (data_crc32 != le32_to_cpu(fw_desc->header.crc32)) {
- dev_err(hdev->dev,
- "CRC32 mismatch for dynamic FW descriptor (%x:%x)\n",
- data_crc32, fw_desc->header.crc32);
+ dev_err(hdev->dev, "CRC32 mismatch for dynamic FW descriptor (%x:%x)\n",
+ data_crc32, fw_desc->header.crc32);
return -EIO;
}
/* find memory region to which to copy the image */
addr = le64_to_cpu(fw_desc->img_addr);
region_id = hl_get_pci_memory_region(hdev, addr);
- if ((region_id != PCI_REGION_SRAM) &&
- ((region_id != PCI_REGION_DRAM))) {
- dev_err(hdev->dev,
- "Invalid region to copy FW image address=%llx\n", addr);
+ if ((region_id != PCI_REGION_SRAM) && ((region_id != PCI_REGION_DRAM))) {
+ dev_err(hdev->dev, "Invalid region to copy FW image address=%llx\n", addr);
return -EIO;
}
@@ -1914,8 +1946,7 @@ static int hl_fw_dynamic_validate_descriptor(struct hl_device *hdev,
fw_loader->dynamic_loader.fw_image_size,
region);
if (rc) {
- dev_err(hdev->dev,
- "invalid mem transfer request for FW image\n");
+ dev_err(hdev->dev, "invalid mem transfer request for FW image\n");
return rc;
}
@@ -2422,18 +2453,6 @@ static int hl_fw_dynamic_send_msg(struct hl_device *hdev,
msg.reset_cause = *(__u8 *) data;
break;
- case HL_COMMS_BINNING_CONF_TYPE:
- {
- struct fw_binning_conf *binning_conf = (struct fw_binning_conf *) data;
-
- msg.tpc_binning_conf = cpu_to_le64(binning_conf->tpc_binning);
- msg.dec_binning_conf = cpu_to_le32(binning_conf->dec_binning);
- msg.hbm_binning_conf = cpu_to_le32(binning_conf->hbm_binning);
- msg.edma_binning_conf = cpu_to_le32(binning_conf->edma_binning);
- msg.mme_redundancy_conf = cpu_to_le32(binning_conf->mme_redundancy);
- break;
- }
-
default:
dev_err(hdev->dev,
"Send COMMS message - invalid message type %u\n",
@@ -2503,13 +2522,6 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
*/
dyn_regs = &fw_loader->dynamic_loader.comm_desc.cpu_dyn_regs;
- /* if no preboot loaded indication- wait for preboot */
- if (!(hdev->fw_loader.fw_comp_loaded & FW_TYPE_PREBOOT_CPU)) {
- rc = hl_fw_wait_preboot_ready(hdev);
- if (rc)
- return -EIO;
- }
-
rc = hl_fw_dynamic_send_protocol_cmd(hdev, fw_loader, COMMS_RST_STATE,
0, true,
fw_loader->cpu_timeout);
@@ -2547,7 +2559,7 @@ static int hl_fw_dynamic_init_cpu(struct hl_device *hdev,
/*
* when testing FW load (without Linux) on PLDM we don't want to
* wait until boot fit is active as it may take several hours.
- * instead, we load the bootfit and let it do all initializations in
+ * instead, we load the bootfit and let it do all initialization in
* the background.
*/
if (hdev->pldm && !(hdev->fw_components & FW_TYPE_LINUX))
@@ -2961,3 +2973,49 @@ void hl_fw_set_max_power(struct hl_device *hdev)
if (rc)
dev_err(hdev->dev, "Failed to set max power, error %d\n", rc);
}
+
+static int hl_fw_get_sec_attest_data(struct hl_device *hdev, u32 packet_id, void *data, u32 size,
+ u32 nonce, u32 timeout)
+{
+ struct cpucp_packet pkt = {};
+ dma_addr_t req_dma_addr;
+ void *req_cpu_addr;
+ int rc;
+
+ req_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, size, &req_dma_addr);
+ if (!data) {
+ dev_err(hdev->dev,
+ "Failed to allocate DMA memory for CPU-CP packet %u\n", packet_id);
+ return -ENOMEM;
+ }
+
+ memset(data, 0, size);
+
+ pkt.ctl = cpu_to_le32(packet_id << CPUCP_PKT_CTL_OPCODE_SHIFT);
+ pkt.addr = cpu_to_le64(req_dma_addr);
+ pkt.data_max_size = cpu_to_le32(size);
+ pkt.nonce = cpu_to_le32(nonce);
+
+ rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt),
+ timeout, NULL);
+ if (rc) {
+ dev_err(hdev->dev,
+ "Failed to handle CPU-CP pkt %u, error %d\n", packet_id, rc);
+ goto out;
+ }
+
+ memcpy(data, req_cpu_addr, size);
+
+out:
+ hl_cpu_accessible_dma_pool_free(hdev, size, req_cpu_addr);
+
+ return rc;
+}
+
+int hl_fw_get_sec_attest_info(struct hl_device *hdev, struct cpucp_sec_attest_info *sec_attest_info,
+ u32 nonce)
+{
+ return hl_fw_get_sec_attest_data(hdev, CPUCP_PACKET_SEC_ATTEST_GET, sec_attest_info,
+ sizeof(struct cpucp_sec_attest_info), nonce,
+ HL_CPUCP_SEC_ATTEST_INFO_TINEOUT_USEC);
+}
diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h
index d59bba9e55c9..58c95b13be69 100644
--- a/drivers/misc/habanalabs/common/habanalabs.h
+++ b/drivers/misc/habanalabs/common/habanalabs.h
@@ -66,6 +66,7 @@ struct hl_fpriv;
#define HL_CPUCP_INFO_TIMEOUT_USEC 10000000 /* 10s */
#define HL_CPUCP_EEPROM_TIMEOUT_USEC 10000000 /* 10s */
#define HL_CPUCP_MON_DUMP_TIMEOUT_USEC 10000000 /* 10s */
+#define HL_CPUCP_SEC_ATTEST_INFO_TINEOUT_USEC 10000000 /* 10s */
#define HL_FW_STATUS_POLL_INTERVAL_USEC 10000 /* 10ms */
#define HL_FW_COMMS_STATUS_PLDM_POLL_INTERVAL_USEC 1000000 /* 1s */
@@ -94,7 +95,7 @@ struct hl_fpriv;
#define MMU_HASH_TABLE_BITS 7 /* 1 << 7 buckets */
/**
- * enum hl_mmu_page_table_locaion - mmu page table location
+ * enum hl_mmu_page_table_location - mmu page table location
* @MMU_DR_PGT: page-table is located on device DRAM.
* @MMU_HR_PGT: page-table is located on host memory.
* @MMU_NUM_PGT_LOCATIONS: number of page-table locations currently supported.
@@ -143,6 +144,25 @@ enum hl_mmu_enablement {
#define HL_MAX_DCORES 8
+/* DMA alloc/free wrappers */
+#define hl_asic_dma_alloc_coherent(hdev, size, dma_handle, flags) \
+ hl_asic_dma_alloc_coherent_caller(hdev, size, dma_handle, flags, __func__)
+
+#define hl_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle) \
+ hl_cpu_accessible_dma_pool_alloc_caller(hdev, size, dma_handle, __func__)
+
+#define hl_asic_dma_pool_zalloc(hdev, size, mem_flags, dma_handle) \
+ hl_asic_dma_pool_zalloc_caller(hdev, size, mem_flags, dma_handle, __func__)
+
+#define hl_asic_dma_free_coherent(hdev, size, cpu_addr, dma_handle) \
+ hl_asic_dma_free_coherent_caller(hdev, size, cpu_addr, dma_handle, __func__)
+
+#define hl_cpu_accessible_dma_pool_free(hdev, size, vaddr) \
+ hl_cpu_accessible_dma_pool_free_caller(hdev, size, vaddr, __func__)
+
+#define hl_asic_dma_pool_free(hdev, vaddr, dma_addr) \
+ hl_asic_dma_pool_free_caller(hdev, vaddr, dma_addr, __func__)
+
/*
* Reset Flags
*
@@ -208,6 +228,7 @@ enum hl_protection_levels {
* struct iterate_module_ctx - HW module iterator
* @fn: function to apply to each HW module instance
* @data: optional internal data to the function iterator
+ * @rc: return code for optional use of iterator/iterator-caller
*/
struct iterate_module_ctx {
/*
@@ -217,10 +238,12 @@ struct iterate_module_ctx {
* @inst: HW module instance within the block
* @offset: current HW module instance offset from the 1-st HW module instance
* in the 1-st block
- * @data: function specific data
+ * @ctx: the iterator context.
*/
- void (*fn)(struct hl_device *hdev, int block, int inst, u32 offset, void *data);
+ void (*fn)(struct hl_device *hdev, int block, int inst, u32 offset,
+ struct iterate_module_ctx *ctx);
void *data;
+ int rc;
};
struct hl_block_glbl_sec {
@@ -342,7 +365,8 @@ enum hl_cs_type {
CS_TYPE_WAIT,
CS_TYPE_COLLECTIVE_WAIT,
CS_RESERVE_SIGNALS,
- CS_UNRESERVE_SIGNALS
+ CS_UNRESERVE_SIGNALS,
+ CS_TYPE_ENGINE_CORE
};
/*
@@ -544,10 +568,6 @@ struct hl_hints_range {
* @tpc_binning_mask: which TPCs are binned. 0 means usable and 1 means binned.
* @dram_enabled_mask: which DRAMs are enabled.
* @dram_binning_mask: which DRAMs are binned. 0 means usable, 1 means binned.
- * @cb_va_start_addr: virtual start address of command buffers which are mapped
- * to the device's MMU.
- * @cb_va_end_addr: virtual end address of command buffers which are mapped to
- * the device's MMU.
* @dram_hints_align_mask: dram va hint addresses alignment mask which is used
* for hints validity check.
* @cfg_base_address: config space base address.
@@ -614,6 +634,7 @@ struct hl_hints_range {
* which the property supports_user_set_page_size is true
* (i.e. the DRAM supports multiple page sizes), otherwise
* it will shall be equal to dram_page_size.
+ * @num_engine_cores: number of engine cpu cores
* @collective_first_sob: first sync object available for collective use
* @collective_first_mon: first monitor available for collective use
* @sync_stream_first_sob: first sync object available for sync stream use
@@ -658,6 +679,7 @@ struct hl_hints_range {
* @set_max_power_on_device_init: true if need to set max power in F/W on device init.
* @supports_user_set_page_size: true if user can set the allocation page size.
* @dma_mask: the dma mask to be set for this device
+ * @supports_advanced_cpucp_rc: true if new cpucp opcodes are supported.
*/
struct asic_fixed_properties {
struct hw_queue_properties *hw_queues_props;
@@ -689,8 +711,6 @@ struct asic_fixed_properties {
u64 tpc_binning_mask;
u64 dram_enabled_mask;
u64 dram_binning_mask;
- u64 cb_va_start_addr;
- u64 cb_va_end_addr;
u64 dram_hints_align_mask;
u64 cfg_base_address;
u64 mmu_cache_mng_addr;
@@ -734,6 +754,7 @@ struct asic_fixed_properties {
u32 faulty_dram_cluster_map;
u32 xbar_edge_enabled_mask;
u32 device_mem_alloc_default_page_size;
+ u32 num_engine_cores;
u16 collective_first_sob;
u16 collective_first_mon;
u16 sync_stream_first_sob;
@@ -766,6 +787,7 @@ struct asic_fixed_properties {
u8 set_max_power_on_device_init;
u8 supports_user_set_page_size;
u8 dma_mask;
+ u8 supports_advanced_cpucp_rc;
};
/**
@@ -797,7 +819,7 @@ struct hl_fence {
* @lock: spinlock to protect fence.
* @hdev: habanalabs device structure.
* @hw_sob: the H/W SOB used in this signal/wait CS.
- * @encaps_sig_hdl: encaps signals hanlder.
+ * @encaps_sig_hdl: encaps signals handler.
* @cs_seq: command submission sequence number.
* @type: type of the CS - signal/wait.
* @sob_val: the SOB value that is used in this signal/wait CS.
@@ -898,14 +920,14 @@ struct hl_mmap_mem_buf {
* @buf: back pointer to the parent mappable memory buffer
* @debugfs_list: node in debugfs list of command buffers.
* @pool_list: node in pool list of command buffers.
- * @va_block_list: list of virtual addresses blocks of the CB if it is mapped to
- * the device's MMU.
* @kernel_address: Holds the CB's kernel virtual address.
+ * @virtual_addr: Holds the CB's virtual address.
* @bus_address: Holds the CB's DMA address.
* @size: holds the CB's size.
+ * @roundup_size: holds the cb size after roundup to page size.
* @cs_cnt: holds number of CS that this CB participates in.
* @is_pool: true if CB was acquired from the pool, false otherwise.
- * @is_internal: internaly allocated
+ * @is_internal: internally allocated
* @is_mmu_mapped: true if the CB is mapped to the device's MMU.
*/
struct hl_cb {
@@ -914,10 +936,11 @@ struct hl_cb {
struct hl_mmap_mem_buf *buf;
struct list_head debugfs_list;
struct list_head pool_list;
- struct list_head va_block_list;
void *kernel_address;
+ u64 virtual_addr;
dma_addr_t bus_address;
u32 size;
+ u32 roundup_size;
atomic_t cs_cnt;
u8 is_pool;
u8 is_internal;
@@ -1113,7 +1136,7 @@ struct timestamp_reg_info {
* @fence: hl fence object for interrupt completion
* @cq_target_value: CQ target value
* @cq_kernel_addr: CQ kernel address, to be used in the cq interrupt
- * handler for taget value comparison
+ * handler for target value comparison
*/
struct hl_user_pending_interrupt {
struct timestamp_reg_info ts_reg_info;
@@ -1372,6 +1395,18 @@ struct fw_load_mgr {
struct hl_cs;
/**
+ * struct engines_data - asic engines data
+ * @buf: buffer for engines data in ascii
+ * @actual_size: actual size of data that was written by the driver to the allocated buffer
+ * @allocated_buf_size: total size of allocated buffer
+ */
+struct engines_data {
+ char *buf;
+ int actual_size;
+ u32 allocated_buf_size;
+};
+
+/**
* struct hl_asic_funcs - ASIC specific functions that are can be called from
* common code.
* @early_init: sets up early driver state (pre sw_init), doesn't configure H/W.
@@ -1434,11 +1469,9 @@ struct hl_cs;
* @send_heartbeat: send is-alive packet to CPU-CP and verify response.
* @debug_coresight: perform certain actions on Coresight for debugging.
* @is_device_idle: return true if device is idle, false otherwise.
- * @non_hard_reset_late_init: perform certain actions needed after a reset which is not hard-reset
+ * @compute_reset_late_init: perform certain actions needed after a compute reset
* @hw_queues_lock: acquire H/W queues lock.
* @hw_queues_unlock: release H/W queues lock.
- * @kdma_lock: acquire H/W queues lock. Relevant from GRECO ASIC
- * @kdma_unlock: release H/W queues lock. Relevant from GRECO ASIC
* @get_pci_id: retrieve PCI ID.
* @get_eeprom_data: retrieve EEPROM data from F/W.
* @get_monitor_dump: retrieve monitor registers dump from F/W.
@@ -1498,6 +1531,8 @@ struct hl_cs;
* @check_if_razwi_happened: check if there was a razwi due to RR violation.
* @access_dev_mem: access device memory
* @set_dram_bar_base: set the base of the DRAM BAR
+ * @set_engine_cores: set a config command to enigne cores
+ * @send_device_activity: indication to FW about device availability
*/
struct hl_asic_funcs {
int (*early_init)(struct hl_device *hdev);
@@ -1570,13 +1605,11 @@ struct hl_asic_funcs {
int (*mmu_prefetch_cache_range)(struct hl_ctx *ctx, u32 flags, u32 asid, u64 va, u64 size);
int (*send_heartbeat)(struct hl_device *hdev);
int (*debug_coresight)(struct hl_device *hdev, struct hl_ctx *ctx, void *data);
- bool (*is_device_idle)(struct hl_device *hdev, u64 *mask_arr,
- u8 mask_len, struct seq_file *s);
- int (*non_hard_reset_late_init)(struct hl_device *hdev);
+ bool (*is_device_idle)(struct hl_device *hdev, u64 *mask_arr, u8 mask_len,
+ struct engines_data *e);
+ int (*compute_reset_late_init)(struct hl_device *hdev);
void (*hw_queues_lock)(struct hl_device *hdev);
void (*hw_queues_unlock)(struct hl_device *hdev);
- void (*kdma_lock)(struct hl_device *hdev, int dcore_id);
- void (*kdma_unlock)(struct hl_device *hdev, int dcore_id);
u32 (*get_pci_id)(struct hl_device *hdev);
int (*get_eeprom_data)(struct hl_device *hdev, void *data, size_t max_size);
int (*get_monitor_dump)(struct hl_device *hdev, void *data);
@@ -1634,6 +1667,9 @@ struct hl_asic_funcs {
int (*access_dev_mem)(struct hl_device *hdev, enum pci_region region_type,
u64 addr, u64 *val, enum debugfs_access_type acc_type);
u64 (*set_dram_bar_base)(struct hl_device *hdev, u64 addr);
+ int (*set_engine_cores)(struct hl_device *hdev, u32 *core_ids,
+ u32 num_cores, u32 core_command);
+ int (*send_device_activity)(struct hl_device *hdev, bool open);
};
@@ -1727,10 +1763,10 @@ struct hl_cs_outcome {
/**
* struct hl_cs_outcome_store - represents a limited store of completed CS outcomes
- * @outcome_map: index of completed CS searcheable by sequence number
+ * @outcome_map: index of completed CS searchable by sequence number
* @used_list: list of outcome objects currently in use
* @free_list: list of outcome objects currently not in use
- * @nodes_pool: a static pool of preallocated outcome objects
+ * @nodes_pool: a static pool of pre-allocated outcome objects
* @db_lock: any operation on the store must take this lock
*/
struct hl_cs_outcome_store {
@@ -1754,12 +1790,10 @@ struct hl_cs_outcome_store {
* @refcount: reference counter for the context. Context is released only when
* this hits 0l. It is incremented on CS and CS_WAIT.
* @cs_pending: array of hl fence objects representing pending CS.
- * @outcome_store: storage data structure used to remember ouitcomes of completed
+ * @outcome_store: storage data structure used to remember outcomes of completed
* command submissions for a long time after CS id wraparound.
* @va_range: holds available virtual addresses for host and dram mappings.
* @mem_hash_lock: protects the mem_hash.
- * @mmu_lock: protects the MMU page tables. Any change to the PGT, modifying the
- * MMU hash or walking the PGT requires talking this lock.
* @hw_block_list_lock: protects the HW block memory list.
* @debugfs_list: node in debugfs list of contexts.
* @hw_block_mem_list: list of HW block virtual mapped addresses.
@@ -1767,6 +1801,7 @@ struct hl_cs_outcome_store {
* @cb_va_pool: device VA pool for command buffers which are mapped to the
* device's MMU.
* @sig_mgr: encaps signals handle manager.
+ * @cb_va_pool_base: the base address for the device VA pool
* @cs_sequence: sequence number for CS. Value is assigned to a CS and passed
* to user so user could inquire about CS. It is used as
* index to cs_pending array.
@@ -1795,13 +1830,13 @@ struct hl_ctx {
struct hl_cs_outcome_store outcome_store;
struct hl_va_range *va_range[HL_VA_RANGE_TYPE_MAX];
struct mutex mem_hash_lock;
- struct mutex mmu_lock;
struct mutex hw_block_list_lock;
struct list_head debugfs_list;
struct list_head hw_block_mem_list;
struct hl_cs_counters_atomic cs_counters;
struct gen_pool *cb_va_pool;
struct hl_encaps_signals_mgr sig_mgr;
+ u64 cb_va_pool_base;
u64 cs_sequence;
u64 *dram_default_hops;
spinlock_t cs_lock;
@@ -1823,7 +1858,6 @@ struct hl_ctx_mgr {
};
-
/*
* COMMAND SUBMISSIONS
*/
@@ -1889,7 +1923,7 @@ struct hl_userptr {
* @tdr_active: true if TDR was activated for this CS (to prevent
* double TDR activation).
* @aborted: true if CS was aborted due to some device error.
- * @timestamp: true if a timestmap must be captured upon completion.
+ * @timestamp: true if a timestamp must be captured upon completion.
* @staged_last: true if this is the last staged CS and needs completion.
* @staged_first: true if this is the first staged CS and we need to receive
* timeout for this CS.
@@ -2047,14 +2081,16 @@ struct hl_vm_hash_node {
* @node: node to hang on the list in context object.
* @ctx: the context this node belongs to.
* @vaddr: virtual address of the HW block.
- * @size: size of the block.
+ * @block_size: size of the block.
+ * @mapped_size: size of the block which is mapped. May change if partial un-mappings are done.
* @id: HW block id (handle).
*/
struct hl_vm_hw_block_list_node {
struct list_head node;
struct hl_ctx *ctx;
unsigned long vaddr;
- u32 size;
+ u32 block_size;
+ u32 mapped_size;
u32 id;
};
@@ -2214,7 +2250,7 @@ struct hl_info_list {
/**
* struct hl_debugfs_entry - debugfs dentry wrapper.
- * @info_ent: dentry realted ops.
+ * @info_ent: dentry related ops.
* @dev_entry: ASIC specific debugfs manager.
*/
struct hl_debugfs_entry {
@@ -2492,7 +2528,7 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
break; \
(val) = __elbi_read; \
} else {\
- (val) = RREG32((u32)addr); \
+ (val) = RREG32((u32)(addr)); \
} \
if (cond) \
break; \
@@ -2503,7 +2539,7 @@ void hl_wreg(struct hl_device *hdev, u32 reg, u32 val);
break; \
(val) = __elbi_read; \
} else {\
- (val) = RREG32((u32)addr); \
+ (val) = RREG32((u32)(addr)); \
} \
break; \
} \
@@ -2919,7 +2955,7 @@ struct razwi_info {
* struct undefined_opcode_info - info about last undefined opcode error
* @timestamp: timestamp of the undefined opcode error
* @cb_addr_streams: CB addresses (per stream) that are currently exists in the PQ
- * entiers. In case all streams array entries are
+ * entries. In case all streams array entries are
* filled with values, it means the execution was in Lower-CP.
* @cq_addr: the address of the current handled command buffer
* @cq_size: the size of the current handled command buffer
@@ -2946,12 +2982,12 @@ struct undefined_opcode_info {
};
/**
- * struct last_error_session_info - info about last session errors occurred.
- * @cs_timeout: CS timeout error last information.
- * @razwi: razwi last information.
+ * struct hl_error_info - holds information collected during an error.
+ * @cs_timeout: CS timeout error information.
+ * @razwi: razwi information.
* @undef_opcode: undefined opcode information
*/
-struct last_error_session_info {
+struct hl_error_info {
struct cs_timeout_info cs_timeout;
struct razwi_info razwi;
struct undefined_opcode_info undef_opcode;
@@ -2960,7 +2996,7 @@ struct last_error_session_info {
/**
* struct hl_reset_info - holds current device reset information.
* @lock: lock to protect critical reset flows.
- * @compute_reset_cnt: number of compte resets since the driver was loaded.
+ * @compute_reset_cnt: number of compute resets since the driver was loaded.
* @hard_reset_cnt: number of hard resets since the driver was loaded.
* @hard_reset_schedule_flags: hard reset is scheduled to after current compute reset,
* here we hold the hard reset flags.
@@ -2971,7 +3007,7 @@ struct last_error_session_info {
* @hard_reset_pending: is there a hard reset work pending.
* @curr_reset_cause: saves an enumerated reset cause when a hard reset is
* triggered, and cleared after it is shared with preboot.
- * @prev_reset_trigger: saves the previous trigger which caused a reset, overidden
+ * @prev_reset_trigger: saves the previous trigger which caused a reset, overridden
* with a new value on next reset
* @reset_trigger_repeated: set if device reset is triggered more than once with
* same cause.
@@ -3041,6 +3077,12 @@ struct hl_reset_info {
* @asid_mutex: protects asid_bitmap.
* @send_cpu_message_lock: enforces only one message in Host <-> CPU-CP queue.
* @debug_lock: protects critical section of setting debug mode for device
+ * @mmu_lock: protects the MMU page tables and invalidation h/w. Although the
+ * page tables are per context, the invalidation h/w is per MMU.
+ * Therefore, we can't allow multiple contexts (we only have two,
+ * user and kernel) to access the invalidation h/w at the same time.
+ * In addition, any change to the PGT, modifying the MMU hash or
+ * walking the PGT requires talking this lock.
* @asic_prop: ASIC specific immutable properties.
* @asic_funcs: ASIC specific functions.
* @asic_specific: ASIC specific information to use only from ASIC files.
@@ -3049,7 +3091,7 @@ struct hl_reset_info {
* @hl_chip_info: ASIC's sensors information.
* @device_status_description: device status description.
* @hl_debugfs: device's debugfs manager.
- * @cb_pool: list of preallocated CBs.
+ * @cb_pool: list of pre allocated CBs.
* @cb_pool_lock: protects the CB pool.
* @internal_cb_pool_virt_addr: internal command buffer pool virtual address.
* @internal_cb_pool_dma_addr: internal command buffer pool dma address.
@@ -3070,7 +3112,7 @@ struct hl_reset_info {
* @state_dump_specs: constants and dictionaries needed to dump system state.
* @multi_cs_completion: array of multi-CS completion.
* @clk_throttling: holds information about current/previous clock throttling events
- * @last_error: holds information about last session in which CS timeout or razwi error occurred.
+ * @captured_err_info: holds information about errors.
* @reset_info: holds current device reset information.
* @stream_master_qid_arr: pointer to array with QIDs of master streams.
* @fw_major_version: major version of current loaded preboot.
@@ -3111,7 +3153,8 @@ struct hl_reset_info {
* @edma_binning: contains mask of edma engines that is received from the f/w which
* indicates which edma engines are binned-out
* @id: device minor.
- * @id_control: minor of the control device
+ * @id_control: minor of the control device.
+ * @cdev_idx: char device index. Used for setting its name.
* @cpu_pci_msb_addr: 50-bit extension bits for the device CPU's 40-bit
* addresses.
* @is_in_dram_scrub: true if dram scrub operation is on going.
@@ -3165,6 +3208,7 @@ struct hl_reset_info {
* Used only for testing.
* @heartbeat: Controls if we want to enable the heartbeat mechanism vs. the f/w, which verifies
* that the f/w is always alive. Used only for testing.
+ * @supports_ctx_switch: true if a ctx switch is required upon first submission.
*/
struct hl_device {
struct pci_dev *pdev;
@@ -3204,6 +3248,7 @@ struct hl_device {
struct mutex asid_mutex;
struct mutex send_cpu_message_lock;
struct mutex debug_lock;
+ struct mutex mmu_lock;
struct asic_fixed_properties asic_prop;
const struct hl_asic_funcs *asic_funcs;
void *asic_specific;
@@ -3242,7 +3287,7 @@ struct hl_device {
struct multi_cs_completion multi_cs_completion[
MULTI_CS_MAX_USER_CTX];
struct hl_clk_throttle clk_throttling;
- struct last_error_session_info last_error;
+ struct hl_error_info captured_err_info;
struct hl_reset_info reset_info;
@@ -3271,6 +3316,7 @@ struct hl_device {
u32 edma_binning;
u16 id;
u16 id_control;
+ u16 cdev_idx;
u16 cpu_pci_msb_addr;
u8 is_in_dram_scrub;
u8 disabled;
@@ -3300,6 +3346,7 @@ struct hl_device {
u8 compute_ctx_in_release;
u8 supports_mmu_prefetch;
u8 reset_upon_device_release;
+ u8 supports_ctx_switch;
/* Parameters for bring-up */
u64 nic_ports_mask;
@@ -3426,15 +3473,18 @@ static inline bool hl_mem_area_crosses_range(u64 address, u32 size,
}
uint64_t hl_set_dram_bar_default(struct hl_device *hdev, u64 addr);
-void *hl_asic_dma_alloc_coherent(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle,
- gfp_t flag);
-void hl_asic_dma_free_coherent(struct hl_device *hdev, size_t size, void *cpu_addr,
- dma_addr_t dma_handle);
-void *hl_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle);
-void hl_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr);
-void *hl_asic_dma_pool_zalloc(struct hl_device *hdev, size_t size, gfp_t mem_flags,
- dma_addr_t *dma_handle);
-void hl_asic_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr);
+void *hl_asic_dma_alloc_coherent_caller(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle,
+ gfp_t flag, const char *caller);
+void hl_asic_dma_free_coherent_caller(struct hl_device *hdev, size_t size, void *cpu_addr,
+ dma_addr_t dma_handle, const char *caller);
+void *hl_cpu_accessible_dma_pool_alloc_caller(struct hl_device *hdev, size_t size,
+ dma_addr_t *dma_handle, const char *caller);
+void hl_cpu_accessible_dma_pool_free_caller(struct hl_device *hdev, size_t size, void *vaddr,
+ const char *caller);
+void *hl_asic_dma_pool_zalloc_caller(struct hl_device *hdev, size_t size, gfp_t mem_flags,
+ dma_addr_t *dma_handle, const char *caller);
+void hl_asic_dma_pool_free_caller(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr,
+ const char *caller);
int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir);
void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt,
enum dma_data_direction dir);
@@ -3513,6 +3563,7 @@ void hl_sysfs_fini(struct hl_device *hdev);
int hl_hwmon_init(struct hl_device *hdev);
void hl_hwmon_fini(struct hl_device *hdev);
+void hl_hwmon_release_resources(struct hl_device *hdev);
int hl_cb_create(struct hl_device *hdev, struct hl_mem_mgr *mmg,
struct hl_ctx *ctx, u32 cb_size, bool internal_cb,
@@ -3557,7 +3608,7 @@ void hl_hw_block_mem_init(struct hl_ctx *ctx);
void hl_hw_block_mem_fini(struct hl_ctx *ctx);
u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
- enum hl_va_range_type type, u32 size, u32 alignment);
+ enum hl_va_range_type type, u64 size, u32 alignment);
int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
u64 start_addr, u64 size);
int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
@@ -3674,6 +3725,7 @@ int hl_fw_dram_replaced_row_get(struct hl_device *hdev,
struct cpucp_hbm_row_info *info);
int hl_fw_dram_pending_row_get(struct hl_device *hdev, u32 *pend_rows_num);
int hl_fw_cpucp_engine_core_asid_set(struct hl_device *hdev, u32 asid);
+int hl_fw_send_device_activity(struct hl_device *hdev, bool open);
int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3],
bool is_wc[3]);
int hl_pci_elbi_read(struct hl_device *hdev, u64 addr, u32 *data);
@@ -3697,6 +3749,8 @@ int hl_get_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, long *va
void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, long value);
long hl_fw_get_max_power(struct hl_device *hdev);
void hl_fw_set_max_power(struct hl_device *hdev);
+int hl_fw_get_sec_attest_info(struct hl_device *hdev, struct cpucp_sec_attest_info *sec_attest_info,
+ u32 nonce);
int hl_set_voltage(struct hl_device *hdev, int sensor_index, u32 attr, long value);
int hl_set_current(struct hl_device *hdev, int sensor_index, u32 attr, long value);
int hl_set_power(struct hl_device *hdev, int sensor_index, u32 attr, long value);
@@ -3743,6 +3797,7 @@ struct hl_mmap_mem_buf *
hl_mmap_mem_buf_alloc(struct hl_mem_mgr *mmg,
struct hl_mmap_mem_buf_behavior *behavior, gfp_t gfp,
void *args);
+__printf(2, 3) void hl_engine_data_sprintf(struct engines_data *e, const char *fmt, ...);
#ifdef CONFIG_DEBUG_FS
diff --git a/drivers/misc/habanalabs/common/habanalabs_drv.c b/drivers/misc/habanalabs/common/habanalabs_drv.c
index f733ead605e7..112632afe7d5 100644
--- a/drivers/misc/habanalabs/common/habanalabs_drv.c
+++ b/drivers/misc/habanalabs/common/habanalabs_drv.c
@@ -14,6 +14,9 @@
#include <linux/aer.h>
#include <linux/module.h>
+#define CREATE_TRACE_POINTS
+#include <trace/events/habanalabs.h>
+
#define HL_DRIVER_AUTHOR "HabanaLabs Kernel Driver Team"
#define HL_DRIVER_DESC "Driver for HabanaLabs's AI Accelerators"
@@ -27,7 +30,10 @@ static struct class *hl_class;
static DEFINE_IDR(hl_devs_idr);
static DEFINE_MUTEX(hl_devs_idr_lock);
-static int timeout_locked = 30;
+#define HL_DEFAULT_TIMEOUT_LOCKED 30 /* 30 seconds */
+#define GAUDI_DEFAULT_TIMEOUT_LOCKED 600 /* 10 minutes */
+
+static int timeout_locked = HL_DEFAULT_TIMEOUT_LOCKED;
static int reset_on_lockup = 1;
static int memory_scrub;
static ulong boot_error_status_mask = ULONG_MAX;
@@ -55,14 +61,12 @@ MODULE_PARM_DESC(boot_error_status_mask,
#define PCI_IDS_GAUDI_SEC 0x1010
#define PCI_IDS_GAUDI2 0x1020
-#define PCI_IDS_GAUDI2_SEC 0x1030
static const struct pci_device_id ids[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), },
{ PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI), },
{ PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI_SEC), },
{ PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI2), },
- { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI2_SEC), },
{ 0, }
};
MODULE_DEVICE_TABLE(pci, ids);
@@ -92,9 +96,6 @@ static enum hl_asic_type get_asic_type(u16 device)
case PCI_IDS_GAUDI2:
asic_type = ASIC_GAUDI2;
break;
- case PCI_IDS_GAUDI2_SEC:
- asic_type = ASIC_GAUDI2_SEC;
- break;
default:
asic_type = ASIC_INVALID;
break;
@@ -107,7 +108,6 @@ static bool is_asic_secured(enum hl_asic_type asic_type)
{
switch (asic_type) {
case ASIC_GAUDI_SEC:
- case ASIC_GAUDI2_SEC:
return true;
default:
return false;
@@ -161,7 +161,7 @@ int hl_device_open(struct inode *inode, struct file *filp)
mutex_lock(&hdev->fpriv_list_lock);
if (!hl_device_operational(hdev, &status)) {
- dev_err_ratelimited(hdev->dev,
+ dev_dbg_ratelimited(hdev->dev,
"Can't open %s because it is %s\n",
dev_name(hdev->dev), hdev->status[status]);
@@ -207,11 +207,13 @@ int hl_device_open(struct inode *inode, struct file *filp)
list_add(&hpriv->dev_node, &hdev->fpriv_list);
mutex_unlock(&hdev->fpriv_list_lock);
+ hdev->asic_funcs->send_device_activity(hdev, true);
+
hl_debugfs_add_file(hpriv);
- atomic_set(&hdev->last_error.cs_timeout.write_enable, 1);
- atomic_set(&hdev->last_error.razwi.write_enable, 1);
- hdev->last_error.undef_opcode.write_enable = true;
+ atomic_set(&hdev->captured_err_info.cs_timeout.write_enable, 1);
+ atomic_set(&hdev->captured_err_info.razwi.write_enable, 1);
+ hdev->captured_err_info.undef_opcode.write_enable = true;
hdev->open_counter++;
hdev->last_successful_open_jif = jiffies;
@@ -269,7 +271,7 @@ int hl_device_open_ctrl(struct inode *inode, struct file *filp)
mutex_lock(&hdev->fpriv_ctrl_list_lock);
if (!hl_device_operational(hdev, NULL)) {
- dev_err_ratelimited(hdev->dev_ctrl,
+ dev_dbg_ratelimited(hdev->dev_ctrl,
"Can't open %s because it is disabled or in reset\n",
dev_name(hdev->dev_ctrl));
rc = -EPERM;
@@ -314,12 +316,22 @@ static void copy_kernel_module_params_to_device(struct hl_device *hdev)
hdev->boot_error_status_mask = boot_error_status_mask;
}
-static void fixup_device_params_per_asic(struct hl_device *hdev)
+static void fixup_device_params_per_asic(struct hl_device *hdev, int timeout)
{
switch (hdev->asic_type) {
- case ASIC_GOYA:
case ASIC_GAUDI:
case ASIC_GAUDI_SEC:
+ /* If user didn't request a different timeout than the default one, we have
+ * a different default timeout for Gaudi
+ */
+ if (timeout == HL_DEFAULT_TIMEOUT_LOCKED)
+ hdev->timeout_jiffies = msecs_to_jiffies(GAUDI_DEFAULT_TIMEOUT_LOCKED *
+ MSEC_PER_SEC);
+
+ hdev->reset_upon_device_release = 0;
+ break;
+
+ case ASIC_GOYA:
hdev->reset_upon_device_release = 0;
break;
@@ -339,7 +351,7 @@ static int fixup_device_params(struct hl_device *hdev)
hdev->fw_comms_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC;
if (tmp_timeout)
- hdev->timeout_jiffies = msecs_to_jiffies(tmp_timeout * 1000);
+ hdev->timeout_jiffies = msecs_to_jiffies(tmp_timeout * MSEC_PER_SEC);
else
hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT;
@@ -360,7 +372,7 @@ static int fixup_device_params(struct hl_device *hdev)
if (!hdev->cpu_queues_enable)
hdev->heartbeat = 0;
- fixup_device_params_per_asic(hdev);
+ fixup_device_params_per_asic(hdev, tmp_timeout);
return 0;
}
diff --git a/drivers/misc/habanalabs/common/habanalabs_ioctl.c b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
index 6a30bd98ab5e..43afe40966e5 100644
--- a/drivers/misc/habanalabs/common/habanalabs_ioctl.c
+++ b/drivers/misc/habanalabs/common/habanalabs_ioctl.c
@@ -14,6 +14,7 @@
#include <linux/fs.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
+#include <linux/vmalloc.h>
static u32 hl_debug_struct_size[HL_DEBUG_OP_TIMESTAMP + 1] = {
[HL_DEBUG_OP_ETR] = sizeof(struct hl_debug_params_etr),
@@ -103,6 +104,7 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
hw_ip.edma_enabled_mask = prop->edma_enabled_mask;
hw_ip.server_type = prop->server_type;
+ hw_ip.security_enabled = prop->fw_security_enabled;
return copy_to_user(out, &hw_ip,
min((size_t) size, sizeof(hw_ip))) ? -EFAULT : 0;
@@ -591,8 +593,8 @@ static int cs_timeout_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
if ((!max_size) || (!out))
return -EINVAL;
- info.seq = hdev->last_error.cs_timeout.seq;
- info.timestamp = ktime_to_ns(hdev->last_error.cs_timeout.timestamp);
+ info.seq = hdev->captured_err_info.cs_timeout.seq;
+ info.timestamp = ktime_to_ns(hdev->captured_err_info.cs_timeout.timestamp);
return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
}
@@ -607,12 +609,12 @@ static int razwi_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
if ((!max_size) || (!out))
return -EINVAL;
- info.timestamp = ktime_to_ns(hdev->last_error.razwi.timestamp);
- info.addr = hdev->last_error.razwi.addr;
- info.engine_id_1 = hdev->last_error.razwi.engine_id_1;
- info.engine_id_2 = hdev->last_error.razwi.engine_id_2;
- info.no_engine_id = hdev->last_error.razwi.non_engine_initiator;
- info.error_type = hdev->last_error.razwi.type;
+ info.timestamp = ktime_to_ns(hdev->captured_err_info.razwi.timestamp);
+ info.addr = hdev->captured_err_info.razwi.addr;
+ info.engine_id_1 = hdev->captured_err_info.razwi.engine_id_1;
+ info.engine_id_2 = hdev->captured_err_info.razwi.engine_id_2;
+ info.no_engine_id = hdev->captured_err_info.razwi.non_engine_initiator;
+ info.error_type = hdev->captured_err_info.razwi.type;
return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
}
@@ -627,13 +629,13 @@ static int undefined_opcode_info(struct hl_fpriv *hpriv, struct hl_info_args *ar
if ((!max_size) || (!out))
return -EINVAL;
- info.timestamp = ktime_to_ns(hdev->last_error.undef_opcode.timestamp);
- info.engine_id = hdev->last_error.undef_opcode.engine_id;
- info.cq_addr = hdev->last_error.undef_opcode.cq_addr;
- info.cq_size = hdev->last_error.undef_opcode.cq_size;
- info.stream_id = hdev->last_error.undef_opcode.stream_id;
- info.cb_addr_streams_len = hdev->last_error.undef_opcode.cb_addr_streams_len;
- memcpy(info.cb_addr_streams, hdev->last_error.undef_opcode.cb_addr_streams,
+ info.timestamp = ktime_to_ns(hdev->captured_err_info.undef_opcode.timestamp);
+ info.engine_id = hdev->captured_err_info.undef_opcode.engine_id;
+ info.cq_addr = hdev->captured_err_info.undef_opcode.cq_addr;
+ info.cq_size = hdev->captured_err_info.undef_opcode.cq_size;
+ info.stream_id = hdev->captured_err_info.undef_opcode.stream_id;
+ info.cb_addr_streams_len = hdev->captured_err_info.undef_opcode.cb_addr_streams_len;
+ memcpy(info.cb_addr_streams, hdev->captured_err_info.undef_opcode.cb_addr_streams,
sizeof(info.cb_addr_streams));
return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
@@ -660,6 +662,55 @@ static int dev_mem_alloc_page_sizes_info(struct hl_fpriv *hpriv, struct hl_info_
return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0;
}
+static int sec_attest_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+ void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+ struct cpucp_sec_attest_info *sec_attest_info;
+ struct hl_info_sec_attest *info;
+ u32 max_size = args->return_size;
+ int rc;
+
+ if ((!max_size) || (!out))
+ return -EINVAL;
+
+ sec_attest_info = kmalloc(sizeof(*sec_attest_info), GFP_KERNEL);
+ if (!sec_attest_info)
+ return -ENOMEM;
+
+ info = kmalloc(sizeof(*info), GFP_KERNEL);
+ if (!info) {
+ rc = -ENOMEM;
+ goto free_sec_attest_info;
+ }
+
+ rc = hl_fw_get_sec_attest_info(hpriv->hdev, sec_attest_info, args->sec_attest_nonce);
+ if (rc)
+ goto free_info;
+
+ info->nonce = le32_to_cpu(sec_attest_info->nonce);
+ info->pcr_quote_len = le16_to_cpu(sec_attest_info->pcr_quote_len);
+ info->pub_data_len = le16_to_cpu(sec_attest_info->pub_data_len);
+ info->certificate_len = le16_to_cpu(sec_attest_info->certificate_len);
+ info->pcr_num_reg = sec_attest_info->pcr_num_reg;
+ info->pcr_reg_len = sec_attest_info->pcr_reg_len;
+ info->quote_sig_len = sec_attest_info->quote_sig_len;
+ memcpy(&info->pcr_data, &sec_attest_info->pcr_data, sizeof(info->pcr_data));
+ memcpy(&info->pcr_quote, &sec_attest_info->pcr_quote, sizeof(info->pcr_quote));
+ memcpy(&info->public_data, &sec_attest_info->public_data, sizeof(info->public_data));
+ memcpy(&info->certificate, &sec_attest_info->certificate, sizeof(info->certificate));
+ memcpy(&info->quote_sig, &sec_attest_info->quote_sig, sizeof(info->quote_sig));
+
+ rc = copy_to_user(out, info,
+ min_t(size_t, max_size, sizeof(*info))) ? -EFAULT : 0;
+
+free_info:
+ kfree(info);
+free_sec_attest_info:
+ kfree(sec_attest_info);
+
+ return rc;
+}
+
static int eventfd_register(struct hl_fpriv *hpriv, struct hl_info_args *args)
{
int rc;
@@ -697,6 +748,42 @@ static int eventfd_unregister(struct hl_fpriv *hpriv, struct hl_info_args *args)
return 0;
}
+static int engine_status_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
+{
+ void __user *out = (void __user *) (uintptr_t) args->return_pointer;
+ u32 status_buf_size = args->return_size;
+ struct hl_device *hdev = hpriv->hdev;
+ struct engines_data eng_data;
+ int rc;
+
+ if ((status_buf_size < SZ_1K) || (status_buf_size > HL_ENGINES_DATA_MAX_SIZE) || (!out))
+ return -EINVAL;
+
+ eng_data.actual_size = 0;
+ eng_data.allocated_buf_size = status_buf_size;
+ eng_data.buf = vmalloc(status_buf_size);
+ if (!eng_data.buf)
+ return -ENOMEM;
+
+ hdev->asic_funcs->is_device_idle(hdev, NULL, 0, &eng_data);
+
+ if (eng_data.actual_size > eng_data.allocated_buf_size) {
+ dev_err(hdev->dev,
+ "Engines data size (%d Bytes) is bigger than allocated size (%u Bytes)\n",
+ eng_data.actual_size, status_buf_size);
+ vfree(eng_data.buf);
+ return -ENOMEM;
+ }
+
+ args->user_buffer_actual_size = eng_data.actual_size;
+ rc = copy_to_user(out, eng_data.buf, min_t(size_t, status_buf_size, eng_data.actual_size)) ?
+ -EFAULT : 0;
+
+ vfree(eng_data.buf);
+
+ return rc;
+}
+
static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
struct device *dev)
{
@@ -806,12 +893,18 @@ static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data,
case HL_INFO_DRAM_PENDING_ROWS:
return dram_pending_rows_info(hpriv, args);
+ case HL_INFO_SECURED_ATTESTATION:
+ return sec_attest_info(hpriv, args);
+
case HL_INFO_REGISTER_EVENTFD:
return eventfd_register(hpriv, args);
case HL_INFO_UNREGISTER_EVENTFD:
return eventfd_unregister(hpriv, args);
+ case HL_INFO_ENGINE_STATUS:
+ return engine_status_info(hpriv, args);
+
default:
dev_err(dev, "Invalid request %d\n", args->op);
rc = -EINVAL;
diff --git a/drivers/misc/habanalabs/common/hw_queue.c b/drivers/misc/habanalabs/common/hw_queue.c
index 3f15ab9d827f..d0087c0ec48c 100644
--- a/drivers/misc/habanalabs/common/hw_queue.c
+++ b/drivers/misc/habanalabs/common/hw_queue.c
@@ -826,9 +826,7 @@ static int ext_and_cpu_queue_init(struct hl_device *hdev, struct hl_hw_queue *q,
q->kernel_address = p;
- q->shadow_queue = kmalloc_array(HL_QUEUE_LENGTH,
- sizeof(*q->shadow_queue),
- GFP_KERNEL);
+ q->shadow_queue = kmalloc_array(HL_QUEUE_LENGTH, sizeof(struct hl_cs_job *), GFP_KERNEL);
if (!q->shadow_queue) {
dev_err(hdev->dev,
"Failed to allocate shadow queue for H/W queue %d\n",
diff --git a/drivers/misc/habanalabs/common/hwmon.c b/drivers/misc/habanalabs/common/hwmon.c
index 57f5d2c48330..55eb0203817f 100644
--- a/drivers/misc/habanalabs/common/hwmon.c
+++ b/drivers/misc/habanalabs/common/hwmon.c
@@ -194,7 +194,8 @@ int hl_build_hwmon_channel_info(struct hl_device *hdev, struct cpucp_sensor *sen
curr_arr[sensors_by_type_next_index[type]++] = flags;
}
- channels_info = kcalloc(num_active_sensor_types + 1, sizeof(*channels_info), GFP_KERNEL);
+ channels_info = kcalloc(num_active_sensor_types + 1, sizeof(struct hwmon_channel_info *),
+ GFP_KERNEL);
if (!channels_info) {
rc = -ENOMEM;
goto channels_info_array_err;
@@ -910,3 +911,24 @@ void hl_hwmon_fini(struct hl_device *hdev)
hwmon_device_unregister(hdev->hwmon_dev);
}
+
+void hl_hwmon_release_resources(struct hl_device *hdev)
+{
+ const struct hwmon_channel_info **channel_info_arr;
+ int i = 0;
+
+ if (!hdev->hl_chip_info->info)
+ return;
+
+ channel_info_arr = hdev->hl_chip_info->info;
+
+ while (channel_info_arr[i]) {
+ kfree(channel_info_arr[i]->config);
+ kfree(channel_info_arr[i]);
+ i++;
+ }
+
+ kfree(channel_info_arr);
+
+ hdev->hl_chip_info->info = NULL;
+}
diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index 61bc1bfe984a..ef28f3b37b93 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -457,7 +457,7 @@ static void merge_va_blocks_locked(struct hl_device *hdev,
prev = list_prev_entry(va_block, node);
if (&prev->node != va_list && prev->end + 1 == va_block->start) {
prev->end = va_block->end;
- prev->size = prev->end - prev->start;
+ prev->size = prev->end - prev->start + 1;
list_del(&va_block->node);
kfree(va_block);
va_block = prev;
@@ -466,7 +466,7 @@ static void merge_va_blocks_locked(struct hl_device *hdev,
next = list_next_entry(va_block, node);
if (&next->node != va_list && va_block->end + 1 == next->start) {
next->start = va_block->start;
- next->size = next->end - next->start;
+ next->size = next->end - next->start + 1;
list_del(&va_block->node);
kfree(va_block);
}
@@ -755,7 +755,7 @@ out:
* - Return the start address of the virtual block.
*/
u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
- enum hl_va_range_type type, u32 size, u32 alignment)
+ enum hl_va_range_type type, u64 size, u32 alignment)
{
return get_va_block(hdev, ctx->va_range[type], size, 0,
max(alignment, ctx->va_range[type]->page_size),
@@ -1210,18 +1210,18 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, u64 *device
goto va_block_err;
}
- mutex_lock(&ctx->mmu_lock);
+ mutex_lock(&hdev->mmu_lock);
rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack);
if (rc) {
dev_err(hdev->dev, "mapping page pack failed for handle %u\n", handle);
- mutex_unlock(&ctx->mmu_lock);
+ mutex_unlock(&hdev->mmu_lock);
goto map_err;
}
rc = hl_mmu_invalidate_cache_range(hdev, false, *vm_type | MMU_OP_SKIP_LOW_CACHE_INV,
ctx->asid, ret_vaddr, phys_pg_pack->total_size);
- mutex_unlock(&ctx->mmu_lock);
+ mutex_unlock(&hdev->mmu_lock);
if (rc)
goto map_err;
@@ -1362,7 +1362,7 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
else
vaddr &= ~(((u64) phys_pg_pack->page_size) - 1);
- mutex_lock(&ctx->mmu_lock);
+ mutex_lock(&hdev->mmu_lock);
unmap_phys_pg_pack(ctx, vaddr, phys_pg_pack);
@@ -1375,7 +1375,7 @@ static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
rc = hl_mmu_invalidate_cache_range(hdev, true, *vm_type, ctx->asid, vaddr,
phys_pg_pack->total_size);
- mutex_unlock(&ctx->mmu_lock);
+ mutex_unlock(&hdev->mmu_lock);
/*
* If the context is closing we don't need to check for the MMU cache
@@ -1418,18 +1418,23 @@ vm_type_err:
return rc;
}
-static int map_block(struct hl_device *hdev, u64 address, u64 *handle,
- u32 *size)
+static int map_block(struct hl_device *hdev, u64 address, u64 *handle, u32 *size)
{
- u32 block_id = 0;
+ u32 block_id;
int rc;
+ *handle = 0;
+ if (size)
+ *size = 0;
+
rc = hdev->asic_funcs->get_hw_block_id(hdev, address, size, &block_id);
+ if (rc)
+ return rc;
*handle = block_id | HL_MMAP_TYPE_BLOCK;
*handle <<= PAGE_SHIFT;
- return rc;
+ return 0;
}
static void hw_block_vm_close(struct vm_area_struct *vma)
@@ -1437,6 +1442,13 @@ static void hw_block_vm_close(struct vm_area_struct *vma)
struct hl_vm_hw_block_list_node *lnode =
(struct hl_vm_hw_block_list_node *) vma->vm_private_data;
struct hl_ctx *ctx = lnode->ctx;
+ long new_mmap_size;
+
+ new_mmap_size = lnode->mapped_size - (vma->vm_end - vma->vm_start);
+ if (new_mmap_size > 0) {
+ lnode->mapped_size = new_mmap_size;
+ return;
+ }
mutex_lock(&ctx->hw_block_list_lock);
list_del(&lnode->node);
@@ -1487,23 +1499,23 @@ int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
if (!lnode)
return -ENOMEM;
- vma->vm_ops = &hw_block_vm_ops;
- vma->vm_private_data = lnode;
-
- hl_ctx_get(ctx);
-
rc = hdev->asic_funcs->hw_block_mmap(hdev, vma, block_id, block_size);
if (rc) {
- hl_ctx_put(ctx);
kfree(lnode);
return rc;
}
+ hl_ctx_get(ctx);
+
lnode->ctx = ctx;
lnode->vaddr = vma->vm_start;
- lnode->size = block_size;
+ lnode->block_size = block_size;
+ lnode->mapped_size = lnode->block_size;
lnode->id = block_id;
+ vma->vm_private_data = lnode;
+ vma->vm_ops = &hw_block_vm_ops;
+
mutex_lock(&ctx->hw_block_list_lock);
list_add_tail(&lnode->node, &ctx->hw_block_mem_list);
mutex_unlock(&ctx->hw_block_list_lock);
@@ -2296,8 +2308,7 @@ static int get_user_memory(struct hl_device *hdev, u64 addr, u64 size,
return -EFAULT;
}
- userptr->pages = kvmalloc_array(npages, sizeof(*userptr->pages),
- GFP_KERNEL);
+ userptr->pages = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL);
if (!userptr->pages)
return -ENOMEM;
@@ -2759,13 +2770,13 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
unmap_device_va(ctx, &args, true);
}
- mutex_lock(&ctx->mmu_lock);
+ mutex_lock(&hdev->mmu_lock);
/* invalidate the cache once after the unmapping loop */
hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR);
hl_mmu_invalidate_cache(hdev, true, MMU_OP_PHYS_PACK);
- mutex_unlock(&ctx->mmu_lock);
+ mutex_unlock(&hdev->mmu_lock);
INIT_LIST_HEAD(&free_list);
diff --git a/drivers/misc/habanalabs/common/memory_mgr.c b/drivers/misc/habanalabs/common/memory_mgr.c
index 56df962d2f3c..1936d653699e 100644
--- a/drivers/misc/habanalabs/common/memory_mgr.c
+++ b/drivers/misc/habanalabs/common/memory_mgr.c
@@ -11,7 +11,7 @@
* hl_mmap_mem_buf_get - increase the buffer refcount and return a pointer to
* the buffer descriptor.
*
- * @mmg: parent unifed memory manager
+ * @mmg: parent unified memory manager
* @handle: requested buffer handle
*
* Find the buffer in the store and return a pointer to its descriptor.
@@ -104,7 +104,7 @@ int hl_mmap_mem_buf_put(struct hl_mmap_mem_buf *buf)
* hl_mmap_mem_buf_put_handle - decrease the reference to the buffer with the
* given handle.
*
- * @mmg: parent unifed memory manager
+ * @mmg: parent unified memory manager
* @handle: requested buffer handle
*
* Decrease the reference to the buffer, and release it if it was the last one.
@@ -137,7 +137,7 @@ int hl_mmap_mem_buf_put_handle(struct hl_mem_mgr *mmg, u64 handle)
/**
* hl_mmap_mem_buf_alloc - allocate a new mappable buffer
*
- * @mmg: parent unifed memory manager
+ * @mmg: parent unified memory manager
* @behavior: behavior object describing this buffer polymorphic behavior
* @gfp: gfp flags to use for the memory allocations
* @args: additional args passed to behavior->alloc
@@ -222,7 +222,7 @@ static const struct vm_operations_struct hl_mmap_mem_buf_vm_ops = {
/**
* hl_mem_mgr_mmap - map the given buffer to the user
*
- * @mmg: unifed memory manager
+ * @mmg: unified memory manager
* @vma: the vma object for which mmap was closed.
* @args: additional args passed to behavior->mmap
*
@@ -322,7 +322,7 @@ void hl_mem_mgr_init(struct device *dev, struct hl_mem_mgr *mmg)
/**
* hl_mem_mgr_fini - release unified memory manager
*
- * @mmg: parent unifed memory manager
+ * @mmg: parent unified memory manager
*
* Release the unified memory manager. Shall be called from an interrupt context.
*/
diff --git a/drivers/misc/habanalabs/common/mmu/mmu.c b/drivers/misc/habanalabs/common/mmu/mmu.c
index 60740de47b34..cf8946266615 100644
--- a/drivers/misc/habanalabs/common/mmu/mmu.c
+++ b/drivers/misc/habanalabs/common/mmu/mmu.c
@@ -9,6 +9,8 @@
#include "../habanalabs.h"
+#include <trace/events/habanalabs.h>
+
/**
* hl_mmu_get_funcs() - get MMU functions structure
* @hdev: habanalabs device structure.
@@ -45,6 +47,8 @@ int hl_mmu_init(struct hl_device *hdev)
if (!hdev->mmu_enable)
return 0;
+ mutex_init(&hdev->mmu_lock);
+
if (hdev->mmu_func[MMU_DR_PGT].init != NULL) {
rc = hdev->mmu_func[MMU_DR_PGT].init(hdev);
if (rc)
@@ -86,6 +90,8 @@ void hl_mmu_fini(struct hl_device *hdev)
if (hdev->mmu_func[MMU_HR_PGT].fini != NULL)
hdev->mmu_func[MMU_HR_PGT].fini(hdev);
+
+ mutex_destroy(&hdev->mmu_lock);
}
/**
@@ -104,8 +110,6 @@ int hl_mmu_ctx_init(struct hl_ctx *ctx)
if (!hdev->mmu_enable)
return 0;
- mutex_init(&ctx->mmu_lock);
-
if (hdev->mmu_func[MMU_DR_PGT].ctx_init != NULL) {
rc = hdev->mmu_func[MMU_DR_PGT].ctx_init(ctx);
if (rc)
@@ -149,8 +153,6 @@ void hl_mmu_ctx_fini(struct hl_ctx *ctx)
if (hdev->mmu_func[MMU_HR_PGT].ctx_fini != NULL)
hdev->mmu_func[MMU_HR_PGT].ctx_fini(ctx);
-
- mutex_destroy(&ctx->mmu_lock);
}
/*
@@ -259,6 +261,9 @@ int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size, bool flu
if (flush_pte)
mmu_funcs->flush(ctx);
+ if (trace_habanalabs_mmu_unmap_enabled() && !rc)
+ trace_habanalabs_mmu_unmap(hdev->dev, virt_addr, 0, page_size, flush_pte);
+
return rc;
}
@@ -344,6 +349,8 @@ int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_s
if (flush_pte)
mmu_funcs->flush(ctx);
+ trace_habanalabs_mmu_map(hdev->dev, virt_addr, phys_addr, page_size, flush_pte);
+
return 0;
err:
@@ -403,6 +410,8 @@ int hl_mmu_map_contiguous(struct hl_ctx *ctx, u64 virt_addr,
dev_err(hdev->dev,
"Map failed for va 0x%llx to pa 0x%llx\n",
curr_va, curr_pa);
+ /* last mapping failed so don't try to unmap it - reduce off by page_size */
+ off -= page_size;
goto unmap;
}
}
@@ -600,9 +609,9 @@ int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT;
mmu_funcs = hl_mmu_get_funcs(hdev, pgt_residency, is_dram_addr);
- mutex_lock(&ctx->mmu_lock);
+ mutex_lock(&hdev->mmu_lock);
rc = mmu_funcs->get_tlb_info(ctx, virt_addr, hops);
- mutex_unlock(&ctx->mmu_lock);
+ mutex_unlock(&hdev->mmu_lock);
if (rc)
return rc;
@@ -692,16 +701,16 @@ static void hl_mmu_prefetch_work_function(struct work_struct *work)
{
struct hl_prefetch_work *pfw = container_of(work, struct hl_prefetch_work, pf_work);
struct hl_ctx *ctx = pfw->ctx;
+ struct hl_device *hdev = ctx->hdev;
- if (!hl_device_operational(ctx->hdev, NULL))
+ if (!hl_device_operational(hdev, NULL))
goto put_ctx;
- mutex_lock(&ctx->mmu_lock);
+ mutex_lock(&hdev->mmu_lock);
- ctx->hdev->asic_funcs->mmu_prefetch_cache_range(ctx, pfw->flags, pfw->asid,
- pfw->va, pfw->size);
+ hdev->asic_funcs->mmu_prefetch_cache_range(ctx, pfw->flags, pfw->asid, pfw->va, pfw->size);
- mutex_unlock(&ctx->mmu_lock);
+ mutex_unlock(&hdev->mmu_lock);
put_ctx:
/*
diff --git a/drivers/misc/habanalabs/common/sysfs.c b/drivers/misc/habanalabs/common/sysfs.c
index 6c5271f01160..36e9814139d1 100644
--- a/drivers/misc/habanalabs/common/sysfs.c
+++ b/drivers/misc/habanalabs/common/sysfs.c
@@ -375,6 +375,14 @@ out:
return max_size;
}
+static ssize_t security_enabled_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct hl_device *hdev = dev_get_drvdata(dev);
+
+ return sprintf(buf, "%d\n", hdev->asic_prop.fw_security_enabled);
+}
+
static DEVICE_ATTR_RO(armcp_kernel_ver);
static DEVICE_ATTR_RO(armcp_ver);
static DEVICE_ATTR_RO(cpld_ver);
@@ -393,6 +401,7 @@ static DEVICE_ATTR_RO(status);
static DEVICE_ATTR_RO(thermal_ver);
static DEVICE_ATTR_RO(uboot_ver);
static DEVICE_ATTR_RO(fw_os_ver);
+static DEVICE_ATTR_RO(security_enabled);
static struct bin_attribute bin_attr_eeprom = {
.attr = {.name = "eeprom", .mode = (0444)},
@@ -417,6 +426,7 @@ static struct attribute *hl_dev_attrs[] = {
&dev_attr_thermal_ver.attr,
&dev_attr_uboot_ver.attr,
&dev_attr_fw_os_ver.attr,
+ &dev_attr_security_enabled.attr,
NULL,
};