Diffstat (limited to 'drivers/misc/habanalabs/common/memory.c')
-rw-r--r--  drivers/misc/habanalabs/common/memory.c | 360
1 file changed, 329 insertions(+), 31 deletions(-)
diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c
index c1eefaebacb6..e008d82e4ba3 100644
--- a/drivers/misc/habanalabs/common/memory.c
+++ b/drivers/misc/habanalabs/common/memory.c
@@ -11,6 +11,7 @@
#include <linux/uaccess.h>
#include <linux/slab.h>
+#include <linux/vmalloc.h>
#include <linux/pci-p2pdma.h>
MODULE_IMPORT_NS(DMA_BUF);
@@ -20,6 +21,34 @@ MODULE_IMPORT_NS(DMA_BUF);
/* use small pages for supporting non-pow2 (32M/40M/48M) DRAM phys page sizes */
#define DRAM_POOL_PAGE_SIZE SZ_8M
+static int allocate_timestamps_buffers(struct hl_fpriv *hpriv,
+ struct hl_mem_in *args, u64 *handle);
+
+static int set_alloc_page_size(struct hl_device *hdev, struct hl_mem_in *args, u32 *page_size)
+{
+ struct asic_fixed_properties *prop = &hdev->asic_prop;
+ u32 psize;
+
+ /*
+ * For ASICs that support setting the allocation page size by the user, we honor
+ * the user's choice only if it is not 0 (0 means taking the default page size)
+ */
+ if (prop->supports_user_set_page_size && args->alloc.page_size) {
+ psize = args->alloc.page_size;
+
+ if (!hdev->asic_funcs->is_valid_dram_page_size(psize)) {
+ dev_err(hdev->dev, "user page size (%#x) is not valid\n", psize);
+ return -EINVAL;
+ }
+ } else {
+ psize = hdev->asic_prop.dram_page_size;
+ }
+
+ *page_size = psize;
+
+ return 0;
+}
+
/*
* The va ranges in context object contain a list with the available chunks of
* device virtual memory.
@@ -61,11 +90,15 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
struct hl_vm_phys_pg_pack *phys_pg_pack;
u64 paddr = 0, total_size, num_pgs, i;
u32 num_curr_pgs, page_size;
- int handle, rc;
bool contiguous;
+ int handle, rc;
num_curr_pgs = 0;
- page_size = hdev->asic_prop.dram_page_size;
+
+ rc = set_alloc_page_size(hdev, args, &page_size);
+ if (rc)
+ return rc;
+
num_pgs = DIV_ROUND_UP_ULL(args->alloc.mem_size, page_size);
total_size = num_pgs * page_size;
@@ -77,7 +110,11 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
contiguous = args->flags & HL_MEM_CONTIGUOUS;
if (contiguous) {
- paddr = (u64) gen_pool_alloc(vm->dram_pg_pool, total_size);
+ if (is_power_of_2(page_size))
+ paddr = (u64) (uintptr_t) gen_pool_dma_alloc_align(vm->dram_pg_pool,
+ total_size, NULL, page_size);
+ else
+ paddr = (u64) (uintptr_t) gen_pool_alloc(vm->dram_pg_pool, total_size);
if (!paddr) {
dev_err(hdev->dev,
"failed to allocate %llu contiguous pages with total size of %llu\n",
@@ -111,9 +148,14 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
phys_pg_pack->pages[i] = paddr + i * page_size;
} else {
for (i = 0 ; i < num_pgs ; i++) {
- phys_pg_pack->pages[i] = (u64) gen_pool_alloc(
- vm->dram_pg_pool,
- page_size);
+ if (is_power_of_2(page_size))
+ phys_pg_pack->pages[i] =
+ (u64) gen_pool_dma_alloc_align(vm->dram_pg_pool,
+ page_size, NULL,
+ page_size);
+ else
+ phys_pg_pack->pages[i] = (u64) gen_pool_alloc(vm->dram_pg_pool,
+ page_size);
if (!phys_pg_pack->pages[i]) {
dev_err(hdev->dev,
"Failed to allocate device memory (out of memory)\n");
@@ -652,7 +694,7 @@ static u64 get_va_block(struct hl_device *hdev,
continue;
/*
- * In case hint address is 0, and arc_hints_range_reservation
+ * In case hint address is 0, and hints_range_reservation
* property enabled, then avoid allocating va blocks from the
* range reserved for hint addresses
*/
@@ -1967,16 +2009,15 @@ err_dec_exporting_cnt:
static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
{
struct hl_device *hdev = hpriv->hdev;
- struct hl_ctx *ctx = hpriv->ctx;
u64 block_handle, device_addr = 0;
+ struct hl_ctx *ctx = hpriv->ctx;
u32 handle = 0, block_size;
- int rc, dmabuf_fd = -EBADF;
+ int rc;
switch (args->in.op) {
case HL_MEM_OP_ALLOC:
if (args->in.alloc.mem_size == 0) {
- dev_err(hdev->dev,
- "alloc size must be larger than 0\n");
+ dev_err(hdev->dev, "alloc size must be larger than 0\n");
rc = -EINVAL;
goto out;
}
@@ -1997,15 +2038,14 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
case HL_MEM_OP_MAP:
if (args->in.flags & HL_MEM_USERPTR) {
- device_addr = args->in.map_host.host_virt_addr;
- rc = 0;
+ dev_err(hdev->dev, "Failed to map host memory when MMU is disabled\n");
+ rc = -EPERM;
} else {
- rc = get_paddr_from_handle(ctx, &args->in,
- &device_addr);
+ rc = get_paddr_from_handle(ctx, &args->in, &device_addr);
+ memset(args, 0, sizeof(*args));
+ args->out.device_virt_addr = device_addr;
}
- memset(args, 0, sizeof(*args));
- args->out.device_virt_addr = device_addr;
break;
case HL_MEM_OP_UNMAP:
@@ -2013,22 +2053,19 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
break;
case HL_MEM_OP_MAP_BLOCK:
- rc = map_block(hdev, args->in.map_block.block_addr,
- &block_handle, &block_size);
+ rc = map_block(hdev, args->in.map_block.block_addr, &block_handle, &block_size);
args->out.block_handle = block_handle;
args->out.block_size = block_size;
break;
case HL_MEM_OP_EXPORT_DMABUF_FD:
- rc = export_dmabuf_from_addr(ctx,
- args->in.export_dmabuf_fd.handle,
- args->in.export_dmabuf_fd.mem_size,
- args->in.flags,
- &dmabuf_fd);
- memset(args, 0, sizeof(*args));
- args->out.fd = dmabuf_fd;
+ dev_err(hdev->dev, "Failed to export dma-buf object when MMU is disabled\n");
+ rc = -EPERM;
break;
+ case HL_MEM_OP_TS_ALLOC:
+ rc = allocate_timestamps_buffers(hpriv, &args->in, &args->out.handle);
+ break;
default:
dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
rc = -EINVAL;
@@ -2039,6 +2076,258 @@ out:
return rc;
}
+static void ts_buff_release(struct kref *ref)
+{
+ struct hl_ts_buff *buff;
+
+ buff = container_of(ref, struct hl_ts_buff, refcount);
+
+ vfree(buff->kernel_buff_address);
+ vfree(buff->user_buff_address);
+ kfree(buff);
+}
+
+struct hl_ts_buff *hl_ts_get(struct hl_device *hdev, struct hl_ts_mgr *mgr,
+ u32 handle)
+{
+ struct hl_ts_buff *buff;
+
+ spin_lock(&mgr->ts_lock);
+ buff = idr_find(&mgr->ts_handles, handle);
+ if (!buff) {
+ spin_unlock(&mgr->ts_lock);
+ dev_warn(hdev->dev,
+ "TS buff get failed, no match to handle 0x%x\n", handle);
+ return NULL;
+ }
+ kref_get(&buff->refcount);
+ spin_unlock(&mgr->ts_lock);
+
+ return buff;
+}
+
+void hl_ts_put(struct hl_ts_buff *buff)
+{
+ kref_put(&buff->refcount, ts_buff_release);
+}
+
+static void buff_vm_close(struct vm_area_struct *vma)
+{
+ struct hl_ts_buff *buff = (struct hl_ts_buff *) vma->vm_private_data;
+ long new_mmap_size;
+
+ new_mmap_size = buff->mmap_size - (vma->vm_end - vma->vm_start);
+
+ if (new_mmap_size > 0) {
+ buff->mmap_size = new_mmap_size;
+ return;
+ }
+
+ atomic_set(&buff->mmap, 0);
+ hl_ts_put(buff);
+ vma->vm_private_data = NULL;
+}
+
+static const struct vm_operations_struct ts_buff_vm_ops = {
+ .close = buff_vm_close
+};
+
+int hl_ts_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
+{
+ struct hl_device *hdev = hpriv->hdev;
+ struct hl_ts_buff *buff;
+ u32 handle, user_buff_size;
+ int rc;
+
+ /* We use the page offset to hold the idr handle, and thus we need to
+ * clear it before doing the mmap itself
+ */
+ handle = vma->vm_pgoff;
+ vma->vm_pgoff = 0;
+
+ buff = hl_ts_get(hdev, &hpriv->ts_mem_mgr, handle);
+ if (!buff) {
+ dev_err(hdev->dev,
+ "TS buff mmap failed, no match to handle 0x%x\n", handle);
+ return -EINVAL;
+ }
+
+ /* Validation check */
+ user_buff_size = vma->vm_end - vma->vm_start;
+ if (user_buff_size != ALIGN(buff->user_buff_size, PAGE_SIZE)) {
+ dev_err(hdev->dev,
+ "TS buff mmap failed, mmap size 0x%x != 0x%x buff size\n",
+ user_buff_size, ALIGN(buff->user_buff_size, PAGE_SIZE));
+ rc = -EINVAL;
+ goto put_buff;
+ }
+
+#ifdef _HAS_TYPE_ARG_IN_ACCESS_OK
+ if (!access_ok(VERIFY_WRITE,
+ (void __user *) (uintptr_t) vma->vm_start, user_buff_size)) {
+#else
+ if (!access_ok((void __user *) (uintptr_t) vma->vm_start,
+ user_buff_size)) {
+#endif
+ dev_err(hdev->dev,
+ "user pointer is invalid - 0x%lx\n",
+ vma->vm_start);
+
+ rc = -EINVAL;
+ goto put_buff;
+ }
+
+ if (atomic_cmpxchg(&buff->mmap, 0, 1)) {
+ dev_err(hdev->dev, "TS buff memory mmap failed, already mmaped to user\n");
+ rc = -EINVAL;
+ goto put_buff;
+ }
+
+ vma->vm_ops = &ts_buff_vm_ops;
+ vma->vm_private_data = buff;
+ vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP | VM_DONTCOPY | VM_NORESERVE;
+ rc = remap_vmalloc_range(vma, buff->user_buff_address, 0);
+ if (rc) {
+ atomic_set(&buff->mmap, 0);
+ goto put_buff;
+ }
+
+ buff->mmap_size = buff->user_buff_size;
+ vma->vm_pgoff = handle;
+
+ return 0;
+
+put_buff:
+ hl_ts_put(buff);
+ return rc;
+}
+
+void hl_ts_mgr_init(struct hl_ts_mgr *mgr)
+{
+ spin_lock_init(&mgr->ts_lock);
+ idr_init(&mgr->ts_handles);
+}
+
+void hl_ts_mgr_fini(struct hl_device *hdev, struct hl_ts_mgr *mgr)
+{
+ struct hl_ts_buff *buff;
+ struct idr *idp;
+ u32 id;
+
+ idp = &mgr->ts_handles;
+
+ idr_for_each_entry(idp, buff, id) {
+ if (kref_put(&buff->refcount, ts_buff_release) != 1)
+ dev_err(hdev->dev, "TS buff handle %d for CTX is still alive\n",
+ id);
+ }
+
+ idr_destroy(&mgr->ts_handles);
+}
+
+static struct hl_ts_buff *hl_ts_alloc_buff(struct hl_device *hdev, u32 num_elements)
+{
+ struct hl_ts_buff *ts_buff = NULL;
+ u32 size;
+ void *p;
+
+ ts_buff = kzalloc(sizeof(*ts_buff), GFP_KERNEL);
+ if (!ts_buff)
+ return NULL;
+
+ /* Allocate the user buffer */
+ size = num_elements * sizeof(u64);
+ p = vmalloc_user(size);
+ if (!p)
+ goto free_mem;
+
+ ts_buff->user_buff_address = p;
+ ts_buff->user_buff_size = size;
+
+ /* Allocate the internal kernel buffer */
+ size = num_elements * sizeof(struct hl_user_pending_interrupt);
+ p = vmalloc(size);
+ if (!p)
+ goto free_user_buff;
+
+ ts_buff->kernel_buff_address = p;
+ ts_buff->kernel_buff_size = size;
+
+ return ts_buff;
+
+free_user_buff:
+ vfree(ts_buff->user_buff_address);
+free_mem:
+ kfree(ts_buff);
+ return NULL;
+}
+
+/**
+ * allocate_timestamps_buffers() - allocate timestamps buffers
+ * This function will allocate a ts buffer that will later on be mapped to the user
+ * in order to be able to read the timestamp.
+ * In addition, it will allocate an extra buffer for registration management.
+ * Since we cannot fail during registration in an out-of-memory situation, we
+ * prepare a pool which will be used as user interrupt nodes, and instead of
+ * dynamically allocating nodes while registering we pick a node from this
+ * pool. In addition, it will add a node to the mapping hash which will be used
+ * to map the user ts buffer to the internal kernel ts buffer.
+ * @hpriv: pointer to the private data of the fd
+ * @args: ioctl input
+ * @handle: user timestamp buffer handle as an output
+ */
+static int allocate_timestamps_buffers(struct hl_fpriv *hpriv, struct hl_mem_in *args, u64 *handle)
+{
+ struct hl_ts_mgr *ts_mgr = &hpriv->ts_mem_mgr;
+ struct hl_device *hdev = hpriv->hdev;
+ struct hl_ts_buff *ts_buff;
+ int rc = 0;
+
+ if (args->num_of_elements > TS_MAX_ELEMENTS_NUM) {
+ dev_err(hdev->dev, "Num of elements exceeds Max allowed number (0x%x > 0x%x)\n",
+ args->num_of_elements, TS_MAX_ELEMENTS_NUM);
+ return -EINVAL;
+ }
+
+ /* Allocate the ts buffer object.
+ * This object will contain two buffers: one that will be mapped to the user,
+ * and another internal buffer for driver use only, which won't be mapped
+ * to the user.
+ */
+ ts_buff = hl_ts_alloc_buff(hdev, args->num_of_elements);
+ if (!ts_buff) {
+ rc = -ENOMEM;
+ goto out_err;
+ }
+
+ spin_lock(&ts_mgr->ts_lock);
+ rc = idr_alloc(&ts_mgr->ts_handles, ts_buff, 1, 0, GFP_ATOMIC);
+ spin_unlock(&ts_mgr->ts_lock);
+ if (rc < 0) {
+ dev_err(hdev->dev, "Failed to allocate IDR for a new ts buffer\n");
+ goto release_ts_buff;
+ }
+
+ ts_buff->id = rc;
+ ts_buff->hdev = hdev;
+
+ kref_init(&ts_buff->refcount);
+
+ /* The idr is 32-bit, so we can safely OR it with a mask that is above 32 bits */
+ *handle = (u64) ts_buff->id | HL_MMAP_TYPE_TS_BUFF;
+ *handle <<= PAGE_SHIFT;
+
+ dev_dbg(hdev->dev, "Created ts buff object handle(%u)\n", ts_buff->id);
+
+ return 0;
+
+release_ts_buff:
+ kref_put(&ts_buff->refcount, ts_buff_release);
+out_err:
+ *handle = 0;
+ return rc;
+}
+
int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
{
enum hl_device_status status;
@@ -2154,6 +2443,9 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
args->out.fd = dmabuf_fd;
break;
+ case HL_MEM_OP_TS_ALLOC:
+ rc = allocate_timestamps_buffers(hpriv, &args->in, &args->out.handle);
+ break;
default:
dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
rc = -EINVAL;
@@ -2607,11 +2899,12 @@ int hl_vm_ctx_init(struct hl_ctx *ctx)
*/
void hl_vm_ctx_fini(struct hl_ctx *ctx)
{
+ struct hl_vm_phys_pg_pack *phys_pg_list, *tmp_phys_node;
struct hl_device *hdev = ctx->hdev;
- struct hl_vm *vm = &hdev->vm;
- struct hl_vm_phys_pg_pack *phys_pg_list;
struct hl_vm_hash_node *hnode;
+ struct hl_vm *vm = &hdev->vm;
struct hlist_node *tmp_node;
+ struct list_head free_list;
struct hl_mem_in args;
int i;
@@ -2644,19 +2937,24 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
mutex_unlock(&ctx->mmu_lock);
+ INIT_LIST_HEAD(&free_list);
+
spin_lock(&vm->idr_lock);
idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_list, i)
if (phys_pg_list->asid == ctx->asid) {
dev_dbg(hdev->dev,
"page list 0x%px of asid %d is still alive\n",
phys_pg_list, ctx->asid);
- atomic64_sub(phys_pg_list->total_size,
- &hdev->dram_used_mem);
- free_phys_pg_pack(hdev, phys_pg_list);
+
+ atomic64_sub(phys_pg_list->total_size, &hdev->dram_used_mem);
idr_remove(&vm->phys_pg_pack_handles, i);
+ list_add(&phys_pg_list->node, &free_list);
}
spin_unlock(&vm->idr_lock);
+ list_for_each_entry_safe(phys_pg_list, tmp_phys_node, &free_list, node)
+ free_phys_pg_pack(hdev, phys_pg_list);
+
va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_DRAM]);
va_range_fini(hdev, ctx->va_range[HL_VA_RANGE_TYPE_HOST]);