author     Linus Torvalds <torvalds@linux-foundation.org>   2024-01-12 22:32:19 +0300
committer  Linus Torvalds <torvalds@linux-foundation.org>   2024-01-12 22:32:19 +0300
commit     cf65598d5909acf5e7b7dc9e21786e386356bc81 (patch)
tree       44745a47bfc24f7016ff3c3b1ee6d3b8abf517d7 /drivers/accel/ivpu
parent     70d201a40823acba23899342d62bc2644051ad2e (diff)
parent     b76c01f1d950425924ee1c1377760de3c024ef78 (diff)
Merge tag 'drm-next-2024-01-10' of git://anongit.freedesktop.org/drm/drm
Pull drm updates from Dave Airlie:
 "This contains two major new drivers:

   - imagination is a first driver for Imagination Technologies devices,
     it only covers very specific devices, but there is hope to grow it

   - xe is a reboot of the i915 GPU (shares display) side using a more
     upstream focused development model, and trying to maximise code
     sharing. It's not enabled for any hw by default, and will hopefully
     get switched on for Intel's Lunarlake.

  This also drops a bunch of the old UMS ioctls. It's been dead long
  enough.

  amdgpu has a bunch of new color management code that is being used in
  the Steam Deck.

  amdgpu also has a new ACPI WBRF interaction to help avoid radio
  interference.

  Otherwise it's the usual lots of changes in lots of places.

  Detailed summary:

  new drivers:
   - imagination - new driver for Imagination Technologies GPU
   - xe - new driver for Intel GPUs using core drm concepts

  core:
   - add CLOSE_FB ioctl
   - remove old UMS ioctls
   - increase max objects to accommodate AMD color mgmt

  encoder:
   - create per-encoder debugfs directory

  edid:
   - split out drm_eld
   - SAD helpers
   - drop edid_firmware module parameter

  format-helper:
   - cache format conversion buffers

  sched:
   - move from kthread to workqueue
   - rename some internals
   - implement dynamic job-flow control

  gpuvm:
   - provide more features to handle GEM objects

  client:
   - don't acquire module reference

  displayport:
   - add mst path property documentation

  fdinfo:
   - alignment fix

  dma-buf:
   - add fence timestamp helper
   - add fence deadline support

  bridge:
   - transparent aux-bridge for DP/USB-C
   - lt8912b: add suspend/resume support and power regulator support

  panel:
   - edp: AUO B116XTN02, BOE NT116WHM-N21,836X2, NV116WHM-N49
   - chromebook panel support
   - elida-kd35t133: rework pm
   - powkiddy RK2023 panel
   - himax-hx8394: drop prepare/unprepare and shutdown logic
   - BOE BP101WX1-100, Powkiddy X55, Ampire AM8001280G
   - Evervision VGG644804, SDC ATNA45AF01
   - nv3052c: register docs, init sequence fixes, fascontek FS035VG158
   - st7701: Anbernic RG-ARC support
   - r63353 panel controller
   - Ilitek ILI9805 panel controller
   - AUO G156HAN04.0

  simplefb:
   - support memory regions
   - support power domains

  amdgpu:
   - add new 64-bit sequence number infrastructure
   - add AMD specific color management
   - ACPI WBRF support for RF interference handling
   - GPUVM updates
   - RAS updates
   - DCN 3.5 updates
   - Rework PCIe link speed handling
   - Document GPU reset types
   - DMUB fixes
   - eDP fixes
   - NBIO 7.9/7.11 updates
   - SubVP updates
   - XGMI PCIe state dumping for aqua vanjaram
   - GFX11 golden register updates
   - enable tunnelling on high pri compute

  amdkfd:
   - Migrate TLB flushing logic to amdgpu
   - Trap handler fixes
   - Fix restore workers handling on suspend/resume
   - Fix possible memory leak in pqm_uninit()
   - support import/export of dma-bufs using GEM handles

  radeon:
   - fix possible overflows in command buffer checking
   - check for errors in ring_lock

  i915:
   - reorg display code for reuse in xe driver
   - fdinfo memory stats printing
   - DP MST bandwidth mgmt improvements
   - DP panel replay enabling
   - MTL C20 phy state verification
   - MTL DP DSC fractional bpp support
   - Audio fastset support
   - use dma_fence interfaces instead of i915_sw_fence
   - Separate gem and display code
   - AUX register macro refactoring
   - Separate display module/device parameters
   - Move display capabilities debugfs under display
   - Makefile cleanups
   - Register cleanups
   - Move display lock inits under display/
   - VLV/CHV DPIO PHY register and interface refactoring
   - DSI VBT sequence refactoring
   - C10/C20 PHY PLL hardware readout
   - DPLL code cleanups
   - Cleanup PXP plane protection checks
   - Improve display debug msgs
   - PSR selective fetch fixes/improvements
   - DP MST fixes
   - Xe2LPD FBC restrictions removed
   - DGFX uses direct VBT pin mapping
   - more MTL WAs
   - fix MTL eDP bug
   - eliminate use of kmap_atomic

  habanalabs:
   - sysfs entry to identify a device minor id with debugfs path
   - sysfs entry to expose device module id
   - add signed device info retrieval through INFO ioctl
   - add Gaudi2C device support
   - pcie reset prepare/done hooks

  msm:
   - Add support for SDM670, SM8650
   - Handle the CFG interconnect to fix the obscure hangs / timeouts
   - Kconfig fix for QMP dependency
   - use managed allocators
   - DPU: SDM670, SM8650 support
   - DPU: Enable SmartDMA on SM8350 and SM8450
   - DP: enable runtime PM support
   - GPU: add metadata UAPI
   - GPU: move devcoredumps to GPU device
   - GPU: convert to drm_exec

  ivpu:
   - update FW API
   - new debugfs file
   - a new NOP job submission test mode
   - improve suspend/resume
   - PM improvements
   - MMU PT optimizations
   - firmware profile frequency support
   - support for uncached buffers
   - switch to gem shmem helpers
   - replace kthread with threaded irqs

  rockchip:
   - rk3066_hdmi: convert to atomic
   - vop2: support nv20 and nv30
   - rk3588 support

  mediatek:
   - use devm_platform_ioremap_resource
   - stop using iommu_present
   - MT8188 VDOSYS1 display support

  panfrost:
   - PM improvements
   - improve interrupt handling as poweroff

  qaic:
   - allow to run with single MSI
   - support host/device time sync
   - switch to persistent DRM devices

  exynos:
   - fix potential error pointer dereference
   - fix wrong error checking
   - add missing call to drm_atomic_helper_shutdown

  omapdrm:
   - dma-fence lockdep annotation fix

  tidss:
   - dma-fence lockdep annotation fix
   - support for AM62A7

  v3d:
   - BCM2712 - rpi5 support
   - fdinfo + gputop support
   - uapi for CPU job handling

  virtio-gpu:
   - add context debug name"

* tag 'drm-next-2024-01-10' of git://anongit.freedesktop.org/drm/drm: (2340 commits)
  drm/amd/display: Allow z8/z10 from driver
  drm/amd/display: fix bandwidth validation failure on DCN 2.1
  drm/amdgpu: apply the RV2 system aperture fix to RN/CZN as well
  drm/amd/display: Move fixpt_from_s3132 to amdgpu_dm
  drm/amd/display: Fix recent checkpatch errors in amdgpu_dm
  Revert "drm/amdkfd: Relocate TBA/TMA to opposite side of VM hole"
  drm/amd/display: avoid stringop-overflow warnings for dp_decide_lane_settings()
  drm/amd/display: Fix power_helpers.c codestyle
  drm/amd/display: Fix hdcp_log.h codestyle
  drm/amd/display: Fix hdcp2_execution.c codestyle
  drm/amd/display: Fix hdcp_psp.h codestyle
  drm/amd/display: Fix freesync.c codestyle
  drm/amd/display: Fix hdcp_psp.c codestyle
  drm/amd/display: Fix hdcp1_execution.c codestyle
  drm/amd/pm/smu7: fix a memleak in smu7_hwmgr_backend_init
  drm/amdkfd: Fix iterator used outside loop in 'kfd_add_peer_prop()'
  drm/amdgpu: Drop 'fence' check in 'to_amdgpu_amdkfd_fence()'
  drm/amdkfd: Confirm list is non-empty before utilizing list_first_entry in kfd_topology.c
  drm/amdgpu: Fix '*fw' from request_firmware() not released in 'amdgpu_ucode_request()'
  drm/amdgpu: Fix variable 'mca_funcs' dereferenced before NULL check in 'amdgpu_mca_smu_get_mca_entry()'
  ...
Diffstat (limited to 'drivers/accel/ivpu')
-rw-r--r--  drivers/accel/ivpu/Kconfig             |  11
-rw-r--r--  drivers/accel/ivpu/ivpu_debugfs.c      |  57
-rw-r--r--  drivers/accel/ivpu/ivpu_drv.c          |  49
-rw-r--r--  drivers/accel/ivpu/ivpu_drv.h          |  18
-rw-r--r--  drivers/accel/ivpu/ivpu_fw.c           |  79
-rw-r--r--  drivers/accel/ivpu/ivpu_fw.h           |   1
-rw-r--r--  drivers/accel/ivpu/ivpu_gem.c          | 678
-rw-r--r--  drivers/accel/ivpu/ivpu_gem.h          |  75
-rw-r--r--  drivers/accel/ivpu/ivpu_hw.h           |  20
-rw-r--r--  drivers/accel/ivpu/ivpu_hw_37xx.c      |  59
-rw-r--r--  drivers/accel/ivpu/ivpu_hw_37xx_reg.h  |   2
-rw-r--r--  drivers/accel/ivpu/ivpu_hw_40xx.c      |  69
-rw-r--r--  drivers/accel/ivpu/ivpu_ipc.c          | 251
-rw-r--r--  drivers/accel/ivpu/ivpu_ipc.h          |  33
-rw-r--r--  drivers/accel/ivpu/ivpu_job.c          |  99
-rw-r--r--  drivers/accel/ivpu/ivpu_job.h          |   4
-rw-r--r--  drivers/accel/ivpu/ivpu_jsm_msg.c      |  38
-rw-r--r--  drivers/accel/ivpu/ivpu_jsm_msg.h      |   1
-rw-r--r--  drivers/accel/ivpu/ivpu_mmu.c          |  44
-rw-r--r--  drivers/accel/ivpu/ivpu_mmu_context.c  | 153
-rw-r--r--  drivers/accel/ivpu/ivpu_mmu_context.h  |  11
-rw-r--r--  drivers/accel/ivpu/ivpu_pm.c           |  72
-rw-r--r--  drivers/accel/ivpu/ivpu_pm.h           |   3
-rw-r--r--  drivers/accel/ivpu/vpu_boot_api.h      |  90
-rw-r--r--  drivers/accel/ivpu/vpu_jsm_api.h       | 309
25 files changed, 1326 insertions, 900 deletions
diff --git a/drivers/accel/ivpu/Kconfig b/drivers/accel/ivpu/Kconfig
index 1a4c4ed9d113..682c53245286 100644
--- a/drivers/accel/ivpu/Kconfig
+++ b/drivers/accel/ivpu/Kconfig
@@ -1,16 +1,17 @@
# SPDX-License-Identifier: GPL-2.0-only
config DRM_ACCEL_IVPU
- tristate "Intel VPU for Meteor Lake and newer"
+ tristate "Intel NPU (Neural Processing Unit)"
depends on DRM_ACCEL
depends on X86_64 && !UML
depends on PCI && PCI_MSI
select FW_LOADER
- select SHMEM
+ select DRM_GEM_SHMEM_HELPER
select GENERIC_ALLOCATOR
help
- Choose this option if you have a system that has an 14th generation Intel CPU
- or newer. VPU stands for Versatile Processing Unit and it's a CPU-integrated
- inference accelerator for Computer Vision and Deep Learning applications.
+ Choose this option if you have a system with a 14th generation
+ Intel CPU (Meteor Lake) or newer. Intel NPU (formerly called Intel VPU)
+ is a CPU-integrated inference accelerator for Computer Vision
+ and Deep Learning applications.
If "M" is selected, the module will be called intel_vpu.
diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c
index ea453b985b49..19035230563d 100644
--- a/drivers/accel/ivpu/ivpu_debugfs.c
+++ b/drivers/accel/ivpu/ivpu_debugfs.c
@@ -14,6 +14,7 @@
#include "ivpu_fw.h"
#include "ivpu_fw_log.h"
#include "ivpu_gem.h"
+#include "ivpu_hw.h"
#include "ivpu_jsm_msg.h"
#include "ivpu_pm.h"
@@ -115,6 +116,31 @@ static const struct drm_debugfs_info vdev_debugfs_list[] = {
{"reset_pending", reset_pending_show, 0},
};
+static ssize_t
+dvfs_mode_fops_write(struct file *file, const char __user *user_buf, size_t size, loff_t *pos)
+{
+ struct ivpu_device *vdev = file->private_data;
+ struct ivpu_fw_info *fw = vdev->fw;
+ u32 dvfs_mode;
+ int ret;
+
+ ret = kstrtou32_from_user(user_buf, size, 0, &dvfs_mode);
+ if (ret < 0)
+ return ret;
+
+ fw->dvfs_mode = dvfs_mode;
+
+ ivpu_pm_schedule_recovery(vdev);
+
+ return size;
+}
+
+static const struct file_operations dvfs_mode_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .write = dvfs_mode_fops_write,
+};
+
static int fw_log_show(struct seq_file *s, void *v)
{
struct ivpu_device *vdev = s->private;
@@ -152,6 +178,30 @@ static const struct file_operations fw_log_fops = {
};
static ssize_t
+fw_profiling_freq_fops_write(struct file *file, const char __user *user_buf,
+ size_t size, loff_t *pos)
+{
+ struct ivpu_device *vdev = file->private_data;
+ bool enable;
+ int ret;
+
+ ret = kstrtobool_from_user(user_buf, size, &enable);
+ if (ret < 0)
+ return ret;
+
+ ivpu_hw_profiling_freq_drive(vdev, enable);
+ ivpu_pm_schedule_recovery(vdev);
+
+ return size;
+}
+
+static const struct file_operations fw_profiling_freq_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .write = fw_profiling_freq_fops_write,
+};
+
+static ssize_t
fw_trace_destination_mask_fops_write(struct file *file, const char __user *user_buf,
size_t size, loff_t *pos)
{
@@ -280,6 +330,9 @@ void ivpu_debugfs_init(struct ivpu_device *vdev)
debugfs_create_file("force_recovery", 0200, debugfs_root, vdev,
&ivpu_force_recovery_fops);
+ debugfs_create_file("dvfs_mode", 0200, debugfs_root, vdev,
+ &dvfs_mode_fops);
+
debugfs_create_file("fw_log", 0644, debugfs_root, vdev,
&fw_log_fops);
debugfs_create_file("fw_trace_destination_mask", 0200, debugfs_root, vdev,
@@ -291,4 +344,8 @@ void ivpu_debugfs_init(struct ivpu_device *vdev)
debugfs_create_file("reset_engine", 0200, debugfs_root, vdev,
&ivpu_reset_engine_fops);
+
+ if (ivpu_hw_gen(vdev) >= IVPU_HW_40XX)
+ debugfs_create_file("fw_profiling_freq_drive", 0200,
+ debugfs_root, vdev, &fw_profiling_freq_fops);
}
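
The two knobs added above (dvfs_mode and fw_profiling_freq_drive) follow the standard write-only debugfs pattern: simple_open() stashes the device pointer in file->private_data, the write handler parses the user buffer with kstrtou32_from_user()/kstrtobool_from_user(), and the new value only takes effect after ivpu_pm_schedule_recovery() restarts the firmware. A minimal sketch of that pattern, with hypothetical my_dev/my_knob names that are not part of this driver:

#include <linux/debugfs.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/module.h>

struct my_dev {
	u32 knob;		/* value controlled from debugfs */
};

static ssize_t my_knob_write(struct file *file, const char __user *user_buf,
			     size_t size, loff_t *pos)
{
	struct my_dev *mdev = file->private_data;	/* set by simple_open() */
	u32 val;
	int ret;

	ret = kstrtou32_from_user(user_buf, size, 0, &val);
	if (ret < 0)
		return ret;

	mdev->knob = val;
	/* a real driver would apply the value here, e.g. schedule a FW restart */
	return size;
}

static const struct file_operations my_knob_fops = {
	.owner = THIS_MODULE,
	.open = simple_open,
	.write = my_knob_write,
};

/* registration, typically from a debugfs init helper:
 *	debugfs_create_file("my_knob", 0200, parent_dentry, mdev, &my_knob_fops);
 */

From user space the new files would presumably be driven with a plain write, e.g. echoing 1 into fw_profiling_freq_drive under the device's debugfs directory (the exact path depends on the accel/DRM debugfs layout).
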
diff --git a/drivers/accel/ivpu/ivpu_drv.c b/drivers/accel/ivpu/ivpu_drv.c
index 790603017653..64927682161b 100644
--- a/drivers/accel/ivpu/ivpu_drv.c
+++ b/drivers/accel/ivpu/ivpu_drv.c
@@ -31,8 +31,6 @@
__stringify(DRM_IVPU_DRIVER_MINOR) "."
#endif
-static const struct drm_driver driver;
-
static struct lock_class_key submitted_jobs_xa_lock_class_key;
int ivpu_dbg_mask;
@@ -41,7 +39,7 @@ MODULE_PARM_DESC(dbg_mask, "Driver debug mask. See IVPU_DBG_* macros.");
int ivpu_test_mode;
module_param_named_unsafe(test_mode, ivpu_test_mode, int, 0644);
-MODULE_PARM_DESC(test_mode, "Test mode: 0 - normal operation, 1 - fw unit test, 2 - null hw");
+MODULE_PARM_DESC(test_mode, "Test mode mask. See IVPU_TEST_MODE_* macros.");
u8 ivpu_pll_min_ratio;
module_param_named(pll_min_ratio, ivpu_pll_min_ratio, byte, 0644);
@@ -93,8 +91,8 @@ static void file_priv_release(struct kref *ref)
ivpu_dbg(vdev, FILE, "file_priv release: ctx %u\n", file_priv->ctx.id);
ivpu_cmdq_release_all(file_priv);
- ivpu_bo_remove_all_bos_from_context(&file_priv->ctx);
ivpu_jsm_context_release(vdev, file_priv->ctx.id);
+ ivpu_bo_remove_all_bos_from_context(vdev, &file_priv->ctx);
ivpu_mmu_user_context_fini(vdev, &file_priv->ctx);
drm_WARN_ON(&vdev->drm, xa_erase_irq(&vdev->context_xa, file_priv->ctx.id) != file_priv);
mutex_destroy(&file_priv->lock);
@@ -317,16 +315,14 @@ static int ivpu_wait_for_ready(struct ivpu_device *vdev)
unsigned long timeout;
int ret;
- if (ivpu_test_mode == IVPU_TEST_MODE_FW_TEST)
+ if (ivpu_test_mode & IVPU_TEST_MODE_FW_TEST)
return 0;
- ivpu_ipc_consumer_add(vdev, &cons, IVPU_IPC_CHAN_BOOT_MSG);
+ ivpu_ipc_consumer_add(vdev, &cons, IVPU_IPC_CHAN_BOOT_MSG, NULL);
timeout = jiffies + msecs_to_jiffies(vdev->timeout.boot);
while (1) {
- ret = ivpu_ipc_irq_handler(vdev);
- if (ret)
- break;
+ ivpu_ipc_irq_handler(vdev, NULL);
ret = ivpu_ipc_receive(vdev, &cons, &ipc_hdr, NULL, 0);
if (ret != -ETIMEDOUT || time_after_eq(jiffies, timeout))
break;
@@ -362,7 +358,7 @@ int ivpu_boot(struct ivpu_device *vdev)
int ret;
/* Update boot params located at first 4KB of FW memory */
- ivpu_fw_boot_params_setup(vdev, vdev->fw->mem->kvaddr);
+ ivpu_fw_boot_params_setup(vdev, ivpu_bo_vaddr(vdev->fw->mem));
ret = ivpu_hw_boot_fw(vdev);
if (ret) {
@@ -414,7 +410,9 @@ static const struct drm_driver driver = {
.open = ivpu_open,
.postclose = ivpu_postclose,
- .gem_prime_import = ivpu_gem_prime_import,
+
+ .gem_create_object = ivpu_gem_create_object,
+ .gem_prime_import_sg_table = drm_gem_shmem_prime_import_sg_table,
.ioctls = ivpu_drm_ioctls,
.num_ioctls = ARRAY_SIZE(ivpu_drm_ioctls),
@@ -427,6 +425,13 @@ static const struct drm_driver driver = {
.minor = DRM_IVPU_DRIVER_MINOR,
};
+static irqreturn_t ivpu_irq_thread_handler(int irq, void *arg)
+{
+ struct ivpu_device *vdev = arg;
+
+ return ivpu_ipc_irq_thread_handler(vdev);
+}
+
static int ivpu_irq_init(struct ivpu_device *vdev)
{
struct pci_dev *pdev = to_pci_dev(vdev->drm.dev);
@@ -440,8 +445,8 @@ static int ivpu_irq_init(struct ivpu_device *vdev)
vdev->irq = pci_irq_vector(pdev, 0);
- ret = devm_request_irq(vdev->drm.dev, vdev->irq, vdev->hw->ops->irq_handler,
- IRQF_NO_AUTOEN, DRIVER_NAME, vdev);
+ ret = devm_request_threaded_irq(vdev->drm.dev, vdev->irq, vdev->hw->ops->irq_handler,
+ ivpu_irq_thread_handler, IRQF_NO_AUTOEN, DRIVER_NAME, vdev);
if (ret)
ivpu_err(vdev, "Failed to request an IRQ %d\n", ret);
@@ -533,6 +538,11 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
xa_init_flags(&vdev->context_xa, XA_FLAGS_ALLOC);
xa_init_flags(&vdev->submitted_jobs_xa, XA_FLAGS_ALLOC1);
lockdep_set_class(&vdev->submitted_jobs_xa.xa_lock, &submitted_jobs_xa_lock_class_key);
+ INIT_LIST_HEAD(&vdev->bo_list);
+
+ ret = drmm_mutex_init(&vdev->drm, &vdev->bo_list_lock);
+ if (ret)
+ goto err_xa_destroy;
ret = ivpu_pci_init(vdev);
if (ret)
@@ -550,7 +560,7 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
/* Power up early so the rest of init code can access VPU registers */
ret = ivpu_hw_power_up(vdev);
if (ret)
- goto err_xa_destroy;
+ goto err_power_down;
ret = ivpu_mmu_global_context_init(vdev);
if (ret)
@@ -574,20 +584,15 @@ static int ivpu_dev_init(struct ivpu_device *vdev)
ivpu_pm_init(vdev);
- ret = ivpu_job_done_thread_init(vdev);
- if (ret)
- goto err_ipc_fini;
-
ret = ivpu_boot(vdev);
if (ret)
- goto err_job_done_thread_fini;
+ goto err_ipc_fini;
+ ivpu_job_done_consumer_init(vdev);
ivpu_pm_enable(vdev);
return 0;
-err_job_done_thread_fini:
- ivpu_job_done_thread_fini(vdev);
err_ipc_fini:
ivpu_ipc_fini(vdev);
err_fw_fini:
@@ -612,7 +617,7 @@ static void ivpu_dev_fini(struct ivpu_device *vdev)
ivpu_shutdown(vdev);
if (IVPU_WA(d3hot_after_power_off))
pci_set_power_state(to_pci_dev(vdev->drm.dev), PCI_D3hot);
- ivpu_job_done_thread_fini(vdev);
+ ivpu_job_done_consumer_fini(vdev);
ivpu_pm_cancel_recovery(vdev);
ivpu_ipc_fini(vdev);
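
The IRQ rework above replaces devm_request_irq() plus a dedicated job-done kthread with devm_request_threaded_irq(): the hard handler (vdev->hw->ops->irq_handler) only reads and clears interrupt status and returns IRQ_WAKE_THREAD when IPC processing is needed, and ivpu_irq_thread_handler() then runs in process context. A generic sketch of that split under the same single-vector assumption; the my_* names are made up:

#include <linux/interrupt.h>
#include <linux/types.h>

struct my_dev {
	int irq;
};

/* stand-ins for real register access and message processing (hypothetical) */
static bool my_dev_ack_irq(struct my_dev *mdev, bool *wake_thread)
{
	*wake_thread = true;	/* pretend an IPC message arrived */
	return true;		/* the interrupt was ours */
}

static void my_dev_process_messages(struct my_dev *mdev)
{
	/* may sleep, take mutexes, allocate memory - this runs in a kernel thread */
}

static irqreturn_t my_hard_handler(int irq, void *arg)
{
	struct my_dev *mdev = arg;
	bool wake_thread = false;

	if (!my_dev_ack_irq(mdev, &wake_thread))
		return IRQ_NONE;	/* not our interrupt */
	if (wake_thread)
		return IRQ_WAKE_THREAD;	/* run the threaded handler next */
	return IRQ_HANDLED;
}

static irqreturn_t my_thread_handler(int irq, void *arg)
{
	my_dev_process_messages(arg);
	return IRQ_HANDLED;
}

/* request, mirroring the call above:
 *	devm_request_threaded_irq(dev, mdev->irq, my_hard_handler, my_thread_handler,
 *				  IRQF_NO_AUTOEN, "my-drv", mdev);
 */
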
diff --git a/drivers/accel/ivpu/ivpu_drv.h b/drivers/accel/ivpu/ivpu_drv.h
index 417ddeca8517..ebc4b84f27b2 100644
--- a/drivers/accel/ivpu/ivpu_drv.h
+++ b/drivers/accel/ivpu/ivpu_drv.h
@@ -17,9 +17,10 @@
#include <uapi/drm/ivpu_accel.h>
#include "ivpu_mmu_context.h"
+#include "ivpu_ipc.h"
#define DRIVER_NAME "intel_vpu"
-#define DRIVER_DESC "Driver for Intel Versatile Processing Unit (VPU)"
+#define DRIVER_DESC "Driver for Intel NPU (Neural Processing Unit)"
#define DRIVER_DATE "20230117"
#define PCI_DEVICE_ID_MTL 0x7d1d
@@ -88,6 +89,7 @@ struct ivpu_wa_table {
bool d3hot_after_power_off;
bool interrupt_clear_with_0;
bool disable_clock_relinquish;
+ bool disable_d0i3_msg;
};
struct ivpu_hw_info;
@@ -115,8 +117,11 @@ struct ivpu_device {
struct xarray context_xa;
struct xa_limit context_xa_limit;
+ struct mutex bo_list_lock; /* Protects bo_list */
+ struct list_head bo_list;
+
struct xarray submitted_jobs_xa;
- struct task_struct *job_done_thread;
+ struct ivpu_ipc_consumer job_done_consumer;
atomic64_t unique_id_counter;
@@ -126,6 +131,7 @@ struct ivpu_device {
int tdr;
int reschedule_suspend;
int autosuspend;
+ int d0i3_entry_msg;
} timeout;
};
@@ -148,9 +154,11 @@ extern u8 ivpu_pll_min_ratio;
extern u8 ivpu_pll_max_ratio;
extern bool ivpu_disable_mmu_cont_pages;
-#define IVPU_TEST_MODE_DISABLED 0
-#define IVPU_TEST_MODE_FW_TEST 1
-#define IVPU_TEST_MODE_NULL_HW 2
+#define IVPU_TEST_MODE_FW_TEST BIT(0)
+#define IVPU_TEST_MODE_NULL_HW BIT(1)
+#define IVPU_TEST_MODE_NULL_SUBMISSION BIT(2)
+#define IVPU_TEST_MODE_D0I3_MSG_DISABLE BIT(4)
+#define IVPU_TEST_MODE_D0I3_MSG_ENABLE BIT(5)
extern int ivpu_test_mode;
struct ivpu_file_priv *ivpu_file_priv_get(struct ivpu_file_priv *file_priv);
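
The IVPU_TEST_MODE_* values above change from an enum-style setting (0, 1, 2) to BIT() flags, so several test behaviours (fw test, null hw, null submission, D0i3 message override) can be combined in a single test_mode value, and checks switch from == to &, as seen in ivpu_wait_for_ready() and ivpu_fw_init_wa(). A tiny standalone C illustration of the difference (the constants mirror the header; the main() harness is of course not driver code):

#include <stdio.h>

#define BIT(n)			(1u << (n))

#define TEST_MODE_FW_TEST	BIT(0)
#define TEST_MODE_NULL_HW	BIT(1)
#define TEST_MODE_NULL_SUBMIT	BIT(2)

int main(void)
{
	/* e.g. loading with test_mode=3 would presumably set bits 0 and 1 */
	unsigned int test_mode = TEST_MODE_FW_TEST | TEST_MODE_NULL_HW;

	/* old style: only one mode at a time (test_mode == MODE)
	 * new style: each feature checks its own bit
	 */
	if (test_mode & TEST_MODE_FW_TEST)
		printf("fw unit test mode enabled\n");
	if (test_mode & TEST_MODE_NULL_HW)
		printf("null hw mode enabled\n");
	if (test_mode & TEST_MODE_NULL_SUBMIT)
		printf("null submission mode enabled\n");
	return 0;
}
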
diff --git a/drivers/accel/ivpu/ivpu_fw.c b/drivers/accel/ivpu/ivpu_fw.c
index 691da521dde5..6576232f3e67 100644
--- a/drivers/accel/ivpu/ivpu_fw.c
+++ b/drivers/accel/ivpu/ivpu_fw.c
@@ -33,12 +33,17 @@
#define ADDR_TO_L2_CACHE_CFG(addr) ((addr) >> 31)
-#define IVPU_FW_CHECK_API(vdev, fw_hdr, name, min_major) \
+/* Check if FW API is compatible with the driver */
+#define IVPU_FW_CHECK_API_COMPAT(vdev, fw_hdr, name, min_major) \
ivpu_fw_check_api(vdev, fw_hdr, #name, \
VPU_##name##_API_VER_INDEX, \
VPU_##name##_API_VER_MAJOR, \
VPU_##name##_API_VER_MINOR, min_major)
+/* Check if API version is lower than the given version */
+#define IVPU_FW_CHECK_API_VER_LT(vdev, fw_hdr, name, major, minor) \
+ ivpu_fw_check_api_ver_lt(vdev, fw_hdr, #name, VPU_##name##_API_VER_INDEX, major, minor)
+
static char *ivpu_firmware;
module_param_named_unsafe(firmware, ivpu_firmware, charp, 0644);
MODULE_PARM_DESC(firmware, "VPU firmware binary in /lib/firmware/..");
@@ -105,6 +110,19 @@ ivpu_fw_check_api(struct ivpu_device *vdev, const struct vpu_firmware_header *fw
return 0;
}
+static bool
+ivpu_fw_check_api_ver_lt(struct ivpu_device *vdev, const struct vpu_firmware_header *fw_hdr,
+ const char *str, int index, u16 major, u16 minor)
+{
+ u16 fw_major = (u16)(fw_hdr->api_version[index] >> 16);
+ u16 fw_minor = (u16)(fw_hdr->api_version[index]);
+
+ if (fw_major < major || (fw_major == major && fw_minor < minor))
+ return true;
+
+ return false;
+}
+
static int ivpu_fw_parse(struct ivpu_device *vdev)
{
struct ivpu_fw_info *fw = vdev->fw;
@@ -164,9 +182,9 @@ static int ivpu_fw_parse(struct ivpu_device *vdev)
ivpu_info(vdev, "Firmware: %s, version: %s", fw->name,
(const char *)fw_hdr + VPU_FW_HEADER_SIZE);
- if (IVPU_FW_CHECK_API(vdev, fw_hdr, BOOT, 3))
+ if (IVPU_FW_CHECK_API_COMPAT(vdev, fw_hdr, BOOT, 3))
return -EINVAL;
- if (IVPU_FW_CHECK_API(vdev, fw_hdr, JSM, 3))
+ if (IVPU_FW_CHECK_API_COMPAT(vdev, fw_hdr, JSM, 3))
return -EINVAL;
fw->runtime_addr = runtime_addr;
@@ -182,6 +200,8 @@ static int ivpu_fw_parse(struct ivpu_device *vdev)
fw->trace_destination_mask = VPU_TRACE_DESTINATION_VERBOSE_TRACING;
fw->trace_hw_component_mask = -1;
+ fw->dvfs_mode = 0;
+
ivpu_dbg(vdev, FW_BOOT, "Size: file %lu image %u runtime %u shavenn %u\n",
fw->file->size, fw->image_size, fw->runtime_size, fw->shave_nn_size);
ivpu_dbg(vdev, FW_BOOT, "Address: runtime 0x%llx, load 0x%llx, entry point 0x%llx\n",
@@ -195,6 +215,24 @@ static void ivpu_fw_release(struct ivpu_device *vdev)
release_firmware(vdev->fw->file);
}
+/* Initialize workarounds that depend on FW version */
+static void
+ivpu_fw_init_wa(struct ivpu_device *vdev)
+{
+ const struct vpu_firmware_header *fw_hdr = (const void *)vdev->fw->file->data;
+
+ if (IVPU_FW_CHECK_API_VER_LT(vdev, fw_hdr, BOOT, 3, 17) ||
+ (ivpu_hw_gen(vdev) > IVPU_HW_37XX) ||
+ (ivpu_test_mode & IVPU_TEST_MODE_D0I3_MSG_DISABLE))
+ vdev->wa.disable_d0i3_msg = true;
+
+ /* Force enable the feature for testing purposes */
+ if (ivpu_test_mode & IVPU_TEST_MODE_D0I3_MSG_ENABLE)
+ vdev->wa.disable_d0i3_msg = false;
+
+ IVPU_PRINT_WA(disable_d0i3_msg);
+}
+
static int ivpu_fw_update_global_range(struct ivpu_device *vdev)
{
struct ivpu_fw_info *fw = vdev->fw;
@@ -248,7 +286,7 @@ static int ivpu_fw_mem_init(struct ivpu_device *vdev)
if (fw->shave_nn_size) {
fw->mem_shave_nn = ivpu_bo_alloc_internal(vdev, vdev->hw->ranges.shave.start,
- fw->shave_nn_size, DRM_IVPU_BO_UNCACHED);
+ fw->shave_nn_size, DRM_IVPU_BO_WC);
if (!fw->mem_shave_nn) {
ivpu_err(vdev, "Failed to allocate shavenn buffer\n");
ret = -ENOMEM;
@@ -297,6 +335,8 @@ int ivpu_fw_init(struct ivpu_device *vdev)
if (ret)
goto err_fw_release;
+ ivpu_fw_init_wa(vdev);
+
ret = ivpu_fw_mem_init(vdev);
if (ret)
goto err_fw_release;
@@ -422,14 +462,31 @@ static void ivpu_fw_boot_params_print(struct ivpu_device *vdev, struct vpu_boot_
boot_params->punit_telemetry_sram_size);
ivpu_dbg(vdev, FW_BOOT, "boot_params.vpu_telemetry_enable = 0x%x\n",
boot_params->vpu_telemetry_enable);
+ ivpu_dbg(vdev, FW_BOOT, "boot_params.dvfs_mode = %u\n",
+ boot_params->dvfs_mode);
+ ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_delayed_entry = %d\n",
+ boot_params->d0i3_delayed_entry);
+ ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_residency_time_us = %lld\n",
+ boot_params->d0i3_residency_time_us);
+ ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_entry_vpu_ts = %llu\n",
+ boot_params->d0i3_entry_vpu_ts);
}
void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params *boot_params)
{
struct ivpu_bo *ipc_mem_rx = vdev->ipc->mem_rx;
- /* In case of warm boot we only have to reset the entrypoint addr */
+ /* In case of warm boot only update variable params */
if (!ivpu_fw_is_cold_boot(vdev)) {
+ boot_params->d0i3_residency_time_us =
+ ktime_us_delta(ktime_get_boottime(), vdev->hw->d0i3_entry_host_ts);
+ boot_params->d0i3_entry_vpu_ts = vdev->hw->d0i3_entry_vpu_ts;
+
+ ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_residency_time_us = %lld\n",
+ boot_params->d0i3_residency_time_us);
+ ivpu_dbg(vdev, FW_BOOT, "boot_params.d0i3_entry_vpu_ts = %llu\n",
+ boot_params->d0i3_entry_vpu_ts);
+
boot_params->save_restore_ret_address = 0;
vdev->pm->is_warmboot = true;
wmb(); /* Flush WC buffers after writing save_restore_ret_address */
@@ -443,6 +500,13 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
boot_params->frequency = ivpu_hw_reg_pll_freq_get(vdev);
/*
+ * This param is a debug firmware feature. It switches the default clock
+ * to a higher-resolution one for fine-grained and more accurate firmware
+ * task profiling.
+ */
+ boot_params->perf_clk_frequency = ivpu_hw_profiling_freq_get(vdev);
+
+ /*
* Uncached region of VPU address space, covers IPC buffers, job queues
* and log buffers, programmable to L2$ Uncached by VPU MTRR
*/
@@ -493,6 +557,11 @@ void ivpu_fw_boot_params_setup(struct ivpu_device *vdev, struct vpu_boot_params
boot_params->punit_telemetry_sram_base = ivpu_hw_reg_telemetry_offset_get(vdev);
boot_params->punit_telemetry_sram_size = ivpu_hw_reg_telemetry_size_get(vdev);
boot_params->vpu_telemetry_enable = ivpu_hw_reg_telemetry_enable_get(vdev);
+ boot_params->dvfs_mode = vdev->fw->dvfs_mode;
+ if (!IVPU_WA(disable_d0i3_msg))
+ boot_params->d0i3_delayed_entry = 1;
+ boot_params->d0i3_residency_time_us = 0;
+ boot_params->d0i3_entry_vpu_ts = 0;
wmb(); /* Flush WC buffers after writing bootparams */
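
ivpu_fw_check_api_ver_lt() above treats each entry of fw_hdr->api_version as a packed version number, major in the upper 16 bits and minor in the lower 16, and ivpu_fw_init_wa() uses it to enable the disable_d0i3_msg workaround for BOOT APIs older than 3.17. A standalone sketch of that comparison (the sample value is made up):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool api_ver_lt(uint32_t api_version, uint16_t major, uint16_t minor)
{
	uint16_t fw_major = (uint16_t)(api_version >> 16);	/* upper 16 bits */
	uint16_t fw_minor = (uint16_t)api_version;		/* lower 16 bits */

	return fw_major < major || (fw_major == major && fw_minor < minor);
}

int main(void)
{
	uint32_t boot_api = (3u << 16) | 16u;	/* pretend the FW reports BOOT API 3.16 */

	/* 3.16 < 3.17, so a driver following the logic above would enable the
	 * disable_d0i3_msg workaround for this firmware
	 */
	printf("older than 3.17: %s\n", api_ver_lt(boot_api, 3, 17) ? "yes" : "no");
	return 0;
}
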
diff --git a/drivers/accel/ivpu/ivpu_fw.h b/drivers/accel/ivpu/ivpu_fw.h
index 10ae2847f0ef..66b60fa161b5 100644
--- a/drivers/accel/ivpu/ivpu_fw.h
+++ b/drivers/accel/ivpu/ivpu_fw.h
@@ -27,6 +27,7 @@ struct ivpu_fw_info {
u32 trace_level;
u32 trace_destination_mask;
u64 trace_hw_component_mask;
+ u32 dvfs_mode;
};
int ivpu_fw_init(struct ivpu_device *vdev);
diff --git a/drivers/accel/ivpu/ivpu_gem.c b/drivers/accel/ivpu/ivpu_gem.c
index c91852f2edc8..1dda4f38ea25 100644
--- a/drivers/accel/ivpu/ivpu_gem.c
+++ b/drivers/accel/ivpu/ivpu_gem.c
@@ -20,215 +20,18 @@
#include "ivpu_mmu.h"
#include "ivpu_mmu_context.h"
-MODULE_IMPORT_NS(DMA_BUF);
-
static const struct drm_gem_object_funcs ivpu_gem_funcs;
-static struct lock_class_key prime_bo_lock_class_key;
-
-static int __must_check prime_alloc_pages_locked(struct ivpu_bo *bo)
-{
- /* Pages are managed by the underlying dma-buf */
- return 0;
-}
-
-static void prime_free_pages_locked(struct ivpu_bo *bo)
-{
- /* Pages are managed by the underlying dma-buf */
-}
-
-static int prime_map_pages_locked(struct ivpu_bo *bo)
-{
- struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
- struct sg_table *sgt;
-
- sgt = dma_buf_map_attachment_unlocked(bo->base.import_attach, DMA_BIDIRECTIONAL);
- if (IS_ERR(sgt)) {
- ivpu_err(vdev, "Failed to map attachment: %ld\n", PTR_ERR(sgt));
- return PTR_ERR(sgt);
- }
-
- bo->sgt = sgt;
- return 0;
-}
-
-static void prime_unmap_pages_locked(struct ivpu_bo *bo)
-{
- dma_buf_unmap_attachment_unlocked(bo->base.import_attach, bo->sgt, DMA_BIDIRECTIONAL);
- bo->sgt = NULL;
-}
-
-static const struct ivpu_bo_ops prime_ops = {
- .type = IVPU_BO_TYPE_PRIME,
- .name = "prime",
- .alloc_pages = prime_alloc_pages_locked,
- .free_pages = prime_free_pages_locked,
- .map_pages = prime_map_pages_locked,
- .unmap_pages = prime_unmap_pages_locked,
-};
-
-static int __must_check shmem_alloc_pages_locked(struct ivpu_bo *bo)
-{
- int npages = ivpu_bo_size(bo) >> PAGE_SHIFT;
- struct page **pages;
-
- pages = drm_gem_get_pages(&bo->base);
- if (IS_ERR(pages))
- return PTR_ERR(pages);
-
- if (bo->flags & DRM_IVPU_BO_WC)
- set_pages_array_wc(pages, npages);
- else if (bo->flags & DRM_IVPU_BO_UNCACHED)
- set_pages_array_uc(pages, npages);
-
- bo->pages = pages;
- return 0;
-}
-
-static void shmem_free_pages_locked(struct ivpu_bo *bo)
-{
- if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
- set_pages_array_wb(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
-
- drm_gem_put_pages(&bo->base, bo->pages, true, false);
- bo->pages = NULL;
-}
-
-static int ivpu_bo_map_pages_locked(struct ivpu_bo *bo)
-{
- int npages = ivpu_bo_size(bo) >> PAGE_SHIFT;
- struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
- struct sg_table *sgt;
- int ret;
-
- sgt = drm_prime_pages_to_sg(&vdev->drm, bo->pages, npages);
- if (IS_ERR(sgt)) {
- ivpu_err(vdev, "Failed to allocate sgtable\n");
- return PTR_ERR(sgt);
- }
-
- ret = dma_map_sgtable(vdev->drm.dev, sgt, DMA_BIDIRECTIONAL, 0);
- if (ret) {
- ivpu_err(vdev, "Failed to map BO in IOMMU: %d\n", ret);
- goto err_free_sgt;
- }
-
- bo->sgt = sgt;
- return 0;
-
-err_free_sgt:
- kfree(sgt);
- return ret;
-}
-
-static void ivpu_bo_unmap_pages_locked(struct ivpu_bo *bo)
-{
- struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
-
- dma_unmap_sgtable(vdev->drm.dev, bo->sgt, DMA_BIDIRECTIONAL, 0);
- sg_free_table(bo->sgt);
- kfree(bo->sgt);
- bo->sgt = NULL;
-}
-
-static const struct ivpu_bo_ops shmem_ops = {
- .type = IVPU_BO_TYPE_SHMEM,
- .name = "shmem",
- .alloc_pages = shmem_alloc_pages_locked,
- .free_pages = shmem_free_pages_locked,
- .map_pages = ivpu_bo_map_pages_locked,
- .unmap_pages = ivpu_bo_unmap_pages_locked,
-};
-
-static int __must_check internal_alloc_pages_locked(struct ivpu_bo *bo)
-{
- unsigned int i, npages = ivpu_bo_size(bo) >> PAGE_SHIFT;
- struct page **pages;
- int ret;
-
- pages = kvmalloc_array(npages, sizeof(*bo->pages), GFP_KERNEL);
- if (!pages)
- return -ENOMEM;
-
- for (i = 0; i < npages; i++) {
- pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
- if (!pages[i]) {
- ret = -ENOMEM;
- goto err_free_pages;
- }
- cond_resched();
- }
-
- bo->pages = pages;
- return 0;
-
-err_free_pages:
- while (i--)
- put_page(pages[i]);
- kvfree(pages);
- return ret;
-}
-
-static void internal_free_pages_locked(struct ivpu_bo *bo)
-{
- unsigned int i, npages = ivpu_bo_size(bo) >> PAGE_SHIFT;
-
- if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
- set_pages_array_wb(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
-
- for (i = 0; i < npages; i++)
- put_page(bo->pages[i]);
-
- kvfree(bo->pages);
- bo->pages = NULL;
-}
-
-static const struct ivpu_bo_ops internal_ops = {
- .type = IVPU_BO_TYPE_INTERNAL,
- .name = "internal",
- .alloc_pages = internal_alloc_pages_locked,
- .free_pages = internal_free_pages_locked,
- .map_pages = ivpu_bo_map_pages_locked,
- .unmap_pages = ivpu_bo_unmap_pages_locked,
-};
-
-static int __must_check ivpu_bo_alloc_and_map_pages_locked(struct ivpu_bo *bo)
-{
- struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
- int ret;
-
- lockdep_assert_held(&bo->lock);
- drm_WARN_ON(&vdev->drm, bo->sgt);
-
- ret = bo->ops->alloc_pages(bo);
- if (ret) {
- ivpu_err(vdev, "Failed to allocate pages for BO: %d", ret);
- return ret;
- }
-
- ret = bo->ops->map_pages(bo);
- if (ret) {
- ivpu_err(vdev, "Failed to map pages for BO: %d", ret);
- goto err_free_pages;
- }
- return ret;
-
-err_free_pages:
- bo->ops->free_pages(bo);
- return ret;
-}
-
-static void ivpu_bo_unmap_and_free_pages(struct ivpu_bo *bo)
+static inline void ivpu_dbg_bo(struct ivpu_device *vdev, struct ivpu_bo *bo, const char *action)
{
- mutex_lock(&bo->lock);
-
- WARN_ON(!bo->sgt);
- bo->ops->unmap_pages(bo);
- WARN_ON(bo->sgt);
- bo->ops->free_pages(bo);
- WARN_ON(bo->pages);
-
- mutex_unlock(&bo->lock);
+ if (bo->ctx)
+ ivpu_dbg(vdev, BO, "%6s: size %zu has_pages %d dma_mapped %d handle %u ctx %d vpu_addr 0x%llx mmu_mapped %d\n",
+ action, ivpu_bo_size(bo), (bool)bo->base.pages, (bool)bo->base.sgt,
+ bo->handle, bo->ctx->id, bo->vpu_addr, bo->mmu_mapped);
+ else
+ ivpu_dbg(vdev, BO, "%6s: size %zu has_pages %d dma_mapped %d handle %u (not added to context)\n",
+ action, ivpu_bo_size(bo), (bool)bo->base.pages, (bool)bo->base.sgt,
+ bo->handle);
}
/*
@@ -245,21 +48,24 @@ int __must_check ivpu_bo_pin(struct ivpu_bo *bo)
mutex_lock(&bo->lock);
- if (!bo->vpu_addr) {
- ivpu_err(vdev, "vpu_addr not set for BO ctx_id: %d handle: %d\n",
- bo->ctx->id, bo->handle);
+ ivpu_dbg_bo(vdev, bo, "pin");
+
+ if (!bo->ctx) {
+ ivpu_err(vdev, "vpu_addr not allocated for BO %d\n", bo->handle);
ret = -EINVAL;
goto unlock;
}
- if (!bo->sgt) {
- ret = ivpu_bo_alloc_and_map_pages_locked(bo);
- if (ret)
+ if (!bo->mmu_mapped) {
+ struct sg_table *sgt = drm_gem_shmem_get_pages_sgt(&bo->base);
+
+ if (IS_ERR(sgt)) {
+ ret = PTR_ERR(sgt);
+ ivpu_err(vdev, "Failed to map BO in IOMMU: %d\n", ret);
goto unlock;
- }
+ }
- if (!bo->mmu_mapped) {
- ret = ivpu_mmu_context_map_sgt(vdev, bo->ctx, bo->vpu_addr, bo->sgt,
+ ret = ivpu_mmu_context_map_sgt(vdev, bo->ctx, bo->vpu_addr, sgt,
ivpu_bo_is_snooped(bo));
if (ret) {
ivpu_err(vdev, "Failed to map BO in MMU: %d\n", ret);
@@ -281,248 +87,213 @@ ivpu_bo_alloc_vpu_addr(struct ivpu_bo *bo, struct ivpu_mmu_context *ctx,
struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
int ret;
- if (!range) {
- if (bo->flags & DRM_IVPU_BO_SHAVE_MEM)
- range = &vdev->hw->ranges.shave;
- else if (bo->flags & DRM_IVPU_BO_DMA_MEM)
- range = &vdev->hw->ranges.dma;
- else
- range = &vdev->hw->ranges.user;
- }
+ mutex_lock(&bo->lock);
- mutex_lock(&ctx->lock);
- ret = ivpu_mmu_context_insert_node_locked(ctx, range, ivpu_bo_size(bo), &bo->mm_node);
+ ret = ivpu_mmu_context_insert_node(ctx, range, ivpu_bo_size(bo), &bo->mm_node);
if (!ret) {
bo->ctx = ctx;
bo->vpu_addr = bo->mm_node.start;
- list_add_tail(&bo->ctx_node, &ctx->bo_list);
+ } else {
+ ivpu_err(vdev, "Failed to add BO to context %u: %d\n", ctx->id, ret);
}
- mutex_unlock(&ctx->lock);
+
+ ivpu_dbg_bo(vdev, bo, "alloc");
+
+ mutex_unlock(&bo->lock);
return ret;
}
-static void ivpu_bo_free_vpu_addr(struct ivpu_bo *bo)
+static void ivpu_bo_unbind_locked(struct ivpu_bo *bo)
{
struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
- struct ivpu_mmu_context *ctx = bo->ctx;
- ivpu_dbg(vdev, BO, "remove from ctx: ctx %d vpu_addr 0x%llx allocated %d mmu_mapped %d\n",
- ctx->id, bo->vpu_addr, (bool)bo->sgt, bo->mmu_mapped);
+ lockdep_assert_held(&bo->lock);
- mutex_lock(&bo->lock);
+ ivpu_dbg_bo(vdev, bo, "unbind");
+
+ /* TODO: dma_unmap */
if (bo->mmu_mapped) {
- drm_WARN_ON(&vdev->drm, !bo->sgt);
- ivpu_mmu_context_unmap_sgt(vdev, ctx, bo->vpu_addr, bo->sgt);
+ drm_WARN_ON(&vdev->drm, !bo->ctx);
+ drm_WARN_ON(&vdev->drm, !bo->vpu_addr);
+ drm_WARN_ON(&vdev->drm, !bo->base.sgt);
+ ivpu_mmu_context_unmap_sgt(vdev, bo->ctx, bo->vpu_addr, bo->base.sgt);
bo->mmu_mapped = false;
}
- mutex_lock(&ctx->lock);
- list_del(&bo->ctx_node);
- bo->vpu_addr = 0;
- bo->ctx = NULL;
- ivpu_mmu_context_remove_node_locked(ctx, &bo->mm_node);
- mutex_unlock(&ctx->lock);
+ if (bo->ctx) {
+ ivpu_mmu_context_remove_node(bo->ctx, &bo->mm_node);
+ bo->vpu_addr = 0;
+ bo->ctx = NULL;
+ }
+}
+static void ivpu_bo_unbind(struct ivpu_bo *bo)
+{
+ mutex_lock(&bo->lock);
+ ivpu_bo_unbind_locked(bo);
mutex_unlock(&bo->lock);
}
-void ivpu_bo_remove_all_bos_from_context(struct ivpu_mmu_context *ctx)
+void ivpu_bo_remove_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx)
{
- struct ivpu_bo *bo, *tmp;
+ struct ivpu_bo *bo;
+
+ if (drm_WARN_ON(&vdev->drm, !ctx))
+ return;
- list_for_each_entry_safe(bo, tmp, &ctx->bo_list, ctx_node)
- ivpu_bo_free_vpu_addr(bo);
+ mutex_lock(&vdev->bo_list_lock);
+ list_for_each_entry(bo, &vdev->bo_list, bo_list_node) {
+ mutex_lock(&bo->lock);
+ if (bo->ctx == ctx)
+ ivpu_bo_unbind_locked(bo);
+ mutex_unlock(&bo->lock);
+ }
+ mutex_unlock(&vdev->bo_list_lock);
}
-static struct ivpu_bo *
-ivpu_bo_alloc(struct ivpu_device *vdev, struct ivpu_mmu_context *mmu_context,
- u64 size, u32 flags, const struct ivpu_bo_ops *ops,
- const struct ivpu_addr_range *range, u64 user_ptr)
+struct drm_gem_object *ivpu_gem_create_object(struct drm_device *dev, size_t size)
{
struct ivpu_bo *bo;
- int ret = 0;
-
- if (drm_WARN_ON(&vdev->drm, size == 0 || !PAGE_ALIGNED(size)))
- return ERR_PTR(-EINVAL);
- switch (flags & DRM_IVPU_BO_CACHE_MASK) {
- case DRM_IVPU_BO_CACHED:
- case DRM_IVPU_BO_UNCACHED:
- case DRM_IVPU_BO_WC:
- break;
- default:
+ if (size == 0 || !PAGE_ALIGNED(size))
return ERR_PTR(-EINVAL);
- }
bo = kzalloc(sizeof(*bo), GFP_KERNEL);
if (!bo)
return ERR_PTR(-ENOMEM);
- mutex_init(&bo->lock);
- bo->base.funcs = &ivpu_gem_funcs;
- bo->flags = flags;
- bo->ops = ops;
- bo->user_ptr = user_ptr;
-
- if (ops->type == IVPU_BO_TYPE_SHMEM)
- ret = drm_gem_object_init(&vdev->drm, &bo->base, size);
- else
- drm_gem_private_object_init(&vdev->drm, &bo->base, size);
-
- if (ret) {
- ivpu_err(vdev, "Failed to initialize drm object\n");
- goto err_free;
- }
-
- if (flags & DRM_IVPU_BO_MAPPABLE) {
- ret = drm_gem_create_mmap_offset(&bo->base);
- if (ret) {
- ivpu_err(vdev, "Failed to allocate mmap offset\n");
- goto err_release;
- }
- }
-
- if (mmu_context) {
- ret = ivpu_bo_alloc_vpu_addr(bo, mmu_context, range);
- if (ret) {
- ivpu_err(vdev, "Failed to add BO to context: %d\n", ret);
- goto err_release;
- }
- }
+ bo->base.base.funcs = &ivpu_gem_funcs;
+ bo->base.pages_mark_dirty_on_put = true; /* VPU can dirty a BO anytime */
- return bo;
+ INIT_LIST_HEAD(&bo->bo_list_node);
+ mutex_init(&bo->lock);
-err_release:
- drm_gem_object_release(&bo->base);
-err_free:
- kfree(bo);
- return ERR_PTR(ret);
+ return &bo->base.base;
}
-static void ivpu_bo_free(struct drm_gem_object *obj)
+static struct ivpu_bo *
+ivpu_bo_create(struct ivpu_device *vdev, u64 size, u32 flags)
{
- struct ivpu_bo *bo = to_ivpu_bo(obj);
- struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
-
- if (bo->ctx)
- ivpu_dbg(vdev, BO, "free: ctx %d vpu_addr 0x%llx allocated %d mmu_mapped %d\n",
- bo->ctx->id, bo->vpu_addr, (bool)bo->sgt, bo->mmu_mapped);
- else
- ivpu_dbg(vdev, BO, "free: ctx (released) allocated %d mmu_mapped %d\n",
- (bool)bo->sgt, bo->mmu_mapped);
-
- drm_WARN_ON(&vdev->drm, !dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ));
+ struct drm_gem_shmem_object *shmem;
+ struct ivpu_bo *bo;
- vunmap(bo->kvaddr);
+ switch (flags & DRM_IVPU_BO_CACHE_MASK) {
+ case DRM_IVPU_BO_CACHED:
+ case DRM_IVPU_BO_WC:
+ break;
+ default:
+ return ERR_PTR(-EINVAL);
+ }
- if (bo->ctx)
- ivpu_bo_free_vpu_addr(bo);
+ shmem = drm_gem_shmem_create(&vdev->drm, size);
+ if (IS_ERR(shmem))
+ return ERR_CAST(shmem);
- if (bo->sgt)
- ivpu_bo_unmap_and_free_pages(bo);
+ bo = to_ivpu_bo(&shmem->base);
+ bo->base.map_wc = flags & DRM_IVPU_BO_WC;
+ bo->flags = flags;
- if (bo->base.import_attach)
- drm_prime_gem_destroy(&bo->base, bo->sgt);
+ mutex_lock(&vdev->bo_list_lock);
+ list_add_tail(&bo->bo_list_node, &vdev->bo_list);
+ mutex_unlock(&vdev->bo_list_lock);
- drm_gem_object_release(&bo->base);
+ ivpu_dbg(vdev, BO, "create: vpu_addr 0x%llx size %zu flags 0x%x\n",
+ bo->vpu_addr, bo->base.base.size, flags);
- mutex_destroy(&bo->lock);
- kfree(bo);
+ return bo;
}
-static int ivpu_bo_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+static int ivpu_bo_open(struct drm_gem_object *obj, struct drm_file *file)
{
+ struct ivpu_file_priv *file_priv = file->driver_priv;
+ struct ivpu_device *vdev = file_priv->vdev;
struct ivpu_bo *bo = to_ivpu_bo(obj);
- struct ivpu_device *vdev = ivpu_bo_to_vdev(bo);
-
- ivpu_dbg(vdev, BO, "mmap: ctx %u handle %u vpu_addr 0x%llx size %zu type %s",
- bo->ctx->id, bo->handle, bo->vpu_addr, ivpu_bo_size(bo), bo->ops->name);
+ struct ivpu_addr_range *range;
- if (obj->import_attach) {
- /* Drop the reference drm_gem_mmap_obj() acquired.*/
- drm_gem_object_put(obj);
- vma->vm_private_data = NULL;
- return dma_buf_mmap(obj->dma_buf, vma, 0);
- }
-
- vm_flags_set(vma, VM_PFNMAP | VM_DONTEXPAND);
- vma->vm_page_prot = ivpu_bo_pgprot(bo, vm_get_page_prot(vma->vm_flags));
+ if (bo->flags & DRM_IVPU_BO_SHAVE_MEM)
+ range = &vdev->hw->ranges.shave;
+ else if (bo->flags & DRM_IVPU_BO_DMA_MEM)
+ range = &vdev->hw->ranges.dma;
+ else
+ range = &vdev->hw->ranges.user;
- return 0;
+ return ivpu_bo_alloc_vpu_addr(bo, &file_priv->ctx, range);
}
-static struct sg_table *ivpu_bo_get_sg_table(struct drm_gem_object *obj)
+static void ivpu_bo_free(struct drm_gem_object *obj)
{
+ struct ivpu_device *vdev = to_ivpu_device(obj->dev);
struct ivpu_bo *bo = to_ivpu_bo(obj);
- loff_t npages = obj->size >> PAGE_SHIFT;
- int ret = 0;
- mutex_lock(&bo->lock);
+ mutex_lock(&vdev->bo_list_lock);
+ list_del(&bo->bo_list_node);
+ mutex_unlock(&vdev->bo_list_lock);
- if (!bo->sgt)
- ret = ivpu_bo_alloc_and_map_pages_locked(bo);
+ drm_WARN_ON(&vdev->drm, !dma_resv_test_signaled(obj->resv, DMA_RESV_USAGE_READ));
- mutex_unlock(&bo->lock);
+ ivpu_dbg_bo(vdev, bo, "free");
- if (ret)
- return ERR_PTR(ret);
+ ivpu_bo_unbind(bo);
+ mutex_destroy(&bo->lock);
- return drm_prime_pages_to_sg(obj->dev, bo->pages, npages);
+ drm_WARN_ON(obj->dev, bo->base.pages_use_count > 1);
+ drm_gem_shmem_free(&bo->base);
}
-static vm_fault_t ivpu_vm_fault(struct vm_fault *vmf)
-{
- struct vm_area_struct *vma = vmf->vma;
- struct drm_gem_object *obj = vma->vm_private_data;
- struct ivpu_bo *bo = to_ivpu_bo(obj);
- loff_t npages = obj->size >> PAGE_SHIFT;
- pgoff_t page_offset;
- struct page *page;
- vm_fault_t ret;
- int err;
-
- mutex_lock(&bo->lock);
-
- if (!bo->sgt) {
- err = ivpu_bo_alloc_and_map_pages_locked(bo);
- if (err) {
- ret = vmf_error(err);
- goto unlock;
- }
- }
-
- /* We don't use vmf->pgoff since that has the fake offset */
- page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
- if (page_offset >= npages) {
- ret = VM_FAULT_SIGBUS;
- } else {
- page = bo->pages[page_offset];
- ret = vmf_insert_pfn(vma, vmf->address, page_to_pfn(page));
- }
-
-unlock:
- mutex_unlock(&bo->lock);
+static const struct dma_buf_ops ivpu_bo_dmabuf_ops = {
+ .cache_sgt_mapping = true,
+ .attach = drm_gem_map_attach,
+ .detach = drm_gem_map_detach,
+ .map_dma_buf = drm_gem_map_dma_buf,
+ .unmap_dma_buf = drm_gem_unmap_dma_buf,
+ .release = drm_gem_dmabuf_release,
+ .mmap = drm_gem_dmabuf_mmap,
+ .vmap = drm_gem_dmabuf_vmap,
+ .vunmap = drm_gem_dmabuf_vunmap,
+};
- return ret;
+static struct dma_buf *ivpu_bo_export(struct drm_gem_object *obj, int flags)
+{
+ struct drm_device *dev = obj->dev;
+ struct dma_buf_export_info exp_info = {
+ .exp_name = KBUILD_MODNAME,
+ .owner = dev->driver->fops->owner,
+ .ops = &ivpu_bo_dmabuf_ops,
+ .size = obj->size,
+ .flags = flags,
+ .priv = obj,
+ .resv = obj->resv,
+ };
+ void *sgt;
+
+ /*
+ * Make sure that pages are allocated and dma-mapped before exporting the bo.
+ * DMA-mapping is required if the bo will be imported to the same device.
+ */
+ sgt = drm_gem_shmem_get_pages_sgt(to_drm_gem_shmem_obj(obj));
+ if (IS_ERR(sgt))
+ return sgt;
+
+ return drm_gem_dmabuf_export(dev, &exp_info);
}
-static const struct vm_operations_struct ivpu_vm_ops = {
- .fault = ivpu_vm_fault,
- .open = drm_gem_vm_open,
- .close = drm_gem_vm_close,
-};
-
static const struct drm_gem_object_funcs ivpu_gem_funcs = {
.free = ivpu_bo_free,
- .mmap = ivpu_bo_mmap,
- .vm_ops = &ivpu_vm_ops,
- .get_sg_table = ivpu_bo_get_sg_table,
+ .open = ivpu_bo_open,
+ .export = ivpu_bo_export,
+ .print_info = drm_gem_shmem_object_print_info,
+ .pin = drm_gem_shmem_object_pin,
+ .unpin = drm_gem_shmem_object_unpin,
+ .get_sg_table = drm_gem_shmem_object_get_sg_table,
+ .vmap = drm_gem_shmem_object_vmap,
+ .vunmap = drm_gem_shmem_object_vunmap,
+ .mmap = drm_gem_shmem_object_mmap,
+ .vm_ops = &drm_gem_shmem_vm_ops,
};
-int
-ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
struct ivpu_file_priv *file_priv = file->driver_priv;
struct ivpu_device *vdev = file_priv->vdev;
@@ -537,23 +308,20 @@ ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
if (size == 0)
return -EINVAL;
- bo = ivpu_bo_alloc(vdev, &file_priv->ctx, size, args->flags, &shmem_ops, NULL, 0);
+ bo = ivpu_bo_create(vdev, size, args->flags);
if (IS_ERR(bo)) {
ivpu_err(vdev, "Failed to create BO: %pe (ctx %u size %llu flags 0x%x)",
bo, file_priv->ctx.id, args->size, args->flags);
return PTR_ERR(bo);
}
- ret = drm_gem_handle_create(file, &bo->base, &bo->handle);
+ ret = drm_gem_handle_create(file, &bo->base.base, &bo->handle);
if (!ret) {
args->vpu_addr = bo->vpu_addr;
args->handle = bo->handle;
}
- drm_gem_object_put(&bo->base);
-
- ivpu_dbg(vdev, BO, "alloc shmem: ctx %u vpu_addr 0x%llx size %zu flags 0x%x\n",
- file_priv->ctx.id, bo->vpu_addr, ivpu_bo_size(bo), bo->flags);
+ drm_gem_object_put(&bo->base.base);
return ret;
}
@@ -563,8 +331,8 @@ ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 fla
{
const struct ivpu_addr_range *range;
struct ivpu_addr_range fixed_range;
+ struct iosys_map map;
struct ivpu_bo *bo;
- pgprot_t prot;
int ret;
drm_WARN_ON(&vdev->drm, !PAGE_ALIGNED(vpu_addr));
@@ -578,81 +346,42 @@ ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 fla
range = &vdev->hw->ranges.global;
}
- bo = ivpu_bo_alloc(vdev, &vdev->gctx, size, flags, &internal_ops, range, 0);
+ bo = ivpu_bo_create(vdev, size, flags);
if (IS_ERR(bo)) {
ivpu_err(vdev, "Failed to create BO: %pe (vpu_addr 0x%llx size %llu flags 0x%x)",
bo, vpu_addr, size, flags);
return NULL;
}
- ret = ivpu_bo_pin(bo);
+ ret = ivpu_bo_alloc_vpu_addr(bo, &vdev->gctx, range);
if (ret)
goto err_put;
- if (ivpu_bo_cache_mode(bo) != DRM_IVPU_BO_CACHED)
- drm_clflush_pages(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
-
- if (bo->flags & DRM_IVPU_BO_WC)
- set_pages_array_wc(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
- else if (bo->flags & DRM_IVPU_BO_UNCACHED)
- set_pages_array_uc(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT);
-
- prot = ivpu_bo_pgprot(bo, PAGE_KERNEL);
- bo->kvaddr = vmap(bo->pages, ivpu_bo_size(bo) >> PAGE_SHIFT, VM_MAP, prot);
- if (!bo->kvaddr) {
- ivpu_err(vdev, "Failed to map BO into kernel virtual memory\n");
+ ret = ivpu_bo_pin(bo);
+ if (ret)
goto err_put;
- }
- ivpu_dbg(vdev, BO, "alloc internal: ctx 0 vpu_addr 0x%llx size %zu flags 0x%x\n",
- bo->vpu_addr, ivpu_bo_size(bo), flags);
+ ret = drm_gem_shmem_vmap(&bo->base, &map);
+ if (ret)
+ goto err_put;
return bo;
err_put:
- drm_gem_object_put(&bo->base);
+ drm_gem_object_put(&bo->base.base);
return NULL;
}
void ivpu_bo_free_internal(struct ivpu_bo *bo)
{
- drm_gem_object_put(&bo->base);
-}
-
-struct drm_gem_object *ivpu_gem_prime_import(struct drm_device *dev, struct dma_buf *buf)
-{
- struct ivpu_device *vdev = to_ivpu_device(dev);
- struct dma_buf_attachment *attach;
- struct ivpu_bo *bo;
-
- attach = dma_buf_attach(buf, dev->dev);
- if (IS_ERR(attach))
- return ERR_CAST(attach);
-
- get_dma_buf(buf);
+ struct iosys_map map = IOSYS_MAP_INIT_VADDR(bo->base.vaddr);
- bo = ivpu_bo_alloc(vdev, NULL, buf->size, DRM_IVPU_BO_MAPPABLE, &prime_ops, NULL, 0);
- if (IS_ERR(bo)) {
- ivpu_err(vdev, "Failed to import BO: %pe (size %lu)", bo, buf->size);
- goto err_detach;
- }
-
- lockdep_set_class(&bo->lock, &prime_bo_lock_class_key);
-
- bo->base.import_attach = attach;
-
- return &bo->base;
-
-err_detach:
- dma_buf_detach(buf, attach);
- dma_buf_put(buf);
- return ERR_CAST(bo);
+ drm_gem_shmem_vunmap(&bo->base, &map);
+ drm_gem_object_put(&bo->base.base);
}
int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
- struct ivpu_file_priv *file_priv = file->driver_priv;
- struct ivpu_device *vdev = to_ivpu_device(dev);
struct drm_ivpu_bo_info *args = data;
struct drm_gem_object *obj;
struct ivpu_bo *bo;
@@ -665,21 +394,12 @@ int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file
bo = to_ivpu_bo(obj);
mutex_lock(&bo->lock);
-
- if (!bo->ctx) {
- ret = ivpu_bo_alloc_vpu_addr(bo, &file_priv->ctx, NULL);
- if (ret) {
- ivpu_err(vdev, "Failed to allocate vpu_addr: %d\n", ret);
- goto unlock;
- }
- }
-
args->flags = bo->flags;
args->mmap_offset = drm_vma_node_offset_addr(&obj->vma_node);
args->vpu_addr = bo->vpu_addr;
args->size = obj->size;
-unlock:
mutex_unlock(&bo->lock);
+
drm_gem_object_put(obj);
return ret;
}
@@ -714,41 +434,41 @@ static void ivpu_bo_print_info(struct ivpu_bo *bo, struct drm_printer *p)
{
unsigned long dma_refcount = 0;
- if (bo->base.dma_buf && bo->base.dma_buf->file)
- dma_refcount = atomic_long_read(&bo->base.dma_buf->file->f_count);
+ mutex_lock(&bo->lock);
+
+ if (bo->base.base.dma_buf && bo->base.base.dma_buf->file)
+ dma_refcount = atomic_long_read(&bo->base.base.dma_buf->file->f_count);
+
+ drm_printf(p, "%-3u %-6d 0x%-12llx %-10lu 0x%-8x %-4u %-8lu",
+ bo->ctx->id, bo->handle, bo->vpu_addr, bo->base.base.size,
+ bo->flags, kref_read(&bo->base.base.refcount), dma_refcount);
+
+ if (bo->base.base.import_attach)
+ drm_printf(p, " imported");
+
+ if (bo->base.pages)
+ drm_printf(p, " has_pages");
- drm_printf(p, "%5u %6d %16llx %10lu %10u %12lu %14s\n",
- bo->ctx->id, bo->handle, bo->vpu_addr, ivpu_bo_size(bo),
- kref_read(&bo->base.refcount), dma_refcount, bo->ops->name);
+ if (bo->mmu_mapped)
+ drm_printf(p, " mmu_mapped");
+
+ drm_printf(p, "\n");
+
+ mutex_unlock(&bo->lock);
}
void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p)
{
struct ivpu_device *vdev = to_ivpu_device(dev);
- struct ivpu_file_priv *file_priv;
- unsigned long ctx_id;
struct ivpu_bo *bo;
- drm_printf(p, "%5s %6s %16s %10s %10s %12s %14s\n",
- "ctx", "handle", "vpu_addr", "size", "refcount", "dma_refcount", "type");
+ drm_printf(p, "%-3s %-6s %-14s %-10s %-10s %-4s %-8s %s\n",
+ "ctx", "handle", "vpu_addr", "size", "flags", "refs", "dma_refs", "attribs");
- mutex_lock(&vdev->gctx.lock);
- list_for_each_entry(bo, &vdev->gctx.bo_list, ctx_node)
+ mutex_lock(&vdev->bo_list_lock);
+ list_for_each_entry(bo, &vdev->bo_list, bo_list_node)
ivpu_bo_print_info(bo, p);
- mutex_unlock(&vdev->gctx.lock);
-
- xa_for_each(&vdev->context_xa, ctx_id, file_priv) {
- file_priv = ivpu_file_priv_get_by_ctx_id(vdev, ctx_id);
- if (!file_priv)
- continue;
-
- mutex_lock(&file_priv->ctx.lock);
- list_for_each_entry(bo, &file_priv->ctx.bo_list, ctx_node)
- ivpu_bo_print_info(bo, p);
- mutex_unlock(&file_priv->ctx.lock);
-
- ivpu_file_priv_put(&file_priv);
- }
+ mutex_unlock(&vdev->bo_list_lock);
}
void ivpu_bo_list_print(struct drm_device *dev)
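
The ivpu_gem.c rewrite above drops the three hand-rolled backends (shmem, internal, prime) and their page/sg management in favour of the generic drm_gem_shmem helpers: the BO now embeds struct drm_gem_shmem_object, .gem_create_object allocates the wrapper, and most drm_gem_object_funcs entries are delegated to the drm_gem_shmem_object_* wrappers, leaving only VPU-address and MMU binding as driver-specific code. A condensed sketch of that adoption pattern with illustrative my_bo naming (not the driver's actual code):

#include <drm/drm_device.h>
#include <drm/drm_gem.h>
#include <drm/drm_gem_shmem_helper.h>
#include <linux/err.h>
#include <linux/slab.h>

struct my_bo {
	struct drm_gem_shmem_object base;	/* must be first for container_of() */
	u64 dev_addr;				/* driver-specific device VA */
};

static const struct drm_gem_object_funcs my_gem_funcs;

static struct my_bo *to_my_bo(struct drm_gem_object *obj)
{
	return container_of(obj, struct my_bo, base.base);
}

/* hooked up as .gem_create_object in struct drm_driver */
static struct drm_gem_object *my_gem_create_object(struct drm_device *dev, size_t size)
{
	struct my_bo *bo = kzalloc(sizeof(*bo), GFP_KERNEL);

	if (!bo)
		return ERR_PTR(-ENOMEM);
	bo->base.base.funcs = &my_gem_funcs;
	return &bo->base.base;
}

/* everything generic is delegated to the shmem helper wrappers */
static const struct drm_gem_object_funcs my_gem_funcs = {
	.free		= drm_gem_shmem_object_free,
	.print_info	= drm_gem_shmem_object_print_info,
	.pin		= drm_gem_shmem_object_pin,
	.unpin		= drm_gem_shmem_object_unpin,
	.get_sg_table	= drm_gem_shmem_object_get_sg_table,
	.vmap		= drm_gem_shmem_object_vmap,
	.vunmap		= drm_gem_shmem_object_vunmap,
	.mmap		= drm_gem_shmem_object_mmap,
	.vm_ops		= &drm_gem_shmem_vm_ops,
};

A driver keeping extra per-BO state (here, the device address and MMU mapping) then adds it on top of the embedded shmem object, as the ivpu diff does with its .open and .free callbacks.
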
diff --git a/drivers/accel/ivpu/ivpu_gem.h b/drivers/accel/ivpu/ivpu_gem.h
index a0b4d4a32b3b..d75cad0d3c74 100644
--- a/drivers/accel/ivpu/ivpu_gem.h
+++ b/drivers/accel/ivpu/ivpu_gem.h
@@ -6,84 +6,52 @@
#define __IVPU_GEM_H__
#include <drm/drm_gem.h>
+#include <drm/drm_gem_shmem_helper.h>
#include <drm/drm_mm.h>
-struct dma_buf;
-struct ivpu_bo_ops;
struct ivpu_file_priv;
struct ivpu_bo {
- struct drm_gem_object base;
- const struct ivpu_bo_ops *ops;
-
+ struct drm_gem_shmem_object base;
struct ivpu_mmu_context *ctx;
- struct list_head ctx_node;
+ struct list_head bo_list_node;
struct drm_mm_node mm_node;
- struct mutex lock; /* Protects: pages, sgt, mmu_mapped */
- struct sg_table *sgt;
- struct page **pages;
- bool mmu_mapped;
-
- void *kvaddr;
+ struct mutex lock; /* Protects: ctx, mmu_mapped, vpu_addr */
u64 vpu_addr;
u32 handle;
u32 flags;
- uintptr_t user_ptr;
- u32 job_status;
-};
-
-enum ivpu_bo_type {
- IVPU_BO_TYPE_SHMEM = 1,
- IVPU_BO_TYPE_INTERNAL,
- IVPU_BO_TYPE_PRIME,
-};
-
-struct ivpu_bo_ops {
- enum ivpu_bo_type type;
- const char *name;
- int (*alloc_pages)(struct ivpu_bo *bo);
- void (*free_pages)(struct ivpu_bo *bo);
- int (*map_pages)(struct ivpu_bo *bo);
- void (*unmap_pages)(struct ivpu_bo *bo);
+ u32 job_status; /* Valid only for command buffer */
+ bool mmu_mapped;
};
int ivpu_bo_pin(struct ivpu_bo *bo);
-void ivpu_bo_remove_all_bos_from_context(struct ivpu_mmu_context *ctx);
-void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p);
-void ivpu_bo_list_print(struct drm_device *dev);
+void ivpu_bo_remove_all_bos_from_context(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx);
-struct ivpu_bo *
-ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 flags);
+struct drm_gem_object *ivpu_gem_create_object(struct drm_device *dev, size_t size);
+struct ivpu_bo *ivpu_bo_alloc_internal(struct ivpu_device *vdev, u64 vpu_addr, u64 size, u32 flags);
void ivpu_bo_free_internal(struct ivpu_bo *bo);
-struct drm_gem_object *ivpu_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf);
-void ivpu_bo_unmap_sgt_and_remove_from_context(struct ivpu_bo *bo);
int ivpu_bo_create_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
int ivpu_bo_info_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
int ivpu_bo_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
+void ivpu_bo_list(struct drm_device *dev, struct drm_printer *p);
+void ivpu_bo_list_print(struct drm_device *dev);
+
static inline struct ivpu_bo *to_ivpu_bo(struct drm_gem_object *obj)
{
- return container_of(obj, struct ivpu_bo, base);
+ return container_of(obj, struct ivpu_bo, base.base);
}
static inline void *ivpu_bo_vaddr(struct ivpu_bo *bo)
{
- return bo->kvaddr;
+ return bo->base.vaddr;
}
static inline size_t ivpu_bo_size(struct ivpu_bo *bo)
{
- return bo->base.size;
-}
-
-static inline struct page *ivpu_bo_get_page(struct ivpu_bo *bo, u64 offset)
-{
- if (offset > ivpu_bo_size(bo) || !bo->pages)
- return NULL;
-
- return bo->pages[offset / PAGE_SIZE];
+ return bo->base.base.size;
}
static inline u32 ivpu_bo_cache_mode(struct ivpu_bo *bo)
@@ -96,20 +64,9 @@ static inline bool ivpu_bo_is_snooped(struct ivpu_bo *bo)
return ivpu_bo_cache_mode(bo) == DRM_IVPU_BO_CACHED;
}
-static inline pgprot_t ivpu_bo_pgprot(struct ivpu_bo *bo, pgprot_t prot)
-{
- if (bo->flags & DRM_IVPU_BO_WC)
- return pgprot_writecombine(prot);
-
- if (bo->flags & DRM_IVPU_BO_UNCACHED)
- return pgprot_noncached(prot);
-
- return prot;
-}
-
static inline struct ivpu_device *ivpu_bo_to_vdev(struct ivpu_bo *bo)
{
- return to_ivpu_device(bo->base.dev);
+ return to_ivpu_device(bo->base.base.dev);
}
static inline void *ivpu_to_cpu_addr(struct ivpu_bo *bo, u32 vpu_addr)
diff --git a/drivers/accel/ivpu/ivpu_hw.h b/drivers/accel/ivpu/ivpu_hw.h
index 1079e06255ba..b2909168a0a6 100644
--- a/drivers/accel/ivpu/ivpu_hw.h
+++ b/drivers/accel/ivpu/ivpu_hw.h
@@ -15,8 +15,11 @@ struct ivpu_hw_ops {
int (*power_down)(struct ivpu_device *vdev);
int (*reset)(struct ivpu_device *vdev);
bool (*is_idle)(struct ivpu_device *vdev);
+ int (*wait_for_idle)(struct ivpu_device *vdev);
void (*wdt_disable)(struct ivpu_device *vdev);
void (*diagnose_failure)(struct ivpu_device *vdev);
+ u32 (*profiling_freq_get)(struct ivpu_device *vdev);
+ void (*profiling_freq_drive)(struct ivpu_device *vdev, bool enable);
u32 (*reg_pll_freq_get)(struct ivpu_device *vdev);
u32 (*reg_telemetry_offset_get)(struct ivpu_device *vdev);
u32 (*reg_telemetry_size_get)(struct ivpu_device *vdev);
@@ -58,6 +61,8 @@ struct ivpu_hw_info {
u32 sku;
u16 config;
int dma_bits;
+ ktime_t d0i3_entry_host_ts;
+ u64 d0i3_entry_vpu_ts;
};
extern const struct ivpu_hw_ops ivpu_hw_37xx_ops;
@@ -85,6 +90,11 @@ static inline bool ivpu_hw_is_idle(struct ivpu_device *vdev)
return vdev->hw->ops->is_idle(vdev);
};
+static inline int ivpu_hw_wait_for_idle(struct ivpu_device *vdev)
+{
+ return vdev->hw->ops->wait_for_idle(vdev);
+};
+
static inline int ivpu_hw_power_down(struct ivpu_device *vdev)
{
ivpu_dbg(vdev, PM, "HW power down\n");
@@ -104,6 +114,16 @@ static inline void ivpu_hw_wdt_disable(struct ivpu_device *vdev)
vdev->hw->ops->wdt_disable(vdev);
};
+static inline u32 ivpu_hw_profiling_freq_get(struct ivpu_device *vdev)
+{
+ return vdev->hw->ops->profiling_freq_get(vdev);
+};
+
+static inline void ivpu_hw_profiling_freq_drive(struct ivpu_device *vdev, bool enable)
+{
+ return vdev->hw->ops->profiling_freq_drive(vdev, enable);
+};
+
/* Register indirect accesses */
static inline u32 ivpu_hw_reg_pll_freq_get(struct ivpu_device *vdev)
{
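
The new wait_for_idle, profiling_freq_get and profiling_freq_drive callbacks extend the per-generation ops table that ivpu_hw.h already uses: each hardware generation (37xx, 40xx) fills in struct ivpu_hw_ops, and callers go through thin static inline wrappers that dispatch via vdev->hw->ops. A stripped-down sketch of that pattern with hypothetical my_* names:

#include <linux/types.h>

struct my_dev;

struct my_hw_ops {
	u32  (*profiling_freq_get)(struct my_dev *dev);
	void (*profiling_freq_drive)(struct my_dev *dev, bool enable);
};

struct my_dev {
	const struct my_hw_ops *ops;	/* points at the generation-specific table */
};

/* callers never touch the table directly; they use thin wrappers like these */
static inline u32 my_hw_profiling_freq_get(struct my_dev *dev)
{
	return dev->ops->profiling_freq_get(dev);
}

static inline void my_hw_profiling_freq_drive(struct my_dev *dev, bool enable)
{
	dev->ops->profiling_freq_drive(dev, enable);
}
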
diff --git a/drivers/accel/ivpu/ivpu_hw_37xx.c b/drivers/accel/ivpu/ivpu_hw_37xx.c
index d530384f8d60..574cdeefb66b 100644
--- a/drivers/accel/ivpu/ivpu_hw_37xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_37xx.c
@@ -29,6 +29,7 @@
#define PLL_REF_CLK_FREQ (50 * 1000000)
#define PLL_SIMULATION_FREQ (10 * 1000000)
+#define PLL_PROF_CLK_FREQ (38400 * 1000)
#define PLL_DEFAULT_EPP_VALUE 0x80
#define TIM_SAFE_ENABLE 0xf1d0dead
@@ -37,7 +38,7 @@
#define TIMEOUT_US (150 * USEC_PER_MSEC)
#define PWR_ISLAND_STATUS_TIMEOUT_US (5 * USEC_PER_MSEC)
#define PLL_TIMEOUT_US (1500 * USEC_PER_MSEC)
-#define IDLE_TIMEOUT_US (500 * USEC_PER_MSEC)
+#define IDLE_TIMEOUT_US (5 * USEC_PER_MSEC)
#define ICB_0_IRQ_MASK ((REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT)) | \
(REG_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT)) | \
@@ -96,6 +97,7 @@ static void ivpu_hw_timeouts_init(struct ivpu_device *vdev)
vdev->timeout.tdr = 2000;
vdev->timeout.reschedule_suspend = 10;
vdev->timeout.autosuspend = 10;
+ vdev->timeout.d0i3_entry_msg = 5;
}
static int ivpu_pll_wait_for_cmd_send(struct ivpu_device *vdev)
@@ -722,10 +724,23 @@ static bool ivpu_hw_37xx_is_idle(struct ivpu_device *vdev)
REG_TEST_FLD(VPU_37XX_BUTTRESS_VPU_STATUS, IDLE, val);
}
+static int ivpu_hw_37xx_wait_for_idle(struct ivpu_device *vdev)
+{
+ return REGB_POLL_FLD(VPU_37XX_BUTTRESS_VPU_STATUS, IDLE, 0x1, IDLE_TIMEOUT_US);
+}
+
+static void ivpu_hw_37xx_save_d0i3_entry_timestamp(struct ivpu_device *vdev)
+{
+ vdev->hw->d0i3_entry_host_ts = ktime_get_boottime();
+ vdev->hw->d0i3_entry_vpu_ts = REGV_RD64(VPU_37XX_CPU_SS_TIM_PERF_FREE_CNT);
+}
+
static int ivpu_hw_37xx_power_down(struct ivpu_device *vdev)
{
int ret = 0;
+ ivpu_hw_37xx_save_d0i3_entry_timestamp(vdev);
+
if (!ivpu_hw_37xx_is_idle(vdev))
ivpu_warn(vdev, "VPU not idle during power down\n");
@@ -760,6 +775,16 @@ static void ivpu_hw_37xx_wdt_disable(struct ivpu_device *vdev)
REGV_WR32(VPU_37XX_CPU_SS_TIM_GEN_CONFIG, val);
}
+static u32 ivpu_hw_37xx_profiling_freq_get(struct ivpu_device *vdev)
+{
+ return PLL_PROF_CLK_FREQ;
+}
+
+static void ivpu_hw_37xx_profiling_freq_drive(struct ivpu_device *vdev, bool enable)
+{
+ /* Profiling freq is a debug feature. Unavailable on VPU 37XX. */
+}
+
static u32 ivpu_hw_37xx_pll_to_freq(u32 ratio, u32 config)
{
u32 pll_clock = PLL_REF_CLK_FREQ * ratio;
@@ -871,17 +896,20 @@ static void ivpu_hw_37xx_irq_noc_firewall_handler(struct ivpu_device *vdev)
}
/* Handler for IRQs from VPU core (irqV) */
-static u32 ivpu_hw_37xx_irqv_handler(struct ivpu_device *vdev, int irq)
+static bool ivpu_hw_37xx_irqv_handler(struct ivpu_device *vdev, int irq, bool *wake_thread)
{
u32 status = REGV_RD32(VPU_37XX_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK;
+ if (!status)
+ return false;
+
REGV_WR32(VPU_37XX_HOST_SS_ICB_CLEAR_0, status);
if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_0_INT, status))
ivpu_mmu_irq_evtq_handler(vdev);
if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT, status))
- ivpu_ipc_irq_handler(vdev);
+ ivpu_ipc_irq_handler(vdev, wake_thread);
if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT, status))
ivpu_dbg(vdev, IRQ, "MMU sync complete\n");
@@ -898,17 +926,17 @@ static u32 ivpu_hw_37xx_irqv_handler(struct ivpu_device *vdev, int irq)
if (REG_TEST_FLD(VPU_37XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, status))
ivpu_hw_37xx_irq_noc_firewall_handler(vdev);
- return status;
+ return true;
}
/* Handler for IRQs from Buttress core (irqB) */
-static u32 ivpu_hw_37xx_irqb_handler(struct ivpu_device *vdev, int irq)
+static bool ivpu_hw_37xx_irqb_handler(struct ivpu_device *vdev, int irq)
{
u32 status = REGB_RD32(VPU_37XX_BUTTRESS_INTERRUPT_STAT) & BUTTRESS_IRQ_MASK;
bool schedule_recovery = false;
- if (status == 0)
- return 0;
+ if (!status)
+ return false;
if (REG_TEST_FLD(VPU_37XX_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE, status))
ivpu_dbg(vdev, IRQ, "FREQ_CHANGE irq: %08x",
@@ -944,23 +972,27 @@ static u32 ivpu_hw_37xx_irqb_handler(struct ivpu_device *vdev, int irq)
if (schedule_recovery)
ivpu_pm_schedule_recovery(vdev);
- return status;
+ return true;
}
static irqreturn_t ivpu_hw_37xx_irq_handler(int irq, void *ptr)
{
struct ivpu_device *vdev = ptr;
- u32 ret_irqv, ret_irqb;
+ bool irqv_handled, irqb_handled, wake_thread = false;
REGB_WR32(VPU_37XX_BUTTRESS_GLOBAL_INT_MASK, 0x1);
- ret_irqv = ivpu_hw_37xx_irqv_handler(vdev, irq);
- ret_irqb = ivpu_hw_37xx_irqb_handler(vdev, irq);
+ irqv_handled = ivpu_hw_37xx_irqv_handler(vdev, irq, &wake_thread);
+ irqb_handled = ivpu_hw_37xx_irqb_handler(vdev, irq);
/* Re-enable global interrupts to re-trigger MSI for pending interrupts */
REGB_WR32(VPU_37XX_BUTTRESS_GLOBAL_INT_MASK, 0x0);
- return IRQ_RETVAL(ret_irqb | ret_irqv);
+ if (wake_thread)
+ return IRQ_WAKE_THREAD;
+ if (irqv_handled || irqb_handled)
+ return IRQ_HANDLED;
+ return IRQ_NONE;
}
static void ivpu_hw_37xx_diagnose_failure(struct ivpu_device *vdev)
@@ -997,11 +1029,14 @@ const struct ivpu_hw_ops ivpu_hw_37xx_ops = {
.info_init = ivpu_hw_37xx_info_init,
.power_up = ivpu_hw_37xx_power_up,
.is_idle = ivpu_hw_37xx_is_idle,
+ .wait_for_idle = ivpu_hw_37xx_wait_for_idle,
.power_down = ivpu_hw_37xx_power_down,
.reset = ivpu_hw_37xx_reset,
.boot_fw = ivpu_hw_37xx_boot_fw,
.wdt_disable = ivpu_hw_37xx_wdt_disable,
.diagnose_failure = ivpu_hw_37xx_diagnose_failure,
+ .profiling_freq_get = ivpu_hw_37xx_profiling_freq_get,
+ .profiling_freq_drive = ivpu_hw_37xx_profiling_freq_drive,
.reg_pll_freq_get = ivpu_hw_37xx_reg_pll_freq_get,
.reg_telemetry_offset_get = ivpu_hw_37xx_reg_telemetry_offset_get,
.reg_telemetry_size_get = ivpu_hw_37xx_reg_telemetry_size_get,
diff --git a/drivers/accel/ivpu/ivpu_hw_37xx_reg.h b/drivers/accel/ivpu/ivpu_hw_37xx_reg.h
index 4083beb5e9db..f6fec1919202 100644
--- a/drivers/accel/ivpu/ivpu_hw_37xx_reg.h
+++ b/drivers/accel/ivpu/ivpu_hw_37xx_reg.h
@@ -240,6 +240,8 @@
#define VPU_37XX_CPU_SS_TIM_GEN_CONFIG 0x06021008u
#define VPU_37XX_CPU_SS_TIM_GEN_CONFIG_WDOG_TO_INT_CLR_MASK BIT_MASK(9)
+#define VPU_37XX_CPU_SS_TIM_PERF_FREE_CNT 0x06029000u
+
#define VPU_37XX_CPU_SS_DOORBELL_0 0x06300000u
#define VPU_37XX_CPU_SS_DOORBELL_0_SET_MASK BIT_MASK(0)
diff --git a/drivers/accel/ivpu/ivpu_hw_40xx.c b/drivers/accel/ivpu/ivpu_hw_40xx.c
index e691c49c9841..eba2fdef2ace 100644
--- a/drivers/accel/ivpu/ivpu_hw_40xx.c
+++ b/drivers/accel/ivpu/ivpu_hw_40xx.c
@@ -39,6 +39,7 @@
#define TIMEOUT_US (150 * USEC_PER_MSEC)
#define PWR_ISLAND_STATUS_TIMEOUT_US (5 * USEC_PER_MSEC)
#define PLL_TIMEOUT_US (1500 * USEC_PER_MSEC)
+#define IDLE_TIMEOUT_US (5 * USEC_PER_MSEC)
#define WEIGHTS_DEFAULT 0xf711f711u
#define WEIGHTS_ATS_DEFAULT 0x0000f711u
@@ -139,18 +140,21 @@ static void ivpu_hw_timeouts_init(struct ivpu_device *vdev)
vdev->timeout.tdr = 2000000;
vdev->timeout.reschedule_suspend = 1000;
vdev->timeout.autosuspend = -1;
+ vdev->timeout.d0i3_entry_msg = 500;
} else if (ivpu_is_simics(vdev)) {
vdev->timeout.boot = 50;
vdev->timeout.jsm = 500;
vdev->timeout.tdr = 10000;
vdev->timeout.reschedule_suspend = 10;
vdev->timeout.autosuspend = -1;
+ vdev->timeout.d0i3_entry_msg = 100;
} else {
vdev->timeout.boot = 1000;
vdev->timeout.jsm = 500;
vdev->timeout.tdr = 2000;
vdev->timeout.reschedule_suspend = 10;
vdev->timeout.autosuspend = 10;
+ vdev->timeout.d0i3_entry_msg = 5;
}
}
@@ -824,12 +828,6 @@ static int ivpu_hw_40xx_power_up(struct ivpu_device *vdev)
{
int ret;
- ret = ivpu_hw_40xx_reset(vdev);
- if (ret) {
- ivpu_err(vdev, "Failed to reset HW: %d\n", ret);
- return ret;
- }
-
ret = ivpu_hw_40xx_d0i3_disable(vdev);
if (ret)
ivpu_warn(vdev, "Failed to disable D0I3: %d\n", ret);
@@ -898,10 +896,23 @@ static bool ivpu_hw_40xx_is_idle(struct ivpu_device *vdev)
REG_TEST_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, IDLE, val);
}
+static int ivpu_hw_40xx_wait_for_idle(struct ivpu_device *vdev)
+{
+ return REGB_POLL_FLD(VPU_40XX_BUTTRESS_VPU_STATUS, IDLE, 0x1, IDLE_TIMEOUT_US);
+}
+
+static void ivpu_hw_40xx_save_d0i3_entry_timestamp(struct ivpu_device *vdev)
+{
+ vdev->hw->d0i3_entry_host_ts = ktime_get_boottime();
+ vdev->hw->d0i3_entry_vpu_ts = REGV_RD64(VPU_40XX_CPU_SS_TIM_PERF_EXT_FREE_CNT);
+}
+
static int ivpu_hw_40xx_power_down(struct ivpu_device *vdev)
{
int ret = 0;
+ ivpu_hw_40xx_save_d0i3_entry_timestamp(vdev);
+
if (!ivpu_hw_40xx_is_idle(vdev) && ivpu_hw_40xx_reset(vdev))
ivpu_warn(vdev, "Failed to reset the VPU\n");
@@ -933,6 +944,19 @@ static void ivpu_hw_40xx_wdt_disable(struct ivpu_device *vdev)
REGV_WR32(VPU_40XX_CPU_SS_TIM_GEN_CONFIG, val);
}
+static u32 ivpu_hw_40xx_profiling_freq_get(struct ivpu_device *vdev)
+{
+ return vdev->hw->pll.profiling_freq;
+}
+
+static void ivpu_hw_40xx_profiling_freq_drive(struct ivpu_device *vdev, bool enable)
+{
+ if (enable)
+ vdev->hw->pll.profiling_freq = PLL_PROFILING_FREQ_HIGH;
+ else
+ vdev->hw->pll.profiling_freq = PLL_PROFILING_FREQ_DEFAULT;
+}
+
/* Register indirect accesses */
static u32 ivpu_hw_40xx_reg_pll_freq_get(struct ivpu_device *vdev)
{
@@ -1023,13 +1047,12 @@ static void ivpu_hw_40xx_irq_noc_firewall_handler(struct ivpu_device *vdev)
}
/* Handler for IRQs from VPU core (irqV) */
-static irqreturn_t ivpu_hw_40xx_irqv_handler(struct ivpu_device *vdev, int irq)
+static bool ivpu_hw_40xx_irqv_handler(struct ivpu_device *vdev, int irq, bool *wake_thread)
{
u32 status = REGV_RD32(VPU_40XX_HOST_SS_ICB_STATUS_0) & ICB_0_IRQ_MASK;
- irqreturn_t ret = IRQ_NONE;
if (!status)
- return IRQ_NONE;
+ return false;
REGV_WR32(VPU_40XX_HOST_SS_ICB_CLEAR_0, status);
@@ -1037,7 +1060,7 @@ static irqreturn_t ivpu_hw_40xx_irqv_handler(struct ivpu_device *vdev, int irq)
ivpu_mmu_irq_evtq_handler(vdev);
if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, HOST_IPC_FIFO_INT, status))
- ret |= ivpu_ipc_irq_handler(vdev);
+ ivpu_ipc_irq_handler(vdev, wake_thread);
if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, MMU_IRQ_1_INT, status))
ivpu_dbg(vdev, IRQ, "MMU sync complete\n");
@@ -1054,17 +1077,17 @@ static irqreturn_t ivpu_hw_40xx_irqv_handler(struct ivpu_device *vdev, int irq)
if (REG_TEST_FLD(VPU_40XX_HOST_SS_ICB_STATUS_0, NOC_FIREWALL_INT, status))
ivpu_hw_40xx_irq_noc_firewall_handler(vdev);
- return ret;
+ return true;
}
/* Handler for IRQs from Buttress core (irqB) */
-static irqreturn_t ivpu_hw_40xx_irqb_handler(struct ivpu_device *vdev, int irq)
+static bool ivpu_hw_40xx_irqb_handler(struct ivpu_device *vdev, int irq)
{
bool schedule_recovery = false;
u32 status = REGB_RD32(VPU_40XX_BUTTRESS_INTERRUPT_STAT) & BUTTRESS_IRQ_MASK;
- if (status == 0)
- return IRQ_NONE;
+ if (!status)
+ return false;
if (REG_TEST_FLD(VPU_40XX_BUTTRESS_INTERRUPT_STAT, FREQ_CHANGE, status))
ivpu_dbg(vdev, IRQ, "FREQ_CHANGE");
@@ -1116,26 +1139,27 @@ static irqreturn_t ivpu_hw_40xx_irqb_handler(struct ivpu_device *vdev, int irq)
if (schedule_recovery)
ivpu_pm_schedule_recovery(vdev);
- return IRQ_HANDLED;
+ return true;
}
static irqreturn_t ivpu_hw_40xx_irq_handler(int irq, void *ptr)
{
+ bool irqv_handled, irqb_handled, wake_thread = false;
struct ivpu_device *vdev = ptr;
- irqreturn_t ret = IRQ_NONE;
REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x1);
- ret |= ivpu_hw_40xx_irqv_handler(vdev, irq);
- ret |= ivpu_hw_40xx_irqb_handler(vdev, irq);
+ irqv_handled = ivpu_hw_40xx_irqv_handler(vdev, irq, &wake_thread);
+ irqb_handled = ivpu_hw_40xx_irqb_handler(vdev, irq);
/* Re-enable global interrupts to re-trigger MSI for pending interrupts */
REGB_WR32(VPU_40XX_BUTTRESS_GLOBAL_INT_MASK, 0x0);
- if (ret & IRQ_WAKE_THREAD)
+ if (wake_thread)
return IRQ_WAKE_THREAD;
-
- return ret;
+ if (irqv_handled || irqb_handled)
+ return IRQ_HANDLED;
+ return IRQ_NONE;
}
static void ivpu_hw_40xx_diagnose_failure(struct ivpu_device *vdev)
@@ -1185,11 +1209,14 @@ const struct ivpu_hw_ops ivpu_hw_40xx_ops = {
.info_init = ivpu_hw_40xx_info_init,
.power_up = ivpu_hw_40xx_power_up,
.is_idle = ivpu_hw_40xx_is_idle,
+ .wait_for_idle = ivpu_hw_40xx_wait_for_idle,
.power_down = ivpu_hw_40xx_power_down,
.reset = ivpu_hw_40xx_reset,
.boot_fw = ivpu_hw_40xx_boot_fw,
.wdt_disable = ivpu_hw_40xx_wdt_disable,
.diagnose_failure = ivpu_hw_40xx_diagnose_failure,
+ .profiling_freq_get = ivpu_hw_40xx_profiling_freq_get,
+ .profiling_freq_drive = ivpu_hw_40xx_profiling_freq_drive,
.reg_pll_freq_get = ivpu_hw_40xx_reg_pll_freq_get,
.reg_telemetry_offset_get = ivpu_hw_40xx_reg_telemetry_offset_get,
.reg_telemetry_size_get = ivpu_hw_40xx_reg_telemetry_size_get,
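Both the 37xx and 40xx interrupt paths above now return plain booleans from the irqV/irqB helpers and let the top-level handler pick between IRQ_NONE, IRQ_HANDLED and IRQ_WAKE_THREAD. A minimal sketch of that hard-IRQ/threaded-IRQ split, using illustrative names (my_dev, my_hard_irq, my_irq_thread) rather than driver symbols:

#include <linux/device.h>
#include <linux/interrupt.h>

struct my_dev {
        void __iomem *regs;
};

/* Hypothetical helper: ack/clear status bits, report whether deferred work exists. */
static bool my_check_and_clear_status(struct my_dev *mdev, bool *wake_thread)
{
        *wake_thread = true;    /* pretend a message needs process context */
        return true;
}

/* Hard IRQ: runs in interrupt context, must not sleep. */
static irqreturn_t my_hard_irq(int irq, void *data)
{
        struct my_dev *mdev = data;
        bool wake_thread = false;
        bool handled = my_check_and_clear_status(mdev, &wake_thread);

        if (wake_thread)
                return IRQ_WAKE_THREAD; /* run my_irq_thread() next */
        return handled ? IRQ_HANDLED : IRQ_NONE;
}

/* Threaded handler: process context, may sleep, take mutexes, use runtime PM. */
static irqreturn_t my_irq_thread(int irq, void *data)
{
        return IRQ_HANDLED;
}

static int my_request_irq(struct device *dev, struct my_dev *mdev, int irq)
{
        return devm_request_threaded_irq(dev, irq, my_hard_irq, my_irq_thread,
                                         IRQF_ONESHOT, "my-device", mdev);
}

IRQ_WAKE_THREAD only has an effect when a thread function was registered together with the hard handler, hence the devm_request_threaded_irq() form.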
diff --git a/drivers/accel/ivpu/ivpu_ipc.c b/drivers/accel/ivpu/ivpu_ipc.c
index a4ca40b184d4..e86621f16f85 100644
--- a/drivers/accel/ivpu/ivpu_ipc.c
+++ b/drivers/accel/ivpu/ivpu_ipc.c
@@ -5,7 +5,7 @@
#include <linux/genalloc.h>
#include <linux/highmem.h>
-#include <linux/kthread.h>
+#include <linux/pm_runtime.h>
#include <linux/wait.h>
#include "ivpu_drv.h"
@@ -17,19 +17,12 @@
#include "ivpu_pm.h"
#define IPC_MAX_RX_MSG 128
-#define IS_KTHREAD() (get_current()->flags & PF_KTHREAD)
struct ivpu_ipc_tx_buf {
struct ivpu_ipc_hdr ipc;
struct vpu_jsm_msg jsm;
};
-struct ivpu_ipc_rx_msg {
- struct list_head link;
- struct ivpu_ipc_hdr *ipc_hdr;
- struct vpu_jsm_msg *jsm_msg;
-};
-
static void ivpu_ipc_msg_dump(struct ivpu_device *vdev, char *c,
struct ivpu_ipc_hdr *ipc_hdr, u32 vpu_addr)
{
@@ -139,8 +132,49 @@ static void ivpu_ipc_tx(struct ivpu_device *vdev, u32 vpu_addr)
ivpu_hw_reg_ipc_tx_set(vdev, vpu_addr);
}
-void
-ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons, u32 channel)
+static void
+ivpu_ipc_rx_msg_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
+ struct ivpu_ipc_hdr *ipc_hdr, struct vpu_jsm_msg *jsm_msg)
+{
+ struct ivpu_ipc_info *ipc = vdev->ipc;
+ struct ivpu_ipc_rx_msg *rx_msg;
+
+ lockdep_assert_held(&ipc->cons_lock);
+ lockdep_assert_irqs_disabled();
+
+ rx_msg = kzalloc(sizeof(*rx_msg), GFP_ATOMIC);
+ if (!rx_msg) {
+ ivpu_ipc_rx_mark_free(vdev, ipc_hdr, jsm_msg);
+ return;
+ }
+
+ atomic_inc(&ipc->rx_msg_count);
+
+ rx_msg->ipc_hdr = ipc_hdr;
+ rx_msg->jsm_msg = jsm_msg;
+ rx_msg->callback = cons->rx_callback;
+
+ if (rx_msg->callback) {
+ list_add_tail(&rx_msg->link, &ipc->cb_msg_list);
+ } else {
+ spin_lock(&cons->rx_lock);
+ list_add_tail(&rx_msg->link, &cons->rx_msg_list);
+ spin_unlock(&cons->rx_lock);
+ wake_up(&cons->rx_msg_wq);
+ }
+}
+
+static void
+ivpu_ipc_rx_msg_del(struct ivpu_device *vdev, struct ivpu_ipc_rx_msg *rx_msg)
+{
+ list_del(&rx_msg->link);
+ ivpu_ipc_rx_mark_free(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg);
+ atomic_dec(&vdev->ipc->rx_msg_count);
+ kfree(rx_msg);
+}
+
+void ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
+ u32 channel, ivpu_ipc_rx_callback_t rx_callback)
{
struct ivpu_ipc_info *ipc = vdev->ipc;
@@ -148,13 +182,15 @@ ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
cons->channel = channel;
cons->tx_vpu_addr = 0;
cons->request_id = 0;
- spin_lock_init(&cons->rx_msg_lock);
+ cons->aborted = false;
+ cons->rx_callback = rx_callback;
+ spin_lock_init(&cons->rx_lock);
INIT_LIST_HEAD(&cons->rx_msg_list);
init_waitqueue_head(&cons->rx_msg_wq);
- spin_lock_irq(&ipc->cons_list_lock);
+ spin_lock_irq(&ipc->cons_lock);
list_add_tail(&cons->link, &ipc->cons_list);
- spin_unlock_irq(&ipc->cons_list_lock);
+ spin_unlock_irq(&ipc->cons_lock);
}
void ivpu_ipc_consumer_del(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons)
@@ -162,18 +198,14 @@ void ivpu_ipc_consumer_del(struct ivpu_device *vdev, struct ivpu_ipc_consumer *c
struct ivpu_ipc_info *ipc = vdev->ipc;
struct ivpu_ipc_rx_msg *rx_msg, *r;
- spin_lock_irq(&ipc->cons_list_lock);
+ spin_lock_irq(&ipc->cons_lock);
list_del(&cons->link);
- spin_unlock_irq(&ipc->cons_list_lock);
-
- spin_lock_irq(&cons->rx_msg_lock);
- list_for_each_entry_safe(rx_msg, r, &cons->rx_msg_list, link) {
- list_del(&rx_msg->link);
- ivpu_ipc_rx_mark_free(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg);
- atomic_dec(&ipc->rx_msg_count);
- kfree(rx_msg);
- }
- spin_unlock_irq(&cons->rx_msg_lock);
+ spin_unlock_irq(&ipc->cons_lock);
+
+ spin_lock_irq(&cons->rx_lock);
+ list_for_each_entry_safe(rx_msg, r, &cons->rx_msg_list, link)
+ ivpu_ipc_rx_msg_del(vdev, rx_msg);
+ spin_unlock_irq(&cons->rx_lock);
ivpu_ipc_tx_release(vdev, cons->tx_vpu_addr);
}
@@ -202,52 +234,61 @@ unlock:
return ret;
}
+static bool ivpu_ipc_rx_need_wakeup(struct ivpu_ipc_consumer *cons)
+{
+ bool ret;
+
+ spin_lock_irq(&cons->rx_lock);
+ ret = !list_empty(&cons->rx_msg_list) || cons->aborted;
+ spin_unlock_irq(&cons->rx_lock);
+
+ return ret;
+}
+
int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
struct ivpu_ipc_hdr *ipc_buf,
- struct vpu_jsm_msg *ipc_payload, unsigned long timeout_ms)
+ struct vpu_jsm_msg *jsm_msg, unsigned long timeout_ms)
{
- struct ivpu_ipc_info *ipc = vdev->ipc;
struct ivpu_ipc_rx_msg *rx_msg;
int wait_ret, ret = 0;
+ if (drm_WARN_ONCE(&vdev->drm, cons->rx_callback, "Consumer works only in async mode\n"))
+ return -EINVAL;
+
wait_ret = wait_event_timeout(cons->rx_msg_wq,
- (IS_KTHREAD() && kthread_should_stop()) ||
- !list_empty(&cons->rx_msg_list),
+ ivpu_ipc_rx_need_wakeup(cons),
msecs_to_jiffies(timeout_ms));
- if (IS_KTHREAD() && kthread_should_stop())
- return -EINTR;
-
if (wait_ret == 0)
return -ETIMEDOUT;
- spin_lock_irq(&cons->rx_msg_lock);
+ spin_lock_irq(&cons->rx_lock);
+ if (cons->aborted) {
+ spin_unlock_irq(&cons->rx_lock);
+ return -ECANCELED;
+ }
rx_msg = list_first_entry_or_null(&cons->rx_msg_list, struct ivpu_ipc_rx_msg, link);
if (!rx_msg) {
- spin_unlock_irq(&cons->rx_msg_lock);
+ spin_unlock_irq(&cons->rx_lock);
return -EAGAIN;
}
- list_del(&rx_msg->link);
- spin_unlock_irq(&cons->rx_msg_lock);
if (ipc_buf)
memcpy(ipc_buf, rx_msg->ipc_hdr, sizeof(*ipc_buf));
if (rx_msg->jsm_msg) {
- u32 size = min_t(int, rx_msg->ipc_hdr->data_size, sizeof(*ipc_payload));
+ u32 size = min_t(int, rx_msg->ipc_hdr->data_size, sizeof(*jsm_msg));
if (rx_msg->jsm_msg->result != VPU_JSM_STATUS_SUCCESS) {
ivpu_dbg(vdev, IPC, "IPC resp result error: %d\n", rx_msg->jsm_msg->result);
ret = -EBADMSG;
}
- if (ipc_payload)
- memcpy(ipc_payload, rx_msg->jsm_msg, size);
+ if (jsm_msg)
+ memcpy(jsm_msg, rx_msg->jsm_msg, size);
}
- ivpu_ipc_rx_mark_free(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg);
- atomic_dec(&ipc->rx_msg_count);
- kfree(rx_msg);
-
+ ivpu_ipc_rx_msg_del(vdev, rx_msg);
+ spin_unlock_irq(&cons->rx_lock);
return ret;
}
@@ -260,7 +301,7 @@ ivpu_ipc_send_receive_internal(struct ivpu_device *vdev, struct vpu_jsm_msg *req
struct ivpu_ipc_consumer cons;
int ret;
- ivpu_ipc_consumer_add(vdev, &cons, channel);
+ ivpu_ipc_consumer_add(vdev, &cons, channel, NULL);
ret = ivpu_ipc_send(vdev, &cons, req);
if (ret) {
@@ -285,23 +326,19 @@ consumer_del:
return ret;
}
-int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
- enum vpu_ipc_msg_type expected_resp_type,
- struct vpu_jsm_msg *resp, u32 channel,
- unsigned long timeout_ms)
+int ivpu_ipc_send_receive_active(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
+ enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp,
+ u32 channel, unsigned long timeout_ms)
{
struct vpu_jsm_msg hb_req = { .type = VPU_JSM_MSG_QUERY_ENGINE_HB };
struct vpu_jsm_msg hb_resp;
int ret, hb_ret;
- ret = ivpu_rpm_get(vdev);
- if (ret < 0)
- return ret;
+ drm_WARN_ON(&vdev->drm, pm_runtime_status_suspended(vdev->drm.dev));
- ret = ivpu_ipc_send_receive_internal(vdev, req, expected_resp_type, resp,
- channel, timeout_ms);
+ ret = ivpu_ipc_send_receive_internal(vdev, req, expected_resp, resp, channel, timeout_ms);
if (ret != -ETIMEDOUT)
- goto rpm_put;
+ return ret;
hb_ret = ivpu_ipc_send_receive_internal(vdev, &hb_req, VPU_JSM_MSG_QUERY_ENGINE_HB_DONE,
&hb_resp, VPU_IPC_CHAN_ASYNC_CMD,
@@ -311,7 +348,21 @@ int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
ivpu_pm_schedule_recovery(vdev);
}
-rpm_put:
+ return ret;
+}
+
+int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
+ enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp,
+ u32 channel, unsigned long timeout_ms)
+{
+ int ret;
+
+ ret = ivpu_rpm_get(vdev);
+ if (ret < 0)
+ return ret;
+
+ ret = ivpu_ipc_send_receive_active(vdev, req, expected_resp, resp, channel, timeout_ms);
+
ivpu_rpm_put(vdev);
return ret;
}
@@ -329,35 +380,7 @@ ivpu_ipc_match_consumer(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons
return false;
}
-static void
-ivpu_ipc_dispatch(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
- struct ivpu_ipc_hdr *ipc_hdr, struct vpu_jsm_msg *jsm_msg)
-{
- struct ivpu_ipc_info *ipc = vdev->ipc;
- struct ivpu_ipc_rx_msg *rx_msg;
- unsigned long flags;
-
- lockdep_assert_held(&ipc->cons_list_lock);
-
- rx_msg = kzalloc(sizeof(*rx_msg), GFP_ATOMIC);
- if (!rx_msg) {
- ivpu_ipc_rx_mark_free(vdev, ipc_hdr, jsm_msg);
- return;
- }
-
- atomic_inc(&ipc->rx_msg_count);
-
- rx_msg->ipc_hdr = ipc_hdr;
- rx_msg->jsm_msg = jsm_msg;
-
- spin_lock_irqsave(&cons->rx_msg_lock, flags);
- list_add_tail(&rx_msg->link, &cons->rx_msg_list);
- spin_unlock_irqrestore(&cons->rx_msg_lock, flags);
-
- wake_up(&cons->rx_msg_wq);
-}
-
-int ivpu_ipc_irq_handler(struct ivpu_device *vdev)
+void ivpu_ipc_irq_handler(struct ivpu_device *vdev, bool *wake_thread)
{
struct ivpu_ipc_info *ipc = vdev->ipc;
struct ivpu_ipc_consumer *cons;
@@ -375,7 +398,7 @@ int ivpu_ipc_irq_handler(struct ivpu_device *vdev)
vpu_addr = ivpu_hw_reg_ipc_rx_addr_get(vdev);
if (vpu_addr == REG_IO_ERROR) {
ivpu_err_ratelimited(vdev, "Failed to read IPC rx addr register\n");
- return -EIO;
+ return;
}
ipc_hdr = ivpu_to_cpu_addr(ipc->mem_rx, vpu_addr);
@@ -405,15 +428,15 @@ int ivpu_ipc_irq_handler(struct ivpu_device *vdev)
}
dispatched = false;
- spin_lock_irqsave(&ipc->cons_list_lock, flags);
+ spin_lock_irqsave(&ipc->cons_lock, flags);
list_for_each_entry(cons, &ipc->cons_list, link) {
if (ivpu_ipc_match_consumer(vdev, cons, ipc_hdr, jsm_msg)) {
- ivpu_ipc_dispatch(vdev, cons, ipc_hdr, jsm_msg);
+ ivpu_ipc_rx_msg_add(vdev, cons, ipc_hdr, jsm_msg);
dispatched = true;
break;
}
}
- spin_unlock_irqrestore(&ipc->cons_list_lock, flags);
+ spin_unlock_irqrestore(&ipc->cons_lock, flags);
if (!dispatched) {
ivpu_dbg(vdev, IPC, "IPC RX msg 0x%x dropped (no consumer)\n", vpu_addr);
@@ -421,7 +444,28 @@ int ivpu_ipc_irq_handler(struct ivpu_device *vdev)
}
}
- return 0;
+ if (wake_thread)
+ *wake_thread = !list_empty(&ipc->cb_msg_list);
+}
+
+irqreturn_t ivpu_ipc_irq_thread_handler(struct ivpu_device *vdev)
+{
+ struct ivpu_ipc_info *ipc = vdev->ipc;
+ struct ivpu_ipc_rx_msg *rx_msg, *r;
+ struct list_head cb_msg_list;
+
+ INIT_LIST_HEAD(&cb_msg_list);
+
+ spin_lock_irq(&ipc->cons_lock);
+ list_splice_tail_init(&ipc->cb_msg_list, &cb_msg_list);
+ spin_unlock_irq(&ipc->cons_lock);
+
+ list_for_each_entry_safe(rx_msg, r, &cb_msg_list, link) {
+ rx_msg->callback(vdev, rx_msg->ipc_hdr, rx_msg->jsm_msg);
+ ivpu_ipc_rx_msg_del(vdev, rx_msg);
+ }
+
+ return IRQ_HANDLED;
}
int ivpu_ipc_init(struct ivpu_device *vdev)
@@ -456,10 +500,10 @@ int ivpu_ipc_init(struct ivpu_device *vdev)
goto err_free_rx;
}
+ spin_lock_init(&ipc->cons_lock);
INIT_LIST_HEAD(&ipc->cons_list);
- spin_lock_init(&ipc->cons_list_lock);
+ INIT_LIST_HEAD(&ipc->cb_msg_list);
drmm_mutex_init(&vdev->drm, &ipc->lock);
-
ivpu_ipc_reset(vdev);
return 0;
@@ -472,6 +516,13 @@ err_free_tx:
void ivpu_ipc_fini(struct ivpu_device *vdev)
{
+ struct ivpu_ipc_info *ipc = vdev->ipc;
+
+ drm_WARN_ON(&vdev->drm, ipc->on);
+ drm_WARN_ON(&vdev->drm, !list_empty(&ipc->cons_list));
+ drm_WARN_ON(&vdev->drm, !list_empty(&ipc->cb_msg_list));
+ drm_WARN_ON(&vdev->drm, atomic_read(&ipc->rx_msg_count) > 0);
+
ivpu_ipc_mem_fini(vdev);
}
@@ -488,16 +539,27 @@ void ivpu_ipc_disable(struct ivpu_device *vdev)
{
struct ivpu_ipc_info *ipc = vdev->ipc;
struct ivpu_ipc_consumer *cons, *c;
- unsigned long flags;
+ struct ivpu_ipc_rx_msg *rx_msg, *r;
+
+ drm_WARN_ON(&vdev->drm, !list_empty(&ipc->cb_msg_list));
mutex_lock(&ipc->lock);
ipc->on = false;
mutex_unlock(&ipc->lock);
- spin_lock_irqsave(&ipc->cons_list_lock, flags);
- list_for_each_entry_safe(cons, c, &ipc->cons_list, link)
+ spin_lock_irq(&ipc->cons_lock);
+ list_for_each_entry_safe(cons, c, &ipc->cons_list, link) {
+ spin_lock(&cons->rx_lock);
+ if (!cons->rx_callback)
+ cons->aborted = true;
+ list_for_each_entry_safe(rx_msg, r, &cons->rx_msg_list, link)
+ ivpu_ipc_rx_msg_del(vdev, rx_msg);
+ spin_unlock(&cons->rx_lock);
wake_up(&cons->rx_msg_wq);
- spin_unlock_irqrestore(&ipc->cons_list_lock, flags);
+ }
+ spin_unlock_irq(&ipc->cons_lock);
+
+ drm_WARN_ON(&vdev->drm, atomic_read(&ipc->rx_msg_count) > 0);
}
void ivpu_ipc_reset(struct ivpu_device *vdev)
@@ -505,6 +567,7 @@ void ivpu_ipc_reset(struct ivpu_device *vdev)
struct ivpu_ipc_info *ipc = vdev->ipc;
mutex_lock(&ipc->lock);
+ drm_WARN_ON(&vdev->drm, ipc->on);
memset(ivpu_bo_vaddr(ipc->mem_tx), 0, ivpu_bo_size(ipc->mem_tx));
memset(ivpu_bo_vaddr(ipc->mem_rx), 0, ivpu_bo_size(ipc->mem_rx));
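The new ivpu_ipc_irq_thread_handler() drains ipc->cb_msg_list by splicing it onto a local list under cons_lock and running the callbacks with the lock dropped. A generic sketch of that splice-and-process idiom, with illustrative names (my_msg, pending_msgs, process_pending):

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct my_msg {
        struct list_head link;
        void (*callback)(struct my_msg *msg);
};

static LIST_HEAD(pending_msgs);
static DEFINE_SPINLOCK(pending_lock);

/* Producer side (e.g. a hard IRQ handler): queue under the lock. */
static void queue_msg(struct my_msg *msg)
{
        unsigned long flags;

        spin_lock_irqsave(&pending_lock, flags);
        list_add_tail(&msg->link, &pending_msgs);
        spin_unlock_irqrestore(&pending_lock, flags);
}

/* Consumer side (e.g. the IRQ thread): splice out, then process unlocked. */
static void process_pending(void)
{
        struct my_msg *msg, *tmp;
        LIST_HEAD(local);

        spin_lock_irq(&pending_lock);
        list_splice_tail_init(&pending_msgs, &local);
        spin_unlock_irq(&pending_lock);

        list_for_each_entry_safe(msg, tmp, &local, link) {
                list_del(&msg->link);
                msg->callback(msg);     /* may sleep or queue more work */
                kfree(msg);
        }
}

Keeping the critical section to a single list_splice_tail_init() lets a callback sleep or queue new messages without deadlocking on the producer's lock.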
diff --git a/drivers/accel/ivpu/ivpu_ipc.h b/drivers/accel/ivpu/ivpu_ipc.h
index 68f5b6668e00..40ca3cc4e61f 100644
--- a/drivers/accel/ivpu/ivpu_ipc.h
+++ b/drivers/accel/ivpu/ivpu_ipc.h
@@ -42,13 +42,26 @@ struct ivpu_ipc_hdr {
u8 status;
} __packed __aligned(IVPU_IPC_ALIGNMENT);
+typedef void (*ivpu_ipc_rx_callback_t)(struct ivpu_device *vdev,
+ struct ivpu_ipc_hdr *ipc_hdr,
+ struct vpu_jsm_msg *jsm_msg);
+
+struct ivpu_ipc_rx_msg {
+ struct list_head link;
+ struct ivpu_ipc_hdr *ipc_hdr;
+ struct vpu_jsm_msg *jsm_msg;
+ ivpu_ipc_rx_callback_t callback;
+};
+
struct ivpu_ipc_consumer {
struct list_head link;
u32 channel;
u32 tx_vpu_addr;
u32 request_id;
+ bool aborted;
+ ivpu_ipc_rx_callback_t rx_callback;
- spinlock_t rx_msg_lock; /* Protects rx_msg_list */
+ spinlock_t rx_lock; /* Protects rx_msg_list and aborted */
struct list_head rx_msg_list;
wait_queue_head_t rx_msg_wq;
};
@@ -60,8 +73,9 @@ struct ivpu_ipc_info {
atomic_t rx_msg_count;
- spinlock_t cons_list_lock; /* Protects cons_list */
+ spinlock_t cons_lock; /* Protects cons_list and cb_msg_list */
struct list_head cons_list;
+ struct list_head cb_msg_list;
atomic_t request_id;
struct mutex lock; /* Lock on status */
@@ -75,19 +89,22 @@ void ivpu_ipc_enable(struct ivpu_device *vdev);
void ivpu_ipc_disable(struct ivpu_device *vdev);
void ivpu_ipc_reset(struct ivpu_device *vdev);
-int ivpu_ipc_irq_handler(struct ivpu_device *vdev);
+void ivpu_ipc_irq_handler(struct ivpu_device *vdev, bool *wake_thread);
+irqreturn_t ivpu_ipc_irq_thread_handler(struct ivpu_device *vdev);
void ivpu_ipc_consumer_add(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
- u32 channel);
+ u32 channel, ivpu_ipc_rx_callback_t callback);
void ivpu_ipc_consumer_del(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons);
int ivpu_ipc_receive(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons,
- struct ivpu_ipc_hdr *ipc_buf, struct vpu_jsm_msg *ipc_payload,
+ struct ivpu_ipc_hdr *ipc_buf, struct vpu_jsm_msg *jsm_msg,
unsigned long timeout_ms);
+int ivpu_ipc_send_receive_active(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
+ enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp,
+ u32 channel, unsigned long timeout_ms);
int ivpu_ipc_send_receive(struct ivpu_device *vdev, struct vpu_jsm_msg *req,
- enum vpu_ipc_msg_type expected_resp_type,
- struct vpu_jsm_msg *resp, u32 channel,
- unsigned long timeout_ms);
+ enum vpu_ipc_msg_type expected_resp, struct vpu_jsm_msg *resp,
+ u32 channel, unsigned long timeout_ms);
#endif /* __IVPU_IPC_H__ */
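Going by the prototypes above, a consumer now works in one of two modes: register a callback with ivpu_ipc_consumer_add() and get called from the IRQ thread, or pass NULL and block in ivpu_ipc_receive(). A rough usage sketch (my_async_rx, my_sync_receive and the timeout handling are illustrative, not driver code):

#include "ivpu_drv.h"
#include "ivpu_ipc.h"

/* Asynchronous mode: runs from ivpu_ipc_irq_thread_handler(). */
static void my_async_rx(struct ivpu_device *vdev, struct ivpu_ipc_hdr *ipc_hdr,
                        struct vpu_jsm_msg *jsm_msg)
{
        /* inspect jsm_msg and kick the driver state machine; do not block here */
}

static void my_register_async(struct ivpu_device *vdev, struct ivpu_ipc_consumer *cons)
{
        ivpu_ipc_consumer_add(vdev, cons, VPU_IPC_CHAN_JOB_RET, my_async_rx);
}

/* Synchronous mode: NULL callback, block until a message, timeout or abort. */
static int my_sync_receive(struct ivpu_device *vdev, u32 channel, unsigned long timeout_ms)
{
        struct ivpu_ipc_consumer cons;
        struct vpu_jsm_msg resp;
        int ret;

        ivpu_ipc_consumer_add(vdev, &cons, channel, NULL);
        ret = ivpu_ipc_receive(vdev, &cons, NULL, &resp, timeout_ms);
        ivpu_ipc_consumer_del(vdev, &cons);
        return ret;
}

Mixing the two on one consumer is rejected: ivpu_ipc_receive() warns and returns -EINVAL when rx_callback is set.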
diff --git a/drivers/accel/ivpu/ivpu_job.c b/drivers/accel/ivpu/ivpu_job.c
index 8983e3a4fdf9..7206cf9cdb4a 100644
--- a/drivers/accel/ivpu/ivpu_job.c
+++ b/drivers/accel/ivpu/ivpu_job.c
@@ -7,7 +7,6 @@
#include <linux/bitfield.h>
#include <linux/highmem.h>
-#include <linux/kthread.h>
#include <linux/pci.h>
#include <linux/module.h>
#include <uapi/drm/ivpu_accel.h>
@@ -24,10 +23,6 @@
#define JOB_ID_CONTEXT_MASK GENMASK(31, 8)
#define JOB_MAX_BUFFER_COUNT 65535
-static unsigned int ivpu_tdr_timeout_ms;
-module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, uint, 0644);
-MODULE_PARM_DESC(tdr_timeout_ms, "Timeout for device hang detection, in milliseconds, 0 - default");
-
static void ivpu_cmdq_ring_db(struct ivpu_device *vdev, struct ivpu_cmdq *cmdq)
{
ivpu_hw_reg_db_set(vdev, cmdq->db_id);
@@ -196,6 +191,8 @@ static int ivpu_cmdq_push_job(struct ivpu_cmdq *cmdq, struct ivpu_job *job)
entry->batch_buf_addr = job->cmd_buf_vpu_addr;
entry->job_id = job->job_id;
entry->flags = 0;
+ if (unlikely(ivpu_test_mode & IVPU_TEST_MODE_NULL_SUBMISSION))
+ entry->flags = VPU_JOB_FLAGS_NULL_SUBMISSION_MASK;
wmb(); /* Ensure that tail is updated after filling entry */
header->tail = next_entry;
wmb(); /* Flush WC buffer for jobq header */
@@ -264,7 +261,7 @@ static void job_release(struct kref *ref)
for (i = 0; i < job->bo_count; i++)
if (job->bos[i])
- drm_gem_object_put(&job->bos[i]->base);
+ drm_gem_object_put(&job->bos[i]->base.base);
dma_fence_put(job->done_fence);
ivpu_file_priv_put(&job->file_priv);
@@ -340,23 +337,12 @@ static int ivpu_job_done(struct ivpu_device *vdev, u32 job_id, u32 job_status)
ivpu_dbg(vdev, JOB, "Job complete: id %3u ctx %2d engine %d status 0x%x\n",
job->job_id, job->file_priv->ctx.id, job->engine_idx, job_status);
+ ivpu_stop_job_timeout_detection(vdev);
+
job_put(job);
return 0;
}
-static void ivpu_job_done_message(struct ivpu_device *vdev, void *msg)
-{
- struct vpu_ipc_msg_payload_job_done *payload;
- struct vpu_jsm_msg *job_ret_msg = msg;
- int ret;
-
- payload = (struct vpu_ipc_msg_payload_job_done *)&job_ret_msg->payload;
-
- ret = ivpu_job_done(vdev, payload->job_id, payload->job_status);
- if (ret)
- ivpu_err(vdev, "Failed to finish job %d: %d\n", payload->job_id, ret);
-}
-
void ivpu_jobs_abort_all(struct ivpu_device *vdev)
{
struct ivpu_job *job;
@@ -398,11 +384,13 @@ static int ivpu_direct_job_submission(struct ivpu_job *job)
if (ret)
goto err_xa_erase;
+ ivpu_start_job_timeout_detection(vdev);
+
ivpu_dbg(vdev, JOB, "Job submitted: id %3u addr 0x%llx ctx %2d engine %d next %d\n",
job->job_id, job->cmd_buf_vpu_addr, file_priv->ctx.id,
job->engine_idx, cmdq->jobq->header.tail);
- if (ivpu_test_mode == IVPU_TEST_MODE_NULL_HW) {
+ if (ivpu_test_mode & IVPU_TEST_MODE_NULL_HW) {
ivpu_job_done(vdev, job->job_id, VPU_JSM_STATUS_SUCCESS);
cmdq->jobq->header.head = cmdq->jobq->header.tail;
wmb(); /* Flush WC buffer for jobq header */
@@ -448,7 +436,7 @@ ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32
}
bo = job->bos[CMD_BUF_IDX];
- if (!dma_resv_test_signaled(bo->base.resv, DMA_RESV_USAGE_READ)) {
+ if (!dma_resv_test_signaled(bo->base.base.resv, DMA_RESV_USAGE_READ)) {
ivpu_warn(vdev, "Buffer is already in use\n");
return -EBUSY;
}
@@ -468,7 +456,7 @@ ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32
}
for (i = 0; i < buf_count; i++) {
- ret = dma_resv_reserve_fences(job->bos[i]->base.resv, 1);
+ ret = dma_resv_reserve_fences(job->bos[i]->base.base.resv, 1);
if (ret) {
ivpu_warn(vdev, "Failed to reserve fences: %d\n", ret);
goto unlock_reservations;
@@ -477,7 +465,7 @@ ivpu_job_prepare_bos_for_submit(struct drm_file *file, struct ivpu_job *job, u32
for (i = 0; i < buf_count; i++) {
usage = (i == CMD_BUF_IDX) ? DMA_RESV_USAGE_WRITE : DMA_RESV_USAGE_BOOKKEEP;
- dma_resv_add_fence(job->bos[i]->base.resv, job->done_fence, usage);
+ dma_resv_add_fence(job->bos[i]->base.base.resv, job->done_fence, usage);
}
unlock_reservations:
@@ -562,61 +550,36 @@ free_handles:
return ret;
}
-static int ivpu_job_done_thread(void *arg)
+static void
+ivpu_job_done_callback(struct ivpu_device *vdev, struct ivpu_ipc_hdr *ipc_hdr,
+ struct vpu_jsm_msg *jsm_msg)
{
- struct ivpu_device *vdev = (struct ivpu_device *)arg;
- struct ivpu_ipc_consumer cons;
- struct vpu_jsm_msg jsm_msg;
- bool jobs_submitted;
- unsigned int timeout;
+ struct vpu_ipc_msg_payload_job_done *payload;
int ret;
- ivpu_dbg(vdev, JOB, "Started %s\n", __func__);
-
- ivpu_ipc_consumer_add(vdev, &cons, VPU_IPC_CHAN_JOB_RET);
-
- while (!kthread_should_stop()) {
- timeout = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr;
- jobs_submitted = !xa_empty(&vdev->submitted_jobs_xa);
- ret = ivpu_ipc_receive(vdev, &cons, NULL, &jsm_msg, timeout);
- if (!ret) {
- ivpu_job_done_message(vdev, &jsm_msg);
- } else if (ret == -ETIMEDOUT) {
- if (jobs_submitted && !xa_empty(&vdev->submitted_jobs_xa)) {
- ivpu_err(vdev, "TDR detected, timeout %d ms", timeout);
- ivpu_hw_diagnose_failure(vdev);
- ivpu_pm_schedule_recovery(vdev);
- }
- }
+ if (!jsm_msg) {
+ ivpu_err(vdev, "IPC message has no JSM payload\n");
+ return;
}
- ivpu_ipc_consumer_del(vdev, &cons);
-
- ivpu_jobs_abort_all(vdev);
+ if (jsm_msg->result != VPU_JSM_STATUS_SUCCESS) {
+ ivpu_err(vdev, "Invalid JSM message result: %d\n", jsm_msg->result);
+ return;
+ }
- ivpu_dbg(vdev, JOB, "Stopped %s\n", __func__);
- return 0;
+ payload = (struct vpu_ipc_msg_payload_job_done *)&jsm_msg->payload;
+ ret = ivpu_job_done(vdev, payload->job_id, payload->job_status);
+ if (!ret && !xa_empty(&vdev->submitted_jobs_xa))
+ ivpu_start_job_timeout_detection(vdev);
}
-int ivpu_job_done_thread_init(struct ivpu_device *vdev)
+void ivpu_job_done_consumer_init(struct ivpu_device *vdev)
{
- struct task_struct *thread;
-
- thread = kthread_run(&ivpu_job_done_thread, (void *)vdev, "ivpu_job_done_thread");
- if (IS_ERR(thread)) {
- ivpu_err(vdev, "Failed to start job completion thread\n");
- return -EIO;
- }
-
- get_task_struct(thread);
- wake_up_process(thread);
-
- vdev->job_done_thread = thread;
-
- return 0;
+ ivpu_ipc_consumer_add(vdev, &vdev->job_done_consumer,
+ VPU_IPC_CHAN_JOB_RET, ivpu_job_done_callback);
}
-void ivpu_job_done_thread_fini(struct ivpu_device *vdev)
+void ivpu_job_done_consumer_fini(struct ivpu_device *vdev)
{
- kthread_stop_put(vdev->job_done_thread);
+ ivpu_ipc_consumer_del(vdev, &vdev->job_done_consumer);
}
diff --git a/drivers/accel/ivpu/ivpu_job.h b/drivers/accel/ivpu/ivpu_job.h
index 5514c2d8a609..45a2f2ec82e5 100644
--- a/drivers/accel/ivpu/ivpu_job.h
+++ b/drivers/accel/ivpu/ivpu_job.h
@@ -59,8 +59,8 @@ int ivpu_submit_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
void ivpu_cmdq_release_all(struct ivpu_file_priv *file_priv);
void ivpu_cmdq_reset_all_contexts(struct ivpu_device *vdev);
-int ivpu_job_done_thread_init(struct ivpu_device *vdev);
-void ivpu_job_done_thread_fini(struct ivpu_device *vdev);
+void ivpu_job_done_consumer_init(struct ivpu_device *vdev);
+void ivpu_job_done_consumer_fini(struct ivpu_device *vdev);
void ivpu_jobs_abort_all(struct ivpu_device *vdev);
diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.c b/drivers/accel/ivpu/ivpu_jsm_msg.c
index 0c2fe7142024..8cea0dd731b9 100644
--- a/drivers/accel/ivpu/ivpu_jsm_msg.c
+++ b/drivers/accel/ivpu/ivpu_jsm_msg.c
@@ -4,6 +4,7 @@
*/
#include "ivpu_drv.h"
+#include "ivpu_hw.h"
#include "ivpu_ipc.h"
#include "ivpu_jsm_msg.h"
@@ -36,6 +37,17 @@ const char *ivpu_jsm_msg_type_to_str(enum vpu_ipc_msg_type type)
IVPU_CASE_TO_STR(VPU_JSM_MSG_DESTROY_CMD_QUEUE);
IVPU_CASE_TO_STR(VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES);
IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_REGISTER_DB);
+ IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_RESUME_CMDQ);
+ IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_SUSPEND_CMDQ);
+ IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_RESUME_CMDQ_RSP);
+ IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_SUSPEND_CMDQ_DONE);
+ IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG);
+ IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG_RSP);
+ IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION);
+ IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_ENGINE_RESUME);
+ IVPU_CASE_TO_STR(VPU_JSM_MSG_HWS_RESUME_ENGINE_DONE);
+ IVPU_CASE_TO_STR(VPU_JSM_MSG_STATE_DUMP);
+ IVPU_CASE_TO_STR(VPU_JSM_MSG_STATE_DUMP_RSP);
IVPU_CASE_TO_STR(VPU_JSM_MSG_BLOB_DEINIT);
IVPU_CASE_TO_STR(VPU_JSM_MSG_DYNDBG_CONTROL);
IVPU_CASE_TO_STR(VPU_JSM_MSG_JOB_DONE);
@@ -65,6 +77,12 @@ const char *ivpu_jsm_msg_type_to_str(enum vpu_ipc_msg_type type)
IVPU_CASE_TO_STR(VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES_RSP);
IVPU_CASE_TO_STR(VPU_JSM_MSG_BLOB_DEINIT_DONE);
IVPU_CASE_TO_STR(VPU_JSM_MSG_DYNDBG_CONTROL_RSP);
+ IVPU_CASE_TO_STR(VPU_JSM_MSG_PWR_D0I3_ENTER);
+ IVPU_CASE_TO_STR(VPU_JSM_MSG_PWR_D0I3_ENTER_DONE);
+ IVPU_CASE_TO_STR(VPU_JSM_MSG_DCT_ENABLE);
+ IVPU_CASE_TO_STR(VPU_JSM_MSG_DCT_ENABLE_DONE);
+ IVPU_CASE_TO_STR(VPU_JSM_MSG_DCT_DISABLE);
+ IVPU_CASE_TO_STR(VPU_JSM_MSG_DCT_DISABLE_DONE);
}
#undef IVPU_CASE_TO_STR
@@ -243,3 +261,23 @@ int ivpu_jsm_context_release(struct ivpu_device *vdev, u32 host_ssid)
return ivpu_ipc_send_receive(vdev, &req, VPU_JSM_MSG_SSID_RELEASE_DONE, &resp,
VPU_IPC_CHAN_ASYNC_CMD, vdev->timeout.jsm);
}
+
+int ivpu_jsm_pwr_d0i3_enter(struct ivpu_device *vdev)
+{
+ struct vpu_jsm_msg req = { .type = VPU_JSM_MSG_PWR_D0I3_ENTER };
+ struct vpu_jsm_msg resp;
+ int ret;
+
+ if (IVPU_WA(disable_d0i3_msg))
+ return 0;
+
+ req.payload.pwr_d0i3_enter.send_response = 1;
+
+ ret = ivpu_ipc_send_receive_active(vdev, &req, VPU_JSM_MSG_PWR_D0I3_ENTER_DONE,
+ &resp, VPU_IPC_CHAN_GEN_CMD,
+ vdev->timeout.d0i3_entry_msg);
+ if (ret)
+ return ret;
+
+ return ivpu_hw_wait_for_idle(vdev);
+}
diff --git a/drivers/accel/ivpu/ivpu_jsm_msg.h b/drivers/accel/ivpu/ivpu_jsm_msg.h
index 66979a948c7c..ae75e5dbcc41 100644
--- a/drivers/accel/ivpu/ivpu_jsm_msg.h
+++ b/drivers/accel/ivpu/ivpu_jsm_msg.h
@@ -22,4 +22,5 @@ int ivpu_jsm_trace_get_capability(struct ivpu_device *vdev, u32 *trace_destinati
int ivpu_jsm_trace_set_config(struct ivpu_device *vdev, u32 trace_level, u32 trace_destination_mask,
u64 trace_hw_component_mask);
int ivpu_jsm_context_release(struct ivpu_device *vdev, u32 host_ssid);
+int ivpu_jsm_pwr_d0i3_enter(struct ivpu_device *vdev);
#endif
diff --git a/drivers/accel/ivpu/ivpu_mmu.c b/drivers/accel/ivpu/ivpu_mmu.c
index 2538c78fbebe..2228c44b115f 100644
--- a/drivers/accel/ivpu/ivpu_mmu.c
+++ b/drivers/accel/ivpu/ivpu_mmu.c
@@ -230,7 +230,12 @@
(REG_FLD(IVPU_MMU_REG_GERROR, MSI_PRIQ_ABT)) | \
(REG_FLD(IVPU_MMU_REG_GERROR, MSI_ABT)))
-static char *ivpu_mmu_event_to_str(u32 cmd)
+#define IVPU_MMU_CERROR_NONE 0x0
+#define IVPU_MMU_CERROR_ILL 0x1
+#define IVPU_MMU_CERROR_ABT 0x2
+#define IVPU_MMU_CERROR_ATC_INV_SYNC 0x3
+
+static const char *ivpu_mmu_event_to_str(u32 cmd)
{
switch (cmd) {
case IVPU_MMU_EVT_F_UUT:
@@ -276,6 +281,22 @@ static char *ivpu_mmu_event_to_str(u32 cmd)
}
}
+static const char *ivpu_mmu_cmdq_err_to_str(u32 err)
+{
+ switch (err) {
+ case IVPU_MMU_CERROR_NONE:
+ return "No CMDQ Error";
+ case IVPU_MMU_CERROR_ILL:
+ return "Illegal command";
+ case IVPU_MMU_CERROR_ABT:
+ return "External abort on CMDQ read";
+ case IVPU_MMU_CERROR_ATC_INV_SYNC:
+ return "Sync failed to complete ATS invalidation";
+ default:
+ return "Unknown CMDQ Error";
+ }
+}
+
static void ivpu_mmu_config_check(struct ivpu_device *vdev)
{
u32 val_ref;
@@ -479,10 +500,7 @@ static int ivpu_mmu_cmdq_sync(struct ivpu_device *vdev)
u64 val;
int ret;
- val = FIELD_PREP(IVPU_MMU_CMD_OPCODE, CMD_SYNC) |
- FIELD_PREP(IVPU_MMU_CMD_SYNC_0_CS, 0x2) |
- FIELD_PREP(IVPU_MMU_CMD_SYNC_0_MSH, 0x3) |
- FIELD_PREP(IVPU_MMU_CMD_SYNC_0_MSI_ATTR, 0xf);
+ val = FIELD_PREP(IVPU_MMU_CMD_OPCODE, CMD_SYNC);
ret = ivpu_mmu_cmdq_cmd_write(vdev, "SYNC", val, 0);
if (ret)
@@ -492,8 +510,15 @@ static int ivpu_mmu_cmdq_sync(struct ivpu_device *vdev)
REGV_WR32(IVPU_MMU_REG_CMDQ_PROD, q->prod);
ret = ivpu_mmu_cmdq_wait_for_cons(vdev);
- if (ret)
- ivpu_err(vdev, "Timed out waiting for consumer: %d\n", ret);
+ if (ret) {
+ u32 err;
+
+ val = REGV_RD32(IVPU_MMU_REG_CMDQ_CONS);
+ err = REG_GET_FLD(IVPU_MMU_REG_CMDQ_CONS, ERR, val);
+
+ ivpu_err(vdev, "Timed out waiting for MMU consumer: %d, error: %s\n", ret,
+ ivpu_mmu_cmdq_err_to_str(err));
+ }
return ret;
}
@@ -750,9 +775,12 @@ int ivpu_mmu_init(struct ivpu_device *vdev)
ivpu_dbg(vdev, MMU, "Init..\n");
- drmm_mutex_init(&vdev->drm, &mmu->lock);
ivpu_mmu_config_check(vdev);
+ ret = drmm_mutex_init(&vdev->drm, &mmu->lock);
+ if (ret)
+ return ret;
+
ret = ivpu_mmu_structs_alloc(vdev);
if (ret)
return ret;
diff --git a/drivers/accel/ivpu/ivpu_mmu_context.c b/drivers/accel/ivpu/ivpu_mmu_context.c
index c1050a2df954..12a8c09d4547 100644
--- a/drivers/accel/ivpu/ivpu_mmu_context.c
+++ b/drivers/accel/ivpu/ivpu_mmu_context.c
@@ -5,6 +5,9 @@
#include <linux/bitfield.h>
#include <linux/highmem.h>
+#include <linux/set_memory.h>
+
+#include <drm/drm_cache.h>
#include "ivpu_drv.h"
#include "ivpu_hw.h"
@@ -39,12 +42,57 @@
#define IVPU_MMU_ENTRY_MAPPED (IVPU_MMU_ENTRY_FLAG_AF | IVPU_MMU_ENTRY_FLAG_USER | \
IVPU_MMU_ENTRY_FLAG_NG | IVPU_MMU_ENTRY_VALID)
+static void *ivpu_pgtable_alloc_page(struct ivpu_device *vdev, dma_addr_t *dma)
+{
+ dma_addr_t dma_addr;
+ struct page *page;
+ void *cpu;
+
+ page = alloc_page(GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO);
+ if (!page)
+ return NULL;
+
+ set_pages_array_wc(&page, 1);
+
+ dma_addr = dma_map_page(vdev->drm.dev, page, 0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+ if (dma_mapping_error(vdev->drm.dev, dma_addr))
+ goto err_free_page;
+
+ cpu = vmap(&page, 1, VM_MAP, pgprot_writecombine(PAGE_KERNEL));
+ if (!cpu)
+ goto err_dma_unmap_page;
+
+ *dma = dma_addr;
+ return cpu;
+
+err_dma_unmap_page:
+ dma_unmap_page(vdev->drm.dev, dma_addr, PAGE_SIZE, DMA_BIDIRECTIONAL);
+
+err_free_page:
+ put_page(page);
+ return NULL;
+}
+
+static void ivpu_pgtable_free_page(struct ivpu_device *vdev, u64 *cpu_addr, dma_addr_t dma_addr)
+{
+ struct page *page;
+
+ if (cpu_addr) {
+ page = vmalloc_to_page(cpu_addr);
+ vunmap(cpu_addr);
+ dma_unmap_page(vdev->drm.dev, dma_addr & ~IVPU_MMU_ENTRY_FLAGS_MASK, PAGE_SIZE,
+ DMA_BIDIRECTIONAL);
+ set_pages_array_wb(&page, 1);
+ put_page(page);
+ }
+}
+
static int ivpu_mmu_pgtable_init(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
{
dma_addr_t pgd_dma;
- pgtable->pgd_dma_ptr = dma_alloc_coherent(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pgd_dma,
- GFP_KERNEL);
+ pgtable->pgd_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pgd_dma);
if (!pgtable->pgd_dma_ptr)
return -ENOMEM;
@@ -53,13 +101,6 @@ static int ivpu_mmu_pgtable_init(struct ivpu_device *vdev, struct ivpu_mmu_pgtab
return 0;
}
-static void ivpu_mmu_pgtable_free(struct ivpu_device *vdev, u64 *cpu_addr, dma_addr_t dma_addr)
-{
- if (cpu_addr)
- dma_free_coherent(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, cpu_addr,
- dma_addr & ~IVPU_MMU_ENTRY_FLAGS_MASK);
-}
-
static void ivpu_mmu_pgtables_free(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable)
{
int pgd_idx, pud_idx, pmd_idx;
@@ -84,19 +125,19 @@ static void ivpu_mmu_pgtables_free(struct ivpu_device *vdev, struct ivpu_mmu_pgt
pte_dma_ptr = pgtable->pte_ptrs[pgd_idx][pud_idx][pmd_idx];
pte_dma = pgtable->pmd_ptrs[pgd_idx][pud_idx][pmd_idx];
- ivpu_mmu_pgtable_free(vdev, pte_dma_ptr, pte_dma);
+ ivpu_pgtable_free_page(vdev, pte_dma_ptr, pte_dma);
}
kfree(pgtable->pte_ptrs[pgd_idx][pud_idx]);
- ivpu_mmu_pgtable_free(vdev, pmd_dma_ptr, pmd_dma);
+ ivpu_pgtable_free_page(vdev, pmd_dma_ptr, pmd_dma);
}
kfree(pgtable->pmd_ptrs[pgd_idx]);
kfree(pgtable->pte_ptrs[pgd_idx]);
- ivpu_mmu_pgtable_free(vdev, pud_dma_ptr, pud_dma);
+ ivpu_pgtable_free_page(vdev, pud_dma_ptr, pud_dma);
}
- ivpu_mmu_pgtable_free(vdev, pgtable->pgd_dma_ptr, pgtable->pgd_dma);
+ ivpu_pgtable_free_page(vdev, pgtable->pgd_dma_ptr, pgtable->pgd_dma);
}
static u64*
@@ -108,7 +149,7 @@ ivpu_mmu_ensure_pud(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable,
if (pud_dma_ptr)
return pud_dma_ptr;
- pud_dma_ptr = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pud_dma, GFP_KERNEL);
+ pud_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pud_dma);
if (!pud_dma_ptr)
return NULL;
@@ -131,7 +172,7 @@ err_free_pmd_ptrs:
kfree(pgtable->pmd_ptrs[pgd_idx]);
err_free_pud_dma_ptr:
- ivpu_mmu_pgtable_free(vdev, pud_dma_ptr, pud_dma);
+ ivpu_pgtable_free_page(vdev, pud_dma_ptr, pud_dma);
return NULL;
}
@@ -145,7 +186,7 @@ ivpu_mmu_ensure_pmd(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable,
if (pmd_dma_ptr)
return pmd_dma_ptr;
- pmd_dma_ptr = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pmd_dma, GFP_KERNEL);
+ pmd_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pmd_dma);
if (!pmd_dma_ptr)
return NULL;
@@ -160,7 +201,7 @@ ivpu_mmu_ensure_pmd(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable,
return pmd_dma_ptr;
err_free_pmd_dma_ptr:
- ivpu_mmu_pgtable_free(vdev, pmd_dma_ptr, pmd_dma);
+ ivpu_pgtable_free_page(vdev, pmd_dma_ptr, pmd_dma);
return NULL;
}
@@ -174,7 +215,7 @@ ivpu_mmu_ensure_pte(struct ivpu_device *vdev, struct ivpu_mmu_pgtable *pgtable,
if (pte_dma_ptr)
return pte_dma_ptr;
- pte_dma_ptr = dma_alloc_wc(vdev->drm.dev, IVPU_MMU_PGTABLE_SIZE, &pte_dma, GFP_KERNEL);
+ pte_dma_ptr = ivpu_pgtable_alloc_page(vdev, &pte_dma);
if (!pte_dma_ptr)
return NULL;
@@ -249,38 +290,6 @@ static void ivpu_mmu_context_unmap_page(struct ivpu_mmu_context *ctx, u64 vpu_ad
ctx->pgtable.pte_ptrs[pgd_idx][pud_idx][pmd_idx][pte_idx] = IVPU_MMU_ENTRY_INVALID;
}
-static void
-ivpu_mmu_context_flush_page_tables(struct ivpu_mmu_context *ctx, u64 vpu_addr, size_t size)
-{
- struct ivpu_mmu_pgtable *pgtable = &ctx->pgtable;
- u64 end_addr = vpu_addr + size;
-
- /* Align to PMD entry (2 MB) */
- vpu_addr &= ~(IVPU_MMU_PTE_MAP_SIZE - 1);
-
- while (vpu_addr < end_addr) {
- int pgd_idx = FIELD_GET(IVPU_MMU_PGD_INDEX_MASK, vpu_addr);
- u64 pud_end = (pgd_idx + 1) * (u64)IVPU_MMU_PUD_MAP_SIZE;
-
- while (vpu_addr < end_addr && vpu_addr < pud_end) {
- int pud_idx = FIELD_GET(IVPU_MMU_PUD_INDEX_MASK, vpu_addr);
- u64 pmd_end = (pud_idx + 1) * (u64)IVPU_MMU_PMD_MAP_SIZE;
-
- while (vpu_addr < end_addr && vpu_addr < pmd_end) {
- int pmd_idx = FIELD_GET(IVPU_MMU_PMD_INDEX_MASK, vpu_addr);
-
- clflush_cache_range(pgtable->pte_ptrs[pgd_idx][pud_idx][pmd_idx],
- IVPU_MMU_PGTABLE_SIZE);
- vpu_addr += IVPU_MMU_PTE_MAP_SIZE;
- }
- clflush_cache_range(pgtable->pmd_ptrs[pgd_idx][pud_idx],
- IVPU_MMU_PGTABLE_SIZE);
- }
- clflush_cache_range(pgtable->pud_ptrs[pgd_idx], IVPU_MMU_PGTABLE_SIZE);
- }
- clflush_cache_range(pgtable->pgd_dma_ptr, IVPU_MMU_PGTABLE_SIZE);
-}
-
static int
ivpu_mmu_context_map_pages(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
u64 vpu_addr, dma_addr_t dma_addr, size_t size, u64 prot)
@@ -327,6 +336,9 @@ ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
u64 prot;
u64 i;
+ if (drm_WARN_ON(&vdev->drm, !ctx))
+ return -EINVAL;
+
if (!IS_ALIGNED(vpu_addr, IVPU_MMU_PAGE_SIZE))
return -EINVAL;
@@ -349,10 +361,11 @@ ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
mutex_unlock(&ctx->lock);
return ret;
}
- ivpu_mmu_context_flush_page_tables(ctx, vpu_addr, size);
vpu_addr += size;
}
+ /* Ensure page table modifications are flushed from wc buffers to memory */
+ wmb();
mutex_unlock(&ctx->lock);
ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id);
@@ -369,8 +382,8 @@ ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ct
int ret;
u64 i;
- if (!IS_ALIGNED(vpu_addr, IVPU_MMU_PAGE_SIZE))
- ivpu_warn(vdev, "Unaligned vpu_addr: 0x%llx\n", vpu_addr);
+ if (drm_WARN_ON(&vdev->drm, !ctx))
+ return;
mutex_lock(&ctx->lock);
@@ -378,10 +391,11 @@ ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ct
size_t size = sg_dma_len(sg) + sg->offset;
ivpu_mmu_context_unmap_pages(ctx, vpu_addr, size);
- ivpu_mmu_context_flush_page_tables(ctx, vpu_addr, size);
vpu_addr += size;
}
+ /* Ensure page table modifications are flushed from wc buffers to memory */
+ wmb();
mutex_unlock(&ctx->lock);
ret = ivpu_mmu_invalidate_tlb(vdev, ctx->id);
@@ -390,28 +404,34 @@ ivpu_mmu_context_unmap_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ct
}
int
-ivpu_mmu_context_insert_node_locked(struct ivpu_mmu_context *ctx,
- const struct ivpu_addr_range *range,
- u64 size, struct drm_mm_node *node)
+ivpu_mmu_context_insert_node(struct ivpu_mmu_context *ctx, const struct ivpu_addr_range *range,
+ u64 size, struct drm_mm_node *node)
{
- lockdep_assert_held(&ctx->lock);
+ int ret;
+
+ WARN_ON(!range);
+ mutex_lock(&ctx->lock);
if (!ivpu_disable_mmu_cont_pages && size >= IVPU_MMU_CONT_PAGES_SIZE) {
- if (!drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_CONT_PAGES_SIZE, 0,
- range->start, range->end, DRM_MM_INSERT_BEST))
- return 0;
+ ret = drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_CONT_PAGES_SIZE, 0,
+ range->start, range->end, DRM_MM_INSERT_BEST);
+ if (!ret)
+ goto unlock;
}
- return drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_PAGE_SIZE, 0,
- range->start, range->end, DRM_MM_INSERT_BEST);
+ ret = drm_mm_insert_node_in_range(&ctx->mm, node, size, IVPU_MMU_PAGE_SIZE, 0,
+ range->start, range->end, DRM_MM_INSERT_BEST);
+unlock:
+ mutex_unlock(&ctx->lock);
+ return ret;
}
void
-ivpu_mmu_context_remove_node_locked(struct ivpu_mmu_context *ctx, struct drm_mm_node *node)
+ivpu_mmu_context_remove_node(struct ivpu_mmu_context *ctx, struct drm_mm_node *node)
{
- lockdep_assert_held(&ctx->lock);
-
+ mutex_lock(&ctx->lock);
drm_mm_remove_node(node);
+ mutex_unlock(&ctx->lock);
}
static int
@@ -421,7 +441,6 @@ ivpu_mmu_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx, u3
int ret;
mutex_init(&ctx->lock);
- INIT_LIST_HEAD(&ctx->bo_list);
ret = ivpu_mmu_pgtable_init(vdev, &ctx->pgtable);
if (ret) {
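With the page tables now mapped write-combined rather than coherent, the per-range clflush loop is replaced by a single wmb() issued after all entries for a map/unmap call have been written and before the TLB invalidation. The ordering requirement, reduced to a sketch (publish_pte is illustrative):

#include <linux/compiler.h>
#include <linux/types.h>
#include <asm/barrier.h>

/* wc_pte points into a table obtained via vmap(..., pgprot_writecombine(...)). */
static void publish_pte(u64 *wc_pte, u64 entry)
{
        WRITE_ONCE(*wc_pte, entry);     /* may linger in a CPU write-combining buffer */
        wmb();                          /* flush WC buffers so the table walker sees the update */
        /* ...only now is it safe to send the TLB-invalidate command to the device */
}

In the driver the barrier covers a whole scatterlist worth of updates, which is why it is issued once per ivpu_mmu_context_map_sgt()/unmap_sgt() call rather than per entry.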
diff --git a/drivers/accel/ivpu/ivpu_mmu_context.h b/drivers/accel/ivpu/ivpu_mmu_context.h
index f15d8c630d8a..535db3a1fc74 100644
--- a/drivers/accel/ivpu/ivpu_mmu_context.h
+++ b/drivers/accel/ivpu/ivpu_mmu_context.h
@@ -23,10 +23,9 @@ struct ivpu_mmu_pgtable {
};
struct ivpu_mmu_context {
- struct mutex lock; /* protects: mm, pgtable, bo_list */
+ struct mutex lock; /* Protects: mm, pgtable */
struct drm_mm mm;
struct ivpu_mmu_pgtable pgtable;
- struct list_head bo_list;
u32 id;
};
@@ -39,11 +38,9 @@ int ivpu_mmu_user_context_init(struct ivpu_device *vdev, struct ivpu_mmu_context
void ivpu_mmu_user_context_fini(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx);
void ivpu_mmu_user_context_mark_invalid(struct ivpu_device *vdev, u32 ssid);
-int ivpu_mmu_context_insert_node_locked(struct ivpu_mmu_context *ctx,
- const struct ivpu_addr_range *range,
- u64 size, struct drm_mm_node *node);
-void ivpu_mmu_context_remove_node_locked(struct ivpu_mmu_context *ctx,
- struct drm_mm_node *node);
+int ivpu_mmu_context_insert_node(struct ivpu_mmu_context *ctx, const struct ivpu_addr_range *range,
+ u64 size, struct drm_mm_node *node);
+void ivpu_mmu_context_remove_node(struct ivpu_mmu_context *ctx, struct drm_mm_node *node);
int ivpu_mmu_context_map_sgt(struct ivpu_device *vdev, struct ivpu_mmu_context *ctx,
u64 vpu_addr, struct sg_table *sgt, bool llc_coherent);
diff --git a/drivers/accel/ivpu/ivpu_pm.c b/drivers/accel/ivpu/ivpu_pm.c
index e9b16cbc26f4..0af8864cb3b5 100644
--- a/drivers/accel/ivpu/ivpu_pm.c
+++ b/drivers/accel/ivpu/ivpu_pm.c
@@ -15,6 +15,7 @@
#include "ivpu_fw.h"
#include "ivpu_ipc.h"
#include "ivpu_job.h"
+#include "ivpu_jsm_msg.h"
#include "ivpu_mmu.h"
#include "ivpu_pm.h"
@@ -22,6 +23,10 @@ static bool ivpu_disable_recovery;
module_param_named_unsafe(disable_recovery, ivpu_disable_recovery, bool, 0644);
MODULE_PARM_DESC(disable_recovery, "Disables recovery when VPU hang is detected");
+static unsigned long ivpu_tdr_timeout_ms;
+module_param_named(tdr_timeout_ms, ivpu_tdr_timeout_ms, ulong, 0644);
+MODULE_PARM_DESC(tdr_timeout_ms, "Timeout for device hang detection, in milliseconds, 0 - default");
+
#define PM_RESCHEDULE_LIMIT 5
static void ivpu_pm_prepare_cold_boot(struct ivpu_device *vdev)
@@ -69,27 +74,31 @@ retry:
ret = ivpu_hw_power_up(vdev);
if (ret) {
ivpu_err(vdev, "Failed to power up HW: %d\n", ret);
- return ret;
+ goto err_power_down;
}
ret = ivpu_mmu_enable(vdev);
if (ret) {
ivpu_err(vdev, "Failed to resume MMU: %d\n", ret);
- ivpu_hw_power_down(vdev);
- return ret;
+ goto err_power_down;
}
ret = ivpu_boot(vdev);
- if (ret) {
- ivpu_mmu_disable(vdev);
- ivpu_hw_power_down(vdev);
- if (!ivpu_fw_is_cold_boot(vdev)) {
- ivpu_warn(vdev, "Failed to resume the FW: %d. Retrying cold boot..\n", ret);
- ivpu_pm_prepare_cold_boot(vdev);
- goto retry;
- } else {
- ivpu_err(vdev, "Failed to resume the FW: %d\n", ret);
- }
+ if (ret)
+ goto err_mmu_disable;
+
+ return 0;
+
+err_mmu_disable:
+ ivpu_mmu_disable(vdev);
+err_power_down:
+ ivpu_hw_power_down(vdev);
+
+ if (!ivpu_fw_is_cold_boot(vdev)) {
+ ivpu_pm_prepare_cold_boot(vdev);
+ goto retry;
+ } else {
+ ivpu_err(vdev, "Failed to resume the FW: %d\n", ret);
}
return ret;
@@ -136,6 +145,31 @@ void ivpu_pm_schedule_recovery(struct ivpu_device *vdev)
}
}
+static void ivpu_job_timeout_work(struct work_struct *work)
+{
+ struct ivpu_pm_info *pm = container_of(work, struct ivpu_pm_info, job_timeout_work.work);
+ struct ivpu_device *vdev = pm->vdev;
+ unsigned long timeout_ms = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr;
+
+ ivpu_err(vdev, "TDR detected, timeout %lu ms", timeout_ms);
+ ivpu_hw_diagnose_failure(vdev);
+
+ ivpu_pm_schedule_recovery(vdev);
+}
+
+void ivpu_start_job_timeout_detection(struct ivpu_device *vdev)
+{
+ unsigned long timeout_ms = ivpu_tdr_timeout_ms ? ivpu_tdr_timeout_ms : vdev->timeout.tdr;
+
+ /* No-op if already queued */
+ queue_delayed_work(system_wq, &vdev->pm->job_timeout_work, msecs_to_jiffies(timeout_ms));
+}
+
+void ivpu_stop_job_timeout_detection(struct ivpu_device *vdev)
+{
+ cancel_delayed_work_sync(&vdev->pm->job_timeout_work);
+}
+
int ivpu_pm_suspend_cb(struct device *dev)
{
struct drm_device *drm = dev_get_drvdata(dev);
@@ -153,6 +187,8 @@ int ivpu_pm_suspend_cb(struct device *dev)
}
}
+ ivpu_jsm_pwr_d0i3_enter(vdev);
+
ivpu_suspend(vdev);
ivpu_pm_prepare_warm_boot(vdev);
@@ -188,6 +224,7 @@ int ivpu_pm_runtime_suspend_cb(struct device *dev)
{
struct drm_device *drm = dev_get_drvdata(dev);
struct ivpu_device *vdev = to_ivpu_device(drm);
+ bool hw_is_idle = true;
int ret;
ivpu_dbg(vdev, PM, "Runtime suspend..\n");
@@ -200,11 +237,16 @@ int ivpu_pm_runtime_suspend_cb(struct device *dev)
return -EAGAIN;
}
+ if (!vdev->pm->suspend_reschedule_counter)
+ hw_is_idle = false;
+ else if (ivpu_jsm_pwr_d0i3_enter(vdev))
+ hw_is_idle = false;
+
ret = ivpu_suspend(vdev);
if (ret)
ivpu_err(vdev, "Failed to set suspend VPU: %d\n", ret);
- if (!vdev->pm->suspend_reschedule_counter) {
+ if (!hw_is_idle) {
ivpu_warn(vdev, "VPU failed to enter idle, force suspended.\n");
ivpu_pm_prepare_cold_boot(vdev);
} else {
@@ -304,6 +346,7 @@ void ivpu_pm_init(struct ivpu_device *vdev)
atomic_set(&pm->in_reset, 0);
INIT_WORK(&pm->recovery_work, ivpu_pm_recovery_work);
+ INIT_DELAYED_WORK(&pm->job_timeout_work, ivpu_job_timeout_work);
if (ivpu_disable_recovery)
delay = -1;
@@ -318,6 +361,7 @@ void ivpu_pm_init(struct ivpu_device *vdev)
void ivpu_pm_cancel_recovery(struct ivpu_device *vdev)
{
+ drm_WARN_ON(&vdev->drm, delayed_work_pending(&vdev->pm->job_timeout_work));
cancel_work_sync(&vdev->pm->recovery_work);
}
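The kthread that used to poll for job completions and detect hangs is gone; hang detection is now a delayed work item armed on submission and cancelled on completion. A minimal sketch of that pattern, with illustrative names (my_dev, my_timeout_work_fn):

#include <linux/jiffies.h>
#include <linux/printk.h>
#include <linux/workqueue.h>

struct my_dev {
        struct delayed_work timeout_work;
};

static void my_timeout_work_fn(struct work_struct *work)
{
        struct my_dev *mdev = container_of(work, struct my_dev, timeout_work.work);

        pr_err("my_dev %p: no completion before the deadline, scheduling recovery\n", mdev);
}

static void my_init_timeout(struct my_dev *mdev)
{
        INIT_DELAYED_WORK(&mdev->timeout_work, my_timeout_work_fn);
}

/* Arm on every submission; queue_delayed_work() is a no-op if already pending. */
static void my_arm_timeout(struct my_dev *mdev, unsigned long timeout_ms)
{
        queue_delayed_work(system_wq, &mdev->timeout_work, msecs_to_jiffies(timeout_ms));
}

/* Disarm on completion; the _sync variant also waits for a handler already running. */
static void my_disarm_timeout(struct my_dev *mdev)
{
        cancel_delayed_work_sync(&mdev->timeout_work);
}

In the driver, ivpu_job_done_callback() re-arms the work only while submitted_jobs_xa still has entries, so an idle device keeps nothing queued.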
diff --git a/drivers/accel/ivpu/ivpu_pm.h b/drivers/accel/ivpu/ivpu_pm.h
index 044db150be07..97c6e0b0aa42 100644
--- a/drivers/accel/ivpu/ivpu_pm.h
+++ b/drivers/accel/ivpu/ivpu_pm.h
@@ -12,6 +12,7 @@ struct ivpu_device;
struct ivpu_pm_info {
struct ivpu_device *vdev;
+ struct delayed_work job_timeout_work;
struct work_struct recovery_work;
atomic_t in_reset;
atomic_t reset_counter;
@@ -37,5 +38,7 @@ int __must_check ivpu_rpm_get_if_active(struct ivpu_device *vdev);
void ivpu_rpm_put(struct ivpu_device *vdev);
void ivpu_pm_schedule_recovery(struct ivpu_device *vdev);
+void ivpu_start_job_timeout_detection(struct ivpu_device *vdev);
+void ivpu_stop_job_timeout_detection(struct ivpu_device *vdev);
#endif /* __IVPU_PM_H__ */
diff --git a/drivers/accel/ivpu/vpu_boot_api.h b/drivers/accel/ivpu/vpu_boot_api.h
index 6b71be92ba65..04c954258563 100644
--- a/drivers/accel/ivpu/vpu_boot_api.h
+++ b/drivers/accel/ivpu/vpu_boot_api.h
@@ -11,7 +11,10 @@
* The bellow values will be used to construct the version info this way:
* fw_bin_header->api_version[VPU_BOOT_API_VER_ID] = (VPU_BOOT_API_VER_MAJOR << 16) |
* VPU_BOOT_API_VER_MINOR;
- * VPU_BOOT_API_VER_PATCH will be ignored. KMD and compatibility is not affected if this changes.
+ * VPU_BOOT_API_VER_PATCH will be ignored. KMD and compatibility is not affected if this changes.
+ * This information is collected by using vpuip_2/application/vpuFirmware/make_std_fw_image.py
+ * If a header is missing this info we ignore the header; if a header is missing or contains
+ * partial info, a build error will be generated.
*/
/*
@@ -24,12 +27,12 @@
* Minor version changes when API backward compatibility is preserved.
* Resets to 0 if Major version is incremented.
*/
-#define VPU_BOOT_API_VER_MINOR 12
+#define VPU_BOOT_API_VER_MINOR 20
/*
* API header changed (field names, documentation, formatting) but API itself has not been changed
*/
-#define VPU_BOOT_API_VER_PATCH 2
+#define VPU_BOOT_API_VER_PATCH 4
/*
* Index in the API version table
@@ -63,6 +66,12 @@ struct vpu_firmware_header {
/* Size of memory require for firmware execution */
u32 runtime_size;
u32 shave_nn_fw_size;
+ /* Size of primary preemption buffer. */
+ u32 preemption_buffer_1_size;
+ /* Size of secondary preemption buffer. */
+ u32 preemption_buffer_2_size;
+ /* Space reserved for future preemption-related fields. */
+ u32 preemption_reserved[6];
};
/*
@@ -89,6 +98,14 @@ enum VPU_BOOT_L2_CACHE_CFG_TYPE {
VPU_BOOT_L2_CACHE_CFG_NUM = 2
};
+/** VPU MCA ECC signalling mode. By default, no signalling is used */
+enum VPU_BOOT_MCA_ECC_SIGNAL_TYPE {
+ VPU_BOOT_MCA_ECC_NONE = 0,
+ VPU_BOOT_MCA_ECC_CORR = 1,
+ VPU_BOOT_MCA_ECC_FATAL = 2,
+ VPU_BOOT_MCA_ECC_BOTH = 3
+};
+
/**
* Logging destinations.
*
@@ -131,9 +148,11 @@ enum vpu_trace_destination {
#define VPU_TRACE_PROC_BIT_ACT_SHV_3 22
#define VPU_TRACE_PROC_NO_OF_HW_DEVS 23
-/* KMB HW component IDs are sequential, so define first and last IDs. */
-#define VPU_TRACE_PROC_BIT_KMB_FIRST VPU_TRACE_PROC_BIT_LRT
-#define VPU_TRACE_PROC_BIT_KMB_LAST VPU_TRACE_PROC_BIT_SHV_15
+/* VPU 30xx HW component IDs are sequential, so define first and last IDs. */
+#define VPU_TRACE_PROC_BIT_30XX_FIRST VPU_TRACE_PROC_BIT_LRT
+#define VPU_TRACE_PROC_BIT_30XX_LAST VPU_TRACE_PROC_BIT_SHV_15
+#define VPU_TRACE_PROC_BIT_KMB_FIRST VPU_TRACE_PROC_BIT_30XX_FIRST
+#define VPU_TRACE_PROC_BIT_KMB_LAST VPU_TRACE_PROC_BIT_30XX_LAST
struct vpu_boot_l2_cache_config {
u8 use;
@@ -148,6 +167,25 @@ struct vpu_warm_boot_section {
u32 is_clear_op;
};
+/*
+ * When HW scheduling mode is enabled, a present period is defined.
+ * It will be used by VPU to swap between normal and focus priorities
+ * to prevent starving of normal priority band (when implemented).
+ * Host must provide a valid value at boot time in
+ * `vpu_focus_present_timer_ms`. If the value provided by the host is not within the
+ * defined range, a default value will be used. Here we define the min. and max.
+ * allowed values and the default value of the present period. Units are milliseconds.
+ */
+#define VPU_PRESENT_CALL_PERIOD_MS_DEFAULT 50
+#define VPU_PRESENT_CALL_PERIOD_MS_MIN 16
+#define VPU_PRESENT_CALL_PERIOD_MS_MAX 10000
+
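
A minimal sketch of the documented fallback, assuming the three macros above and the kernel u32 type; the helper name is hypothetical. It mirrors the rule stated in the comment: out-of-range values fall back to the default present period.

/* Hypothetical helper: return a valid present period in milliseconds,
 * falling back to the default when the requested value is out of range. */
static inline u32 example_present_period_ms(u32 requested_ms)
{
	if (requested_ms < VPU_PRESENT_CALL_PERIOD_MS_MIN ||
	    requested_ms > VPU_PRESENT_CALL_PERIOD_MS_MAX)
		return VPU_PRESENT_CALL_PERIOD_MS_DEFAULT;
	return requested_ms;
}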
+/**
+ * Macros to enable various operation modes within the VPU.
+ * To be defined as part of a 32-bit mask.
+ */
+#define VPU_OP_MODE_SURVIVABILITY 0x1
+
struct vpu_boot_params {
u32 magic;
u32 vpu_id;
@@ -218,6 +256,7 @@ struct vpu_boot_params {
* the threshold will not be logged); applies to every enabled logging
* destination and loggable HW component. See 'mvLog_t' enum for acceptable
* values.
+ * TODO: EISW-33556: Move log level definition (mvLog_t) to this file.
*/
u32 default_trace_level;
u32 boot_type;
@@ -249,7 +288,36 @@ struct vpu_boot_params {
u32 temp_sensor_period_ms;
/** PLL ratio for efficient clock frequency */
u32 pn_freq_pll_ratio;
- u32 pad4[28];
+ /** DVFS Mode: Default: 0, Max Performance: 1, On Demand: 2, Power Save: 3 */
+ u32 dvfs_mode;
+ /**
+ * Depending on DVFS Mode:
+ * On-demand: Default if 0.
+ * Bit 0-7 - uint8_t: Highest residency percent
+ * Bit 8-15 - uint8_t: High residency percent
+ * Bit 16-23 - uint8_t: Low residency percent
+ * Bit 24-31 - uint8_t: Lowest residency percent
+ * Bit 32-35 - unsigned 4b: PLL Ratio increase amount on highest residency
+ * Bit 36-39 - unsigned 4b: PLL Ratio increase amount on high residency
+ * Bit 40-43 - unsigned 4b: PLL Ratio decrease amount on low residency
+ * Bit 44-47 - unsigned 4b: PLL Ratio decrease amount on lowest residency
+ * Bit 48-55 - uint8_t: Period (ms) for residency decisions
+ * Bit 56-63 - uint8_t: Averaging windows (as multiples of period. Max: 30 decimal)
+ * Power Save/Max Performance: Unused
+ */
+ u64 dvfs_param;
+ /**
+ * D0i3 delayed entry
+ * Bit0: Disable CPU state save on D0i2 entry flow.
+ * 0: Every D0i2 entry saves state. Save state IPC message ignored.
+ * 1: IPC message required to save state on D0i3 entry flow.
+ */
+ u32 d0i3_delayed_entry;
+ /* Time spent by VPU in D0i3 state */
+ u64 d0i3_residency_time_us;
+ /* Value of VPU perf counter at the time of entering D0i3 state. */
+ u64 d0i3_entry_vpu_ts;
+ u32 pad4[20];
/* Warm boot information: 0x400 - 0x43F */
u32 warm_boot_sections_count;
u32 warm_boot_start_address_reference;
@@ -274,8 +342,12 @@ struct vpu_boot_params {
u32 vpu_scheduling_mode;
/* Present call period in milliseconds. */
u32 vpu_focus_present_timer_ms;
- /* Unused/reserved: 0x478 - 0xFFF */
- u32 pad6[738];
+ /* VPU ECC Signaling */
+ u32 vpu_uses_ecc_mca_signal;
+ /* Values defined by VPU_OP_MODE* macros */
+ u32 vpu_operation_mode;
+ /* Unused/reserved: 0x480 - 0xFFF */
+ u32 pad6[736];
};
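
The dvfs_param bit layout documented inside struct vpu_boot_params above can be packed with plain shifts. The sketch below assumes the kernel fixed-width types; the helper and parameter names are hypothetical and simply restate the documented bit positions for on-demand mode.

/* Hypothetical helper: pack the documented dvfs_param fields for on-demand mode. */
static u64 example_pack_dvfs_param(u8 highest_pct, u8 high_pct, u8 low_pct, u8 lowest_pct,
				   u8 inc_highest, u8 inc_high, u8 dec_low, u8 dec_lowest,
				   u8 period_ms, u8 avg_windows)
{
	return (u64)highest_pct |                 /* bits 0-7 */
	       ((u64)high_pct << 8) |             /* bits 8-15 */
	       ((u64)low_pct << 16) |             /* bits 16-23 */
	       ((u64)lowest_pct << 24) |          /* bits 24-31 */
	       ((u64)(inc_highest & 0xf) << 32) | /* bits 32-35 */
	       ((u64)(inc_high & 0xf) << 36) |    /* bits 36-39 */
	       ((u64)(dec_low & 0xf) << 40) |     /* bits 40-43 */
	       ((u64)(dec_lowest & 0xf) << 44) |  /* bits 44-47 */
	       ((u64)period_ms << 48) |           /* bits 48-55 */
	       ((u64)avg_windows << 56);          /* bits 56-63 */
}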
/*
diff --git a/drivers/accel/ivpu/vpu_jsm_api.h b/drivers/accel/ivpu/vpu_jsm_api.h
index 2949ec8365bd..7da7622742be 100644
--- a/drivers/accel/ivpu/vpu_jsm_api.h
+++ b/drivers/accel/ivpu/vpu_jsm_api.h
@@ -22,12 +22,12 @@
/*
* Minor version changes when API backward compatibility is preserved.
*/
-#define VPU_JSM_API_VER_MINOR 0
+#define VPU_JSM_API_VER_MINOR 15
/*
* API header changed (field names, documentation, formatting) but API itself has not been changed
*/
-#define VPU_JSM_API_VER_PATCH 1
+#define VPU_JSM_API_VER_PATCH 0
/*
* Index in the API version table
@@ -84,11 +84,13 @@
* Job flags bit masks.
*/
#define VPU_JOB_FLAGS_NULL_SUBMISSION_MASK 0x00000001
+#define VPU_JOB_FLAGS_PRIVATE_DATA_MASK 0xFF000000
/*
* Sizes of the reserved areas in jobs, in bytes.
*/
-#define VPU_JOB_RESERVED_BYTES 16
+#define VPU_JOB_RESERVED_BYTES 8
+
/*
* Sizes of the reserved areas in job queues, in bytes.
*/
@@ -109,6 +111,20 @@
#define VPU_DYNDBG_CMD_MAX_LEN 96
/*
+ * For HWS command queue scheduling, we can prioritise command queues inside the
+ * same process with a relative in-process priority. Valid values for relative
+ * priority are given below - max and min.
+ */
+#define VPU_HWS_COMMAND_QUEUE_MAX_IN_PROCESS_PRIORITY 7
+#define VPU_HWS_COMMAND_QUEUE_MIN_IN_PROCESS_PRIORITY -7
+
+/*
+ * For HWS priority scheduling, we can have multiple realtime priority bands.
+ * They are numbered from 0 up to the maximum defined below.
+ */
+#define VPU_HWS_MAX_REALTIME_PRIORITY_LEVEL 31U
+
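
A minimal sketch clamping a caller-supplied relative priority into the range defined by the two macros above; the helper name is hypothetical, and s32 matches the signed in_process_priority field further down in this header.

/* Hypothetical helper: clamp an in-process priority to the valid HWS range. */
static inline s32 example_clamp_in_process_priority(s32 prio)
{
	if (prio < VPU_HWS_COMMAND_QUEUE_MIN_IN_PROCESS_PRIORITY)
		return VPU_HWS_COMMAND_QUEUE_MIN_IN_PROCESS_PRIORITY;
	if (prio > VPU_HWS_COMMAND_QUEUE_MAX_IN_PROCESS_PRIORITY)
		return VPU_HWS_COMMAND_QUEUE_MAX_IN_PROCESS_PRIORITY;
	return prio;
}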
+/*
* Job format.
*/
struct vpu_job_queue_entry {
@@ -117,8 +133,14 @@ struct vpu_job_queue_entry {
u32 flags; /**< Flags bit field, see VPU_JOB_FLAGS_* above */
u64 root_page_table_addr; /**< Address of root page table to use for this job */
u64 root_page_table_update_counter; /**< Page tables update events counter */
- u64 preemption_buffer_address; /**< Address of the preemption buffer to use for this job */
- u64 preemption_buffer_size; /**< Size of the preemption buffer to use for this job */
+ u64 primary_preempt_buf_addr;
+ /**< Address of the primary preemption buffer to use for this job */
+ u32 primary_preempt_buf_size;
+ /**< Size of the primary preemption buffer to use for this job */
+ u32 secondary_preempt_buf_size;
+ /**< Size of secondary preemption buffer to use for this job */
+ u64 secondary_preempt_buf_addr;
+ /**< Address of secondary preemption buffer to use for this job */
u8 reserved_0[VPU_JOB_RESERVED_BYTES];
};
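
The single preemption buffer per job is now split into primary and secondary buffers with 32-bit sizes. A minimal sketch of filling only these fields follows, assuming the struct above; the helper name is hypothetical and the addresses and sizes are caller-provided placeholders.

/* Hypothetical helper: set only the preemption-related fields of a job entry. */
static void example_set_job_preempt_buffers(struct vpu_job_queue_entry *job,
					    u64 primary_addr, u32 primary_size,
					    u64 secondary_addr, u32 secondary_size)
{
	job->primary_preempt_buf_addr = primary_addr;
	job->primary_preempt_buf_size = primary_size;
	job->secondary_preempt_buf_addr = secondary_addr;
	job->secondary_preempt_buf_size = secondary_size;
}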
@@ -153,6 +175,46 @@ enum vpu_trace_entity_type {
};
/*
+ * HWS specific log buffer header details.
+ * Total size is 32 bytes.
+ */
+struct vpu_hws_log_buffer_header {
+ /* Written by VPU after adding a log entry. Initialised by host to 0. */
+ u32 first_free_entry_index;
+ /* Incremented by VPU every time the VPU overwrites the 0th entry;
+ * initialised by host to 0.
+ */
+ u32 wraparound_count;
+ /*
+ * This is the number of log entries that can be stored in the log buffer provided by the host.
+ * It is written by host before passing buffer to VPU. VPU should consider it read-only.
+ */
+ u64 num_of_entries;
+ u64 reserved[2];
+};
+
+/*
+ * HWS specific log buffer entry details.
+ * Total size is 32 bytes.
+ */
+struct vpu_hws_log_buffer_entry {
+ /* VPU timestamp must be an invariant timer tick (not impacted by DVFS) */
+ u64 vpu_timestamp;
+ /*
+ * Operation type:
+ * 0 - context state change
+ * 1 - queue new work
+ * 2 - queue unwait sync object
+ * 3 - queue no more work
+ * 4 - queue wait sync object
+ */
+ u32 operation_type;
+ u32 reserved;
+ /* Operation data depends on operation type */
+ u64 operation_data[2];
+};
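
A minimal host-side reader sketch for the two structures above, assuming the host tracks its own read index and keeps up with the writer; wraparound_count handling and writer/reader synchronisation are deliberately ignored, and consume_entry is a hypothetical callback.

/* Hypothetical helper: consume entries up to the VPU's first_free_entry_index
 * and return the updated host read index. */
static u64 example_drain_hws_log(const struct vpu_hws_log_buffer_header *hdr,
				 const struct vpu_hws_log_buffer_entry *entries,
				 u64 read_index,
				 void (*consume_entry)(const struct vpu_hws_log_buffer_entry *))
{
	u64 first_free = hdr->first_free_entry_index;

	while (read_index != first_free) {
		consume_entry(&entries[read_index]);
		read_index = (read_index + 1) % hdr->num_of_entries;
	}
	return read_index;
}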
+
+/*
* Host <-> VPU IPC messages types.
*/
enum vpu_ipc_msg_type {
@@ -228,6 +290,23 @@ enum vpu_ipc_msg_type {
* deallocated or reassigned to another context.
*/
VPU_JSM_MSG_HWS_REGISTER_DB = 0x1117,
+ /** Control command: Log buffer setting */
+ VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG = 0x1118,
+ /* Control command: Suspend command queue. */
+ VPU_JSM_MSG_HWS_SUSPEND_CMDQ = 0x1119,
+ /* Control command: Resume command queue */
+ VPU_JSM_MSG_HWS_RESUME_CMDQ = 0x111a,
+ /* Control command: Resume engine after reset */
+ VPU_JSM_MSG_HWS_ENGINE_RESUME = 0x111b,
+ /* Control command: Enable survivability/DCT mode */
+ VPU_JSM_MSG_DCT_ENABLE = 0x111c,
+ /* Control command: Disable survivability/DCT mode */
+ VPU_JSM_MSG_DCT_DISABLE = 0x111d,
+ /**
+ * Dump VPU state. To be used for debug purposes only.
+ * NOTE: Please introduce new ASYNC commands before this one.
+ */
+ VPU_JSM_MSG_STATE_DUMP = 0x11FF,
/* IPC Host -> Device, General commands */
VPU_JSM_MSG_GENERAL_CMD = 0x1200,
VPU_JSM_MSG_BLOB_DEINIT = VPU_JSM_MSG_GENERAL_CMD,
@@ -236,6 +315,10 @@ enum vpu_ipc_msg_type {
* Linux command: `echo '<dyndbg_cmd>' > <debugfs>/dynamic_debug/control`.
*/
VPU_JSM_MSG_DYNDBG_CONTROL = 0x1201,
+ /**
+ * Perform the save procedure for the D0i3 entry
+ */
+ VPU_JSM_MSG_PWR_D0I3_ENTER = 0x1202,
/* IPC Device -> Host, Job completion */
VPU_JSM_MSG_JOB_DONE = 0x2100,
/* IPC Device -> Host, Async command completion */
@@ -304,11 +387,35 @@ enum vpu_ipc_msg_type {
VPU_JSM_MSG_DESTROY_CMD_QUEUE_RSP = 0x2216,
/** Response to control command: Set context scheduling properties */
VPU_JSM_MSG_SET_CONTEXT_SCHED_PROPERTIES_RSP = 0x2217,
+ /** Response to control command: Log buffer setting */
+ VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG_RSP = 0x2218,
+ /* IPC Device -> Host, HWS notify index entry of log buffer written */
+ VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION = 0x2219,
+ /* IPC Device -> Host, HWS completion of a context suspend request */
+ VPU_JSM_MSG_HWS_SUSPEND_CMDQ_DONE = 0x221a,
+ /* Response to control command: Resume command queue */
+ VPU_JSM_MSG_HWS_RESUME_CMDQ_RSP = 0x221b,
+ /* Response to control command: Resume engine command response */
+ VPU_JSM_MSG_HWS_RESUME_ENGINE_DONE = 0x221c,
+ /* Response to control command: Enable survivability/DCT mode */
+ VPU_JSM_MSG_DCT_ENABLE_DONE = 0x221d,
+ /* Response to control command: Disable survivability/DCT mode */
+ VPU_JSM_MSG_DCT_DISABLE_DONE = 0x221e,
+ /**
+ * Response to state dump control command.
+ * NOTE: Please introduce new ASYNC responses before this one.
+ */
+ VPU_JSM_MSG_STATE_DUMP_RSP = 0x22FF,
/* IPC Device -> Host, General command completion */
VPU_JSM_MSG_GENERAL_CMD_DONE = 0x2300,
VPU_JSM_MSG_BLOB_DEINIT_DONE = VPU_JSM_MSG_GENERAL_CMD_DONE,
/** Response to VPU_JSM_MSG_DYNDBG_CONTROL. */
VPU_JSM_MSG_DYNDBG_CONTROL_RSP = 0x2301,
+ /**
+ * Acknowledgment of completion of the save procedure initiated by
+ * VPU_JSM_MSG_PWR_D0I3_ENTER
+ */
+ VPU_JSM_MSG_PWR_D0I3_ENTER_DONE = 0x2302,
};
enum vpu_ipc_msg_status { VPU_JSM_MSG_FREE, VPU_JSM_MSG_ALLOCATED };
@@ -593,12 +700,12 @@ struct vpu_ipc_msg_payload_hws_priority_band_setup {
* Default quantum in 100ns units for scheduling across processes
* within a priority band
*/
- u64 process_quantum[VPU_HWS_NUM_PRIORITY_BANDS];
+ u32 process_quantum[VPU_HWS_NUM_PRIORITY_BANDS];
/*
* Default grace period in 100ns units for processes that preempt each
* other within a priority band
*/
- u64 process_grace_period[VPU_HWS_NUM_PRIORITY_BANDS];
+ u32 process_grace_period[VPU_HWS_NUM_PRIORITY_BANDS];
/*
* For normal priority band, specifies the target VPU percentage
* in situations when it's starved by the focus band.
@@ -608,32 +715,51 @@ struct vpu_ipc_msg_payload_hws_priority_band_setup {
u32 reserved_0;
};
-/* HWS create command queue request */
+/*
+ * @brief HWS create command queue request.
+ * Host will create a command queue via this command.
+ * Note: Cmdq group is a handle of an object which
+ * may contain one or more command queues.
+ * @see VPU_JSM_MSG_CREATE_CMD_QUEUE
+ * @see VPU_JSM_MSG_CREATE_CMD_QUEUE_RSP
+ */
struct vpu_ipc_msg_payload_hws_create_cmdq {
/* Process id */
u64 process_id;
/* Host SSID */
u32 host_ssid;
- /* Zero Padding */
- u32 reserved;
+ /* Engine for which queue is being created */
+ u32 engine_idx;
+ /*
+ * Cmdq group may be set to 0 or equal to
+ * cmdq_id while each priority band contains
+ * only a single engine instance.
+ */
+ u64 cmdq_group;
/* Command queue id */
u64 cmdq_id;
/* Command queue base */
u64 cmdq_base;
/* Command queue size */
u32 cmdq_size;
- /* Reserved */
+ /* Zero padding */
u32 reserved_0;
};
-/* HWS create command queue response */
+/*
+ * @brief HWS create command queue response.
+ * @see VPU_JSM_MSG_CREATE_CMD_QUEUE
+ * @see VPU_JSM_MSG_CREATE_CMD_QUEUE_RSP
+ */
struct vpu_ipc_msg_payload_hws_create_cmdq_rsp {
/* Process id */
u64 process_id;
/* Host SSID */
u32 host_ssid;
- /* Zero Padding */
- u32 reserved;
+ /* Engine for which queue is being created */
+ u32 engine_idx;
+ /* Command queue group */
+ u64 cmdq_group;
/* Command queue id */
u64 cmdq_id;
};
@@ -661,7 +787,7 @@ struct vpu_ipc_msg_payload_hws_set_context_sched_properties {
/* Inside realtime band assigns a further priority */
u32 realtime_priority_level;
/* Priority relative to other contexts in the same process */
- u32 in_process_priority;
+ s32 in_process_priority;
/* Zero padding / Reserved */
u32 reserved_1;
/* Context quantum relative to other contexts of same priority in the same process */
@@ -694,6 +820,123 @@ struct vpu_jsm_hws_register_db {
u64 cmdq_size;
};
+/*
+ * @brief Structure to set another buffer to be used for scheduling-related logging.
+ * The size of the logging buffer and the number of entries is defined as part of the
+ * buffer itself as described next.
+ * The log buffer received from the host is made up of:
+ * - header: 32 bytes in size, as shown in 'struct vpu_hws_log_buffer_header'.
+ * The header contains the number of log entries in the buffer.
+ * - log entry: 0 to n-1, each log entry is 32 bytes in size, as shown in
+ * 'struct vpu_hws_log_buffer_entry'.
+ * The entry contains the VPU timestamp, operation type and data.
+ * The host should provide the notify index of the log buffer to the VPU. This is an
+ * entry index within the log buffer; when the VPU writes an entry at this index, the
+ * scheduling log notification is generated.
+ * The host should set engine_idx and vpu_log_buffer_va to 0 to disable logging
+ * for a particular engine.
+ * VPU will handle one log buffer for each of the supported engines.
+ * VPU should allow the logging to consume one host_ssid.
+ * @see VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG
+ * @see VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG_RSP
+ * @see VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION
+ */
+struct vpu_ipc_msg_payload_hws_set_scheduling_log {
+ /* Engine ordinal */
+ u32 engine_idx;
+ /* Host SSID */
+ u32 host_ssid;
+ /*
+ * VPU log buffer virtual address.
+ * Set to 0 to disable logging for this engine.
+ */
+ u64 vpu_log_buffer_va;
+ /*
+ * Notify index of log buffer. VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION
+ * is generated when an event log is written to this index.
+ */
+ u64 notify_index;
+};
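
A minimal sketch of sizing a scheduling log buffer (header plus n entries, per the layout described above) and filling this payload; the helper names are hypothetical, and mapping the buffer to a VPU virtual address is assumed to have been done elsewhere.

/* Hypothetical helpers: size a log buffer and populate the set-scheduling-log payload. */
static u64 example_hws_log_buffer_size(u64 num_entries)
{
	return sizeof(struct vpu_hws_log_buffer_header) +
	       num_entries * sizeof(struct vpu_hws_log_buffer_entry);
}

static void example_fill_set_scheduling_log(struct vpu_ipc_msg_payload_hws_set_scheduling_log *p,
					    u32 engine_idx, u32 host_ssid,
					    u64 vpu_log_buffer_va, u64 notify_index)
{
	p->engine_idx = engine_idx;
	p->host_ssid = host_ssid;
	p->vpu_log_buffer_va = vpu_log_buffer_va; /* 0 disables logging for this engine */
	p->notify_index = notify_index;
}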
+
+/*
+ * @brief The scheduling log notification is generated by VPU when it writes
+ * an event into the log buffer at the notify_index. VPU notifies host with
+ * VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION. This is an asynchronous
+ * message from VPU to host.
+ * @see VPU_JSM_MSG_HWS_SCHEDULING_LOG_NOTIFICATION
+ * @see VPU_JSM_MSG_HWS_SET_SCHEDULING_LOG
+ */
+struct vpu_ipc_msg_payload_hws_scheduling_log_notification {
+ /* Engine ordinal */
+ u32 engine_idx;
+ /* Zero Padding */
+ u32 reserved_0;
+};
+
+/*
+ * @brief HWS suspend command queue request and done structure.
+ * Host will request the suspend of contexts and VPU will:
+ * - Suspend all work on this context
+ * - Preempt any running work
+ * - Asynchronously perform the above and return success immediately once
+ * all items above are started successfully
+ * - Notify the host of completion of these operations via
+ * VPU_JSM_MSG_HWS_SUSPEND_CMDQ_DONE
+ * - Reject any other context operations on a context with an in-flight
+ * suspend request running
+ * Same structure used when VPU notifies host of completion of a context suspend
+ * request. The ids and suspend fence value reported in this command will match
+ * the one in the request from the host to suspend the context. Once suspend is
+ * complete, VPU will not access any data relating to this command queue until
+ * it is resumed.
+ * @see VPU_JSM_MSG_HWS_SUSPEND_CMDQ
+ * @see VPU_JSM_MSG_HWS_SUSPEND_CMDQ_DONE
+ */
+struct vpu_ipc_msg_payload_hws_suspend_cmdq {
+ /* Host SSID */
+ u32 host_ssid;
+ /* Zero Padding */
+ u32 reserved_0;
+ /* Command queue id */
+ u64 cmdq_id;
+ /*
+ * Suspend fence value - reported back in VPU_JSM_MSG_HWS_SUSPEND_CMDQ_DONE
+ * once the suspend is complete.
+ */
+ u64 suspend_fence_value;
+};
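
Because the same payload layout is used for the request and the DONE notification, the host can match a completion to its request by comparing the echoed fields. A minimal sketch, assuming the struct above and the kernel bool type; the helper name is hypothetical.

/* Hypothetical helper: check that a SUSPEND_CMDQ_DONE payload echoes the request. */
static bool example_suspend_done_matches(const struct vpu_ipc_msg_payload_hws_suspend_cmdq *req,
					 const struct vpu_ipc_msg_payload_hws_suspend_cmdq *done)
{
	return req->host_ssid == done->host_ssid &&
	       req->cmdq_id == done->cmdq_id &&
	       req->suspend_fence_value == done->suspend_fence_value;
}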
+
+/*
+ * @brief HWS Resume command queue request / response structure.
+ * Host will request the resume of a context:
+ * - VPU will resume all work on this context
+ * - Scheduler will allow this context to be scheduled
+ * @see VPU_JSM_MSG_HWS_RESUME_CMDQ
+ * @see VPU_JSM_MSG_HWS_RESUME_CMDQ_RSP
+ */
+struct vpu_ipc_msg_payload_hws_resume_cmdq {
+ /* Host SSID */
+ u32 host_ssid;
+ /* Zero Padding */
+ u32 reserved_0;
+ /* Command queue id */
+ u64 cmdq_id;
+};
+
+/*
+ * @brief HWS Resume engine request / response structure.
+ * After a HWS engine reset, all scheduling is stopped on VPU until an engine resume.
+ * Host shall send this command to resume scheduling of any valid queue.
+ * @see VPU_JSM_MSG_HWS_RESUME_ENGINE
+ * @see VPU_JSM_MSG_HWS_RESUME_ENGINE_DONE
+ */
+struct vpu_ipc_msg_payload_hws_resume_engine {
+ /* Engine to be resumed */
+ u32 engine_idx;
+ /* Reserved */
+ u32 reserved_0;
+};
+
/**
* Payload for VPU_JSM_MSG_TRACE_SET_CONFIG[_RSP] and
* VPU_JSM_MSG_TRACE_GET_CONFIG_RSP messages.
@@ -938,6 +1181,35 @@ struct vpu_ipc_msg_payload_dyndbg_control {
char dyndbg_cmd[VPU_DYNDBG_CMD_MAX_LEN];
};
+/**
+ * Payload for VPU_JSM_MSG_PWR_D0I3_ENTER
+ *
+ * This is a bi-directional payload.
+ */
+struct vpu_ipc_msg_payload_pwr_d0i3_enter {
+ /**
+ * 0: VPU_JSM_MSG_PWR_D0I3_ENTER_DONE is not sent to the host driver
+ * The driver will poll for D0i2 Idle state transitions.
+ * 1: VPU_JSM_MSG_PWR_D0I3_ENTER_DONE is sent after VPU state save is complete
+ */
+ u32 send_response;
+ u32 reserved_0;
+};
+
+/**
+ * Payload for VPU_JSM_MSG_DCT_ENABLE message.
+ *
+ * Default values for DCT active/inactive times are 5.3ms and 30ms respectively,
+ * corresponding to an 85% duty cycle. This payload allows the host to tune these
+ * values according to application requirements.
+ */
+struct vpu_ipc_msg_payload_pwr_dct_control {
+ /** Duty cycle active time in microseconds */
+ u32 dct_active_us;
+ /** Duty cycle inactive time in microseconds */
+ u32 dct_inactive_us;
+};
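
A minimal sketch of deriving the two fields from a requested duty cycle over a caller-chosen period; the helper name and the period parameter are hypothetical, and the 5.3ms/30ms defaults quoted above are not recomputed here.

/* Hypothetical helper: split a period into active/inactive time for a duty cycle. */
static void example_fill_dct_control(struct vpu_ipc_msg_payload_pwr_dct_control *p,
				     u32 duty_cycle_pct, u32 period_us)
{
	if (duty_cycle_pct > 100)
		duty_cycle_pct = 100;
	p->dct_active_us = (u32)((u64)period_us * duty_cycle_pct / 100);
	p->dct_inactive_us = period_us - p->dct_active_us;
}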
+
/*
* Payloads union, used to define complete message format.
*/
@@ -974,6 +1246,13 @@ union vpu_ipc_msg_payload {
struct vpu_ipc_msg_payload_hws_destroy_cmdq hws_destroy_cmdq;
struct vpu_ipc_msg_payload_hws_set_context_sched_properties
hws_set_context_sched_properties;
+ struct vpu_ipc_msg_payload_hws_set_scheduling_log hws_set_scheduling_log;
+ struct vpu_ipc_msg_payload_hws_scheduling_log_notification hws_scheduling_log_notification;
+ struct vpu_ipc_msg_payload_hws_suspend_cmdq hws_suspend_cmdq;
+ struct vpu_ipc_msg_payload_hws_resume_cmdq hws_resume_cmdq;
+ struct vpu_ipc_msg_payload_hws_resume_engine hws_resume_engine;
+ struct vpu_ipc_msg_payload_pwr_d0i3_enter pwr_d0i3_enter;
+ struct vpu_ipc_msg_payload_pwr_dct_control pwr_dct_control;
};
/*